about | summary | refs | log | tree | commit | diff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig6
-rw-r--r--drivers/infiniband/Makefile18
-rw-r--r--drivers/infiniband/core/Makefile7
-rw-r--r--drivers/infiniband/core/addr.c97
-rw-r--r--drivers/infiniband/core/cm.c40
-rw-r--r--drivers/infiniband/core/cma.c158
-rw-r--r--drivers/infiniband/core/core_priv.h2
-rw-r--r--drivers/infiniband/core/iwcm.c14
-rw-r--r--drivers/infiniband/core/iwpm_msg.c685
-rw-r--r--drivers/infiniband/core/iwpm_util.c607
-rw-r--r--drivers/infiniband/core/iwpm_util.h238
-rw-r--r--drivers/infiniband/core/mad.c14
-rw-r--r--drivers/infiniband/core/netlink.c20
-rw-r--r--drivers/infiniband/core/sa_query.c14
-rw-r--r--drivers/infiniband/core/sysfs.c87
-rw-r--r--drivers/infiniband/core/ucma.c22
-rw-r--r--drivers/infiniband/core/umem.c120
-rw-r--r--drivers/infiniband/core/user_mad.c75
-rw-r--r--drivers/infiniband/core/uverbs.h42
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c124
-rw-r--r--drivers/infiniband/core/uverbs_main.c127
-rw-r--r--drivers/infiniband/core/verbs.c173
-rw-r--r--drivers/infiniband/hw/Makefile12
-rw-r--r--drivers/infiniband/hw/amso1100/c2.c4
-rw-r--r--drivers/infiniband/hw/amso1100/c2_ae.c2
-rw-r--r--drivers/infiniband/hw/amso1100/c2_intr.c3
-rw-r--r--drivers/infiniband/hw/amso1100/c2_provider.c23
-rw-r--r--drivers/infiniband/hw/amso1100/c2_rnic.c3
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c6
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c19
-rw-r--r--drivers/infiniband/hw/cxgb4/Kconfig6
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c645
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c64
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c311
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h64
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c65
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c50
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c217
-rw-r--r--drivers/infiniband/hw/cxgb4/resource.c10
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h79
-rw-r--r--drivers/infiniband/hw/cxgb4/t4fw_ri_api.h15
-rw-r--r--drivers/infiniband/hw/cxgb4/user.h7
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.c257
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c68
-rw-r--r--drivers/infiniband/hw/ipath/ipath_dma.c43
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c39
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sdma.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_sdma.c7
-rw-r--r--drivers/infiniband/hw/mlx4/Kconfig2
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c42
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c2
-rw-r--r--drivers/infiniband/hw/mlx4/cm.c80
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c52
-rw-r--r--drivers/infiniband/hw/mlx4/doorbell.c4
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c162
-rw-r--r--drivers/infiniband/hw/mlx4/main.c947
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c5
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h46
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c39
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c546
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c7
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c118
-rw-r--r--drivers/infiniband/hw/mlx5/Kconfig2
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c410
-rw-r--r--drivers/infiniband/hw/mlx5/doorbell.c4
-rw-r--r--drivers/infiniband/hw/mlx5/main.c72
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c80
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h35
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c461
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c806
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c26
-rw-r--r--drivers/infiniband/hw/mlx5/user.h12
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c8
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c43
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c3
-rw-r--r--drivers/infiniband/hw/nes/nes.c30
-rw-r--r--drivers/infiniband/hw/nes/nes.h3
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c444
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h15
-rw-r--r--drivers/infiniband/hw/nes/nes_user.h5
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c263
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.h1
-rw-r--r--drivers/infiniband/hw/ocrdma/Kconfig2
-rw-r--r--drivers/infiniband/hw/ocrdma/Makefile2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h171
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_abi.h7
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c8
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c328
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.h7
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c230
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h263
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c616
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.h54
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c250
-rw-r--r--drivers/infiniband/hw/qib/qib.h12
-rw-r--r--drivers/infiniband/hw/qib/qib_diag.c52
-rw-r--r--drivers/infiniband/hw/qib/qib_dma.c21
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c11
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c53
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c104
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c46
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.h14
-rw-r--r--drivers/infiniband/hw/qib/qib_mr.c14
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c181
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c15
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c142
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h26
-rw-r--r--drivers/infiniband/hw/usnic/Kconfig10
-rw-r--r--drivers/infiniband/hw/usnic/Makefile15
-rw-r--r--drivers/infiniband/hw/usnic/usnic.h29
-rw-r--r--drivers/infiniband/hw/usnic/usnic_abi.h73
-rw-r--r--drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h27
-rw-r--r--drivers/infiniband/hw/usnic/usnic_common_util.h68
-rw-r--r--drivers/infiniband/hw/usnic/usnic_debugfs.c154
-rw-r--r--drivers/infiniband/hw/usnic/usnic_debugfs.h29
-rw-r--r--drivers/infiniband/hw/usnic/usnic_fwd.c350
-rw-r--r--drivers/infiniband/hw/usnic/usnic_fwd.h113
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib.h118
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c682
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c761
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h117
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c341
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.h29
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c768
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h72
-rw-r--r--drivers/infiniband/hw/usnic/usnic_log.h58
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.c202
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.h51
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c604
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.h80
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c254
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h73
-rw-r--r--drivers/infiniband/hw/usnic/usnic_vnic.c467
-rw-r--r--drivers/infiniband/hw/usnic/usnic_vnic.h103
-rw-r--r--drivers/infiniband/ulp/Makefile5
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c32
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c24
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c16
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c29
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_netlink.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c3
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c10
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c188
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h93
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c179
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c470
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c418
-rw-r--r--drivers/infiniband/ulp/isert/Kconfig4
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c1328
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h63
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c1206
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h116
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c65
169 files changed, 18110 insertions, 3972 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 5ceda710f51..77089399359 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -3,6 +3,8 @@ menuconfig INFINIBAND
depends on PCI || BROKEN
depends on HAS_IOMEM
depends on NET
+ depends on INET
+ depends on m || IPV6 != m
---help---
Core support for InfiniBand (IB). Make sure to also select
any protocols you wish to use as well as drivers for your
@@ -38,8 +40,7 @@ config INFINIBAND_USER_MEM
config INFINIBAND_ADDR_TRANS
bool
- depends on INET
- depends on !(INFINIBAND = y && IPV6 = m)
+ depends on INFINIBAND
default y
source "drivers/infiniband/hw/mthca/Kconfig"
@@ -53,6 +54,7 @@ source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
source "drivers/infiniband/hw/nes/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
+source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/ulp/ipoib/Kconfig"
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index 1fe69888515..dc21836b5a8 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,17 +1,3 @@
obj-$(CONFIG_INFINIBAND) += core/
-obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
-obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/
-obj-$(CONFIG_INFINIBAND_QIB) += hw/qib/
-obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/
-obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
-obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/
-obj-$(CONFIG_INFINIBAND_CXGB4) += hw/cxgb4/
-obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/
-obj-$(CONFIG_MLX5_INFINIBAND) += hw/mlx5/
-obj-$(CONFIG_INFINIBAND_NES) += hw/nes/
-obj-$(CONFIG_INFINIBAND_OCRDMA) += hw/ocrdma/
-obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
-obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
-obj-$(CONFIG_INFINIBAND_SRPT) += ulp/srpt/
-obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/
-obj-$(CONFIG_INFINIBAND_ISERT) += ulp/isert/
+obj-$(CONFIG_INFINIBAND) += hw/
+obj-$(CONFIG_INFINIBAND) += ulp/
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index c8bbaef1bec..ffd0af6734a 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,8 +1,9 @@
-infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o
+infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o
user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
- ib_cm.o iw_cm.o $(infiniband-y)
+ ib_cm.o iw_cm.o ib_addr.o \
+ $(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
@@ -17,7 +18,7 @@ ib_sa-y := sa_query.o multicast.o
ib_cm-y := cm.o
-iw_cm-y := iwcm.o
+iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
rdma_cm-y := cma.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index e90f2b2eabd..8172d37f9ad 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -86,6 +86,8 @@ int rdma_addr_size(struct sockaddr *addr)
}
EXPORT_SYMBOL(rdma_addr_size);
+static struct rdma_addr_client self;
+
void rdma_addr_register_client(struct rdma_addr_client *client)
{
atomic_set(&client->refcount, 1);
@@ -119,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
}
EXPORT_SYMBOL(rdma_copy_addr);
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+ u16 *vlan_id)
{
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
@@ -142,6 +145,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
return ret;
ret = rdma_copy_addr(dev_addr, dev, NULL);
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
@@ -153,6 +158,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
&((struct sockaddr_in6 *) addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
break;
}
}
@@ -238,7 +245,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
src_in->sin_addr.s_addr = fl4.saddr;
if (rt->dst.dev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+ ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
@@ -286,7 +293,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
}
if (dst->dev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+ ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
@@ -437,6 +444,88 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
}
EXPORT_SYMBOL(rdma_addr_cancel);
+/*
+ * Context shared between rdma_addr_find_dmac_by_grh() and resolve_cb().
+ * @addr:   caller-owned buffer that receives the resolved device address
+ * @comp:   completed by resolve_cb() once the resolver has called back
+ * @status: status value the resolver passed to resolve_cb()
+ */
+struct resolve_cb_context {
+	struct rdma_dev_addr *addr;
+	struct completion comp;
+	int status;
+};
+
+/*
+ * Callback handed to rdma_resolve_ip(): record the outcome of the
+ * resolution in the waiter's context and wake the waiter up.
+ */
+static void resolve_cb(int status, struct sockaddr *src_addr,
+		       struct rdma_dev_addr *addr, void *context)
+{
+	struct resolve_cb_context *ctx = context;
+
+	/*
+	 * Publish the address only when resolution succeeded.  The
+	 * resolver may hand back the very buffer the caller supplied;
+	 * skip the copy then, since memcpy() must not be given
+	 * overlapping regions.
+	 */
+	if (!status && ctx->addr != addr)
+		memcpy(ctx->addr, addr, sizeof(*ctx->addr));
+	ctx->status = status;
+	complete(&ctx->comp);
+}
+
+/*
+ * rdma_addr_find_dmac_by_grh - resolve the destination MAC for a GID pair
+ * @sgid:    source GID, converted to an IP socket address via rdma_gid2ip()
+ * @dgid:    destination GID, converted the same way
+ * @dmac:    output buffer; receives ETH_ALEN bytes of the destination
+ *           device address on success
+ * @vlan_id: optional output; when non-NULL it is set from
+ *           rdma_vlan_dev_vlan_id() of the bound net device
+ *
+ * Blocks until the address resolver invokes resolve_cb().
+ *
+ * Returns 0 on success, the error from rdma_gid2ip() or rdma_resolve_ip(),
+ * the non-zero status reported to resolve_cb(), or -ENODEV when the bound
+ * interface index no longer maps to a net device.
+ */
+int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
+			       u16 *vlan_id)
+{
+	int ret = 0;
+	struct rdma_dev_addr dev_addr;
+	struct resolve_cb_context ctx;
+	struct net_device *dev;
+
+	union {
+		struct sockaddr _sockaddr;
+		struct sockaddr_in _sockaddr_in;
+		struct sockaddr_in6 _sockaddr_in6;
+	} sgid_addr, dgid_addr;
+
+	ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+	if (ret)
+		return ret;
+
+	ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
+	if (ret)
+		return ret;
+
+	memset(&dev_addr, 0, sizeof(dev_addr));
+
+	ctx.addr = &dev_addr;
+	ctx.status = 0;
+	init_completion(&ctx.comp);
+	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
+			      &dev_addr, 1000, resolve_cb, &ctx);
+	if (ret)
+		return ret;
+
+	wait_for_completion(&ctx.comp);
+
+	/*
+	 * A failed resolution leaves dev_addr zeroed; report the failure
+	 * instead of returning success with a bogus MAC.
+	 */
+	ret = ctx.status;
+	if (ret)
+		return ret;
+
+	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
+	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
+	if (!dev)
+		return -ENODEV;
+	if (vlan_id)
+		*vlan_id = rdma_vlan_dev_vlan_id(dev);
+	dev_put(dev);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+
+/*
+ * rdma_addr_find_smac_by_sgid - look up the source MAC for a source GID
+ * @sgid:    source GID, converted to an IP socket address via rdma_gid2ip()
+ * @smac:    output buffer; receives ETH_ALEN bytes of the source device
+ *           address
+ * @vlan_id: handed straight to rdma_translate_ip()
+ *
+ * Returns 0 on success, or the error returned by rdma_gid2ip() or
+ * rdma_translate_ip().
+ */
+int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
+{
+	union {
+		struct sockaddr _sockaddr;
+		struct sockaddr_in _sockaddr_in;
+		struct sockaddr_in6 _sockaddr_in6;
+	} ip_addr;
+	struct rdma_dev_addr resolved;
+	int err;
+
+	err = rdma_gid2ip(&ip_addr._sockaddr, sgid);
+	if (err)
+		return err;
+
+	/* Translate into a zeroed scratch address. */
+	memset(&resolved, 0, sizeof(resolved));
+	err = rdma_translate_ip(&ip_addr._sockaddr, &resolved, vlan_id);
+	if (err)
+		return err;
+
+	memcpy(smac, resolved.src_dev_addr, ETH_ALEN);
+	return 0;
+}
+EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
+
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
@@ -461,11 +550,13 @@ static int __init addr_init(void)
return -ENOMEM;
register_netevent_notifier(&nb);
+ rdma_addr_register_client(&self);
return 0;
}
static void __exit addr_cleanup(void)
{
+ rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 784b97cb05b..c3239170d8b 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -47,6 +47,7 @@
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
+#include <linux/etherdevice.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
@@ -177,6 +178,8 @@ struct cm_av {
struct ib_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
+ u8 valid;
+ u8 smac[ETH_ALEN];
};
struct cm_work {
@@ -376,6 +379,9 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
&av->ah_attr);
av->timeout = path->packet_life_time + 1;
+ memcpy(av->smac, path->smac, sizeof(av->smac));
+
+ av->valid = 1;
return 0;
}
@@ -383,14 +389,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
unsigned long flags;
int id;
- static int next_id;
idr_preload(GFP_KERNEL);
spin_lock_irqsave(&cm.lock, flags);
- id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT);
- if (id >= 0)
- next_id = max(id + 1, 0);
+ id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
spin_unlock_irqrestore(&cm.lock, flags);
idr_preload_end();
@@ -1557,6 +1560,9 @@ static int cm_req_handler(struct cm_work *work)
cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
+
+ memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
+ work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
if (ret) {
ib_get_cached_gid(work->port->cm_dev->ib_device,
@@ -3503,6 +3509,32 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
+ if (!cm_id_priv->av.valid) {
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return -EINVAL;
+ }
+ if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
+ qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
+ *qp_attr_mask |= IB_QP_VID;
+ }
+ if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
+ memcpy(qp_attr->smac, cm_id_priv->av.smac,
+ sizeof(qp_attr->smac));
+ *qp_attr_mask |= IB_QP_SMAC;
+ }
+ if (cm_id_priv->alt_av.valid) {
+ if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
+ qp_attr->alt_vlan_id =
+ cm_id_priv->alt_av.ah_attr.vlan_id;
+ *qp_attr_mask |= IB_QP_ALT_VID;
+ }
+ if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
+ memcpy(qp_attr->alt_smac,
+ cm_id_priv->alt_av.smac,
+ sizeof(qp_attr->alt_smac));
+ *qp_attr_mask |= IB_QP_ALT_SMAC;
+ }
+ }
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index dab4b41f171..d570030d899 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -328,28 +328,6 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
return ret;
}
-static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
-{
- int i;
- int err;
- struct ib_port_attr props;
- union ib_gid tmp;
-
- err = ib_query_port(device, port_num, &props);
- if (err)
- return err;
-
- for (i = 0; i < props.gid_tbl_len; ++i) {
- err = ib_query_gid(device, port_num, i, &tmp);
- if (err)
- return err;
- if (!memcmp(&tmp, gid, sizeof tmp))
- return 0;
- }
-
- return -EADDRNOTAVAIL;
-}
-
static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
dev_addr->dev_type = ARPHRD_INFINIBAND;
@@ -362,7 +340,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
int ret;
if (addr->sa_family != AF_IB) {
- ret = rdma_translate_ip(addr, dev_addr);
+ ret = rdma_translate_ip(addr, dev_addr, NULL);
} else {
cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
ret = 0;
@@ -371,13 +349,14 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
return ret;
}
-static int cma_acquire_dev(struct rdma_id_private *id_priv)
+static int cma_acquire_dev(struct rdma_id_private *id_priv,
+ struct rdma_id_private *listen_id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
struct cma_device *cma_dev;
union ib_gid gid, iboe_gid;
int ret = -ENODEV;
- u8 port;
+ u8 port, found_port;
enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
@@ -386,20 +365,44 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
return -EINVAL;
mutex_lock(&lock);
- iboe_addr_get_sgid(dev_addr, &iboe_gid);
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &iboe_gid);
+
memcpy(&gid, dev_addr->src_dev_addr +
rdma_addr_gid_offset(dev_addr), sizeof gid);
+ if (listen_id_priv &&
+ rdma_port_get_link_layer(listen_id_priv->id.device,
+ listen_id_priv->id.port_num) == dev_ll) {
+ cma_dev = listen_id_priv->cma_dev;
+ port = listen_id_priv->id.port_num;
+ if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
+ rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
+ ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
+ &found_port, NULL);
+ else
+ ret = ib_find_cached_gid(cma_dev->device, &gid,
+ &found_port, NULL);
+
+ if (!ret && (port == found_port)) {
+ id_priv->id.port_num = found_port;
+ goto out;
+ }
+ }
list_for_each_entry(cma_dev, &dev_list, list) {
for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+ if (listen_id_priv &&
+ listen_id_priv->cma_dev == cma_dev &&
+ listen_id_priv->id.port_num == port)
+ continue;
if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
- ret = find_gid_port(cma_dev->device, &iboe_gid, port);
+ ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);
else
- ret = find_gid_port(cma_dev->device, &gid, port);
+ ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL);
- if (!ret) {
- id_priv->id.port_num = port;
+ if (!ret && (port == found_port)) {
+ id_priv->id.port_num = found_port;
goto out;
}
}
@@ -602,6 +605,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
+ union ib_gid sgid;
mutex_lock(&id_priv->qp_mutex);
if (!id_priv->id.qp) {
@@ -624,6 +628,20 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
if (ret)
goto out;
+ ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
+ qp_attr.ah_attr.grh.sgid_index, &sgid);
+ if (ret)
+ goto out;
+
+ if (rdma_node_get_transport(id_priv->cma_dev->device->node_type)
+ == RDMA_TRANSPORT_IB &&
+ rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)
+ == IB_LINK_LAYER_ETHERNET) {
+ ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
+
+ if (ret)
+ goto out;
+ }
if (conn_param)
qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
@@ -724,6 +742,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
else
ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
qp_attr_mask);
+
if (qp_attr->qp_state == IB_QPS_RTR)
qp_attr->rq_psn = id_priv->seq_num;
break;
@@ -1292,7 +1311,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
- ret = cma_acquire_dev(conn_id);
+ ret = cma_acquire_dev(conn_id, listen_id);
if (ret)
goto err2;
@@ -1308,13 +1327,13 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
ret = conn_id->id.event_handler(&conn_id->id, &event);
if (ret)
goto err3;
-
/*
* Acquire mutex to prevent user executing rdma_destroy_id()
* while we're accessing the cm_id.
*/
mutex_lock(&lock);
- if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
+ if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
+ (conn_id->id.qp_type != IB_QPT_UD))
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
@@ -1451,7 +1470,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
{
struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
- struct net_device *dev = NULL;
struct rdma_cm_event event;
int ret;
struct ib_device_attr attr;
@@ -1474,14 +1492,14 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
- ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
+ ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
goto out;
}
- ret = cma_acquire_dev(conn_id);
+ ret = cma_acquire_dev(conn_id, listen_id);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@@ -1529,8 +1547,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
cma_deref_id(conn_id);
out:
- if (dev)
- dev_put(dev);
mutex_unlock(&listen_id->handler_mutex);
return ret;
}
@@ -1848,6 +1864,26 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
return 0;
}
+/*
+ * Map a TOS value to a service level for the given net device.
+ *
+ * The TOS is first turned into a priority with rt_tos2priority().  If the
+ * underlying device (the real device behind a VLAN interface, otherwise
+ * @ndev itself) has traffic classes configured, the priority's traffic
+ * class is returned.  Otherwise, for a VLAN interface with VLAN support
+ * built in, the priority bits of the egress QoS mask are returned.
+ * In every other case the result is 0.
+ */
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+{
+	struct net_device *real_dev = ndev;
+	int skb_prio = rt_tos2priority(tos);
+
+	if (ndev->priv_flags & IFF_802_1Q_VLAN)
+		real_dev = vlan_dev_real_dev(ndev);
+
+	if (real_dev->num_tc)
+		return netdev_get_prio_tc_map(real_dev, skb_prio);
+
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
+	if (ndev->priv_flags & IFF_802_1Q_VLAN) {
+		int qos_mask = vlan_dev_get_egress_qos_mask(ndev, skb_prio);
+
+		return (qos_mask & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+	}
+#endif
+	return 0;
+}
+
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
@@ -1855,7 +1891,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
struct cma_work *work;
int ret;
struct net_device *ndev = NULL;
- u16 vid;
+
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
@@ -1879,20 +1915,20 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err2;
}
- vid = rdma_vlan_dev_vlan_id(ndev);
+ route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
+ memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
+ memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len);
- iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
- iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &route->path_rec->sgid);
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
+ &route->path_rec->dgid);
route->path_rec->hop_limit = 1;
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
- route->path_rec->sl = netdev_get_prio_tc_map(
- ndev->priv_flags & IFF_802_1Q_VLAN ?
- vlan_dev_real_dev(ndev) : ndev,
- rt_tos2priority(id_priv->tos));
-
+ route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
route->path_rec->rate = iboe_get_rate(ndev);
@@ -2049,8 +2085,9 @@ static void addr_handler(int status, struct sockaddr *src_addr,
RDMA_CM_ADDR_RESOLVED))
goto out;
+ memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
if (!status && !id_priv->cma_dev)
- status = cma_acquire_dev(id_priv);
+ status = cma_acquire_dev(id_priv, NULL);
if (status) {
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
@@ -2058,10 +2095,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,
goto out;
event.event = RDMA_CM_EVENT_ADDR_ERROR;
event.status = status;
- } else {
- memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
+ } else
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
- }
if (id_priv->id.event_handler(&id_priv->id, &event)) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
@@ -2294,9 +2329,9 @@ static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
int low, high, remaining;
unsigned int rover;
- inet_get_local_port_range(&low, &high);
+ inet_get_local_port_range(&init_net, &low, &high);
remaining = (high - low) + 1;
- rover = net_random() % remaining + low;
+ rover = prandom_u32() % remaining + low;
retry:
if (last_used_port != rover &&
!idr_find(ps, (unsigned short) rover)) {
@@ -2466,8 +2501,11 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
return 0;
sin6 = (struct sockaddr_in6 *) addr;
- if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
- !sin6->sin6_scope_id)
+
+ if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
+ return 0;
+
+ if (!sin6->sin6_scope_id)
return -EINVAL;
dev_addr->bound_dev_if = sin6->sin6_scope_id;
@@ -2542,17 +2580,17 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err1;
+ memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
if (!cma_any_addr(addr)) {
ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
if (ret)
goto err1;
- ret = cma_acquire_dev(id_priv);
+ ret = cma_acquire_dev(id_priv, NULL);
if (ret)
goto err1;
}
- memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
if (addr->sa_family == AF_INET)
id_priv->afonly = 1;
@@ -3281,7 +3319,8 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
err = -EINVAL;
goto out2;
}
- iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &mc->multicast.ib->rec.port_gid);
work->id = id_priv;
work->mc = mc;
INIT_WORK(&work->work, iboe_mcast_work_handler);
@@ -3568,7 +3607,8 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
sizeof *id_stats, RDMA_NL_RDMA_CM,
- RDMA_NL_RDMA_CM_ID_STATS);
+ RDMA_NL_RDMA_CM_ID_STATS,
+ NLM_F_MULTI);
if (!id_stats)
goto out;
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index a565af5c2d2..87d1936f5c1 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -49,4 +49,6 @@ void ib_sysfs_cleanup(void);
int ib_cache_setup(void);
void ib_cache_cleanup(void);
+int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask);
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index c47c2034ca7..3d2e489ab73 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -181,9 +181,16 @@ static void add_ref(struct iw_cm_id *cm_id)
static void rem_ref(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
+ int cb_destroy;
+
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- if (iwcm_deref_id(cm_id_priv) &&
- test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+
+ /*
+ * Test bit before deref in case the cm_id gets freed on another
+ * thread.
+ */
+ cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+ if (iwcm_deref_id(cm_id_priv) && cb_destroy) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
free_cm_id(cm_id_priv);
}
@@ -327,7 +334,6 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
unsigned long flags;
- int ret;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
/*
@@ -343,7 +349,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
/* destroy the listening endpoint */
- ret = cm_id->device->iwcm->destroy_listen(cm_id);
+ cm_id->device->iwcm->destroy_listen(cm_id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_ESTABLISHED:
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
new file mode 100644
index 00000000000..b85ddbc979e
--- /dev/null
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -0,0 +1,685 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "iwpm_util.h"
+
+/*
+ * Name and version of the user space port mapper library this file talks to.
+ *
+ * iwpm_register_pid() puts the name into the netlink message as an attribute
+ * of IWPM_ULIBNAME_SIZE bytes, so the array is sized with that macro rather
+ * than by its initializer; this keeps the copy inside the array and leaves
+ * the unused tail zero-filled.
+ */
+static const char iwpm_ulib_name[IWPM_ULIBNAME_SIZE] = "iWarpPortMapperUser";
+static int iwpm_ulib_version = 3;
+/* pid of the user space port mapper; values > 0 are treated as usable */
+static int iwpm_user_pid = IWPM_PID_UNDEFINED;
+/* netlink sequence number of the last accepted port mapper message */
+static atomic_t echo_nlmsg_seq;
+
+/* Tell the caller whether a positive port mapper pid is currently recorded. */
+int iwpm_valid_pid(void)
+{
+	if (iwpm_user_pid > 0)
+		return 1;
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_valid_pid);
+
+/*
+ * iwpm_register_pid - Send a netlink query to user space
+ *                     for the iwarp port mapper pid
+ * @pm_msg: device data (interface and device names) sent with the query;
+ *          also stored in the request so the response handler can compare
+ *          the device name it receives against it
+ * @nl_client: netlink client index of the caller
+ *
+ * nlmsg attributes:
+ *	[IWPM_NLA_REG_PID_SEQ]
+ *	[IWPM_NLA_REG_IF_NAME]
+ *	[IWPM_NLA_REG_IBDEV_NAME]
+ *	[IWPM_NLA_REG_ULIB_NAME]
+ *
+ * Returns 0 if the client is already registered, otherwise the result of
+ * waiting for the response (see iwpm_wait_complete_req()), or a non-zero
+ * error if the message could not be built or sent.
+ */
+int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
+{
+	struct sk_buff *skb = NULL;
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	struct nlmsghdr *nlh;
+	u32 msg_seq;
+	const char *err_str = "";
+	int ret = -EINVAL;
+
+	if (!iwpm_valid_client(nl_client)) {
+		err_str = "Invalid port mapper client";
+		goto pid_query_error;
+	}
+	if (iwpm_registered_client(nl_client))
+		return 0;
+	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client);
+	if (!skb) {
+		err_str = "Unable to create a nlmsg";
+		goto pid_query_error;
+	}
+	nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL);
+	if (!nlmsg_request) {
+		err_str = "Unable to allocate netlink request";
+		goto pid_query_error;
+	}
+	msg_seq = atomic_read(&echo_nlmsg_seq);
+
+	/* fill in the pid request message */
+	err_str = "Unable to put attribute of the nlmsg";
+	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ);
+	if (ret)
+		goto pid_query_error;
+	ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE,
+				pm_msg->if_name, IWPM_NLA_REG_IF_NAME);
+	if (ret)
+		goto pid_query_error;
+	ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE,
+				pm_msg->dev_name, IWPM_NLA_REG_IBDEV_NAME);
+	if (ret)
+		goto pid_query_error;
+	ret = ibnl_put_attr(skb, nlh, IWPM_ULIBNAME_SIZE,
+				(char *)iwpm_ulib_name, IWPM_NLA_REG_ULIB_NAME);
+	if (ret)
+		goto pid_query_error;
+
+	pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
+		__func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
+
+	/*
+	 * The response handler reads req_buffer from the request, so it has
+	 * to be in place before the message leaves; a reply may be processed
+	 * before ibnl_multicast() returns to us.
+	 */
+	nlmsg_request->req_buffer = pm_msg;
+
+	ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
+	if (ret) {
+		skb = NULL; /* skb is freed in the netlink send-op handling */
+		iwpm_set_registered(nl_client, 1);
+		iwpm_user_pid = IWPM_PID_UNAVAILABLE;
+		err_str = "Unable to send a nlmsg";
+		goto pid_query_error;
+	}
+	ret = iwpm_wait_complete_req(nlmsg_request);
+	return ret;
+pid_query_error:
+	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+	if (skb)
+		dev_kfree_skb(skb);
+	if (nlmsg_request)
+		iwpm_free_nlmsg_request(&nlmsg_request->kref);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_register_pid);
+
+/*
+ * iwpm_add_mapping - Send a netlink add mapping message
+ * to the port mapper
+ * nlmsg attributes:
+ * [IWPM_NLA_MANAGE_MAPPING_SEQ]
+ * [IWPM_NLA_MANAGE_ADDR]
+ *
+ * pm_msg->loc_addr is sent as the address to map; pm_msg itself is stored
+ * in the request so that iwpm_add_mapping_cb() can fill in the mapped
+ * address. Returns 0 without sending anything when no port mapper pid is
+ * known, a non-zero error when the message cannot be built or sent, and
+ * otherwise the result of iwpm_wait_complete_req().
+ */
+int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
+{
+ struct sk_buff *skb = NULL;
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct nlmsghdr *nlh;
+ u32 msg_seq;
+ const char *err_str = "";
+ int ret = -EINVAL;
+
+ if (!iwpm_valid_client(nl_client)) {
+ err_str = "Invalid port mapper client";
+ goto add_mapping_error;
+ }
+ if (!iwpm_registered_client(nl_client)) {
+ err_str = "Unregistered port mapper client";
+ goto add_mapping_error;
+ }
+ /* no port mapper pid recorded: nothing to send, report success */
+ if (!iwpm_valid_pid())
+ return 0;
+ skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, nl_client);
+ if (!skb) {
+ err_str = "Unable to create a nlmsg";
+ goto add_mapping_error;
+ }
+ nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+ nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL);
+ if (!nlmsg_request) {
+ err_str = "Unable to allocate netlink request";
+ goto add_mapping_error;
+ }
+ /* the attribute carries the sequence of the last accepted mapper message */
+ msg_seq = atomic_read(&echo_nlmsg_seq);
+ /* fill in the add mapping message */
+ err_str = "Unable to put attribute of the nlmsg";
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+ IWPM_NLA_MANAGE_MAPPING_SEQ);
+ if (ret)
+ goto add_mapping_error;
+ ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+ &pm_msg->loc_addr, IWPM_NLA_MANAGE_ADDR);
+ if (ret)
+ goto add_mapping_error;
+ /* must be set before sending: the response handler reads it */
+ nlmsg_request->req_buffer = pm_msg;
+
+ ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+ if (ret) {
+ skb = NULL; /* skb is freed in the netlink send-op handling */
+ iwpm_user_pid = IWPM_PID_UNDEFINED;
+ err_str = "Unable to send a nlmsg";
+ goto add_mapping_error;
+ }
+ ret = iwpm_wait_complete_req(nlmsg_request);
+ return ret;
+add_mapping_error:
+ pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ if (skb)
+ dev_kfree_skb(skb);
+ if (nlmsg_request)
+ iwpm_free_nlmsg_request(&nlmsg_request->kref);
+ return ret;
+}
+EXPORT_SYMBOL(iwpm_add_mapping);
+
+/*
+ * iwpm_add_and_query_mapping - Send a netlink add and query
+ * mapping message to the port mapper
+ * nlmsg attributes:
+ * [IWPM_NLA_QUERY_MAPPING_SEQ]
+ * [IWPM_NLA_QUERY_LOCAL_ADDR]
+ * [IWPM_NLA_QUERY_REMOTE_ADDR]
+ *
+ * pm_msg->loc_addr and pm_msg->rem_addr are sent; pm_msg is stored in the
+ * request so that iwpm_add_and_query_mapping_cb() can fill in the mapped
+ * addresses. Returns 0 without sending anything when no port mapper pid
+ * is known, a non-zero error when the message cannot be built or sent,
+ * and otherwise the result of iwpm_wait_complete_req().
+ */
+int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
+{
+ struct sk_buff *skb = NULL;
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct nlmsghdr *nlh;
+ u32 msg_seq;
+ const char *err_str = "";
+ int ret = -EINVAL;
+
+ if (!iwpm_valid_client(nl_client)) {
+ err_str = "Invalid port mapper client";
+ goto query_mapping_error;
+ }
+ if (!iwpm_registered_client(nl_client)) {
+ err_str = "Unregistered port mapper client";
+ goto query_mapping_error;
+ }
+ /* no port mapper pid recorded: nothing to send, report success */
+ if (!iwpm_valid_pid())
+ return 0;
+ /* the two allocations below report failure as -ENOMEM */
+ ret = -ENOMEM;
+ skb = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &nlh, nl_client);
+ if (!skb) {
+ err_str = "Unable to create a nlmsg";
+ goto query_mapping_error;
+ }
+ nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+ nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq,
+ nl_client, GFP_KERNEL);
+ if (!nlmsg_request) {
+ err_str = "Unable to allocate netlink request";
+ goto query_mapping_error;
+ }
+ msg_seq = atomic_read(&echo_nlmsg_seq);
+
+ /* fill in the query message */
+ err_str = "Unable to put attribute of the nlmsg";
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+ IWPM_NLA_QUERY_MAPPING_SEQ);
+ if (ret)
+ goto query_mapping_error;
+ ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+ &pm_msg->loc_addr, IWPM_NLA_QUERY_LOCAL_ADDR);
+ if (ret)
+ goto query_mapping_error;
+ ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+ &pm_msg->rem_addr, IWPM_NLA_QUERY_REMOTE_ADDR);
+ if (ret)
+ goto query_mapping_error;
+ /* must be set before sending: the response handler reads it */
+ nlmsg_request->req_buffer = pm_msg;
+
+ ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+ if (ret) {
+ skb = NULL; /* skb is freed in the netlink send-op handling */
+ err_str = "Unable to send a nlmsg";
+ goto query_mapping_error;
+ }
+ ret = iwpm_wait_complete_req(nlmsg_request);
+ return ret;
+query_mapping_error:
+ pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ if (skb)
+ dev_kfree_skb(skb);
+ if (nlmsg_request)
+ iwpm_free_nlmsg_request(&nlmsg_request->kref);
+ return ret;
+}
+EXPORT_SYMBOL(iwpm_add_and_query_mapping);
+
+/*
+ * iwpm_remove_mapping - Send a netlink remove mapping message
+ * to the port mapper
+ * nlmsg attributes:
+ * [IWPM_NLA_MANAGE_MAPPING_SEQ]
+ * [IWPM_NLA_MANAGE_ADDR]
+ *
+ * Unlike the add/query senders, no request object is created and the
+ * function does not wait for a response. Returns 0 when the message was
+ * sent or when no port mapper pid is known, a non-zero error otherwise.
+ */
+int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
+{
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ u32 msg_seq;
+ const char *err_str = "";
+ int ret = -EINVAL;
+
+ if (!iwpm_valid_client(nl_client)) {
+ err_str = "Invalid port mapper client";
+ goto remove_mapping_error;
+ }
+ if (!iwpm_registered_client(nl_client)) {
+ err_str = "Unregistered port mapper client";
+ goto remove_mapping_error;
+ }
+ /* no port mapper pid recorded: nothing to send, report success */
+ if (!iwpm_valid_pid())
+ return 0;
+ skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &nlh, nl_client);
+ if (!skb) {
+ ret = -ENOMEM;
+ err_str = "Unable to create a nlmsg";
+ goto remove_mapping_error;
+ }
+ msg_seq = atomic_read(&echo_nlmsg_seq);
+ nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+ err_str = "Unable to put attribute of the nlmsg";
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq,
+ IWPM_NLA_MANAGE_MAPPING_SEQ);
+ if (ret)
+ goto remove_mapping_error;
+ ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage),
+ local_addr, IWPM_NLA_MANAGE_ADDR);
+ if (ret)
+ goto remove_mapping_error;
+
+ ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+ if (ret) {
+ skb = NULL; /* skb is freed in the netlink send-op handling */
+ iwpm_user_pid = IWPM_PID_UNDEFINED;
+ err_str = "Unable to send a nlmsg";
+ goto remove_mapping_error;
+ }
+ iwpm_print_sockaddr(local_addr,
+ "remove_mapping: Local sockaddr:");
+ return 0;
+remove_mapping_error:
+ pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ if (skb)
+ dev_kfree_skb_any(skb);
+ return ret;
+}
+EXPORT_SYMBOL(iwpm_remove_mapping);
+
+/*
+ * netlink attribute policy for the received response to register pid request
+ *
+ * Passed to iwpm_parse_nlmsg() by iwpm_register_pid_cb(). The two string
+ * attributes are limited to one character less than the size of the
+ * corresponding name buffer.
+ */
+static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
+ [IWPM_NLA_RREG_PID_SEQ] = { .type = NLA_U32 },
+ [IWPM_NLA_RREG_IBDEV_NAME] = { .type = NLA_STRING,
+ .len = IWPM_DEVNAME_SIZE - 1 },
+ [IWPM_NLA_RREG_ULIB_NAME] = { .type = NLA_STRING,
+ .len = IWPM_ULIBNAME_SIZE - 1 },
+ [IWPM_NLA_RREG_ULIB_VER] = { .type = NLA_U16 },
+ [IWPM_NLA_RREG_PID_ERR] = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_register_pid_cb - Process a port mapper response to
+ * iwpm_register_pid()
+ *
+ * Looks up the pending request by the echoed sequence number, checks that
+ * the device name, library name and library version in the response match
+ * what this side expects and, if so, records the sender's pid as the port
+ * mapper pid and marks the client as registered. The waiter on the request
+ * is woken in either case. Returns -EINVAL if the message cannot be parsed
+ * or no matching request exists, 0 otherwise.
+ */
+int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct nlattr *nltb[IWPM_NLA_RREG_PID_MAX];
+ struct iwpm_dev_data *pm_msg;
+ char *dev_name, *iwpm_name;
+ u32 msg_seq;
+ u8 nl_client;
+ u16 iwpm_version;
+ const char *msg_type = "Register Pid response";
+
+ if (iwpm_parse_nlmsg(cb, IWPM_NLA_RREG_PID_MAX,
+ resp_reg_policy, nltb, msg_type))
+ return -EINVAL;
+
+ msg_seq = nla_get_u32(nltb[IWPM_NLA_RREG_PID_SEQ]);
+ /* on success this takes a reference that is dropped at the exit label */
+ nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+ if (!nlmsg_request) {
+ pr_info("%s: Could not find a matching request (seq = %u)\n",
+ __func__, msg_seq);
+ return -EINVAL;
+ }
+ /* req_buffer is the iwpm_dev_data supplied to iwpm_register_pid() */
+ pm_msg = nlmsg_request->req_buffer;
+ nl_client = nlmsg_request->nl_client;
+ dev_name = (char *)nla_data(nltb[IWPM_NLA_RREG_IBDEV_NAME]);
+ iwpm_name = (char *)nla_data(nltb[IWPM_NLA_RREG_ULIB_NAME]);
+ iwpm_version = nla_get_u16(nltb[IWPM_NLA_RREG_ULIB_VER]);
+
+ /* check device name, ulib name and version */
+ if (strcmp(pm_msg->dev_name, dev_name) ||
+ strcmp(iwpm_ulib_name, iwpm_name) ||
+ iwpm_version != iwpm_ulib_version) {
+
+ pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n",
+ __func__, dev_name, iwpm_name, iwpm_version);
+ nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+ goto register_pid_response_exit;
+ }
+ /* the sender of a matching response becomes the port mapper */
+ iwpm_user_pid = cb->nlh->nlmsg_pid;
+ atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+ pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
+ __func__, iwpm_user_pid);
+ if (iwpm_valid_client(nl_client))
+ iwpm_set_registered(nl_client, 1);
+register_pid_response_exit:
+ nlmsg_request->request_done = 1;
+ /* always for found nlmsg_request */
+ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+ barrier();
+ wake_up(&nlmsg_request->waitq);
+ return 0;
+}
+EXPORT_SYMBOL(iwpm_register_pid_cb);
+
+/*
+ * netlink attribute policy for the received response to add mapping request
+ *
+ * Passed to iwpm_parse_nlmsg() by iwpm_add_mapping_cb(). Both address
+ * attributes are declared with the length of a struct sockaddr_storage.
+ */
+static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
+ [IWPM_NLA_MANAGE_MAPPING_SEQ] = { .type = NLA_U32 },
+ [IWPM_NLA_MANAGE_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RMANAGE_MAPPING_ERR] = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_add_mapping_cb - Process a port mapper response to
+ * iwpm_add_mapping()
+ *
+ * Looks up the pending request by the echoed sequence number, verifies
+ * that the local address in the response equals the one that was sent and
+ * that the mapped address has the same address family, then copies the
+ * mapped address into the requester's iwpm_sa_data. The waiter on the
+ * request is woken whether or not the checks pass. Returns -EINVAL if the
+ * message cannot be parsed or no matching request exists, 0 otherwise.
+ */
+int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct iwpm_sa_data *pm_msg;
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct nlattr *nltb[IWPM_NLA_RMANAGE_MAPPING_MAX];
+ struct sockaddr_storage *local_sockaddr;
+ struct sockaddr_storage *mapped_sockaddr;
+ const char *msg_type;
+ u32 msg_seq;
+
+ msg_type = "Add Mapping response";
+ if (iwpm_parse_nlmsg(cb, IWPM_NLA_RMANAGE_MAPPING_MAX,
+ resp_add_policy, nltb, msg_type))
+ return -EINVAL;
+
+ /* remember the sequence number of this message for later requests */
+ atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+
+ msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]);
+ /* on success this takes a reference that is dropped at the exit label */
+ nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+ if (!nlmsg_request) {
+ pr_info("%s: Could not find a matching request (seq = %u)\n",
+ __func__, msg_seq);
+ return -EINVAL;
+ }
+ /* req_buffer is the iwpm_sa_data supplied to iwpm_add_mapping() */
+ pm_msg = nlmsg_request->req_buffer;
+ local_sockaddr = (struct sockaddr_storage *)
+ nla_data(nltb[IWPM_NLA_MANAGE_ADDR]);
+ mapped_sockaddr = (struct sockaddr_storage *)
+ nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]);
+
+ /* iwpm_compare_sockaddr() returns non-zero when the addresses differ */
+ if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) {
+ nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+ goto add_mapping_response_exit;
+ }
+ if (mapped_sockaddr->ss_family != local_sockaddr->ss_family) {
+ pr_info("%s: Sockaddr family doesn't match the requested one\n",
+ __func__);
+ nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+ goto add_mapping_response_exit;
+ }
+ memcpy(&pm_msg->mapped_loc_addr, mapped_sockaddr,
+ sizeof(*mapped_sockaddr));
+ iwpm_print_sockaddr(&pm_msg->loc_addr,
+ "add_mapping: Local sockaddr:");
+ iwpm_print_sockaddr(&pm_msg->mapped_loc_addr,
+ "add_mapping: Mapped local sockaddr:");
+
+add_mapping_response_exit:
+ nlmsg_request->request_done = 1;
+ /* always for found request */
+ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+ barrier();
+ wake_up(&nlmsg_request->waitq);
+ return 0;
+}
+EXPORT_SYMBOL(iwpm_add_mapping_cb);
+
+/*
+ * netlink attribute policy for the response to add and query mapping request
+ *
+ * Passed to iwpm_parse_nlmsg() by iwpm_add_and_query_mapping_cb(). All four
+ * address attributes are declared with the length of a struct
+ * sockaddr_storage.
+ */
+static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
+ [IWPM_NLA_QUERY_MAPPING_SEQ] = { .type = NLA_U32 },
+ [IWPM_NLA_QUERY_LOCAL_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_QUERY_REMOTE_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RQUERY_MAPPING_ERR] = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_add_and_query_mapping_cb - Process a port mapper response to
+ * iwpm_add_and_query_mapping()
+ *
+ * Looks up the pending request by the echoed sequence number, verifies
+ * that the local and remote addresses in the response equal the ones that
+ * were sent and that each mapped address has the same family as its
+ * original, then copies both mapped addresses into the requester's
+ * iwpm_sa_data. The waiter on the request is woken whether or not the
+ * checks pass. Returns -EINVAL if the message cannot be parsed or no
+ * matching request exists, 0 otherwise.
+ */
+int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct iwpm_sa_data *pm_msg;
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX];
+ struct sockaddr_storage *local_sockaddr, *remote_sockaddr;
+ struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr;
+ const char *msg_type;
+ u32 msg_seq;
+ u16 err_code;
+
+ msg_type = "Query Mapping response";
+ if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX,
+ resp_query_policy, nltb, msg_type))
+ return -EINVAL;
+ /* remember the sequence number of this message for later requests */
+ atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+
+ msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]);
+ /* on success this takes a reference that is dropped at the exit label */
+ nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+ if (!nlmsg_request) {
+ pr_info("%s: Could not find a matching request (seq = %u)\n",
+ __func__, msg_seq);
+ return -EINVAL;
+ }
+ /* req_buffer is the iwpm_sa_data supplied to iwpm_add_and_query_mapping() */
+ pm_msg = nlmsg_request->req_buffer;
+ local_sockaddr = (struct sockaddr_storage *)
+ nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
+ remote_sockaddr = (struct sockaddr_storage *)
+ nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
+ mapped_loc_sockaddr = (struct sockaddr_storage *)
+ nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
+ mapped_rem_sockaddr = (struct sockaddr_storage *)
+ nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]);
+
+ err_code = nla_get_u16(nltb[IWPM_NLA_RQUERY_MAPPING_ERR]);
+ /* a reject is recorded on the request; processing continues below */
+ if (err_code == IWPM_REMOTE_QUERY_REJECT) {
+ pr_info("%s: Received a Reject (pid = %u, echo seq = %u)\n",
+ __func__, cb->nlh->nlmsg_pid, msg_seq);
+ nlmsg_request->err_code = IWPM_REMOTE_QUERY_REJECT;
+ }
+ /* iwpm_compare_sockaddr() returns non-zero when the addresses differ */
+ if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr) ||
+ iwpm_compare_sockaddr(remote_sockaddr, &pm_msg->rem_addr)) {
+ pr_info("%s: Incorrect local sockaddr\n", __func__);
+ nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+ goto query_mapping_response_exit;
+ }
+ if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family ||
+ mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) {
+ pr_info("%s: Sockaddr family doesn't match the requested one\n",
+ __func__);
+ nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
+ goto query_mapping_response_exit;
+ }
+ memcpy(&pm_msg->mapped_loc_addr, mapped_loc_sockaddr,
+ sizeof(*mapped_loc_sockaddr));
+ memcpy(&pm_msg->mapped_rem_addr, mapped_rem_sockaddr,
+ sizeof(*mapped_rem_sockaddr));
+
+ iwpm_print_sockaddr(&pm_msg->loc_addr,
+ "query_mapping: Local sockaddr:");
+ iwpm_print_sockaddr(&pm_msg->mapped_loc_addr,
+ "query_mapping: Mapped local sockaddr:");
+ iwpm_print_sockaddr(&pm_msg->rem_addr,
+ "query_mapping: Remote sockaddr:");
+ iwpm_print_sockaddr(&pm_msg->mapped_rem_addr,
+ "query_mapping: Mapped remote sockaddr:");
+query_mapping_response_exit:
+ nlmsg_request->request_done = 1;
+ /* always for found request */
+ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+ barrier();
+ wake_up(&nlmsg_request->waitq);
+ return 0;
+}
+EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
+
+/*
+ * netlink attribute policy for the received request for mapping info
+ *
+ * Passed to iwpm_parse_nlmsg() by iwpm_mapping_info_cb(). The library name
+ * is limited to IWPM_ULIBNAME_SIZE - 1 characters.
+ */
+static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
+ [IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING,
+ .len = IWPM_ULIBNAME_SIZE - 1 },
+ [IWPM_NLA_MAPINFO_ULIB_VER] = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_mapping_info_cb - Process a port mapper request for mapping info
+ *
+ * Checks the library name and version carried in the request and that the
+ * client encoded in the message type is valid, clears the client's
+ * registered flag and, if mapping info is available, sends it to the pid
+ * the request came from. Returns -EINVAL when the message is rejected,
+ * 0 when there is nothing to send, otherwise the result of
+ * iwpm_send_mapinfo().
+ */
+int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nlattr *nltb[IWPM_NLA_MAPINFO_REQ_MAX];
+ const char *msg_type = "Mapping Info response";
+ int iwpm_pid;
+ u8 nl_client;
+ char *iwpm_name;
+ u16 iwpm_version;
+ int ret = -EINVAL;
+
+ if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_REQ_MAX,
+ resp_mapinfo_policy, nltb, msg_type)) {
+ pr_info("%s: Unable to parse nlmsg\n", __func__);
+ return ret;
+ }
+ iwpm_name = (char *)nla_data(nltb[IWPM_NLA_MAPINFO_ULIB_NAME]);
+ iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]);
+ if (strcmp(iwpm_ulib_name, iwpm_name) ||
+ iwpm_version != iwpm_ulib_version) {
+ pr_info("%s: Invalid port mapper name = %s version = %d\n",
+ __func__, iwpm_name, iwpm_version);
+ return ret;
+ }
+ /* the client index is taken from the netlink message type */
+ nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+ if (!iwpm_valid_client(nl_client)) {
+ pr_info("%s: Invalid port mapper client = %d\n",
+ __func__, nl_client);
+ return ret;
+ }
+ /* NOTE(review): presumably forces the client to register again - confirm */
+ iwpm_set_registered(nl_client, 0);
+ atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+ if (!iwpm_mapinfo_available())
+ return 0;
+ iwpm_pid = cb->nlh->nlmsg_pid;
+ pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
+ __func__, iwpm_pid);
+ ret = iwpm_send_mapinfo(nl_client, iwpm_pid);
+ return ret;
+}
+EXPORT_SYMBOL(iwpm_mapping_info_cb);
+
+/*
+ * netlink attribute policy for the received mapping info ack
+ *
+ * Passed to iwpm_parse_nlmsg() by iwpm_ack_mapping_info_cb(); all three
+ * attributes are 32-bit values.
+ */
+static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
+ [IWPM_NLA_MAPINFO_SEQ] = { .type = NLA_U32 },
+ [IWPM_NLA_MAPINFO_SEND_NUM] = { .type = NLA_U32 },
+ [IWPM_NLA_MAPINFO_ACK_NUM] = { .type = NLA_U32 }
+};
+
+/*
+ * iwpm_ack_mapping_info_cb - Handle the port mapper's acknowledgement of
+ *                            the mapping info records sent to it
+ *
+ * Logs a message when the acknowledged record count differs from the sent
+ * count, and records the sequence number of the ack. Returns -EINVAL if
+ * the message cannot be parsed, 0 otherwise.
+ */
+int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const char *msg_type = "Mapping Info Ack";
+	struct nlattr *nltb[IWPM_NLA_MAPINFO_NUM_MAX];
+	u32 sent_cnt;
+	u32 acked_cnt;
+	int rc;
+
+	rc = iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_NUM_MAX,
+			      ack_mapinfo_policy, nltb, msg_type);
+	if (rc)
+		return -EINVAL;
+
+	sent_cnt = nla_get_u32(nltb[IWPM_NLA_MAPINFO_SEND_NUM]);
+	acked_cnt = nla_get_u32(nltb[IWPM_NLA_MAPINFO_ACK_NUM]);
+	if (sent_cnt != acked_cnt) {
+		pr_info("%s: Invalid mapinfo number (sent = %u ack-ed = %u)\n",
+			__func__, sent_cnt, acked_cnt);
+	}
+
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_ack_mapping_info_cb);
+
+/*
+ * netlink attribute policy for the received port mapper error message
+ *
+ * Passed to iwpm_parse_nlmsg() by iwpm_mapping_error_cb().
+ */
+static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
+ [IWPM_NLA_ERR_SEQ] = { .type = NLA_U32 },
+ [IWPM_NLA_ERR_CODE] = { .type = NLA_U16 },
+};
+
+/*
+ * iwpm_mapping_error_cb - Process a port mapper error message
+ *
+ * Logs the error and, when a pending request with the given sequence
+ * number exists, stores the error code on it, marks it done and wakes its
+ * waiter. Returns -EINVAL if the message cannot be parsed, 0 otherwise
+ * (including when no matching request is found).
+ */
+int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ int nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+ struct nlattr *nltb[IWPM_NLA_ERR_MAX];
+ u32 msg_seq;
+ u16 err_code;
+ const char *msg_type = "Mapping Error Msg";
+
+ if (iwpm_parse_nlmsg(cb, IWPM_NLA_ERR_MAX,
+ map_error_policy, nltb, msg_type))
+ return -EINVAL;
+
+ msg_seq = nla_get_u32(nltb[IWPM_NLA_ERR_SEQ]);
+ err_code = nla_get_u16(nltb[IWPM_NLA_ERR_CODE]);
+ pr_info("%s: Received msg seq = %u err code = %u client = %d\n",
+ __func__, msg_seq, err_code, nl_client);
+ /* look for nlmsg_request */
+ nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
+ if (!nlmsg_request) {
+ /* not all errors have associated requests */
+ pr_debug("Could not find matching req (seq = %u)\n", msg_seq);
+ return 0;
+ }
+ /* the echo sequence is only updated when a request matched */
+ atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+ nlmsg_request->err_code = err_code;
+ nlmsg_request->request_done = 1;
+ /* always for found request */
+ kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+ barrier();
+ wake_up(&nlmsg_request->waitq);
+ return 0;
+}
+EXPORT_SYMBOL(iwpm_mapping_error_cb);
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
new file mode 100644
index 00000000000..69e9f84c160
--- /dev/null
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "iwpm_util.h"
+
+/* number of hlist heads allocated for the mapping info hash table */
+#define IWPM_HASH_BUCKET_SIZE 512
+#define IWPM_HASH_BUCKET_MASK (IWPM_HASH_BUCKET_SIZE - 1)
+
+/* netlink requests created by iwpm_get_nlmsg_request(); the list is */
+/* modified and searched with iwpm_nlmsg_req_lock held */
+static LIST_HEAD(iwpm_nlmsg_req_list);
+static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock);
+
+/* mapping info hash table: allocated in iwpm_init(), freed by */
+/* free_hash_bucket(); accessed with iwpm_mapinfo_lock held */
+static struct hlist_head *iwpm_hash_bucket;
+static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
+
+/* iwpm_admin_lock is held around refcount changes in iwpm_init()/iwpm_exit() */
+static DEFINE_MUTEX(iwpm_admin_lock);
+static struct iwpm_admin_data iwpm_admin;
+
+/*
+ * iwpm_init - Register a netlink client with the port mapper support code
+ * @nl_client: netlink client index
+ *
+ * The first successful call allocates the mapping info hash table; every
+ * successful call takes one reference on the shared state and marks the
+ * client as valid.
+ *
+ * Returns 0 on success, -EINVAL if the client index is out of range or the
+ * client is already valid, -ENOMEM if the hash table cannot be allocated.
+ */
+int iwpm_init(u8 nl_client)
+{
+	/*
+	 * iwpm_valid_client() reports 0 for an out-of-range index and
+	 * iwpm_set_valid() ignores it, so without this check such a caller
+	 * would take a reference that iwpm_exit() refuses to drop.
+	 */
+	if (nl_client >= RDMA_NL_NUM_CLIENTS)
+		return -EINVAL;
+	if (iwpm_valid_client(nl_client))
+		return -EINVAL;
+	mutex_lock(&iwpm_admin_lock);
+	if (atomic_read(&iwpm_admin.refcount) == 0) {
+		iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE *
+					sizeof(struct hlist_head), GFP_KERNEL);
+		if (!iwpm_hash_bucket) {
+			mutex_unlock(&iwpm_admin_lock);
+			pr_err("%s Unable to create mapinfo hash table\n", __func__);
+			return -ENOMEM;
+		}
+	}
+	atomic_inc(&iwpm_admin.refcount);
+	mutex_unlock(&iwpm_admin_lock);
+	iwpm_set_valid(nl_client, 1);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_init);
+
+static void free_hash_bucket(void);
+
+/*
+ * iwpm_exit - Drop a client's reference on the port mapper support code
+ * @nl_client: netlink client index
+ *
+ * The last reference frees the mapping info hash table. Returns 0 on
+ * success, -EINVAL if the client is not valid or no reference is held.
+ */
+int iwpm_exit(u8 nl_client)
+{
+	int status = 0;
+
+	if (!iwpm_valid_client(nl_client))
+		return -EINVAL;
+
+	mutex_lock(&iwpm_admin_lock);
+	if (!atomic_read(&iwpm_admin.refcount)) {
+		status = -EINVAL;
+	} else if (atomic_dec_and_test(&iwpm_admin.refcount)) {
+		free_hash_bucket();
+		pr_debug("%s: Mapinfo hash table is destroyed\n", __func__);
+	}
+	mutex_unlock(&iwpm_admin_lock);
+
+	if (status) {
+		pr_err("%s Incorrect usage - negative refcount\n", __func__);
+		return status;
+	}
+	iwpm_set_valid(nl_client, 0);
+	return 0;
+}
+EXPORT_SYMBOL(iwpm_exit);
+
+static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *,
+ struct sockaddr_storage *);
+
+/*
+ * iwpm_create_mapinfo - Record a local address to mapped address pair
+ * @local_sockaddr: local address
+ * @mapped_sockaddr: address the local one is mapped to
+ * @nl_client: netlink client index that owns the mapping
+ *
+ * Copies both addresses into a newly allocated record and adds it to the
+ * mapping info hash table.
+ *
+ * Returns 0 on success, -EINVAL if the client is not valid or the record
+ * could not be added to the hash table, -ENOMEM on allocation failure.
+ */
+int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
+			struct sockaddr_storage *mapped_sockaddr,
+			u8 nl_client)
+{
+	struct hlist_head *hash_bucket_head;
+	struct iwpm_mapping_info *map_info;
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	if (!iwpm_valid_client(nl_client))
+		return -EINVAL;
+	map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
+	if (!map_info) {
+		pr_err("%s: Unable to allocate a mapping info\n", __func__);
+		return -ENOMEM;
+	}
+	memcpy(&map_info->local_sockaddr, local_sockaddr,
+	       sizeof(struct sockaddr_storage));
+	memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
+	       sizeof(struct sockaddr_storage));
+	map_info->nl_client = nl_client;
+
+	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+	if (iwpm_hash_bucket) {
+		hash_bucket_head = get_hash_bucket_head(
+					&map_info->local_sockaddr,
+					&map_info->mapped_sockaddr);
+		/*
+		 * NOTE(review): get_hash_bucket_head() is defined outside
+		 * this view; the NULL check guards against it rejecting
+		 * an address - confirm.
+		 */
+		if (hash_bucket_head) {
+			hlist_add_head(&map_info->hlist_node, hash_bucket_head);
+			ret = 0;
+		}
+	}
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+
+	/* the record was not linked anywhere: do not leak it */
+	if (ret)
+		kfree(map_info);
+	return ret;
+}
+EXPORT_SYMBOL(iwpm_create_mapinfo);
+
+/*
+ * iwpm_remove_mapinfo - Delete the record for a mapped local address
+ * @local_sockaddr: local address, used to select the hash bucket
+ * @mapped_local_addr: mapped address identifying the record
+ *
+ * Removes and frees the first record in the bucket whose mapped address
+ * equals @mapped_local_addr. Returns 0 if a record was removed, -EINVAL
+ * if none was found or the hash table does not exist.
+ */
+int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
+			struct sockaddr_storage *mapped_local_addr)
+{
+	struct iwpm_mapping_info *entry = NULL;
+	struct hlist_node *next;
+	struct hlist_head *bucket;
+	unsigned long irq_flags;
+	int status = -EINVAL;
+
+	spin_lock_irqsave(&iwpm_mapinfo_lock, irq_flags);
+	if (!iwpm_hash_bucket)
+		goto out_unlock;
+
+	bucket = get_hash_bucket_head(local_sockaddr, mapped_local_addr);
+	hlist_for_each_entry_safe(entry, next, bucket, hlist_node) {
+		/* non-zero means the addresses differ: keep looking */
+		if (iwpm_compare_sockaddr(&entry->mapped_sockaddr,
+					  mapped_local_addr))
+			continue;
+
+		hlist_del_init(&entry->hlist_node);
+		kfree(entry);
+		status = 0;
+		break;
+	}
+out_unlock:
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, irq_flags);
+	return status;
+}
+EXPORT_SYMBOL(iwpm_remove_mapinfo);
+
+/*
+ * Free every mapping info record and then the hash table itself, all
+ * under iwpm_mapinfo_lock, leaving iwpm_hash_bucket set to NULL.
+ */
+static void free_hash_bucket(void)
+{
+	struct iwpm_mapping_info *entry;
+	struct hlist_node *next;
+	unsigned long irq_flags;
+	int bucket;
+
+	spin_lock_irqsave(&iwpm_mapinfo_lock, irq_flags);
+
+	/* empty each bucket in turn */
+	for (bucket = 0; bucket < IWPM_HASH_BUCKET_SIZE; bucket++) {
+		struct hlist_head *head = &iwpm_hash_bucket[bucket];
+
+		hlist_for_each_entry_safe(entry, next, head, hlist_node) {
+			hlist_del_init(&entry->hlist_node);
+			kfree(entry);
+		}
+	}
+
+	/* release the array of bucket heads */
+	kfree(iwpm_hash_bucket);
+	iwpm_hash_bucket = NULL;
+
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, irq_flags);
+}
+
+/*
+ * iwpm_get_nlmsg_request - Allocate a request object and add it to the
+ *                          list of pending netlink requests
+ * @nlmsg_seq: sequence number of the netlink message the request tracks
+ * @nl_client: netlink client index of the sender
+ * @gfp: allocation flags
+ *
+ * The object is returned with a reference count of two (kref_init() plus
+ * one kref_get()). It is fully initialized, including its wait queue,
+ * before it becomes visible on the list, so a response handler that finds
+ * it can safely take a reference and call wake_up() on it.
+ *
+ * Returns the new request, or NULL on allocation failure.
+ */
+struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
+					u8 nl_client, gfp_t gfp)
+{
+	struct iwpm_nlmsg_request *nlmsg_request = NULL;
+	unsigned long flags;
+
+	nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);
+	if (!nlmsg_request) {
+		pr_err("%s Unable to allocate a nlmsg_request\n", __func__);
+		return NULL;
+	}
+	kref_init(&nlmsg_request->kref);
+	kref_get(&nlmsg_request->kref);
+	init_waitqueue_head(&nlmsg_request->waitq);
+	nlmsg_request->nlmsg_seq = nlmsg_seq;
+	nlmsg_request->nl_client = nl_client;
+	nlmsg_request->request_done = 0;
+	nlmsg_request->err_code = 0;
+
+	/* publish only after every field above has been set */
+	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
+	list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list);
+	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
+	return nlmsg_request;
+}
+
+/*
+ * iwpm_free_nlmsg_request - Remove a request from the pending list and
+ *                           free it
+ * @kref: kref embedded in the request
+ *
+ * Used as the kref release function; also called directly on the error
+ * paths of the senders.
+ */
+void iwpm_free_nlmsg_request(struct kref *kref)
+{
+	struct iwpm_nlmsg_request *nlmsg_request;
+	unsigned long flags;
+
+	nlmsg_request = container_of(kref, struct iwpm_nlmsg_request, kref);
+
+	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
+	list_del_init(&nlmsg_request->inprocess_list);
+	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
+
+	if (!nlmsg_request->request_done)
+		pr_debug("%s Freeing incomplete nlmsg request (seq = %u).\n",
+			__func__, nlmsg_request->nlmsg_seq);
+	kfree(nlmsg_request);
+}
+
+/*
+ * iwpm_find_nlmsg_request - Look up a pending request by sequence number
+ * @echo_seq: sequence number to search for
+ *
+ * Returns the first matching request with an extra reference taken (the
+ * caller must drop it with kref_put()), or NULL if there is no match.
+ */
+struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq)
+{
+	struct iwpm_nlmsg_request *nlmsg_request;
+	struct iwpm_nlmsg_request *found_request = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
+	list_for_each_entry(nlmsg_request, &iwpm_nlmsg_req_list,
+			    inprocess_list) {
+		if (nlmsg_request->nlmsg_seq == echo_seq) {
+			found_request = nlmsg_request;
+			kref_get(&nlmsg_request->kref);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
+	return found_request;
+}
+
+/*
+ * iwpm_wait_complete_req - Wait for a response to a sent request
+ * @nlmsg_request: request to wait on
+ *
+ * Sleeps until the request is marked done or IWPM_NL_TIMEOUT expires, then
+ * drops one reference on the request.
+ *
+ * Returns -EINVAL on timeout, otherwise the error code stored on the
+ * request by the response handler.
+ */
+int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request)
+{
+	int ret;
+
+	/*
+	 * The wait queue was initialized in iwpm_get_nlmsg_request();
+	 * re-initializing it here could race with a response handler that
+	 * is already calling wake_up() on it.
+	 */
+	ret = wait_event_timeout(nlmsg_request->waitq,
+			(nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT);
+	if (!ret) {
+		ret = -EINVAL;
+		pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n",
+			__func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq);
+	} else {
+		ret = nlmsg_request->err_code;
+	}
+	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
+	return ret;
+}
+
+/* Increment the shared netlink sequence counter and return the new value. */
+int iwpm_get_nlmsg_seq(void)
+{
+	int next_seq = atomic_inc_return(&iwpm_admin.nlmsg_seq);
+
+	return next_seq;
+}
+
+/*
+ * Return the "valid" entry recorded for @nl_client, or 0 when the index
+ * is outside the client table.
+ */
+int iwpm_valid_client(u8 nl_client)
+{
+	if (nl_client < RDMA_NL_NUM_CLIENTS)
+		return iwpm_admin.client_list[nl_client];
+	return 0;
+}
+
+/*
+ * Store @valid as the "valid" entry of @nl_client; an index outside the
+ * client table is silently ignored.
+ */
+void iwpm_set_valid(u8 nl_client, int valid)
+{
+	if (nl_client < RDMA_NL_NUM_CLIENTS)
+		iwpm_admin.client_list[nl_client] = valid;
+}
+
+/*
+ * Return the "registered" entry of @nl_client.
+ * valid client: the index is not range-checked here, so the caller must
+ * pass a client that has already been validated.
+ */
+int iwpm_registered_client(u8 nl_client)
+{
+ return iwpm_admin.reg_list[nl_client];
+}
+
+/*
+ * Store @reg as the "registered" entry of @nl_client.
+ * valid client: the index is not range-checked here, so the caller must
+ * pass a client that has already been validated.
+ */
+void iwpm_set_registered(u8 nl_client, int reg)
+{
+ iwpm_admin.reg_list[nl_client] = reg;
+}
+
+/*
+ * iwpm_compare_sockaddr - Compare two socket addresses
+ *
+ * Returns 0 when both addresses have the same family (AF_INET or
+ * AF_INET6), the same address and the same port; returns 1 otherwise.
+ * A shared family other than those two is logged as an error.
+ */
+int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
+				struct sockaddr_storage *b_sockaddr)
+{
+	if (a_sockaddr->ss_family != b_sockaddr->ss_family)
+		return 1;
+
+	switch (a_sockaddr->ss_family) {
+	case AF_INET: {
+		struct sockaddr_in *lhs4 = (struct sockaddr_in *)a_sockaddr;
+		struct sockaddr_in *rhs4 = (struct sockaddr_in *)b_sockaddr;
+
+		if (lhs4->sin_port == rhs4->sin_port &&
+		    !memcmp(&lhs4->sin_addr, &rhs4->sin_addr,
+			    sizeof(struct in_addr)))
+			return 0;
+		break;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6 *lhs6 = (struct sockaddr_in6 *)a_sockaddr;
+		struct sockaddr_in6 *rhs6 = (struct sockaddr_in6 *)b_sockaddr;
+
+		if (lhs6->sin6_port == rhs6->sin6_port &&
+		    !memcmp(&lhs6->sin6_addr, &rhs6->sin6_addr,
+			    sizeof(struct in6_addr)))
+			return 0;
+		break;
+	}
+	default:
+		pr_err("%s: Invalid sockaddr family\n", __func__);
+		break;
+	}
+	return 1;
+}
+
+/*
+ * Allocate an skb of NLMSG_GOODSIZE and put a request header for
+ * (nl_client, nl_op) into it; *nlh is filled in by ibnl_put_msg().
+ *
+ * Returns the skb, or NULL when the allocation or the header put fails
+ * (the skb is freed in the latter case).
+ */
+struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
+				  int nl_client)
+{
+	struct sk_buff *msg_skb = dev_alloc_skb(NLMSG_GOODSIZE);
+
+	if (!msg_skb) {
+		pr_err("%s Unable to allocate skb\n", __func__);
+		return NULL;
+	}
+
+	if (ibnl_put_msg(msg_skb, nlh, 0, 0, nl_client, nl_op,
+			 NLM_F_REQUEST))
+		return msg_skb;
+
+	pr_warn("%s: Unable to put the nlmsg header\n", __func__);
+	dev_kfree_skb(msg_skb);
+	return NULL;
+}
+
+/*
+ * Validate and parse the netlink message referenced by cb->nlh, then make
+ * sure none of the attribute slots 1..policy_max-1 in nltb[] is NULL.
+ *
+ * Returns 0 on success; otherwise logs a warning naming the failed step
+ * and returns that step's error code.
+ */
+int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
+		     const struct nla_policy *nlmsg_policy,
+		     struct nlattr *nltb[], const char *msg_type)
+{
+	const int hdr_len = 0;
+	const char *reason;
+	int rc;
+
+	rc = nlmsg_validate(cb->nlh, hdr_len, policy_max-1, nlmsg_policy);
+	if (rc) {
+		reason = "Invalid attribute";
+	} else {
+		rc = nlmsg_parse(cb->nlh, hdr_len, nltb, policy_max-1,
+				 nlmsg_policy);
+		if (rc) {
+			reason = "Unable to parse the nlmsg";
+		} else {
+			rc = iwpm_validate_nlmsg_attr(nltb, policy_max);
+			if (!rc)
+				return 0;
+			reason = "Invalid NULL attribute";
+		}
+	}
+
+	pr_warn("%s: %s (msg type %s ret = %d)\n",
+		__func__, reason, msg_type, rc);
+	return rc;
+}
+
+/*
+ * Emit a debug line with msg followed by the IP address and port (in
+ * decimal and hex) of an AF_INET or AF_INET6 socket address. Other
+ * address families print nothing.
+ */
+void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg)
+{
+	if (sockaddr->ss_family == AF_INET) {
+		struct sockaddr_in *v4 = (struct sockaddr_in *)sockaddr;
+		u16 port = ntohs(v4->sin_port);
+
+		pr_debug("%s IPV4 %pI4: %u(0x%04X)\n",
+			 msg, &v4->sin_addr, port, port);
+	} else if (sockaddr->ss_family == AF_INET6) {
+		struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)sockaddr;
+		u16 port = ntohs(v6->sin6_port);
+
+		pr_debug("%s IPV6 %pI6: %u(0x%04X)\n",
+			 msg, &v6->sin6_addr, port, port);
+	}
+}
+
+/* Hash an IPv6 address together with its port into a single u32. */
+static u32 iwpm_ipv6_jhash(struct sockaddr_in6 *ipv6_sockaddr)
+{
+	u32 addr_hash;
+
+	addr_hash = jhash(&ipv6_sockaddr->sin6_addr,
+			  sizeof(struct in6_addr), 0);
+	return jhash_2words(addr_hash,
+			    (__force u32) ipv6_sockaddr->sin6_port, 0);
+}
+
+/* Hash an IPv4 address together with its port into a single u32. */
+static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr)
+{
+	u32 addr_hash;
+
+	addr_hash = jhash(&ipv4_sockaddr->sin_addr,
+			  sizeof(struct in_addr), 0);
+	return jhash_2words(addr_hash,
+			    (__force u32) ipv4_sockaddr->sin_port, 0);
+}
+
+/*
+ * Pick the hash bucket for a (local, mapped) address pair.
+ *
+ * Both addresses are hashed according to the family of local_sockaddr.
+ * When the two hashes are equal the local hash is used directly,
+ * otherwise the two are mixed. Returns NULL for an unsupported family.
+ */
+static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage
+					       *local_sockaddr,
+					       struct sockaddr_storage
+					       *mapped_sockaddr)
+{
+	u32 lhash, mhash, bucket_hash;
+
+	switch (local_sockaddr->ss_family) {
+	case AF_INET:
+		lhash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr);
+		mhash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr);
+		break;
+	case AF_INET6:
+		lhash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr);
+		mhash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr);
+		break;
+	default:
+		pr_err("%s: Invalid sockaddr family\n", __func__);
+		return NULL;
+	}
+
+	/* Equal hashes: the case when the port mapper isn't available */
+	bucket_hash = (lhash == mhash) ? lhash :
+					 jhash_2words(lhash, mhash, 0);
+
+	return &iwpm_hash_bucket[bucket_hash & IWPM_HASH_BUCKET_MASK];
+}
+
+/*
+ * Send a RDMA_NL_IWPM_MAPINFO_NUM message carrying the number of mapping
+ * records (mapping_num) to the netlink peer identified by iwpm_pid.
+ *
+ * Returns 0 on success. On failure an info message is logged and -EINVAL
+ * (message creation failed) or the error from the failing put/unicast
+ * call is returned.
+ */
+static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
+{
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ u32 msg_seq;
+ const char *err_str = "";
+ int ret = -EINVAL;
+
+ skb = iwpm_create_nlmsg(RDMA_NL_IWPM_MAPINFO_NUM, &nlh, nl_client);
+ if (!skb) {
+ err_str = "Unable to create a nlmsg";
+ goto mapinfo_num_error;
+ }
+ nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+ /* The IWPM_NLA_MAPINFO_SEQ attribute is always sent as 0 */
+ msg_seq = 0;
+ /* Shared error text for both attribute puts below */
+ err_str = "Unable to put attribute of mapinfo number nlmsg";
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_MAPINFO_SEQ);
+ if (ret)
+ goto mapinfo_num_error;
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32),
+ &mapping_num, IWPM_NLA_MAPINFO_SEND_NUM);
+ if (ret)
+ goto mapinfo_num_error;
+ ret = ibnl_unicast(skb, nlh, iwpm_pid);
+ if (ret) {
+ /*
+ * Clear skb so the error path does not free it; presumably the
+ * netlink layer has already consumed it on failure -- confirm.
+ */
+ skb = NULL;
+ err_str = "Unable to send a nlmsg";
+ goto mapinfo_num_error;
+ }
+ pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num);
+ return 0;
+mapinfo_num_error:
+ pr_info("%s: %s\n", __func__, err_str);
+ if (skb)
+ dev_kfree_skb(skb);
+ return ret;
+}
+
+/*
+ * Terminate a multipart message with an NLMSG_DONE header and unicast the
+ * skb to the netlink peer identified by iwpm_pid.
+ *
+ * Ownership: a non-NULL skb is always consumed. It is either handed to
+ * ibnl_unicast() or freed here when the NLMSG_DONE header cannot be
+ * added, so callers must not touch or free it afterwards. (Previously
+ * the skb was leaked on the header-put failure path, because the callers
+ * drop their pointer unconditionally.)
+ *
+ * Returns 0 on success or when skb is NULL, -ENOMEM when the header does
+ * not fit, otherwise the error from ibnl_unicast().
+ */
+static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
+{
+	struct nlmsghdr *nlh = NULL;
+	int ret = 0;
+
+	if (!skb)
+		return ret;
+	if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
+			   RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
+		pr_warn("%s Unable to put NLMSG_DONE\n", __func__);
+		/* Nobody else will release the skb on this path */
+		dev_kfree_skb(skb);
+		return -ENOMEM;
+	}
+	nlh->nlmsg_type = NLMSG_DONE;
+	ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid);
+	if (ret)
+		pr_warn("%s Unable to send a nlmsg\n", __func__);
+	return ret;
+}
+
+/*
+ * Send every mapping record that belongs to nl_client to the netlink peer
+ * identified by iwpm_pid, followed by a message carrying the record count.
+ *
+ * Records are packed into skbs of NLMSG_GOODSIZE. When an skb is nearly
+ * full it is terminated with NLMSG_DONE, sent, and a new one is
+ * allocated; the sequence fails with -ENOMEM once IWPM_MAPINFO_SKB_COUNT
+ * skbs have been used.
+ *
+ * Returns 0 on success. Returns -EINVAL when the hash table holds no
+ * record for nl_client (previously 'ret' was read uninitialized in that
+ * case), -ENOMEM on allocation/space failures, or the error code of the
+ * failing put/send call.
+ */
+int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
+{
+	struct iwpm_mapping_info *map_info;
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh;
+	int skb_num = 0, mapping_num = 0;
+	int i = 0, nlmsg_bytes = 0;
+	unsigned long flags;
+	const char *err_str = "";
+	int ret;
+
+	skb = dev_alloc_skb(NLMSG_GOODSIZE);
+	if (!skb) {
+		ret = -ENOMEM;
+		err_str = "Unable to allocate skb";
+		goto send_mapping_info_exit;
+	}
+	skb_num++;
+	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+	/*
+	 * If no record matches nl_client the loop body never assigns ret,
+	 * so give it a defined error value up front.
+	 */
+	ret = -EINVAL;
+	err_str = "No mapping info found for the client";
+	for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) {
+		hlist_for_each_entry(map_info, &iwpm_hash_bucket[i],
+				     hlist_node) {
+			if (map_info->nl_client != nl_client)
+				continue;
+			nlh = NULL;
+			if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client,
+					   RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) {
+				ret = -ENOMEM;
+				err_str = "Unable to put the nlmsg header";
+				goto send_mapping_info_unlock;
+			}
+			err_str = "Unable to put attribute of the nlmsg";
+			ret = ibnl_put_attr(skb, nlh,
+					    sizeof(struct sockaddr_storage),
+					    &map_info->local_sockaddr,
+					    IWPM_NLA_MAPINFO_LOCAL_ADDR);
+			if (ret)
+				goto send_mapping_info_unlock;
+
+			ret = ibnl_put_attr(skb, nlh,
+					    sizeof(struct sockaddr_storage),
+					    &map_info->mapped_sockaddr,
+					    IWPM_NLA_MAPINFO_MAPPED_ADDR);
+			if (ret)
+				goto send_mapping_info_unlock;
+
+			iwpm_print_sockaddr(&map_info->local_sockaddr,
+				"send_mapping_info: Local sockaddr:");
+			iwpm_print_sockaddr(&map_info->mapped_sockaddr,
+				"send_mapping_info: Mapped local sockaddr:");
+			mapping_num++;
+			nlmsg_bytes += nlh->nlmsg_len;
+
+			/* check if all mappings can fit in one skb */
+			if (NLMSG_GOODSIZE - nlmsg_bytes < nlh->nlmsg_len * 2) {
+				/* and leave room for NLMSG_DONE */
+				nlmsg_bytes = 0;
+				skb_num++;
+				/*
+				 * NOTE(review): the lock is dropped in the
+				 * middle of the bucket walk and map_info is
+				 * used to continue it after re-locking;
+				 * confirm the entry cannot be removed
+				 * meanwhile.
+				 */
+				spin_unlock_irqrestore(&iwpm_mapinfo_lock,
+						       flags);
+				/* send the skb */
+				ret = send_nlmsg_done(skb, nl_client, iwpm_pid);
+				skb = NULL;
+				if (ret) {
+					err_str = "Unable to send map info";
+					goto send_mapping_info_exit;
+				}
+				if (skb_num == IWPM_MAPINFO_SKB_COUNT) {
+					ret = -ENOMEM;
+					err_str = "Insufficient skbs for map info";
+					goto send_mapping_info_exit;
+				}
+				skb = dev_alloc_skb(NLMSG_GOODSIZE);
+				if (!skb) {
+					ret = -ENOMEM;
+					err_str = "Unable to allocate skb";
+					goto send_mapping_info_exit;
+				}
+				spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+			}
+		}
+	}
+send_mapping_info_unlock:
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+send_mapping_info_exit:
+	if (ret) {
+		pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret);
+		if (skb)
+			dev_kfree_skb(skb);
+		return ret;
+	}
+	/* Flush the last, partially filled skb, then report the count */
+	send_nlmsg_done(skb, nl_client, iwpm_pid);
+	return send_mapinfo_num(mapping_num, nl_client, iwpm_pid);
+}
+
+/*
+ * Report whether the mapping hash table holds at least one record.
+ *
+ * Scans the buckets under iwpm_mapinfo_lock and stops at the first
+ * non-empty one. Returns 1 if a record exists, 0 if the table is empty
+ * or not allocated.
+ */
+int iwpm_mapinfo_available(void)
+{
+	unsigned long irq_flags;
+	int found = 0;
+	int idx;
+
+	spin_lock_irqsave(&iwpm_mapinfo_lock, irq_flags);
+	for (idx = 0;
+	     iwpm_hash_bucket && !found && idx < IWPM_HASH_BUCKET_SIZE;
+	     idx++) {
+		if (!hlist_empty(&iwpm_hash_bucket[idx]))
+			found = 1;
+	}
+	spin_unlock_irqrestore(&iwpm_mapinfo_lock, irq_flags);
+	return found;
+}
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
new file mode 100644
index 00000000000..9777c869a14
--- /dev/null
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _IWPM_UTIL_H
+#define _IWPM_UTIL_H
+
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/jhash.h>
+#include <linux/kref.h>
+#include <net/netlink.h>
+#include <linux/errno.h>
+#include <rdma/iw_portmap.h>
+#include <rdma/rdma_netlink.h>
+
+
+#define IWPM_NL_RETRANS 3
+#define IWPM_NL_TIMEOUT (10*HZ)
+#define IWPM_MAPINFO_SKB_COUNT 20
+
+#define IWPM_PID_UNDEFINED -1
+#define IWPM_PID_UNAVAILABLE -2
+
+/* State of one netlink request that is waiting for its reply. */
+struct iwpm_nlmsg_request {
+ /* Linkage in the list searched by iwpm_find_nlmsg_request() */
+ struct list_head inprocess_list;
+ /* Sequence number that iwpm_find_nlmsg_request() matches on */
+ __u32 nlmsg_seq;
+ /* Not used by the helpers in iwpm_util.c shown here; presumably caller data -- confirm */
+ void *req_buffer;
+ /* Index of the netlink client that issued the request */
+ u8 nl_client;
+ /* Non-zero once the request is complete; condition of iwpm_wait_complete_req() */
+ u8 request_done;
+ /* Result handed back by iwpm_wait_complete_req() after completion */
+ u16 err_code;
+ /* Wait queue that iwpm_wait_complete_req() sleeps on */
+ wait_queue_head_t waitq;
+ /* Reference count; released through iwpm_free_nlmsg_request() */
+ struct kref kref;
+};
+
+/* One local-to-mapped address record kept in the mapping hash table. */
+struct iwpm_mapping_info {
+ /* Linkage in a hash bucket chain */
+ struct hlist_node hlist_node;
+ /* Sent as IWPM_NLA_MAPINFO_LOCAL_ADDR by iwpm_send_mapinfo() */
+ struct sockaddr_storage local_sockaddr;
+ /* Sent as IWPM_NLA_MAPINFO_MAPPED_ADDR by iwpm_send_mapinfo() */
+ struct sockaddr_storage mapped_sockaddr;
+ /* Owning netlink client; iwpm_send_mapinfo() filters records on it */
+ u8 nl_client;
+};
+
+/* Global bookkeeping shared by the port mapper helpers. */
+struct iwpm_admin_data {
+ /* Usage is not visible in this part of the patch; presumably counts users -- confirm */
+ atomic_t refcount;
+ /* Sequence counter advanced by iwpm_get_nlmsg_seq() */
+ atomic_t nlmsg_seq;
+ /* Per-client "valid" flags, see iwpm_valid_client()/iwpm_set_valid() */
+ int client_list[RDMA_NL_NUM_CLIENTS];
+ /* Per-client "registered" flags, see iwpm_registered_client()/iwpm_set_registered() */
+ int reg_list[RDMA_NL_NUM_CLIENTS];
+};
+
+/**
+ * iwpm_get_nlmsg_request - Allocate and initialize netlink message request
+ * @nlmsg_seq: Sequence number of the netlink message
+ * @nl_client: The index of the netlink client
+ * @gfp: Indicates how the memory for the request should be allocated
+ *
+ * Returns the newly allocated netlink request object if successful,
+ * otherwise returns NULL
+ */
+struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
+ u8 nl_client, gfp_t gfp);
+
+/**
+ * iwpm_free_nlmsg_request - Deallocate netlink message request
+ * @kref: Holds reference of netlink message request
+ */
+void iwpm_free_nlmsg_request(struct kref *kref);
+
+/**
+ * iwpm_find_nlmsg_request - Find netlink message request in the request list
+ * @echo_seq: Sequence number of the netlink request to find
+ *
+ * Returns the found netlink message request,
+ * if not found, returns NULL
+ */
+struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq);
+
+/**
+ * iwpm_wait_complete_req - Block while servicing the netlink request
+ * @nlmsg_request: Netlink message request to service
+ *
+ * Wakes up, after the request is completed or expired
+ * Returns 0 if the request is complete without error
+ */
+int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
+
+/**
+ * iwpm_get_nlmsg_seq - Get the sequence number for a netlink
+ * message to send to the port mapper
+ *
+ * Returns the sequence number for the netlink message.
+ */
+int iwpm_get_nlmsg_seq(void);
+
+/**
+ * iwpm_valid_client - Check if the port mapper client is valid
+ * @nl_client: The index of the netlink client
+ *
+ * Valid clients need to call iwpm_init() before using
+ * the port mapper
+ */
+int iwpm_valid_client(u8 nl_client);
+
+/**
+ * iwpm_set_valid - Set the port mapper client to valid or not
+ * @nl_client: The index of the netlink client
+ * @valid: 1 if valid or 0 if invalid
+ */
+void iwpm_set_valid(u8 nl_client, int valid);
+
+/**
+ * iwpm_registered_client - Check if the port mapper client is registered
+ * @nl_client: The index of the netlink client
+ *
+ * Call iwpm_register_pid() to register a client
+ */
+int iwpm_registered_client(u8 nl_client);
+
+/**
+ * iwpm_set_registered - Set the port mapper client to registered or not
+ * @nl_client: The index of the netlink client
+ * @reg: 1 if registered or 0 if not
+ */
+void iwpm_set_registered(u8 nl_client, int reg);
+
+/**
+ * iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of
+ * a client to the user space port mapper
+ * @nl_client: The index of the netlink client
+ * @iwpm_pid: The pid of the user space port mapper
+ *
+ * Returns 0 if the mapping info records and their count were sent
+ * successfully, otherwise returns an error code
+ */
+int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid);
+
+/**
+ * iwpm_mapinfo_available - Check if any mapping info records are available
+ * in the hash table
+ *
+ * Returns 1 if mapping information is available, otherwise returns 0
+ */
+int iwpm_mapinfo_available(void);
+
+/**
+ * iwpm_compare_sockaddr - Compare two sockaddr storage structs
+ *
+ * Returns 0 if they are holding the same ip/tcp address info,
+ * otherwise returns 1
+ */
+int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
+ struct sockaddr_storage *b_sockaddr);
+
+/**
+ * iwpm_validate_nlmsg_attr - Check for NULL netlink attributes
+ * @nltb: Holds address of each netlink message attributes
+ * @nla_count: Number of netlink message attributes
+ *
+ * Returns error if any of the nla_count attributes is NULL
+ */
+static inline int iwpm_validate_nlmsg_attr(struct nlattr *nltb[],
+					   int nla_count)
+{
+	int idx = 1;	/* slot 0 is not checked */
+
+	while (idx < nla_count) {
+		if (nltb[idx] == NULL)
+			return -EINVAL;
+		idx++;
+	}
+	return 0;
+}
+
+/**
+ * iwpm_create_nlmsg - Allocate skb and form a netlink message
+ * @nl_op: Netlink message opcode
+ * @nlh: Holds address of the netlink message header in skb
+ * @nl_client: The index of the netlink client
+ *
+ * Returns the newly allocated skb, or NULL if the skb cannot be allocated
+ * or its tailroom is insufficient to store the message header
+ */
+struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
+ int nl_client);
+
+/**
+ * iwpm_parse_nlmsg - Validate and parse the received netlink message
+ * @cb: Netlink callback structure
+ * @policy_max: Maximum attribute type to be expected
+ * @nlmsg_policy: Validation policy
+ * @nltb: Array to store policy_max parsed elements
+ * @msg_type: Type of netlink message
+ *
+ * Returns 0 on success or a negative error code
+ */
+int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
+ const struct nla_policy *nlmsg_policy,
+ struct nlattr *nltb[], const char *msg_type);
+
+/**
+ * iwpm_print_sockaddr - Print IPv4/IPv6 address and TCP port
+ * @sockaddr: Socket address to print
+ * @msg: Message to print
+ */
+void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
+#endif
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 4c837e66516..ab31f136d04 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1022,12 +1022,21 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->send_buf.mad,
sge[0].length,
DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
+ return -ENOMEM;
+
mad_send_wr->header_mapping = sge[0].addr;
sge[1].addr = ib_dma_map_single(mad_agent->device,
ib_get_payload(mad_send_wr),
sge[1].length,
DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
+ ib_dma_unmap_single(mad_agent->device,
+ mad_send_wr->header_mapping,
+ sge[0].length, DMA_TO_DEVICE);
+ return -ENOMEM;
+ }
mad_send_wr->payload_mapping = sge[1].addr;
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
@@ -2590,6 +2599,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
sizeof *mad_priv -
sizeof mad_priv->header,
DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
+ sg_list.addr))) {
+ ret = -ENOMEM;
+ break;
+ }
mad_priv->header.mapping = sg_list.addr;
recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
mad_priv->header.mad_list.mad_queue = recv_queue;
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index da06abde9e0..23dd5a5c759 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -103,13 +103,13 @@ int ibnl_remove_client(int index)
EXPORT_SYMBOL(ibnl_remove_client);
void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
- int len, int client, int op)
+ int len, int client, int op, int flags)
{
unsigned char *prev_tail;
prev_tail = skb_tail_pointer(skb);
*nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
- len, NLM_F_MULTI);
+ len, flags);
if (!*nlh)
goto out_nlmsg_trim;
(*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
@@ -148,7 +148,7 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
list_for_each_entry(client, &client_list, list) {
if (client->index == index) {
if (op < 0 || op >= client->nops ||
- !client->cb_table[RDMA_NL_GET_OP(op)].dump)
+ !client->cb_table[op].dump)
return -EINVAL;
{
@@ -172,6 +172,20 @@ static void ibnl_rcv(struct sk_buff *skb)
mutex_unlock(&ibnl_mutex);
}
+/*
+ * Unicast skb to the netlink port 'pid' over the RDMA netlink socket.
+ * The nlh argument is not used by this function.
+ */
+int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, __u32 pid)
+{
+	int rc = nlmsg_unicast(nls, skb, pid);
+
+	return rc;
+}
+EXPORT_SYMBOL(ibnl_unicast);
+
+/*
+ * Multicast skb to netlink group 'group' over the RDMA netlink socket,
+ * using 'flags' as the allocation mask. The nlh argument is not used by
+ * this function.
+ */
+int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
+		   unsigned int group, gfp_t flags)
+{
+	int rc = nlmsg_multicast(nls, skb, 0, group, flags);
+
+	return rc;
+}
+EXPORT_SYMBOL(ibnl_multicast);
+
int __init ibnl_init(void)
{
struct netlink_kernel_cfg cfg = {
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 9838ca48438..233eaf541f5 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -42,7 +42,7 @@
#include <linux/kref.h>
#include <linux/idr.h>
#include <linux/workqueue.h>
-
+#include <uapi/linux/if_ether.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_cache.h>
#include "sa.h"
@@ -556,6 +556,13 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
ah_attr->grh.hop_limit = rec->hop_limit;
ah_attr->grh.traffic_class = rec->traffic_class;
}
+ if (force_grh) {
+ memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
+ ah_attr->vlan_id = rec->vlan_id;
+ } else {
+ ah_attr->vlan_id = 0xffff;
+ }
+
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -611,7 +618,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
{
- bool preload = gfp_mask & __GFP_WAIT;
+ bool preload = !!(gfp_mask & __GFP_WAIT);
unsigned long flags;
int ret, id;
@@ -670,6 +677,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
mad->data, &rec);
+ rec.vlan_id = 0xffff;
+ memset(rec.dmac, 0, ETH_ALEN);
+ memset(rec.smac, 0, ETH_ALEN);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index cde1e7b5b85..cbd0383f622 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -429,15 +429,19 @@ static void ib_port_release(struct kobject *kobj)
struct attribute *a;
int i;
- for (i = 0; (a = p->gid_group.attrs[i]); ++i)
- kfree(a);
+ if (p->gid_group.attrs) {
+ for (i = 0; (a = p->gid_group.attrs[i]); ++i)
+ kfree(a);
- kfree(p->gid_group.attrs);
+ kfree(p->gid_group.attrs);
+ }
- for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
- kfree(a);
+ if (p->pkey_group.attrs) {
+ for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
+ kfree(a);
- kfree(p->pkey_group.attrs);
+ kfree(p->pkey_group.attrs);
+ }
kfree(p);
}
@@ -534,10 +538,12 @@ static int add_port(struct ib_device *device, int port_num,
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
- kobject_get(device->ports_parent),
+ device->ports_parent,
"%d", port_num);
- if (ret)
- goto err_put;
+ if (ret) {
+ kfree(p);
+ return ret;
+ }
ret = sysfs_create_group(&p->kobj, &pma_group);
if (ret)
@@ -585,6 +591,7 @@ err_free_pkey:
kfree(p->pkey_group.attrs[i]);
kfree(p->pkey_group.attrs);
+ p->pkey_group.attrs = NULL;
err_remove_gid:
sysfs_remove_group(&p->kobj, &p->gid_group);
@@ -594,13 +601,13 @@ err_free_gid:
kfree(p->gid_group.attrs[i]);
kfree(p->gid_group.attrs);
+ p->gid_group.attrs = NULL;
err_remove_pma:
sysfs_remove_group(&p->kobj, &pma_group);
err_put:
- kobject_put(device->ports_parent);
- kfree(p);
+ kobject_put(&p->kobj);
return ret;
}
@@ -612,6 +619,8 @@ static ssize_t show_node_type(struct device *device,
switch (dev->node_type) {
case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
+ case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type);
+ case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type);
case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
@@ -807,6 +816,22 @@ static struct attribute_group iw_stats_group = {
.attrs = iw_proto_stats_attrs,
};
+/*
+ * Tear down every port kobject on device->port_list: unlink it, remove
+ * its pma, pkey and gid sysfs groups and drop its reference. Finally
+ * drop the reference on the "ports" parent kobject.
+ */
+static void free_port_list_attributes(struct ib_device *device)
+{
+	struct kobject *port_kobj, *next_kobj;
+	struct ib_port *port;
+
+	list_for_each_entry_safe(port_kobj, next_kobj, &device->port_list,
+				 entry) {
+		port = container_of(port_kobj, struct ib_port, kobj);
+
+		list_del(&port_kobj->entry);
+		sysfs_remove_group(port_kobj, &pma_group);
+		sysfs_remove_group(port_kobj, &port->pkey_group);
+		sysfs_remove_group(port_kobj, &port->gid_group);
+		kobject_put(port_kobj);
+	}
+
+	kobject_put(device->ports_parent);
+}
+
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
@@ -833,7 +858,7 @@ int ib_device_register_sysfs(struct ib_device *device,
}
device->ports_parent = kobject_create_and_add("ports",
- kobject_get(&class_dev->kobj));
+ &class_dev->kobj);
if (!device->ports_parent) {
ret = -ENOMEM;
goto err_put;
@@ -860,21 +885,7 @@ int ib_device_register_sysfs(struct ib_device *device,
return 0;
err_put:
- {
- struct kobject *p, *t;
- struct ib_port *port;
-
- list_for_each_entry_safe(p, t, &device->port_list, entry) {
- list_del(&p->entry);
- port = container_of(p, struct ib_port, kobj);
- sysfs_remove_group(p, &pma_group);
- sysfs_remove_group(p, &port->pkey_group);
- sysfs_remove_group(p, &port->gid_group);
- kobject_put(p);
- }
- }
-
- kobject_put(&class_dev->kobj);
+ free_port_list_attributes(device);
err_unregister:
device_unregister(class_dev);
@@ -885,22 +896,18 @@ err:
void ib_device_unregister_sysfs(struct ib_device *device)
{
- struct kobject *p, *t;
- struct ib_port *port;
-
/* Hold kobject until ib_dealloc_device() */
- kobject_get(&device->dev.kobj);
+ struct kobject *kobj_dev = kobject_get(&device->dev.kobj);
+ int i;
- list_for_each_entry_safe(p, t, &device->port_list, entry) {
- list_del(&p->entry);
- port = container_of(p, struct ib_port, kobj);
- sysfs_remove_group(p, &pma_group);
- sysfs_remove_group(p, &port->pkey_group);
- sysfs_remove_group(p, &port->gid_group);
- kobject_put(p);
- }
+ if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats)
+ sysfs_remove_group(kobj_dev, &iw_stats_group);
+
+ free_port_list_attributes(device);
+
+ for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
+ device_remove_file(&device->dev, ib_class_attributes[i]);
- kobject_put(device->ports_parent);
device_unregister(&device->dev);
}
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index b0f189be543..56a4b7ca7ee 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -57,7 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");
static unsigned int max_backlog = 1024;
static struct ctl_table_header *ucma_ctl_table_hdr;
-static ctl_table ucma_ctl_table[] = {
+static struct ctl_table ucma_ctl_table[] = {
{
.procname = "max_backlog",
.data = &max_backlog,
@@ -271,7 +271,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
goto out;
}
ctx->backlog--;
- } else if (!ctx->uid) {
+ } else if (!ctx->uid || ctx->cm_id != cm_id) {
/*
* We ignore events for new connections until userspace has set
* their context. This can only happen if an error occurs on a
@@ -655,24 +655,14 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
struct rdma_route *route)
{
- struct rdma_dev_addr *dev_addr;
- struct net_device *dev;
- u16 vid = 0;
resp->num_paths = route->num_paths;
switch (route->num_paths) {
case 0:
- dev_addr = &route->addr.dev_addr;
- dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
- if (dev) {
- vid = rdma_vlan_dev_vlan_id(dev);
- dev_put(dev);
- }
-
- iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid,
- dev_addr->dst_dev_addr, vid);
- iboe_addr_get_sgid(dev_addr,
- (union ib_gid *) &resp->ib_route[0].sgid);
+ rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
+ (union ib_gid *)&resp->ib_route[0].dgid);
+ rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
+ (union ib_gid *)&resp->ib_route[0].sgid);
resp->ib_route[0].pkey = cpu_to_be16(0xffff);
break;
case 2:
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index a8411232207..a3a2e9c1639 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -42,29 +42,29 @@
#include "uverbs.h"
-#define IB_UMEM_MAX_PAGE_CHUNK \
- ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
- ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
- (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
- struct ib_umem_chunk *chunk, *tmp;
+ struct scatterlist *sg;
+ struct page *page;
int i;
- list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
- ib_dma_unmap_sg(dev, chunk->page_list,
- chunk->nents, DMA_BIDIRECTIONAL);
- for (i = 0; i < chunk->nents; ++i) {
- struct page *page = sg_page(&chunk->page_list[i]);
+ if (umem->nmap > 0)
+ ib_dma_unmap_sg(dev, umem->sg_head.sgl,
+ umem->nmap,
+ DMA_BIDIRECTIONAL);
- if (umem->writable && dirty)
- set_page_dirty_lock(page);
- put_page(page);
- }
+ for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
- kfree(chunk);
+ page = sg_page(sg);
+ if (umem->writable && dirty)
+ set_page_dirty_lock(page);
+ put_page(page);
}
+
+ sg_free_table(&umem->sg_head);
+ return;
+
}
/**
@@ -81,15 +81,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
struct ib_umem *umem;
struct page **page_list;
struct vm_area_struct **vma_list;
- struct ib_umem_chunk *chunk;
unsigned long locked;
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
int ret;
- int off;
int i;
DEFINE_DMA_ATTRS(attrs);
+ struct scatterlist *sg, *sg_list_start;
+ int need_release = 0;
if (dmasync)
dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
@@ -97,7 +97,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!can_do_mlock())
return ERR_PTR(-EPERM);
- umem = kmalloc(sizeof *umem, GFP_KERNEL);
+ umem = kzalloc(sizeof *umem, GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
@@ -117,8 +117,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
/* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1;
- INIT_LIST_HEAD(&umem->chunk_list);
-
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
kfree(umem);
@@ -147,7 +145,18 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
cur_base = addr & PAGE_MASK;
- ret = 0;
+ if (npages == 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
+ if (ret)
+ goto out;
+
+ need_release = 1;
+ sg_list_start = umem->sg_head.sgl;
+
while (npages) {
ret = get_user_pages(current, current->mm, cur_base,
min_t(unsigned long, npages,
@@ -157,54 +166,38 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (ret < 0)
goto out;
+ umem->npages += ret;
cur_base += ret * PAGE_SIZE;
npages -= ret;
- off = 0;
-
- while (ret) {
- chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
- min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
- GFP_KERNEL);
- if (!chunk) {
- ret = -ENOMEM;
- goto out;
- }
-
- chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
- sg_init_table(chunk->page_list, chunk->nents);
- for (i = 0; i < chunk->nents; ++i) {
- if (vma_list &&
- !is_vm_hugetlb_page(vma_list[i + off]))
- umem->hugetlb = 0;
- sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0);
- }
-
- chunk->nmap = ib_dma_map_sg_attrs(context->device,
- &chunk->page_list[0],
- chunk->nents,
- DMA_BIDIRECTIONAL,
- &attrs);
- if (chunk->nmap <= 0) {
- for (i = 0; i < chunk->nents; ++i)
- put_page(sg_page(&chunk->page_list[i]));
- kfree(chunk);
-
- ret = -ENOMEM;
- goto out;
- }
-
- ret -= chunk->nents;
- off += chunk->nents;
- list_add_tail(&chunk->list, &umem->chunk_list);
+ for_each_sg(sg_list_start, sg, ret, i) {
+ if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
+ umem->hugetlb = 0;
+
+ sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
}
- ret = 0;
+ /* preparing for next loop */
+ sg_list_start = sg;
}
+ umem->nmap = ib_dma_map_sg_attrs(context->device,
+ umem->sg_head.sgl,
+ umem->npages,
+ DMA_BIDIRECTIONAL,
+ &attrs);
+
+ if (umem->nmap <= 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = 0;
+
out:
if (ret < 0) {
- __ib_umem_release(context->device, umem, 0);
+ if (need_release)
+ __ib_umem_release(context->device, umem, 0);
kfree(umem);
} else
current->mm->pinned_vm = locked;
@@ -278,17 +271,16 @@ EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
- struct ib_umem_chunk *chunk;
int shift;
int i;
int n;
+ struct scatterlist *sg;
shift = ilog2(umem->page_size);
n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (i = 0; i < chunk->nmap; ++i)
- n += sg_dma_len(&chunk->page_list[i]) >> shift;
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
+ n += sg_dma_len(sg) >> shift;
return n;
}
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index f0d588f8859..1acb9910055 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -98,7 +98,7 @@ struct ib_umad_port {
struct ib_umad_device {
int start_port, end_port;
- struct kref ref;
+ struct kobject kobj;
struct ib_umad_port port[0];
};
@@ -134,14 +134,18 @@ static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device);
-static void ib_umad_release_dev(struct kref *ref)
+static void ib_umad_release_dev(struct kobject *kobj)
{
struct ib_umad_device *dev =
- container_of(ref, struct ib_umad_device, ref);
+ container_of(kobj, struct ib_umad_device, kobj);
kfree(dev);
}
+static struct kobj_type ib_umad_dev_ktype = {
+ .release = ib_umad_release_dev,
+};
+
static int hdr_size(struct ib_umad_file *file)
{
return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
@@ -780,27 +784,19 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
{
struct ib_umad_port *port;
struct ib_umad_file *file;
- int ret;
+ int ret = -ENXIO;
port = container_of(inode->i_cdev, struct ib_umad_port, cdev);
- if (port)
- kref_get(&port->umad_dev->ref);
- else
- return -ENXIO;
mutex_lock(&port->file_mutex);
- if (!port->ib_dev) {
- ret = -ENXIO;
+ if (!port->ib_dev)
goto out;
- }
+ ret = -ENOMEM;
file = kzalloc(sizeof *file, GFP_KERNEL);
- if (!file) {
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
- ret = -ENOMEM;
+ if (!file)
goto out;
- }
mutex_init(&file->mutex);
spin_lock_init(&file->send_lock);
@@ -814,6 +810,13 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
list_add_tail(&file->port_list, &port->file_list);
ret = nonseekable_open(inode, filp);
+ if (ret) {
+ list_del(&file->port_list);
+ kfree(file);
+ goto out;
+ }
+
+ kobject_get(&port->umad_dev->kobj);
out:
mutex_unlock(&port->file_mutex);
@@ -852,7 +855,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->port->file_mutex);
kfree(file);
- kref_put(&dev->ref, ib_umad_release_dev);
+ kobject_put(&dev->kobj);
return 0;
}
@@ -880,10 +883,6 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
int ret;
port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev);
- if (port)
- kref_get(&port->umad_dev->ref);
- else
- return -ENXIO;
if (filp->f_flags & O_NONBLOCK) {
if (down_trylock(&port->sm_sem)) {
@@ -898,17 +897,27 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
}
ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
- if (ret) {
- up(&port->sm_sem);
- goto fail;
- }
+ if (ret)
+ goto err_up_sem;
filp->private_data = port;
- return nonseekable_open(inode, filp);
+ ret = nonseekable_open(inode, filp);
+ if (ret)
+ goto err_clr_sm_cap;
+
+ kobject_get(&port->umad_dev->kobj);
+
+ return 0;
+
+err_clr_sm_cap:
+ swap(props.set_port_cap_mask, props.clr_port_cap_mask);
+ ib_modify_port(port->ib_dev, port->port_num, 0, &props);
+
+err_up_sem:
+ up(&port->sm_sem);
fail:
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
return ret;
}
@@ -927,7 +936,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
up(&port->sm_sem);
- kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+ kobject_put(&port->umad_dev->kobj);
return ret;
}
@@ -995,6 +1004,7 @@ static int find_overflow_devnum(void)
}
static int ib_umad_init_port(struct ib_device *device, int port_num,
+ struct ib_umad_device *umad_dev,
struct ib_umad_port *port)
{
int devnum;
@@ -1027,6 +1037,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
cdev_init(&port->cdev, &umad_fops);
port->cdev.owner = THIS_MODULE;
+ port->cdev.kobj.parent = &umad_dev->kobj;
kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
@@ -1045,6 +1056,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
base += IB_UMAD_MAX_PORTS;
cdev_init(&port->sm_cdev, &umad_sm_fops);
port->sm_cdev.owner = THIS_MODULE;
+ port->sm_cdev.kobj.parent = &umad_dev->kobj;
kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
@@ -1138,7 +1150,7 @@ static void ib_umad_add_one(struct ib_device *device)
if (!umad_dev)
return;
- kref_init(&umad_dev->ref);
+ kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype);
umad_dev->start_port = s;
umad_dev->end_port = e;
@@ -1146,7 +1158,8 @@ static void ib_umad_add_one(struct ib_device *device)
for (i = s; i <= e; ++i) {
umad_dev->port[i - s].umad_dev = umad_dev;
- if (ib_umad_init_port(device, i, &umad_dev->port[i - s]))
+ if (ib_umad_init_port(device, i, umad_dev,
+ &umad_dev->port[i - s]))
goto err;
}
@@ -1158,7 +1171,7 @@ err:
while (--i >= s)
ib_umad_kill_port(&umad_dev->port[i - s]);
- kref_put(&umad_dev->ref, ib_umad_release_dev);
+ kobject_put(&umad_dev->kobj);
}
static void ib_umad_remove_one(struct ib_device *device)
@@ -1172,7 +1185,7 @@ static void ib_umad_remove_one(struct ib_device *device)
for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
ib_umad_kill_port(&umad_dev->port[i]);
- kref_put(&umad_dev->ref, ib_umad_release_dev);
+ kobject_put(&umad_dev->kobj);
}
static char *umad_devnode(struct device *dev, umode_t *mode)
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index d040b877475..a283274a5a0 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -47,6 +47,22 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
+ do { \
+ (udata)->inbuf = (const void __user *) (ibuf); \
+ (udata)->outbuf = (void __user *) (obuf); \
+ (udata)->inlen = (ilen); \
+ (udata)->outlen = (olen); \
+ } while (0)
+
+#define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen) \
+ do { \
+ (udata)->inbuf = (ilen) ? (const void __user *) (ibuf) : NULL; \
+ (udata)->outbuf = (olen) ? (void __user *) (obuf) : NULL; \
+ (udata)->inlen = (ilen); \
+ (udata)->outlen = (olen); \
+ } while (0)
+
/*
* Our lifetime rules for these structs are the following:
*
@@ -178,6 +194,22 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+struct ib_uverbs_flow_spec {
+ union {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_spec_eth eth;
+ struct ib_uverbs_flow_spec_ipv4 ipv4;
+ struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
+ };
+};
+
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
const char __user *buf, int in_len, \
@@ -217,7 +249,13 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);
IB_UVERBS_DECLARE_CMD(create_xsrq);
IB_UVERBS_DECLARE_CMD(open_xrcd);
IB_UVERBS_DECLARE_CMD(close_xrcd);
-IB_UVERBS_DECLARE_CMD(create_flow);
-IB_UVERBS_DECLARE_CMD(destroy_flow);
+
+#define IB_UVERBS_DECLARE_EX_CMD(name) \
+ int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
+ struct ib_udata *ucore, \
+ struct ib_udata *uhw)
+
+IB_UVERBS_DECLARE_EX_CMD(create_flow);
+IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index f2b81b9ee0d..ea6203ee7bc 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -40,6 +40,7 @@
#include <asm/uaccess.h>
#include "uverbs.h"
+#include "core_priv.h"
struct uverbs_lock_class {
struct lock_class_key key;
@@ -56,14 +57,6 @@ static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
- do { \
- (udata)->inbuf = (void __user *) (ibuf); \
- (udata)->outbuf = (void __user *) (obuf); \
- (udata)->inlen = (ilen); \
- (udata)->outlen = (olen); \
- } while (0)
-
/*
* The ib_uobject locking scheme is as follows:
*
@@ -937,13 +930,9 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
- /*
- * Local write permission is required if remote write or
- * remote atomic permission is also requested.
- */
- if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
- !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
- return -EINVAL;
+ ret = ib_check_mr_access(cmd.access_flags);
+ if (ret)
+ return ret;
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
@@ -1973,6 +1962,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
if (qp->real_qp == qp) {
+ ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask);
+ if (ret)
+ goto out;
ret = qp->device->modify_qp(qp, attr,
modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
} else {
@@ -2126,6 +2118,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
}
next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn;
next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+ if (next->opcode == IB_WR_SEND_WITH_IMM)
+ next->ex.imm_data =
+ (__be32 __force) user_wr->ex.imm_data;
} else {
switch (next->opcode) {
case IB_WR_RDMA_WRITE_WITH_IMM:
@@ -2599,9 +2594,12 @@ out_put:
return ret ? ret : in_len;
}
-static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec)
{
+ if (kern_spec->reserved)
+ return -EINVAL;
+
ib_spec->type = kern_spec->type;
switch (ib_spec->type) {
@@ -2639,28 +2637,34 @@ static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
return 0;
}
-ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
{
struct ib_uverbs_create_flow cmd;
struct ib_uverbs_create_flow_resp resp;
struct ib_uobject *uobj;
struct ib_flow *flow_id;
- struct ib_kern_flow_attr *kern_flow_attr;
+ struct ib_uverbs_flow_attr *kern_flow_attr;
struct ib_flow_attr *flow_attr;
struct ib_qp *qp;
int err = 0;
void *kern_spec;
void *ib_spec;
int i;
- int kern_attr_size;
- if (out_len < sizeof(resp))
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
+
+ if (ucore->outlen < sizeof(resp))
return -ENOSPC;
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (err)
+ return err;
+
+ ucore->inbuf += sizeof(cmd);
+ ucore->inlen -= sizeof(cmd);
if (cmd.comp_mask)
return -EINVAL;
@@ -2669,32 +2673,31 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
!capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
return -EPERM;
- if (cmd.flow_attr.num_of_specs < 0 ||
- cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+ if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
return -EINVAL;
- kern_attr_size = cmd.flow_attr.size - sizeof(cmd) -
- sizeof(struct ib_uverbs_cmd_hdr_ex);
+ if (cmd.flow_attr.size > ucore->inlen ||
+ cmd.flow_attr.size >
+ (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
+ return -EINVAL;
- if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len ||
- kern_attr_size < 0 || kern_attr_size >
- (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec)))
+ if (cmd.flow_attr.reserved[0] ||
+ cmd.flow_attr.reserved[1])
return -EINVAL;
if (cmd.flow_attr.num_of_specs) {
- kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+ kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size,
+ GFP_KERNEL);
if (!kern_flow_attr)
return -ENOMEM;
memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
- if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd),
- kern_attr_size)) {
- err = -EFAULT;
+ err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
+ cmd.flow_attr.size);
+ if (err)
goto err_free_attr;
- }
} else {
kern_flow_attr = &cmd.flow_attr;
- kern_attr_size = sizeof(cmd.flow_attr);
}
uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
@@ -2711,7 +2714,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
goto err_uobj;
}
- flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+ flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);
if (!flow_attr) {
err = -ENOMEM;
goto err_put;
@@ -2726,19 +2729,23 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
kern_spec = kern_flow_attr + 1;
ib_spec = flow_attr + 1;
- for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) {
+ for (i = 0; i < flow_attr->num_of_specs &&
+ cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
+ cmd.flow_attr.size >=
+ ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
err = kern_spec_to_ib_spec(kern_spec, ib_spec);
if (err)
goto err_free;
flow_attr->size +=
((union ib_flow_spec *) ib_spec)->size;
- kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size;
- kern_spec += ((struct ib_kern_spec *) kern_spec)->size;
+ cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
+ kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
ib_spec += ((union ib_flow_spec *) ib_spec)->size;
}
- if (kern_attr_size) {
- pr_warn("create flow failed, %d bytes left from uverb cmd\n",
- kern_attr_size);
+ if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
+ pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
+ i, cmd.flow_attr.size);
+ err = -EINVAL;
goto err_free;
}
flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
@@ -2757,11 +2764,10 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
- if (copy_to_user((void __user *)(unsigned long) cmd.response,
- &resp, sizeof(resp))) {
- err = -EFAULT;
+ err = ib_copy_to_udata(ucore,
+ &resp, sizeof(resp));
+ if (err)
goto err_copy;
- }
put_qp_read(qp);
mutex_lock(&file->mutex);
@@ -2774,7 +2780,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
- return in_len;
+ return 0;
err_copy:
idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
destroy_flow:
@@ -2791,16 +2797,24 @@ err_free_attr:
return err;
}
-ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len) {
+int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
struct ib_uverbs_destroy_flow cmd;
struct ib_flow *flow_id;
struct ib_uobject *uobj;
int ret;
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ if (ucore->inlen < sizeof(cmd))
+ return -EINVAL;
+
+ ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ if (ret)
+ return ret;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
file->ucontext);
@@ -2822,7 +2836,7 @@ ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file,
put_uobj(uobj);
- return ret ? ret : in_len;
+ return ret;
}
static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 75ad86c4abf..08219fb3338 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -115,8 +115,13 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
[IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
[IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
- [IB_USER_VERBS_CMD_CREATE_FLOW] = ib_uverbs_create_flow,
- [IB_USER_VERBS_CMD_DESTROY_FLOW] = ib_uverbs_destroy_flow
+};
+
+static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw) = {
+ [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
+ [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow
};
static void ib_uverbs_add_one(struct ib_device *device);
@@ -587,6 +592,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
{
struct ib_uverbs_file *file = filp->private_data;
struct ib_uverbs_cmd_hdr hdr;
+ __u32 flags;
if (count < sizeof hdr)
return -EINVAL;
@@ -594,41 +600,110 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof hdr))
return -EFAULT;
- if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
- !uverbs_cmd_table[hdr.command])
- return -EINVAL;
+ flags = (hdr.command &
+ IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
- if (!file->ucontext &&
- hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
- return -EINVAL;
+ if (!flags) {
+ __u32 command;
- if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
- return -ENOSYS;
+ if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return -EINVAL;
- if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) {
- struct ib_uverbs_cmd_hdr_ex hdr_ex;
+ command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
- if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex)))
- return -EFAULT;
+ if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
+ !uverbs_cmd_table[command])
+ return -EINVAL;
- if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count)
+ if (!file->ucontext &&
+ command != IB_USER_VERBS_CMD_GET_CONTEXT)
return -EINVAL;
- return uverbs_cmd_table[hdr.command](file,
- buf + sizeof(hdr_ex),
- (hdr_ex.in_words +
- hdr_ex.provider_in_words) * 4,
- (hdr_ex.out_words +
- hdr_ex.provider_out_words) * 4);
- } else {
+ if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command)))
+ return -ENOSYS;
+
if (hdr.in_words * 4 != count)
return -EINVAL;
- return uverbs_cmd_table[hdr.command](file,
- buf + sizeof(hdr),
- hdr.in_words * 4,
- hdr.out_words * 4);
+ return uverbs_cmd_table[command](file,
+ buf + sizeof(hdr),
+ hdr.in_words * 4,
+ hdr.out_words * 4);
+
+ } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+ __u32 command;
+
+ struct ib_uverbs_ex_cmd_hdr ex_hdr;
+ struct ib_udata ucore;
+ struct ib_udata uhw;
+ int err;
+ size_t written_count = count;
+
+ if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return -EINVAL;
+
+ command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+ if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
+ !uverbs_ex_cmd_table[command])
+ return -ENOSYS;
+
+ if (!file->ucontext)
+ return -EINVAL;
+
+ if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command)))
+ return -ENOSYS;
+
+ if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+ return -EINVAL;
+
+ if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+ return -EFAULT;
+
+ count -= sizeof(hdr) + sizeof(ex_hdr);
+ buf += sizeof(hdr) + sizeof(ex_hdr);
+
+ if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
+ return -EINVAL;
+
+ if (ex_hdr.cmd_hdr_reserved)
+ return -EINVAL;
+
+ if (ex_hdr.response) {
+ if (!hdr.out_words && !ex_hdr.provider_out_words)
+ return -EINVAL;
+
+ if (!access_ok(VERIFY_WRITE,
+ (void __user *) (unsigned long) ex_hdr.response,
+ (hdr.out_words + ex_hdr.provider_out_words) * 8))
+ return -EFAULT;
+ } else {
+ if (hdr.out_words || ex_hdr.provider_out_words)
+ return -EINVAL;
+ }
+
+ INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
+ hdr.in_words * 8, hdr.out_words * 8);
+
+ INIT_UDATA_BUF_OR_NULL(&uhw,
+ buf + ucore.inlen,
+ (unsigned long) ex_hdr.response + ucore.outlen,
+ ex_hdr.provider_in_words * 8,
+ ex_hdr.provider_out_words * 8);
+
+ err = uverbs_ex_cmd_table[command](file,
+ &ucore,
+ &uhw);
+
+ if (err)
+ return err;
+
+ return written_count;
}
+
+ return -ENOSYS;
}
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index a321df28bab..c2b89cc5dbc 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -44,8 +44,11 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
+#include <rdma/ib_addr.h>
-int ib_rate_to_mult(enum ib_rate rate)
+#include "core_priv.h"
+
+__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
{
switch (rate) {
case IB_RATE_2_5_GBPS: return 1;
@@ -62,7 +65,7 @@ int ib_rate_to_mult(enum ib_rate rate)
}
EXPORT_SYMBOL(ib_rate_to_mult);
-enum ib_rate mult_to_ib_rate(int mult)
+__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
{
switch (mult) {
case 1: return IB_RATE_2_5_GBPS;
@@ -79,7 +82,7 @@ enum ib_rate mult_to_ib_rate(int mult)
}
EXPORT_SYMBOL(mult_to_ib_rate);
-int ib_rate_to_mbps(enum ib_rate rate)
+__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
{
switch (rate) {
case IB_RATE_2_5_GBPS: return 2500;
@@ -104,7 +107,7 @@ int ib_rate_to_mbps(enum ib_rate rate)
}
EXPORT_SYMBOL(ib_rate_to_mbps);
-enum rdma_transport_type
+__attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type)
{
switch (node_type) {
@@ -114,6 +117,10 @@ rdma_node_get_transport(enum rdma_node_type node_type)
return RDMA_TRANSPORT_IB;
case RDMA_NODE_RNIC:
return RDMA_TRANSPORT_IWARP;
+ case RDMA_NODE_USNIC:
+ return RDMA_TRANSPORT_USNIC;
+ case RDMA_NODE_USNIC_UDP:
+ return RDMA_TRANSPORT_USNIC_UDP;
default:
BUG();
return 0;
@@ -130,6 +137,8 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_
case RDMA_TRANSPORT_IB:
return IB_LINK_LAYER_INFINIBAND;
case RDMA_TRANSPORT_IWARP:
+ case RDMA_TRANSPORT_USNIC:
+ case RDMA_TRANSPORT_USNIC_UDP:
return IB_LINK_LAYER_ETHERNET;
default:
return IB_LINK_LAYER_UNSPECIFIED;
@@ -189,8 +198,28 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
u32 flow_class;
u16 gid_index;
int ret;
+ int is_eth = (rdma_port_get_link_layer(device, port_num) ==
+ IB_LINK_LAYER_ETHERNET);
memset(ah_attr, 0, sizeof *ah_attr);
+ if (is_eth) {
+ if (!(wc->wc_flags & IB_WC_GRH))
+ return -EPROTOTYPE;
+
+ if (wc->wc_flags & IB_WC_WITH_SMAC &&
+ wc->wc_flags & IB_WC_WITH_VLAN) {
+ memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
+ ah_attr->vlan_id = wc->vlan_id;
+ } else {
+ ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
+ ah_attr->dmac, &ah_attr->vlan_id);
+ if (ret)
+ return ret;
+ }
+ } else {
+ ah_attr->vlan_id = 0xffff;
+ }
+
ah_attr->dlid = wc->slid;
ah_attr->sl = wc->sl;
ah_attr->src_path_bits = wc->dlid_path_bits;
@@ -473,7 +502,9 @@ EXPORT_SYMBOL(ib_create_qp);
static const struct {
int valid;
enum ib_qp_attr_mask req_param[IB_QPT_MAX];
+ enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX];
enum ib_qp_attr_mask opt_param[IB_QPT_MAX];
+ enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
@@ -554,6 +585,12 @@ static const struct {
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
},
+ .req_param_add_eth = {
+ [IB_QPT_RC] = (IB_QP_SMAC),
+ [IB_QPT_UC] = (IB_QP_SMAC),
+ [IB_QPT_XRC_INI] = (IB_QP_SMAC),
+ [IB_QPT_XRC_TGT] = (IB_QP_SMAC)
+ },
.opt_param = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
@@ -573,7 +610,21 @@ static const struct {
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
- }
+ },
+ .opt_param_add_eth = {
+ [IB_QPT_RC] = (IB_QP_ALT_SMAC |
+ IB_QP_VID |
+ IB_QP_ALT_VID),
+ [IB_QPT_UC] = (IB_QP_ALT_SMAC |
+ IB_QP_VID |
+ IB_QP_ALT_VID),
+ [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC |
+ IB_QP_VID |
+ IB_QP_ALT_VID),
+ [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC |
+ IB_QP_VID |
+ IB_QP_ALT_VID)
+ }
}
},
[IB_QPS_RTR] = {
@@ -776,7 +827,8 @@ static const struct {
};
int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask)
+ enum ib_qp_type type, enum ib_qp_attr_mask mask,
+ enum rdma_link_layer ll)
{
enum ib_qp_attr_mask req_param, opt_param;
@@ -795,6 +847,13 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ req_param |= qp_state_table[cur_state][next_state].
+ req_param_add_eth[type];
+ opt_param |= qp_state_table[cur_state][next_state].
+ opt_param_add_eth[type];
+ }
+
if ((mask & req_param) != req_param)
return 0;
@@ -805,10 +864,51 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
+int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+{
+ int ret = 0;
+ union ib_gid sgid;
+
+ if ((*qp_attr_mask & IB_QP_AV) &&
+ (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == IB_LINK_LAYER_ETHERNET)) {
+ ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num,
+ qp_attr->ah_attr.grh.sgid_index, &sgid);
+ if (ret)
+ goto out;
+ if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
+ rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac);
+ rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac);
+ qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
+ } else {
+ ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid,
+ qp_attr->ah_attr.dmac, &qp_attr->vlan_id);
+ if (ret)
+ goto out;
+ ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL);
+ if (ret)
+ goto out;
+ }
+ *qp_attr_mask |= IB_QP_SMAC;
+ if (qp_attr->vlan_id < 0xFFFF)
+ *qp_attr_mask |= IB_QP_VID;
+ }
+out:
+ return ret;
+}
+EXPORT_SYMBOL(ib_resolve_eth_l2_attrs);
+
+
int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
+ int ret;
+
+ ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask);
+ if (ret)
+ return ret;
+
return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);
@@ -958,6 +1058,11 @@ EXPORT_SYMBOL(ib_resize_cq);
struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
{
struct ib_mr *mr;
+ int err;
+
+ err = ib_check_mr_access(mr_access_flags);
+ if (err)
+ return ERR_PTR(err);
mr = pd->device->get_dma_mr(pd, mr_access_flags);
@@ -980,6 +1085,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
u64 *iova_start)
{
struct ib_mr *mr;
+ int err;
+
+ err = ib_check_mr_access(mr_access_flags);
+ if (err)
+ return ERR_PTR(err);
if (!pd->device->reg_phys_mr)
return ERR_PTR(-ENOSYS);
@@ -1010,6 +1120,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr,
struct ib_pd *old_pd;
int ret;
+ ret = ib_check_mr_access(mr_access_flags);
+ if (ret)
+ return ret;
+
if (!mr->device->rereg_phys_mr)
return -ENOSYS;
@@ -1055,6 +1169,45 @@ int ib_dereg_mr(struct ib_mr *mr)
}
EXPORT_SYMBOL(ib_dereg_mr);
+struct ib_mr *ib_create_mr(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr)
+{
+ struct ib_mr *mr;
+
+ if (!pd->device->create_mr)
+ return ERR_PTR(-ENOSYS);
+
+ mr = pd->device->create_mr(pd, mr_init_attr);
+
+ if (!IS_ERR(mr)) {
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ atomic_set(&mr->usecnt, 0);
+ }
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_create_mr);
+
+int ib_destroy_mr(struct ib_mr *mr)
+{
+ struct ib_pd *pd;
+ int ret;
+
+ if (atomic_read(&mr->usecnt))
+ return -EBUSY;
+
+ pd = mr->pd;
+ ret = mr->device->destroy_mr(mr);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_destroy_mr);
+
struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
{
struct ib_mr *mr;
@@ -1284,3 +1437,11 @@ int ib_destroy_flow(struct ib_flow *flow_id)
return err;
}
EXPORT_SYMBOL(ib_destroy_flow);
+
+int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
+ struct ib_mr_status *mr_status)
+{
+ return mr->device->check_mr_status ?
+ mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_check_mr_status);
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
new file mode 100644
index 00000000000..e900b03531a
--- /dev/null
+++ b/drivers/infiniband/hw/Makefile
@@ -0,0 +1,12 @@
+obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/
+obj-$(CONFIG_INFINIBAND_IPATH) += ipath/
+obj-$(CONFIG_INFINIBAND_QIB) += qib/
+obj-$(CONFIG_INFINIBAND_EHCA) += ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/
+obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/
+obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
+obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
+obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
+obj-$(CONFIG_INFINIBAND_NES) += nes/
+obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
+obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index d53cf519f42..00400c352c1 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -1082,6 +1082,7 @@ static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
/* Initialize network device */
if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
+ ret = -ENOMEM;
iounmap(mmio_regs);
goto bail4;
}
@@ -1151,7 +1152,8 @@ static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
goto bail10;
}
- if (c2_register_device(c2dev))
+ ret = c2_register_device(c2dev);
+ if (ret)
goto bail10;
return 0;
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
index d5d1929753e..cedda25232b 100644
--- a/drivers/infiniband/hw/amso1100/c2_ae.c
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -141,7 +141,7 @@ static const char *to_qp_state_str(int state)
return "C2_QP_STATE_ERROR";
default:
return "<invalid QP state>";
- };
+ }
}
void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
index 8951db4ae29..3a17d9b36db 100644
--- a/drivers/infiniband/hw/amso1100/c2_intr.c
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -169,7 +169,8 @@ static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
* We should never get here, as the adapter should
* never send us a reply that we're not expecting.
*/
- vq_repbuf_free(c2dev, host_msg);
+ if (reply_msg != NULL)
+ vq_repbuf_free(c2dev, host_msg);
pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
return;
}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 07eb3a8067d..8af33cf1fc4 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -431,9 +431,9 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 *pages;
u64 kva = 0;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct c2_pd *c2pd = to_c2pd(pd);
struct c2_mr *c2mr;
@@ -452,10 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = ffs(c2mr->umem->page_size) - 1;
-
- n = 0;
- list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
- n += chunk->nents;
+ n = c2mr->umem->nmap;
pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
if (!pages) {
@@ -464,14 +461,12 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
i = 0;
- list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] =
- sg_dma_address(&chunk->page_list[j]) +
- (c2mr->umem->page_size * k);
- }
+ for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] =
+ sg_dma_address(sg) +
+ (c2mr->umem->page_size * k);
}
}
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
index b7c98699005..d2a6d961344 100644
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -576,7 +576,8 @@ int c2_rnic_init(struct c2_dev *c2dev)
goto bail4;
/* Initialize cached the adapter limits */
- if (c2_rnic_query(c2dev, &c2dev->props))
+ err = c2_rnic_query(c2dev, &c2dev->props);
+ if (err)
goto bail5;
/* Initialize the PD pool */
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index c3f5aca4ef0..de1c61b417d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -735,14 +735,12 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) |
V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
V_TPT_PAGE_SIZE(page_size));
- tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
- cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
+ tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
tpt.len = cpu_to_be32(len);
tpt.va_hi = cpu_to_be32((u32) (to >> 32));
tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
tpt.rsvd_bind_cnt_or_pstag = 0;
- tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
- cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
+ tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
}
err = cxio_hal_ctrl_qp_write_mem(rdev_p,
stag_idx +
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 095bb046e2c..cb78b1e9bcd 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -418,6 +418,7 @@ static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
skb->priority = CPL_PRIORITY_DATA;
set_arp_failure_handler(skb, abort_arp_failure);
req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
+ memset(req, 0, sizeof(*req));
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index d2283837d45..811b24a539c 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -618,14 +618,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
__be64 *pages;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
struct iwch_dev *rhp;
struct iwch_pd *php;
struct iwch_mr *mhp;
struct iwch_reg_user_mr_resp uresp;
-
+ struct scatterlist *sg;
PDBG("%s ib_pd %p\n", __func__, pd);
php = to_iwch_pd(pd);
@@ -645,9 +644,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mhp->umem->page_size) - 1;
- n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- n += chunk->nents;
+ n = mhp->umem->nmap;
err = iwch_alloc_pbl(mhp, n);
if (err)
@@ -661,12 +658,10 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
+ for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
- pages[i++] = cpu_to_be64(sg_dma_address(
- &chunk->page_list[j]) +
+ pages[i++] = cpu_to_be64(sg_dma_address(sg) +
mhp->umem->page_size * k);
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
@@ -676,7 +671,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = 0;
}
}
- }
+ }
if (i)
err = iwch_write_pbl(mhp, pages, i, n);
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig
index d4e8983fba5..23f38cf2c5c 100644
--- a/drivers/infiniband/hw/cxgb4/Kconfig
+++ b/drivers/infiniband/hw/cxgb4/Kconfig
@@ -1,10 +1,10 @@
config INFINIBAND_CXGB4
- tristate "Chelsio T4 RDMA Driver"
+ tristate "Chelsio T4/T5 RDMA Driver"
depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n)
select GENERIC_ALLOCATOR
---help---
- This is an iWARP/RDMA driver for the Chelsio T4 1GbE and
- 10GbE adapters.
+ This is an iWARP/RDMA driver for the Chelsio T4 and T5
+ 1GbE, 10GbE adapters and T5 40GbE adapter.
For general information about Chelsio and our products, visit
our website at <http://www.chelsio.com>.
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 12fef76c791..768a0fb67dd 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -47,6 +47,8 @@
#include <net/ip6_route.h>
#include <net/addrconf.h>
+#include <rdma/ib_addr.h>
+
#include "iw_cxgb4.h"
static char *states[] = {
@@ -98,9 +100,9 @@ int c4iw_debug;
module_param(c4iw_debug, int, 0644);
MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");
-static int peer2peer;
+static int peer2peer = 1;
module_param(peer2peer, int, 0644);
-MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
+MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
module_param(p2p_type, int, 0644);
@@ -173,12 +175,15 @@ static void start_ep_timer(struct c4iw_ep *ep)
add_timer(&ep->timer);
}
-static void stop_ep_timer(struct c4iw_ep *ep)
+static int stop_ep_timer(struct c4iw_ep *ep)
{
PDBG("%s ep %p stopping\n", __func__, ep);
del_timer_sync(&ep->timer);
- if (!test_and_set_bit(TIMEOUT, &ep->com.flags))
+ if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
c4iw_put_ep(&ep->com);
+ return 0;
+ }
+ return 1;
}
static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
@@ -229,12 +234,16 @@ static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
static void set_emss(struct c4iw_ep *ep, u16 opt)
{
- ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 40;
+ ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] -
+ sizeof(struct iphdr) - sizeof(struct tcphdr);
ep->mss = ep->emss;
if (GET_TCPOPT_TSTAMP(opt))
ep->emss -= 12;
if (ep->emss < 128)
ep->emss = 128;
+ if (ep->emss & 7)
+ PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n",
+ GET_TCPOPT_MSS(opt), ep->mss, ep->emss);
PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt),
ep->mss, ep->emss);
}
@@ -291,6 +300,12 @@ void _c4iw_free_ep(struct kref *kref)
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
}
+ if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) {
+ print_addr(&ep->com, __func__, "remove_mapinfo/mapping");
+ iwpm_remove_mapinfo(&ep->com.local_addr,
+ &ep->com.mapped_local_addr);
+ iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
+ }
kfree(ep);
}
@@ -338,10 +353,7 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
static struct net_device *get_real_dev(struct net_device *egress_dev)
{
- struct net_device *phys_dev = egress_dev;
- if (egress_dev->priv_flags & IFF_802_1Q_VLAN)
- phys_dev = vlan_dev_real_dev(egress_dev);
- return phys_dev;
+ return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
}
static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
@@ -400,7 +412,8 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
n = dst_neigh_lookup(&rt->dst, &peer_ip);
if (!n)
return NULL;
- if (!our_interface(dev, n->dev)) {
+ if (!our_interface(dev, n->dev) &&
+ !(n->dev->flags & IFF_LOOPBACK)) {
dst_release(&rt->dst);
return NULL;
}
@@ -419,8 +432,17 @@ static void arp_failure_discard(void *handle, struct sk_buff *skb)
*/
static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
{
+ struct c4iw_ep *ep = handle;
+
printk(KERN_ERR MOD "ARP failure duing connect\n");
kfree_skb(skb);
+ connect_reply_upcall(ep, -EHOSTUNREACH);
+ state_set(&ep->com, DEAD);
+ remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
+ cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
+ dst_release(ep->dst);
+ cxgb4_l2t_release(ep->l2t);
+ c4iw_put_ep(&ep->com);
}
/*
@@ -464,7 +486,7 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
- flowc->mnemval[6].val = cpu_to_be32(snd_win);
+ flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
flowc->mnemval[7].val = cpu_to_be32(ep->emss);
/* Pad WR to 16 byte boundary */
@@ -524,48 +546,47 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
-#define VLAN_NONE 0xfff
-#define FILTER_SEL_VLAN_NONE 0xffff
-#define FILTER_SEL_WIDTH_P_FC (3+1) /* port uses 3 bits, FCoE one bit */
-#define FILTER_SEL_WIDTH_VIN_P_FC \
- (6 + 7 + FILTER_SEL_WIDTH_P_FC) /* 6 bits are unused, VF uses 7 bits*/
-#define FILTER_SEL_WIDTH_TAG_P_FC \
- (3 + FILTER_SEL_WIDTH_VIN_P_FC) /* PF uses 3 bits */
-#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC)
+/*
+ * c4iw_form_pm_msg - Form a port mapper message with mapping info
+ */
+static void c4iw_form_pm_msg(struct c4iw_ep *ep,
+ struct iwpm_sa_data *pm_msg)
+{
+ memcpy(&pm_msg->loc_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&pm_msg->rem_addr, &ep->com.remote_addr,
+ sizeof(ep->com.remote_addr));
+}
-static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst,
- struct l2t_entry *l2t)
+/*
+ * c4iw_form_reg_msg - Form a port mapper message with dev info
+ */
+static void c4iw_form_reg_msg(struct c4iw_dev *dev,
+ struct iwpm_dev_data *pm_msg)
{
- unsigned int ntuple = 0;
- u32 viid;
+ memcpy(pm_msg->dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE);
+ memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name,
+ IWPM_IFNAME_SIZE);
+}
- switch (dev->rdev.lldi.filt_mode) {
+static void c4iw_record_pm_msg(struct c4iw_ep *ep,
+ struct iwpm_sa_data *pm_msg)
+{
+ memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr,
+ sizeof(ep->com.mapped_local_addr));
+ memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr,
+ sizeof(ep->com.mapped_remote_addr));
+}
- /* default filter mode */
- case HW_TPL_FR_MT_PR_IV_P_FC:
- if (l2t->vlan == VLAN_NONE)
- ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC;
- else {
- ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC;
- ntuple |= 1 << FILTER_SEL_WIDTH_TAG_P_FC;
- }
- ntuple |= l2t->lport << S_PORT | IPPROTO_TCP <<
- FILTER_SEL_WIDTH_VLD_TAG_P_FC;
- break;
- case HW_TPL_FR_MT_PR_OV_P_FC: {
- viid = cxgb4_port_viid(l2t->neigh->dev);
-
- ntuple |= FW_VIID_VIN_GET(viid) << FILTER_SEL_WIDTH_P_FC;
- ntuple |= FW_VIID_PFN_GET(viid) << FILTER_SEL_WIDTH_VIN_P_FC;
- ntuple |= FW_VIID_VIVLD_GET(viid) << FILTER_SEL_WIDTH_TAG_P_FC;
- ntuple |= l2t->lport << S_PORT | IPPROTO_TCP <<
- FILTER_SEL_WIDTH_VLD_TAG_P_FC;
- break;
- }
- default:
- break;
- }
- return ntuple;
+static void best_mtu(const unsigned short *mtus, unsigned short mtu,
+ unsigned int *idx, int use_ts)
+{
+ unsigned short hdr_size = sizeof(struct iphdr) +
+ sizeof(struct tcphdr) +
+ (use_ts ? 12 : 0);
+ unsigned short data_size = mtu - hdr_size;
+
+ cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
}
static int send_connect(struct c4iw_ep *ep)
@@ -586,10 +607,15 @@ static int send_connect(struct c4iw_ep *ep)
int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
sizeof(struct cpl_act_open_req6) :
sizeof(struct cpl_t5_act_open_req6);
- struct sockaddr_in *la = (struct sockaddr_in *)&ep->com.local_addr;
- struct sockaddr_in *ra = (struct sockaddr_in *)&ep->com.remote_addr;
- struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
- struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+ struct sockaddr_in *la = (struct sockaddr_in *)
+ &ep->com.mapped_local_addr;
+ struct sockaddr_in *ra = (struct sockaddr_in *)
+ &ep->com.mapped_remote_addr;
+ struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_local_addr;
+ struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_remote_addr;
+ int win;
wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
roundup(sizev4, 16) :
@@ -605,8 +631,18 @@ static int send_connect(struct c4iw_ep *ep)
}
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
- cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+ best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps);
wscale = compute_wscale(rcv_win);
+
+ /*
+ * Specify the largest window that will fit in opt0. The
+ * remainder will be specified in the rx_data_ack.
+ */
+ win = ep->rcv_win >> 10;
+ if (win > RCV_BUFSIZ_MASK)
+ win = RCV_BUFSIZ_MASK;
+
opt0 = (nocong ? NO_CONG(1) : 0) |
KEEP_ALIVE(1) |
DELACK(1) |
@@ -617,7 +653,7 @@ static int send_connect(struct c4iw_ep *ep)
SMAC_SEL(ep->smac_idx) |
DSCP(ep->tos) |
ULP_MODE(ULP_MODE_TCPDDP) |
- RCV_BUFSIZ(rcv_win>>10);
+ RCV_BUFSIZ(win);
opt2 = RX_CHANNEL(0) |
CCTRL_ECN(enable_ecn) |
RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
@@ -627,7 +663,11 @@ static int send_connect(struct c4iw_ep *ep)
opt2 |= SACK_EN(1);
if (wscale && enable_tcp_window_scaling)
opt2 |= WND_SCALE_EN(1);
- t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ opt2 |= T5_OPT_2_VALID;
+ opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+ }
+ t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
if (ep->com.remote_addr.ss_family == AF_INET) {
@@ -641,8 +681,9 @@ static int send_connect(struct c4iw_ep *ep)
req->local_ip = la->sin_addr.s_addr;
req->peer_ip = ra->sin_addr.s_addr;
req->opt0 = cpu_to_be64(opt0);
- req->params = cpu_to_be32(select_ntuple(ep->com.dev,
- ep->dst, ep->l2t));
+ req->params = cpu_to_be32(cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t));
req->opt2 = cpu_to_be32(opt2);
} else {
req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
@@ -662,12 +703,19 @@ static int send_connect(struct c4iw_ep *ep)
req6->peer_ip_lo = *((__be64 *)
(ra6->sin6_addr.s6_addr + 8));
req6->opt0 = cpu_to_be64(opt0);
- req6->params = cpu_to_be32(
- select_ntuple(ep->com.dev, ep->dst,
- ep->l2t));
+ req6->params = cpu_to_be32(cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t));
req6->opt2 = cpu_to_be32(opt2);
}
} else {
+ u32 isn = (prandom_u32() & ~7UL) - 1;
+
+ opt2 |= T5_OPT_2_VALID;
+ opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+ if (peer2peer)
+ isn += 4;
+
if (ep->com.remote_addr.ss_family == AF_INET) {
t5_req = (struct cpl_t5_act_open_req *)
skb_put(skb, wrlen);
@@ -681,8 +729,12 @@ static int send_connect(struct c4iw_ep *ep)
t5_req->peer_ip = ra->sin_addr.s_addr;
t5_req->opt0 = cpu_to_be64(opt0);
t5_req->params = cpu_to_be64(V_FILTER_TUPLE(
- select_ntuple(ep->com.dev,
- ep->dst, ep->l2t)));
+ cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t)));
+ t5_req->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__,
+ be32_to_cpu(t5_req->rsvd));
t5_req->opt2 = cpu_to_be32(opt2);
} else {
t5_req6 = (struct cpl_t5_act_open_req6 *)
@@ -703,7 +755,12 @@ static int send_connect(struct c4iw_ep *ep)
(ra6->sin6_addr.s6_addr + 8));
t5_req6->opt0 = cpu_to_be64(opt0);
t5_req6->params = (__force __be64)cpu_to_be32(
- select_ntuple(ep->com.dev, ep->dst, ep->l2t));
+ cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
+ ep->l2t));
+ t5_req6->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__,
+ be32_to_cpu(t5_req6->rsvd));
t5_req6->opt2 = cpu_to_be32(opt2);
}
}
@@ -799,8 +856,9 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
ep->mpa_skb = skb;
c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
start_ep_timer(ep);
- state_set(&ep->com, MPA_REQ_SENT);
+ __state_set(&ep->com, MPA_REQ_SENT);
ep->mpa_attr.initiator = 1;
+ ep->snd_seq += mpalen;
return;
}
@@ -880,6 +938,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
BUG_ON(ep->mpa_skb);
ep->mpa_skb = skb;
+ ep->snd_seq += mpalen;
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
@@ -963,7 +1022,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
skb_get(skb);
t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
ep->mpa_skb = skb;
- state_set(&ep->com, MPA_REP_SENT);
+ __state_set(&ep->com, MPA_REP_SENT);
+ ep->snd_seq += mpalen;
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
@@ -980,6 +1040,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
+ mutex_lock(&ep->com.mutex);
dst_confirm(ep->dst);
/* setup the hwtid for this connection */
@@ -1003,17 +1064,18 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
send_mpa_req(ep, skb, 1);
else
send_mpa_req(ep, skb, mpa_rev);
-
+ mutex_unlock(&ep->com.mutex);
return 0;
}
-static void close_complete_upcall(struct c4iw_ep *ep)
+static void close_complete_upcall(struct c4iw_ep *ep, int status)
{
struct iw_cm_event event;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
+ event.status = status;
if (ep->com.cm_id) {
PDBG("close complete delivered ep %p cm_id %p tid %u\n",
ep, ep->com.cm_id, ep->hwtid);
@@ -1027,8 +1089,7 @@ static void close_complete_upcall(struct c4iw_ep *ep)
static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
{
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- close_complete_upcall(ep);
- state_set(&ep->com, ABORTING);
+ __state_set(&ep->com, ABORTING);
set_bit(ABORT_CONN, &ep->com.history);
return send_abort(ep, skb, gfp);
}
@@ -1106,9 +1167,10 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
}
}
-static void connect_request_upcall(struct c4iw_ep *ep)
+static int connect_request_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
+ int ret;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
memset(&event, 0, sizeof(event));
@@ -1133,15 +1195,14 @@ static void connect_request_upcall(struct c4iw_ep *ep)
event.private_data_len = ep->plen;
event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
}
- if (state_read(&ep->parent_ep->com) != DEAD) {
- c4iw_get_ep(&ep->com);
- ep->parent_ep->com.cm_id->event_handler(
- ep->parent_ep->com.cm_id,
- &event);
- }
+ c4iw_get_ep(&ep->com);
+ ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
+ &event);
+ if (ret)
+ c4iw_put_ep(&ep->com);
set_bit(CONNREQ_UPCALL, &ep->com.history);
c4iw_put_ep(&ep->parent_ep->com);
- ep->parent_ep = NULL;
+ return ret;
}
static void established_upcall(struct c4iw_ep *ep)
@@ -1173,6 +1234,14 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
return 0;
}
+ /*
+ * If we couldn't specify the entire rcv window at connection setup
+ * due to the limit in the number of bits in the RCV_BUFSIZ field,
+ * then add the overage in to the credits returned.
+ */
+ if (ep->rcv_win > RCV_BUFSIZ_MASK * 1024)
+ credits += ep->rcv_win - RCV_BUFSIZ_MASK * 1024;
+
req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
memset(req, 0, wrlen);
INIT_TP_WR(req, ep->hwtid);
@@ -1186,7 +1255,7 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
return credits;
}
-static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
+static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
{
struct mpa_message *mpa;
struct mpa_v2_conn_params *mpa_v2_params;
@@ -1196,17 +1265,17 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
struct c4iw_qp_attributes attrs;
enum c4iw_qp_attr_mask mask;
int err;
+ int disconnect = 0;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
/*
- * Stop mpa timer. If it expired, then the state has
- * changed and we bail since ep_timeout already aborted
- * the connection.
+ * Stop mpa timer. If it expired, then
+ * we ignore the MPA reply. process_timeout()
+ * will abort the connection.
*/
- stop_ep_timer(ep);
- if (state_read(&ep->com) != MPA_REQ_SENT)
- return;
+ if (stop_ep_timer(ep))
+ return 0;
/*
* If we get more than the supported amount of private data
@@ -1228,7 +1297,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* if we don't even have the mpa message, then bail.
*/
if (ep->mpa_pkt_len < sizeof(*mpa))
- return;
+ return 0;
mpa = (struct mpa_message *) ep->mpa_pkt;
/* Validate MPA header. */
@@ -1268,7 +1337,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* We'll continue process when more data arrives.
*/
if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
- return;
+ return 0;
if (mpa->flags & MPA_REJECT) {
err = -ECONNREFUSED;
@@ -1280,7 +1349,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* start reply message including private data. And
* the MPA header is valid.
*/
- state_set(&ep->com, FPDU_MODE);
+ __state_set(&ep->com, FPDU_MODE);
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.recv_marker_enabled = markers_enabled;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -1370,9 +1439,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_NOMATCH_RTR;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
+ attrs.send_term = 1;
err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
- C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
err = -ENOMEM;
+ disconnect = 1;
goto out;
}
@@ -1388,18 +1459,20 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_INSUFF_IRD;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
+ attrs.send_term = 1;
err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
- C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
err = -ENOMEM;
+ disconnect = 1;
goto out;
}
goto out;
err:
- state_set(&ep->com, ABORTING);
+ __state_set(&ep->com, ABORTING);
send_abort(ep, skb, GFP_KERNEL);
out:
connect_reply_upcall(ep, err);
- return;
+ return disconnect;
}
static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
@@ -1410,15 +1483,12 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- if (state_read(&ep->com) != MPA_REQ_WAIT)
- return;
-
/*
* If we get more than the supported amount of private data
* then we must fail this connection.
*/
if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1440,7 +1510,6 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
return;
PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
- stop_ep_timer(ep);
mpa = (struct mpa_message *) ep->mpa_pkt;
/*
@@ -1449,13 +1518,13 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (mpa->revision > mpa_rev) {
printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
" Received = %d\n", __func__, mpa_rev, mpa->revision);
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1466,7 +1535,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
* Fail if there's too much private data.
*/
if (plen > MPA_MAX_PRIVATE_DATA) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1475,7 +1544,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
* If plen does not account for pkt size
*/
if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
abort_connection(ep, skb, GFP_KERNEL);
return;
}
@@ -1532,10 +1601,24 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
ep->mpa_attr.p2p_type);
- state_set(&ep->com, MPA_REQ_RCVD);
-
- /* drive upcall */
- connect_request_upcall(ep);
+ /*
+ * If the endpoint timer already expired, then we ignore
+ * the start request. process_timeout() will abort
+ * the connection.
+ */
+ if (!stop_ep_timer(ep)) {
+ __state_set(&ep->com, MPA_REQ_RCVD);
+
+ /* drive upcall */
+ mutex_lock(&ep->parent_ep->com.mutex);
+ if (ep->parent_ep->com.state != DEAD) {
+ if (connect_request_upcall(ep))
+ abort_connection(ep, skb, GFP_KERNEL);
+ } else {
+ abort_connection(ep, skb, GFP_KERNEL);
+ }
+ mutex_unlock(&ep->parent_ep->com.mutex);
+ }
return;
}
@@ -1547,19 +1630,23 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
unsigned int tid = GET_TID(hdr);
struct tid_info *t = dev->rdev.lldi.tids;
__u8 status = hdr->status;
+ int disconnect = 0;
ep = lookup_tid(t, tid);
+ if (!ep)
+ return 0;
PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
skb_pull(skb, sizeof(*hdr));
skb_trim(skb, dlen);
+ mutex_lock(&ep->com.mutex);
/* update RX credits */
update_rx_credits(ep, dlen);
- switch (state_read(&ep->com)) {
+ switch (ep->com.state) {
case MPA_REQ_SENT:
ep->rcv_seq += dlen;
- process_mpa_reply(ep, skb);
+ disconnect = process_mpa_reply(ep, skb);
break;
case MPA_REQ_WAIT:
ep->rcv_seq += dlen;
@@ -1572,15 +1659,19 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
pr_err("%s Unexpected streaming data." \
" qpid %u ep %p state %d tid %u status %d\n",
__func__, ep->com.qp->wq.sq.qid, ep,
- state_read(&ep->com), ep->hwtid, status);
+ ep->com.state, ep->hwtid, status);
attrs.next_state = C4IW_QP_STATE_TERMINATE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
- C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ disconnect = 1;
break;
}
default:
break;
}
+ mutex_unlock(&ep->com.mutex);
+ if (disconnect)
+ c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
return 0;
}
@@ -1624,18 +1715,20 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
unsigned int mtu_idx;
int wscale;
struct sockaddr_in *sin;
+ int win;
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
memset(req, 0, sizeof(*req));
req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR));
req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16)));
- req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst,
+ req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
+ ep->com.dev->rdev.lldi.ports[0],
ep->l2t));
- sin = (struct sockaddr_in *)&ep->com.local_addr;
+ sin = (struct sockaddr_in *)&ep->com.mapped_local_addr;
req->le.lport = sin->sin_port;
req->le.u.ipv4.lip = sin->sin_addr.s_addr;
- sin = (struct sockaddr_in *)&ep->com.remote_addr;
+ sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
req->le.pport = sin->sin_port;
req->le.u.ipv4.pip = sin->sin_addr.s_addr;
req->tcb.t_state_to_astid =
@@ -1645,8 +1738,18 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK);
req->tcb.tx_max = (__force __be32) jiffies;
req->tcb.rcv_adv = htons(1);
- cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+ best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps);
wscale = compute_wscale(rcv_win);
+
+ /*
+ * Specify the largest window that will fit in opt0. The
+ * remainder will be specified in the rx_data_ack.
+ */
+ win = ep->rcv_win >> 10;
+ if (win > RCV_BUFSIZ_MASK)
+ win = RCV_BUFSIZ_MASK;
+
req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) |
(nocong ? NO_CONG(1) : 0) |
KEEP_ALIVE(1) |
@@ -1658,7 +1761,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
SMAC_SEL(ep->smac_idx) |
DSCP(ep->tos) |
ULP_MODE(ULP_MODE_TCPDDP) |
- RCV_BUFSIZ(rcv_win >> 10));
+ RCV_BUFSIZ(win));
req->tcb.opt2 = (__force __be32) (PACE(1) |
TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
RX_CHANNEL(0) |
@@ -1686,6 +1789,22 @@ static inline int act_open_has_tid(int status)
status != CPL_ERR_ARP_MISS;
}
+/* Returns whether a CPL status conveys negative advice.
+ */
+static int is_neg_adv(unsigned int status)
+{
+ return status == CPL_ERR_RTX_NEG_ADVICE ||
+ status == CPL_ERR_PERSIST_NEG_ADVICE ||
+ status == CPL_ERR_KEEPALV_NEG_ADVICE;
+}
+
+static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
+{
+ ep->snd_win = snd_win;
+ ep->rcv_win = rcv_win;
+ PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win);
+}
+
#define ACT_OPEN_RETRY_COUNT 2
static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
@@ -1734,6 +1853,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
ep->ctrlq_idx = cxgb4_port_idx(pdev);
ep->rss_qid = cdev->rdev.lldi.rxq_ids[
cxgb4_port_idx(pdev) * step];
+ set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
dev_put(pdev);
} else {
pdev = get_real_dev(n->dev);
@@ -1742,16 +1862,17 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
if (!ep->l2t)
goto out;
ep->mtu = dst_mtu(dst);
- ep->tx_chan = cxgb4_port_chan(n->dev);
- ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
+ ep->tx_chan = cxgb4_port_chan(pdev);
+ ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
step = cdev->rdev.lldi.ntxq /
cdev->rdev.lldi.nchan;
- ep->txq_idx = cxgb4_port_idx(n->dev) * step;
- ep->ctrlq_idx = cxgb4_port_idx(n->dev);
+ ep->txq_idx = cxgb4_port_idx(pdev) * step;
+ ep->ctrlq_idx = cxgb4_port_idx(pdev);
step = cdev->rdev.lldi.nrxq /
cdev->rdev.lldi.nchan;
ep->rss_qid = cdev->rdev.lldi.rxq_ids[
- cxgb4_port_idx(n->dev) * step];
+ cxgb4_port_idx(pdev) * step];
+ set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
if (clear_mpa_v1) {
ep->retry_with_mpa_v1 = 0;
@@ -1866,15 +1987,15 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct sockaddr_in6 *ra6;
ep = lookup_atid(t, atid);
- la = (struct sockaddr_in *)&ep->com.local_addr;
- ra = (struct sockaddr_in *)&ep->com.remote_addr;
- la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
- ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+ la = (struct sockaddr_in *)&ep->com.mapped_local_addr;
+ ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
+ la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+ ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr;
PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
status, status2errno(status));
- if (status == CPL_ERR_RTX_NEG_ADVICE) {
+ if (is_neg_adv(status)) {
printk(KERN_WARNING MOD "Connection problems for atid %u\n",
atid);
return 0;
@@ -1982,13 +2103,36 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
u64 opt0;
u32 opt2;
int wscale;
+ struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
+ int win;
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
BUG_ON(skb_cloned(skb));
- skb_trim(skb, sizeof(*rpl));
+
skb_get(skb);
- cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
+ rpl = cplhdr(skb);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ skb_trim(skb, roundup(sizeof(*rpl5), 16));
+ rpl5 = (void *)rpl;
+ INIT_TP_WR(rpl5, ep->hwtid);
+ } else {
+ skb_trim(skb, sizeof(*rpl));
+ INIT_TP_WR(rpl, ep->hwtid);
+ }
+ OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+ ep->hwtid));
+
+ best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ enable_tcp_timestamps && req->tcpopt.tstamp);
wscale = compute_wscale(rcv_win);
+
+ /*
+ * Specify the largest window that will fit in opt0. The
+ * remainder will be specified in the rx_data_ack.
+ */
+ win = ep->rcv_win >> 10;
+ if (win > RCV_BUFSIZ_MASK)
+ win = RCV_BUFSIZ_MASK;
opt0 = (nocong ? NO_CONG(1) : 0) |
KEEP_ALIVE(1) |
DELACK(1) |
@@ -1999,7 +2143,7 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
SMAC_SEL(ep->smac_idx) |
DSCP(ep->tos >> 2) |
ULP_MODE(ULP_MODE_TCPDDP) |
- RCV_BUFSIZ(rcv_win>>10);
+ RCV_BUFSIZ(win);
opt2 = RX_CHANNEL(0) |
RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
@@ -2018,11 +2162,19 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
if (tcph->ece && tcph->cwr)
opt2 |= CCTRL_ECN(1);
}
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ u32 isn = (prandom_u32() & ~7UL) - 1;
+ opt2 |= T5_OPT_2_VALID;
+ opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE);
+ opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */
+ rpl5 = (void *)rpl;
+ memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
+ if (peer2peer)
+ isn += 4;
+ rpl5->iss = cpu_to_be32(isn);
+ PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
+ }
- rpl = cplhdr(skb);
- INIT_TP_WR(rpl, ep->hwtid);
- OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
- ep->hwtid));
rpl->opt0 = cpu_to_be64(opt0);
rpl->opt2 = cpu_to_be32(opt2);
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
@@ -2037,7 +2189,6 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
BUG_ON(skb_cloned(skb));
skb_trim(skb, sizeof(struct cpl_tid_release));
- skb_get(skb);
release_tid(&dev->rdev, hwtid, skb);
return;
}
@@ -2087,6 +2238,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
int err;
u16 peer_mss = ntohs(req->tcpopt.mss);
int iptype;
+ unsigned short hdrs;
parent_ep = lookup_stid(t, stid);
if (!parent_ep) {
@@ -2144,8 +2296,10 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
goto reject;
}
- if (peer_mss && child_ep->mtu > (peer_mss + 40))
- child_ep->mtu = peer_mss + 40;
+ hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) +
+ ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
+ if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
+ child_ep->mtu = peer_mss + hdrs;
state_set(&child_ep->com, CONNECTING);
child_ep->com.dev = dev;
@@ -2279,13 +2433,13 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
disconnect = 0;
break;
case MORIBUND:
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
if (ep->com.cm_id && ep->com.qp) {
attrs.next_state = C4IW_QP_STATE_IDLE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
- close_complete_upcall(ep);
+ close_complete_upcall(ep, 0);
__state_set(&ep->com, DEAD);
release = 1;
disconnect = 0;
@@ -2304,15 +2458,6 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
-/*
- * Returns whether an ABORT_REQ_RSS message is a negative advice.
- */
-static int is_neg_adv_abort(unsigned int status)
-{
- return status == CPL_ERR_RTX_NEG_ADVICE ||
- status == CPL_ERR_PERSIST_NEG_ADVICE;
-}
-
static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_abort_req_rss *req = cplhdr(skb);
@@ -2326,7 +2471,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
unsigned int tid = GET_TID(req);
ep = lookup_tid(t, tid);
- if (is_neg_adv_abort(req->status)) {
+ if (is_neg_adv(req->status)) {
PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
ep->hwtid);
return 0;
@@ -2348,10 +2493,10 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
case CONNECTING:
break;
case MPA_REQ_WAIT:
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
break;
case MPA_REQ_SENT:
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
connect_reply_upcall(ep, -ECONNRESET);
else {
@@ -2456,7 +2601,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
__state_set(&ep->com, MORIBUND);
break;
case MORIBUND:
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
if ((ep->com.cm_id) && (ep->com.qp)) {
attrs.next_state = C4IW_QP_STATE_IDLE;
c4iw_modify_qp(ep->com.qp->rhp,
@@ -2464,7 +2609,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
}
- close_complete_upcall(ep);
+ close_complete_upcall(ep, 0);
__state_set(&ep->com, DEAD);
release = 1;
break;
@@ -2539,22 +2684,28 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
{
- int err;
+ int err = 0;
+ int disconnect = 0;
struct c4iw_ep *ep = to_ep(cm_id);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- if (state_read(&ep->com) == DEAD) {
+ mutex_lock(&ep->com.mutex);
+ if (ep->com.state == DEAD) {
+ mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com);
return -ECONNRESET;
}
set_bit(ULP_REJECT, &ep->com.history);
- BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+ BUG_ON(ep->com.state != MPA_REQ_RCVD);
if (mpa_rev == 0)
abort_connection(ep, NULL, GFP_KERNEL);
else {
err = send_mpa_reject(ep, pdata, pdata_len);
- err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
+ disconnect = 1;
}
+ mutex_unlock(&ep->com.mutex);
+ if (disconnect)
+ err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
c4iw_put_ep(&ep->com);
return 0;
}
@@ -2569,12 +2720,14 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- if (state_read(&ep->com) == DEAD) {
+
+ mutex_lock(&ep->com.mutex);
+ if (ep->com.state == DEAD) {
err = -ECONNRESET;
goto err;
}
- BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+ BUG_ON(ep->com.state != MPA_REQ_RCVD);
BUG_ON(!qp);
set_bit(ULP_ACCEPT, &ep->com.history);
@@ -2643,14 +2796,16 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (err)
goto err1;
- state_set(&ep->com, FPDU_MODE);
+ __state_set(&ep->com, FPDU_MODE);
established_upcall(ep);
+ mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com);
return 0;
err1:
ep->com.cm_id = NULL;
cm_id->rem_ref(cm_id);
err:
+ mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com);
return err;
}
@@ -2721,13 +2876,15 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
struct c4iw_ep *ep;
int err = 0;
- struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
- struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&cm_id->local_addr;
- struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
- &cm_id->remote_addr;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in *raddr;
+ struct sockaddr_in6 *laddr6;
+ struct sockaddr_in6 *raddr6;
+ struct iwpm_dev_data pm_reg_msg;
+ struct iwpm_sa_data pm_msg;
__u8 *ra;
int iptype;
+ int iwpm_err = 0;
if ((conn_param->ord > c4iw_max_read_depth) ||
(conn_param->ird > c4iw_max_read_depth)) {
@@ -2758,7 +2915,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!ep->com.qp) {
PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
err = -EINVAL;
- goto fail2;
+ goto fail1;
}
ref_qp(ep);
PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
@@ -2771,10 +2928,50 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (ep->atid == -1) {
printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
err = -ENOMEM;
- goto fail2;
+ goto fail1;
}
insert_handle(dev, &dev->atid_idr, ep, ep->atid);
+ memcpy(&ep->com.local_addr, &cm_id->local_addr,
+ sizeof(ep->com.local_addr));
+ memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
+ sizeof(ep->com.remote_addr));
+
+ /* No port mapper available, go with the specified peer information */
+ memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
+ sizeof(ep->com.mapped_local_addr));
+ memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr,
+ sizeof(ep->com.mapped_remote_addr));
+
+ c4iw_form_reg_msg(dev, &pm_reg_msg);
+ iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
+ if (iwpm_err) {
+ PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
+ __func__, iwpm_err);
+ }
+ if (iwpm_valid_pid() && !iwpm_err) {
+ c4iw_form_pm_msg(ep, &pm_msg);
+ iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW);
+ if (iwpm_err)
+ PDBG("%s: Port Mapper query fail (err = %d).\n",
+ __func__, iwpm_err);
+ else
+ c4iw_record_pm_msg(ep, &pm_msg);
+ }
+ if (iwpm_create_mapinfo(&ep->com.local_addr,
+ &ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
+ iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
+ err = -ENOMEM;
+ goto fail1;
+ }
+ print_addr(&ep->com, __func__, "add_query/create_mapinfo");
+ set_bit(RELEASE_MAPINFO, &ep->com.flags);
+
+ laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr;
+ raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
+ laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+ raddr6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr;
+
if (cm_id->remote_addr.ss_family == AF_INET) {
iptype = 4;
ra = (__u8 *)&raddr->sin_addr;
@@ -2785,7 +2982,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) {
err = pick_local_ipaddrs(dev, cm_id);
if (err)
- goto fail2;
+ goto fail1;
}
/* find a route */
@@ -2805,7 +3002,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
err = pick_local_ip6addrs(dev, cm_id);
if (err)
- goto fail2;
+ goto fail1;
}
/* find a route */
@@ -2821,13 +3018,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (!ep->dst) {
printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
err = -EHOSTUNREACH;
- goto fail3;
+ goto fail2;
}
err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true);
if (err) {
printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
- goto fail4;
+ goto fail3;
}
PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
@@ -2836,10 +3033,6 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
state_set(&ep->com, CONNECTING);
ep->tos = 0;
- memcpy(&ep->com.local_addr, &cm_id->local_addr,
- sizeof(ep->com.local_addr));
- memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
- sizeof(ep->com.remote_addr));
/* send connect request to rnic */
err = send_connect(ep);
@@ -2847,12 +3040,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
goto out;
cxgb4_l2t_release(ep->l2t);
-fail4:
- dst_release(ep->dst);
fail3:
+ dst_release(ep->dst);
+fail2:
remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
-fail2:
+fail1:
cm_id->rem_ref(cm_id);
c4iw_put_ep(&ep->com);
out:
@@ -2862,7 +3055,8 @@ out:
static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
{
int err;
- struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_local_addr;
c4iw_init_wr_wait(&ep->com.wr_wait);
err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
@@ -2883,7 +3077,8 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
{
int err;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ep->com.local_addr;
+ struct sockaddr_in *sin = (struct sockaddr_in *)
+ &ep->com.mapped_local_addr;
if (dev->rdev.lldi.enable_fw_ofld_conn) {
do {
@@ -2918,6 +3113,9 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
int err = 0;
struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
struct c4iw_listen_ep *ep;
+ struct iwpm_dev_data pm_reg_msg;
+ struct iwpm_sa_data pm_msg;
+ int iwpm_err = 0;
might_sleep();
@@ -2938,7 +3136,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
/*
* Allocate a server TID.
*/
- if (dev->rdev.lldi.enable_fw_ofld_conn)
+ if (dev->rdev.lldi.enable_fw_ofld_conn &&
+ ep->com.local_addr.ss_family == AF_INET)
ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
cm_id->local_addr.ss_family, ep);
else
@@ -2951,6 +3150,37 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
goto fail2;
}
insert_handle(dev, &dev->stid_idr, ep, ep->stid);
+
+ /* No port mapper available, go with the specified info */
+ memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
+ sizeof(ep->com.mapped_local_addr));
+
+ c4iw_form_reg_msg(dev, &pm_reg_msg);
+ iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
+ if (iwpm_err) {
+ PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
+ __func__, iwpm_err);
+ }
+ if (iwpm_valid_pid() && !iwpm_err) {
+ memcpy(&pm_msg.loc_addr, &ep->com.local_addr,
+ sizeof(ep->com.local_addr));
+ iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW);
+ if (iwpm_err)
+ PDBG("%s: Port Mapper query fail (err = %d).\n",
+ __func__, iwpm_err);
+ else
+ memcpy(&ep->com.mapped_local_addr,
+ &pm_msg.mapped_loc_addr,
+ sizeof(ep->com.mapped_local_addr));
+ }
+ if (iwpm_create_mapinfo(&ep->com.local_addr,
+ &ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
+ err = -ENOMEM;
+ goto fail3;
+ }
+ print_addr(&ep->com, __func__, "add_mapping/create_mapinfo");
+
+ set_bit(RELEASE_MAPINFO, &ep->com.flags);
state_set(&ep->com, LISTEN);
if (ep->com.local_addr.ss_family == AF_INET)
err = create_server4(dev, ep);
@@ -2960,6 +3190,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->provider_data = ep;
goto out;
}
+
+fail3:
cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
ep->com.local_addr.ss_family);
fail2:
@@ -3018,7 +3250,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
rdev = &ep->com.dev->rdev;
if (c4iw_fatal_error(rdev)) {
fatal = 1;
- close_complete_upcall(ep);
+ close_complete_upcall(ep, -EIO);
ep->com.state = DEAD;
}
switch (ep->com.state) {
@@ -3040,7 +3272,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
close = 1;
if (abrupt) {
- stop_ep_timer(ep);
+ (void)stop_ep_timer(ep);
ep->com.state = ABORTING;
} else
ep->com.state = MORIBUND;
@@ -3060,7 +3292,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
if (close) {
if (abrupt) {
set_bit(EP_DISC_ABORT, &ep->com.history);
- close_complete_upcall(ep);
+ close_complete_upcall(ep, -ECONNRESET);
ret = send_abort(ep, NULL, gfp);
} else {
set_bit(EP_DISC_CLOSE, &ep->com.history);
@@ -3241,6 +3473,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
struct sk_buff *req_skb;
struct fw_ofld_connection_wr *req;
struct cpl_pass_accept_req *cpl = cplhdr(skb);
+ int ret;
req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
@@ -3277,7 +3510,13 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
req->cookie = (unsigned long)skb;
set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
- cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
+ ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
+ if (ret < 0) {
+ pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
+ ret);
+ kfree_skb(skb);
+ kfree_skb(req_skb);
+ }
}
/*
@@ -3323,9 +3562,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
/*
* Calculate the server tid from filter hit index from cpl_rx_pkt.
*/
- stid = (__force int) cpu_to_be32((__force u32) rss->hash_val)
- - dev->rdev.lldi.tids->sftid_base
- + dev->rdev.lldi.tids->nstids;
+ stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid);
if (!lep) {
@@ -3386,6 +3623,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
pi = (struct port_info *)netdev_priv(pdev);
tx_chan = cxgb4_port_chan(pdev);
}
+ neigh_release(neigh);
if (!e) {
pr_err("%s - failed to allocate l2t entry!\n",
__func__);
@@ -3397,7 +3635,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
window = (__force u16) htons((__force u16)tcph->window);
/* Calcuate filter portion for LE region. */
- filter = (__force unsigned int) cpu_to_be32(select_ntuple(dev, dst, e));
+ filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple(
+ dev->rdev.lldi.ports[0],
+ e));
/*
* Synthesize the cpl_pass_accept_req. We have everything except the
@@ -3464,15 +3704,26 @@ static void process_timeout(struct c4iw_ep *ep)
&attrs, 1);
}
__state_set(&ep->com, ABORTING);
+ close_complete_upcall(ep, -ETIMEDOUT);
+ break;
+ case ABORTING:
+ case DEAD:
+
+ /*
+ * These states are expected if the ep timed out at the same
+ * time as another thread was calling stop_ep_timer().
+ * So we silently do nothing for these states.
+ */
+ abort = 0;
break;
default:
WARN(1, "%s unexpected state ep %p tid %u state %u\n",
__func__, ep, ep->hwtid, ep->com.state);
abort = 0;
}
- mutex_unlock(&ep->com.mutex);
if (abort)
abort_connection(ep, NULL, GFP_KERNEL);
+ mutex_unlock(&ep->com.mutex);
c4iw_put_ep(&ep->com);
}
@@ -3486,6 +3737,8 @@ static void process_timedout_eps(void)
tmp = timeout_list.next;
list_del(tmp);
+ tmp->next = NULL;
+ tmp->prev = NULL;
spin_unlock_irq(&timeout_lock);
ep = list_entry(tmp, struct c4iw_ep, entry);
process_timeout(ep);
@@ -3502,6 +3755,7 @@ static void process_work(struct work_struct *work)
unsigned int opcode;
int ret;
+ process_timedout_eps();
while ((skb = skb_dequeue(&rxq))) {
rpl = cplhdr(skb);
dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
@@ -3511,8 +3765,8 @@ static void process_work(struct work_struct *work)
ret = work_handlers[opcode](dev, skb);
if (!ret)
kfree_skb(skb);
+ process_timedout_eps();
}
- process_timedout_eps();
}
static DECLARE_WORK(skb_work, process_work);
@@ -3524,8 +3778,13 @@ static void ep_timeout(unsigned long arg)
spin_lock(&timeout_lock);
if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
- list_add_tail(&ep->entry, &timeout_list);
- kickit = 1;
+ /*
+ * Only insert if it is not already on the list.
+ */
+ if (!ep->entry.next) {
+ list_add_tail(&ep->entry, &timeout_list);
+ kickit = 1;
+ }
}
spin_unlock(&timeout_lock);
if (kickit)
@@ -3607,7 +3866,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
kfree_skb(skb);
return 0;
}
- if (is_neg_adv_abort(req->status)) {
+ if (is_neg_adv(req->status)) {
PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
ep->hwtid);
kfree_skb(skb);
@@ -3666,7 +3925,7 @@ int __init c4iw_cm_init(void)
return 0;
}
-void __exit c4iw_cm_term(void)
+void c4iw_cm_term(void)
{
WARN_ON(!list_empty(&timeout_list));
flush_workqueue(workq);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 88de3aa9c5b..c04292c950f 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -134,7 +134,8 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
V_FW_RI_RES_WR_IQANUS(0) |
V_FW_RI_RES_WR_IQANUD(1) |
F_FW_RI_RES_WR_IQANDST |
- V_FW_RI_RES_WR_IQANDSTINDEX(*rdev->lldi.rxq_ids));
+ V_FW_RI_RES_WR_IQANDSTINDEX(
+ rdev->lldi.ciq_ids[cq->vector]));
res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
F_FW_RI_RES_WR_IQDROPRSS |
V_FW_RI_RES_WR_IQPCIECH(2) |
@@ -235,27 +236,21 @@ int c4iw_flush_sq(struct c4iw_qp *qhp)
struct t4_cq *cq = &chp->cq;
int idx;
struct t4_swsqe *swsqe;
- int error = (qhp->attr.state != C4IW_QP_STATE_CLOSING &&
- qhp->attr.state != C4IW_QP_STATE_IDLE);
if (wq->sq.flush_cidx == -1)
wq->sq.flush_cidx = wq->sq.cidx;
idx = wq->sq.flush_cidx;
BUG_ON(idx >= wq->sq.size);
while (idx != wq->sq.pidx) {
- if (error) {
- swsqe = &wq->sq.sw_sq[idx];
- BUG_ON(swsqe->flushed);
- swsqe->flushed = 1;
- insert_sq_cqe(wq, cq, swsqe);
- if (wq->sq.oldest_read == swsqe) {
- BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
- advance_oldest_read(wq);
- }
- flushed++;
- } else {
- t4_sq_consume(wq);
+ swsqe = &wq->sq.sw_sq[idx];
+ BUG_ON(swsqe->flushed);
+ swsqe->flushed = 1;
+ insert_sq_cqe(wq, cq, swsqe);
+ if (wq->sq.oldest_read == swsqe) {
+ BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
+ advance_oldest_read(wq);
}
+ flushed++;
if (++idx == wq->sq.size)
idx = 0;
}
@@ -365,8 +360,14 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
- /*
- * drop peer2peer RTR reads.
+ /* If we have reached here because of async
+ * event or other error, and have egress error
+ * then drop
+ */
+ if (CQE_TYPE(hw_cqe) == 1)
+ goto next_cqe;
+
+ /* drop peer2peer RTR reads.
*/
if (CQE_WRID_STAG(hw_cqe) == 1)
goto next_cqe;
@@ -511,8 +512,18 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
*/
if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {
- /*
- * If this is an unsolicited read response, then the read
+ /* If we have reached here because of async
+ * event or other error, and have egress error
+ * then drop
+ */
+ if (CQE_TYPE(hw_cqe) == 1) {
+ if (CQE_STATUS(hw_cqe))
+ t4_set_wq_in_error(wq);
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /* If this is an unsolicited read response, then the read
* was generated by the kernel driver as part of peer-2-peer
* connection setup. So ignore the completion.
*/
@@ -603,7 +614,7 @@ proc_cqe:
*/
if (SQ_TYPE(hw_cqe)) {
int idx = CQE_WRID_SQ_IDX(hw_cqe);
- BUG_ON(idx > wq->sq.size);
+ BUG_ON(idx >= wq->sq.size);
/*
* Account for any unsignaled completions completed by
@@ -617,7 +628,7 @@ proc_cqe:
wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
else
wq->sq.in_use -= idx - wq->sq.cidx;
- BUG_ON(wq->sq.in_use < 0 && wq->sq.in_use < wq->sq.size);
+ BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);
wq->sq.cidx = (uint16_t)idx;
PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
@@ -662,7 +673,7 @@ skip_cqe:
static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
{
struct c4iw_qp *qhp = NULL;
- struct t4_cqe cqe = {0, 0}, *rd_cqe;
+ struct t4_cqe uninitialized_var(cqe), *rd_cqe;
struct t4_wq *wq;
u32 credit = 0;
u8 cqe_flushed;
@@ -860,6 +871,9 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
rhp = to_c4iw_dev(ibdev);
+ if (vector >= rhp->rdev.lldi.nciq)
+ return ERR_PTR(-EINVAL);
+
chp = kzalloc(sizeof(*chp), GFP_KERNEL);
if (!chp)
return ERR_PTR(-ENOMEM);
@@ -881,7 +895,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
/*
* Make actual HW queue 2x to avoid cdix_inc overflows.
*/
- hwentries = entries * 2;
+ hwentries = min(entries * 2, T4_MAX_IQ_SIZE);
/*
* Make HW queue at least 64 entries so GTS updates aren't too
@@ -905,6 +919,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
}
chp->cq.size = hwentries;
chp->cq.memsize = memsize;
+ chp->cq.vector = vector;
ret = create_cq(&rhp->rdev, &chp->cq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
@@ -940,7 +955,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
uresp.gts_key = ucontext->key;
ucontext->key += PAGE_SIZE;
spin_unlock(&ucontext->mmap_lock);
- ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+ ret = ib_copy_to_udata(udata, &uresp,
+ sizeof(uresp) - sizeof(uresp.reserved));
if (ret)
goto err5;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 33d2cc6ab56..7db82b24302 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -64,6 +64,10 @@ struct uld_ctx {
static LIST_HEAD(uld_ctx_list);
static DEFINE_MUTEX(dev_mutex);
+#define DB_FC_RESUME_SIZE 64
+#define DB_FC_RESUME_DELAY 1
+#define DB_FC_DRAIN_THRESH 0
+
static struct dentry *c4iw_debugfs_root;
struct c4iw_debugfs_data {
@@ -73,6 +77,16 @@ struct c4iw_debugfs_data {
int pos;
};
+/* registered cxgb4 netlink callbacks */
+static struct ibnl_client_cbs c4iw_nl_cb_table[] = {
+ [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
+ [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
+ [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+ [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+ [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
+ [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+};
+
static int count_idrs(int id, void *p, void *data)
{
int *countp = data;
@@ -109,35 +123,49 @@ static int dump_qp(int id, void *p, void *data)
&qp->ep->com.local_addr;
struct sockaddr_in *rsin = (struct sockaddr_in *)
&qp->ep->com.remote_addr;
+ struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+ &qp->ep->com.mapped_local_addr;
+ struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
+ &qp->ep->com.mapped_remote_addr;
cc = snprintf(qpd->buf + qpd->pos, space,
"rc qp sq id %u rq id %u state %u "
"onchip %u ep tid %u state %u "
- "%pI4:%u->%pI4:%u\n",
+ "%pI4:%u/%u->%pI4:%u/%u\n",
qp->wq.sq.qid, qp->wq.rq.qid,
(int)qp->attr.state,
qp->wq.sq.flags & T4_SQ_ONCHIP,
qp->ep->hwtid, (int)qp->ep->com.state,
&lsin->sin_addr, ntohs(lsin->sin_port),
- &rsin->sin_addr, ntohs(rsin->sin_port));
+ ntohs(mapped_lsin->sin_port),
+ &rsin->sin_addr, ntohs(rsin->sin_port),
+ ntohs(mapped_rsin->sin_port));
} else {
struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
&qp->ep->com.local_addr;
struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
&qp->ep->com.remote_addr;
+ struct sockaddr_in6 *mapped_lsin6 =
+ (struct sockaddr_in6 *)
+ &qp->ep->com.mapped_local_addr;
+ struct sockaddr_in6 *mapped_rsin6 =
+ (struct sockaddr_in6 *)
+ &qp->ep->com.mapped_remote_addr;
cc = snprintf(qpd->buf + qpd->pos, space,
"rc qp sq id %u rq id %u state %u "
"onchip %u ep tid %u state %u "
- "%pI6:%u->%pI6:%u\n",
+ "%pI6:%u/%u->%pI6:%u/%u\n",
qp->wq.sq.qid, qp->wq.rq.qid,
(int)qp->attr.state,
qp->wq.sq.flags & T4_SQ_ONCHIP,
qp->ep->hwtid, (int)qp->ep->com.state,
&lsin6->sin6_addr,
ntohs(lsin6->sin6_port),
+ ntohs(mapped_lsin6->sin6_port),
&rsin6->sin6_addr,
- ntohs(rsin6->sin6_port));
+ ntohs(rsin6->sin6_port),
+ ntohs(mapped_rsin6->sin6_port));
}
} else
cc = snprintf(qpd->buf + qpd->pos, space,
@@ -282,7 +310,7 @@ static const struct file_operations stag_debugfs_fops = {
.llseek = default_llseek,
};
-static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"};
+static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"};
static int stats_show(struct seq_file *seq, void *v)
{
@@ -311,9 +339,10 @@ static int stats_show(struct seq_file *seq, void *v)
seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full);
seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop);
- seq_printf(seq, " DB State: %s Transitions %llu\n",
+ seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n",
db_state_str[dev->db_state],
- dev->rdev.stats.db_state_transitions);
+ dev->rdev.stats.db_state_transitions,
+ dev->rdev.stats.db_fc_interruptions);
seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full);
seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n",
dev->rdev.stats.act_ofld_conn_fails);
@@ -381,31 +410,43 @@ static int dump_ep(int id, void *p, void *data)
&ep->com.local_addr;
struct sockaddr_in *rsin = (struct sockaddr_in *)
&ep->com.remote_addr;
+ struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+ &ep->com.mapped_local_addr;
+ struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
+ &ep->com.mapped_remote_addr;
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p qp %p state %d flags 0x%lx "
"history 0x%lx hwtid %d atid %d "
- "%pI4:%d <-> %pI4:%d\n",
+ "%pI4:%d/%d <-> %pI4:%d/%d\n",
ep, ep->com.cm_id, ep->com.qp,
(int)ep->com.state, ep->com.flags,
ep->com.history, ep->hwtid, ep->atid,
&lsin->sin_addr, ntohs(lsin->sin_port),
- &rsin->sin_addr, ntohs(rsin->sin_port));
+ ntohs(mapped_lsin->sin_port),
+ &rsin->sin_addr, ntohs(rsin->sin_port),
+ ntohs(mapped_rsin->sin_port));
} else {
struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
&ep->com.local_addr;
struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
&ep->com.remote_addr;
+ struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_local_addr;
+ struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_remote_addr;
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p qp %p state %d flags 0x%lx "
"history 0x%lx hwtid %d atid %d "
- "%pI6:%d <-> %pI6:%d\n",
+ "%pI6:%d/%d <-> %pI6:%d/%d\n",
ep, ep->com.cm_id, ep->com.qp,
(int)ep->com.state, ep->com.flags,
ep->com.history, ep->hwtid, ep->atid,
&lsin6->sin6_addr, ntohs(lsin6->sin6_port),
- &rsin6->sin6_addr, ntohs(rsin6->sin6_port));
+ ntohs(mapped_lsin6->sin6_port),
+ &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
+ ntohs(mapped_rsin6->sin6_port));
}
if (cc < space)
epd->pos += cc;
@@ -426,23 +467,29 @@ static int dump_listen_ep(int id, void *p, void *data)
if (ep->com.local_addr.ss_family == AF_INET) {
struct sockaddr_in *lsin = (struct sockaddr_in *)
&ep->com.local_addr;
+ struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+ &ep->com.mapped_local_addr;
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p state %d flags 0x%lx stid %d "
- "backlog %d %pI4:%d\n",
+ "backlog %d %pI4:%d/%d\n",
ep, ep->com.cm_id, (int)ep->com.state,
ep->com.flags, ep->stid, ep->backlog,
- &lsin->sin_addr, ntohs(lsin->sin_port));
+ &lsin->sin_addr, ntohs(lsin->sin_port),
+ ntohs(mapped_lsin->sin_port));
} else {
struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
&ep->com.local_addr;
+ struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+ &ep->com.mapped_local_addr;
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p state %d flags 0x%lx stid %d "
- "backlog %d %pI6:%d\n",
+ "backlog %d %pI6:%d/%d\n",
ep, ep->com.cm_id, (int)ep->com.state,
ep->com.flags, ep->stid, ep->backlog,
- &lsin6->sin6_addr, ntohs(lsin6->sin6_port));
+ &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
+ ntohs(mapped_lsin6->sin6_port));
}
if (cc < space)
epd->pos += cc;
@@ -602,10 +649,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->lldi.vr->qp.size,
rdev->lldi.vr->cq.start,
rdev->lldi.vr->cq.size);
- PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu "
+ PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu "
"qpmask 0x%x cqshift %lu cqmask 0x%x\n",
(unsigned)pci_resource_len(rdev->lldi.pdev, 2),
- (void *)(unsigned long)pci_resource_start(rdev->lldi.pdev, 2),
+ (u64)pci_resource_start(rdev->lldi.pdev, 2),
rdev->lldi.db_reg,
rdev->lldi.gts_reg,
rdev->qpshift, rdev->qpmask,
@@ -643,6 +690,13 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
goto err4;
}
+ rdev->status_page = (struct t4_dev_status_page *)
+ __get_free_page(GFP_KERNEL);
+ if (!rdev->status_page) {
+ pr_err(MOD "error allocating status page\n");
+ goto err4;
+ }
+ rdev->status_page->db_off = 0;
return 0;
err4:
c4iw_rqtpool_destroy(rdev);
@@ -656,6 +710,7 @@ err1:
static void c4iw_rdev_close(struct c4iw_rdev *rdev)
{
+ free_page((unsigned long)rdev->status_page);
c4iw_pblpool_destroy(rdev);
c4iw_rqtpool_destroy(rdev);
c4iw_destroy_resource(&rdev->resource);
@@ -670,7 +725,10 @@ static void c4iw_dealloc(struct uld_ctx *ctx)
idr_destroy(&ctx->dev->hwtid_idr);
idr_destroy(&ctx->dev->stid_idr);
idr_destroy(&ctx->dev->atid_idr);
- iounmap(ctx->dev->rdev.oc_mw_kva);
+ if (ctx->dev->rdev.bar2_kva)
+ iounmap(ctx->dev->rdev.bar2_kva);
+ if (ctx->dev->rdev.oc_mw_kva)
+ iounmap(ctx->dev->rdev.oc_mw_kva);
ib_dealloc_device(&ctx->dev->ibdev);
ctx->dev = NULL;
}
@@ -703,18 +761,6 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
pr_info("%s: On-Chip Queues not supported on this device.\n",
pci_name(infop->pdev));
- if (!is_t4(infop->adapter_type)) {
- if (!allow_db_fc_on_t5) {
- db_fc_threshold = 100000;
- pr_info("DB Flow Control Disabled.\n");
- }
-
- if (!allow_db_coalescing_on_t5) {
- db_coalescing_threshold = -1;
- pr_info("DB Coalescing Disabled.\n");
- }
- }
-
devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
if (!devp) {
printk(KERN_ERR MOD "Cannot allocate ib device\n");
@@ -722,11 +768,33 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
}
devp->rdev.lldi = *infop;
- devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) +
- (pci_resource_len(devp->rdev.lldi.pdev, 2) -
- roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size));
- devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
- devp->rdev.lldi.vr->ocq.size);
+ /*
+ * For T5 devices, we map all of BAR2 with WC.
+ * For T4 devices with onchip qp mem, we map only that part
+ * of BAR2 with WC.
+ */
+ devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
+ if (is_t5(devp->rdev.lldi.adapter_type)) {
+ devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
+ pci_resource_len(devp->rdev.lldi.pdev, 2));
+ if (!devp->rdev.bar2_kva) {
+ pr_err(MOD "Unable to ioremap BAR2\n");
+ ib_dealloc_device(&devp->ibdev);
+ return ERR_PTR(-EINVAL);
+ }
+ } else if (ocqp_supported(infop)) {
+ devp->rdev.oc_mw_pa =
+ pci_resource_start(devp->rdev.lldi.pdev, 2) +
+ pci_resource_len(devp->rdev.lldi.pdev, 2) -
+ roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size);
+ devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
+ devp->rdev.lldi.vr->ocq.size);
+ if (!devp->rdev.oc_mw_kva) {
+ pr_err(MOD "Unable to ioremap onchip mem\n");
+ ib_dealloc_device(&devp->ibdev);
+ return ERR_PTR(-EINVAL);
+ }
+ }
PDBG(KERN_INFO MOD "ocq memory: "
"hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
@@ -749,6 +817,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
spin_lock_init(&devp->lock);
mutex_init(&devp->rdev.stats.lock);
mutex_init(&devp->db_mutex);
+ INIT_LIST_HEAD(&devp->db_fc_list);
if (c4iw_debugfs_root) {
devp->debugfs_root = debugfs_create_dir(
@@ -756,6 +825,8 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
c4iw_debugfs_root);
setup_debugfs(devp);
}
+
+
return devp;
}
@@ -897,11 +968,13 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
}
opcode = *(u8 *)rsp;
- if (c4iw_handlers[opcode])
+ if (c4iw_handlers[opcode]) {
c4iw_handlers[opcode](dev, skb);
- else
+ } else {
pr_info("%s no handler opcode 0x%x...\n", __func__,
opcode);
+ kfree_skb(skb);
+ }
return 0;
nomem:
@@ -977,13 +1050,16 @@ static int disable_qp_db(int id, void *p, void *data)
static void stop_queues(struct uld_ctx *ctx)
{
- spin_lock_irq(&ctx->dev->lock);
- if (ctx->dev->db_state == NORMAL) {
- ctx->dev->rdev.stats.db_state_transitions++;
- ctx->dev->db_state = FLOW_CONTROL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->dev->lock, flags);
+ ctx->dev->rdev.stats.db_state_transitions++;
+ ctx->dev->db_state = STOPPED;
+ if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED)
idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
- }
- spin_unlock_irq(&ctx->dev->lock);
+ else
+ ctx->dev->rdev.status_page->db_off = 1;
+ spin_unlock_irqrestore(&ctx->dev->lock, flags);
}
static int enable_qp_db(int id, void *p, void *data)
@@ -994,15 +1070,72 @@ static int enable_qp_db(int id, void *p, void *data)
return 0;
}
+static void resume_rc_qp(struct c4iw_qp *qp)
+{
+ spin_lock(&qp->lock);
+ t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc,
+ is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+ qp->wq.sq.wq_pidx_inc = 0;
+ t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc,
+ is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+ qp->wq.rq.wq_pidx_inc = 0;
+ spin_unlock(&qp->lock);
+}
+
+static void resume_a_chunk(struct uld_ctx *ctx)
+{
+ int i;
+ struct c4iw_qp *qp;
+
+ for (i = 0; i < DB_FC_RESUME_SIZE; i++) {
+ qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp,
+ db_fc_entry);
+ list_del_init(&qp->db_fc_entry);
+ resume_rc_qp(qp);
+ if (list_empty(&ctx->dev->db_fc_list))
+ break;
+ }
+}
+
static void resume_queues(struct uld_ctx *ctx)
{
spin_lock_irq(&ctx->dev->lock);
- if (ctx->dev->qpcnt <= db_fc_threshold &&
- ctx->dev->db_state == FLOW_CONTROL) {
- ctx->dev->db_state = NORMAL;
- ctx->dev->rdev.stats.db_state_transitions++;
- idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
+ if (ctx->dev->db_state != STOPPED)
+ goto out;
+ ctx->dev->db_state = FLOW_CONTROL;
+ while (1) {
+ if (list_empty(&ctx->dev->db_fc_list)) {
+ WARN_ON(ctx->dev->db_state != FLOW_CONTROL);
+ ctx->dev->db_state = NORMAL;
+ ctx->dev->rdev.stats.db_state_transitions++;
+ if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
+ idr_for_each(&ctx->dev->qpidr, enable_qp_db,
+ NULL);
+ } else {
+ ctx->dev->rdev.status_page->db_off = 0;
+ }
+ break;
+ } else {
+ if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1)
+ < (ctx->dev->rdev.lldi.dbfifo_int_thresh <<
+ DB_FC_DRAIN_THRESH)) {
+ resume_a_chunk(ctx);
+ }
+ if (!list_empty(&ctx->dev->db_fc_list)) {
+ spin_unlock_irq(&ctx->dev->lock);
+ if (DB_FC_RESUME_DELAY) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(DB_FC_RESUME_DELAY);
+ }
+ spin_lock_irq(&ctx->dev->lock);
+ if (ctx->dev->db_state != FLOW_CONTROL)
+ break;
+ }
+ }
}
+out:
+ if (ctx->dev->db_state != NORMAL)
+ ctx->dev->rdev.stats.db_fc_interruptions++;
spin_unlock_irq(&ctx->dev->lock);
}
@@ -1028,12 +1161,12 @@ static int count_qps(int id, void *p, void *data)
return 0;
}
-static void deref_qps(struct qp_list qp_list)
+static void deref_qps(struct qp_list *qp_list)
{
int idx;
- for (idx = 0; idx < qp_list.idx; idx++)
- c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp);
+ for (idx = 0; idx < qp_list->idx; idx++)
+ c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp);
}
static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
@@ -1044,17 +1177,22 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
for (idx = 0; idx < qp_list->idx; idx++) {
struct c4iw_qp *qp = qp_list->qps[idx];
+ spin_lock_irq(&qp->rhp->lock);
+ spin_lock(&qp->lock);
ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
qp->wq.sq.qid,
t4_sq_host_wq_pidx(&qp->wq),
t4_sq_wq_size(&qp->wq));
if (ret) {
- printk(KERN_ERR MOD "%s: Fatal error - "
+ pr_err(KERN_ERR MOD "%s: Fatal error - "
"DB overflow recovery failed - "
"error syncing SQ qid %u\n",
pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
+ spin_unlock(&qp->lock);
+ spin_unlock_irq(&qp->rhp->lock);
return;
}
+ qp->wq.sq.wq_pidx_inc = 0;
ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
qp->wq.rq.qid,
@@ -1062,12 +1200,17 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
t4_rq_wq_size(&qp->wq));
if (ret) {
- printk(KERN_ERR MOD "%s: Fatal error - "
+ pr_err(KERN_ERR MOD "%s: Fatal error - "
"DB overflow recovery failed - "
"error syncing RQ qid %u\n",
pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
+ spin_unlock(&qp->lock);
+ spin_unlock_irq(&qp->rhp->lock);
return;
}
+ qp->wq.rq.wq_pidx_inc = 0;
+ spin_unlock(&qp->lock);
+ spin_unlock_irq(&qp->rhp->lock);
/* Wait for the dbfifo to drain */
while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
@@ -1083,36 +1226,22 @@ static void recover_queues(struct uld_ctx *ctx)
struct qp_list qp_list;
int ret;
- /* lock out kernel db ringers */
- mutex_lock(&ctx->dev->db_mutex);
-
- /* put all queues in to recovery mode */
- spin_lock_irq(&ctx->dev->lock);
- ctx->dev->db_state = RECOVERY;
- ctx->dev->rdev.stats.db_state_transitions++;
- idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
- spin_unlock_irq(&ctx->dev->lock);
-
/* slow everybody down */
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(usecs_to_jiffies(1000));
- /* Wait for the dbfifo to completely drain. */
- while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(usecs_to_jiffies(10));
- }
-
/* flush the SGE contexts */
ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
if (ret) {
printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
pci_name(ctx->lldi.pdev));
- goto out;
+ return;
}
/* Count active queues so we can build a list of queues to recover */
spin_lock_irq(&ctx->dev->lock);
+ WARN_ON(ctx->dev->db_state != STOPPED);
+ ctx->dev->db_state = RECOVERY;
idr_for_each(&ctx->dev->qpidr, count_qps, &count);
qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
@@ -1120,7 +1249,7 @@ static void recover_queues(struct uld_ctx *ctx)
printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
pci_name(ctx->lldi.pdev));
spin_unlock_irq(&ctx->dev->lock);
- goto out;
+ return;
}
qp_list.idx = 0;
@@ -1133,29 +1262,13 @@ static void recover_queues(struct uld_ctx *ctx)
recover_lost_dbs(ctx, &qp_list);
/* we're almost done! deref the qps and clean up */
- deref_qps(qp_list);
+ deref_qps(&qp_list);
kfree(qp_list.qps);
- /* Wait for the dbfifo to completely drain again */
- while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(usecs_to_jiffies(10));
- }
-
- /* resume the queues */
spin_lock_irq(&ctx->dev->lock);
- if (ctx->dev->qpcnt > db_fc_threshold)
- ctx->dev->db_state = FLOW_CONTROL;
- else {
- ctx->dev->db_state = NORMAL;
- idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
- }
- ctx->dev->rdev.stats.db_state_transitions++;
+ WARN_ON(ctx->dev->db_state != RECOVERY);
+ ctx->dev->db_state = STOPPED;
spin_unlock_irq(&ctx->dev->lock);
-
-out:
- /* start up kernel db ringers again */
- mutex_unlock(&ctx->dev->db_mutex);
}
static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
@@ -1165,9 +1278,7 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
switch (control) {
case CXGB4_CONTROL_DB_FULL:
stop_queues(ctx);
- mutex_lock(&ctx->dev->rdev.stats.lock);
ctx->dev->rdev.stats.db_full++;
- mutex_unlock(&ctx->dev->rdev.stats.lock);
break;
case CXGB4_CONTROL_DB_EMPTY:
resume_queues(ctx);
@@ -1210,6 +1321,20 @@ static int __init c4iw_init_module(void)
printk(KERN_WARNING MOD
"could not create debugfs entry, continuing\n");
+ if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS,
+ c4iw_nl_cb_table))
+ pr_err("%s[%u]: Failed to add netlink callback\n"
+ , __func__, __LINE__);
+
+ err = iwpm_init(RDMA_NL_C4IW);
+ if (err) {
+ pr_err("port mapper initialization failed with %d\n", err);
+ ibnl_remove_client(RDMA_NL_C4IW);
+ c4iw_cm_term();
+ debugfs_remove_recursive(c4iw_debugfs_root);
+ return err;
+ }
+
cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
return 0;
@@ -1227,6 +1352,8 @@ static void __exit c4iw_exit_module(void)
}
mutex_unlock(&dev_mutex);
cxgb4_unregister_uld(CXGB4_ULD_RDMA);
+ iwpm_exit(RDMA_NL_C4IW);
+ ibnl_remove_client(RDMA_NL_C4IW);
c4iw_cm_term();
debugfs_remove_recursive(c4iw_debugfs_root);
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 23eaeabab93..361fff7a074 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -52,6 +52,8 @@
#include <rdma/ib_verbs.h>
#include <rdma/iw_cm.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/iw_portmap.h>
#include "cxgb4.h"
#include "cxgb4_uld.h"
@@ -109,6 +111,7 @@ struct c4iw_dev_ucontext {
enum c4iw_rdev_flags {
T4_FATAL_ERROR = (1<<0),
+ T4_STATUS_PAGE_DISABLED = (1<<1),
};
struct c4iw_stat {
@@ -130,6 +133,7 @@ struct c4iw_stats {
u64 db_empty;
u64 db_drop;
u64 db_state_transitions;
+ u64 db_fc_interruptions;
u64 tcam_full;
u64 act_ofld_conn_fails;
u64 pas_ofld_conn_fails;
@@ -147,9 +151,12 @@ struct c4iw_rdev {
struct gen_pool *ocqp_pool;
u32 flags;
struct cxgb4_lld_info lldi;
+ unsigned long bar2_pa;
+ void __iomem *bar2_kva;
unsigned long oc_mw_pa;
void __iomem *oc_mw_kva;
struct c4iw_stats stats;
+ struct t4_dev_status_page *status_page;
};
static inline int c4iw_fatal_error(struct c4iw_rdev *rdev)
@@ -211,7 +218,8 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
enum db_state {
NORMAL = 0,
FLOW_CONTROL = 1,
- RECOVERY = 2
+ RECOVERY = 2,
+ STOPPED = 3
};
struct c4iw_dev {
@@ -225,10 +233,10 @@ struct c4iw_dev {
struct mutex db_mutex;
struct dentry *debugfs_root;
enum db_state db_state;
- int qpcnt;
struct idr hwtid_idr;
struct idr atid_idr;
struct idr stid_idr;
+ struct list_head db_fc_list;
};
static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@ -369,6 +377,7 @@ struct c4iw_fr_page_list {
DEFINE_DMA_UNMAP_ADDR(mapping);
dma_addr_t dma_addr;
struct c4iw_dev *dev;
+ int pll_len;
};
static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list(
@@ -428,10 +437,12 @@ struct c4iw_qp_attributes {
u8 ecode;
u16 sq_db_inc;
u16 rq_db_inc;
+ u8 send_term;
};
struct c4iw_qp {
struct ib_qp ibqp;
+ struct list_head db_fc_entry;
struct c4iw_dev *rhp;
struct c4iw_ep *ep;
struct c4iw_qp_attributes attr;
@@ -441,6 +452,7 @@ struct c4iw_qp {
atomic_t refcnt;
wait_queue_head_t wait;
struct timer_list timer;
+ int sq_sig_all;
};
static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
@@ -718,6 +730,7 @@ enum c4iw_ep_flags {
CLOSE_SENT = 3,
TIMEOUT = 4,
QP_REFERENCED = 5,
+ RELEASE_MAPINFO = 6,
};
enum c4iw_ep_history {
@@ -754,6 +767,8 @@ struct c4iw_ep_common {
struct mutex mutex;
struct sockaddr_storage local_addr;
struct sockaddr_storage remote_addr;
+ struct sockaddr_storage mapped_local_addr;
+ struct sockaddr_storage mapped_remote_addr;
struct c4iw_wr_wait wr_wait;
unsigned long flags;
unsigned long history;
@@ -795,7 +810,48 @@ struct c4iw_ep {
u8 retry_with_mpa_v1;
u8 tried_with_mpa_v1;
unsigned int retry_count;
-};
+ int snd_win;
+ int rcv_win;
+};
+
+static inline void print_addr(struct c4iw_ep_common *epc, const char *func,
+ const char *msg)
+{
+
+#define SINA(a) (&(((struct sockaddr_in *)(a))->sin_addr.s_addr))
+#define SINP(a) ntohs(((struct sockaddr_in *)(a))->sin_port)
+#define SIN6A(a) (&(((struct sockaddr_in6 *)(a))->sin6_addr))
+#define SIN6P(a) ntohs(((struct sockaddr_in6 *)(a))->sin6_port)
+
+ if (c4iw_debug) {
+ switch (epc->local_addr.ss_family) {
+ case AF_INET:
+ PDBG("%s %s %pI4:%u/%u <-> %pI4:%u/%u\n",
+ func, msg, SINA(&epc->local_addr),
+ SINP(&epc->local_addr),
+ SINP(&epc->mapped_local_addr),
+ SINA(&epc->remote_addr),
+ SINP(&epc->remote_addr),
+ SINP(&epc->mapped_remote_addr));
+ break;
+ case AF_INET6:
+ PDBG("%s %s %pI6:%u/%u <-> %pI6:%u/%u\n",
+ func, msg, SIN6A(&epc->local_addr),
+ SIN6P(&epc->local_addr),
+ SIN6P(&epc->mapped_local_addr),
+ SIN6A(&epc->remote_addr),
+ SIN6P(&epc->remote_addr),
+ SIN6P(&epc->mapped_remote_addr));
+ break;
+ default:
+ break;
+ }
+ }
+#undef SINA
+#undef SINP
+#undef SIN6A
+#undef SIN6P
+}
static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
{
@@ -852,7 +908,7 @@ int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev);
int c4iw_register_device(struct c4iw_dev *dev);
void c4iw_unregister_device(struct c4iw_dev *dev);
int __init c4iw_cm_init(void);
-void __exit c4iw_cm_term(void);
+void c4iw_cm_term(void);
void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
struct c4iw_dev_ucontext *uctx);
void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 4cb8eb24497..ec7a2988a70 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -37,9 +37,9 @@
#include "iw_cxgb4.h"
-int use_dsgl = 1;
+int use_dsgl = 0;
module_param(use_dsgl, int, 0644);
-MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1)");
+MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)");
#define T4_ULPTX_MIN_IO 32
#define C4IW_MAX_INLINE_SIZE 96
@@ -76,7 +76,7 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
INIT_ULPTX_WR(req, wr_len, 0, 0);
req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) |
(wait ? FW_WR_COMPL(1) : 0));
- req->wr.wr_lo = wait ? (__force __be64)&wr_wait : 0;
+ req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L;
req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16)));
req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE));
req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1));
@@ -173,7 +173,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
return ret;
}
-int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data)
+static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data)
{
u32 remain = len;
u32 dmalen;
@@ -259,8 +259,12 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) {
stag_idx = c4iw_get_resource(&rdev->resource.tpt_table);
- if (!stag_idx)
+ if (!stag_idx) {
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.stag.fail++;
+ mutex_unlock(&rdev->stats.lock);
return -ENOMEM;
+ }
mutex_lock(&rdev->stats.lock);
rdev->stats.stag.cur += 32;
if (rdev->stats.stag.cur > rdev->stats.stag.max)
@@ -678,9 +682,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
__be64 *pages;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct c4iw_dev *rhp;
struct c4iw_pd *php;
struct c4iw_mr *mhp;
@@ -710,10 +714,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mhp->umem->page_size) - 1;
- n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- n += chunk->nents;
-
+ n = mhp->umem->nmap;
err = alloc_pbl(mhp, n);
if (err)
goto err;
@@ -726,24 +727,22 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
- list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = cpu_to_be64(sg_dma_address(
- &chunk->page_list[j]) +
- mhp->umem->page_size * k);
- if (i == PAGE_SIZE / sizeof *pages) {
- err = write_pbl(&mhp->rhp->rdev,
- pages,
- mhp->attr.pbl_addr + (n << 3), i);
- if (err)
- goto pbl_done;
- n += i;
- i = 0;
- }
+ for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = cpu_to_be64(sg_dma_address(sg) +
+ mhp->umem->page_size * k);
+ if (i == PAGE_SIZE / sizeof *pages) {
+ err = write_pbl(&mhp->rhp->rdev,
+ pages,
+ mhp->attr.pbl_addr + (n << 3), i);
+ if (err)
+ goto pbl_done;
+ n += i;
+ i = 0;
}
}
+ }
if (i)
err = write_pbl(&mhp->rhp->rdev, pages,
@@ -903,7 +902,11 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device,
dma_unmap_addr_set(c4pl, mapping, dma_addr);
c4pl->dma_addr = dma_addr;
c4pl->dev = dev;
- c4pl->ibpl.max_page_list_len = pll_len;
+ c4pl->pll_len = pll_len;
+
+ PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
+ __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
+ &c4pl->dma_addr);
return &c4pl->ibpl;
}
@@ -912,8 +915,12 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl)
{
struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl);
+ PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
+ __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
+ &c4pl->dma_addr);
+
dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev,
- c4pl->ibpl.max_page_list_len,
+ c4pl->pll_len,
c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping));
kfree(c4pl);
}
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 7e94c9a656a..b1d305338de 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -106,15 +106,57 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
{
struct c4iw_ucontext *context;
struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
+ static int warned;
+ struct c4iw_alloc_ucontext_resp uresp;
+ int ret = 0;
+ struct c4iw_mm_entry *mm = NULL;
PDBG("%s ibdev %p\n", __func__, ibdev);
context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return ERR_PTR(-ENOMEM);
+ if (!context) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
INIT_LIST_HEAD(&context->mmaps);
spin_lock_init(&context->mmap_lock);
+
+ if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
+ if (!warned++)
+ pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled.");
+ rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED;
+ } else {
+ mm = kmalloc(sizeof(*mm), GFP_KERNEL);
+ if (!mm) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ uresp.status_page_size = PAGE_SIZE;
+
+ spin_lock(&context->mmap_lock);
+ uresp.status_page_key = context->key;
+ context->key += PAGE_SIZE;
+ spin_unlock(&context->mmap_lock);
+
+ ret = ib_copy_to_udata(udata, &uresp,
+ sizeof(uresp) - sizeof(uresp.reserved));
+ if (ret)
+ goto err_mm;
+
+ mm->key = uresp.status_page_key;
+ mm->addr = virt_to_phys(rhp->rdev.status_page);
+ mm->len = PAGE_SIZE;
+ insert_mmap(context, mm);
+ }
return &context->ibucontext;
+err_mm:
+ kfree(mm);
+err_free:
+ kfree(context);
+err:
+ return ERR_PTR(ret);
}
static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
@@ -287,7 +329,7 @@ static int c4iw_query_device(struct ib_device *ibdev,
props->max_mr = c4iw_num_stags(&dev->rdev);
props->max_pd = T4_MAX_NUM_PD;
props->local_ca_ack_delay = 0;
- props->max_fast_reg_page_list_len = T4_MAX_FR_DEPTH;
+ props->max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl);
return 0;
}
@@ -458,7 +500,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.node_type = RDMA_NODE_RNIC;
memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
- dev->ibdev.num_comp_vectors = 1;
+ dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq;
dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev);
dev->ibdev.query_device = c4iw_query_device;
dev->ibdev.query_port = c4iw_query_port;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 582936708e6..086f62f5dc9 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -212,13 +212,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
wq->db = rdev->lldi.db_reg;
wq->gts = rdev->lldi.gts_reg;
- if (user) {
- wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
- (wq->sq.qid << rdev->qpshift);
- wq->sq.udb &= PAGE_MASK;
- wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) +
- (wq->rq.qid << rdev->qpshift);
- wq->rq.udb &= PAGE_MASK;
+ if (user || is_t5(rdev->lldi.adapter_type)) {
+ u32 off;
+
+ off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK;
+ if (user) {
+ wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
+ } else {
+ off += 128 * (wq->sq.qid & rdev->qpmask) + 8;
+ wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
+ }
+ off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK;
+ if (user) {
+ wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off);
+ } else {
+ off += 128 * (wq->rq.qid & rdev->qpmask) + 8;
+ wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off);
+ }
}
wq->rdev = rdev;
wq->rq.msn = 1;
@@ -299,9 +309,10 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
if (ret)
goto free_dma;
- PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n",
+ PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%lx rqudb 0x%lx\n",
__func__, wq->sq.qid, wq->rq.qid, wq->db,
- (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb);
+ (__force unsigned long) wq->sq.udb,
+ (__force unsigned long) wq->rq.udb);
return 0;
free_dma:
@@ -425,6 +436,8 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
default:
return -EINVAL;
}
+ wqe->send.r3 = 0;
+ wqe->send.r4 = 0;
plen = 0;
if (wr->num_sge) {
@@ -555,7 +568,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32);
int rem;
- if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH)
+ if (wr->wr.fast_reg.page_list_len >
+ t4_max_fr_depth(use_dsgl))
return -EINVAL;
wqe->fr.qpbinde_to_dcacpu = 0;
@@ -638,6 +652,48 @@ void c4iw_qp_rem_ref(struct ib_qp *qp)
wake_up(&(to_c4iw_qp(qp)->wait));
}
+static void add_to_fc_list(struct list_head *head, struct list_head *entry)
+{
+ if (list_empty(entry))
+ list_add_tail(entry, head);
+}
+
+static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qhp->rhp->lock, flags);
+ spin_lock(&qhp->lock);
+ if (qhp->rhp->db_state == NORMAL)
+ t4_ring_sq_db(&qhp->wq, inc,
+ is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+ else {
+ add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
+ qhp->wq.sq.wq_pidx_inc += inc;
+ }
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&qhp->rhp->lock, flags);
+ return 0;
+}
+
+static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qhp->rhp->lock, flags);
+ spin_lock(&qhp->lock);
+ if (qhp->rhp->db_state == NORMAL)
+ t4_ring_rq_db(&qhp->wq, inc,
+ is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+ else {
+ add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
+ qhp->wq.rq.wq_pidx_inc += inc;
+ }
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&qhp->rhp->lock, flags);
+ return 0;
+}
+
int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
@@ -646,7 +702,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
enum fw_wr_opcodes fw_opcode = 0;
enum fw_ri_wr_flags fw_flags;
struct c4iw_qp *qhp;
- union t4_wr *wqe;
+ union t4_wr *wqe = NULL;
u32 num_wrs;
struct t4_swsqe *swsqe;
unsigned long flag;
@@ -675,7 +731,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
fw_flags = 0;
if (wr->send_flags & IB_SEND_SOLICITED)
fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
- if (wr->send_flags & IB_SEND_SIGNALED)
+ if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all)
fw_flags |= FW_RI_COMPLETION_FLAG;
swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
switch (wr->opcode) {
@@ -736,7 +792,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
swsqe->idx = qhp->wq.sq.pidx;
swsqe->complete = 0;
- swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+ swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
+ qhp->sq_sig_all;
swsqe->flushed = 0;
swsqe->wr_id = wr->wr_id;
@@ -750,9 +807,14 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
t4_sq_produce(&qhp->wq, len16);
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
}
- if (t4_wq_db_enabled(&qhp->wq))
- t4_ring_sq_db(&qhp->wq, idx);
- spin_unlock_irqrestore(&qhp->lock, flag);
+ if (!qhp->rhp->rdev.status_page->db_off) {
+ t4_ring_sq_db(&qhp->wq, idx,
+ is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ } else {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ ring_kernel_sq_db(qhp, idx);
+ }
return err;
}
@@ -761,7 +823,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
{
int err = 0;
struct c4iw_qp *qhp;
- union t4_recv_wr *wqe;
+ union t4_recv_wr *wqe = NULL;
u32 num_wrs;
u8 len16 = 0;
unsigned long flag;
@@ -812,9 +874,14 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
wr = wr->next;
num_wrs--;
}
- if (t4_wq_db_enabled(&qhp->wq))
- t4_ring_rq_db(&qhp->wq, idx);
- spin_unlock_irqrestore(&qhp->lock, flag);
+ if (!qhp->rhp->rdev.status_page->db_off) {
+ t4_ring_rq_db(&qhp->wq, idx,
+ is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ } else {
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ ring_kernel_rq_db(qhp, idx);
+ }
return err;
}
@@ -1200,35 +1267,6 @@ out:
return ret;
}
-/*
- * Called by the library when the qp has user dbs disabled due to
- * a DB_FULL condition. This function will single-thread all user
- * DB rings to avoid overflowing the hw db-fifo.
- */
-static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc)
-{
- int delay = db_delay_usecs;
-
- mutex_lock(&qhp->rhp->db_mutex);
- do {
-
- /*
- * The interrupt threshold is dbfifo_int_thresh << 6. So
- * make sure we don't cross that and generate an interrupt.
- */
- if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) <
- (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) {
- writel(QID(qid) | PIDX(inc), qhp->wq.db);
- break;
- }
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(usecs_to_jiffies(delay));
- delay = min(delay << 1, 2000);
- } while (1);
- mutex_unlock(&qhp->rhp->db_mutex);
- return 0;
-}
-
int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
enum c4iw_qp_attr_mask mask,
struct c4iw_qp_attributes *attrs,
@@ -1278,11 +1316,11 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
}
if (mask & C4IW_QP_ATTR_SQ_DB) {
- ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc);
+ ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc);
goto out;
}
if (mask & C4IW_QP_ATTR_RQ_DB) {
- ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc);
+ ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc);
goto out;
}
@@ -1332,6 +1370,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
switch (attrs->next_state) {
case C4IW_QP_STATE_CLOSING:
BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+ t4_set_wq_in_error(&qhp->wq);
set_state(qhp, C4IW_QP_STATE_CLOSING);
ep = qhp->ep;
if (!internal) {
@@ -1339,30 +1378,30 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
disconnect = 1;
c4iw_get_ep(&qhp->ep->com);
}
- t4_set_wq_in_error(&qhp->wq);
ret = rdma_fini(rhp, qhp, ep);
if (ret)
goto err;
break;
case C4IW_QP_STATE_TERMINATE:
+ t4_set_wq_in_error(&qhp->wq);
set_state(qhp, C4IW_QP_STATE_TERMINATE);
qhp->attr.layer_etype = attrs->layer_etype;
qhp->attr.ecode = attrs->ecode;
- t4_set_wq_in_error(&qhp->wq);
ep = qhp->ep;
- disconnect = 1;
- if (!internal)
+ if (!internal) {
+ c4iw_get_ep(&qhp->ep->com);
terminate = 1;
- else {
+ disconnect = 1;
+ } else {
+ terminate = qhp->attr.send_term;
ret = rdma_fini(rhp, qhp, ep);
if (ret)
goto err;
}
- c4iw_get_ep(&qhp->ep->com);
break;
case C4IW_QP_STATE_ERROR:
- set_state(qhp, C4IW_QP_STATE_ERROR);
t4_set_wq_in_error(&qhp->wq);
+ set_state(qhp, C4IW_QP_STATE_ERROR);
if (!internal) {
abort = 1;
disconnect = 1;
@@ -1465,14 +1504,6 @@ out:
return ret;
}
-static int enable_qp_db(int id, void *p, void *data)
-{
- struct c4iw_qp *qp = p;
-
- t4_enable_wq_db(&qp->wq);
- return 0;
-}
-
int c4iw_destroy_qp(struct ib_qp *ib_qp)
{
struct c4iw_dev *rhp;
@@ -1490,22 +1521,15 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
wait_event(qhp->wait, !qhp->ep);
- spin_lock_irq(&rhp->lock);
- remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid);
- rhp->qpcnt--;
- BUG_ON(rhp->qpcnt < 0);
- if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) {
- rhp->rdev.stats.db_state_transitions++;
- rhp->db_state = NORMAL;
- idr_for_each(&rhp->qpidr, enable_qp_db, NULL);
- }
- if (db_coalescing_threshold >= 0)
- if (rhp->qpcnt <= db_coalescing_threshold)
- cxgb4_enable_db_coalescing(rhp->rdev.lldi.ports[0]);
- spin_unlock_irq(&rhp->lock);
+ remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
atomic_dec(&qhp->refcnt);
wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
+ spin_lock_irq(&rhp->lock);
+ if (!list_empty(&qhp->db_fc_entry))
+ list_del_init(&qhp->db_fc_entry);
+ spin_unlock_irq(&rhp->lock);
+
ucontext = ib_qp->uobject ?
to_c4iw_ucontext(ib_qp->uobject->context) : NULL;
destroy_qp(&rhp->rdev, &qhp->wq,
@@ -1516,14 +1540,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
return 0;
}
-static int disable_qp_db(int id, void *p, void *data)
-{
- struct c4iw_qp *qp = p;
-
- t4_disable_wq_db(&qp->wq);
- return 0;
-}
-
struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
struct ib_udata *udata)
{
@@ -1533,7 +1549,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
struct c4iw_cq *schp;
struct c4iw_cq *rchp;
struct c4iw_create_qp_resp uresp;
- int sqsize, rqsize;
+ unsigned int sqsize, rqsize;
struct c4iw_ucontext *ucontext;
int ret;
struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL;
@@ -1605,25 +1621,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
qhp->attr.enable_bind = 1;
qhp->attr.max_ord = 1;
qhp->attr.max_ird = 1;
+ qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
spin_lock_init(&qhp->lock);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
atomic_set(&qhp->refcnt, 1);
- spin_lock_irq(&rhp->lock);
- if (rhp->db_state != NORMAL)
- t4_disable_wq_db(&qhp->wq);
- rhp->qpcnt++;
- if (rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) {
- rhp->rdev.stats.db_state_transitions++;
- rhp->db_state = FLOW_CONTROL;
- idr_for_each(&rhp->qpidr, disable_qp_db, NULL);
- }
- if (db_coalescing_threshold >= 0)
- if (rhp->qpcnt > db_coalescing_threshold)
- cxgb4_disable_db_coalescing(rhp->rdev.lldi.ports[0]);
- ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
- spin_unlock_irq(&rhp->lock);
+ ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
if (ret)
goto err2;
@@ -1692,11 +1696,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize);
insert_mmap(ucontext, mm2);
mm3->key = uresp.sq_db_gts_key;
- mm3->addr = qhp->wq.sq.udb;
+ mm3->addr = (__force unsigned long) qhp->wq.sq.udb;
mm3->len = PAGE_SIZE;
insert_mmap(ucontext, mm3);
mm4->key = uresp.rq_db_gts_key;
- mm4->addr = qhp->wq.rq.udb;
+ mm4->addr = (__force unsigned long) qhp->wq.rq.udb;
mm4->len = PAGE_SIZE;
insert_mmap(ucontext, mm4);
if (mm5) {
@@ -1709,6 +1713,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
}
qhp->ibqp.qp_num = qhp->wq.sq.qid;
init_timer(&(qhp->timer));
+ INIT_LIST_HEAD(&qhp->db_fc_entry);
PDBG("%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x\n",
__func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
qhp->wq.sq.qid);
@@ -1772,11 +1777,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
/*
* Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
* ringing the queue db when we're in DB_FULL mode.
+ * Only allow this on T4 devices.
*/
attrs.sq_db_inc = attr->sq_psn;
attrs.rq_db_inc = attr->rq_psn;
mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
+ if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
+ (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
+ return -EINVAL;
return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
}
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index cdef4d7fb6d..67df71a7012 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -179,8 +179,12 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
kfree(entry);
} else {
qid = c4iw_get_resource(&rdev->resource.qid_table);
- if (!qid)
+ if (!qid) {
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.qid.fail++;
+ mutex_unlock(&rdev->stats.lock);
goto out;
+ }
mutex_lock(&rdev->stats.lock);
rdev->stats.qid.cur += rdev->qpmask + 1;
mutex_unlock(&rdev->stats.lock);
@@ -322,8 +326,8 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6);
PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
if (!addr)
- printk_ratelimited(KERN_WARNING MOD "%s: Out of RQT memory\n",
- pci_name(rdev->lldi.pdev));
+ pr_warn_ratelimited(MOD "%s: Out of RQT memory\n",
+ pci_name(rdev->lldi.pdev));
mutex_lock(&rdev->stats.lock);
if (addr) {
rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT);
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index e73ace73918..68b0a6bf4eb 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -84,7 +84,14 @@ struct t4_status_page {
sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
#define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \
sizeof(struct fw_ri_immd)) & ~31UL)
-#define T4_MAX_FR_DEPTH (1024 / sizeof(u64))
+#define T4_MAX_FR_IMMD_DEPTH (T4_MAX_FR_IMMD / sizeof(u64))
+#define T4_MAX_FR_DSGL 1024
+#define T4_MAX_FR_DSGL_DEPTH (T4_MAX_FR_DSGL / sizeof(u64))
+
+static inline int t4_max_fr_depth(int use_dsgl)
+{
+ return use_dsgl ? T4_MAX_FR_DSGL_DEPTH : T4_MAX_FR_IMMD_DEPTH;
+}
#define T4_RQ_NUM_SLOTS 2
#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
@@ -292,7 +299,7 @@ struct t4_sq {
unsigned long phys_addr;
struct t4_swsqe *sw_sq;
struct t4_swsqe *oldest_read;
- u64 udb;
+ u64 __iomem *udb;
size_t memsize;
u32 qid;
u16 in_use;
@@ -300,6 +307,7 @@ struct t4_sq {
u16 cidx;
u16 pidx;
u16 wq_pidx;
+ u16 wq_pidx_inc;
u16 flags;
short flush_cidx;
};
@@ -313,7 +321,7 @@ struct t4_rq {
dma_addr_t dma_addr;
DEFINE_DMA_UNMAP_ADDR(mapping);
struct t4_swrqe *sw_rq;
- u64 udb;
+ u64 __iomem *udb;
size_t memsize;
u32 qid;
u32 msn;
@@ -324,6 +332,7 @@ struct t4_rq {
u16 cidx;
u16 pidx;
u16 wq_pidx;
+ u16 wq_pidx_inc;
};
struct t4_wq {
@@ -433,15 +442,67 @@ static inline u16 t4_sq_wq_size(struct t4_wq *wq)
return wq->sq.size * T4_SQ_NUM_SLOTS;
}
-static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
+/* This function copies 64 byte coalesced work request to memory
+ * mapped BAR2 space. For coalesced WRs, the SGE fetches data
+ * from the FIFO instead of from Host.
+ */
+static inline void pio_copy(u64 __iomem *dst, u64 *src)
+{
+ int count = 8;
+
+ while (count) {
+ writeq(*src, dst);
+ src++;
+ dst++;
+ count--;
+ }
+}
+
+static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
+ union t4_wr *wqe)
{
+
+ /* Flush host queue memory writes. */
wmb();
+ if (t5) {
+ if (inc == 1 && wqe) {
+ PDBG("%s: WC wq->sq.pidx = %d\n",
+ __func__, wq->sq.pidx);
+ pio_copy(wq->sq.udb + 7, (void *)wqe);
+ } else {
+ PDBG("%s: DB wq->sq.pidx = %d\n",
+ __func__, wq->sq.pidx);
+ writel(PIDX_T5(inc), wq->sq.udb);
+ }
+
+ /* Flush user doorbell area writes. */
+ wmb();
+ return;
+ }
writel(QID(wq->sq.qid) | PIDX(inc), wq->db);
}
-static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc)
+static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5,
+ union t4_recv_wr *wqe)
{
+
+ /* Flush host queue memory writes. */
wmb();
+ if (t5) {
+ if (inc == 1 && wqe) {
+ PDBG("%s: WC wq->rq.pidx = %d\n",
+ __func__, wq->rq.pidx);
+ pio_copy(wq->rq.udb + 7, (void *)wqe);
+ } else {
+ PDBG("%s: DB wq->rq.pidx = %d\n",
+ __func__, wq->rq.pidx);
+ writel(PIDX_T5(inc), wq->rq.udb);
+ }
+
+ /* Flush user doorbell area writes. */
+ wmb();
+ return;
+ }
writel(QID(wq->rq.qid) | PIDX(inc), wq->db);
}
@@ -481,6 +542,7 @@ struct t4_cq {
size_t memsize;
__be64 bits_type_ts;
u32 cqid;
+ int vector;
u16 size; /* including status page */
u16 cidx;
u16 sw_pidx;
@@ -566,6 +628,9 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
BUG_ON(1);
} else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
+
+ /* Ensure CQE is flushed to memory */
+ rmb();
*cqe = &cq->queue[cq->cidx];
ret = 0;
} else
@@ -609,3 +674,7 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq)
((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1;
}
#endif
+
+struct t4_dev_status_page {
+ u8 db_off;
+};
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index dc193c29267..91289a051af 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -836,4 +836,19 @@ struct ulptx_idata {
#define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE)
#define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U)
+enum { /* TCP congestion control algorithms */
+ CONG_ALG_RENO,
+ CONG_ALG_TAHOE,
+ CONG_ALG_NEWRENO,
+ CONG_ALG_HIGHSPEED
+};
+
+#define S_CONG_CNTRL 14
+#define M_CONG_CNTRL 0x3
+#define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL)
+#define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL)
+
+#define CONG_CNTRL_VALID (1 << 18)
+#define T5_OPT_2_VALID (1 << 31)
+
#endif /* _T4FW_RI_API_H_ */
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
index 32b754c35ab..cbd0ce17072 100644
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ b/drivers/infiniband/hw/cxgb4/user.h
@@ -48,6 +48,7 @@ struct c4iw_create_cq_resp {
__u32 cqid;
__u32 size;
__u32 qid_mask;
+ __u32 reserved; /* explicit padding (optional for i386) */
};
@@ -70,4 +71,10 @@ struct c4iw_create_qp_resp {
__u32 qid_mask;
__u32 flags;
};
+
+struct c4iw_alloc_ucontext_resp {
+ __u64 status_page_key;
+ __u32 status_page_size;
+ __u32 reserved; /* explicit padding (optional for i386) */
+};
#endif
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index f08f6eaf3fa..bd45e0f3923 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -322,7 +322,7 @@ struct ehca_mr_pginfo {
} phy;
struct { /* type EHCA_MR_PGI_USER section */
struct ib_umem *region;
- struct ib_umem_chunk *next_chunk;
+ struct scatterlist *next_sg;
u64 next_nmap;
} usr;
struct { /* type EHCA_MR_PGI_FMR section */
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 212150c25ea..8cc83753776 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -283,6 +283,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
(my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1));
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
ehca_err(device, "Copy to udata failed.");
+ cq = ERR_PTR(-EFAULT);
goto create_cq_exit4;
}
}
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index bcfb0c18362..3488e8c9fcb 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -400,10 +400,7 @@ reg_user_mr_fallback:
pginfo.num_hwpages = num_hwpages;
pginfo.u.usr.region = e_mr->umem;
pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
- pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk,
- (&e_mr->umem->chunk_list),
- list);
-
+ pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
&e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
@@ -1858,61 +1855,39 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
u64 *kpage)
{
int ret = 0;
- struct ib_umem_chunk *prev_chunk;
- struct ib_umem_chunk *chunk;
u64 pgaddr;
- u32 i = 0;
u32 j = 0;
int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size;
-
- /* loop over desired chunk entries */
- chunk = pginfo->u.usr.next_chunk;
- prev_chunk = pginfo->u.usr.next_chunk;
- list_for_each_entry_continue(
- chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
- for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
- pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
- << PAGE_SHIFT ;
- *kpage = pgaddr + (pginfo->next_hwpage *
- pginfo->hwpage_size);
- if ( !(*kpage) ) {
- ehca_gen_err("pgaddr=%llx "
- "chunk->page_list[i]=%llx "
- "i=%x next_hwpage=%llx",
- pgaddr, (u64)sg_dma_address(
- &chunk->page_list[i]),
- i, pginfo->next_hwpage);
- return -EFAULT;
- }
- (pginfo->hwpage_cnt)++;
- (pginfo->next_hwpage)++;
- kpage++;
- if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
- (pginfo->kpage_cnt)++;
- (pginfo->u.usr.next_nmap)++;
- pginfo->next_hwpage = 0;
- i++;
- }
- j++;
- if (j >= number) break;
+ struct scatterlist **sg = &pginfo->u.usr.next_sg;
+
+ while (*sg != NULL) {
+ pgaddr = page_to_pfn(sg_page(*sg))
+ << PAGE_SHIFT;
+ *kpage = pgaddr + (pginfo->next_hwpage *
+ pginfo->hwpage_size);
+ if (!(*kpage)) {
+ ehca_gen_err("pgaddr=%llx "
+ "sg_dma_address=%llx "
+ "entry=%llx next_hwpage=%llx",
+ pgaddr, (u64)sg_dma_address(*sg),
+ pginfo->u.usr.next_nmap,
+ pginfo->next_hwpage);
+ return -EFAULT;
}
- if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
- (j >= number)) {
- pginfo->u.usr.next_nmap = 0;
- prev_chunk = chunk;
- break;
- } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
- pginfo->u.usr.next_nmap = 0;
- prev_chunk = chunk;
- } else if (j >= number)
+ (pginfo->hwpage_cnt)++;
+ (pginfo->next_hwpage)++;
+ kpage++;
+ if (pginfo->next_hwpage % hwpages_per_kpage == 0) {
+ (pginfo->kpage_cnt)++;
+ (pginfo->u.usr.next_nmap)++;
+ pginfo->next_hwpage = 0;
+ *sg = sg_next(*sg);
+ }
+ j++;
+ if (j >= number)
break;
- else
- prev_chunk = chunk;
}
- pginfo->u.usr.next_chunk =
- list_prepare_entry(prev_chunk,
- (&(pginfo->u.usr.region->chunk_list)),
- list);
+
return ret;
}
@@ -1920,20 +1895,19 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
* check given pages for contiguous layout
* last page addr is returned in prev_pgaddr for further check
*/
-static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
- int start_idx, int end_idx,
+static int ehca_check_kpages_per_ate(struct scatterlist **sg,
+ int num_pages,
u64 *prev_pgaddr)
{
- int t;
- for (t = start_idx; t <= end_idx; t++) {
- u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
+ for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) {
+ u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT;
if (ehca_debug_level >= 3)
ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
*(u64 *)__va(pgaddr));
if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
ehca_gen_err("uncontiguous page found pgaddr=%llx "
- "prev_pgaddr=%llx page_list_i=%x",
- pgaddr, *prev_pgaddr, t);
+ "prev_pgaddr=%llx entries_left_in_hwpage=%x",
+ pgaddr, *prev_pgaddr, num_pages);
return -EINVAL;
}
*prev_pgaddr = pgaddr;
@@ -1947,111 +1921,80 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
u64 *kpage)
{
int ret = 0;
- struct ib_umem_chunk *prev_chunk;
- struct ib_umem_chunk *chunk;
u64 pgaddr, prev_pgaddr;
- u32 i = 0;
u32 j = 0;
int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE;
int nr_kpages = kpages_per_hwpage;
+ struct scatterlist **sg = &pginfo->u.usr.next_sg;
+
+ while (*sg != NULL) {
- /* loop over desired chunk entries */
- chunk = pginfo->u.usr.next_chunk;
- prev_chunk = pginfo->u.usr.next_chunk;
- list_for_each_entry_continue(
- chunk, (&(pginfo->u.usr.region->chunk_list)), list) {
- for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
- if (nr_kpages == kpages_per_hwpage) {
- pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
- << PAGE_SHIFT );
- *kpage = pgaddr;
- if ( !(*kpage) ) {
- ehca_gen_err("pgaddr=%llx i=%x",
- pgaddr, i);
+ if (nr_kpages == kpages_per_hwpage) {
+ pgaddr = (page_to_pfn(sg_page(*sg))
+ << PAGE_SHIFT);
+ *kpage = pgaddr;
+ if (!(*kpage)) {
+ ehca_gen_err("pgaddr=%llx entry=%llx",
+ pgaddr, pginfo->u.usr.next_nmap);
+ ret = -EFAULT;
+ return ret;
+ }
+ /*
+ * The first page in a hwpage must be aligned;
+ * the first MR page is exempt from this rule.
+ */
+ if (pgaddr & (pginfo->hwpage_size - 1)) {
+ if (pginfo->hwpage_cnt) {
+ ehca_gen_err(
+ "invalid alignment "
+ "pgaddr=%llx entry=%llx "
+ "mr_pgsize=%llx",
+ pgaddr, pginfo->u.usr.next_nmap,
+ pginfo->hwpage_size);
ret = -EFAULT;
return ret;
}
- /*
- * The first page in a hwpage must be aligned;
- * the first MR page is exempt from this rule.
- */
- if (pgaddr & (pginfo->hwpage_size - 1)) {
- if (pginfo->hwpage_cnt) {
- ehca_gen_err(
- "invalid alignment "
- "pgaddr=%llx i=%x "
- "mr_pgsize=%llx",
- pgaddr, i,
- pginfo->hwpage_size);
- ret = -EFAULT;
- return ret;
- }
- /* first MR page */
- pginfo->kpage_cnt =
- (pgaddr &
- (pginfo->hwpage_size - 1)) >>
- PAGE_SHIFT;
- nr_kpages -= pginfo->kpage_cnt;
- *kpage = pgaddr &
- ~(pginfo->hwpage_size - 1);
- }
- if (ehca_debug_level >= 3) {
- u64 val = *(u64 *)__va(pgaddr);
- ehca_gen_dbg("kpage=%llx chunk_page=%llx "
- "value=%016llx",
- *kpage, pgaddr, val);
- }
- prev_pgaddr = pgaddr;
- i++;
- pginfo->kpage_cnt++;
- pginfo->u.usr.next_nmap++;
- nr_kpages--;
- if (!nr_kpages)
- goto next_kpage;
- continue;
+ /* first MR page */
+ pginfo->kpage_cnt =
+ (pgaddr &
+ (pginfo->hwpage_size - 1)) >>
+ PAGE_SHIFT;
+ nr_kpages -= pginfo->kpage_cnt;
+ *kpage = pgaddr &
+ ~(pginfo->hwpage_size - 1);
}
- if (i + nr_kpages > chunk->nmap) {
- ret = ehca_check_kpages_per_ate(
- chunk->page_list, i,
- chunk->nmap - 1, &prev_pgaddr);
- if (ret) return ret;
- pginfo->kpage_cnt += chunk->nmap - i;
- pginfo->u.usr.next_nmap += chunk->nmap - i;
- nr_kpages -= chunk->nmap - i;
- break;
+ if (ehca_debug_level >= 3) {
+ u64 val = *(u64 *)__va(pgaddr);
+ ehca_gen_dbg("kpage=%llx page=%llx "
+ "value=%016llx",
+ *kpage, pgaddr, val);
}
+ prev_pgaddr = pgaddr;
+ *sg = sg_next(*sg);
+ pginfo->kpage_cnt++;
+ pginfo->u.usr.next_nmap++;
+ nr_kpages--;
+ if (!nr_kpages)
+ goto next_kpage;
+ continue;
+ }
+
+ ret = ehca_check_kpages_per_ate(sg, nr_kpages,
+ &prev_pgaddr);
+ if (ret)
+ return ret;
+ pginfo->kpage_cnt += nr_kpages;
+ pginfo->u.usr.next_nmap += nr_kpages;
- ret = ehca_check_kpages_per_ate(chunk->page_list, i,
- i + nr_kpages - 1,
- &prev_pgaddr);
- if (ret) return ret;
- i += nr_kpages;
- pginfo->kpage_cnt += nr_kpages;
- pginfo->u.usr.next_nmap += nr_kpages;
next_kpage:
- nr_kpages = kpages_per_hwpage;
- (pginfo->hwpage_cnt)++;
- kpage++;
- j++;
- if (j >= number) break;
- }
- if ((pginfo->u.usr.next_nmap >= chunk->nmap) &&
- (j >= number)) {
- pginfo->u.usr.next_nmap = 0;
- prev_chunk = chunk;
- break;
- } else if (pginfo->u.usr.next_nmap >= chunk->nmap) {
- pginfo->u.usr.next_nmap = 0;
- prev_chunk = chunk;
- } else if (j >= number)
+ nr_kpages = kpages_per_hwpage;
+ (pginfo->hwpage_cnt)++;
+ kpage++;
+ j++;
+ if (j >= number)
break;
- else
- prev_chunk = chunk;
}
- pginfo->u.usr.next_chunk =
- list_prepare_entry(prev_chunk,
- (&(pginfo->u.usr.region->chunk_list)),
- list);
+
return ret;
}
@@ -2591,16 +2534,6 @@ static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
/* This is only a stub; nothing to be done here */
}
-static u64 ehca_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
- return sg->dma_address;
-}
-
-static unsigned int ehca_dma_len(struct ib_device *dev, struct scatterlist *sg)
-{
- return sg->length;
-}
-
static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr,
size_t size,
enum dma_data_direction dir)
@@ -2653,8 +2586,6 @@ struct ib_dma_mapping_ops ehca_dma_mapping_ops = {
.unmap_page = ehca_dma_unmap_page,
.map_sg = ehca_dma_map_sg,
.unmap_sg = ehca_dma_unmap_sg,
- .dma_address = ehca_dma_address,
- .dma_len = ehca_dma_len,
.sync_single_for_cpu = ehca_dma_sync_single_for_cpu,
.sync_single_for_device = ehca_dma_sync_single_for_device,
.alloc_coherent = ehca_dma_alloc_coherent,
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 00d6861a6a1..2e89356c46f 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -1329,7 +1329,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
if (!smi_reset2init &&
!ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
- attr_mask)) {
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
ret = -EINVAL;
ehca_err(ibqp->device,
"Invalid qp transition new_state=%x cur_state=%x "
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 714293b7851..45802e97332 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -326,7 +326,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
size_t count, loff_t *off)
{
u32 __iomem *piobuf;
- u32 plen, clen, pbufn;
+ u32 plen, pbufn, maxlen_reserve;
struct ipath_diag_pkt odp;
struct ipath_diag_xpkt dp;
u32 *tmpbuf = NULL;
@@ -335,42 +335,24 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
u64 val;
u32 l_state, lt_state; /* LinkState, LinkTrainingState */
- if (count < sizeof(odp)) {
- ret = -EINVAL;
- goto bail;
- }
if (count == sizeof(dp)) {
if (copy_from_user(&dp, data, sizeof(dp))) {
ret = -EFAULT;
goto bail;
}
- } else if (copy_from_user(&odp, data, sizeof(odp))) {
- ret = -EFAULT;
- goto bail;
- }
-
- /*
- * Due to padding/alignment issues (lessened with new struct)
- * the old and new structs are the same length. We need to
- * disambiguate them, which we can do because odp.len has never
- * been less than the total of LRH+BTH+DETH so far, while
- * dp.unit (same offset) unit is unlikely to get that high.
- * Similarly, dp.data, the pointer to user at the same offset
- * as odp.unit, is almost certainly at least one (512byte)page
- * "above" NULL. The if-block below can be omitted if compatibility
- * between a new driver and older diagnostic code is unimportant.
- * compatibility the other direction (new diags, old driver) is
- * handled in the diagnostic code, with a warning.
- */
- if (dp.unit >= 20 && dp.data < 512) {
- /* very probable version mismatch. Fix it up */
- memcpy(&odp, &dp, sizeof(odp));
- /* We got a legacy dp, copy elements to dp */
+ } else if (count == sizeof(odp)) {
+ if (copy_from_user(&odp, data, sizeof(odp))) {
+ ret = -EFAULT;
+ goto bail;
+ }
+ dp.len = odp.len;
dp.unit = odp.unit;
dp.data = odp.data;
- dp.len = odp.len;
- dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */
+ dp.pbc_wd = 0;
+ } else {
+ ret = -EINVAL;
+ goto bail;
}
/* send count must be an exact number of dwords */
@@ -379,7 +361,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
goto bail;
}
- clen = dp.len >> 2;
+ plen = dp.len >> 2;
dd = ipath_lookup(dp.unit);
if (!dd || !(dd->ipath_flags & IPATH_PRESENT) ||
@@ -422,16 +404,22 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
goto bail;
}
- /* need total length before first word written */
- /* +1 word is for the qword padding */
- plen = sizeof(u32) + dp.len;
-
- if ((plen + 4) > dd->ipath_ibmaxlen) {
+ /*
+ * need total length before first word written, plus 2 Dwords. One Dword
+ * is for padding so we get the full user data when not aligned on
+ * a word boundary. The other Dword is to make sure we have room for the
+ * ICRC which gets tacked on later.
+ */
+ maxlen_reserve = 2 * sizeof(u32);
+ if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) {
ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n",
- plen - 4, dd->ipath_ibmaxlen);
+ dp.len, dd->ipath_ibmaxlen);
ret = -EINVAL;
- goto bail; /* before writing pbc */
+ goto bail;
}
+
+ plen = sizeof(u32) + dp.len;
+
tmpbuf = vmalloc(plen);
if (!tmpbuf) {
dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, "
@@ -473,11 +461,11 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
*/
if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
ipath_flush_wc();
- __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
+ __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1);
ipath_flush_wc();
- __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+ __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
} else
- __iowrite32_copy(piobuf + 2, tmpbuf, clen);
+ __iowrite32_copy(piobuf + 2, tmpbuf, plen);
ipath_flush_wc();
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
index 644c2c74e05..123a8c05353 100644
--- a/drivers/infiniband/hw/ipath/ipath_dma.c
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -115,6 +115,10 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
ret = 0;
break;
}
+ sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
}
return ret;
}
@@ -126,21 +130,6 @@ static void ipath_unmap_sg(struct ib_device *dev,
BUG_ON(!valid_dma_direction(direction));
}
-static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
- u64 addr = (u64) page_address(sg_page(sg));
-
- if (addr)
- addr += sg->offset;
- return addr;
-}
-
-static unsigned int ipath_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg->length;
-}
-
static void ipath_sync_single_for_cpu(struct ib_device *dev,
u64 addr,
size_t size,
@@ -176,17 +165,15 @@ static void ipath_dma_free_coherent(struct ib_device *dev, size_t size,
}
struct ib_dma_mapping_ops ipath_dma_mapping_ops = {
- ipath_mapping_error,
- ipath_dma_map_single,
- ipath_dma_unmap_single,
- ipath_dma_map_page,
- ipath_dma_unmap_page,
- ipath_map_sg,
- ipath_unmap_sg,
- ipath_sg_dma_address,
- ipath_sg_dma_len,
- ipath_sync_single_for_cpu,
- ipath_sync_single_for_device,
- ipath_dma_alloc_coherent,
- ipath_dma_free_coherent
+ .mapping_error = ipath_mapping_error,
+ .map_single = ipath_dma_map_single,
+ .unmap_single = ipath_dma_unmap_single,
+ .map_page = ipath_dma_map_page,
+ .unmap_page = ipath_dma_unmap_page,
+ .map_sg = ipath_map_sg,
+ .unmap_sg = ipath_unmap_sg,
+ .sync_single_for_cpu = ipath_sync_single_for_cpu,
+ .sync_single_for_device = ipath_sync_single_for_device,
+ .alloc_coherent = ipath_dma_alloc_coherent,
+ .free_coherent = ipath_dma_free_coherent
};
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 26dfbc8ee0f..01ba792791a 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -70,7 +70,7 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
int i;
if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
- dd->ipath_lastcancel > jiffies) {
+ time_after(dd->ipath_lastcancel, jiffies)) {
__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
"SendbufErrs %lx %lx", sbuf[0],
sbuf[1]);
@@ -755,7 +755,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
/* likely due to cancel; so suppress message unless verbose */
if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
- dd->ipath_lastcancel > jiffies) {
+ time_after(dd->ipath_lastcancel, jiffies)) {
/* armlaunch takes precedence; it often causes both. */
ipath_cdbg(VERBOSE,
"Suppressed %s error (%llx) after sendbuf cancel\n",
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index e346d3890a0..5e61e9bff69 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -188,8 +188,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct ipath_mr *mr;
struct ib_umem *umem;
- struct ib_umem_chunk *chunk;
- int n, m, i;
+ int n, m, entry;
+ struct scatterlist *sg;
struct ib_mr *ret;
if (length == 0) {
@@ -202,10 +202,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (IS_ERR(umem))
return (void *) umem;
- n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- n += chunk->nents;
-
+ n = umem->nmap;
mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
if (!mr) {
ret = ERR_PTR(-ENOMEM);
@@ -224,22 +221,20 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
m = 0;
n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list) {
- for (i = 0; i < chunk->nents; i++) {
- void *vaddr;
-
- vaddr = page_address(sg_page(&chunk->page_list[i]));
- if (!vaddr) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
- mr->mr.map[m]->segs[n].vaddr = vaddr;
- mr->mr.map[m]->segs[n].length = umem->page_size;
- n++;
- if (n == IPATH_SEGSZ) {
- m++;
- n = 0;
- }
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ void *vaddr;
+
+ vaddr = page_address(sg_page(sg));
+ if (!vaddr) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+ mr->mr.map[m]->segs[n].vaddr = vaddr;
+ mr->mr.map[m]->segs[n].length = umem->page_size;
+ n++;
+ if (n == IPATH_SEGSZ) {
+ m++;
+ n = 0;
}
}
ret = &mr->ibmr;
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 0857a9c3cd3..face87602dc 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -463,7 +463,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask))
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED))
goto inval;
if (attr_mask & IB_QP_AV) {
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 98ac18ec977..17a517766ad 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -247,7 +247,7 @@ static void sdma_abort_task(unsigned long opaque)
/* ipath_sdma_abort() is done, waiting for interrupt */
if (status == IPATH_SDMA_ABORT_DISARMED) {
- if (jiffies < dd->ipath_sdma_abort_intr_timeout)
+ if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout))
goto resched_noprint;
/* give up, intr got lost somewhere */
ipath_dbg("give up waiting for SDMADISABLED intr\n");
@@ -341,7 +341,7 @@ resched:
* JAG - this is bad to just have default be a loop without
* state change
*/
- if (jiffies > dd->ipath_sdma_abort_jiffies) {
+ if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) {
ipath_dbg("looping with status 0x%08lx\n",
dd->ipath_sdma_status);
dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
index f5cb13b2144..cc04b7ba348 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c
@@ -280,9 +280,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd,
int j;
int ret;
- ret = get_user_pages(current, current->mm, addr,
- npages, 0, 1, pages, NULL);
-
+ ret = get_user_pages_fast(addr, npages, 0, pages);
if (ret != npages) {
int i;
@@ -811,10 +809,7 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
while (dim) {
const int mxp = 8;
- down_write(&current->mm->mmap_sem);
ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
- up_write(&current->mm->mmap_sem);
-
if (ret <= 0)
goto done_unlock;
else {
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index 24ab11a9ad1..fc01deac1d3 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,6 +1,6 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
- depends on NETDEVICES && ETHERNET && PCI
+ depends on NETDEVICES && ETHERNET && PCI && INET
select NET_VENDOR_MELLANOX
select MLX4_CORE
---help---
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index a251becdaa9..2d8c3397774 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -39,25 +39,6 @@
#include "mlx4_ib.h"
-int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
- u8 *mac, int *is_mcast, u8 port)
-{
- struct in6_addr in6;
-
- *is_mcast = 0;
-
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
- if (rdma_link_local_addr(&in6))
- rdma_get_ll_mac(&in6, mac);
- else if (rdma_is_multicast_addr(&in6)) {
- rdma_get_mcast_mac(&in6, mac);
- *is_mcast = 1;
- } else
- return -EINVAL;
-
- return 0;
-}
-
static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
struct mlx4_ib_ah *ah)
{
@@ -92,21 +73,18 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
struct mlx4_dev *dev = ibdev->dev;
- union ib_gid sgid;
- u8 mac[6];
- int err;
- int is_mcast;
+ int is_mcast = 0;
+ struct in6_addr in6;
u16 vlan_tag;
- err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
- if (err)
- return ERR_PTR(err);
-
- memcpy(ah->av.eth.mac, mac, 6);
- err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid);
- if (err)
- return ERR_PTR(err);
- vlan_tag = rdma_get_vlan_id(&sgid);
+ memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+ if (rdma_is_multicast_addr(&in6)) {
+ is_mcast = 1;
+ rdma_get_mcast_mac(&in6, ah->av.eth.mac);
+ } else {
+ memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
+ }
+ vlan_tag = ah_attr->vlan_id;
if (vlan_tag < 0x1000)
vlan_tag |= (ah_attr->sl & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 2f215b93db6..0eb141c4141 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
continue;
slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
- if (slave_id >= dev->dev->num_slaves)
+ if (slave_id >= dev->dev->num_vfs + 1)
return;
tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
form_cache_ag = get_cached_alias_guid(dev, port_num,
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index d1f5f1dd77b..56a593e0ae5 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -61,6 +61,11 @@ struct cm_generic_msg {
__be32 remote_comm_id;
};
+struct cm_sidr_generic_msg {
+ struct ib_mad_hdr hdr;
+ __be32 request_id;
+};
+
struct cm_req_msg {
unsigned char unused[0x60];
union ib_gid primary_path_sgid;
@@ -69,28 +74,62 @@ struct cm_req_msg {
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
- struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
- msg->local_comm_id = cpu_to_be32(cm_id);
+ if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ msg->request_id = cpu_to_be32(cm_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ pr_err("trying to set local_comm_id in SIDR_REP\n");
+ return;
+ } else {
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ msg->local_comm_id = cpu_to_be32(cm_id);
+ }
}
static u32 get_local_comm_id(struct ib_mad *mad)
{
- struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
-
- return be32_to_cpu(msg->local_comm_id);
+ if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ return be32_to_cpu(msg->request_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ pr_err("trying to set local_comm_id in SIDR_REP\n");
+ return -1;
+ } else {
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ return be32_to_cpu(msg->local_comm_id);
+ }
}
static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
{
- struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
- msg->remote_comm_id = cpu_to_be32(cm_id);
+ if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ msg->request_id = cpu_to_be32(cm_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ pr_err("trying to set remote_comm_id in SIDR_REQ\n");
+ return;
+ } else {
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ msg->remote_comm_id = cpu_to_be32(cm_id);
+ }
}
static u32 get_remote_comm_id(struct ib_mad *mad)
{
- struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
-
- return be32_to_cpu(msg->remote_comm_id);
+ if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
+ struct cm_sidr_generic_msg *msg =
+ (struct cm_sidr_generic_msg *)mad;
+ return be32_to_cpu(msg->request_id);
+ } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ pr_err("trying to set remote_comm_id in SIDR_REQ\n");
+ return -1;
+ } else {
+ struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
+ return be32_to_cpu(msg->remote_comm_id);
+ }
}
static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
@@ -282,19 +321,21 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
u32 sl_cm_id;
int pv_cm_id = -1;
- sl_cm_id = get_local_comm_id(mad);
-
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
+ mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ sl_cm_id = get_local_comm_id(mad);
id = id_map_alloc(ibdev, slave_id, sl_cm_id);
if (IS_ERR(id)) {
mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
__func__, slave_id, sl_cm_id);
return PTR_ERR(id);
}
- } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
+ } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
return 0;
} else {
+ sl_cm_id = get_local_comm_id(mad);
id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
}
@@ -315,14 +356,18 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
}
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
- struct ib_mad *mad)
+ struct ib_mad *mad)
{
u32 pv_cm_id;
struct id_map_entry *id;
- if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
+ if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
union ib_gid gid;
+ if (!slave)
+ return 0;
+
gid = gid_from_req_msg(ibdev, mad);
*slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
if (*slave < 0) {
@@ -341,7 +386,8 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
return -ENOENT;
}
- *slave = id->slave_id;
+ if (slave)
+ *slave = id->slave_id;
set_remote_comm_id(mad, id->sl_cm_id);
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index d5e60f44ba5..1066eec854a 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
int err;
err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
- PAGE_SIZE * 2, &buf->buf);
+ PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
if (err)
goto out;
@@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
+ err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
if (err)
goto err_mtt;
@@ -209,7 +209,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
uar = &to_mucontext(context)->uar;
} else {
- err = mlx4_db_alloc(dev->dev, &cq->db, 1);
+ err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
if (err)
goto err_cq;
@@ -324,7 +324,7 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
u32 i;
i = cq->mcq.cons_index;
- while (get_sw_cqe(cq, i & cq->ibcq.cqe))
+ while (get_sw_cqe(cq, i))
++i;
return i - cq->mcq.cons_index;
@@ -365,7 +365,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
mutex_lock(&cq->resize_mutex);
- if (entries < 1 || entries > dev->dev->caps.max_cqes) {
+ if (entries < 1) {
err = -EINVAL;
goto out;
}
@@ -376,6 +376,11 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
goto out;
}
+ if (entries > dev->dev->caps.max_cqes) {
+ err = -EINVAL;
+ goto out;
+ }
+
if (ibcq->uobject) {
err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
if (err)
@@ -559,7 +564,7 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
}
static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
- unsigned tail, struct mlx4_cqe *cqe)
+ unsigned tail, struct mlx4_cqe *cqe, int is_eth)
{
struct mlx4_ib_proxy_sqp_hdr *hdr;
@@ -569,12 +574,20 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
DMA_FROM_DEVICE);
hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
- wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
- wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
wc->dlid_path_bits = 0;
+ if (is_eth) {
+ wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
+ memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
+ memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
+ wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
+ } else {
+ wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
+ wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
+ }
+
return 0;
}
@@ -589,6 +602,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_srq *msrq = NULL;
int is_send;
int is_error;
+ int is_eth;
u32 g_mlpath_rqpn;
u16 wqe_ctr;
unsigned tail = 0;
@@ -773,11 +787,15 @@ repoll:
break;
}
+ is_eth = (rdma_port_get_link_layer(wc->qp->device,
+ (*cur_qp)->port) ==
+ IB_LINK_LAYER_ETHERNET);
if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
if ((*cur_qp)->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
- return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
+ return use_tunnel_data(*cur_qp, cq, wc, tail,
+ cqe, is_eth);
}
wc->slid = be16_to_cpu(cqe->rlid);
@@ -788,11 +806,21 @@ repoll:
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
- if (rdma_port_get_link_layer(wc->qp->device,
- (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET)
+ if (is_eth) {
wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
- else
+ if (be32_to_cpu(cqe->vlan_my_qpn) &
+ MLX4_CQE_VLAN_PRESENT_MASK) {
+ wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
+ MLX4_CQE_VID_MASK;
+ } else {
+ wc->vlan_id = 0xffff;
+ }
+ memcpy(wc->smac, cqe->smac, ETH_ALEN);
+ wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
+ } else {
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
+ wc->vlan_id = 0xffff;
+ }
}
return 0;
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
index 8aee4233b38..c5174098636 100644
--- a/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_db *db)
{
struct mlx4_ib_user_db_page *page;
- struct ib_umem_chunk *chunk;
int err = 0;
mutex_lock(&context->db_page_mutex);
@@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
list_add(&page->list, &context->db_page_list);
found:
- chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
- db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
+ db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index f2a3f48107e..287ad0564ac 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -467,6 +467,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
int ret = 0;
u16 tun_pkey_ix;
u16 cached_pkey;
+ u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
if (dest_qpt > IB_QPT_GSI)
return -EINVAL;
@@ -477,10 +478,6 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
return -EAGAIN;
- /* QP0 forwarding only for Dom0 */
- if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
- return -EINVAL;
-
if (!dest_qpt)
tun_qp = &tun_ctx->qp[0];
else
@@ -509,6 +506,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
* The driver will set the force loopback bit in post_send */
memset(&attr, 0, sizeof attr);
attr.port_num = port;
+ if (is_eth) {
+ memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
+ attr.ah_flags = IB_AH_GRH;
+ }
ah = ib_create_ah(tun_ctx->pd, &attr);
if (IS_ERR(ah))
return -ENOMEM;
@@ -540,11 +541,36 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
/* adjust tunnel data */
tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
- tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
- tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
+ if (is_eth) {
+ u16 vlan = 0;
+ if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan,
+ NULL)) {
+ /* VST mode */
+ if (vlan != wc->vlan_id)
+ /* Packet vlan is not the VST-assigned vlan.
+ * Drop the packet.
+ */
+ goto out;
+ else
+ /* Remove the vlan tag before forwarding
+ * the packet to the VF.
+ */
+ vlan = 0xffff;
+ } else {
+ vlan = wc->vlan_id;
+ }
+
+ tun_mad->hdr.sl_vid = cpu_to_be16(vlan);
+ memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4);
+ memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
+ } else {
+ tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
+ tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
+ }
+
ib_dma_sync_single_for_device(&dev->ib_dev,
tun_qp->tx_ring[tun_tx_ix].buf.map,
sizeof (struct mlx4_rcv_tunnel_mad),
@@ -580,6 +606,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
int err;
int slave;
u8 *slave_id;
+ int is_eth = 0;
+
+ if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
+ is_eth = 0;
+ else
+ is_eth = 1;
+
+ if (is_eth) {
+ if (!(wc->wc_flags & IB_WC_GRH)) {
+ mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
+ return -EINVAL;
+ }
+ if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
+ mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
+ return -EINVAL;
+ }
+ if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
+ mlx4_ib_warn(ibdev, "failed matching grh\n");
+ return -ENOENT;
+ }
+ if (slave >= dev->dev->caps.sqp_demux) {
+ mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+ slave, dev->dev->caps.sqp_demux);
+ return -ENOENT;
+ }
+
+ if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
+ return 0;
+
+ err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+ if (err)
+ pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ slave, err);
+ return 0;
+ }
/* Initially assume that this mad is for us */
slave = mlx4_master_func_num(dev->dev);
@@ -602,6 +663,21 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
}
/* Class-specific handling */
switch (mad->mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ /* 255 indicates the dom0 */
+ if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) {
+ if (!mlx4_vf_smi_enabled(dev->dev, slave, port))
+ return -EPERM;
+ /* for a VF. drop unsolicited MADs */
+ if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) {
+ mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n",
+ slave, mad->mad_hdr.mgmt_class,
+ mad->mad_hdr.method);
+ return -EINVAL;
+ }
+ }
+ break;
case IB_MGMT_CLASS_SUBN_ADM:
if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
(struct ib_sa_mad *) mad))
@@ -1076,8 +1152,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
- enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
- u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
+ enum ib_qp_type dest_qpt, u16 pkey_index,
+ u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
+ u8 *s_mac, struct ib_mad *mad)
{
struct ib_sge list;
struct ib_send_wr wr, *bad_wr;
@@ -1099,10 +1176,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
return -EAGAIN;
- /* QP0 forwarding only for Dom0 */
- if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
- return -EINVAL;
-
if (dest_qpt == IB_QPT_SMI) {
src_qpnum = 0;
sqp = &sqp_ctx->qp[0];
@@ -1166,6 +1239,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
+ if (s_mac)
+ memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
+
ret = ib_post_send(send_qp, &wr, &bad_wr);
out:
@@ -1174,6 +1250,22 @@ out:
return ret;
}
+static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
+{
+ if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
+ return slave;
+ return mlx4_get_base_gid_ix(dev->dev, slave, port);
+}
+
+static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
+ struct ib_ah_attr *ah_attr)
+{
+ if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
+ ah_attr->grh.sgid_index = slave;
+ else
+ ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
+}
+
static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
{
struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
@@ -1184,6 +1276,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
struct ib_ah_attr ah_attr;
u8 *slave_id;
int slave;
+ int port;
/* Get slave that sent this packet */
if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1199,11 +1292,6 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
"belongs to another slave\n", wc->src_qp);
return;
}
- if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
- mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
- "non-master trying to send QP0 packets\n", wc->src_qp);
- return;
- }
/* Map transaction ID */
ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
@@ -1231,6 +1319,12 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
/* Class-specific handling */
switch (tunnel->mad.mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ if (slave != mlx4_master_func_num(dev->dev) &&
+ !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port))
+ return;
+ break;
case IB_MGMT_CLASS_SUBN_ADM:
if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
(struct ib_sa_mad *) &tunnel->mad))
@@ -1260,12 +1354,18 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
ah.ibah.device = ctx->ib_dev;
mlx4_ib_query_ah(&ah.ibah, &ah_attr);
- if ((ah_attr.ah_flags & IB_AH_GRH) &&
- (ah_attr.grh.sgid_index != slave)) {
- mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
- slave, ah_attr.grh.sgid_index);
+ if (ah_attr.ah_flags & IB_AH_GRH)
+ fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
+
+ port = mlx4_slave_convert_port(dev->dev, slave, ah_attr.port_num);
+ if (port < 0)
return;
- }
+ ah_attr.port_num = port;
+ memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
+ ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan);
+ /* if slave have default vlan use it */
+ mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
+ &ah_attr.vlan_id, &ah_attr.sl);
mlx4_ib_send_to_wire(dev, slave, ctx->port,
is_proxy_qp0(dev, wc->src_qp, slave) ?
@@ -1273,7 +1373,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
be16_to_cpu(tunnel->hdr.pkey_index),
be32_to_cpu(tunnel->hdr.remote_qpn),
be32_to_cpu(tunnel->hdr.qkey),
- &ah_attr, &tunnel->mad);
+ &ah_attr, wc->smac, &tunnel->mad);
}
static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
@@ -1657,9 +1757,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
return -EEXIST;
ctx->state = DEMUX_PV_STATE_STARTING;
- /* have QP0 only on port owner, and only if link layer is IB */
- if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
- rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
+ /* have QP0 only if link layer is IB */
+ if (rdma_port_get_link_layer(ibdev, ctx->port) ==
+ IB_LINK_LAYER_INFINIBAND)
ctx->has_smi = 1;
if (ctx->has_smi) {
@@ -1850,7 +1950,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
ctx->port = port;
ctx->ib_dev = &dev->ib_dev;
- for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ for (i = 0;
+ i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1));
+ i++) {
+ struct mlx4_active_ports actv_ports =
+ mlx4_get_active_ports(dev->dev, i);
+
+ if (!test_bit(port - 1, actv_ports.ports))
+ continue;
+
ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
if (ret) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index d6c5a73becf..0f7027e7db1 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -39,6 +39,8 @@
#include <linux/inetdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
@@ -46,15 +48,17 @@
#include <linux/mlx4/driver.h>
#include <linux/mlx4/cmd.h>
+#include <linux/mlx4/qp.h>
#include "mlx4_ib.h"
#include "user.h"
#define DRV_NAME MLX4_IB_DRV_NAME
-#define DRV_VERSION "1.0"
-#define DRV_RELDATE "April 4, 2008"
+#define DRV_VERSION "2.2-1"
+#define DRV_RELDATE "Feb 2014"
#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
+#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
@@ -92,21 +96,27 @@ static union ib_gid zgid;
static int check_flow_steering_support(struct mlx4_dev *dev)
{
+ int eth_num_ports = 0;
int ib_num_ports = 0;
- int i;
- mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
- ib_num_ports++;
-
- if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
- if (ib_num_ports || mlx4_is_mfunc(dev)) {
- pr_warn("Device managed flow steering is unavailable "
- "for IB ports or in multifunction env.\n");
- return 0;
+ int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
+
+ if (dmfs) {
+ int i;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+ eth_num_ports++;
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ib_num_ports++;
+ dmfs &= (!ib_num_ports ||
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
+ (!eth_num_ports ||
+ (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
+ if (ib_num_ports && mlx4_is_mfunc(dev)) {
+ pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
+ dmfs = 0;
}
- return 1;
}
- return 0;
+ return dmfs;
}
static int mlx4_ib_query_device(struct ib_device *ibdev,
@@ -165,7 +175,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
else
props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
- if (check_flow_steering_support(dev->dev))
+ if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
}
@@ -177,18 +187,18 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_mr_size = ~0ull;
props->page_size_cap = dev->dev->caps.page_size_cap;
- props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
+ props->max_qp = dev->dev->quotas.qp;
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
props->max_sge = min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg);
- props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
+ props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
- props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
+ props->max_mr = dev->dev->quotas.mpt;
props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
- props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
+ props->max_srq = dev->dev->quotas.srq;
props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
props->max_srq_sge = dev->dev->caps.max_srq_sge;
props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
@@ -338,7 +348,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ?
IB_WIDTH_4X : IB_WIDTH_1X;
props->active_speed = IB_SPEED_QDR;
- props->port_cap_flags = IB_PORT_CM_SUP;
+ props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
props->max_msg_sz = mdev->dev->caps.max_msg_sz;
props->pkey_tbl_len = 1;
@@ -526,7 +536,6 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
if (IS_ERR(mailbox))
return 0;
- memset(mailbox->buf, 0, 256);
memcpy(mailbox->buf, props->node_desc, 64);
mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
@@ -536,19 +545,16 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
return 0;
}
-static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
- u32 cap_mask)
+static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
+ u32 cap_mask)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
- u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- memset(mailbox->buf, 0, 256);
-
if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
*(u8 *) mailbox->buf = !!reset_qkey_viols << 6;
((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
@@ -557,8 +563,8 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
}
- err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
- MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+ err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
mlx4_free_cmd_mailbox(dev->dev, mailbox);
return err;
@@ -567,11 +573,20 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+ u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
struct ib_port_attr attr;
u32 cap_mask;
int err;
- mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
+ /* return OK if this is RoCE. CM calls ib_modify_port() regardless
+ * of whether port link layer is ETH or IB. For ETH ports, qkey
+ * violations and port capabilities are not meaningful.
+ */
+ if (is_eth)
+ return 0;
+
+ mutex_lock(&mdev->cap_mask_mutex);
err = mlx4_ib_query_port(ibdev, port, &attr);
if (err)
@@ -580,9 +595,9 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
~props->clr_port_cap_mask;
- err = mlx4_SET_PORT(to_mdev(ibdev), port,
- !!(mask & IB_PORT_RESET_QKEY_CNTR),
- cap_mask);
+ err = mlx4_ib_SET_PORT(mdev, port,
+ !!(mask & IB_PORT_RESET_QKEY_CNTR),
+ cap_mask);
out:
mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
@@ -790,7 +805,6 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid)
{
- u8 mac[6];
struct net_device *ndev;
int ret = 0;
@@ -804,11 +818,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
spin_unlock(&mdev->iboe.lock);
if (ndev) {
- rdma_get_mcast_mac((struct in6_addr *)gid, mac);
- rtnl_lock();
- dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac);
ret = 1;
- rtnl_unlock();
dev_put(ndev);
}
@@ -822,6 +832,7 @@ struct mlx4_ib_steering {
};
static int parse_flow_attr(struct mlx4_dev *dev,
+ u32 qp_num,
union ib_flow_spec *ib_spec,
struct _rule_hw *mlx4_spec)
{
@@ -837,6 +848,14 @@ static int parse_flow_attr(struct mlx4_dev *dev,
mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
break;
+ case IB_FLOW_SPEC_IB:
+ type = MLX4_NET_TRANS_RULE_ID_IB;
+ mlx4_spec->ib.l3_qpn =
+ cpu_to_be32(qp_num);
+ mlx4_spec->ib.qpn_mask =
+ cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
+ break;
+
case IB_FLOW_SPEC_IPV4:
type = MLX4_NET_TRANS_RULE_ID_IPV4;
@@ -868,6 +887,115 @@ static int parse_flow_attr(struct mlx4_dev *dev,
return mlx4_hw_rule_sz(dev, type);
}
+struct default_rules {
+ __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ __u8 link_layer;
+};
+static const struct default_rules default_table[] = {
+ {
+ .mandatory_fields = {IB_FLOW_SPEC_IPV4},
+ .mandatory_not_fields = {IB_FLOW_SPEC_ETH},
+ .rules_create_list = {IB_FLOW_SPEC_IB},
+ .link_layer = IB_LINK_LAYER_INFINIBAND
+ }
+};
+
+static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr)
+{
+ int i, j, k;
+ void *ib_flow;
+ const struct default_rules *pdefault_rules = default_table;
+ u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
+
+ for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++,
+ pdefault_rules++) {
+ __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
+ memset(&field_types, 0, sizeof(field_types));
+
+ if (link_layer != pdefault_rules->link_layer)
+ continue;
+
+ ib_flow = flow_attr + 1;
+ /* we assume the specs are sorted */
+ for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
+ j < flow_attr->num_of_specs; k++) {
+ union ib_flow_spec *current_flow =
+ (union ib_flow_spec *)ib_flow;
+
+ /* same layer but different type */
+ if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
+ (pdefault_rules->mandatory_fields[k] &
+ IB_FLOW_SPEC_LAYER_MASK)) &&
+ (current_flow->type !=
+ pdefault_rules->mandatory_fields[k]))
+ goto out;
+
+ /* same layer, try match next one */
+ if (current_flow->type ==
+ pdefault_rules->mandatory_fields[k]) {
+ j++;
+ ib_flow +=
+ ((union ib_flow_spec *)ib_flow)->size;
+ }
+ }
+
+ ib_flow = flow_attr + 1;
+ for (j = 0; j < flow_attr->num_of_specs;
+ j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
+ for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
+ /* same layer and same type */
+ if (((union ib_flow_spec *)ib_flow)->type ==
+ pdefault_rules->mandatory_not_fields[k])
+ goto out;
+
+ return i;
+ }
+out:
+ return -1;
+}
+
+static int __mlx4_ib_create_default_rules(
+ struct mlx4_ib_dev *mdev,
+ struct ib_qp *qp,
+ const struct default_rules *pdefault_rules,
+ struct _rule_hw *mlx4_spec) {
+ int size = 0;
+ int i;
+
+ for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/
+ sizeof(pdefault_rules->rules_create_list[0]); i++) {
+ int ret;
+ union ib_flow_spec ib_spec;
+ switch (pdefault_rules->rules_create_list[i]) {
+ case 0:
+ /* no rule */
+ continue;
+ case IB_FLOW_SPEC_IB:
+ ib_spec.type = IB_FLOW_SPEC_IB;
+ ib_spec.size = sizeof(struct ib_flow_spec_ib);
+
+ break;
+ default:
+ /* invalid rule */
+ return -EINVAL;
+ }
+ /* We must put empty rule, qpn is being ignored */
+ ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
+ mlx4_spec);
+ if (ret < 0) {
+ pr_info("invalid parsing\n");
+ return -EINVAL;
+ }
+
+ mlx4_spec = (void *)mlx4_spec + ret;
+ size += ret;
+ }
+ return size;
+}
+
static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
int domain,
enum mlx4_net_trans_promisc_mode flow_type,
@@ -879,8 +1007,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
struct mlx4_ib_dev *mdev = to_mdev(qp->device);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_net_trans_rule_hw_ctrl *ctrl;
- size_t rule_size = sizeof(struct mlx4_net_trans_rule_hw_ctrl) +
- (sizeof(struct _rule_hw) * flow_attr->num_of_specs);
+ int default_flow;
static const u16 __mlx4_domain[] = {
[IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
@@ -905,7 +1032,6 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- memset(mailbox->buf, 0, rule_size);
ctrl = mailbox->buf;
ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
@@ -916,8 +1042,21 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
ib_flow = flow_attr + 1;
size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+ /* Add default flows */
+ default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
+ if (default_flow >= 0) {
+ ret = __mlx4_ib_create_default_rules(
+ mdev, qp, default_table + default_flow,
+ mailbox->buf + size);
+ if (ret < 0) {
+ mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+ return -EINVAL;
+ }
+ size += ret;
+ }
for (i = 0; i < flow_attr->num_of_specs; i++) {
- ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
+ ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
+ mailbox->buf + size);
if (ret < 0) {
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return -EINVAL;
@@ -1031,6 +1170,8 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
u64 reg_id;
struct mlx4_ib_steering *ib_steering = NULL;
+ enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
+ MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1042,7 +1183,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
!!(mqp->flags &
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
- MLX4_PROT_IB_IPV6, &reg_id);
+ prot, &reg_id);
if (err)
goto err_malloc;
@@ -1061,7 +1202,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
err_add:
mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- MLX4_PROT_IB_IPV6, reg_id);
+ prot, reg_id);
err_malloc:
kfree(ib_steering);
@@ -1089,10 +1230,11 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
- u8 mac[6];
struct net_device *ndev;
struct mlx4_ib_gid_entry *ge;
u64 reg_id = 0;
+ enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
+ MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
if (mdev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1115,7 +1257,7 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
- MLX4_PROT_IB_IPV6, reg_id);
+ prot, reg_id);
if (err)
return err;
@@ -1127,13 +1269,8 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (ndev)
dev_hold(ndev);
spin_unlock(&mdev->iboe.lock);
- rdma_get_mcast_mac((struct in6_addr *)gid, mac);
- if (ndev) {
- rtnl_lock();
- dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac);
- rtnl_unlock();
+ if (ndev)
dev_put(ndev);
- }
list_del(&ge->list);
kfree(ge);
} else
@@ -1229,7 +1366,8 @@ static struct device_attribute *mlx4_class_attributes[] = {
&dev_attr_board_id
};
-static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
+static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
+ struct net_device *dev)
{
memcpy(eui, dev->dev_addr, 3);
memcpy(eui + 5, dev->dev_addr + 3, 3);
@@ -1265,162 +1403,437 @@ static void update_gids_task(struct work_struct *work)
MLX4_CMD_WRAPPED);
if (err)
pr_warn("set port command failed\n");
- else {
- memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
+ else
mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ kfree(gw);
+}
+
+/*
+ * Work handler queued by reset_gid_table().
+ *
+ * Copies the GID array carried in the work item into a freshly allocated
+ * command mailbox and, for ports whose link layer is Ethernet, pushes it to
+ * the device with SET_PORT / MLX4_SET_PORT_GID_TABLE. The work item is
+ * freed on every path, including mailbox allocation failure.
+ */
+static void reset_gids_task(struct work_struct *work)
+{
+ struct update_gid_work *gw =
+ container_of(work, struct update_gid_work, work);
+ struct mlx4_cmd_mailbox *mailbox;
+ union ib_gid *gids;
+ int err;
+ struct mlx4_dev *dev = gw->dev->dev;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox)) {
+ pr_warn("reset gid table failed\n");
+ goto free;
+ }
+
+ gids = mailbox->buf;
+ memcpy(gids, gw->gids, sizeof(gw->gids));
+
+ if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) ==
+ IB_LINK_LAYER_ETHERNET) {
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
+ 1, MLX4_CMD_SET_PORT,
+ MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ /* pr_warn() already supplies the log level; no KERN_WARNING here */
+ if (err)
+ pr_warn("set port %d command failed\n", gw->port);
}
mlx4_free_cmd_mailbox(dev, mailbox);
+free:
kfree(gw);
}
-static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
+/*
+ * Add or remove a single GID in the driver's cached per-port GID table and,
+ * if the table changed, queue update_gids_task to push it to the device.
+ *
+ * @dev:         mlx4 IB device owning iboe.gid_table
+ * @port:        1-based port number (table row is port - 1)
+ * @gid:         GID to add or remove
+ * @clear:       non-zero to zero the matching entry, zero to add @gid
+ * @default_gid: non-zero to store @gid in slot 0 without searching
+ *
+ * Returns 0 when nothing changed or the work was queued, -ENOMEM when the
+ * work item could not be allocated (GFP_ATOMIC).
+ *
+ * Note: on the default_gid path "found" stays -1, so with clear == 0 slot 0
+ * is always rewritten and an update is always queued.
+ */
+static int update_gid_table(struct mlx4_ib_dev *dev, int port,
+ union ib_gid *gid, int clear,
+ int default_gid)
{
- struct net_device *ndev = dev->iboe.netdevs[port - 1];
struct update_gid_work *work;
- struct net_device *tmp;
int i;
- u8 *hits;
- int ret;
- union ib_gid gid;
- int free;
- int found;
int need_update = 0;
- u16 vid;
-
- work = kzalloc(sizeof *work, GFP_ATOMIC);
- if (!work)
- return -ENOMEM;
-
- hits = kzalloc(128, GFP_ATOMIC);
- if (!hits) {
- ret = -ENOMEM;
- goto out;
- }
+ int free = -1;
+ int found = -1;
+ int max_gids;
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, tmp) {
- if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) {
- gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- vid = rdma_vlan_dev_vlan_id(tmp);
- mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev);
- found = 0;
- free = -1;
- for (i = 0; i < 128; ++i) {
- if (free < 0 &&
- !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
- free = i;
- if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) {
- hits[i] = 1;
- found = 1;
+ if (default_gid) {
+ /* the default GID always lives in slot 0 */
+ free = 0;
+ } else {
+ /* slot 0 is skipped: the scan covers slots 1 .. max_gids - 1 */
+ max_gids = dev->dev->caps.gid_table_len[port];
+ for (i = 1; i < max_gids; ++i) {
+ if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
+ sizeof(*gid)))
+ found = i;
+
+ if (clear) {
+ /* removal: zero the first matching entry and stop */
+ if (found >= 0) {
+ need_update = 1;
+ dev->iboe.gid_table[port - 1][found] =
+ zgid;
break;
}
- }
+ } else {
+ /* addition: stop if already present ... */
+ if (found >= 0)
+ break;
- if (!found) {
- if (tmp == ndev &&
- (memcmp(&dev->iboe.gid_table[port - 1][0],
- &gid, sizeof gid) ||
- !memcmp(&dev->iboe.gid_table[port - 1][0],
- &zgid, sizeof gid))) {
- dev->iboe.gid_table[port - 1][0] = gid;
- ++need_update;
- hits[0] = 1;
- } else if (free >= 0) {
- dev->iboe.gid_table[port - 1][free] = gid;
- hits[free] = 1;
- ++need_update;
- }
+ /* ... otherwise remember the first all-zero slot */
+ if (free < 0 &&
+ !memcmp(&dev->iboe.gid_table[port - 1][i],
+ &zgid, sizeof(*gid)))
+ free = i;
}
}
}
- rcu_read_unlock();
- for (i = 0; i < 128; ++i)
- if (!hits[i]) {
- if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
- ++need_update;
- dev->iboe.gid_table[port - 1][i] = zgid;
- }
+ /* new GID and a slot to put it in: store it */
+ if (found == -1 && !clear && free >= 0) {
+ dev->iboe.gid_table[port - 1][free] = *gid;
+ need_update = 1;
+ }
- if (need_update) {
- memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids);
- INIT_WORK(&work->work, update_gids_task);
- work->port = port;
- work->dev = dev;
- queue_work(wq, &work->work);
- } else
- kfree(work);
+ if (!need_update)
+ return 0;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ /* snapshot the whole per-port table into the work item */
+ memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
+ INIT_WORK(&work->work, update_gids_task);
+ work->port = port;
+ work->dev = dev;
+ queue_work(wq, &work->work);
- kfree(hits);
return 0;
+}
-out:
- kfree(work);
- return ret;
+/*
+ * Fill @gid with the port's default GID: subnet prefix fe80::/64 followed by
+ * the interface identifier that mlx4_addrconf_ifid_eui48() derives from
+ * @dev's address, using 0xffff as the vlan_id argument.
+ */
+static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid)
+{
+ gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
}
-static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
+
+/*
+ * Zero the cached GID table of @port (1-based) and queue reset_gids_task
+ * with an all-zero GID array so the same state is written to the device.
+ *
+ * Returns 0 once the work is queued, -ENOMEM if the work item cannot be
+ * allocated (GFP_ATOMIC); in that case the cached table is left untouched.
+ */
+static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port)
{
- switch (event) {
- case NETDEV_UP:
- case NETDEV_CHANGEADDR:
- update_ipv6_gids(dev, port, 0);
- break;
+ struct update_gid_work *work;
- case NETDEV_DOWN:
- update_ipv6_gids(dev, port, 1);
- dev->iboe.netdevs[port - 1] = NULL;
- }
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return -ENOMEM;
+
+ /* the cached table row is cleared using the size of work->gids */
+ memset(dev->iboe.gid_table[port - 1], 0, sizeof(work->gids));
+ memset(work->gids, 0, sizeof(work->gids));
+ INIT_WORK(&work->work, reset_gids_task);
+ work->dev = dev;
+ work->port = port;
+ queue_work(wq, &work->work);
+ return 0;
}
-static void netdev_added(struct mlx4_ib_dev *dev, int port)
+/*
+ * Common handler for IPv4/IPv6 address notifier events.
+ *
+ * @event:        notifier event; only NETDEV_UP and NETDEV_DOWN are acted on
+ * @event_netdev: device the address belongs to (may be a VLAN device)
+ * @ibdev:        mlx4 IB device whose GID tables are updated
+ * @gid:          the address expressed as a GID
+ *
+ * The GID is added (NETDEV_UP) or cleared (NETDEV_DOWN) on every port whose
+ * recorded netdev, or recorded bond master, is the real device behind
+ * @event_netdev. Always returns 0.
+ */
+static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
+ struct mlx4_ib_dev *ibdev, union ib_gid *gid)
{
- update_ipv6_gids(dev, port, 0);
+ struct mlx4_ib_iboe *iboe;
+ int port = 0;
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
+ rdma_vlan_dev_real_dev(event_netdev) :
+ event_netdev;
+ union ib_gid default_gid;
+
+ mlx4_make_default_gid(real_dev, &default_gid);
+
+ /* the default GID is never added or removed through this path */
+ if (!memcmp(gid, &default_gid, sizeof(*gid)))
+ return 0;
+
+ if (event != NETDEV_DOWN && event != NETDEV_UP)
+ return 0;
+
+ /* link-local address going down on a VLAN device: ignored */
+ if ((real_dev != event_netdev) &&
+ (event == NETDEV_DOWN) &&
+ rdma_link_local_addr((struct in6_addr *)gid))
+ return 0;
+
+ iboe = &ibdev->iboe;
+ spin_lock(&iboe->lock);
+
+ /* bond masters are matched against masters[], others against netdevs[] */
+ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
+ if ((netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->masters[port - 1])) ||
+ (!netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->netdevs[port - 1])))
+ update_gid_table(ibdev, port, gid,
+ event == NETDEV_DOWN, 0);
+
+ spin_unlock(&iboe->lock);
+ return 0;
+
}
-static void netdev_removed(struct mlx4_ib_dev *dev, int port)
+/*
+ * Map a net_device to the 1-based mlx4 port it is recorded under.
+ *
+ * VLAN devices are resolved to their real device first. A bond master is
+ * looked up in iboe->masters[], any other device in iboe->netdevs[].
+ * Returns the port number, or 0 when no port matches. The function takes
+ * no lock itself.
+ */
+static u8 mlx4_ib_get_dev_port(struct net_device *dev,
+ struct mlx4_ib_dev *ibdev)
{
- update_ipv6_gids(dev, port, 1);
+ u8 port = 0;
+ struct mlx4_ib_iboe *iboe;
+ struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
+ rdma_vlan_dev_real_dev(dev) : dev;
+
+ iboe = &ibdev->iboe;
+
+ for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
+ if ((netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->masters[port - 1])) ||
+ (!netif_is_bond_master(real_dev) &&
+ (real_dev == iboe->netdevs[port - 1])))
+ break;
+
+ /* port == num_ports + 1 here means the loop found no match */
+ if ((port == 0) || (port > ibdev->dev->caps.num_ports))
+ return 0;
+ else
+ return port;
}
-static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
+/*
+ * IPv4 address notifier callback (registered as iboe.nb_inet).
+ *
+ * @ptr is the struct in_ifaddr the event refers to. Its address is
+ * converted to a v4-mapped IPv6 GID and handed to mlx4_ib_addr_event().
+ * Always returns NOTIFY_DONE; the result of mlx4_ib_addr_event() is ignored.
+ */
+static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct mlx4_ib_dev *ibdev;
+ struct in_ifaddr *ifa = ptr;
+ union ib_gid gid;
+ struct net_device *event_netdev = ifa->ifa_dev->dev;
+
+ ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
+
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
+
+ mlx4_ib_addr_event(event, event_netdev, ibdev, &gid);
+ return NOTIFY_DONE;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/*
+ * IPv6 address notifier callback (registered as iboe.nb_inet6).
+ *
+ * @ptr is the struct inet6_ifaddr the event refers to. Its address is used
+ * directly as the GID passed to mlx4_ib_addr_event(). Always returns
+ * NOTIFY_DONE; the result of mlx4_ib_addr_event() is ignored.
+ */
+static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct mlx4_ib_dev *ibdev;
- struct net_device *oldnd;
+ struct inet6_ifaddr *ifa = ptr;
+ union ib_gid *gid = (union ib_gid *)&ifa->addr;
+ struct net_device *event_netdev = ifa->idev->dev;
+
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6);
+
+ mlx4_ib_addr_event(event, event_netdev, ibdev, gid);
+ return NOTIFY_DONE;
+}
+#endif
+
+#define MLX4_IB_INVALID_MAC ((u64)-1)
+/*
+ * Re-point the port's proxy QP1 at the netdev's current MAC address.
+ *
+ * @ibdev: mlx4 IB device
+ * @dev:   net_device whose dev_addr is the new source MAC
+ * @port:  1-based port number
+ *
+ * Under qp1_proxy_lock[port - 1]: if a proxy QP1 exists and its recorded
+ * source MAC differs from the netdev's, the new MAC is registered and the
+ * QP is updated with MLX4_UPDATE_QP_SMAC. Exactly one MAC is unregistered
+ * afterwards, outside the mutex: the new one if the QP update failed, the
+ * old one if it succeeded. Nothing is unregistered when the MACs already
+ * match, when there is no proxy QP, or when registration fails.
+ */
+static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev,
+ int port)
+{
+ u64 new_smac = 0;
+ u64 release_mac = MLX4_IB_INVALID_MAC;
+ struct mlx4_ib_qp *qp;
+
+ /* dev_addr is read under dev_base_lock */
+ read_lock(&dev_base_lock);
+ new_smac = mlx4_mac_to_u64(dev->dev_addr);
+ read_unlock(&dev_base_lock);
+
+ mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
+ qp = ibdev->qp1_proxy[port - 1];
+ if (qp) {
+ int new_smac_index;
+ u64 old_smac = qp->pri.smac;
+ struct mlx4_update_qp_params update_params;
+
+ if (new_smac == old_smac)
+ goto unlock;
+
+ new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
+
+ if (new_smac_index < 0)
+ goto unlock;
+
+ /* only smac_index is filled in; MLX4_UPDATE_QP_SMAC is the only attribute requested */
+ update_params.smac_index = new_smac_index;
+ if (mlx4_update_qp(ibdev->dev, &qp->mqp, MLX4_UPDATE_QP_SMAC,
+ &update_params)) {
+ /* update failed: drop the registration just taken */
+ release_mac = new_smac;
+ goto unlock;
+ }
+
+ qp->pri.smac = new_smac;
+ qp->pri.smac_index = new_smac_index;
+
+ /* update succeeded: the previous MAC is no longer referenced by this QP */
+ release_mac = old_smac;
+ }
+
+unlock:
+ mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
+ if (release_mac != MLX4_IB_INVALID_MAC)
+ mlx4_unregister_mac(ibdev->dev, port, release_mac);
+}
+
+/*
+ * Add every IP address configured on @dev to the GID table of @port.
+ *
+ * IPv4 addresses are converted to v4-mapped IPv6 GIDs; IPv6 addresses (only
+ * when CONFIG_IPV6 is enabled) are used as GIDs directly. Each one is added
+ * with update_gid_table(..., clear = 0, default_gid = 0); its return value
+ * is ignored. Does nothing when @port is outside 1..caps.num_ports.
+ */
+static void mlx4_ib_get_dev_addr(struct net_device *dev,
+ struct mlx4_ib_dev *ibdev, u8 port)
+{
+ struct in_device *in_dev;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_dev *in6_dev;
+ union ib_gid *pgid;
+ struct inet6_ifaddr *ifp;
+#endif
+ union ib_gid gid;
+
+
+ if ((port == 0) || (port > ibdev->dev->caps.num_ports))
+ return;
+
+ /* IPv4 gids */
+ in_dev = in_dev_get(dev);
+ if (in_dev) {
+ for_ifa(in_dev) {
+ /* express the IPv4 address as a v4-mapped IPv6 GID */
+ ipv6_addr_set_v4mapped(ifa->ifa_address,
+ (struct in6_addr *)&gid);
+ update_gid_table(ibdev, port, &gid, 0, 0);
+ }
+ endfor_ifa(in_dev);
+ in_dev_put(in_dev);
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ /* IPv6 gids */
+ in6_dev = in6_dev_get(dev);
+ if (in6_dev) {
+ /* the address list is walked under in6_dev->lock */
+ read_lock_bh(&in6_dev->lock);
+ list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
+ pgid = (union ib_gid *)&ifp->addr;
+ update_gid_table(ibdev, port, pgid, 0, 0);
+ }
+ read_unlock_bh(&in6_dev->lock);
+ in6_dev_put(in6_dev);
+ }
+#endif
+}
+
+/*
+ * Build the default GID for @dev and install it on @port through
+ * update_gid_table() with clear = 0 and default_gid = 1.
+ */
+static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev, u8 port)
+{
+ union ib_gid port_default_gid;
+
+ mlx4_make_default_gid(dev, &port_default_gid);
+ update_gid_table(ibdev, port, &port_default_gid,
+ 0 /* clear */, 1 /* default_gid */);
+}
+
+/*
+ * Rebuild the GID tables of all ports from scratch.
+ *
+ * Every port's table is reset first; then, holding dev_base_lock and
+ * iboe->lock, each net_device in init_net that maps to one of this device's
+ * ports has its IP addresses added to that port's table.
+ *
+ * Returns 0 on success, or the negative errno reported by
+ * reset_gid_table() (rather than a bare -1) if a reset cannot be queued.
+ */
+static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
+{
+ struct net_device *dev;
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ int i;
+ int err;
+
+ for (i = 1; i <= ibdev->num_ports; ++i) {
+ err = reset_gid_table(ibdev, i);
+ if (err)
+ return err;
+ }
+
+ read_lock(&dev_base_lock);
+ spin_lock(&iboe->lock);
+
+ for_each_netdev(&init_net, dev) {
+ u8 port = mlx4_ib_get_dev_port(dev, ibdev);
+ if (port)
+ mlx4_ib_get_dev_addr(dev, ibdev, port);
+ }
+
+ spin_unlock(&iboe->lock);
+ read_unlock(&dev_base_lock);
+
+ return 0;
+}
+
+/*
+ * Refresh the per-port netdev and bond-master bookkeeping and bring the GID
+ * tables in line with it.
+ *
+ * @ibdev: mlx4 IB device
+ * @dev:   net_device the triggering event refers to (may be NULL when the
+ *         scan is not driven by a notifier event)
+ * @event: the netdev notifier event, or 0
+ *
+ * For every IB-transport port, under iboe->lock: re-read the Ethernet
+ * netdev, record its bond master if it is a bond slave, install the default
+ * GID, and reset/repopulate the GID table when the port is down under a
+ * bond or the master changed. After the lock is dropped, the proxy QP1 of
+ * the port whose netdev matches @dev is updated for CHANGEADDR, REGISTER,
+ * UP and CHANGE events.
+ */
+static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev,
+ unsigned long event)
+
+{
struct mlx4_ib_iboe *iboe;
+ int update_qps_port = -1;
int port;
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
iboe = &ibdev->iboe;
spin_lock(&iboe->lock);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
- oldnd = iboe->netdevs[port - 1];
+ enum ib_port_state port_state = IB_PORT_NOP;
+ struct net_device *old_master = iboe->masters[port - 1];
+ struct net_device *curr_netdev;
+ struct net_device *curr_master;
+
iboe->netdevs[port - 1] =
mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
- if (oldnd != iboe->netdevs[port - 1]) {
- if (iboe->netdevs[port - 1])
- netdev_added(ibdev, port);
- else
- netdev_removed(ibdev, port);
+ if (iboe->netdevs[port - 1])
+ mlx4_ib_set_default_gid(ibdev,
+ iboe->netdevs[port - 1], port);
+ curr_netdev = iboe->netdevs[port - 1];
+
+ if (iboe->netdevs[port - 1] &&
+ netif_is_bond_slave(iboe->netdevs[port - 1])) {
+ iboe->masters[port - 1] = netdev_master_upper_dev_get(
+ iboe->netdevs[port - 1]);
+ } else {
+ iboe->masters[port - 1] = NULL;
+ }
+ curr_master = iboe->masters[port - 1];
+
+ if (dev == iboe->netdevs[port - 1] &&
+ (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
+ event == NETDEV_UP || event == NETDEV_CHANGE))
+ update_qps_port = port;
+
+ if (curr_netdev) {
+ port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN;
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ } else {
+ reset_gid_table(ibdev, port);
+ }
+ /* if using bonding/team and a slave port is down, we don't want the
+ * bond IP based gids in the table since flows that select port by
+ * gid may get the down port.
+ */
+ if (curr_master && (port_state == IB_PORT_DOWN)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ }
+ /* if bonding is used it is possible that we add it to masters
+ * only after IP address is assigned to the net bonding
+ * interface.
+ */
+ if (curr_master && (old_master != curr_master)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ mlx4_ib_get_dev_addr(curr_master, ibdev, port);
}
- }
- if (dev == iboe->netdevs[0] ||
- (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0]))
- handle_en_event(ibdev, 1, event);
- else if (dev == iboe->netdevs[1]
- || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1]))
- handle_en_event(ibdev, 2, event);
+ /* the port left its bond: rebuild from the netdev's own addresses.
+ * Skipped when the netdev itself is gone - the table was already
+ * reset above and curr_netdev must not be dereferenced.
+ */
+ if (curr_netdev && !curr_master && (old_master != curr_master)) {
+ reset_gid_table(ibdev, port);
+ mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
+ mlx4_ib_get_dev_addr(curr_netdev, ibdev, port);
+ }
+ }
spin_unlock(&iboe->lock);
+ if (update_qps_port > 0)
+ mlx4_ib_update_qps(ibdev, dev, update_qps_port);
+}
+
+/*
+ * Netdevice notifier callback (registered as iboe.nb).
+ *
+ * Events for devices outside init_net are ignored; everything else triggers
+ * a full mlx4_ib_scan_netdevs() pass. Always returns NOTIFY_DONE.
+ */
+static int mlx4_ib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct mlx4_ib_dev *ibdev;
+
+ if (!net_eq(dev_net(dev), &init_net))
+ return NOTIFY_DONE;
+
+ ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
+ mlx4_ib_scan_netdevs(ibdev, dev, event);
+
return NOTIFY_DONE;
}
@@ -1458,7 +1871,7 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev)
static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
{
- char name[32];
+ char name[80];
int eq_per_port = 0;
int added_eqs = 0;
int total_eqs = 0;
@@ -1488,8 +1901,8 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
eq = 0;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
for (j = 0; j < eq_per_port; j++) {
- sprintf(name, "mlx4-ib-%d-%d@%s",
- i, j, dev->pdev->bus->name);
+ snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
+ i, j, dev->pdev->bus->name);
/* Set IRQ for specific name (per ring) */
if (mlx4_assign_eq(dev, name, NULL,
&ibdev->eq_table[eq])) {
@@ -1539,17 +1952,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int i, j;
int err;
struct mlx4_ib_iboe *iboe;
+ int ib_num_ports = 0;
pr_info_once("%s", mlx4_ib_version);
- mlx4_foreach_non_ib_transport_port(i, dev)
- num_ports++;
-
- if (mlx4_is_mfunc(dev) && num_ports) {
- dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n");
- return NULL;
- }
-
num_ports = 0;
mlx4_foreach_ib_transport_port(i, dev)
num_ports++;
@@ -1688,12 +2094,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
}
if (check_flow_steering_support(dev)) {
+ ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
- ibdev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_FLOW);
+ ibdev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
}
mlx4_ib_alloc_eqs(dev, ibdev);
@@ -1704,20 +2111,53 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_map;
for (i = 0; i < ibdev->num_ports; ++i) {
+ mutex_init(&ibdev->qp1_proxy_lock[i]);
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
IB_LINK_LAYER_ETHERNET) {
err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]);
if (err)
ibdev->counters[i] = -1;
- } else
- ibdev->counters[i] = -1;
+ } else {
+ ibdev->counters[i] = -1;
+ }
}
+ mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+ ib_num_ports++;
+
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
+ if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
+ ib_num_ports) {
+ ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
+ err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
+ MLX4_IB_UC_STEER_QPN_ALIGN,
+ &ibdev->steer_qpn_base);
+ if (err)
+ goto err_counter;
+
+ ibdev->ib_uc_qpns_bitmap =
+ kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
+ sizeof(long),
+ GFP_KERNEL);
+ if (!ibdev->ib_uc_qpns_bitmap) {
+ dev_err(&dev->pdev->dev, "bit map alloc failed\n");
+ goto err_steer_qp_release;
+ }
+
+ bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
+
+ err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
+ dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_base +
+ ibdev->steer_qpn_count - 1);
+ if (err)
+ goto err_steer_free_bitmap;
+ }
+
if (ib_register_device(&ibdev->ib_dev, NULL))
- goto err_counter;
+ goto err_steer_free_bitmap;
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
@@ -1725,11 +2165,39 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
- iboe->nb.notifier_call = mlx4_ib_netdev_event;
- err = register_netdevice_notifier(&iboe->nb);
- if (err)
- goto err_sriov;
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+ if (!iboe->nb.notifier_call) {
+ iboe->nb.notifier_call = mlx4_ib_netdev_event;
+ err = register_netdevice_notifier(&iboe->nb);
+ if (err) {
+ iboe->nb.notifier_call = NULL;
+ goto err_notif;
+ }
+ }
+ if (!iboe->nb_inet.notifier_call) {
+ iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
+ err = register_inetaddr_notifier(&iboe->nb_inet);
+ if (err) {
+ iboe->nb_inet.notifier_call = NULL;
+ goto err_notif;
+ }
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!iboe->nb_inet6.notifier_call) {
+ iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event;
+ err = register_inet6addr_notifier(&iboe->nb_inet6);
+ if (err) {
+ iboe->nb_inet6.notifier_call = NULL;
+ goto err_notif;
+ }
+ }
+#endif
+ for (i = 1 ; i <= ibdev->num_ports ; ++i)
+ reset_gid_table(ibdev, i);
+ rtnl_lock();
+ mlx4_ib_scan_netdevs(ibdev, NULL, 0);
+ rtnl_unlock();
+ mlx4_ib_init_gid_table(ibdev);
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -1755,11 +2223,25 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
return ibdev;
err_notif:
- if (unregister_netdevice_notifier(&ibdev->iboe.nb))
- pr_warn("failure unregistering notifier\n");
+ if (ibdev->iboe.nb.notifier_call) {
+ if (unregister_netdevice_notifier(&ibdev->iboe.nb))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb.notifier_call = NULL;
+ }
+ if (ibdev->iboe.nb_inet.notifier_call) {
+ if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet.notifier_call = NULL;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ibdev->iboe.nb_inet6.notifier_call) {
+ if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet6.notifier_call = NULL;
+ }
+#endif
flush_workqueue(wq);
-err_sriov:
mlx4_ib_close_sriov(ibdev);
err_mad:
@@ -1768,6 +2250,13 @@ err_mad:
err_reg:
ib_unregister_device(&ibdev->ib_dev);
+err_steer_free_bitmap:
+ kfree(ibdev->ib_uc_qpns_bitmap);
+
+err_steer_qp_release:
+ if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
err_counter:
for (; i; --i)
if (ibdev->counters[i - 1] != -1)
@@ -1788,6 +2277,69 @@ err_dealloc:
return NULL;
}
+/*
+ * Allocate a power-of-two aligned region of QP numbers from the range
+ * reserved for flow steering.
+ *
+ * @dev:   mlx4 IB device
+ * @count: number of QPNs wanted; rounded up via get_count_order()
+ * @qpn:   on success, receives the first QPN of the region
+ *
+ * Returns 0 on success, -EINVAL (after a WARN) if no steering bitmap was
+ * set up for this device, or the negative value returned by
+ * bitmap_find_free_region() when no region is free.
+ */
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
+{
+ int offset;
+
+ /* warn and bail out instead of handing a NULL bitmap to the allocator */
+ if (WARN_ON(!dev->ib_uc_qpns_bitmap))
+ return -EINVAL;
+
+ offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
+ dev->steer_qpn_count,
+ get_count_order(count));
+ if (offset < 0)
+ return offset;
+
+ *qpn = dev->steer_qpn_base + offset;
+ return 0;
+}
+
+/*
+ * Return a QPN region obtained from mlx4_ib_steer_qp_alloc() to the
+ * steering bitmap. A zero @qpn, or a device whose steering_support is not
+ * MLX4_STEERING_MODE_DEVICE_MANAGED, makes this a no-op. A @qpn below the
+ * reserved base is a BUG.
+ */
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
+{
+ bool device_managed =
+ dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED;
+
+ if (device_managed && qpn) {
+ BUG_ON(qpn < dev->steer_qpn_base);
+
+ bitmap_release_region(dev->ib_uc_qpns_bitmap,
+ qpn - dev->steer_qpn_base,
+ get_count_order(count));
+ }
+}
+
+/*
+ * Attach or detach the steering rule of a QP.
+ *
+ * @is_attach != 0: build a one-spec flow (an IB L2 spec with an all-zero
+ * mask) for the QP's port and register it through __mlx4_ib_create_flow(),
+ * storing the rule id in mqp->reg_id.
+ * @is_attach == 0: destroy the rule identified by mqp->reg_id.
+ *
+ * Returns the result of the create/destroy call, or -ENOMEM if the
+ * temporary flow attribute cannot be allocated.
+ */
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+ int is_attach)
+{
+ struct ib_flow_spec_ib *l2_spec;
+ struct ib_flow_attr *attr;
+ size_t attr_size;
+ int rc;
+
+ /* detach needs no flow attribute at all */
+ if (!is_attach)
+ return __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
+
+ attr_size = sizeof(struct ib_flow_attr) +
+ sizeof(struct ib_flow_spec_ib);
+ attr = kzalloc(attr_size, GFP_KERNEL);
+ if (!attr)
+ return -ENOMEM;
+
+ attr->port = mqp->port;
+ attr->num_of_specs = 1;
+ attr->size = attr_size;
+
+ /* the single spec sits directly behind the attribute header */
+ l2_spec = (struct ib_flow_spec_ib *)(attr + 1);
+ l2_spec->type = IB_FLOW_SPEC_IB;
+ l2_spec->size = sizeof(struct ib_flow_spec_ib);
+ /* Add an empty rule for IB L2 */
+ memset(&l2_spec->mask, 0, sizeof(l2_spec->mask));
+
+ rc = __mlx4_ib_create_flow(&mqp->ibqp, attr,
+ IB_FLOW_DOMAIN_NIC,
+ MLX4_FS_REGULAR,
+ &mqp->reg_id);
+ kfree(attr);
+ return rc;
+}
+
static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
{
struct mlx4_ib_dev *ibdev = ibdev_ptr;
@@ -1801,6 +2353,26 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
+
+ if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
+ kfree(ibdev->ib_uc_qpns_bitmap);
+ }
+
+ if (ibdev->iboe.nb_inet.notifier_call) {
+ if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet.notifier_call = NULL;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ibdev->iboe.nb_inet6.notifier_call) {
+ if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
+ pr_warn("failure unregistering notifier\n");
+ ibdev->iboe.nb_inet6.notifier_call = NULL;
+ }
+#endif
+
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
if (ibdev->counters[p] != -1)
@@ -1821,17 +2393,24 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
struct mlx4_dev *dev = ibdev->dev;
int i;
unsigned long flags;
+ struct mlx4_active_ports actv_ports;
+ unsigned int ports;
+ unsigned int first_port;
if (!mlx4_is_master(dev))
return;
- dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
+ actv_ports = mlx4_get_active_ports(dev, slave);
+ ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
+ first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
+
+ dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
if (!dm) {
pr_err("failed to allocate memory for tunneling qp update\n");
goto out;
}
- for (i = 0; i < dev->caps.num_ports; i++) {
+ for (i = 0; i < ports; i++) {
dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
if (!dm[i]) {
pr_err("failed to allocate memory for tunneling qp update work struct\n");
@@ -1843,9 +2422,9 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
}
}
/* initialize or tear down tunnel QPs for the slave */
- for (i = 0; i < dev->caps.num_ports; i++) {
+ for (i = 0; i < ports; i++) {
INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
- dm[i]->port = i + 1;
+ dm[i]->port = first_port + i + 1;
dm[i]->slave = slave;
dm[i]->do_init = do_init;
dm[i]->dev = ibdev;
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 25b2cdff00f..ed327e6c8fd 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -215,8 +215,9 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
}
mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
spin_unlock(&dev->sm_lock);
- return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port,
- IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad);
+ return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
+ ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
+ &ah_attr, NULL, mad);
}
static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 036b663dd26..369da3ca5d6 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -68,6 +68,8 @@ enum {
/*module param to indicate if SM assigns the alias_GUID*/
extern int mlx4_ib_sm_guid_assign;
+#define MLX4_IB_UC_STEER_QPN_ALIGN 1
+#define MLX4_IB_UC_MAX_NUM_QPS 256
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
@@ -153,6 +155,8 @@ struct mlx4_ib_wq {
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+ MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
};
@@ -238,6 +242,22 @@ struct mlx4_ib_proxy_sqp_hdr {
struct mlx4_rcv_tunnel_hdr tun;
} __packed;
+/*
+ * Source-MAC and VLAN bookkeeping kept per QP path; struct mlx4_ib_qp
+ * embeds one instance as "pri" and one as "alt".
+ *
+ * smac holds a MAC packed into a u64 and smac_index the index returned by
+ * mlx4_register_mac() for it. vid is initialised to 0xFFFF when the QP is
+ * allocated.
+ * NOTE(review): the candidate_* fields look like values staged before they
+ * replace the current ones, and the *_port fields like the port each
+ * registration was made on - confirm against the users in qp.c.
+ */
+struct mlx4_roce_smac_vlan_info {
+ u64 smac;
+ int smac_index;
+ int smac_port;
+ u64 candidate_smac;
+ int candidate_smac_index;
+ int candidate_smac_port;
+ u16 vid;
+ int vlan_index;
+ int vlan_port;
+ u16 candidate_vid;
+ int candidate_vlan_index;
+ int candidate_vlan_port;
+ int update_vid;
+};
+
struct mlx4_ib_qp {
struct ib_qp ibqp;
struct mlx4_qp mqp;
@@ -270,7 +290,9 @@ struct mlx4_ib_qp {
struct list_head gid_list;
struct list_head steering_rules;
struct mlx4_ib_buf *sqp_proxy_rcv;
-
+ struct mlx4_roce_smac_vlan_info pri;
+ struct mlx4_roce_smac_vlan_info alt;
+ u64 reg_id;
};
struct mlx4_ib_srq {
@@ -428,7 +450,10 @@ struct mlx4_ib_sriov {
struct mlx4_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[MLX4_MAX_PORTS];
+ struct net_device *masters[MLX4_MAX_PORTS];
struct notifier_block nb;
+ struct notifier_block nb_inet;
+ struct notifier_block nb_inet6;
union ib_gid gid_table[MLX4_MAX_PORTS][128];
};
@@ -494,6 +519,13 @@ struct mlx4_ib_dev {
struct kobject *dev_ports_parent[MLX4_MFUNC_MAX];
struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS];
struct pkey_mgt pkeys;
+ unsigned long *ib_uc_qpns_bitmap;
+ int steer_qpn_count;
+ int steer_qpn_base;
+ int steering_support;
+ struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS];
+ /* lock when destroying qp1_proxy and getting netdev events */
+ struct mutex qp1_proxy_lock[MLX4_MAX_PORTS];
};
struct ib_event_work {
@@ -675,9 +707,6 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid, int netw_view);
-int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
- u8 *mac, int *is_mcast, u8 port);
-
static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
@@ -712,9 +741,12 @@ void mlx4_ib_tunnels_update_work(struct work_struct *work);
int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad);
+
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
- u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
+ u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
+ struct ib_mad *mad);
+
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
@@ -752,5 +784,9 @@ void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device);
__be64 mlx4_ib_gen_node_guid(void);
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+ int is_attach);
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index e471f089ff0..cb2a8727f3f 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -90,11 +90,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
- struct ib_umem_chunk *chunk;
- int i, j, k;
+ int i, k, entry;
int n;
int len;
int err = 0;
+ struct scatterlist *sg;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
@@ -102,26 +102,25 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
i = n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(&chunk->page_list[j]) +
- umem->page_size * k;
- /*
- * Be friendly to mlx4_write_mtt() and
- * pass it chunks of appropriate size.
- */
- if (i == PAGE_SIZE / sizeof (u64)) {
- err = mlx4_write_mtt(dev->dev, mtt, n,
- i, pages);
- if (err)
- goto out;
- n += i;
- i = 0;
- }
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = sg_dma_len(sg) >> mtt->page_shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(sg) +
+ umem->page_size * k;
+ /*
+ * Be friendly to mlx4_write_mtt() and
+ * pass it chunks of appropriate size.
+ */
+ if (i == PAGE_SIZE / sizeof (u64)) {
+ err = mlx4_write_mtt(dev->dev, mtt, n,
+ i, pages);
+ if (err)
+ goto out;
+ n += i;
+ i = 0;
}
}
+ }
if (i)
err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 4f10af2905b..67780452f0c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -90,6 +90,21 @@ enum {
MLX4_RAW_QP_MSGMAX = 31,
};
+#ifndef ETH_ALEN
+#define ETH_ALEN 6
+#endif
+/*
+ * Pack the ETH_ALEN bytes at @addr into a u64, addr[0] ending up in the
+ * most significant of the used bytes and addr[ETH_ALEN - 1] in the least.
+ */
+static inline u64 mlx4_mac_to_u64(u8 *addr)
+{
+ const u8 *octet = addr;
+ const u8 *end = addr + ETH_ALEN;
+ u64 packed = 0;
+
+ /* shift the accumulated value up and append the next byte */
+ while (octet < end)
+ packed = (packed << 8) | *octet++;
+
+ return packed;
+}
+
static const __be32 mlx4_ib_opcode[] = {
[IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
[IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
@@ -593,9 +608,20 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
return !attr->srq;
}
+/*
+ * Return 1 if @qpn is the QP0 proxy of some port and that port's
+ * caps.qp0_qkey entry is non-zero; 0 otherwise (including when @qpn is not
+ * a QP0 proxy at all).
+ */
+static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
+{
+ int port_idx = 0;
+
+ while (port_idx < dev->caps.num_ports) {
+ if (qpn == dev->caps.qp0_proxy[port_idx])
+ return dev->caps.qp0_qkey[port_idx] ? 1 : 0;
+ port_idx++;
+ }
+
+ return 0;
+}
+
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
+ struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
+ gfp_t gfp)
{
int qpn;
int err;
@@ -610,10 +636,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
!(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
if (init_attr->qp_type == IB_QPT_GSI)
qp_type = MLX4_IB_QPT_PROXY_GSI;
- else if (mlx4_is_master(dev->dev))
- qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
- else
- qp_type = MLX4_IB_QPT_PROXY_SMI;
+ else {
+ if (mlx4_is_master(dev->dev) ||
+ qp0_enabled_vf(dev->dev, sqpn))
+ qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
+ else
+ qp_type = MLX4_IB_QPT_PROXY_SMI;
+ }
}
qpn = sqpn;
/* add extra sg entry for tunneling */
@@ -628,7 +657,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
return -EINVAL;
if (tnl_init->proxy_qp_type == IB_QPT_GSI)
qp_type = MLX4_IB_QPT_TUN_GSI;
- else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
+ else if (tnl_init->slave == mlx4_master_func_num(dev->dev) ||
+ mlx4_vf_smi_enabled(dev->dev, tnl_init->slave,
+ tnl_init->port))
qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
else
qp_type = MLX4_IB_QPT_TUN_SMI;
@@ -643,14 +674,18 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
(qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
- sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
+ sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp);
if (!sqp)
return -ENOMEM;
qp = &sqp->qp;
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
} else {
- qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
+ qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
if (!qp)
return -ENOMEM;
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
}
} else
qp = *caller_qp;
@@ -716,19 +751,27 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
+ if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
+ if (dev->steering_support ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ qp->flags |= MLX4_IB_QP_NETIF;
+ else
+ goto err;
+ }
+
err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
if (err)
goto err;
if (qp_has_rq(init_attr)) {
- err = mlx4_db_alloc(dev->dev, &qp->db, 0);
+ err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
if (err)
goto err;
*qp->db.db = 0;
}
- if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
+ if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
err = -ENOMEM;
goto err_db;
}
@@ -738,13 +781,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
+ err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
if (err)
goto err_mtt;
- qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
- qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-
+ qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
+ qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM;
goto err_wrid;
@@ -765,12 +807,16 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->qp_type == IB_QPT_RAW_PACKET)
err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn);
else
- err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
+ else
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1,
+ &qpn);
if (err)
goto err_proxy;
}
- err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
if (err)
goto err_qpn;
@@ -790,8 +836,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
return 0;
err_qpn:
- if (!sqpn)
- mlx4_qp_release_range(dev->dev, qpn, 1);
+ if (!sqpn) {
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_free(dev, qpn, 1);
+ else
+ mlx4_qp_release_range(dev->dev, qpn, 1);
+ }
err_proxy:
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
free_proxy_bufs(pd->device, qp);
@@ -909,11 +959,32 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
{
struct mlx4_ib_cq *send_cq, *recv_cq;
- if (qp->state != IB_QPS_RESET)
+ if (qp->state != IB_QPS_RESET) {
if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
pr_warn("modify QP %06x to RESET failed.\n",
qp->mqp.qpn);
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = 0;
+ }
+ if (qp->alt.smac) {
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = 0;
+ }
+ if (qp->pri.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
+ qp->pri.vid = 0xFFFF;
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
+ if (qp->alt.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
+ qp->alt.vid = 0xFFFF;
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
+ }
get_cqs(qp, &send_cq, &recv_cq);
@@ -932,8 +1003,12 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_qp_free(dev->dev, &qp->mqp);
- if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
- mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+ if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) {
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1);
+ else
+ mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+ }
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
@@ -980,19 +1055,30 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct mlx4_ib_qp *qp = NULL;
int err;
u16 xrcdn = 0;
+ gfp_t gfp;
+ gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
+ GFP_NOIO : GFP_KERNEL;
/*
* We only support LSO, vendor flag1, and multicast loopback blocking,
* and only for kernel UD QPs.
*/
if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
- MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP))
+ MLX4_IB_SRIOV_TUNNEL_QP |
+ MLX4_IB_SRIOV_SQP |
+ MLX4_IB_QP_NETIF |
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO))
return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
+ if (init_attr->qp_type != IB_QPT_UD)
+ return ERR_PTR(-EINVAL);
+ }
+
if (init_attr->create_flags &&
(udata ||
- ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
+ ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
init_attr->qp_type != IB_QPT_UD) ||
((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
init_attr->qp_type > IB_QPT_GSI)))
@@ -1012,14 +1098,16 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_RAW_PACKET:
- qp = kzalloc(sizeof *qp, GFP_KERNEL);
+ qp = kzalloc(sizeof *qp, gfp);
if (!qp)
return ERR_PTR(-ENOMEM);
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
/* fall through */
case IB_QPT_UD:
{
err = create_qp_common(to_mdev(pd->device), pd, init_attr,
- udata, 0, &qp);
+ udata, 0, &qp, gfp);
if (err)
return ERR_PTR(err);
@@ -1037,7 +1125,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
get_sqp_num(to_mdev(pd->device), init_attr),
- &qp);
+ &qp, gfp);
if (err)
return ERR_PTR(err);
@@ -1063,6 +1151,12 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
if (is_qp0(dev, mqp))
mlx4_CLOSE_PORT(dev->dev, mqp->port);
+ if (dev->qp1_proxy[mqp->port - 1] == mqp) {
+ mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]);
+ dev->qp1_proxy[mqp->port - 1] = NULL;
+ mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
+ }
+
pd = get_pd(mqp);
destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
@@ -1144,16 +1238,16 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}
-static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
- struct mlx4_qp_path *path, u8 port)
+static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
+ u64 smac, u16 vlan_tag, struct mlx4_qp_path *path,
+ struct mlx4_roce_smac_vlan_info *smac_info, u8 port)
{
- int err;
int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
IB_LINK_LAYER_ETHERNET;
- u8 mac[6];
- int is_mcast;
- u16 vlan_tag;
int vidx;
+ int smac_index;
+ int err;
+
path->grh_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
@@ -1182,36 +1276,105 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
}
if (is_eth) {
- path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
- ((port - 1) << 6) | ((ah->sl & 7) << 3);
-
if (!(ah->ah_flags & IB_AH_GRH))
return -1;
- err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
- if (err)
- return err;
-
- memcpy(path->dmac, mac, 6);
- path->ackto = MLX4_IB_LINK_TYPE_ETH;
- /* use index 0 into MAC table for IBoE */
- path->grh_mylmc &= 0x80;
+ path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
+ ((port - 1) << 6) | ((ah->sl & 7) << 3);
- vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
+ path->feup |= MLX4_FEUP_FORCE_ETH_UP;
if (vlan_tag < 0x1000) {
- if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx))
- return -ENOENT;
-
- path->vlan_index = vidx;
+ if (smac_info->vid < 0x1000) {
+ /* both valid vlan ids */
+ if (smac_info->vid != vlan_tag) {
+ /* different VIDs. unreg old and reg new */
+ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
+ if (err)
+ return err;
+ smac_info->candidate_vid = vlan_tag;
+ smac_info->candidate_vlan_index = vidx;
+ smac_info->candidate_vlan_port = port;
+ smac_info->update_vid = 1;
+ path->vlan_index = vidx;
+ } else {
+ path->vlan_index = smac_info->vlan_index;
+ }
+ } else {
+ /* no current vlan tag in qp */
+ err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx);
+ if (err)
+ return err;
+ smac_info->candidate_vid = vlan_tag;
+ smac_info->candidate_vlan_index = vidx;
+ smac_info->candidate_vlan_port = port;
+ smac_info->update_vid = 1;
+ path->vlan_index = vidx;
+ }
+ path->feup |= MLX4_FVL_FORCE_ETH_VLAN;
path->fl = 1 << 6;
+ } else {
+ /* have current vlan tag. unregister it at modify-qp success */
+ if (smac_info->vid < 0x1000) {
+ smac_info->candidate_vid = 0xFFFF;
+ smac_info->update_vid = 1;
+ }
}
- } else
+
+ /* get smac_index for RoCE use.
+ * If no smac was yet assigned, register one.
+ * If one was already assigned, but the new mac differs,
+ * unregister the old one and register the new one.
+ */
+ if (!smac_info->smac || smac_info->smac != smac) {
+ /* register candidate now, unreg if needed, after success */
+ smac_index = mlx4_register_mac(dev->dev, port, smac);
+ if (smac_index >= 0) {
+ smac_info->candidate_smac_index = smac_index;
+ smac_info->candidate_smac = smac;
+ smac_info->candidate_smac_port = port;
+ } else {
+ return -EINVAL;
+ }
+ } else {
+ smac_index = smac_info->smac_index;
+ }
+
+ memcpy(path->dmac, ah->dmac, 6);
+ path->ackto = MLX4_IB_LINK_TYPE_ETH;
+ /* put MAC table smac index for IBoE */
+ path->grh_mylmc = (u8) (smac_index) | 0x80;
+ } else {
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
((port - 1) << 6) | ((ah->sl & 0xf) << 2);
+ }
return 0;
}
+static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
+ enum ib_qp_attr_mask qp_attr_mask,
+ struct mlx4_ib_qp *mqp,
+ struct mlx4_qp_path *path, u8 port)
+{
+ return _mlx4_set_path(dev, &qp->ah_attr,
+ mlx4_mac_to_u64((u8 *)qp->smac),
+ (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff,
+ path, &mqp->pri, port);
+}
+
+static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
+ const struct ib_qp_attr *qp,
+ enum ib_qp_attr_mask qp_attr_mask,
+ struct mlx4_ib_qp *mqp,
+ struct mlx4_qp_path *path, u8 port)
+{
+ return _mlx4_set_path(dev, &qp->alt_ah_attr,
+ mlx4_mac_to_u64((u8 *)qp->alt_smac),
+ (qp_attr_mask & IB_QP_ALT_VID) ?
+ qp->alt_vlan_id : 0xffff,
+ path, &mqp->alt, port);
+}
+
static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
struct mlx4_ib_gid_entry *ge, *tmp;
@@ -1224,6 +1387,37 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
}
+static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac,
+ struct mlx4_qp_context *context)
+{
+ struct net_device *ndev;
+ u64 u64_mac;
+ int smac_index;
+
+
+ ndev = dev->iboe.netdevs[qp->port - 1];
+ if (ndev) {
+ smac = ndev->dev_addr;
+ u64_mac = mlx4_mac_to_u64(smac);
+ } else {
+ u64_mac = dev->dev->caps.def_mac[qp->port];
+ }
+
+ context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
+ if (!qp->pri.smac) {
+ smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac);
+ if (smac_index >= 0) {
+ qp->pri.candidate_smac_index = smac_index;
+ qp->pri.candidate_smac = u64_mac;
+ qp->pri.candidate_smac_port = qp->port;
+ context->pri_path.grh_mylmc = 0x80 | (u8) smac_index;
+ } else {
+ return -ENOENT;
+ }
+ }
+ return 0;
+}
+
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1235,6 +1429,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
struct mlx4_qp_context *context;
enum mlx4_qp_optpar optpar = 0;
int sqd_event;
+ int steer_qp = 0;
int err = -EINVAL;
context = kzalloc(sizeof *context, GFP_KERNEL);
@@ -1319,6 +1514,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
} else
context->pri_path.counter_index = 0xff;
+
+ if (qp->flags & MLX4_IB_QP_NETIF) {
+ mlx4_ib_steer_qp_reg(dev, qp, 1);
+ steer_qp = 1;
+ }
}
if (attr_mask & IB_QP_PKEY_INDEX) {
@@ -1329,7 +1529,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_AV) {
- if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
+ if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
attr_mask & IB_QP_PORT ?
attr->port_num : qp->port))
goto out;
@@ -1352,8 +1552,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
dev->dev->caps.pkey_table_len[attr->alt_port_num])
goto out;
- if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
- attr->alt_port_num))
+ if (mlx4_set_alt_path(dev, attr, attr_mask, qp,
+ &context->alt_path,
+ attr->alt_port_num))
goto out;
context->alt_path.pkey_index = attr->alt_pkey_index;
@@ -1458,12 +1659,39 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.fl = 0x80;
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
}
+ if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
+ IB_LINK_LAYER_ETHERNET) {
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI)
+ context->pri_path.feup = 1 << 7; /* don't fsm */
+ /* handle smac_index */
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
+ qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
+ err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context);
+ if (err)
+ return -EINVAL;
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
+ dev->qp1_proxy[qp->port - 1] = qp;
+ }
+ }
}
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
MLX4_IB_LINK_TYPE_ETH;
+ if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
+ int is_eth = rdma_port_get_link_layer(
+ &dev->ib_dev, qp->port) ==
+ IB_LINK_LAYER_ETHERNET;
+ if (is_eth) {
+ context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH;
+ optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH;
+ }
+ }
+
+
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
sqd_event = 1;
@@ -1534,23 +1762,113 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
- if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
- ibqp->srq ? to_msrq(ibqp->srq): NULL);
- if (send_cq != recv_cq)
- mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ if (new_state == IB_QPS_RESET) {
+ if (!ibqp->uobject) {
+ mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+ if (send_cq != recv_cq)
+ mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+
+ qp->rq.head = 0;
+ qp->rq.tail = 0;
+ qp->sq.head = 0;
+ qp->sq.tail = 0;
+ qp->sq_next_wqe = 0;
+ if (qp->rq.wqe_cnt)
+ *qp->db.db = 0;
+
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ mlx4_ib_steer_qp_reg(dev, qp, 0);
+ }
+ if (qp->pri.smac) {
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = 0;
+ }
+ if (qp->alt.smac) {
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = 0;
+ }
+ if (qp->pri.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid);
+ qp->pri.vid = 0xFFFF;
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
- qp->rq.head = 0;
- qp->rq.tail = 0;
- qp->sq.head = 0;
- qp->sq.tail = 0;
- qp->sq_next_wqe = 0;
- if (qp->rq.wqe_cnt)
- *qp->db.db = 0;
+ if (qp->alt.vid < 0x1000) {
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid);
+ qp->alt.vid = 0xFFFF;
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
}
-
out:
+ if (err && steer_qp)
+ mlx4_ib_steer_qp_reg(dev, qp, 0);
kfree(context);
+ if (qp->pri.candidate_smac) {
+ if (err) {
+ mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac);
+ } else {
+ if (qp->pri.smac)
+ mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
+ qp->pri.smac = qp->pri.candidate_smac;
+ qp->pri.smac_index = qp->pri.candidate_smac_index;
+ qp->pri.smac_port = qp->pri.candidate_smac_port;
+ }
+ qp->pri.candidate_smac = 0;
+ qp->pri.candidate_smac_index = 0;
+ qp->pri.candidate_smac_port = 0;
+ }
+ if (qp->alt.candidate_smac) {
+ if (err) {
+ mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->alt.candidate_smac);
+ } else {
+ if (qp->alt.smac)
+ mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
+ qp->alt.smac = qp->alt.candidate_smac;
+ qp->alt.smac_index = qp->alt.candidate_smac_index;
+ qp->alt.smac_port = qp->alt.candidate_smac_port;
+ }
+ qp->alt.candidate_smac = 0;
+ qp->alt.candidate_smac_index = 0;
+ qp->alt.candidate_smac_port = 0;
+ }
+
+ if (qp->pri.update_vid) {
+ if (err) {
+ if (qp->pri.candidate_vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port,
+ qp->pri.candidate_vid);
+ } else {
+ if (qp->pri.vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port,
+ qp->pri.vid);
+ qp->pri.vid = qp->pri.candidate_vid;
+ qp->pri.vlan_port = qp->pri.candidate_vlan_port;
+ qp->pri.vlan_index = qp->pri.candidate_vlan_index;
+ }
+ qp->pri.candidate_vid = 0xFFFF;
+ qp->pri.update_vid = 0;
+ }
+
+ if (qp->alt.update_vid) {
+ if (err) {
+ if (qp->alt.candidate_vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port,
+ qp->alt.candidate_vid);
+ } else {
+ if (qp->alt.vid < 0x1000)
+ mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port,
+ qp->alt.vid);
+ qp->alt.vid = qp->alt.candidate_vid;
+ qp->alt.vlan_port = qp->alt.candidate_vlan_port;
+ qp->alt.vlan_index = qp->alt.candidate_vlan_index;
+ }
+ qp->alt.candidate_vid = 0xFFFF;
+ qp->alt.update_vid = 0;
+ }
+
return err;
}
@@ -1561,13 +1879,21 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx4_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
-
+ int ll;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ ll = IB_LINK_LAYER_UNSPECIFIED;
+ } else {
+ int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ ll = rdma_port_get_link_layer(&dev->ib_dev, port);
+ }
+
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask, ll)) {
pr_debug("qpn 0x%x: invalid attribute mask specified "
"for transition %d to %d. qp_type %d,"
" attr_mask 0x%x\n",
@@ -1631,6 +1957,19 @@ out:
return err;
}
+static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
+{
+ int i;
+ for (i = 0; i < dev->caps.num_ports; i++) {
+ if (qpn == dev->caps.qp0_proxy[i] ||
+ qpn == dev->caps.qp0_tunnel[i]) {
+ *qkey = dev->caps.qp0_qkey[i];
+ return 0;
+ }
+ }
+ return -EINVAL;
+}
+
static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
struct ib_send_wr *wr,
void *wqe, unsigned *mlx_seg_len)
@@ -1688,8 +2027,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
- if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
- return -EINVAL;
+ if (mlx4_is_master(mdev->dev)) {
+ if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ return -EINVAL;
+ } else {
+ if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ return -EINVAL;
+ }
sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
@@ -1744,9 +2088,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
{
struct ib_device *ib_dev = sqp->qp.ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
+ struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
- struct net_device *ndev;
union ib_gid sgid;
u16 pkey;
int send_size;
@@ -1770,12 +2114,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
* we must use our own cache */
- sgid.global.subnet_prefix =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- subnet_prefix;
- sgid.global.interface_id =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- guid_cache[ah->av.ib.gid_index];
+ err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &sgid.raw[0]);
+ if (err)
+ return err;
} else {
err = ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
@@ -1784,8 +2127,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
return err;
}
- vlan = rdma_get_vlan_id(&sgid);
- is_vlan = vlan < 0x1000;
+ if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
+ vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
+ is_vlan = 1;
+ }
}
ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
@@ -1802,6 +2147,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
sqp->ud_header.grh.flow_label =
ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
+ if (is_eth)
+ memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
+ else {
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
@@ -1817,6 +2165,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index,
&sqp->ud_header.grh.source_gid);
+ }
memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.ib.dgid, 16);
}
@@ -1849,16 +2198,23 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
if (is_eth) {
u8 *smac;
+ struct in6_addr in6;
+
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
mlx->sched_prio = cpu_to_be16(pcp);
memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
/* FIXME: cache smac value? */
- ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1];
- if (!ndev)
- return -ENODEV;
- smac = ndev->dev_addr;
+ memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
+ memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
+ memcpy(&in6, sgid.raw, sizeof(in6));
+
+ if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev))
+ smac = to_mdev(sqp->qp.ibqp.device)->
+ iboe.netdevs[sqp->qp.port - 1]->dev_addr;
+ else /* use the src mac of the tunnel */
+ smac = ah->av.eth.s_mac;
memcpy(sqp->ud_header.eth.smac_h, smac, 6);
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
@@ -2059,7 +2415,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
struct mlx4_wqe_datagram_seg *dseg,
- struct ib_send_wr *wr, enum ib_qp_type qpt)
+ struct ib_send_wr *wr,
+ enum mlx4_ib_qp_type qpt)
{
union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
struct mlx4_av sqp_av = {0};
@@ -2072,8 +2429,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
cpu_to_be32(0xf0000000);
memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
- /* This function used only for sending on QP1 proxies */
- dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+ if (qpt == MLX4_IB_QPT_PROXY_GSI)
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+ else
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]);
/* Use QKEY from the QP context, which is set by master */
dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
}
@@ -2090,6 +2449,8 @@ static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_
hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+ memcpy(hdr.mac, ah->av.eth.mac, 6);
+ hdr.vlan = ah->av.eth.vlan;
spc = MLX4_INLINE_ALIGN -
((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
@@ -2366,11 +2727,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_PROXY_SMI_OWNER:
- if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
- err = -ENOSYS;
- *bad_wr = wr;
- goto out;
- }
err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
@@ -2387,16 +2743,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += seglen / 16;
break;
case MLX4_IB_QPT_PROXY_SMI:
- /* don't allow QP0 sends on guests */
- err = -ENOSYS;
- *bad_wr = wr;
- goto out;
case MLX4_IB_QPT_PROXY_GSI:
/* If we are tunneling special qps, this is a UD qp.
* In this case we first add a UD segment targeting
* the tunnel qp, and then add a header with address
* information */
- set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
+ set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
+ qp->mlx4_ib_qp_type);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
build_tunnel_header(wr, wqe, &seglen);
@@ -2762,6 +3115,9 @@ done:
if (qp->flags & MLX4_IB_QP_LSO)
qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
+ if (qp->flags & MLX4_IB_QP_NETIF)
+ qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP;
+
qp_init_attr->sq_sig_type =
qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 60c5fb025fc..62d9285300a 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -134,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
} else {
- err = mlx4_db_alloc(dev->dev, &srq->db, 0);
+ err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
if (err)
goto err_srq;
*srq->db.db = 0;
- if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+ if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
+ GFP_KERNEL)) {
err = -ENOMEM;
goto err_db;
}
@@ -165,7 +166,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
+ err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
if (err)
goto err_mtt;
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 97516eb363b..cb4c66e723b 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -389,8 +389,10 @@ struct mlx4_port {
struct mlx4_ib_dev *dev;
struct attribute_group pkey_group;
struct attribute_group gid_group;
- u8 port_num;
+ struct device_attribute enable_smi_admin;
+ struct device_attribute smi_enabled;
int slave;
+ u8 port_num;
};
@@ -558,6 +560,101 @@ err:
return NULL;
}
+static ssize_t sysfs_show_smi_enabled(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct mlx4_port *p =
+ container_of(attr, struct mlx4_port, smi_enabled);
+ ssize_t len = 0;
+
+ if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num))
+ len = sprintf(buf, "%d\n", 1);
+ else
+ len = sprintf(buf, "%d\n", 0);
+
+ return len;
+}
+
+static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx4_port *p =
+ container_of(attr, struct mlx4_port, enable_smi_admin);
+ ssize_t len = 0;
+
+ if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num))
+ len = sprintf(buf, "%d\n", 1);
+ else
+ len = sprintf(buf, "%d\n", 0);
+
+ return len;
+}
+
+static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct mlx4_port *p =
+ container_of(attr, struct mlx4_port, enable_smi_admin);
+ int enable;
+
+ if (sscanf(buf, "%i", &enable) != 1 ||
+ enable < 0 || enable > 1)
+ return -EINVAL;
+
+ if (mlx4_vf_set_enable_smi_admin(p->dev->dev, p->slave, p->port_num, enable))
+ return -EINVAL;
+ return count;
+}
+
+static int add_vf_smi_entries(struct mlx4_port *p)
+{
+ int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+ IB_LINK_LAYER_ETHERNET;
+ int ret;
+
+ /* do not display entries if eth transport, or if master */
+ if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+ return 0;
+
+ sysfs_attr_init(&p->smi_enabled.attr);
+ p->smi_enabled.show = sysfs_show_smi_enabled;
+ p->smi_enabled.store = NULL;
+ p->smi_enabled.attr.name = "smi_enabled";
+ p->smi_enabled.attr.mode = 0444;
+ ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr);
+ if (ret) {
+ pr_err("failed to create smi_enabled\n");
+ return ret;
+ }
+
+ sysfs_attr_init(&p->enable_smi_admin.attr);
+ p->enable_smi_admin.show = sysfs_show_enable_smi_admin;
+ p->enable_smi_admin.store = sysfs_store_enable_smi_admin;
+ p->enable_smi_admin.attr.name = "enable_smi_admin";
+ p->enable_smi_admin.attr.mode = 0644;
+ ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr);
+ if (ret) {
+ pr_err("failed to create enable_smi_admin\n");
+ sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+ return ret;
+ }
+ return 0;
+}
+
+static void remove_vf_smi_entries(struct mlx4_port *p)
+{
+ int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+ IB_LINK_LAYER_ETHERNET;
+
+ if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+ return;
+
+ sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+ sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr);
+}
+
static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
{
struct mlx4_port *p;
@@ -582,8 +679,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
p->pkey_group.attrs =
alloc_group_attrs(show_port_pkey, store_port_pkey,
dev->dev->caps.pkey_table_len[port_num]);
- if (!p->pkey_group.attrs)
+ if (!p->pkey_group.attrs) {
+ ret = -ENOMEM;
goto err_alloc;
+ }
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
if (ret)
@@ -591,13 +690,19 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
p->gid_group.name = "gid_idx";
p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
- if (!p->gid_group.attrs)
+ if (!p->gid_group.attrs) {
+ ret = -ENOMEM;
goto err_free_pkey;
+ }
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)
goto err_free_gid;
+ ret = add_vf_smi_entries(p);
+ if (ret)
+ goto err_free_gid;
+
list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
return 0;
@@ -623,6 +728,7 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
int port;
struct kobject *p, *t;
struct mlx4_port *mport;
+ struct mlx4_active_ports actv_ports;
get_name(dev, name, slave, sizeof name);
@@ -645,7 +751,11 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
goto err_ports;
}
+ actv_ports = mlx4_get_active_ports(dev->dev, slave);
+
for (port = 1; port <= dev->dev->caps.num_ports; ++port) {
+ if (!test_bit(port - 1, actv_ports.ports))
+ continue;
err = add_port(dev, port, slave);
if (err)
goto err_add;
@@ -660,6 +770,7 @@ err_add:
mport = container_of(p, struct mlx4_port, kobj);
sysfs_remove_group(p, &mport->pkey_group);
sysfs_remove_group(p, &mport->gid_group);
+ remove_vf_smi_entries(mport);
kobject_put(p);
}
kobject_put(dev->dev_ports_parent[slave]);
@@ -704,6 +815,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
port = container_of(p, struct mlx4_port, kobj);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
+ remove_vf_smi_entries(port);
kobject_put(p);
kobject_put(device->dev_ports_parent[slave]);
}
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
index 8e6aebfaf8a..10df386c634 100644
--- a/drivers/infiniband/hw/mlx5/Kconfig
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -1,6 +1,6 @@
config MLX5_INFINIBAND
tristate "Mellanox Connect-IB HCA support"
- depends on NETDEVICES && ETHERNET && PCI && X86
+ depends on NETDEVICES && ETHERNET && PCI
select NET_VENDOR_MELLANOX
select MLX5_CORE
---help---
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 344ab03948a..8ae4f896cb4 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -32,6 +32,7 @@
#include <linux/kref.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"
#include "user.h"
@@ -73,14 +74,24 @@ static void *get_cqe(struct mlx5_ib_cq *cq, int n)
return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
}
+/* Return 1 if any bit of @n selected by @nent is set, 0 otherwise. */
+static u8 sw_ownership_bit(int n, int nent)
+{
+	return !!(n & nent);
+}
+
static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
struct mlx5_cqe64 *cqe64;
cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
- return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^
- !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
+
+ if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
+ !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
+ return cqe;
+ } else {
+ return NULL;
+ }
}
static void *next_cqe_sw(struct mlx5_ib_cq *cq)
@@ -351,6 +362,43 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
qp->sq.last_poll = tail;
}
+/*
+ * Hand the buffer embedded in @buf back to the mlx5 core device.
+ * The mlx5_ib_cq_buf descriptor itself is not freed here.
+ */
+static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
+{
+	struct mlx5_buf *mbuf = &buf->buf;
+
+	mlx5_buf_free(&dev->mdev, mbuf);
+}
+
+/*
+ * Decode a signature-error CQE into @item.
+ *
+ * The syndrome bits are tested in the order guard, reference tag,
+ * application tag; the first one that is set selects err_type and the
+ * expected/actual pair.  A syndrome with none of these bits is only
+ * logged, leaving those three fields as they were.  The error offset
+ * and the key are filled in for every CQE.
+ */
+static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
+			     struct ib_sig_err *item)
+{
+	u16 syndrome = be16_to_cpu(cqe->syndrome);
+
+#define GUARD_ERR (1 << 13)
+#define APPTAG_ERR (1 << 12)
+#define REFTAG_ERR (1 << 11)
+
+	if (syndrome & GUARD_ERR) {
+		/* taken from the upper 16 bits of the transport signature */
+		item->err_type = IB_SIG_BAD_GUARD;
+		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
+		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
+	} else if (syndrome & REFTAG_ERR) {
+		item->err_type = IB_SIG_BAD_REFTAG;
+		item->expected = be32_to_cpu(cqe->expected_reftag);
+		item->actual = be32_to_cpu(cqe->actual_reftag);
+	} else if (syndrome & APPTAG_ERR) {
+		/* taken from the lower 16 bits of the transport signature */
+		item->err_type = IB_SIG_BAD_APPTAG;
+		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
+		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
+	} else {
+		pr_err("Got signature completion error with bad syndrome %04x\n",
+		       syndrome);
+	}
+
+	item->sig_err_offset = be64_to_cpu(cqe->err_offset);
+	item->key = be32_to_cpu(cqe->mkey);
+}
+
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_ib_qp **cur_qp,
struct ib_wc *wc)
@@ -360,12 +408,16 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_cqe64 *cqe64;
struct mlx5_core_qp *mqp;
struct mlx5_ib_wq *wq;
+ struct mlx5_sig_err_cqe *sig_err_cqe;
+ struct mlx5_core_mr *mmr;
+ struct mlx5_ib_mr *mr;
uint8_t opcode;
uint32_t qpn;
u16 wqe_ctr;
void *cqe;
int idx;
+repoll:
cqe = next_cqe_sw(cq);
if (!cqe)
return -EAGAIN;
@@ -379,7 +431,18 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
*/
rmb();
- /* TBD: resize CQ */
+ opcode = cqe64->op_own >> 4;
+ if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
+ if (likely(cq->resize_buf)) {
+ free_cq_buf(dev, &cq->buf);
+ cq->buf = *cq->resize_buf;
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ goto repoll;
+ } else {
+ mlx5_ib_warn(dev, "unexpected resize cqe\n");
+ }
+ }
qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
@@ -398,7 +461,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
}
wc->qp = &(*cur_qp)->ibqp;
- opcode = cqe64->op_own >> 4;
switch (opcode) {
case MLX5_CQE_REQ:
wq = &(*cur_qp)->sq;
@@ -449,6 +511,33 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
}
}
break;
+ case MLX5_CQE_SIG_ERR:
+ sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
+
+ read_lock(&dev->mdev.priv.mr_table.lock);
+ mmr = __mlx5_mr_lookup(&dev->mdev,
+ mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+ if (unlikely(!mmr)) {
+ read_unlock(&dev->mdev.priv.mr_table.lock);
+ mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
+ cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
+ return -EINVAL;
+ }
+
+ mr = to_mibmr(mmr);
+ get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
+ mr->sig->sig_err_exists = true;
+ mr->sig->sigerr_count++;
+
+ mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
+ cq->mcq.cqn, mr->sig->err_item.key,
+ mr->sig->err_item.err_type,
+ mr->sig->err_item.sig_err_offset,
+ mr->sig->err_item.expected,
+ mr->sig->err_item.actual);
+
+ read_unlock(&dev->mdev.priv.mr_table.lock);
+ goto repoll;
}
return 0;
@@ -503,29 +592,35 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
return err;
buf->cqe_size = cqe_size;
+ buf->nent = nent;
return 0;
}
-static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
-{
- mlx5_buf_free(&dev->mdev, &buf->buf);
-}
-
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
struct ib_ucontext *context, struct mlx5_ib_cq *cq,
int entries, struct mlx5_create_cq_mbox_in **cqb,
int *cqe_size, int *index, int *inlen)
{
struct mlx5_ib_create_cq ucmd;
+ size_t ucmdlen;
int page_shift;
int npages;
int ncont;
int err;
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ ucmdlen =
+ (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
+ sizeof(ucmd)) ? (sizeof(ucmd) -
+ sizeof(ucmd.reserved)) : sizeof(ucmd);
+
+ if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
return -EFAULT;
+ if (ucmdlen == sizeof(ucmd) &&
+ ucmd.reserved != 0)
+ return -EINVAL;
+
if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
return -EINVAL;
@@ -556,7 +651,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
goto err_db;
}
mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
- (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+ (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
*index = to_mucontext(context)->uuari.uars[0].index;
@@ -576,16 +671,16 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
ib_umem_release(cq->buf.umem);
}
-static void init_cq_buf(struct mlx5_ib_cq *cq, int nent)
+static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
{
int i;
void *cqe;
struct mlx5_cqe64 *cqe64;
- for (i = 0; i < nent; i++) {
- cqe = get_cqe(cq, i);
- cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64;
- cqe64->op_own = 0xf1;
+ for (i = 0; i < buf->nent; i++) {
+ cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
+ cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
+ cqe64->op_own = MLX5_CQE_INVALID << 4;
}
}
@@ -610,7 +705,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (err)
goto err_db;
- init_cq_buf(cq, entries);
+ init_cq_buf(cq, &cq->buf);
*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
*cqb = mlx5_vzalloc(*inlen);
@@ -620,7 +715,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
}
mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
- (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
+ (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
*index = dev->mdev.priv.uuari.uars[0].index;
return 0;
@@ -653,8 +748,11 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
int eqn;
int err;
+ if (entries < 0)
+ return ERR_PTR(-EINVAL);
+
entries = roundup_pow_of_two(entries + 1);
- if (entries < 1 || entries > dev->mdev.caps.max_cqes)
+ if (entries > dev->mdev.caps.max_cqes)
return ERR_PTR(-EINVAL);
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
@@ -747,17 +845,9 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq)
return 0;
}
-static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
- u32 rsn)
+static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
- u32 lrsn;
-
- if (srq)
- lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff;
- else
- lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff;
-
- return rsn == lrsn;
+ return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
@@ -787,8 +877,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
- if (is_equal_rsn(cqe64, srq, rsn)) {
- if (srq)
+ if (is_equal_rsn(cqe64, rsn)) {
+ if (srq && (ntohl(cqe64->srqn) & 0xffffff))
mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
++nfreed;
} else if (nfreed) {
@@ -823,12 +913,266 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
- return -ENOSYS;
+ struct mlx5_modify_cq_mbox_in *in;
+ struct mlx5_ib_dev *dev = to_mdev(cq->device);
+ struct mlx5_ib_cq *mcq = to_mcq(cq);
+ int err;
+ u32 fsel;
+
+ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
+ return -ENOSYS;
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ in->cqn = cpu_to_be32(mcq->mcq.cqn);
+ fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
+ in->ctx.cq_period = cpu_to_be16(cq_period);
+ in->ctx.cq_max_count = cpu_to_be16(cq_count);
+ in->field_select = cpu_to_be32(fsel);
+ err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in));
+ kfree(in);
+
+ if (err)
+ mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
+
+ return err;
+}
+
+/*
+ * Pin the user-space buffer that will back a resized CQ.
+ *
+ * Copies the resize command from @udata, rejects it if a reserved field
+ * is set, pins entries * cqe_size bytes at ucmd.buf_addr and reports the
+ * page layout through @npas / @page_shift and the CQE size through
+ * @cqe_size.  On success the pinned umem is stored in cq->resize_umem.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+		       int entries, struct ib_udata *udata, int *npas,
+		       int *page_shift, int *cqe_size)
+{
+	struct mlx5_ib_resize_cq ucmd;
+	struct ib_umem *umem;
+	int err;
+	int npages;
+	struct ib_ucontext *context = cq->buf.umem->context;
+
+	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+	if (err)
+		return err;
+
+	if (ucmd.reserved0 || ucmd.reserved1)
+		return -EINVAL;
+
+	/*
+	 * cqe_size is supplied by user space: refuse values for which
+	 * entries * cqe_size would overflow instead of pinning a
+	 * truncated length.
+	 */
+	if (entries < 0 ||
+	    (ucmd.cqe_size && entries > INT_MAX / ucmd.cqe_size))
+		return -EINVAL;
+
+	umem = ib_umem_get(context, ucmd.buf_addr,
+			   (size_t)entries * ucmd.cqe_size,
+			   IB_ACCESS_LOCAL_WRITE, 1);
+	if (IS_ERR(umem))
+		return PTR_ERR(umem);
+
+	mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
+			   npas, NULL);
+
+	cq->resize_umem = umem;
+	*cqe_size = ucmd.cqe_size;
+
+	return 0;
+}
+
+/*
+ * Undo resize_user(): release the pinned resize umem and clear the
+ * pointer so the CQ does not keep a reference to released memory.
+ */
+static void un_resize_user(struct mlx5_ib_cq *cq)
+{
+	ib_umem_release(cq->resize_umem);
+	cq->resize_umem = NULL;
+}
+
+/*
+ * Allocate and initialise the buffer a kernel CQ will be resized into.
+ *
+ * On success cq->resize_buf describes a buffer of @entries CQEs of
+ * @cqe_size bytes whose entries have been initialised by init_cq_buf().
+ * On failure cq->resize_buf is left NULL.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+			 int entries, int cqe_size)
+{
+	int err;
+
+	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
+	if (!cq->resize_buf)
+		return -ENOMEM;
+
+	err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
+	if (err)
+		goto ex;
+
+	init_cq_buf(cq, cq->resize_buf);
+
+	return 0;
+
+ex:
+	kfree(cq->resize_buf);
+	/* do not leave a dangling pointer behind for later resize_buf checks */
+	cq->resize_buf = NULL;
+	return err;
+}
+
+/*
+ * Undo resize_kernel(): release the resize buffer and the descriptor
+ * that resize_kernel() allocated with kzalloc(), and clear the pointer.
+ */
+static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
+{
+	struct mlx5_ib_cq_buf *buf = cq->resize_buf;
+
+	cq->resize_buf = NULL;
+	free_cq_buf(dev, buf);
+	/* the descriptor itself was kzalloc'ed; free_cq_buf() only frees its buffer */
+	kfree(buf);
+}
+
+/*
+ * Copy the software-owned CQEs of the current buffer into cq->resize_buf.
+ *
+ * Starting at the consumer index, every CQE up to (not including) the
+ * RESIZE_CQ CQE is copied to slot (index + 1) of the resize buffer with
+ * its ownership bit recomputed for the new ring size.  The consumer
+ * index is then advanced by one.
+ *
+ * Returns 0 on success, -EINVAL if the CQE sizes differ or a CQE is not
+ * in software ownership, -ENOMEM if the scan wraps around without
+ * finding the RESIZE_CQ CQE.
+ */
+static int copy_resize_cqes(struct mlx5_ib_cq *cq)
+{
+	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+	struct mlx5_cqe64 *scqe64;
+	struct mlx5_cqe64 *dcqe64;
+	void *start_cqe;
+	void *scqe;
+	void *dcqe;
+	int ssize;
+	int dsize;
+	int nent;
+	int i;
+	u8 sw_own;
+
+	ssize = cq->buf.cqe_size;
+	dsize = cq->resize_buf->cqe_size;
+	if (ssize != dsize) {
+		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
+		return -EINVAL;
+	}
+
+	nent = cq->resize_buf->nent;
+	i = cq->mcq.cons_index;
+	scqe = get_sw_cqe(cq, i);
+	if (!scqe) {
+		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
+		return -EINVAL;
+	}
+	/* only derive the 64-byte view once scqe is known to be valid */
+	scqe64 = ssize == 64 ? scqe : scqe + 64;
+	start_cqe = scqe;
+
+	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
+		/*
+		 * nent is the number of entries, so the slot index must be
+		 * masked with nent - 1; masking with nent itself yields only
+		 * slot 0 or one slot past the end of the buffer.
+		 */
+		dcqe = get_cqe_from_buf(cq->resize_buf,
+					(i + 1) & (nent - 1),
+					dsize);
+		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
+		sw_own = sw_ownership_bit(i + 1, nent);
+		memcpy(dcqe, scqe, dsize);
+		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;
+
+		++i;
+		scqe = get_sw_cqe(cq, i);
+		if (!scqe) {
+			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
+			return -EINVAL;
+		}
+		scqe64 = ssize == 64 ? scqe : scqe + 64;
+
+		if (scqe == start_cqe) {
+			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
+				cq->mcq.cqn);
+			return -ENOMEM;
+		}
+	}
+	++cq->mcq.cons_index;
+	return 0;
}
int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
- return -ENOSYS;
+ struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
+ struct mlx5_modify_cq_mbox_in *in;
+ int err;
+ int npas;
+ int page_shift;
+ int inlen;
+ int uninitialized_var(cqe_size);
+ unsigned long flags;
+
+ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
+ pr_info("Firmware does not support resize CQ\n");
+ return -ENOSYS;
+ }
+
+ if (entries < 1)
+ return -EINVAL;
+
+ entries = roundup_pow_of_two(entries + 1);
+ if (entries > dev->mdev.caps.max_cqes + 1)
+ return -EINVAL;
+
+ if (entries == ibcq->cqe + 1)
+ return 0;
+
+ mutex_lock(&cq->resize_mutex);
+ if (udata) {
+ err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
+ &cqe_size);
+ } else {
+ cqe_size = 64;
+ err = resize_kernel(dev, cq, entries, cqe_size);
+ if (!err) {
+ npas = cq->resize_buf->buf.npages;
+ page_shift = cq->resize_buf->buf.page_shift;
+ }
+ }
+
+ if (err)
+ goto ex;
+
+ inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ err = -ENOMEM;
+ goto ex_resize;
+ }
+
+ if (udata)
+ mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
+ in->pas, 0);
+ else
+ mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);
+
+ in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE |
+ MLX5_MODIFY_CQ_MASK_PG_OFFSET |
+ MLX5_MODIFY_CQ_MASK_PG_SIZE);
+ in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+ in->ctx.page_offset = 0;
+ in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
+ in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
+ in->cqn = cpu_to_be32(cq->mcq.cqn);
+
+ err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen);
+ if (err)
+ goto ex_alloc;
+
+ if (udata) {
+ cq->ibcq.cqe = entries - 1;
+ ib_umem_release(cq->buf.umem);
+ cq->buf.umem = cq->resize_umem;
+ cq->resize_umem = NULL;
+ } else {
+ struct mlx5_ib_cq_buf tbuf;
+ int resized = 0;
+
+ spin_lock_irqsave(&cq->lock, flags);
+ if (cq->resize_buf) {
+ err = copy_resize_cqes(cq);
+ if (!err) {
+ tbuf = cq->buf;
+ cq->buf = *cq->resize_buf;
+ kfree(cq->resize_buf);
+ cq->resize_buf = NULL;
+ resized = 1;
+ }
+ }
+ cq->ibcq.cqe = entries - 1;
+ spin_unlock_irqrestore(&cq->lock, flags);
+ if (resized)
+ free_cq_buf(dev, &tbuf);
+ }
+ mutex_unlock(&cq->resize_mutex);
+
+ mlx5_vfree(in);
+ return 0;
+
+ex_alloc:
+ mlx5_vfree(in);
+
+ex_resize:
+ if (udata)
+ un_resize_user(cq);
+ else
+ un_resize_kernel(dev, cq);
+ex:
+ mutex_unlock(&cq->resize_mutex);
+ return err;
}
int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index 256a23344f2..ece028fc47d 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -47,7 +47,6 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
struct mlx5_db *db)
{
struct mlx5_ib_user_db_page *page;
- struct ib_umem_chunk *chunk;
int err = 0;
mutex_lock(&context->db_page_mutex);
@@ -75,8 +74,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
list_add(&page->list, &context->db_page_list);
found:
- chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
- db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
+ db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 3f831de9a4d..364d4b6937f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -46,8 +46,8 @@
#include "mlx5_ib.h"
#define DRIVER_NAME "mlx5_ib"
-#define DRIVER_VERSION "1.0"
-#define DRIVER_RELDATE "June 2013"
+#define DRIVER_VERSION "2.2-1"
+#define DRIVER_RELDATE "Feb 2014"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
@@ -164,6 +164,7 @@ int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)
static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
{
struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
+ char name[MLX5_MAX_EQ_NAME];
struct mlx5_eq *eq, *n;
int ncomp_vec;
int nent;
@@ -180,11 +181,10 @@ static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
goto clean;
}
- snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
+ snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
err = mlx5_create_map_eq(&dev->mdev, eq,
i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
- eq->name,
- &dev->mdev.priv.uuari.uars[0]);
+ name, &dev->mdev.priv.uuari.uars[0]);
if (err) {
kfree(eq);
goto clean;
@@ -261,8 +261,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
- IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ IB_DEVICE_RC_RNR_NAK_GEN;
flags = dev->mdev.caps.flags;
if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
@@ -274,6 +273,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (flags & MLX5_DEV_CAP_FLAG_XRC)
props->device_cap_flags |= IB_DEVICE_XRC;
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) {
+ props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
+ /* At this stage no support for signature handover */
+ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
+ IB_PROT_T10DIF_TYPE_2 |
+ IB_PROT_T10DIF_TYPE_3;
+ props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
+ IB_GUARD_T10DIF_CSUM;
+ }
+ if (flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)
+ props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) &
0xffffff;
@@ -301,9 +311,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len = (unsigned int)-1;
props->local_ca_ack_delay = dev->mdev.caps.local_ca_ack_delay;
- props->atomic_cap = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ?
- IB_ATOMIC_HCA : IB_ATOMIC_NONE;
- props->masked_atomic_cap = IB_ATOMIC_HCA;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28));
props->max_mcast_grp = 1 << dev->mdev.caps.log_max_mcg;
props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg;
@@ -537,34 +546,51 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_alloc_ucontext_req req;
+ struct mlx5_ib_alloc_ucontext_req_v2 req;
struct mlx5_ib_alloc_ucontext_resp resp;
struct mlx5_ib_ucontext *context;
struct mlx5_uuar_info *uuari;
struct mlx5_uar *uars;
+ int gross_uuars;
int num_uars;
+ int ver;
int uuarn;
int err;
int i;
+ int reqlen;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
- err = ib_copy_from_udata(&req, udata, sizeof(req));
+ memset(&req, 0, sizeof(req));
+ reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
+ if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
+ ver = 0;
+ else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
+ ver = 2;
+ else
+ return ERR_PTR(-EINVAL);
+
+ err = ib_copy_from_udata(&req, udata, reqlen);
if (err)
return ERR_PTR(err);
+ if (req.flags || req.reserved)
+ return ERR_PTR(-EINVAL);
+
if (req.total_num_uuars > MLX5_MAX_UUARS)
return ERR_PTR(-ENOMEM);
if (req.total_num_uuars == 0)
return ERR_PTR(-EINVAL);
- req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE);
+ req.total_num_uuars = ALIGN(req.total_num_uuars,
+ MLX5_NON_FP_BF_REGS_PER_PAGE);
if (req.num_low_latency_uuars > req.total_num_uuars - 1)
return ERR_PTR(-EINVAL);
- num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE;
+ num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
+ gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp;
resp.bf_reg_size = dev->mdev.caps.bf_reg_size;
resp.cache_line_size = L1_CACHE_BYTES;
@@ -586,7 +612,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
goto out_ctx;
}
- uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars),
+ uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
sizeof(*uuari->bitmap),
GFP_KERNEL);
if (!uuari->bitmap) {
@@ -596,13 +622,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
/*
* clear all fast path uuars
*/
- for (i = 0; i < req.total_num_uuars; i++) {
+ for (i = 0; i < gross_uuars; i++) {
uuarn = i & 3;
if (uuarn == 2 || uuarn == 3)
set_bit(i, uuari->bitmap);
}
- uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL);
+ uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
if (!uuari->count) {
err = -ENOMEM;
goto out_bitmap;
@@ -624,6 +650,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (err)
goto out_uars;
+ uuari->ver = ver;
uuari->num_low_latency_uuars = req.num_low_latency_uuars;
uuari->uars = uars;
uuari->num_uars = num_uars;
@@ -746,7 +773,8 @@ static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
seg->start_addr = 0;
- err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
+ err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in),
+ NULL, NULL, NULL);
if (err) {
mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
goto err_in;
@@ -1006,6 +1034,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
ibev.device = &ibdev->ib_dev;
ibev.element.port_num = port;
+ if (port < 1 || port > ibdev->num_ports) {
+ mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
+ return;
+ }
+
if (ibdev->ib_active)
ib_dispatch_event(&ibev);
}
@@ -1401,12 +1434,15 @@ static int init_one(struct pci_dev *pdev,
dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
+ dev->ib_dev.destroy_mr = mlx5_ib_destroy_mr;
dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
dev->ib_dev.process_mad = mlx5_ib_process_mad;
+ dev->ib_dev.create_mr = mlx5_ib_create_mr;
dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
+ dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 3a5322870b9..8499aec94db 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -44,16 +44,17 @@
void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
int *ncont, int *order)
{
- struct ib_umem_chunk *chunk;
unsigned long tmp;
unsigned long m;
- int i, j, k;
+ int i, k;
u64 base = 0;
int p = 0;
int skip;
int mask;
u64 len;
u64 pfn;
+ struct scatterlist *sg;
+ int entry;
addr = addr >> PAGE_SHIFT;
tmp = (unsigned long)addr;
@@ -61,32 +62,31 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
skip = 1 << m;
mask = skip - 1;
i = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; j++) {
- len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
- pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT;
- for (k = 0; k < len; k++) {
- if (!(i & mask)) {
- tmp = (unsigned long)pfn;
- m = min(m, find_first_bit(&tmp, sizeof(tmp)));
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
+ pfn = sg_dma_address(sg) >> PAGE_SHIFT;
+ for (k = 0; k < len; k++) {
+ if (!(i & mask)) {
+ tmp = (unsigned long)pfn;
+ m = min(m, find_first_bit(&tmp, sizeof(tmp)));
+ skip = 1 << m;
+ mask = skip - 1;
+ base = pfn;
+ p = 0;
+ } else {
+ if (base + p != pfn) {
+ tmp = (unsigned long)p;
+ m = find_first_bit(&tmp, sizeof(tmp));
skip = 1 << m;
mask = skip - 1;
base = pfn;
p = 0;
- } else {
- if (base + p != pfn) {
- tmp = (unsigned long)p;
- m = find_first_bit(&tmp, sizeof(tmp));
- skip = 1 << m;
- mask = skip - 1;
- base = pfn;
- p = 0;
- }
}
- p++;
- i++;
}
+ p++;
+ i++;
}
+ }
if (i) {
m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
@@ -112,32 +112,32 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
{
int shift = page_shift - PAGE_SHIFT;
int mask = (1 << shift) - 1;
- struct ib_umem_chunk *chunk;
- int i, j, k;
+ int i, k;
u64 cur = 0;
u64 base;
int len;
+ struct scatterlist *sg;
+ int entry;
i = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; j++) {
- len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
- base = sg_dma_address(&chunk->page_list[j]);
- for (k = 0; k < len; k++) {
- if (!(i & mask)) {
- cur = base + (k << PAGE_SHIFT);
- if (umr)
- cur |= 3;
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
+ base = sg_dma_address(sg);
+ for (k = 0; k < len; k++) {
+ if (!(i & mask)) {
+ cur = base + (k << PAGE_SHIFT);
+ if (umr)
+ cur |= 3;
- pas[i >> shift] = cpu_to_be64(cur);
- mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
- i >> shift, be64_to_cpu(pas[i >> shift]));
- } else
- mlx5_ib_dbg(dev, "=====> 0x%llx\n",
- base + (k << PAGE_SHIFT));
- i++;
- }
+ pas[i >> shift] = cpu_to_be64(cur);
+ mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
+ i >> shift, be64_to_cpu(pas[i >> shift]));
+ } else
+ mlx5_ib_dbg(dev, "=====> 0x%llx\n",
+ base + (k << PAGE_SHIFT));
+ i++;
}
+ }
}
int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 836be915724..f2ccf1a5a29 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -189,12 +189,16 @@ struct mlx5_ib_qp {
int create_type;
u32 pa_lkey;
+
+ /* Store signature errors */
+ bool signature_en;
};
struct mlx5_ib_cq_buf {
struct mlx5_buf buf;
struct ib_umem *umem;
int cqe_size;
+ int nent;
};
enum mlx5_ib_qp_flags {
@@ -220,7 +224,7 @@ struct mlx5_ib_cq {
/* protect resize cq
*/
struct mutex resize_mutex;
- struct mlx5_ib_cq_resize *resize_buf;
+ struct mlx5_ib_cq_buf *resize_buf;
struct ib_umem *resize_umem;
int cqe_size;
};
@@ -260,8 +264,9 @@ struct mlx5_ib_mr {
__be64 *pas;
dma_addr_t dma;
int npages;
- struct completion done;
- enum ib_wc_status status;
+ struct mlx5_ib_dev *dev;
+ struct mlx5_create_mkey_mbox_out out;
+ struct mlx5_core_sig_ctx *sig;
};
struct mlx5_ib_fast_reg_page_list {
@@ -270,6 +275,17 @@ struct mlx5_ib_fast_reg_page_list {
dma_addr_t map;
};
+/* Status plus completion pair; set up by mlx5_ib_init_umr_context(). */
+struct mlx5_ib_umr_context {
+	enum ib_wc_status	status;
+	struct completion	done;
+};
+
+/* Prepare @context for use: completion initialised, status preset to -1. */
+static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
+{
+	init_completion(&context->done);
+	context->status = -1;
+}
+
struct umr_common {
struct ib_pd *pd;
struct ib_cq *cq;
@@ -323,6 +339,7 @@ struct mlx5_cache_ent {
struct mlx5_ib_dev *dev;
struct work_struct work;
struct delayed_work dwork;
+ int pending;
};
struct mlx5_mr_cache {
@@ -358,6 +375,8 @@ struct mlx5_ib_dev {
spinlock_t mr_lock;
struct mlx5_ib_resources devr;
struct mlx5_mr_cache cache;
+ struct timer_list delay_timer;
+ int fill_delay;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -390,6 +409,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
return container_of(mqp, struct mlx5_ib_qp, mqp);
}
+/* Map a core MR back to the mlx5_ib_mr that embeds it as its 'mmr' member. */
+static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
+{
+	struct mlx5_ib_mr *mr = container_of(mmr, struct mlx5_ib_mr, mmr);
+
+	return mr;
+}
+
static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct mlx5_ib_pd, ibpd);
@@ -489,6 +513,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
+int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
+struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr);
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len);
struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
@@ -524,6 +551,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
+int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
+ struct ib_mr_status *mr_status);
static inline void init_query_mad(struct ib_smp *mad)
{
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index bd41df95b6f..afa873bd028 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -35,11 +35,16 @@
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
+#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"
enum {
- DEF_CACHE_SIZE = 10,
+ MAX_PENDING_REG_MR = 8,
+};
+
+enum {
+ MLX5_UMR_ALIGN = 2048
};
static __be64 *mr_align(__be64 *ptr, int align)
@@ -59,15 +64,67 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
return order - cache->ent[0].order;
}
+/*
+ * Completion callback for the asynchronous mkey creation issued by
+ * add_keys().
+ *
+ * @status:  result reported for the command itself
+ * @context: the struct mlx5_ib_mr the mkey was requested for
+ *
+ * The pending count of the MR's cache entry is dropped first.  If the
+ * command failed, or the output header carries an error status, the MR
+ * is freed, fill_delay is set and the delay timer is armed one second
+ * ahead.  Otherwise the key is completed with the next mkey_key byte,
+ * the MR is appended to its cache entry and inserted into the MR table.
+ */
+static void reg_mr_callback(int status, void *context)
+{
+	struct mlx5_ib_mr *mr = context;
+	struct mlx5_ib_dev *dev = mr->dev;
+	struct mlx5_mr_cache *cache = &dev->cache;
+	int c = order2idx(dev, mr->order);
+	struct mlx5_cache_ent *ent = &cache->ent[c];
+	u8 key;
+	unsigned long flags;
+	struct mlx5_mr_table *table = &dev->mdev.priv.mr_table;
+	int err;
+
+	spin_lock_irqsave(&ent->lock, flags);
+	ent->pending--;
+	spin_unlock_irqrestore(&ent->lock, flags);
+	if (status) {
+		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
+		goto err_free;
+	}
+
+	if (mr->out.hdr.status) {
+		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
+			     mr->out.hdr.status,
+			     be32_to_cpu(mr->out.hdr.syndrome));
+		goto err_free;
+	}
+
+	spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
+	key = dev->mdev.priv.mkey_key++;
+	spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
+	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+
+	cache->last_add = jiffies;
+
+	spin_lock_irqsave(&ent->lock, flags);
+	list_add_tail(&mr->list, &ent->head);
+	ent->cur++;
+	ent->size++;
+	spin_unlock_irqrestore(&ent->lock, flags);
+
+	write_lock_irqsave(&table->lock, flags);
+	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
+				&mr->mmr);
+	if (err)
+		pr_err("Error inserting to mr tree. 0x%x\n", -err);
+	write_unlock_irqrestore(&table->lock, flags);
+	return;
+
+err_free:
+	/* shared failure path: drop the MR and back off before refilling */
+	kfree(mr);
+	dev->fill_delay = 1;
+	mod_timer(&dev->delay_timer, jiffies + HZ);
+}
+
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
- struct device *ddev = dev->ib_dev.dma_device;
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
struct mlx5_create_mkey_mbox_in *in;
struct mlx5_ib_mr *mr;
int npages = 1 << ent->order;
- int size = sizeof(u64) * npages;
int err = 0;
int i;
@@ -76,87 +133,66 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
return -ENOMEM;
for (i = 0; i < num; i++) {
+ if (ent->pending >= MAX_PENDING_REG_MR) {
+ err = -EAGAIN;
+ break;
+ }
+
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr) {
err = -ENOMEM;
- goto out;
+ break;
}
mr->order = ent->order;
mr->umred = 1;
- mr->pas = kmalloc(size + 0x3f, GFP_KERNEL);
- if (!mr->pas) {
- kfree(mr);
- err = -ENOMEM;
- goto out;
- }
- mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size,
- DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, mr->dma)) {
- kfree(mr->pas);
- kfree(mr);
- err = -ENOMEM;
- goto out;
- }
-
+ mr->dev = dev;
in->seg.status = 1 << 6;
in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
in->seg.log2_page_size = 12;
+ spin_lock_irq(&ent->lock);
+ ent->pending++;
+ spin_unlock_irq(&ent->lock);
err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
- sizeof(*in));
+ sizeof(*in), reg_mr_callback,
+ mr, &mr->out);
if (err) {
mlx5_ib_warn(dev, "create mkey failed %d\n", err);
- dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
- kfree(mr->pas);
kfree(mr);
- goto out;
+ break;
}
- cache->last_add = jiffies;
-
- spin_lock(&ent->lock);
- list_add_tail(&mr->list, &ent->head);
- ent->cur++;
- ent->size++;
- spin_unlock(&ent->lock);
}
-out:
kfree(in);
return err;
}
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
- struct device *ddev = dev->ib_dev.dma_device;
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
struct mlx5_ib_mr *mr;
- int size;
int err;
int i;
for (i = 0; i < num; i++) {
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
return;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
list_del(&mr->list);
ent->cur--;
ent->size--;
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
- if (err) {
+ if (err)
mlx5_ib_warn(dev, "failed destroy mkey\n");
- } else {
- size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
- dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
- kfree(mr->pas);
+ else
kfree(mr);
- }
}
}
@@ -183,9 +219,13 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
return -EINVAL;
if (var > ent->size) {
- err = add_keys(dev, c, var - ent->size);
- if (err)
- return err;
+ do {
+ err = add_keys(dev, c, var - ent->size);
+ if (err && err != -EAGAIN)
+ return err;
+
+ usleep_range(3000, 5000);
+ } while (err);
} else if (var < ent->size) {
remove_keys(dev, c, ent->size - var);
}
@@ -301,23 +341,37 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
struct mlx5_ib_dev *dev = ent->dev;
struct mlx5_mr_cache *cache = &dev->cache;
int i = order2idx(dev, ent->order);
+ int err;
if (cache->stopped)
return;
ent = &dev->cache.ent[i];
- if (ent->cur < 2 * ent->limit) {
- add_keys(dev, i, 1);
- if (ent->cur < 2 * ent->limit)
- queue_work(cache->wq, &ent->work);
+ if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
+ err = add_keys(dev, i, 1);
+ if (ent->cur < 2 * ent->limit) {
+ if (err == -EAGAIN) {
+ mlx5_ib_dbg(dev, "returned eagain, order %d\n",
+ i + 2);
+ queue_delayed_work(cache->wq, &ent->dwork,
+ msecs_to_jiffies(3));
+ } else if (err) {
+ mlx5_ib_warn(dev, "command failed order %d, err %d\n",
+ i + 2, err);
+ queue_delayed_work(cache->wq, &ent->dwork,
+ msecs_to_jiffies(1000));
+ } else {
+ queue_work(cache->wq, &ent->work);
+ }
+ }
} else if (ent->cur > 2 * ent->limit) {
if (!someone_adding(cache) &&
- time_after(jiffies, cache->last_add + 60 * HZ)) {
+ time_after(jiffies, cache->last_add + 300 * HZ)) {
remove_keys(dev, i, 1);
if (ent->cur > ent->limit)
queue_work(cache->wq, &ent->work);
} else {
- queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
+ queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
}
}
}
@@ -357,18 +411,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
if (!list_empty(&ent->head)) {
mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
list);
list_del(&mr->list);
ent->cur--;
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
if (ent->cur < ent->limit)
queue_work(cache->wq, &ent->work);
break;
}
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
queue_work(cache->wq, &ent->work);
@@ -395,12 +449,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
return;
}
ent = &cache->ent[c];
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
list_add_tail(&mr->list, &ent->head);
ent->cur++;
if (ent->cur > 2 * ent->limit)
shrink = 1;
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
if (shrink)
queue_work(cache->wq, &ent->work);
@@ -408,33 +462,28 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
- struct device *ddev = dev->ib_dev.dma_device;
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
struct mlx5_ib_mr *mr;
- int size;
int err;
+ cancel_delayed_work(&ent->dwork);
while (1) {
- spin_lock(&ent->lock);
+ spin_lock_irq(&ent->lock);
if (list_empty(&ent->head)) {
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
return;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
list_del(&mr->list);
ent->cur--;
ent->size--;
- spin_unlock(&ent->lock);
+ spin_unlock_irq(&ent->lock);
err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
- if (err) {
+ if (err)
mlx5_ib_warn(dev, "failed destroy mkey\n");
- } else {
- size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
- dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
- kfree(mr->pas);
+ else
kfree(mr);
- }
}
}
@@ -490,12 +539,18 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
debugfs_remove_recursive(dev->cache.root);
}
+/*
+ * Timer handler for dev->delay_timer: @ctx carries the mlx5_ib_dev pointer
+ * registered in mlx5_mr_cache_init(); firing simply clears fill_delay.
+ */
+static void delay_time_func(unsigned long ctx)
+{
+	((struct mlx5_ib_dev *)ctx)->fill_delay = 0;
+}
+
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
int limit;
- int size;
int err;
int i;
@@ -505,6 +560,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
return -ENOMEM;
}
+ setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
INIT_LIST_HEAD(&cache->ent[i].head);
spin_lock_init(&cache->ent[i].lock);
@@ -515,13 +571,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->order = i + 2;
ent->dev = dev;
- if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
- size = dev->mdev.profile->mr_cache[i].size;
+ if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
limit = dev->mdev.profile->mr_cache[i].limit;
- } else {
- size = DEF_CACHE_SIZE;
+ else
limit = 0;
- }
+
INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
ent->limit = limit;
@@ -540,13 +594,16 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
int i;
dev->cache.stopped = 1;
- destroy_workqueue(dev->cache.wq);
+ flush_workqueue(dev->cache.wq);
mlx5_mr_cache_debugfs_cleanup(dev);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
clean_keys(dev, i);
+ destroy_workqueue(dev->cache.wq);
+ del_timer_sync(&dev->delay_timer);
+
return 0;
}
@@ -575,7 +632,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
seg->start_addr = 0;
- err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
+ err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
+ NULL);
if (err)
goto err_in;
@@ -650,7 +708,7 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
- struct mlx5_ib_mr *mr;
+ struct mlx5_ib_umr_context *context;
struct ib_wc wc;
int err;
@@ -663,9 +721,9 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
if (err == 0)
break;
- mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
- mr->status = wc.status;
- complete(&mr->done);
+ context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
+ context->status = wc.status;
+ complete(&context->done);
}
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}
@@ -675,21 +733,24 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
int page_shift, int order, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct device *ddev = dev->ib_dev.dma_device;
struct umr_common *umrc = &dev->umrc;
+ struct mlx5_ib_umr_context umr_context;
struct ib_send_wr wr, *bad;
struct mlx5_ib_mr *mr;
struct ib_sge sg;
- int err;
+ int size = sizeof(u64) * npages;
+ int err = 0;
int i;
- for (i = 0; i < 10; i++) {
+ for (i = 0; i < 1; i++) {
mr = alloc_cached_mr(dev, order);
if (mr)
break;
err = add_keys(dev, order2idx(dev, order), 1);
- if (err) {
- mlx5_ib_warn(dev, "add_keys failed\n");
+ if (err && err != -EAGAIN) {
+ mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
break;
}
}
@@ -697,38 +758,58 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
if (!mr)
return ERR_PTR(-EAGAIN);
- mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1);
+ mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
+ if (!mr->pas) {
+ err = -ENOMEM;
+ goto free_mr;
+ }
+
+ mlx5_ib_populate_pas(dev, umem, page_shift,
+ mr_align(mr->pas, MLX5_UMR_ALIGN), 1);
+
+ mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, mr->dma)) {
+ err = -ENOMEM;
+ goto free_pas;
+ }
memset(&wr, 0, sizeof(wr));
- wr.wr_id = (u64)(unsigned long)mr;
+ wr.wr_id = (u64)(unsigned long)&umr_context;
prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
- /* We serialize polls so one process does not kidnap another's
- * completion. This is not a problem since wr is completed in
- * around 1 usec
- */
+ mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
- init_completion(&mr->done);
err = ib_post_send(umrc->qp, &wr, &bad);
if (err) {
mlx5_ib_warn(dev, "post send failed, err %d\n", err);
- up(&umrc->sem);
- goto error;
+ goto unmap_dma;
+ } else {
+ wait_for_completion(&umr_context.done);
+ if (umr_context.status != IB_WC_SUCCESS) {
+ mlx5_ib_warn(dev, "reg umr failed\n");
+ err = -EFAULT;
+ }
}
- wait_for_completion(&mr->done);
+
+ mr->mmr.iova = virt_addr;
+ mr->mmr.size = len;
+ mr->mmr.pd = to_mpd(pd)->pdn;
+
+unmap_dma:
up(&umrc->sem);
+ dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
- if (mr->status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed\n");
- err = -EFAULT;
- goto error;
+free_pas:
+ kfree(mr->pas);
+
+free_mr:
+ if (err) {
+ free_cached_mr(dev, mr);
+ return ERR_PTR(err);
}
return mr;
-
-error:
- free_cached_mr(dev, mr);
- return ERR_PTR(err);
}
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
@@ -763,8 +844,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
in->seg.log2_page_size = page_shift;
in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
- in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
- err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
+ in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
+ 1 << page_shift));
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
+ NULL, NULL);
if (err) {
mlx5_ib_warn(dev, "create mkey failed\n");
goto err_2;
@@ -855,24 +938,26 @@ error:
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct umr_common *umrc = &dev->umrc;
+ struct mlx5_ib_umr_context umr_context;
struct ib_send_wr wr, *bad;
int err;
memset(&wr, 0, sizeof(wr));
- wr.wr_id = (u64)(unsigned long)mr;
+ wr.wr_id = (u64)(unsigned long)&umr_context;
prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
+ mlx5_ib_init_umr_context(&umr_context);
down(&umrc->sem);
- init_completion(&mr->done);
err = ib_post_send(umrc->qp, &wr, &bad);
if (err) {
up(&umrc->sem);
mlx5_ib_dbg(dev, "err %d\n", err);
goto error;
+ } else {
+ wait_for_completion(&umr_context.done);
+ up(&umrc->sem);
}
- wait_for_completion(&mr->done);
- up(&umrc->sem);
- if (mr->status != IB_WC_SUCCESS) {
+ if (umr_context.status != IB_WC_SUCCESS) {
mlx5_ib_warn(dev, "unreg umr failed\n");
err = -EFAULT;
goto error;
@@ -921,6 +1006,122 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
return 0;
}
+/*
+ * mlx5_ib_create_mr - allocate an MR and create a UMR-enabled mkey for it.
+ * @pd:           protection domain whose pdn is written into the mkey.
+ * @mr_init_attr: supplies max_reg_descriptors and the creation flags.
+ *
+ * Without IB_MR_SIGNATURE_EN the mkey uses MLX5_ACCESS_MODE_MTT.  With it,
+ * a signature context and two PSVs (memory and wire) are allocated as
+ * well, BSF is enabled in the mkey segment and the access mode becomes
+ * MLX5_ACCESS_MODE_KLM.
+ *
+ * Returns a pointer to the embedded ib_mr on success, or an ERR_PTR()
+ * carrying -ENOMEM or the error returned by the mlx5_core call that failed.
+ */
+struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
+ struct ib_mr_init_attr *mr_init_attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_create_mkey_mbox_in *in;
+ struct mlx5_ib_mr *mr;
+ int access_mode, err;
+ /* Descriptor count is rounded up to a multiple of 4. */
+ int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ in->seg.status = 1 << 6; /* free */
+ in->seg.xlt_oct_size = cpu_to_be32(ndescs);
+ in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+ access_mode = MLX5_ACCESS_MODE_MTT;
+
+ if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
+ u32 psv_index[2];
+
+ /* OR the BSF enable bit into the already big-endian flags_pd. */
+ in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
+ MLX5_MKEY_BSF_EN);
+ in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+ mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
+ if (!mr->sig) {
+ err = -ENOMEM;
+ goto err_free_in;
+ }
+
+ /* create mem & wire PSVs */
+ err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn,
+ 2, psv_index);
+ if (err)
+ goto err_free_sig;
+
+ access_mode = MLX5_ACCESS_MODE_KLM;
+ mr->sig->psv_memory.psv_idx = psv_index[0];
+ mr->sig->psv_wire.psv_idx = psv_index[1];
+
+ /* Start with no signature error recorded and nothing left to check. */
+ mr->sig->sig_status_checked = true;
+ mr->sig->sig_err_exists = false;
+ /* Next UMR, Arm SIGERR */
+ ++mr->sig->sigerr_count;
+ }
+
+ in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
+ /* NULL callback/context/out: same form as the other non-cache callers in this file. */
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in),
+ NULL, NULL, NULL);
+ if (err)
+ goto err_destroy_psv;
+
+ mr->ibmr.lkey = mr->mmr.key;
+ mr->ibmr.rkey = mr->mmr.key;
+ mr->umem = NULL;
+ kfree(in);
+
+ return &mr->ibmr;
+
+ /*
+ * Unwind in reverse order of setup.  The PSVs exist only when a
+ * signature context was allocated, hence the mr->sig check.
+ */
+err_destroy_psv:
+ if (mr->sig) {
+ if (mlx5_core_destroy_psv(&dev->mdev,
+ mr->sig->psv_memory.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+ mr->sig->psv_memory.psv_idx);
+ if (mlx5_core_destroy_psv(&dev->mdev,
+ mr->sig->psv_wire.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+ mr->sig->psv_wire.psv_idx);
+ }
+err_free_sig:
+ /* mr->sig is NULL on the non-signature path; kfree(NULL) is a no-op. */
+ kfree(mr->sig);
+err_free_in:
+ kfree(in);
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+/*
+ * mlx5_ib_destroy_mr - release the resources mlx5_ib_create_mr() sets up.
+ * @ibmr: the MR to destroy.
+ *
+ * For a signature MR both PSVs are destroyed (a failure there is only
+ * logged) and the signature context is freed.  The mkey is destroyed
+ * next; only if that succeeds is the MR structure itself freed.
+ *
+ * Returns 0 on success or the error from mlx5_core_destroy_mkey(), in
+ * which case the MR is left allocated.
+ */
+int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+	int err;
+
+	if (mr->sig) {
+		if (mlx5_core_destroy_psv(&dev->mdev,
+					  mr->sig->psv_memory.psv_idx))
+			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+				     mr->sig->psv_memory.psv_idx);
+		if (mlx5_core_destroy_psv(&dev->mdev,
+					  mr->sig->psv_wire.psv_idx))
+			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+				     mr->sig->psv_wire.psv_idx);
+		kfree(mr->sig);
+		/*
+		 * The MR survives this call if destroying the mkey fails
+		 * below; clear the pointer so a later attempt does not
+		 * touch or free the signature context a second time.
+		 */
+		mr->sig = NULL;
+	}
+
+	err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
+	if (err) {
+		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
+			     mr->mmr.key, err);
+		return err;
+	}
+
+	kfree(mr);
+
+	return err;
+}
+
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len)
{
@@ -948,7 +1149,8 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
* TBD not needed - issue 197292 */
in->seg.log2_page_size = PAGE_SHIFT;
- err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
+ err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
+ NULL, NULL);
kfree(in);
if (err)
goto err_free;
@@ -1005,3 +1207,44 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
kfree(mfrpl->ibfrpl.page_list);
kfree(mfrpl);
}
+
+/*
+ * mlx5_ib_check_mr_status - report and clear a pending signature error.
+ * @ibmr:       MR to query.
+ * @check_mask: which checks to run; only IB_MR_CHECK_SIG_STATUS is accepted.
+ * @mr_status:  output; fail_status is reset and then gets
+ *              IB_MR_CHECK_SIG_STATUS set if an error was pending, with
+ *              the details placed in sig_err.
+ *
+ * Returns 0, or -EINVAL for an unsupported mask bit or when a signature
+ * check is requested on an MR that has no signature context.
+ */
+int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
+			    struct ib_mr_status *mr_status)
+{
+	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
+
+	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
+		pr_err("Invalid status check mask\n");
+		return -EINVAL;
+	}
+
+	mr_status->fail_status = 0;
+	if (!(check_mask & IB_MR_CHECK_SIG_STATUS))
+		return 0;
+
+	if (!mmr->sig) {
+		pr_err("signature status check requested on a non-signature enabled MR\n");
+		return -EINVAL;
+	}
+
+	mmr->sig->sig_status_checked = true;
+	if (!mmr->sig->sig_err_exists)
+		return 0;
+
+	if (ibmr->lkey != mmr->sig->err_item.key) {
+		/* Recorded error carries a different key: report a generic guard error. */
+		mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
+		mr_status->sig_err.sig_err_offset = 0;
+		mr_status->sig_err.key = mmr->sig->err_item.key;
+	} else {
+		memcpy(&mr_status->sig_err, &mmr->sig->err_item,
+		       sizeof(mr_status->sig_err));
+	}
+
+	/* The error has now been handed to the caller. */
+	mmr->sig->sig_err_exists = false;
+	mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 045f8cdbd30..bbbcf389272 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -203,7 +203,7 @@ static int sq_overhead(enum ib_qp_type qp_type)
switch (qp_type) {
case IB_QPT_XRC_INI:
- size = sizeof(struct mlx5_wqe_xrc_seg);
+ size += sizeof(struct mlx5_wqe_xrc_seg);
/* fall through */
case IB_QPT_RC:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
@@ -211,20 +211,25 @@ static int sq_overhead(enum ib_qp_type qp_type)
sizeof(struct mlx5_wqe_raddr_seg);
break;
+ case IB_QPT_XRC_TGT:
+ return 0;
+
case IB_QPT_UC:
- size = sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_raddr_seg);
+ size += sizeof(struct mlx5_wqe_ctrl_seg) +
+ sizeof(struct mlx5_wqe_raddr_seg) +
+ sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+ sizeof(struct mlx5_mkey_seg);
break;
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
- size = sizeof(struct mlx5_wqe_ctrl_seg) +
+ size += sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_datagram_seg);
break;
case MLX5_IB_QPT_REG_UMR:
- size = sizeof(struct mlx5_wqe_ctrl_seg) +
+ size += sizeof(struct mlx5_wqe_ctrl_seg) +
sizeof(struct mlx5_wqe_umr_ctrl_seg) +
sizeof(struct mlx5_mkey_seg);
break;
@@ -251,8 +256,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
}
size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
-
- return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
+ if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN &&
+ ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
+ return MLX5_SIG_WQE_SIZE;
+ else
+ return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
}
static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
@@ -270,7 +278,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
return wqe_size;
if (wqe_size > dev->mdev.caps.max_sq_desc_sz) {
- mlx5_ib_dbg(dev, "\n");
+ mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n",
+ wqe_size, dev->mdev.caps.max_sq_desc_sz);
return -EINVAL;
}
@@ -278,11 +287,20 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
sizeof(struct mlx5_wqe_inline_seg);
attr->cap.max_inline_data = qp->max_inline_data;
+ if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
+ qp->signature_en = true;
+
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
+ if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) {
+ mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n",
+ qp->sq.wqe_cnt, dev->mdev.caps.max_wqes);
+ return -ENOMEM;
+ }
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
qp->sq.max_gs = attr->cap.max_send_sge;
- qp->sq.max_post = 1 << ilog2(wq_size / wqe_size);
+ qp->sq.max_post = wq_size / wqe_size;
+ attr->cap.max_send_wr = qp->sq.max_post;
return wq_size;
}
@@ -330,14 +348,57 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
return 1;
}
+/*
+ * Index at which alloc_med_class_uuar() starts scanning for a
+ * medium-class UUAR; always 1.
+ */
+static int first_med_uuar(void)
+{
+ return 1;
+}
+
+/*
+ * Return the next index after @n whose position within its group of four
+ * does not have bit 1 set, i.e. skip slots 2 and 3 of every group.
+ */
+static int next_uuar(int n)
+{
+	do {
+		n++;
+	} while ((n % 4) & 2);
+
+	return n;
+}
+
+/*
+ * Number of medium-class UUARs: num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE,
+ * minus the low-latency ones and one more, never less than zero.
+ */
+static int num_med_uuar(struct mlx5_uuar_info *uuari)
+{
+	int count = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
+		    uuari->num_low_latency_uuars - 1;
+
+	if (count < 0)
+		return 0;
+
+	return count;
+}
+
+/*
+ * Exclusive upper bound on UUAR indices (num_uars * 4); the scan in
+ * alloc_high_class_uuar() stops here.
+ */
+static int max_uuari(struct mlx5_uuar_info *uuari)
+{
+ return uuari->num_uars * 4;
+}
+
+/*
+ * Index of the first high-class UUAR: start at first_med_uuar() and step
+ * past num_med_uuar() entries with next_uuar().
+ *
+ * Written as a bounded loop so that a medium count of zero, which
+ * num_med_uuar() can return, yields first_med_uuar() instead of spinning
+ * forever waiting for a counter that starts at 1 to equal 0.
+ */
+static int first_hi_uuar(struct mlx5_uuar_info *uuari)
+{
+	int med = num_med_uuar(uuari);
+	int i = first_med_uuar();
+
+	while (med-- > 0)
+		i = next_uuar(i);
+
+	return i;
+}
+
static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
{
- int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
- int start_uuar;
int i;
- start_uuar = nuuars - uuari->num_low_latency_uuars;
- for (i = start_uuar; i < nuuars; i++) {
+ for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
if (!test_bit(i, uuari->bitmap)) {
set_bit(i, uuari->bitmap);
uuari->count[i]++;
@@ -350,19 +411,10 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
{
- int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
- int minidx = 1;
- int uuarn;
- int end;
+ int minidx = first_med_uuar();
int i;
- end = nuuars - uuari->num_low_latency_uuars;
-
- for (i = 1; i < end; i++) {
- uuarn = i & 3;
- if (uuarn == 2 || uuarn == 3)
- continue;
-
+ for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
if (uuari->count[i] < uuari->count[minidx])
minidx = i;
}
@@ -384,11 +436,17 @@ static int alloc_uuar(struct mlx5_uuar_info *uuari,
break;
case MLX5_IB_LATENCY_CLASS_MEDIUM:
- uuarn = alloc_med_class_uuar(uuari);
+ if (uuari->ver < 2)
+ uuarn = -ENOMEM;
+ else
+ uuarn = alloc_med_class_uuar(uuari);
break;
case MLX5_IB_LATENCY_CLASS_HIGH:
- uuarn = alloc_high_class_uuar(uuari);
+ if (uuari->ver < 2)
+ uuarn = -ENOMEM;
+ else
+ uuarn = alloc_high_class_uuar(uuari);
break;
case MLX5_IB_LATENCY_CLASS_FAST_PATH:
@@ -479,12 +537,12 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
{
struct mlx5_ib_ucontext *context;
struct mlx5_ib_create_qp ucmd;
- int page_shift;
+ int page_shift = 0;
int uar_index;
int npages;
- u32 offset;
+ u32 offset = 0;
int uuarn;
- int ncont;
+ int ncont = 0;
int err;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
@@ -500,38 +558,53 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
if (uuarn < 0) {
mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to high latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ mlx5_ib_dbg(dev, "reverting to medium latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
if (uuarn < 0) {
- mlx5_ib_dbg(dev, "uuar allocation failed\n");
- return uuarn;
+ mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to high latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ if (uuarn < 0) {
+ mlx5_ib_warn(dev, "uuar allocation failed\n");
+ return uuarn;
+ }
}
}
uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
+ qp->rq.offset = 0;
+ qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
+ qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+
err = set_user_buf_size(dev, qp, &ucmd);
if (err)
goto err_uuar;
- qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0, 0);
- if (IS_ERR(qp->umem)) {
- mlx5_ib_dbg(dev, "umem_get failed\n");
- err = PTR_ERR(qp->umem);
- goto err_uuar;
+ if (ucmd.buf_addr && qp->buf_size) {
+ qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+ qp->buf_size, 0, 0);
+ if (IS_ERR(qp->umem)) {
+ mlx5_ib_dbg(dev, "umem_get failed\n");
+ err = PTR_ERR(qp->umem);
+ goto err_uuar;
+ }
+ } else {
+ qp->umem = NULL;
}
- mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
- &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
+ if (qp->umem) {
+ mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
+ &ncont, NULL);
+ err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
+ if (err) {
+ mlx5_ib_warn(dev, "bad offset\n");
+ goto err_umem;
+ }
+ mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
+ ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
}
- mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
- ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
*in = mlx5_vzalloc(*inlen);
@@ -539,9 +612,10 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err = -ENOMEM;
goto err_umem;
}
- mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+ if (qp->umem)
+ mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
(*in)->ctx.log_pg_sz_remote_qpn =
- cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
+ cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
(*in)->ctx.params2 = cpu_to_be32(offset << 6);
(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
@@ -570,7 +644,8 @@ err_free:
mlx5_vfree(*in);
err_umem:
- ib_umem_release(qp->umem);
+ if (qp->umem)
+ ib_umem_release(qp->umem);
err_uuar:
free_uuar(&context->uuari, uuarn);
@@ -583,7 +658,8 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &qp->db);
- ib_umem_release(qp->umem);
+ if (qp->umem)
+ ib_umem_release(qp->umem);
free_uuar(&context->uuari, qp->uuarn);
}
@@ -599,8 +675,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
int err;
uuari = &dev->mdev.priv.uuari;
- if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
- qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+ if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
+ return -EINVAL;
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
@@ -638,7 +714,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
goto err_buf;
}
(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
- (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
+ (*in)->ctx.log_pg_sz_remote_qpn =
+ cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
/* Set "fast registration enabled" for all kernel QPs */
(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
@@ -734,6 +811,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
+ if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+ if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) {
+ mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
+ return -EINVAL;
+ } else {
+ qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+ }
+ }
+
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
@@ -805,6 +891,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (qp->wq_sig)
in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
+ if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
+ in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
+
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
int rcqe_sz;
int scqe_sz;
@@ -1280,6 +1369,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
+ MLX5_QP_OPTPAR_RRE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_PKEY_INDEX,
},
},
[MLX5_QP_STATE_RTR] = {
@@ -1302,9 +1396,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_RNR_TIMEOUT |
- MLX5_QP_OPTPAR_PM_STATE,
+ MLX5_QP_OPTPAR_PM_STATE |
+ MLX5_QP_OPTPAR_ALT_ADDR_PATH,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
- MLX5_QP_OPTPAR_PM_STATE,
+ MLX5_QP_OPTPAR_PM_STATE |
+ MLX5_QP_OPTPAR_ALT_ADDR_PATH,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY |
MLX5_QP_OPTPAR_SRQN |
MLX5_QP_OPTPAR_CQN_RCV,
@@ -1314,6 +1410,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
[MLX5_QP_STATE_RTS] = {
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE,
+ [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RRE,
},
},
};
@@ -1530,7 +1631,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
mlx5_cur = to_mlx5_state(cur_state);
mlx5_new = to_mlx5_state(new_state);
mlx5_st = to_mlx5_st(ibqp->qp_type);
- if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0)
+ if (mlx5_st < 0)
goto out;
optpar = ib_mask_to_mlx5_opt(attr_mask);
@@ -1593,7 +1694,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
- !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
+ !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
+ IB_LINK_LAYER_UNSPECIFIED))
goto out;
if ((attr_mask & IB_QP_PORT) &&
@@ -1651,29 +1753,6 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
rseg->reserved = 0;
}
-static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
-{
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
- } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
- } else {
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
- aseg->compare = 0;
- }
-}
-
-static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg,
- struct ib_send_wr *wr)
-{
- aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
- aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
- aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
- aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
-}
-
static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
struct ib_send_wr *wr)
{
@@ -1714,6 +1793,27 @@ static __be64 frwr_mkey_mask(void)
return cpu_to_be64(result);
}
+/*
+ * Big-endian OR of the MLX5_MKEY_MASK_* bits listed below.
+ */
+static __be64 sig_mkey_mask(void)
+{
+	const u64 mask = MLX5_MKEY_MASK_LEN		|
+			 MLX5_MKEY_MASK_PAGE_SIZE	|
+			 MLX5_MKEY_MASK_START_ADDR	|
+			 MLX5_MKEY_MASK_EN_SIGERR	|
+			 MLX5_MKEY_MASK_EN_RINVAL	|
+			 MLX5_MKEY_MASK_KEY		|
+			 MLX5_MKEY_MASK_LR		|
+			 MLX5_MKEY_MASK_LW		|
+			 MLX5_MKEY_MASK_RR		|
+			 MLX5_MKEY_MASK_RW		|
+			 MLX5_MKEY_MASK_SMALL_FENCE	|
+			 MLX5_MKEY_MASK_FREE		|
+			 MLX5_MKEY_MASK_BSF_EN;
+
+	return cpu_to_be64(mask);
+}
+
static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
struct ib_send_wr *wr, int li)
{
@@ -1747,6 +1847,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
MLX5_MKEY_MASK_PD |
MLX5_MKEY_MASK_LR |
MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW |
MLX5_MKEY_MASK_A |
@@ -1768,7 +1869,7 @@ static u8 get_umr_flags(int acc)
(acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
(acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
- MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
+ MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
}
static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
@@ -1780,7 +1881,8 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
return;
}
- seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags);
+ seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) |
+ MLX5_ACCESS_MODE_MTT;
*writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
@@ -1803,7 +1905,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
seg->len = cpu_to_be64(wr->wr.fast_reg.length);
seg->log2_page_size = wr->wr.fast_reg.page_shift;
- seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+ seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
+ mlx5_mkey_variant(wr->wr.fast_reg.rkey));
}
static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
@@ -1895,6 +1998,342 @@ static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
return 0;
}
+static u16 prot_field_size(enum ib_signature_type type)
+{
+ switch (type) {
+ case IB_SIG_TYPE_T10_DIF:
+ return MLX5_DIF_SIZE;
+ default:
+ return 0;
+ }
+}
+
+static u8 bs_selector(int block_size)
+{
+ switch (block_size) {
+ case 512: return 0x1;
+ case 520: return 0x2;
+ case 4096: return 0x3;
+ case 4160: return 0x4;
+ case 1073741824: return 0x5;
+ default: return 0;
+ }
+}
+
+static int format_selector(struct ib_sig_attrs *attr,
+ struct ib_sig_domain *domain,
+ int *selector)
+{
+
+#define FORMAT_DIF_NONE 0
+#define FORMAT_DIF_CRC_INC 8
+#define FORMAT_DIF_CRC_NO_INC 12
+#define FORMAT_DIF_CSUM_INC 13
+#define FORMAT_DIF_CSUM_NO_INC 14
+
+ switch (domain->sig.dif.type) {
+ case IB_T10DIF_NONE:
+ /* No DIF */
+ *selector = FORMAT_DIF_NONE;
+ break;
+ case IB_T10DIF_TYPE1: /* Fall through */
+ case IB_T10DIF_TYPE2:
+ switch (domain->sig.dif.bg_type) {
+ case IB_T10DIF_CRC:
+ *selector = FORMAT_DIF_CRC_INC;
+ break;
+ case IB_T10DIF_CSUM:
+ *selector = FORMAT_DIF_CSUM_INC;
+ break;
+ default:
+ return 1;
+ }
+ break;
+ case IB_T10DIF_TYPE3:
+ switch (domain->sig.dif.bg_type) {
+ case IB_T10DIF_CRC:
+ *selector = domain->sig.dif.type3_inc_reftag ?
+ FORMAT_DIF_CRC_INC :
+ FORMAT_DIF_CRC_NO_INC;
+ break;
+ case IB_T10DIF_CSUM:
+ *selector = domain->sig.dif.type3_inc_reftag ?
+ FORMAT_DIF_CSUM_INC :
+ FORMAT_DIF_CSUM_NO_INC;
+ break;
+ default:
+ return 1;
+ }
+ break;
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
+static int mlx5_set_bsf(struct ib_mr *sig_mr,
+ struct ib_sig_attrs *sig_attrs,
+ struct mlx5_bsf *bsf, u32 data_size)
+{
+ struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
+ struct mlx5_bsf_basic *basic = &bsf->basic;
+ struct ib_sig_domain *mem = &sig_attrs->mem;
+ struct ib_sig_domain *wire = &sig_attrs->wire;
+ int ret, selector;
+
+ memset(bsf, 0, sizeof(*bsf));
+ switch (sig_attrs->mem.sig_type) {
+ case IB_SIG_TYPE_T10_DIF:
+ if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF)
+ return -EINVAL;
+
+ /* Input domain check byte mask */
+ basic->check_byte_mask = sig_attrs->check_mask;
+ if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
+ mem->sig.dif.type == wire->sig.dif.type) {
+ /* Same block structure */
+ basic->bsf_size_sbs = 1 << 4;
+ if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
+ basic->wire.copy_byte_mask |= 0xc0;
+ if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
+ basic->wire.copy_byte_mask |= 0x30;
+ if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
+ basic->wire.copy_byte_mask |= 0x0f;
+ } else
+ basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
+
+ basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
+ basic->raw_data_size = cpu_to_be32(data_size);
+
+ ret = format_selector(sig_attrs, mem, &selector);
+ if (ret)
+ return -EINVAL;
+ basic->m_bfs_psv = cpu_to_be32(selector << 24 |
+ msig->psv_memory.psv_idx);
+
+ ret = format_selector(sig_attrs, wire, &selector);
+ if (ret)
+ return -EINVAL;
+ basic->w_bfs_psv = cpu_to_be32(selector << 24 |
+ msig->psv_wire.psv_idx);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+ void **seg, int *size)
+{
+ struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs;
+ struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+ struct mlx5_bsf *bsf;
+ u32 data_len = wr->sg_list->length;
+ u32 data_key = wr->sg_list->lkey;
+ u64 data_va = wr->sg_list->addr;
+ int ret;
+ int wqe_size;
+
+ if (!wr->wr.sig_handover.prot ||
+ (data_key == wr->wr.sig_handover.prot->lkey &&
+ data_va == wr->wr.sig_handover.prot->addr &&
+ data_len == wr->wr.sig_handover.prot->length)) {
+ /**
+ * Source domain doesn't contain signature information
+ * or data and protection are interleaved in memory.
+ * So need construct:
+ * ------------------
+ * | data_klm |
+ * ------------------
+ * | BSF |
+ * ------------------
+ **/
+ struct mlx5_klm *data_klm = *seg;
+
+ data_klm->bcount = cpu_to_be32(data_len);
+ data_klm->key = cpu_to_be32(data_key);
+ data_klm->va = cpu_to_be64(data_va);
+ wqe_size = ALIGN(sizeof(*data_klm), 64);
+ } else {
+ /**
+ * Source domain contains signature information
+ * So need construct a strided block format:
+ * ---------------------------
+ * | stride_block_ctrl |
+ * ---------------------------
+ * | data_klm |
+ * ---------------------------
+ * | prot_klm |
+ * ---------------------------
+ * | BSF |
+ * ---------------------------
+ **/
+ struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
+ struct mlx5_stride_block_entry *data_sentry;
+ struct mlx5_stride_block_entry *prot_sentry;
+ u32 prot_key = wr->wr.sig_handover.prot->lkey;
+ u64 prot_va = wr->wr.sig_handover.prot->addr;
+ u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
+ int prot_size;
+
+ sblock_ctrl = *seg;
+ data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
+ prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
+
+ prot_size = prot_field_size(sig_attrs->mem.sig_type);
+ if (!prot_size) {
+ pr_err("Bad block size given: %u\n", block_size);
+ return -EINVAL;
+ }
+ sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
+ prot_size);
+ sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
+ sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
+ sblock_ctrl->num_entries = cpu_to_be16(2);
+
+ data_sentry->bcount = cpu_to_be16(block_size);
+ data_sentry->key = cpu_to_be32(data_key);
+ data_sentry->va = cpu_to_be64(data_va);
+ data_sentry->stride = cpu_to_be16(block_size);
+
+ prot_sentry->bcount = cpu_to_be16(prot_size);
+ prot_sentry->key = cpu_to_be32(prot_key);
+ prot_sentry->va = cpu_to_be64(prot_va);
+ prot_sentry->stride = cpu_to_be16(prot_size);
+
+ wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
+ sizeof(*prot_sentry), 64);
+ }
+
+ *seg += wqe_size;
+ *size += wqe_size / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+
+ bsf = *seg;
+ ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
+ if (ret)
+ return -EINVAL;
+
+ *seg += sizeof(*bsf);
+ *size += sizeof(*bsf) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+
+ return 0;
+}
+
+static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
+ struct ib_send_wr *wr, u32 nelements,
+ u32 length, u32 pdn)
+{
+ struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+ u32 sig_key = sig_mr->rkey;
+ u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
+
+ memset(seg, 0, sizeof(*seg));
+
+ seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) |
+ MLX5_ACCESS_MODE_KLM;
+ seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
+ seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
+ MLX5_MKEY_BSF_EN | pdn);
+ seg->len = cpu_to_be64(length);
+ seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements)));
+ seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+}
+
+static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+ struct ib_send_wr *wr, u32 nelements)
+{
+ memset(umr, 0, sizeof(*umr));
+
+ umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
+ umr->klm_octowords = get_klm_octo(nelements);
+ umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
+ umr->mkey_mask = sig_mkey_mask();
+}
+
+
+static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+ void **seg, int *size)
+{
+ struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr);
+ u32 pdn = get_pd(qp)->pdn;
+ u32 klm_oct_size;
+ int region_len, ret;
+
+ if (unlikely(wr->num_sge != 1) ||
+ unlikely(wr->wr.sig_handover.access_flags &
+ IB_ACCESS_REMOTE_ATOMIC) ||
+ unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
+ unlikely(!sig_mr->sig->sig_status_checked))
+ return -EINVAL;
+
+ /* length of the protected region, data + protection */
+ region_len = wr->sg_list->length;
+ if (wr->wr.sig_handover.prot &&
+ (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey ||
+ wr->wr.sig_handover.prot->addr != wr->sg_list->addr ||
+ wr->wr.sig_handover.prot->length != wr->sg_list->length))
+ region_len += wr->wr.sig_handover.prot->length;
+
+ /**
+ * KLM octoword size - if protection was provided
+ * then we use strided block format (3 octowords),
+ * else we use single KLM (1 octoword)
+ **/
+ klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1;
+
+ set_sig_umr_segment(*seg, wr, klm_oct_size);
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+
+ set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ if (unlikely((*seg == qp->sq.qend)))
+ *seg = mlx5_get_send_wqe(qp, 0);
+
+ ret = set_sig_data_segment(wr, qp, seg, size);
+ if (ret)
+ return ret;
+
+ sig_mr->sig->sig_status_checked = false;
+ return 0;
+}
+
+static int set_psv_wr(struct ib_sig_domain *domain,
+ u32 psv_idx, void **seg, int *size)
+{
+ struct mlx5_seg_set_psv *psv_seg = *seg;
+
+ memset(psv_seg, 0, sizeof(*psv_seg));
+ psv_seg->psv_num = cpu_to_be32(psv_idx);
+ switch (domain->sig_type) {
+ case IB_SIG_TYPE_T10_DIF:
+ psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
+ domain->sig.dif.app_tag);
+ psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
+
+ *seg += sizeof(*psv_seg);
+ *size += sizeof(*psv_seg) / 16;
+ break;
+
+ default:
+ pr_err("Bad signature type given.\n");
+ return 1;
+ }
+
+ return 0;
+}
+
static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
{
@@ -1916,6 +2355,10 @@ static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
if (!li) {
+ if (unlikely(wr->wr.fast_reg.page_list_len >
+ wr->wr.fast_reg.page_list->max_page_list_len))
+ return -ENOMEM;
+
set_frwr_pages(*seg, wr, mdev, pd, writ);
*seg += sizeof(struct mlx5_wqe_data_seg);
*size += (sizeof(struct mlx5_wqe_data_seg) / 16);
@@ -1978,6 +2421,59 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr)
}
}
+static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+ struct mlx5_wqe_ctrl_seg **ctrl,
+ struct ib_send_wr *wr, int *idx,
+ int *size, int nreq)
+{
+ int err = 0;
+
+ if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
+ *seg = mlx5_get_send_wqe(qp, *idx);
+ *ctrl = *seg;
+ *(uint32_t *)(*seg + 8) = 0;
+ (*ctrl)->imm = send_ieth(wr);
+ (*ctrl)->fm_ce_se = qp->sq_signal_bits |
+ (wr->send_flags & IB_SEND_SIGNALED ?
+ MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+ (wr->send_flags & IB_SEND_SOLICITED ?
+ MLX5_WQE_CTRL_SOLICITED : 0);
+
+ *seg += sizeof(**ctrl);
+ *size = sizeof(**ctrl) / 16;
+
+ return err;
+}
+
+static void finish_wqe(struct mlx5_ib_qp *qp,
+ struct mlx5_wqe_ctrl_seg *ctrl,
+ u8 size, unsigned idx, u64 wr_id,
+ int nreq, u8 fence, u8 next_fence,
+ u32 mlx5_opcode)
+{
+ u8 opmod = 0;
+
+ ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
+ mlx5_opcode | ((u32)opmod << 24));
+ ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+ ctrl->fm_ce_se |= fence;
+ qp->fm_cache = next_fence;
+ if (unlikely(qp->wq_sig))
+ ctrl->signature = wq_sig(ctrl);
+
+ qp->sq.wrid[idx] = wr_id;
+ qp->sq.w_list[idx].opcode = mlx5_opcode;
+ qp->sq.wqe_head[idx] = qp->sq.head + nreq;
+ qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
+ qp->sq.w_list[idx].next = qp->sq.cur_post;
+}
+
+
int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
@@ -1985,13 +2481,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = &dev->mdev;
struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_mr *mr;
struct mlx5_wqe_data_seg *dpseg;
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf = qp->bf;
int uninitialized_var(size);
void *qend = qp->sq.qend;
unsigned long flags;
- u32 mlx5_opcode;
unsigned idx;
int err = 0;
int inl = 0;
@@ -2000,7 +2496,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int nreq;
int i;
u8 next_fence = 0;
- u8 opmod = 0;
u8 fence;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -2013,36 +2508,23 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto out;
}
- if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
+ fence = qp->fm_cache;
+ num_sge = wr->num_sge;
+ if (unlikely(num_sge > qp->sq.max_gs)) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
- fence = qp->fm_cache;
- num_sge = wr->num_sge;
- if (unlikely(num_sge > qp->sq.max_gs)) {
+ err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
+ if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
*bad_wr = wr;
goto out;
}
- idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
- seg = mlx5_get_send_wqe(qp, idx);
- ctrl = seg;
- *(uint32_t *)(seg + 8) = 0;
- ctrl->imm = send_ieth(wr);
- ctrl->fm_ce_se = qp->sq_signal_bits |
- (wr->send_flags & IB_SEND_SIGNALED ?
- MLX5_WQE_CTRL_CQ_UPDATE : 0) |
- (wr->send_flags & IB_SEND_SOLICITED ?
- MLX5_WQE_CTRL_SOLICITED : 0);
-
- seg += sizeof(*ctrl);
- size = sizeof(*ctrl) / 16;
-
switch (ibqp->qp_type) {
case IB_QPT_XRC_INI:
xrc = seg;
@@ -2063,28 +2545,11 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
- set_raddr_seg(seg, wr->wr.atomic.remote_addr,
- wr->wr.atomic.rkey);
- seg += sizeof(struct mlx5_wqe_raddr_seg);
-
- set_atomic_seg(seg, wr);
- seg += sizeof(struct mlx5_wqe_atomic_seg);
-
- size += (sizeof(struct mlx5_wqe_raddr_seg) +
- sizeof(struct mlx5_wqe_atomic_seg)) / 16;
- break;
-
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
- set_raddr_seg(seg, wr->wr.atomic.remote_addr,
- wr->wr.atomic.rkey);
- seg += sizeof(struct mlx5_wqe_raddr_seg);
-
- set_masked_atomic_seg(seg, wr);
- seg += sizeof(struct mlx5_wqe_masked_atomic_seg);
-
- size += (sizeof(struct mlx5_wqe_raddr_seg) +
- sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16;
- break;
+ mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
+ err = -ENOSYS;
+ *bad_wr = wr;
+ goto out;
case IB_WR_LOCAL_INV:
next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
@@ -2112,6 +2577,73 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
num_sge = 0;
break;
+ case IB_WR_REG_SIG_MR:
+ qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
+ mr = to_mmr(wr->wr.sig_handover.sig_mr);
+
+ ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
+ err = set_sig_umr_wr(wr, qp, &seg, &size);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id,
+ nreq, get_fence(fence, wr),
+ next_fence, MLX5_OPCODE_UMR);
+ /*
+ * SET_PSV WQEs are not signaled and solicited
+ * on error
+ */
+ wr->send_flags &= ~IB_SEND_SIGNALED;
+ wr->send_flags |= IB_SEND_SOLICITED;
+ err = begin_wqe(qp, &seg, &ctrl, wr,
+ &idx, &size, nreq);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem,
+ mr->sig->psv_memory.psv_idx, &seg,
+ &size);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id,
+ nreq, get_fence(fence, wr),
+ next_fence, MLX5_OPCODE_SET_PSV);
+ err = begin_wqe(qp, &seg, &ctrl, wr,
+ &idx, &size, nreq);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+ err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire,
+ mr->sig->psv_wire.psv_idx, &seg,
+ &size);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id,
+ nreq, get_fence(fence, wr),
+ next_fence, MLX5_OPCODE_SET_PSV);
+ num_sge = 0;
+ goto skip_psv;
+
default:
break;
}
@@ -2192,22 +2724,10 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
}
- mlx5_opcode = mlx5_ib_opcode[wr->opcode];
- ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
- mlx5_opcode |
- ((u32)opmod << 24));
- ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
- ctrl->fm_ce_se |= get_fence(fence, wr);
- qp->fm_cache = next_fence;
- if (unlikely(qp->wq_sig))
- ctrl->signature = wq_sig(ctrl);
-
- qp->sq.wrid[idx] = wr->wr_id;
- qp->sq.w_list[idx].opcode = mlx5_opcode;
- qp->sq.wqe_head[idx] = qp->sq.head + nreq;
- qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
- qp->sq.w_list[idx].next = qp->sq.cur_post;
-
+ finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
+ get_fence(fence, wr), next_fence,
+ mlx5_ib_opcode[wr->opcode]);
+skip_psv:
if (0)
dump_wqe(qp, idx, size);
}
@@ -2223,6 +2743,10 @@ out:
qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+ /* Make sure doorbell record is visible to the HCA before
+ * we hit doorbell */
+ wmb();
+
if (bf->need_lock)
spin_lock(&bf->lock);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 84d297afd6a..384af6dec5e 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -35,6 +35,7 @@
#include <linux/mlx5/srq.h>
#include <linux/slab.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"
#include "user.h"
@@ -78,16 +79,27 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_create_srq ucmd;
+ size_t ucmdlen;
int err;
int npages;
int page_shift;
int ncont;
u32 offset;
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ ucmdlen =
+ (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
+ sizeof(ucmd)) ? (sizeof(ucmd) -
+ sizeof(ucmd.reserved)) : sizeof(ucmd);
+
+ if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
mlx5_ib_dbg(dev, "failed copy udata\n");
return -EFAULT;
}
+
+ if (ucmdlen == sizeof(ucmd) &&
+ ucmd.reserved != 0)
+ return -EINVAL;
+
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
@@ -123,7 +135,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
goto err_in;
}
- (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+ (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
return 0;
@@ -192,7 +204,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
}
srq->wq_sig = !!srq_signature;
- (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+ (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
return 0;
@@ -295,7 +307,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
mlx5_vfree(in);
if (err) {
mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
- goto err_srq;
+ goto err_usr_kern_srq;
}
mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn);
@@ -316,6 +328,8 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
err_core:
mlx5_core_destroy_srq(&dev->mdev, &srq->msrq);
+
+err_usr_kern_srq:
if (pd->uobject)
destroy_srq_user(pd, srq);
else
@@ -388,9 +402,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
ib_umem_release(msrq->umem);
} else {
- kfree(msrq->wrid);
- mlx5_buf_free(&dev->mdev, &msrq->buf);
- mlx5_db_free(&dev->mdev, &msrq->db);
+ destroy_srq_kernel(dev, msrq);
}
kfree(srq);
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index a886de3e593..d0ba264ac1e 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -62,6 +62,13 @@ struct mlx5_ib_alloc_ucontext_req {
__u32 num_low_latency_uuars;
};
+struct mlx5_ib_alloc_ucontext_req_v2 {
+ __u32 total_num_uuars;
+ __u32 num_low_latency_uuars;
+ __u32 flags;
+ __u32 reserved;
+};
+
struct mlx5_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
__u32 bf_reg_size;
@@ -84,6 +91,7 @@ struct mlx5_ib_create_cq {
__u64 buf_addr;
__u64 db_addr;
__u32 cqe_size;
+ __u32 reserved; /* explicit padding (optional on i386) */
};
struct mlx5_ib_create_cq_resp {
@@ -93,12 +101,16 @@ struct mlx5_ib_create_cq_resp {
struct mlx5_ib_resize_cq {
__u64 buf_addr;
+ __u16 cqe_size;
+ __u16 reserved0;
+ __u32 reserved1;
};
struct mlx5_ib_create_srq {
__u64 buf_addr;
__u64 db_addr;
__u32 flags;
+ __u32 reserved; /* explicit padding (optional on i386) */
};
struct mlx5_ib_create_srq_resp {
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 7c9d35f39d7..69020173899 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -357,7 +357,7 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq)
mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n",
eqe->type, eqe->subtype, eq->eqn);
break;
- };
+ }
set_eqe_hw(eqe);
++eq->cons_index;
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 87897b95666..ded76c101dd 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -858,13 +858,9 @@ static int mthca_enable_msi_x(struct mthca_dev *mdev)
entries[1].entry = 1;
entries[2].entry = 2;
- err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries));
- if (err) {
- if (err > 0)
- mthca_info(mdev, "Only %d MSI-X vectors available, "
- "not using MSI-X\n", err);
+ err = pci_enable_msix_exact(mdev->pdev, entries, ARRAY_SIZE(entries));
+ if (err)
return err;
- }
mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 5b71d43bd89..415f8e1a54d 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -695,6 +695,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
mthca_free_cq(to_mdev(ibdev), cq);
+ err = -EFAULT;
goto err_free;
}
@@ -976,12 +977,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(pd->device);
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct mthca_mr *mr;
struct mthca_reg_mr ucmd;
u64 *pages;
int shift, n, len;
- int i, j, k;
+ int i, k, entry;
int err = 0;
int write_mtt_size;
@@ -1009,10 +1010,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = ffs(mr->umem->page_size) - 1;
-
- n = 0;
- list_for_each_entry(chunk, &mr->umem->chunk_list, list)
- n += chunk->nents;
+ n = mr->umem->nmap;
mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) {
@@ -1030,25 +1028,24 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
- list_for_each_entry(chunk, &mr->umem->chunk_list, list)
- for (j = 0; j < chunk->nmap; ++j) {
- len = sg_dma_len(&chunk->page_list[j]) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(&chunk->page_list[j]) +
- mr->umem->page_size * k;
- /*
- * Be friendly to write_mtt and pass it chunks
- * of appropriate size.
- */
- if (i == write_mtt_size) {
- err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
- if (err)
- goto mtt_done;
- n += i;
- i = 0;
- }
+ for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
+ len = sg_dma_len(sg) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = sg_dma_address(sg) +
+ mr->umem->page_size * k;
+ /*
+ * Be friendly to write_mtt and pass it chunks
+ * of appropriate size.
+ */
+ if (i == write_mtt_size) {
+ err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
+ if (err)
+ goto mtt_done;
+ n += i;
+ i = 0;
}
}
+ }
if (i)
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 26a68453610..e354b2f04ad 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -860,7 +860,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
+ IB_LINK_LAYER_UNSPECIFIED)) {
mthca_dbg(dev, "Bad QP transition (transport %d) "
"%d->%d with attr 0x%08x\n",
qp->transport, cur_state, new_state,
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 429141078ee..3b2a6dc8ea9 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -68,7 +68,6 @@ MODULE_VERSION(DRV_VERSION);
int max_mtu = 9000;
int interrupt_mod_interval = 0;
-
/* Interoperability */
int mpa_version = 1;
module_param(mpa_version, int, 0644);
@@ -112,6 +111,16 @@ static struct pci_device_id nes_pci_table[] = {
MODULE_DEVICE_TABLE(pci, nes_pci_table);
+/* registered nes netlink callbacks */
+static struct ibnl_client_cbs nes_nl_cb_table[] = {
+ [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
+ [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
+ [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+ [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+ [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
+ [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+};
+
static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
static int nes_net_event(struct notifier_block *, unsigned long, void *);
static int nes_notifiers_registered;
@@ -672,11 +681,25 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
}
nes_notifiers_registered++;
+ if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table))
+ printk(KERN_ERR PFX "%s[%u]: Failed to add netlink callback\n",
+ __func__, __LINE__);
+
+ ret = iwpm_init(RDMA_NL_NES);
+ if (ret) {
+ printk(KERN_ERR PFX "%s: port mapper initialization failed\n",
+ pci_name(pcidev));
+ goto bail7;
+ }
+
INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status);
/* Initialize network devices */
- if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL)
+ netdev = nes_netdev_init(nesdev, mmio_regs);
+ if (netdev == NULL) {
+ ret = -ENOMEM;
goto bail7;
+ }
/* Register network device */
ret = register_netdev(netdev);
@@ -707,6 +730,7 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n",
nesdev->netdev_count, nesdev->nesadapter->netdev_count);
+ ibnl_remove_client(RDMA_NL_NES);
nes_notifiers_registered--;
if (nes_notifiers_registered == 0) {
@@ -770,6 +794,8 @@ static void nes_remove(struct pci_dev *pcidev)
nesdev->nesadapter->netdev_count--;
}
}
+ ibnl_remove_client(RDMA_NL_NES);
+ iwpm_exit(RDMA_NL_NES);
nes_notifiers_registered--;
if (nes_notifiers_registered == 0) {
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 33cc58941a3..bd9d132f11c 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -51,6 +51,8 @@
#include <rdma/ib_pack.h>
#include <rdma/rdma_cm.h>
#include <rdma/iw_cm.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/iw_portmap.h>
#define NES_SEND_FIRST_WRITE
@@ -130,6 +132,7 @@
#define NES_DBG_IW_TX 0x00040000
#define NES_DBG_SHUTDOWN 0x00080000
#define NES_DBG_PAU 0x00100000
+#define NES_DBG_NLMSG 0x00200000
#define NES_DBG_RSVD1 0x10000000
#define NES_DBG_RSVD2 0x20000000
#define NES_DBG_RSVD3 0x40000000
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 6b29249aa85..6f09a72e78d 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -59,6 +59,7 @@
#include <net/route.h>
#include <net/ip_fib.h>
#include <net/tcp.h>
+#include <linux/fcntl.h>
#include "nes.h"
@@ -128,6 +129,7 @@ static void build_mpa_v1(struct nes_cm_node *, void *, u8);
static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **);
static void print_core(struct nes_cm_core *core);
+static void record_ird_ord(struct nes_cm_node *, u16, u16);
/* External CM API Interface */
/* instance of function pointers for client API */
@@ -165,7 +167,6 @@ int nes_rem_ref_cm_node(struct nes_cm_node *cm_node)
{
return rem_ref_cm_node(cm_node->cm_core, cm_node);
}
-
/**
* create_event
*/
@@ -317,7 +318,6 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
}
}
-
if (priv_data_len + mpa_hdr_len != len) {
nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
" complete (%x + %x != %x)\n",
@@ -356,25 +356,57 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
/* send reset */
return -EINVAL;
}
+ if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD)
+ cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
- if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+ if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) {
/* responder */
- if (cm_node->ord_size > ird_size)
- cm_node->ord_size = ird_size;
- } else {
- /* initiator */
- if (cm_node->ord_size > ird_size)
- cm_node->ord_size = ird_size;
-
- if (cm_node->ird_size < ord_size) {
- /* no resources available */
- /* send terminate message */
- return -EINVAL;
+ if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+ /* we are still negotiating */
+ if (ord_size > NES_MAX_IRD) {
+ cm_node->ird_size = NES_MAX_IRD;
+ } else {
+ cm_node->ird_size = ord_size;
+ if (ord_size == 0 &&
+ (rtr_ctrl_ord & IETF_RDMA0_READ)) {
+ cm_node->ird_size = 1;
+ nes_debug(NES_DBG_CM,
+ "%s: Remote peer doesn't support RDMA0_READ (ord=%u)\n",
+ __func__, ord_size);
+ }
+ }
+ if (ird_size > NES_MAX_ORD)
+ cm_node->ord_size = NES_MAX_ORD;
+ else
+ cm_node->ord_size = ird_size;
+ } else { /* initiator */
+ if (ord_size > NES_MAX_IRD) {
+ nes_debug(NES_DBG_CM,
+ "%s: Unable to support the requested (ord =%u)\n",
+ __func__, ord_size);
+ return -EINVAL;
+ }
+ cm_node->ird_size = ord_size;
+
+ if (ird_size > NES_MAX_ORD) {
+ cm_node->ord_size = NES_MAX_ORD;
+ } else {
+ if (ird_size == 0 &&
+ (rtr_ctrl_ord & IETF_RDMA0_READ)) {
+ nes_debug(NES_DBG_CM,
+ "%s: Remote peer doesn't support RDMA0_READ (ird=%u)\n",
+ __func__, ird_size);
+ return -EINVAL;
+ } else {
+ cm_node->ord_size = ird_size;
+ }
+ }
}
}
if (rtr_ctrl_ord & IETF_RDMA0_READ) {
cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+
} else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) {
cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
} else { /* Not supported RDMA0 operation */
@@ -450,11 +482,11 @@ static void form_cm_frame(struct sk_buff *skb,
iph->ttl = 0x40;
iph->protocol = 0x06; /* IPPROTO_TCP */
- iph->saddr = htonl(cm_node->loc_addr);
- iph->daddr = htonl(cm_node->rem_addr);
+ iph->saddr = htonl(cm_node->mapped_loc_addr);
+ iph->daddr = htonl(cm_node->mapped_rem_addr);
- tcph->source = htons(cm_node->loc_port);
- tcph->dest = htons(cm_node->rem_port);
+ tcph->source = htons(cm_node->mapped_loc_port);
+ tcph->dest = htons(cm_node->mapped_rem_port);
tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
if (flags & SET_ACK) {
@@ -493,6 +525,100 @@ static void form_cm_frame(struct sk_buff *skb,
cm_packets_created++;
}
+/*
+ * nes_create_sockaddr - Record ip addr and tcp port in a sockaddr struct
+ * @ip_addr: IPv4 address, already in network byte order
+ * @port:    TCP port, already in network byte order
+ * @addr:    storage to fill in; it is written as a struct sockaddr_in
+ *
+ * The whole sockaddr_storage is cleared before the AF_INET fields are set.
+ * The callers in this file pass on-stack storage that has not been
+ * initialized and then hand it to the iwpm_* port mapper helpers, so any
+ * byte not covered by the three assignments below would otherwise carry
+ * stale stack contents along with it.
+ */
+static void nes_create_sockaddr(__be32 ip_addr, __be16 port,
+				struct sockaddr_storage *addr)
+{
+	struct sockaddr_in *nes_sockaddr = (struct sockaddr_in *)addr;
+
+	/* Clear everything first, including the bytes past sockaddr_in. */
+	memset(addr, 0, sizeof(*addr));
+
+	nes_sockaddr->sin_family = AF_INET;
+	nes_sockaddr->sin_addr.s_addr = ip_addr;
+	nes_sockaddr->sin_port = port;
+}
+
+/*
+ * nes_create_mapinfo - Create a mapinfo object in the port mapper data base
+ * @cm_info: connection info holding the local and mapped local addr/port
+ *           in host byte order
+ *
+ * Builds one sockaddr for the local addr/port and one for the mapped local
+ * addr/port (both converted to network byte order) and registers the pair
+ * under the RDMA_NL_NES client.
+ *
+ * Returns the result of iwpm_create_mapinfo().
+ */
+static int nes_create_mapinfo(struct nes_cm_info *cm_info)
+{
+	struct sockaddr_storage mapped_sockaddr;
+	struct sockaddr_storage local_sockaddr;
+	__be32 ip;
+	__be16 port;
+
+	/* local (unmapped) endpoint */
+	ip = htonl(cm_info->loc_addr);
+	port = htons(cm_info->loc_port);
+	nes_create_sockaddr(ip, port, &local_sockaddr);
+
+	/* mapped local endpoint */
+	ip = htonl(cm_info->mapped_loc_addr);
+	port = htons(cm_info->mapped_loc_port);
+	nes_create_sockaddr(ip, port, &mapped_sockaddr);
+
+	return iwpm_create_mapinfo(&local_sockaddr, &mapped_sockaddr,
+				   RDMA_NL_NES);
+}
+
+/*
+ * nes_remove_mapinfo - Remove a mapinfo object from the port mapper data base
+ *                      and send a remove mapping op message to
+ *                      the userspace port mapper
+ * @loc_addr:        local IPv4 address, host byte order
+ * @loc_port:        local TCP port, host byte order
+ * @mapped_loc_addr: mapped local IPv4 address, host byte order
+ * @mapped_loc_port: mapped local TCP port, host byte order
+ *
+ * The return value of iwpm_remove_mapinfo() is not examined; the function
+ * returns the result of iwpm_remove_mapping().
+ */
+static int nes_remove_mapinfo(u32 loc_addr, u16 loc_port,
+			      u32 mapped_loc_addr, u16 mapped_loc_port)
+{
+	struct sockaddr_storage mapped_sockaddr;
+	struct sockaddr_storage local_sockaddr;
+	int ret;
+
+	nes_create_sockaddr(htonl(loc_addr), htons(loc_port),
+			    &local_sockaddr);
+	nes_create_sockaddr(htonl(mapped_loc_addr), htons(mapped_loc_port),
+			    &mapped_sockaddr);
+
+	/* drop the local data base entry, then notify the port mapper */
+	iwpm_remove_mapinfo(&local_sockaddr, &mapped_sockaddr);
+	ret = iwpm_remove_mapping(&local_sockaddr, RDMA_NL_NES);
+
+	return ret;
+}
+
+/*
+ * nes_form_pm_msg - Form a port mapper message with mapping info
+ * @cm_info: source of the local and remote addr/port (host byte order)
+ * @pm_msg:  message whose loc_addr and rem_addr fields are filled in
+ *
+ * Only loc_addr and rem_addr of @pm_msg are written here.
+ */
+static void nes_form_pm_msg(struct nes_cm_info *cm_info,
+			    struct iwpm_sa_data *pm_msg)
+{
+	__be32 ip;
+	__be16 port;
+
+	/* local endpoint */
+	ip = htonl(cm_info->loc_addr);
+	port = htons(cm_info->loc_port);
+	nes_create_sockaddr(ip, port, &pm_msg->loc_addr);
+
+	/* remote endpoint */
+	ip = htonl(cm_info->rem_addr);
+	port = htons(cm_info->rem_port);
+	nes_create_sockaddr(ip, port, &pm_msg->rem_addr);
+}
+
+/*
+ * nes_form_reg_msg - Form a port mapper message with dev info
+ * @nesvnic: vnic supplying the ib device name and the netdev name
+ * @pm_msg:  message whose dev_name and if_name fields are filled in
+ *
+ * Both names are copied with fixed lengths (IWPM_DEVNAME_SIZE and
+ * IWPM_IFNAME_SIZE bytes respectively), not up to a terminating NUL.
+ */
+static void nes_form_reg_msg(struct nes_vnic *nesvnic,
+			     struct iwpm_dev_data *pm_msg)
+{
+	const char *ibdev_name = nesvnic->nesibdev->ibdev.name;
+	const char *netdev_name = nesvnic->netdev->name;
+
+	memcpy(pm_msg->dev_name, ibdev_name, IWPM_DEVNAME_SIZE);
+	memcpy(pm_msg->if_name, netdev_name, IWPM_IFNAME_SIZE);
+}
+
+/*
+ * nes_record_pm_msg - Save the received mapping info
+ * @cm_info: connection info whose mapped_* fields are updated
+ * @pm_msg:  port mapper message carrying mapped_loc_addr / mapped_rem_addr
+ *
+ * Each mapped address is recorded (converted to host byte order) only when
+ * its family is AF_INET; otherwise the matching fields of @cm_info are left
+ * as they were.
+ */
+static void nes_record_pm_msg(struct nes_cm_info *cm_info,
+			      struct iwpm_sa_data *pm_msg)
+{
+	struct sockaddr_in *loc, *rem;
+
+	loc = (struct sockaddr_in *)&pm_msg->mapped_loc_addr;
+	rem = (struct sockaddr_in *)&pm_msg->mapped_rem_addr;
+
+	if (loc->sin_family == AF_INET) {
+		cm_info->mapped_loc_addr = ntohl(loc->sin_addr.s_addr);
+		cm_info->mapped_loc_port = ntohs(loc->sin_port);
+	}
+
+	if (rem->sin_family == AF_INET) {
+		cm_info->mapped_rem_addr = ntohl(rem->sin_addr.s_addr);
+		cm_info->mapped_rem_port = ntohs(rem->sin_port);
+	}
+}
+
/**
* print_core - dump a cm core
*/
@@ -514,6 +640,19 @@ static void print_core(struct nes_cm_core *core)
nes_debug(NES_DBG_CM, "-------------- end core ---------------\n");
}
+/*
+ * record_ird_ord - Store the connection IRD/ORD values on the cm node
+ * @cm_node:  node whose ird_size and ord_size are set
+ * @conn_ird: requested IRD; values above NES_MAX_IRD are clamped to it
+ * @conn_ord: requested ORD; values above NES_MAX_ORD are clamped to it
+ */
+static void record_ird_ord(struct nes_cm_node *cm_node,
+			   u16 conn_ird, u16 conn_ord)
+{
+	cm_node->ird_size = (conn_ird > NES_MAX_IRD) ? NES_MAX_IRD : conn_ird;
+	cm_node->ord_size = (conn_ord > NES_MAX_ORD) ? NES_MAX_ORD : conn_ord;
+}
+
/**
* cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame
*/
@@ -557,11 +696,13 @@ static void build_mpa_v2(struct nes_cm_node *cm_node,
mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
/* initialize RTR msg */
- ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
- IETF_NO_IRD_ORD : cm_node->ird_size;
- ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
- IETF_NO_IRD_ORD : cm_node->ord_size;
-
+ if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+ ctrl_ird = IETF_NO_IRD_ORD;
+ ctrl_ord = IETF_NO_IRD_ORD;
+ } else {
+ ctrl_ird = cm_node->ird_size & IETF_NO_IRD_ORD;
+ ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD;
+ }
ctrl_ird |= IETF_PEER_TO_PEER;
ctrl_ird |= IETF_FLPDU_ZERO_LEN;
@@ -610,7 +751,7 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a
struct nes_qp *nesqp = *nesqp_addr;
struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0];
- u64temp = (unsigned long)nesqp;
+ u64temp = (unsigned long)nesqp->nesuqp_addr;
u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
@@ -1100,8 +1241,11 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
loc_addr, loc_port,
cm_node->rem_addr, cm_node->rem_port,
rem_addr, rem_port);
- if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) &&
- (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) {
+ if ((cm_node->mapped_loc_addr == loc_addr) &&
+ (cm_node->mapped_loc_port == loc_port) &&
+ (cm_node->mapped_rem_addr == rem_addr) &&
+ (cm_node->mapped_rem_port == rem_port)) {
+
add_ref_cm_node(cm_node);
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
return cm_node;
@@ -1118,18 +1262,28 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
* find_listener - find a cm node listening on this addr-port pair
*/
static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
- nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
+ nes_addr_t dst_addr, u16 dst_port,
+ enum nes_cm_listener_state listener_state, int local)
{
unsigned long flags;
struct nes_cm_listener *listen_node;
+ nes_addr_t listen_addr;
+ u16 listen_port;
/* walk list and find cm_node associated with this session ID */
spin_lock_irqsave(&cm_core->listen_list_lock, flags);
list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
+ if (local) {
+ listen_addr = listen_node->loc_addr;
+ listen_port = listen_node->loc_port;
+ } else {
+ listen_addr = listen_node->mapped_loc_addr;
+ listen_port = listen_node->mapped_loc_port;
+ }
/* compare node pair, return node handle if a match */
- if (((listen_node->loc_addr == dst_addr) ||
- listen_node->loc_addr == 0x00000000) &&
- (listen_node->loc_port == dst_port) &&
+ if (((listen_addr == dst_addr) ||
+ listen_addr == 0x00000000) &&
+ (listen_port == dst_port) &&
(listener_state & listen_node->listener_state)) {
atomic_inc(&listen_node->ref_count);
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
@@ -1142,7 +1296,6 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
return NULL;
}
-
/**
* add_hte_node - add a cm node to the hash table
*/
@@ -1263,9 +1416,20 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- if (listener->nesvnic)
- nes_manage_apbvt(listener->nesvnic, listener->loc_port,
- PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
+ if (listener->nesvnic) {
+ nes_manage_apbvt(listener->nesvnic,
+ listener->mapped_loc_port,
+ PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn),
+ NES_MANAGE_APBVT_DEL);
+
+ nes_remove_mapinfo(listener->loc_addr,
+ listener->loc_port,
+ listener->mapped_loc_addr,
+ listener->mapped_loc_port);
+ nes_debug(NES_DBG_NLMSG,
+ "Delete APBVT mapped_loc_port = %04X\n",
+ listener->mapped_loc_port);
+ }
nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
@@ -1354,8 +1518,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
neigh->ha, ntohl(rt->rt_gateway));
if (arpindex >= 0) {
- if (!memcmp(nesadapter->arp_table[arpindex].mac_addr,
- neigh->ha, ETH_ALEN)) {
+ if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) {
/* Mac address same as in nes_arp_table */
goto out;
}
@@ -1408,10 +1571,16 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
cm_node->loc_port = cm_info->loc_port;
cm_node->rem_port = cm_info->rem_port;
+ cm_node->mapped_loc_addr = cm_info->mapped_loc_addr;
+ cm_node->mapped_rem_addr = cm_info->mapped_rem_addr;
+ cm_node->mapped_loc_port = cm_info->mapped_loc_port;
+ cm_node->mapped_rem_port = cm_info->mapped_rem_port;
+
cm_node->mpa_frame_rev = mpa_version;
cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
- cm_node->ird_size = IETF_NO_IRD_ORD;
- cm_node->ord_size = IETF_NO_IRD_ORD;
+ cm_node->mpav2_ird_ord = 0;
+ cm_node->ird_size = 0;
+ cm_node->ord_size = 0;
nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n",
&cm_node->loc_addr, cm_node->loc_port,
@@ -1453,8 +1622,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
cm_node->loopbackpartner = NULL;
/* get the mac addr for the remote node */
- oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
- arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
+ oldarpindex = nes_arp_table(nesdev, cm_node->mapped_rem_addr,
+ NULL, NES_ARP_RESOLVE);
+ arpindex = nes_addr_resolve_neigh(nesvnic,
+ cm_node->mapped_rem_addr, oldarpindex);
if (arpindex < 0) {
kfree(cm_node);
return NULL;
@@ -1516,11 +1687,14 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core,
mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0);
} else {
if (cm_node->apbvt_set && cm_node->nesvnic) {
- nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
- PCI_FUNC(
- cm_node->nesvnic->nesdev->pcidev->devfn),
+ nes_manage_apbvt(cm_node->nesvnic, cm_node->mapped_loc_port,
+ PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
NES_MANAGE_APBVT_DEL);
}
+ nes_debug(NES_DBG_NLMSG, "Delete APBVT mapped_loc_port = %04X\n",
+ cm_node->mapped_loc_port);
+ nes_remove_mapinfo(cm_node->loc_addr, cm_node->loc_port,
+ cm_node->mapped_loc_addr, cm_node->mapped_loc_port);
}
atomic_dec(&cm_core->node_cnt);
@@ -2188,17 +2362,21 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
* mini_cm_listen - create a listen node with params
*/
static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
+ struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
{
struct nes_cm_listener *listener;
+ struct iwpm_dev_data pm_reg_msg;
+ struct iwpm_sa_data pm_msg;
unsigned long flags;
+ int iwpm_err = 0;
nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
cm_info->loc_addr, cm_info->loc_port);
/* cannot have multiple matching listeners */
- listener = find_listener(cm_core, htonl(cm_info->loc_addr),
- htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE);
+ listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port,
+ NES_CM_LISTENER_EITHER_STATE, 1);
+
if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
/* find automatically incs ref count ??? */
atomic_dec(&listener->ref_count);
@@ -2207,6 +2385,22 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
}
if (!listener) {
+ nes_form_reg_msg(nesvnic, &pm_reg_msg);
+ iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
+ if (iwpm_err) {
+ nes_debug(NES_DBG_NLMSG,
+ "Port Mapper reg pid fail (err = %d).\n", iwpm_err);
+ }
+ if (iwpm_valid_pid() && !iwpm_err) {
+ nes_form_pm_msg(cm_info, &pm_msg);
+ iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_NES);
+ if (iwpm_err)
+ nes_debug(NES_DBG_NLMSG,
+ "Port Mapper query fail (err = %d).\n", iwpm_err);
+ else
+ nes_record_pm_msg(cm_info, &pm_msg);
+ }
+
/* create a CM listen node (1/2 node to compare incoming traffic to) */
listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
if (!listener) {
@@ -2214,8 +2408,10 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
return NULL;
}
- listener->loc_addr = htonl(cm_info->loc_addr);
- listener->loc_port = htons(cm_info->loc_port);
+ listener->loc_addr = cm_info->loc_addr;
+ listener->loc_port = cm_info->loc_port;
+ listener->mapped_loc_addr = cm_info->mapped_loc_addr;
+ listener->mapped_loc_port = cm_info->mapped_loc_port;
listener->reused_node = 0;
atomic_set(&listener->ref_count, 1);
@@ -2277,14 +2473,18 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
if (cm_info->loc_addr == cm_info->rem_addr) {
loopbackremotelistener = find_listener(cm_core,
- ntohl(nesvnic->local_ipaddr), cm_node->rem_port,
- NES_CM_LISTENER_ACTIVE_STATE);
+ cm_node->mapped_loc_addr, cm_node->mapped_rem_port,
+ NES_CM_LISTENER_ACTIVE_STATE, 0);
if (loopbackremotelistener == NULL) {
create_event(cm_node, NES_CM_EVENT_ABORTED);
} else {
loopback_cm_info = *cm_info;
loopback_cm_info.loc_port = cm_info->rem_port;
loopback_cm_info.rem_port = cm_info->loc_port;
+ loopback_cm_info.mapped_loc_port =
+ cm_info->mapped_rem_port;
+ loopback_cm_info.mapped_rem_port =
+ cm_info->mapped_loc_port;
loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
loopbackremotenode = make_cm_node(cm_core, nesvnic,
&loopback_cm_info, loopbackremotelistener);
@@ -2513,6 +2713,12 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
nfo.rem_addr = ntohl(iph->saddr);
nfo.rem_port = ntohs(tcph->source);
+ /* If port mapper is available these should be mapped address info */
+ nfo.mapped_loc_addr = ntohl(iph->daddr);
+ nfo.mapped_loc_port = ntohs(tcph->dest);
+ nfo.mapped_rem_addr = ntohl(iph->saddr);
+ nfo.mapped_rem_port = ntohs(tcph->source);
+
tmp_daddr = cpu_to_be32(iph->daddr);
tmp_saddr = cpu_to_be32(iph->saddr);
@@ -2521,8 +2727,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
do {
cm_node = find_node(cm_core,
- nfo.rem_port, nfo.rem_addr,
- nfo.loc_port, nfo.loc_addr);
+ nfo.mapped_rem_port, nfo.mapped_rem_addr,
+ nfo.mapped_loc_port, nfo.mapped_loc_addr);
if (!cm_node) {
/* Only type of packet accepted are for */
@@ -2531,9 +2737,9 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
skb_handled = 0;
break;
}
- listener = find_listener(cm_core, nfo.loc_addr,
- nfo.loc_port,
- NES_CM_LISTENER_ACTIVE_STATE);
+ listener = find_listener(cm_core, nfo.mapped_loc_addr,
+ nfo.mapped_loc_port,
+ NES_CM_LISTENER_ACTIVE_STATE, 0);
if (!listener) {
nfo.cm_id = NULL;
nfo.conn_type = 0;
@@ -3028,11 +3234,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
rem_ref_cm_node(cm_node->cm_core, cm_node);
return -ECONNRESET;
}
-
/* associate the node with the QP */
nesqp->cm_node = (void *)cm_node;
cm_node->nesqp = nesqp;
+
nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n",
nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener);
atomic_inc(&cm_accepts);
@@ -3055,6 +3261,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (cm_node->mpa_frame_rev == IETF_MPA_V1)
mpa_frame_offset = 4;
+ if (cm_node->mpa_frame_rev == IETF_MPA_V1 ||
+ cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+ record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
+ }
+
memcpy(mpa_v2_frame->priv_data, conn_param->private_data,
conn_param->private_data_len);
@@ -3118,7 +3329,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
nesqp->skip_lsmm = 1;
-
/* Cache the cm_id in the qp */
nesqp->cm_id = cm_id;
cm_node->cm_id = cm_id;
@@ -3133,10 +3343,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
nes_cm_init_tsa_conn(nesqp, cm_node);
- nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port));
- nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port));
+ nesqp->nesqp_context->tcpPorts[0] =
+ cpu_to_le16(cm_node->mapped_loc_port);
+ nesqp->nesqp_context->tcpPorts[1] =
+ cpu_to_le16(cm_node->mapped_rem_port);
- nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr));
+ nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr);
nesqp->nesqp_context->misc2 |= cpu_to_le32(
(u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3155,14 +3367,14 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT));
nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32((u32)conn_param->ord);
+ cpu_to_le32((u32)cm_node->ord_size);
memset(&nes_quad, 0, sizeof(nes_quad));
nes_quad.DstIpAdrIndex =
cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = raddr->sin_addr.s_addr;
- nes_quad.TcpPorts[0] = raddr->sin_port;
- nes_quad.TcpPorts[1] = laddr->sin_port;
+ nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr);
+ nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port);
+ nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port);
/* Produce hash key */
crc_value = get_crc_value(&nes_quad);
@@ -3195,6 +3407,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cm_event.remote_addr = cm_id->remote_addr;
cm_event.private_data = NULL;
cm_event.private_data_len = 0;
+ cm_event.ird = cm_node->ird_size;
+ cm_event.ord = cm_node->ord_size;
+
ret = cm_id->event_handler(cm_id, &cm_event);
attr.qp_state = IB_QPS_RTS;
nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
@@ -3261,6 +3476,9 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
int apbvt_set = 0;
struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+ struct iwpm_dev_data pm_reg_msg;
+ struct iwpm_sa_data pm_msg;
+ int iwpm_err = 0;
if (cm_id->remote_addr.ss_family != AF_INET)
return -ENOSYS;
@@ -3291,33 +3509,51 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
/* cache the cm_id in the qp */
nesqp->cm_id = cm_id;
-
cm_id->provider_data = nesqp;
-
nesqp->private_data_len = conn_param->private_data_len;
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
- /* space for rdma0 read msg */
- if (conn_param->ord == 0)
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(1);
nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
conn_param->private_data_len);
+ /* set up the connection params for the node */
+ cm_info.loc_addr = ntohl(laddr->sin_addr.s_addr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
+ cm_info.rem_addr = ntohl(raddr->sin_addr.s_addr);
+ cm_info.rem_port = ntohs(raddr->sin_port);
+ cm_info.cm_id = cm_id;
+ cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+
+ /* No port mapper available, go with the specified peer information */
+ cm_info.mapped_loc_addr = cm_info.loc_addr;
+ cm_info.mapped_loc_port = cm_info.loc_port;
+ cm_info.mapped_rem_addr = cm_info.rem_addr;
+ cm_info.mapped_rem_port = cm_info.rem_port;
+
+ nes_form_reg_msg(nesvnic, &pm_reg_msg);
+ iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
+ if (iwpm_err) {
+ nes_debug(NES_DBG_NLMSG,
+ "Port Mapper reg pid fail (err = %d).\n", iwpm_err);
+ }
+ if (iwpm_valid_pid() && !iwpm_err) {
+ nes_form_pm_msg(&cm_info, &pm_msg);
+ iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_NES);
+ if (iwpm_err)
+ nes_debug(NES_DBG_NLMSG,
+ "Port Mapper query fail (err = %d).\n", iwpm_err);
+ else
+ nes_record_pm_msg(&cm_info, &pm_msg);
+ }
+
if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) {
- nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
- PCI_FUNC(nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_ADD);
+ nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port,
+ PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
apbvt_set = 1;
}
- /* set up the connection params for the node */
- cm_info.loc_addr = htonl(laddr->sin_addr.s_addr);
- cm_info.loc_port = htons(laddr->sin_port);
- cm_info.rem_addr = htonl(raddr->sin_addr.s_addr);
- cm_info.rem_port = htons(raddr->sin_port);
- cm_info.cm_id = cm_id;
- cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+ if (nes_create_mapinfo(&cm_info))
+ return -ENOMEM;
cm_id->add_ref(cm_id);
@@ -3327,14 +3563,23 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
&cm_info);
if (!cm_node) {
if (apbvt_set)
- nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
+ nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port,
PCI_FUNC(nesdev->pcidev->devfn),
NES_MANAGE_APBVT_DEL);
+ nes_debug(NES_DBG_NLMSG, "Delete mapped_loc_port = %04X\n",
+ cm_info.mapped_loc_port);
+ nes_remove_mapinfo(cm_info.loc_addr, cm_info.loc_port,
+ cm_info.mapped_loc_addr, cm_info.mapped_loc_port);
cm_id->rem_ref(cm_id);
return -ENOMEM;
}
+ record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
+ if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
+ cm_node->ord_size == 0)
+ cm_node->ord_size = 1;
+
cm_node->apbvt_set = apbvt_set;
nesqp->cm_node = cm_node;
cm_node->nesqp = nesqp;
@@ -3371,13 +3616,16 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
nesvnic->local_ipaddr, laddr->sin_addr.s_addr);
/* setup listen params in our api call struct */
- cm_info.loc_addr = nesvnic->local_ipaddr;
- cm_info.loc_port = laddr->sin_port;
+ cm_info.loc_addr = ntohl(nesvnic->local_ipaddr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
cm_info.backlog = backlog;
cm_info.cm_id = cm_id;
cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
+ /* No port mapper available, go with the specified info */
+ cm_info.mapped_loc_addr = cm_info.loc_addr;
+ cm_info.mapped_loc_port = cm_info.loc_port;
cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
if (!cm_node) {
@@ -3389,7 +3637,10 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->provider_data = cm_node;
if (!cm_node->reused_node) {
- err = nes_manage_apbvt(nesvnic, ntohs(laddr->sin_port),
+ if (nes_create_mapinfo(&cm_info))
+ return -ENOMEM;
+
+ err = nes_manage_apbvt(nesvnic, cm_node->mapped_loc_port,
PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
NES_MANAGE_APBVT_ADD);
if (err) {
@@ -3514,9 +3765,11 @@ static void cm_event_connected(struct nes_cm_event *event)
nes_cm_init_tsa_conn(nesqp, cm_node);
/* set the QP tsa context */
- nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(laddr->sin_port));
- nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(raddr->sin_port));
- nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(raddr->sin_addr.s_addr));
+ nesqp->nesqp_context->tcpPorts[0] =
+ cpu_to_le16(cm_node->mapped_loc_port);
+ nesqp->nesqp_context->tcpPorts[1] =
+ cpu_to_le16(cm_node->mapped_rem_port);
+ nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr);
nesqp->nesqp_context->misc2 |= cpu_to_le32(
(u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3531,6 +3784,8 @@ static void cm_event_connected(struct nes_cm_event *event)
nesqp->nesqp_context->ird_ord_sizes |=
cpu_to_le32((u32)1 <<
NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
+ nesqp->nesqp_context->ird_ord_sizes |=
+ cpu_to_le32((u32)cm_node->ord_size);
/* Adjust tail for not having a LSMM */
/*nesqp->hwqp.sq_tail = 1;*/
@@ -3544,9 +3799,9 @@ static void cm_event_connected(struct nes_cm_event *event)
nes_quad.DstIpAdrIndex =
cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = raddr->sin_addr.s_addr;
- nes_quad.TcpPorts[0] = raddr->sin_port;
- nes_quad.TcpPorts[1] = laddr->sin_port;
+ nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr);
+ nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port);
+ nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port);
/* Produce hash key */
crc_value = get_crc_value(&nes_quad);
@@ -3574,7 +3829,7 @@ static void cm_event_connected(struct nes_cm_event *event)
cm_event.ird = cm_node->ird_size;
cm_event.ord = cm_node->ord_size;
- cm_event_laddr->sin_addr.s_addr = event->cm_info.rem_addr;
+ cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
ret = cm_id->event_handler(cm_id, &cm_event);
nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
@@ -3743,8 +3998,13 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
cm_event.private_data = cm_node->mpa_frame_buf;
cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
+ if (cm_node->mpa_frame_rev == IETF_MPA_V1) {
+ cm_event.ird = NES_MAX_IRD;
+ cm_event.ord = NES_MAX_ORD;
+ } else {
cm_event.ird = cm_node->ird_size;
cm_event.ord = cm_node->ord_size;
+ }
ret = cm_id->event_handler(cm_id, &cm_event);
if (ret)
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 4646e666608..f522cf63978 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -58,6 +58,8 @@
#define IETF_RDMA0_WRITE 0x8000
#define IETF_RDMA0_READ 0x4000
#define IETF_NO_IRD_ORD 0x3FFF
+#define NES_MAX_IRD 0x40
+#define NES_MAX_ORD 0x7F
enum ietf_mpa_flags {
IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
@@ -291,8 +293,8 @@ struct nes_cm_listener {
struct list_head list;
struct nes_cm_core *cm_core;
u8 loc_mac[ETH_ALEN];
- nes_addr_t loc_addr;
- u16 loc_port;
+ nes_addr_t loc_addr, mapped_loc_addr;
+ u16 loc_port, mapped_loc_port;
struct iw_cm_id *cm_id;
enum nes_cm_conn_type conn_type;
atomic_t ref_count;
@@ -306,7 +308,9 @@ struct nes_cm_listener {
/* per connection node and node state information */
struct nes_cm_node {
nes_addr_t loc_addr, rem_addr;
+ nes_addr_t mapped_loc_addr, mapped_rem_addr;
u16 loc_port, rem_port;
+ u16 mapped_loc_port, mapped_rem_port;
u8 loc_mac[ETH_ALEN];
u8 rem_mac[ETH_ALEN];
@@ -333,6 +337,7 @@ struct nes_cm_node {
enum mpa_frame_version mpa_frame_rev;
u16 ird_size;
u16 ord_size;
+ u16 mpav2_ird_ord;
u16 mpa_frame_size;
struct iw_cm_id *cm_id;
@@ -361,6 +366,10 @@ struct nes_cm_info {
u16 rem_port;
nes_addr_t loc_addr;
nes_addr_t rem_addr;
+ u16 mapped_loc_port;
+ u16 mapped_rem_port;
+ nes_addr_t mapped_loc_addr;
+ nes_addr_t mapped_rem_addr;
enum nes_cm_conn_type conn_type;
int backlog;
diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h
index 4926de74448..529c421bb15 100644
--- a/drivers/infiniband/hw/nes/nes_user.h
+++ b/drivers/infiniband/hw/nes/nes_user.h
@@ -39,8 +39,8 @@
#include <linux/types.h>
-#define NES_ABI_USERSPACE_VER 1
-#define NES_ABI_KERNEL_VER 1
+#define NES_ABI_USERSPACE_VER 2
+#define NES_ABI_KERNEL_VER 2
/*
* Make sure that all structs defined in this file remain laid out so
@@ -78,6 +78,7 @@ struct nes_create_cq_req {
struct nes_create_qp_req {
__u64 user_wqe_buffers;
+ __u64 user_qp_buffer;
};
enum iwnes_memreg_type {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 5b53ca5a228..218dd357428 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1186,11 +1186,13 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
kfree(nesqp->allocated_buffer);
nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n");
- return NULL;
+ return ERR_PTR(-EFAULT);
}
if (req.user_wqe_buffers) {
virt_wqs = 1;
}
+ if (req.user_qp_buffer)
+ nesqp->nesuqp_addr = req.user_qp_buffer;
if ((ibpd->uobject) && (ibpd->uobject->context)) {
nesqp->user_mode = 1;
nes_ucontext = to_nesucontext(ibpd->uobject->context);
@@ -2307,7 +2309,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct nes_device *nesdev = nesvnic->nesdev;
struct nes_adapter *nesadapter = nesdev->nesadapter;
struct ib_mr *ibmr = ERR_PTR(-EINVAL);
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct nes_ucontext *nes_ucontext;
struct nes_pbl *nespbl;
struct nes_mr *nesmr;
@@ -2315,7 +2317,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct nes_mem_reg_req req;
struct nes_vpbl vpbl;
struct nes_root_vpbl root_vpbl;
- int nmap_index, page_index;
+ int entry, page_index;
int page_count = 0;
int err, pbl_depth = 0;
int chunk_pages;
@@ -2330,6 +2332,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u16 pbl_count;
u8 single_page = 1;
u8 stag_key;
+ int first_page = 1;
region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
if (IS_ERR(region)) {
@@ -2380,128 +2383,125 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
nesmr->region = region;
- list_for_each_entry(chunk, &region->chunk_list, list) {
- nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n",
- chunk->nents, chunk->nmap);
- for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
- if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) {
- ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
- (unsigned int) sg_dma_address(&chunk->page_list[nmap_index]));
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_user_mr_err;
- }
+ for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
+ if (sg_dma_address(sg) & ~PAGE_MASK) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
+ (unsigned int) sg_dma_address(sg));
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_user_mr_err;
+ }
- if (!sg_dma_len(&chunk->page_list[nmap_index])) {
- ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_user_mr_err;
- }
+ if (!sg_dma_len(sg)) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ stag_index);
+ nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_user_mr_err;
+ }
- region_length += sg_dma_len(&chunk->page_list[nmap_index]);
- chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
- region_length -= skip_pages << 12;
- for (page_index=skip_pages; page_index < chunk_pages; page_index++) {
- skip_pages = 0;
- if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length)
- goto enough_pages;
- if ((page_count&0x01FF) == 0) {
- if (page_count >= 1024 * 512) {
+ region_length += sg_dma_len(sg);
+ chunk_pages = sg_dma_len(sg) >> 12;
+ region_length -= skip_pages << 12;
+ for (page_index = skip_pages; page_index < chunk_pages; page_index++) {
+ skip_pages = 0;
+ if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length)
+ goto enough_pages;
+ if ((page_count&0x01FF) == 0) {
+ if (page_count >= 1024 * 512) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter,
+ nesadapter->allocated_mrs, stag_index);
+ kfree(nesmr);
+ ibmr = ERR_PTR(-E2BIG);
+ goto reg_user_mr_err;
+ }
+ if (root_pbl_index == 1) {
+ root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
+ 8192, &root_vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
+ root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
+ if (!root_vpbl.pbl_vbase) {
ib_umem_release(region);
- nes_free_resource(nesadapter,
- nesadapter->allocated_mrs, stag_index);
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+ vpbl.pbl_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ stag_index);
kfree(nesmr);
- ibmr = ERR_PTR(-E2BIG);
+ ibmr = ERR_PTR(-ENOMEM);
goto reg_user_mr_err;
}
- if (root_pbl_index == 1) {
- root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
- 8192, &root_vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
- root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
- if (!root_vpbl.pbl_vbase) {
- ib_umem_release(region);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- goto reg_user_mr_err;
- }
- root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
- GFP_KERNEL);
- if (!root_vpbl.leaf_vpbl) {
- ib_umem_release(region);
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- goto reg_user_mr_err;
- }
- root_vpbl.pbl_vbase[0].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[0].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[0] = vpbl;
- }
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
- vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
- if (!vpbl.pbl_vbase) {
+ root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
+ GFP_KERNEL);
+ if (!root_vpbl.leaf_vpbl) {
ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
+ pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
+ root_vpbl.pbl_pbase);
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
+ vpbl.pbl_pbase);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs,
+ stag_index);
kfree(nesmr);
+ ibmr = ERR_PTR(-ENOMEM);
goto reg_user_mr_err;
}
- if (1 <= root_pbl_index) {
- root_vpbl.pbl_vbase[root_pbl_index].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[root_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
- root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
- }
- root_pbl_index++;
- cur_pbl_index = 0;
+ root_vpbl.pbl_vbase[0].pa_low =
+ cpu_to_le32((u32)vpbl.pbl_pbase);
+ root_vpbl.pbl_vbase[0].pa_high =
+ cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
+ root_vpbl.leaf_vpbl[0] = vpbl;
}
- if (single_page) {
- if (page_count != 0) {
- if ((last_dma_addr+4096) !=
- (sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096)))
- single_page = 0;
- last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096);
- } else {
- first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096);
- last_dma_addr = first_dma_addr;
- }
+ vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+ &vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
+ vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
+ if (!vpbl.pbl_vbase) {
+ ib_umem_release(region);
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ ibmr = ERR_PTR(-ENOMEM);
+ kfree(nesmr);
+ goto reg_user_mr_err;
+ }
+ if (1 <= root_pbl_index) {
+ root_vpbl.pbl_vbase[root_pbl_index].pa_low =
+ cpu_to_le32((u32)vpbl.pbl_pbase);
+ root_vpbl.pbl_vbase[root_pbl_index].pa_high =
+ cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
+ root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
+ }
+ root_pbl_index++;
+ cur_pbl_index = 0;
+ }
+ if (single_page) {
+ if (page_count != 0) {
+ if ((last_dma_addr+4096) !=
+ (sg_dma_address(sg)+
+ (page_index*4096)))
+ single_page = 0;
+ last_dma_addr = sg_dma_address(sg)+
+ (page_index*4096);
+ } else {
+ first_dma_addr = sg_dma_address(sg)+
+ (page_index*4096);
+ last_dma_addr = first_dma_addr;
}
-
- vpbl.pbl_vbase[cur_pbl_index].pa_low =
- cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096)));
- vpbl.pbl_vbase[cur_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096))) >> 32)));
- cur_pbl_index++;
- page_count++;
}
+
+ vpbl.pbl_vbase[cur_pbl_index].pa_low =
+ cpu_to_le32((u32)(sg_dma_address(sg)+
+ (page_index*4096)));
+ vpbl.pbl_vbase[cur_pbl_index].pa_high =
+ cpu_to_le32((u32)((((u64)(sg_dma_address(sg)+
+ (page_index*4096))) >> 32)));
+ cur_pbl_index++;
+ page_count++;
}
}
+
enough_pages:
nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x,"
" stag_key=0x%08x\n",
@@ -2613,25 +2613,28 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase,
(void *) nespbl->pbl_vbase, nespbl->user_base);
- list_for_each_entry(chunk, &region->chunk_list, list) {
- for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) {
- chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12;
- chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0;
- nespbl->page = sg_page(&chunk->page_list[0]);
- for (page_index=0; page_index<chunk_pages; page_index++) {
- ((__le32 *)pbl)[0] = cpu_to_le32((u32)
- (sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096)));
- ((__le32 *)pbl)[1] = cpu_to_le32(((u64)
- (sg_dma_address(&chunk->page_list[nmap_index])+
- (page_index*4096)))>>32);
- nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
- (unsigned long long)*pbl,
- le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
- pbl++;
- }
+ for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
+ chunk_pages = sg_dma_len(sg) >> 12;
+ chunk_pages += (sg_dma_len(sg) & (4096-1)) ? 1 : 0;
+ if (first_page) {
+ nespbl->page = sg_page(sg);
+ first_page = 0;
+ }
+
+ for (page_index = 0; page_index < chunk_pages; page_index++) {
+ ((__le32 *)pbl)[0] = cpu_to_le32((u32)
+ (sg_dma_address(sg)+
+ (page_index*4096)));
+ ((__le32 *)pbl)[1] = cpu_to_le32(((u64)
+ (sg_dma_address(sg)+
+ (page_index*4096)))>>32);
+ nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
+ (unsigned long long)*pbl,
+ le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
+ pbl++;
}
}
+
if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list);
} else {
@@ -2834,7 +2837,7 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->qp_context = nesqp->ibqp.qp_context;
init_attr->send_cq = nesqp->ibqp.send_cq;
init_attr->recv_cq = nesqp->ibqp.recv_cq;
- init_attr->srq = nesqp->ibqp.srq = nesqp->ibqp.srq;
+ init_attr->srq = nesqp->ibqp.srq;
init_attr->cap = attr->cap;
return 0;
@@ -3134,9 +3137,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
" original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
original_last_aeq, nesqp->last_aeq);
- if ((!ret) ||
- ((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) &&
- (ret))) {
+ if (!ret || original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
if (dont_wait) {
if (nesqp->cm_id && nesqp->hw_tcp_state != 0) {
nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d),"
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 0eff7c44d76..309b31c31ae 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -184,5 +184,6 @@ struct nes_qp {
u8 pau_busy;
u8 pau_pending;
u8 pau_state;
+ __u64 nesuqp_addr;
};
#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/hw/ocrdma/Kconfig b/drivers/infiniband/hw/ocrdma/Kconfig
index b5b6056c851..c0cddc0192d 100644
--- a/drivers/infiniband/hw/ocrdma/Kconfig
+++ b/drivers/infiniband/hw/ocrdma/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_OCRDMA
tristate "Emulex One Connect HCA support"
- depends on ETHERNET && NETDEVICES && PCI && (IPV6 || IPV6=n)
+ depends on ETHERNET && NETDEVICES && PCI && INET && (IPV6 || IPV6=n)
select NET_VENDOR_EMULEX
select BE2NET
---help---
diff --git a/drivers/infiniband/hw/ocrdma/Makefile b/drivers/infiniband/hw/ocrdma/Makefile
index 06a5bed12e4..d1bfd4f4cdd 100644
--- a/drivers/infiniband/hw/ocrdma/Makefile
+++ b/drivers/infiniband/hw/ocrdma/Makefile
@@ -2,4 +2,4 @@ ccflags-y := -Idrivers/net/ethernet/emulex/benet
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma.o
-ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o
+ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o ocrdma_stats.o
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index adc11d14f87..19011dbb930 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -35,17 +35,27 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
#include <be_roce.h>
#include "ocrdma_sli.h"
-#define OCRDMA_ROCE_DEV_VERSION "1.0.0"
+#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u"
+
+#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
+#define OC_NAME_SH OCRDMA_NODE_DESC "(Skyhawk)"
+#define OC_NAME_UNKNOWN OCRDMA_NODE_DESC "(Unknown)"
+
+#define OC_SKH_DEVICE_PF 0x720
+#define OC_SKH_DEVICE_VF 0x728
#define OCRDMA_MAX_AH 512
#define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
+/*
+ * Combine two 32-bit halves into one u64 (hi in bits 63..32, lo in bits 31..0).
+ * Both arguments and the whole expansion are parenthesized so that expression
+ * arguments (e.g. "a | b") keep their intended precedence.
+ */
+#define convert_to_64bit(lo, hi) (((u64)(hi) << 32) | (u64)(lo))
+
struct ocrdma_dev_attr {
u8 fw_ver[32];
u32 vendor_id;
@@ -65,6 +75,7 @@ struct ocrdma_dev_attr {
int max_mr;
u64 max_mr_size;
u32 max_num_mr_pbl;
+ int max_mw;
int max_fmr;
int max_map_per_fmr;
int max_pages_per_frmr;
@@ -83,6 +94,12 @@ struct ocrdma_dev_attr {
u8 num_ird_pages;
};
+/*
+ * Descriptor for one coherent DMA buffer. ocrdma_mbx_get_ctrl_attribs()
+ * fills it around dma_alloc_coherent()/dma_free_coherent():
+ *   va   - CPU address returned by the allocator
+ *   pa   - DMA handle placed in the mailbox SGE
+ *   size - buffer length in bytes
+ */
+struct ocrdma_dma_mem {
+ void *va;
+ dma_addr_t pa;
+ u32 size;
+};
+
struct ocrdma_pbl {
void *va;
dma_addr_t pa;
@@ -122,6 +139,52 @@ struct mqe_ctx {
bool cmd_done;
};
+/*
+ * Hardware-side memory region state, embedded in struct ocrdma_mr as "hwmr".
+ * This change only moves the definition above struct ocrdma_dev; the field
+ * list is unchanged.
+ * NOTE(review): the u8 members look like per-region access flags and the
+ * pbl/pbe members like page-buffer-list geometry - confirm against the users
+ * in ocrdma_hw.c / ocrdma_verbs.c before relying on that reading.
+ */
+struct ocrdma_hw_mr {
+ u32 lkey;
+ u8 fr_mr;
+ u8 remote_atomic;
+ u8 remote_rd;
+ u8 remote_wr;
+ u8 local_rd;
+ u8 local_wr;
+ u8 mw_bind;
+ u8 rsvd;
+ u64 len;
+ struct ocrdma_pbl *pbl_table;
+ u32 num_pbls;
+ u32 num_pbes;
+ u32 pbl_size;
+ u32 pbe_size;
+ u64 fbo;
+ u64 va;
+};
+
+/*
+ * Driver memory region object: the core struct ib_mr, a pointer to an
+ * ib_umem, and the ocrdma_hw_mr state for the region.
+ */
+struct ocrdma_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct ocrdma_hw_mr hwmr;
+};
+
+/*
+ * Handle for one statistics category. struct ocrdma_dev holds one instance
+ * per category (rsrc_stats, rx_stats, wqe_stats, tx_stats, ...); each carries
+ * a type tag and a back-pointer to its device.
+ * NOTE(review): presumably consumed by the debugfs code in ocrdma_stats.c -
+ * confirm there which values "type" takes.
+ */
+struct ocrdma_stats {
+ u8 type;
+ struct ocrdma_dev *dev;
+};
+
+/*
+ * Backing storage for the RDMA statistics mailbox command, as used by
+ * ocrdma_mbx_rdma_stats():
+ *   mqe  - the mailbox queue entry rebuilt for every query
+ *   va   - buffer holding the request and, afterwards, the response
+ *   pa   - DMA address of that buffer, placed in the MQE's SGE
+ *   size - buffer length; also used as the MQE payload length
+ * NOTE(review): debugfs_mem is not touched by the visible code; presumably a
+ * text buffer for debugfs output - confirm in ocrdma_stats.c.
+ */
+struct stats_mem {
+ struct ocrdma_mqe mqe;
+ void *va;
+ dma_addr_t pa;
+ u32 size;
+ char *debugfs_mem;
+};
+
+/*
+ * PHY details cached in struct ocrdma_dev. ocrdma_mbx_get_phy_info() fills
+ * phy_type and the two speed masks from the firmware response, and
+ * port_speed_string() tests the OR of the two masks against the
+ * OCRDMA_PHY_SPEED_* bits.
+ * NOTE(review): interface_type is not assigned by the visible code.
+ */
+struct phy_info {
+ u16 auto_speeds_supported;
+ u16 fixed_speeds_supported;
+ u16 phy_type;
+ u16 interface_type;
+};
+
struct ocrdma_dev {
struct ib_device ibdev;
struct ocrdma_dev_attr attr;
@@ -165,12 +228,30 @@ struct ocrdma_dev {
struct mqe_ctx mqe_ctx;
struct be_dev_info nic_info;
+ struct phy_info phy;
+ char model_number[32];
+ u32 hba_port_num;
struct list_head entry;
struct rcu_head rcu;
int id;
u64 stag_arr[OCRDMA_MAX_STAG];
u16 pvid;
+ u32 asic_id;
+
+ ulong last_stats_time;
+ struct mutex stats_lock; /* provide synch for debugfs operations */
+ struct stats_mem stats_mem;
+ struct ocrdma_stats rsrc_stats;
+ struct ocrdma_stats rx_stats;
+ struct ocrdma_stats wqe_stats;
+ struct ocrdma_stats tx_stats;
+ struct ocrdma_stats db_err_stats;
+ struct ocrdma_stats tx_qp_err_stats;
+ struct ocrdma_stats rx_qp_err_stats;
+ struct ocrdma_stats tx_dbg_stats;
+ struct ocrdma_stats rx_dbg_stats;
+ struct dentry *dir;
};
struct ocrdma_cq {
@@ -183,8 +264,8 @@ struct ocrdma_cq {
*/
u32 max_hw_cqe;
bool phase_change;
- bool armed, solicited;
- bool arm_needed;
+ bool deferred_arm, deferred_sol;
+ bool first_arm;
spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization
* to cq polling
@@ -197,6 +278,7 @@ struct ocrdma_cq {
struct ocrdma_ucontext *ucontext;
dma_addr_t pa;
u32 len;
+ u32 cqe_cnt;
/* head of all qp's sq and rq for which cqes need to be flushed
* by the software.
@@ -206,7 +288,6 @@ struct ocrdma_cq {
struct ocrdma_pd {
struct ib_pd ibpd;
- struct ocrdma_dev *dev;
struct ocrdma_ucontext *uctx;
u32 id;
int num_dpp_qp;
@@ -291,33 +372,6 @@ struct ocrdma_qp {
bool dpp_enabled;
u8 *ird_q_va;
bool signaled;
- u16 db_cache;
-};
-
-struct ocrdma_hw_mr {
- u32 lkey;
- u8 fr_mr;
- u8 remote_atomic;
- u8 remote_rd;
- u8 remote_wr;
- u8 local_rd;
- u8 local_wr;
- u8 mw_bind;
- u8 rsvd;
- u64 len;
- struct ocrdma_pbl *pbl_table;
- u32 num_pbls;
- u32 num_pbes;
- u32 pbl_size;
- u32 pbe_size;
- u64 fbo;
- u64 va;
-};
-
-struct ocrdma_mr {
- struct ib_mr ibmr;
- struct ib_umem *umem;
- struct ocrdma_hw_mr hwmr;
};
struct ocrdma_ucontext {
@@ -384,13 +438,6 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq)
return container_of(ibsrq, struct ocrdma_srq, ibsrq);
}
-
-static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp)
-{
- return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY &&
- qp->id < 128) ? 24 : 16);
-}
-
static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe)
{
int cqe_valid;
@@ -422,5 +469,53 @@ static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe)
OCRDMA_CQE_WRITE_IMM) ? 1 : 0;
}
+/*
+ * Choose the destination MAC for an address handle.
+ *
+ * A multicast destination GID is mapped to its MAC with rdma_get_mcast_mac();
+ * otherwise the MAC already present in @ah_attr->dmac is copied to @mac_addr.
+ * @dev is not used. Always returns 0.
+ */
+static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev,
+		struct ib_ah_attr *ah_attr, u8 *mac_addr)
+{
+	struct in6_addr dgid;
+
+	memcpy(&dgid, ah_attr->grh.dgid.raw, sizeof(dgid));
+
+	if (!rdma_is_multicast_addr(&dgid)) {
+		memcpy(mac_addr, ah_attr->dmac, ETH_ALEN);
+		return 0;
+	}
+
+	rdma_get_mcast_mac(&dgid, mac_addr);
+	return 0;
+}
+
+/*
+ * Return the descriptive HCA name for @dev, keyed on its PCI device ID:
+ * OC_NAME_SH for the Skyhawk PF/VF IDs, OC_NAME_UNKNOWN for anything else.
+ */
+static inline char *hca_name(struct ocrdma_dev *dev)
+{
+	if (dev->nic_info.pdev->device == OC_SKH_DEVICE_PF ||
+	    dev->nic_info.pdev->device == OC_SKH_DEVICE_VF)
+		return OC_NAME_SH;
+
+	return OC_NAME_UNKNOWN;
+}
+
+/*
+ * Look up the slot in dev->eq_tbl whose queue id equals @eqid.
+ *
+ * Returns the index of the first match among the dev->eq_cnt entries, or
+ * -EINVAL when no entry matches.
+ */
+static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev,
+		int eqid)
+{
+	int slot = 0;
+
+	while (slot < dev->eq_cnt) {
+		if (dev->eq_tbl[slot].q.id == eqid)
+			return slot;
+		slot++;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Return the ASIC generation number of @dev.
+ *
+ * For dev_family 0xF the ASIC id is read from PCI config space at
+ * OCRDMA_SLI_ASIC_ID_OFFSET the first time (while dev->asic_id is still 0)
+ * and cached in dev->asic_id. The generation is the field selected by
+ * OCRDMA_SLI_ASIC_GEN_NUM_MASK / OCRDMA_SLI_ASIC_GEN_NUM_SHIFT.
+ */
+static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev)
+{
+	/* Only hit config space when nothing has been cached yet. */
+	if (!dev->asic_id && dev->nic_info.dev_family == 0xF)
+		pci_read_config_dword(dev->nic_info.pdev,
+				      OCRDMA_SLI_ASIC_ID_OFFSET,
+				      &dev->asic_id);
+
+	return (dev->asic_id & OCRDMA_SLI_ASIC_GEN_NUM_MASK) >>
+		OCRDMA_SLI_ASIC_GEN_NUM_SHIFT;
+}
#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
index fbac8eb4403..1554cca5712 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
@@ -28,7 +28,8 @@
#ifndef __OCRDMA_ABI_H__
#define __OCRDMA_ABI_H__
-#define OCRDMA_ABI_VERSION 1
+#define OCRDMA_ABI_VERSION 2
+#define OCRDMA_BE_ROCE_ABI_VERSION 1
/* user kernel communication data structures. */
struct ocrdma_alloc_ucontext_resp {
@@ -107,9 +108,7 @@ struct ocrdma_create_qp_uresp {
u32 db_sq_offset;
u32 db_rq_offset;
u32 db_shift;
- u64 rsvd1;
- u64 rsvd2;
- u64 rsvd3;
+ u64 rsvd[11];
} __packed;
struct ocrdma_create_srq_uresp {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index ee499d94225..d4cc01f10c0 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -49,7 +49,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
ah->sgid_index = attr->grh.sgid_index;
- vlan_tag = rdma_get_vlan_id(&attr->grh.dgid);
+ vlan_tag = attr->vlan_id;
if (!vlan_tag || (vlan_tag > 0xFFF))
vlan_tag = dev->pvid;
if (vlan_tag && (vlan_tag < 0x1000)) {
@@ -64,7 +64,8 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
eth_sz = sizeof(struct ocrdma_eth_basic);
}
memcpy(&eth.smac[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
- status = ocrdma_resolve_dgid(dev, &attr->grh.dgid, &eth.dmac[0]);
+ memcpy(&eth.dmac[0], attr->dmac, ETH_ALEN);
+ status = ocrdma_resolve_dmac(dev, attr, &eth.dmac[0]);
if (status)
return status;
status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index,
@@ -84,6 +85,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
if (vlan_enabled)
ah->av->valid |= OCRDMA_AV_VLAN_VALID;
+ ah->av->valid = cpu_to_le32(ah->av->valid);
return status;
}
@@ -98,7 +100,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
if (!(attr->ah_flags & IB_AH_GRH))
return ERR_PTR(-EINVAL);
- ah = kzalloc(sizeof *ah, GFP_ATOMIC);
+ ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 4ed8235d2d3..3bbf2010a82 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -32,7 +32,6 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
-#include <rdma/ib_addr.h>
#include "ocrdma.h"
#include "ocrdma_hw.h"
@@ -150,7 +149,7 @@ enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps)
return IB_QPS_SQE;
case OCRDMA_QPS_ERR:
return IB_QPS_ERR;
- };
+ }
return IB_QPS_ERR;
}
@@ -171,7 +170,7 @@ static enum ocrdma_qp_state get_ocrdma_qp_state(enum ib_qp_state qps)
return OCRDMA_QPS_SQE;
case IB_QPS_ERR:
return OCRDMA_QPS_ERR;
- };
+ }
return OCRDMA_QPS_ERR;
}
@@ -243,6 +242,23 @@ static int ocrdma_get_mbx_errno(u32 status)
return err_num;
}
+/*
+ * Return a label for the fastest speed the PHY advertises.
+ *
+ * The fixed and auto-negotiated speed masks are OR-ed together and tested
+ * from fastest to slowest; the result is "40Gbps ", "10Gbps ", "1Gbps " or
+ * an empty string when none of those bits is set.
+ */
+char *port_speed_string(struct ocrdma_dev *dev)
+{
+	u16 mask = dev->phy.fixed_speeds_supported |
+		   dev->phy.auto_speeds_supported;
+
+	if (mask & OCRDMA_PHY_SPEED_40GBPS)
+		return "40Gbps ";
+	if (mask & OCRDMA_PHY_SPEED_10GBPS)
+		return "10Gbps ";
+	if (mask & OCRDMA_PHY_SPEED_1GBPS)
+		return "1Gbps ";
+
+	return "";
+}
+
+
static int ocrdma_get_mbx_cqe_errno(u16 cqe_status)
{
int err_num = -EINVAL;
@@ -332,6 +348,11 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len)
return mqe;
}
+/*
+ * Allocate one zero-filled struct ocrdma_mqe with GFP_KERNEL.
+ * Returns NULL on allocation failure; the caller releases it with kfree().
+ */
+static void *ocrdma_alloc_mqe(void)
+{
+	struct ocrdma_mqe *mqe = kzalloc(sizeof(*mqe), GFP_KERNEL);
+
+	return mqe;
+}
+
static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q)
{
dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma);
@@ -364,8 +385,8 @@ static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt,
}
}
-static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q,
- int queue_type)
+static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev,
+ struct ocrdma_queue_info *q, int queue_type)
{
u8 opcode = 0;
int status;
@@ -444,7 +465,7 @@ mbx_err:
return status;
}
-static int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
+int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
{
int irq;
@@ -574,6 +595,7 @@ static int ocrdma_create_mq(struct ocrdma_dev *dev)
if (status)
goto alloc_err;
+ dev->eq_tbl[0].cq_cnt++;
status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->eq_tbl[0].q);
if (status)
goto mbx_cq_free;
@@ -639,7 +661,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
{
struct ocrdma_qp *qp = NULL;
struct ocrdma_cq *cq = NULL;
- struct ib_event ib_evt;
+ struct ib_event ib_evt = { 0 };
int cq_event = 0;
int qp_event = 1;
int srq_event = 0;
@@ -664,6 +686,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
case OCRDMA_CQ_OVERRUN_ERROR:
ib_evt.element.cq = &cq->ibcq;
ib_evt.event = IB_EVENT_CQ_ERR;
+ cq_event = 1;
+ qp_event = 0;
break;
case OCRDMA_CQ_QPCAT_ERROR:
ib_evt.element.qp = &qp->ibqp;
@@ -725,6 +749,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
qp->srq->ibsrq.
srq_context);
} else if (dev_event) {
+ pr_err("%s: Fatal event received\n", dev->ibdev.name);
ib_dispatch_event(&ib_evt);
}
@@ -752,7 +777,6 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
}
}
-
static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
{
/* async CQE processing */
@@ -799,8 +823,6 @@ static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
ocrdma_process_acqe(dev, cqe);
else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK)
ocrdma_process_mcqe(dev, cqe);
- else
- pr_err("%s() cqe->compl is not set.\n", __func__);
memset(cqe, 0, sizeof(struct ocrdma_mcqe));
ocrdma_mcq_inc_tail(dev);
}
@@ -858,16 +880,8 @@ static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx)
BUG();
cq = dev->cq_tbl[cq_idx];
- if (cq == NULL) {
- pr_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx);
+ if (cq == NULL)
return;
- }
- spin_lock_irqsave(&cq->cq_lock, flags);
- cq->armed = false;
- cq->solicited = false;
- spin_unlock_irqrestore(&cq->cq_lock, flags);
-
- ocrdma_ring_cq_db(dev, cq->id, false, false, 0);
if (cq->ibcq.comp_handler) {
spin_lock_irqsave(&cq->comp_handler_lock, flags);
@@ -892,27 +906,35 @@ static irqreturn_t ocrdma_irq_handler(int irq, void *handle)
struct ocrdma_dev *dev = eq->dev;
struct ocrdma_eqe eqe;
struct ocrdma_eqe *ptr;
- u16 eqe_popped = 0;
u16 cq_id;
- while (1) {
+ int budget = eq->cq_cnt;
+
+ do {
ptr = ocrdma_get_eqe(eq);
eqe = *ptr;
ocrdma_le32_to_cpu(&eqe, sizeof(eqe));
if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0)
break;
- eqe_popped += 1;
+
ptr->id_valid = 0;
+ /* ring the EQ doorbell as soon as the entry is consumed. */
+ ocrdma_ring_eq_db(dev, eq->q.id, false, true, 1);
/* check whether its CQE or not. */
if ((eqe.id_valid & OCRDMA_EQE_FOR_CQE_MASK) == 0) {
cq_id = eqe.id_valid >> OCRDMA_EQE_RESOURCE_ID_SHIFT;
ocrdma_cq_handler(dev, cq_id);
}
ocrdma_eq_inc_tail(eq);
- }
- ocrdma_ring_eq_db(dev, eq->q.id, true, true, eqe_popped);
- /* Ring EQ doorbell with num_popped to 0 to enable interrupts again. */
- if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX)
- ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
+
+ /* There can be a stale EQE after the last bound CQ is
+ * destroyed. EQE valid and budget == 0 implies this.
+ */
+ if (budget)
+ budget--;
+
+ } while (budget);
+
+ ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
return IRQ_HANDLED;
}
@@ -949,7 +971,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
{
int status = 0;
u16 cqe_status, ext_status;
- struct ocrdma_mqe *rsp;
+ struct ocrdma_mqe *rsp_mqe;
+ struct ocrdma_mbx_rsp *rsp = NULL;
mutex_lock(&dev->mqe_ctx.lock);
ocrdma_post_mqe(dev, mqe);
@@ -958,23 +981,61 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
goto mbx_err;
cqe_status = dev->mqe_ctx.cqe_status;
ext_status = dev->mqe_ctx.ext_status;
- rsp = ocrdma_get_mqe_rsp(dev);
- ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe)));
+ rsp_mqe = ocrdma_get_mqe_rsp(dev);
+ ocrdma_copy_le32_to_cpu(mqe, rsp_mqe, (sizeof(*mqe)));
+ if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
+ OCRDMA_MQE_HDR_EMB_SHIFT)
+ rsp = &mqe->u.rsp;
+
if (cqe_status || ext_status) {
- pr_err("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
- __func__,
- (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
- OCRDMA_MBX_RSP_OPCODE_SHIFT, cqe_status, ext_status);
+ pr_err("%s() cqe_status=0x%x, ext_status=0x%x,",
+ __func__, cqe_status, ext_status);
+ if (rsp) {
+ /* This is for embedded cmds. */
+ pr_err("opcode=0x%x, subsystem=0x%x\n",
+ (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
+ OCRDMA_MBX_RSP_OPCODE_SHIFT,
+ (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >>
+ OCRDMA_MBX_RSP_SUBSYS_SHIFT);
+ }
status = ocrdma_get_mbx_cqe_errno(cqe_status);
goto mbx_err;
}
- if (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK)
+ /* For non embedded, rsp errors are handled in ocrdma_nonemb_mbx_cmd */
+ if (rsp && (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK))
status = ocrdma_get_mbx_errno(mqe->u.rsp.status);
mbx_err:
mutex_unlock(&dev->mqe_ctx.lock);
return status;
}
+/*
+ * Issue a non-embedded mailbox command and evaluate its response header.
+ *
+ * @mqe:        command entry; it must not have the "embedded" bit set
+ *              (BUG() otherwise).
+ * @payload_va: CPU address of the external payload buffer, which starts with
+ *              a struct ocrdma_mbx_rsp once the command completes.
+ *
+ * ocrdma_mbx_cmd() reports only CQE-level failures for non-embedded
+ * commands, so the status word in the payload is checked here. On any
+ * failure the opcode and subsystem from the payload header are logged.
+ * Returns 0 on success or a negative errno.
+ */
+static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe,
+		void *payload_va)
+{
+	struct ocrdma_mbx_rsp *resp = payload_va;
+	int rc;
+
+	if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
+	    OCRDMA_MQE_HDR_EMB_SHIFT)
+		BUG();
+
+	rc = ocrdma_mbx_cmd(dev, mqe);
+
+	/* CQE was clean: now look at the status carried in the payload. */
+	if (!rc && (resp->status & OCRDMA_MBX_RSP_STATUS_MASK))
+		rc = ocrdma_get_mbx_errno(resp->status);
+
+	if (rc)
+		pr_err("opcode=0x%x, subsystem=0x%x\n",
+		       (resp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
+		       OCRDMA_MBX_RSP_OPCODE_SHIFT,
+		       (resp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >>
+		       OCRDMA_MBX_RSP_SUBSYS_SHIFT);
+
+	return rc;
+}
+
static void ocrdma_get_attr(struct ocrdma_dev *dev,
struct ocrdma_dev_attr *attr,
struct ocrdma_mbx_query_config *rsp)
@@ -985,6 +1046,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
attr->max_qp =
(rsp->qp_srq_cq_ird_ord & OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT;
+ attr->max_srq =
+ (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
+ OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
attr->max_send_sge = ((rsp->max_write_send_sge &
OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT);
@@ -1000,9 +1064,6 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
- attr->max_srq =
- (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
- OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp &
OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >>
OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT;
@@ -1015,6 +1076,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
attr->local_ca_ack_delay = (rsp->max_pd_ca_ack_delay &
OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK) >>
OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT;
+ attr->max_mw = rsp->max_mw;
attr->max_mr = rsp->max_mr;
attr->max_mr_size = ~0ull;
attr->max_fmr = 0;
@@ -1036,7 +1098,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
attr->max_inline_data =
attr->wqe_size - (sizeof(struct ocrdma_hdr_wqe) +
sizeof(struct ocrdma_sge));
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
attr->ird = 1;
attr->ird_page_size = OCRDMA_MIN_Q_PAGE_SIZE;
attr->num_ird_pages = MAX_OCRDMA_IRD_PAGES;
@@ -1110,6 +1172,96 @@ mbx_err:
return status;
}
+/*
+ * Refresh the RDMA statistics held in dev->stats_mem.va.
+ *
+ * The previous contents of the buffer are saved first, the buffer is then
+ * reused as the OCRDMA_CMD_GET_RDMA_STATS request and sent as a non-embedded
+ * mailbox command with a single SGE. On success the response is converted
+ * with ocrdma_le32_to_cpu(); on failure the saved contents are copied back.
+ *
+ * @reset: when true, it is stored in the request's reset_stats field.
+ *
+ * Returns 0 on success, -ENOMEM if the save buffer cannot be allocated, or
+ * the error from ocrdma_nonemb_mbx_cmd().
+ */
+int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset)
+{
+	struct ocrdma_rdma_stats_req *req = dev->stats_mem.va;
+	struct ocrdma_mqe *mqe = &dev->stats_mem.mqe;
+	struct ocrdma_rdma_stats_resp *saved;
+	int rc;
+
+	saved = kzalloc(sizeof(*saved), GFP_KERNEL);
+	if (!saved)
+		return -ENOMEM;
+
+	/* Keep the last good snapshot before the buffer is wiped. */
+	memcpy(saved, req, sizeof(struct ocrdma_rdma_stats_resp));
+	memset(req, 0, dev->stats_mem.size);
+
+	/* Describe the payload buffer to the firmware through one SGE. */
+	memset(mqe, 0, sizeof(*mqe));
+	mqe->hdr.pyld_len = dev->stats_mem.size;
+	mqe->hdr.spcl_sge_cnt_emb |=
+		(1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+		OCRDMA_MQE_HDR_SGE_CNT_MASK;
+	mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dev->stats_mem.pa & 0xffffffff);
+	mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dev->stats_mem.pa);
+	mqe->u.nonemb_req.sge[0].len = dev->stats_mem.size;
+
+	ocrdma_init_mch((struct ocrdma_mbx_hdr *)req,
+			OCRDMA_CMD_GET_RDMA_STATS,
+			OCRDMA_SUBSYS_ROCE,
+			dev->stats_mem.size);
+	if (reset)
+		req->reset_stats = reset;
+
+	rc = ocrdma_nonemb_mbx_cmd(dev, mqe, dev->stats_mem.va);
+	if (!rc)
+		ocrdma_le32_to_cpu(req, dev->stats_mem.size);
+	else
+		/* Mailbox failed: put the saved snapshot back. */
+		memcpy(req, saved, sizeof(struct ocrdma_rdma_stats_resp));
+
+	kfree(saved);
+	return rc;
+}
+
+/*
+ * Query the controller attributes (OCRDMA_CMD_GET_CTRL_ATTRIBUTES) with a
+ * non-embedded mailbox command and cache the HBA port number and the model
+ * string in @dev.
+ *
+ * A temporary coherent DMA buffer holds the request header and, afterwards,
+ * the response; it is released before returning.
+ *
+ * Returns 0 on success, -ENOMEM if the MQE or the DMA buffer cannot be
+ * allocated, or the error from ocrdma_nonemb_mbx_cmd().
+ */
+static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
+{
+	int status = -ENOMEM;
+	struct ocrdma_dma_mem dma;
+	struct ocrdma_mqe *mqe;
+	struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp;
+	struct mgmt_hba_attribs *hba_attribs;
+
+	/* ocrdma_alloc_mqe() uses kzalloc(), so the MQE arrives zeroed. */
+	mqe = ocrdma_alloc_mqe();
+	if (!mqe)
+		return status;
+
+	dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp);
+	dma.va = dma_alloc_coherent(&dev->nic_info.pdev->dev,
+				    dma.size, &dma.pa, GFP_KERNEL);
+	if (!dma.va)
+		goto free_mqe;
+
+	/* Describe the payload buffer to the firmware through one SGE. */
+	mqe->hdr.pyld_len = dma.size;
+	mqe->hdr.spcl_sge_cnt_emb |=
+		(1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+		OCRDMA_MQE_HDR_SGE_CNT_MASK;
+	mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dma.pa & 0xffffffff);
+	mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dma.pa);
+	mqe->u.nonemb_req.sge[0].len = dma.size;
+
+	memset(dma.va, 0, dma.size);
+	ocrdma_init_mch((struct ocrdma_mbx_hdr *)dma.va,
+			OCRDMA_CMD_GET_CTRL_ATTRIBUTES,
+			OCRDMA_SUBSYS_COMMON,
+			dma.size);
+
+	status = ocrdma_nonemb_mbx_cmd(dev, mqe, dma.va);
+	if (!status) {
+		ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va;
+		hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs;
+
+		dev->hba_port_num = hba_attribs->phy_port;
+		/*
+		 * strncpy() does not terminate on truncation: bound the copy
+		 * by the destination size and terminate explicitly instead of
+		 * relying on the buffer having been zeroed beforehand.
+		 */
+		strncpy(dev->model_number,
+			hba_attribs->controller_model_number,
+			sizeof(dev->model_number) - 1);
+		dev->model_number[sizeof(dev->model_number) - 1] = '\0';
+	}
+	dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa);
+free_mqe:
+	kfree(mqe);
+	return status;
+}
+
static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev)
{
int status = -ENOMEM;
@@ -1157,6 +1309,35 @@ mbx_err:
return status;
}
+static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev)
+{
+ int status = -ENOMEM;
+ struct ocrdma_mqe *cmd;
+ struct ocrdma_get_phy_info_rsp *rsp;
+
+ cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_PHY_DETAILS, sizeof(*cmd));
+ if (!cmd)
+ return status;
+
+ ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
+ OCRDMA_CMD_PHY_DETAILS, OCRDMA_SUBSYS_COMMON,
+ sizeof(*cmd));
+
+ status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ if (status)
+ goto mbx_err;
+
+ rsp = (struct ocrdma_get_phy_info_rsp *)cmd;
+ dev->phy.phy_type = le16_to_cpu(rsp->phy_type);
+ dev->phy.auto_speeds_supported =
+ le16_to_cpu(rsp->auto_speeds_supported);
+ dev->phy.fixed_speeds_supported =
+ le16_to_cpu(rsp->fixed_speeds_supported);
+mbx_err:
+ kfree(cmd);
+ return status;
+}
+
int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
{
int status = -ENOMEM;
@@ -1226,7 +1407,7 @@ static int ocrdma_build_q_conf(u32 *num_entries, int entry_size,
static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
{
- int i ;
+ int i;
int status = 0;
int max_ah;
struct ocrdma_create_ah_tbl *cmd;
@@ -1357,12 +1538,10 @@ static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id)
int i;
mutex_lock(&dev->dev_lock);
- for (i = 0; i < dev->eq_cnt; i++) {
- if (dev->eq_tbl[i].q.id != eq_id)
- continue;
- dev->eq_tbl[i].cq_cnt -= 1;
- break;
- }
+ i = ocrdma_get_eq_table_index(dev, eq_id);
+ if (i == -EINVAL)
+ BUG();
+ dev->eq_tbl[i].cq_cnt -= 1;
mutex_unlock(&dev->dev_lock);
}
@@ -1380,7 +1559,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
__func__, dev->id, dev->attr.max_cqe, entries);
return -EINVAL;
}
- if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY))
+ if (dpp_cq && (ocrdma_get_asic_type(dev) != OCRDMA_ASIC_GEN_SKH_R))
return -EINVAL;
if (dpp_cq) {
@@ -1417,6 +1596,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
cq->eqn = ocrdma_bind_eq(dev);
cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3;
cqe_count = cq->len / cqe_size;
+ cq->cqe_cnt = cqe_count;
if (cqe_count > 1024) {
/* Set cnt to 3 to indicate more than 1024 cq entries */
cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT);
@@ -1439,7 +1619,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
}
/* shared eq between all the consumer cqs. */
cmd->cmd.eqn = cq->eqn;
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
if (dpp_cq)
cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP <<
OCRDMA_CREATE_CQ_TYPE_SHIFT;
@@ -1484,12 +1664,9 @@ int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
(cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) &
OCRDMA_DESTROY_CQ_QID_MASK;
- ocrdma_unbind_eq(dev, cq->eqn);
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
- if (status)
- goto mbx_err;
+ ocrdma_unbind_eq(dev, cq->eqn);
dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa);
-mbx_err:
kfree(cmd);
return status;
}
@@ -1783,7 +1960,7 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
u32 max_sges = attrs->cap.max_send_sge;
/* QP1 may exceed 127 */
- max_wqe_allocated = min_t(int, attrs->cap.max_send_wr + 1,
+ max_wqe_allocated = min_t(u32, attrs->cap.max_send_wr + 1,
dev->attr.max_wqe);
status = ocrdma_build_q_conf(&max_wqe_allocated,
@@ -1982,7 +2159,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
break;
default:
return -EINVAL;
- };
+ }
cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_QP, sizeof(*cmd));
if (!cmd)
@@ -2029,8 +2206,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK;
qp->rq_cq = cq;
- if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
- (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) {
+ if (pd->dpp_enabled && pd->num_dpp_qp) {
ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
dpp_cq_id);
}
@@ -2076,23 +2252,6 @@ mbx_err:
return status;
}
-int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid,
- u8 *mac_addr)
-{
- struct in6_addr in6;
-
- memcpy(&in6, dgid, sizeof in6);
- if (rdma_is_multicast_addr(&in6)) {
- rdma_get_mcast_mac(&in6, mac_addr);
- } else if (rdma_link_local_addr(&in6)) {
- rdma_get_ll_mac(&in6, mac_addr);
- } else {
- pr_err("%s() fail to resolve mac_addr.\n", __func__);
- return -EINVAL;
- }
- return 0;
-}
-
static int ocrdma_set_av_params(struct ocrdma_qp *qp,
struct ocrdma_modify_qp *cmd,
struct ib_qp_attr *attrs)
@@ -2116,7 +2275,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
sizeof(cmd->params.dgid));
status = ocrdma_query_gid(&qp->dev->ibdev, 1,
- ah_attr->grh.sgid_index, &sgid);
+ ah_attr->grh.sgid_index, &sgid);
if (status)
return status;
@@ -2126,14 +2285,14 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
qp->sgid_idx = ah_attr->grh.sgid_index;
memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
- ocrdma_resolve_dgid(qp->dev, &ah_attr->grh.dgid, &mac_addr[0]);
+ ocrdma_resolve_dmac(qp->dev, ah_attr, &mac_addr[0]);
cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
(mac_addr[2] << 16) | (mac_addr[3] << 24);
/* convert them to LE format. */
ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
- vlan_id = rdma_get_vlan_id(&sgid);
+ vlan_id = ah_attr->vlan_id;
if (vlan_id && (vlan_id < 0x1000)) {
cmd->params.vlan_dmac_b4_to_b5 |=
vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
@@ -2144,8 +2303,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
struct ocrdma_modify_qp *cmd,
- struct ib_qp_attr *attrs, int attr_mask,
- enum ib_qp_state old_qps)
+ struct ib_qp_attr *attrs, int attr_mask)
{
int status = 0;
@@ -2250,8 +2408,7 @@ pmtu_err:
}
int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
- struct ib_qp_attr *attrs, int attr_mask,
- enum ib_qp_state old_qps)
+ struct ib_qp_attr *attrs, int attr_mask)
{
int status = -ENOMEM;
struct ocrdma_modify_qp *cmd;
@@ -2274,7 +2431,7 @@ int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
OCRDMA_QP_PARAMS_STATE_MASK;
}
- status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask, old_qps);
+ status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask);
if (status)
goto mbx_err;
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
@@ -2505,7 +2662,7 @@ static int ocrdma_create_eqs(struct ocrdma_dev *dev)
for (i = 0; i < num_eq; i++) {
status = ocrdma_create_eq(dev, &dev->eq_tbl[i],
- OCRDMA_EQ_LEN);
+ OCRDMA_EQ_LEN);
if (status) {
status = -EINVAL;
break;
@@ -2550,6 +2707,13 @@ int ocrdma_init_hw(struct ocrdma_dev *dev)
status = ocrdma_mbx_create_ah_tbl(dev);
if (status)
goto conf_err;
+ status = ocrdma_mbx_get_phy_info(dev);
+ if (status)
+ goto conf_err;
+ status = ocrdma_mbx_get_ctrl_attribs(dev);
+ if (status)
+ goto conf_err;
+
return 0;
conf_err:
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index f2a89d4cc7c..e513f729314 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -94,7 +94,6 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed);
int ocrdma_query_config(struct ocrdma_dev *,
struct ocrdma_mbx_query_config *config);
-int ocrdma_resolve_dgid(struct ocrdma_dev *, union ib_gid *dgid, u8 *mac_addr);
int ocrdma_mbx_alloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
@@ -113,8 +112,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs,
u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset,
u16 *dpp_credit_lmt);
int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *,
- struct ib_qp_attr *attrs, int attr_mask,
- enum ib_qp_state old_qps);
+ struct ib_qp_attr *attrs, int attr_mask);
int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *,
struct ocrdma_qp_params *param);
int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *);
@@ -133,5 +131,8 @@ int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state,
bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
void ocrdma_flush_qp(struct ocrdma_qp *);
+int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq);
+int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset);
+char *port_speed_string(struct ocrdma_dev *dev);
#endif /* __OCRDMA_HW_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 56e004940f1..7c504e07974 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -39,10 +39,11 @@
#include "ocrdma_ah.h"
#include "be_roce.h"
#include "ocrdma_hw.h"
+#include "ocrdma_stats.h"
#include "ocrdma_abi.h"
-MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION);
-MODULE_DESCRIPTION("Emulex RoCE HCA Driver");
+MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION);
+MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION);
MODULE_AUTHOR("Emulex Corporation");
MODULE_LICENSE("GPL");
@@ -67,46 +68,24 @@ void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
guid[7] = mac_addr[5];
}
-static void ocrdma_build_sgid_mac(union ib_gid *sgid, unsigned char *mac_addr,
- bool is_vlan, u16 vlan_id)
-{
- sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- sgid->raw[8] = mac_addr[0] ^ 2;
- sgid->raw[9] = mac_addr[1];
- sgid->raw[10] = mac_addr[2];
- if (is_vlan) {
- sgid->raw[11] = vlan_id >> 8;
- sgid->raw[12] = vlan_id & 0xff;
- } else {
- sgid->raw[11] = 0xff;
- sgid->raw[12] = 0xfe;
- }
- sgid->raw[13] = mac_addr[3];
- sgid->raw[14] = mac_addr[4];
- sgid->raw[15] = mac_addr[5];
-}
-
-static bool ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
- bool is_vlan, u16 vlan_id)
+static bool ocrdma_add_sgid(struct ocrdma_dev *dev, union ib_gid *new_sgid)
{
int i;
- union ib_gid new_sgid;
unsigned long flags;
memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid));
- ocrdma_build_sgid_mac(&new_sgid, mac_addr, is_vlan, vlan_id);
spin_lock_irqsave(&dev->sgid_lock, flags);
for (i = 0; i < OCRDMA_MAX_SGID; i++) {
if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid,
sizeof(union ib_gid))) {
/* found free entry */
- memcpy(&dev->sgid_tbl[i], &new_sgid,
+ memcpy(&dev->sgid_tbl[i], new_sgid,
sizeof(union ib_gid));
spin_unlock_irqrestore(&dev->sgid_lock, flags);
return true;
- } else if (!memcmp(&dev->sgid_tbl[i], &new_sgid,
+ } else if (!memcmp(&dev->sgid_tbl[i], new_sgid,
sizeof(union ib_gid))) {
/* entry already present, no addition is required. */
spin_unlock_irqrestore(&dev->sgid_lock, flags);
@@ -117,20 +96,17 @@ static bool ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
return false;
}
-static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
- bool is_vlan, u16 vlan_id)
+static bool ocrdma_del_sgid(struct ocrdma_dev *dev, union ib_gid *sgid)
{
int found = false;
int i;
- union ib_gid sgid;
unsigned long flags;
- ocrdma_build_sgid_mac(&sgid, mac_addr, is_vlan, vlan_id);
spin_lock_irqsave(&dev->sgid_lock, flags);
/* first is default sgid, which cannot be deleted. */
for (i = 1; i < OCRDMA_MAX_SGID; i++) {
- if (!memcmp(&dev->sgid_tbl[i], &sgid, sizeof(union ib_gid))) {
+ if (!memcmp(&dev->sgid_tbl[i], sgid, sizeof(union ib_gid))) {
/* found matching entry */
memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid));
found = true;
@@ -141,75 +117,18 @@ static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
return found;
}
-static void ocrdma_add_default_sgid(struct ocrdma_dev *dev)
-{
- /* GID Index 0 - Invariant manufacturer-assigned EUI-64 */
- union ib_gid *sgid = &dev->sgid_tbl[0];
-
- sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- ocrdma_get_guid(dev, &sgid->raw[8]);
-}
-
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
-static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
-{
- struct net_device *netdev, *tmp;
- u16 vlan_id;
- bool is_vlan;
-
- netdev = dev->nic_info.netdev;
-
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, tmp) {
- if (netdev == tmp || vlan_dev_real_dev(tmp) == netdev) {
- if (!netif_running(tmp) || !netif_oper_up(tmp))
- continue;
- if (netdev != tmp) {
- vlan_id = vlan_dev_vlan_id(tmp);
- is_vlan = true;
- } else {
- is_vlan = false;
- vlan_id = 0;
- tmp = netdev;
- }
- ocrdma_add_sgid(dev, tmp->dev_addr, is_vlan, vlan_id);
- }
- }
- rcu_read_unlock();
-}
-#else
-static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
-{
-
-}
-#endif /* VLAN */
-
-static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev)
-{
- ocrdma_add_default_sgid(dev);
- ocrdma_add_vlan_sgids(dev);
- return 0;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-
-static int ocrdma_inet6addr_event(struct notifier_block *notifier,
- unsigned long event, void *ptr)
+static int ocrdma_addr_event(unsigned long event, struct net_device *netdev,
+ union ib_gid *gid)
{
- struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
- struct net_device *netdev = ifa->idev->dev;
struct ib_event gid_event;
struct ocrdma_dev *dev;
bool found = false;
bool updated = false;
bool is_vlan = false;
- u16 vid = 0;
is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN;
- if (is_vlan) {
- vid = vlan_dev_vlan_id(netdev);
- netdev = vlan_dev_real_dev(netdev);
- }
+ if (is_vlan)
+ netdev = rdma_vlan_dev_real_dev(netdev);
rcu_read_lock();
list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) {
@@ -222,16 +141,14 @@ static int ocrdma_inet6addr_event(struct notifier_block *notifier,
if (!found)
return NOTIFY_DONE;
- if (!rdma_link_local_addr((struct in6_addr *)&ifa->addr))
- return NOTIFY_DONE;
mutex_lock(&dev->dev_lock);
switch (event) {
case NETDEV_UP:
- updated = ocrdma_add_sgid(dev, netdev->dev_addr, is_vlan, vid);
+ updated = ocrdma_add_sgid(dev, gid);
break;
case NETDEV_DOWN:
- updated = ocrdma_del_sgid(dev, netdev->dev_addr, is_vlan, vid);
+ updated = ocrdma_del_sgid(dev, gid);
break;
default:
break;
@@ -247,6 +164,32 @@ static int ocrdma_inet6addr_event(struct notifier_block *notifier,
return NOTIFY_OK;
}
+static int ocrdma_inetaddr_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ union ib_gid gid;
+ struct net_device *netdev = ifa->ifa_dev->dev;
+
+ ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
+ return ocrdma_addr_event(event, netdev, &gid);
+}
+
+static struct notifier_block ocrdma_inetaddr_notifier = {
+ .notifier_call = ocrdma_inetaddr_event
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+static int ocrdma_inet6addr_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+ union ib_gid *gid = (union ib_gid *)&ifa->addr;
+ struct net_device *netdev = ifa->idev->dev;
+ return ocrdma_addr_event(event, netdev, gid);
+}
+
static struct notifier_block ocrdma_inet6addr_notifier = {
.notifier_call = ocrdma_inet6addr_event
};
@@ -344,7 +287,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.process_mad = ocrdma_process_mad;
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
dev->ibdev.uverbs_cmd_mask |=
OCRDMA_UVERBS(CREATE_SRQ) |
OCRDMA_UVERBS(MODIFY_SRQ) |
@@ -396,9 +339,42 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev)
kfree(dev->sgid_tbl);
}
+/* OCRDMA sysfs interface */
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]);
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+
+static struct device_attribute *ocrdma_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_ver
+};
+
+static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
+ device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
+}
+
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
- int status = 0;
+ int status = 0, i;
struct ocrdma_dev *dev;
dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
@@ -423,19 +399,29 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
if (status)
goto alloc_err;
- status = ocrdma_build_sgid_tbl(dev);
- if (status)
- goto alloc_err;
-
status = ocrdma_register_device(dev);
if (status)
goto alloc_err;
+ for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
+ if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
+ goto sysfs_err;
spin_lock(&ocrdma_devlist_lock);
list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
spin_unlock(&ocrdma_devlist_lock);
+ /* Init stats */
+ ocrdma_add_port_stats(dev);
+
+ pr_info("%s %s: %s \"%s\" port %d\n",
+ dev_name(&dev->nic_info.pdev->dev), hca_name(dev),
+ port_speed_string(dev), dev->model_number,
+ dev->hba_port_num);
+ pr_info("%s ocrdma%d driver loaded successfully\n",
+ dev_name(&dev->nic_info.pdev->dev), dev->id);
return dev;
+sysfs_err:
+ ocrdma_remove_sysfiles(dev);
alloc_err:
ocrdma_free_resources(dev);
ocrdma_cleanup_hw(dev);
@@ -452,9 +438,6 @@ static void ocrdma_remove_free(struct rcu_head *rcu)
{
struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
- ocrdma_free_resources(dev);
- ocrdma_cleanup_hw(dev);
-
idr_remove(&ocrdma_dev_id, dev->id);
kfree(dev->mbx_cmd);
ib_dealloc_device(&dev->ibdev);
@@ -465,11 +448,18 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
/* first unregister with stack to stop all the active traffic
* of the registered clients.
*/
+ ocrdma_rem_port_stats(dev);
+ ocrdma_remove_sysfiles(dev);
+
ib_unregister_device(&dev->ibdev);
spin_lock(&ocrdma_devlist_lock);
list_del_rcu(&dev->entry);
spin_unlock(&ocrdma_devlist_lock);
+
+ ocrdma_free_resources(dev);
+ ocrdma_cleanup_hw(dev);
+
call_rcu(&dev->rcu, ocrdma_remove_free);
}
@@ -498,7 +488,7 @@ static int ocrdma_close(struct ocrdma_dev *dev)
cur_qp = dev->qp_tbl;
for (i = 0; i < OCRDMA_MAX_QP; i++) {
qp = cur_qp[i];
- if (qp) {
+ if (qp && qp->ibqp.qp_type != IB_QPT_GSI) {
/* change the QP state to ERROR */
_ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask);
@@ -531,7 +521,7 @@ static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
case BE_DEV_DOWN:
ocrdma_close(dev);
break;
- };
+ }
}
static struct ocrdma_driver ocrdma_drv = {
@@ -539,6 +529,7 @@ static struct ocrdma_driver ocrdma_drv = {
.add = ocrdma_add,
.remove = ocrdma_remove,
.state_change_handler = ocrdma_event_handler,
+ .be_abi_version = OCRDMA_BE_ROCE_ABI_VERSION,
};
static void ocrdma_unregister_inet6addr_notifier(void)
@@ -548,20 +539,37 @@ static void ocrdma_unregister_inet6addr_notifier(void)
#endif
}
+static void ocrdma_unregister_inetaddr_notifier(void)
+{
+ unregister_inetaddr_notifier(&ocrdma_inetaddr_notifier);
+}
+
static int __init ocrdma_init_module(void)
{
int status;
+ ocrdma_init_debugfs();
+
+ status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier);
+ if (status)
+ return status;
+
#if IS_ENABLED(CONFIG_IPV6)
status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
if (status)
- return status;
+ goto err_notifier6;
#endif
status = be_roce_register_driver(&ocrdma_drv);
if (status)
- ocrdma_unregister_inet6addr_notifier();
+ goto err_be_reg;
+ return 0;
+
+err_be_reg:
+ ocrdma_unregister_inet6addr_notifier();
+err_notifier6:
+ ocrdma_unregister_inetaddr_notifier();
return status;
}
@@ -569,6 +577,8 @@ static void __exit ocrdma_exit_module(void)
{
be_roce_unregister_driver(&ocrdma_drv);
ocrdma_unregister_inet6addr_notifier();
+ ocrdma_unregister_inetaddr_notifier();
+ ocrdma_rem_debugfs();
}
module_init(ocrdma_init_module);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 9f9570ec3c2..96c9ee602ba 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -30,8 +30,16 @@
#define Bit(_b) (1 << (_b))
-#define OCRDMA_GEN1_FAMILY 0xB
-#define OCRDMA_GEN2_FAMILY 0x2
+enum {
+ OCRDMA_ASIC_GEN_SKH_R = 0x04,
+ OCRDMA_ASIC_GEN_LANCER = 0x0B
+};
+
+enum {
+ OCRDMA_ASIC_REV_A0 = 0x00,
+ OCRDMA_ASIC_REV_B0 = 0x10,
+ OCRDMA_ASIC_REV_C0 = 0x20
+};
#define OCRDMA_SUBSYS_ROCE 10
enum {
@@ -64,6 +72,7 @@ enum {
OCRDMA_CMD_ATTACH_MCAST,
OCRDMA_CMD_DETACH_MCAST,
+ OCRDMA_CMD_GET_RDMA_STATS,
OCRDMA_CMD_MAX
};
@@ -74,12 +83,14 @@ enum {
OCRDMA_CMD_CREATE_CQ = 12,
OCRDMA_CMD_CREATE_EQ = 13,
OCRDMA_CMD_CREATE_MQ = 21,
+ OCRDMA_CMD_GET_CTRL_ATTRIBUTES = 32,
OCRDMA_CMD_GET_FW_VER = 35,
OCRDMA_CMD_DELETE_MQ = 53,
OCRDMA_CMD_DELETE_CQ = 54,
OCRDMA_CMD_DELETE_EQ = 55,
OCRDMA_CMD_GET_FW_CONFIG = 58,
- OCRDMA_CMD_CREATE_MQ_EXT = 90
+ OCRDMA_CMD_CREATE_MQ_EXT = 90,
+ OCRDMA_CMD_PHY_DETAILS = 102
};
enum {
@@ -103,7 +114,10 @@ enum {
OCRDMA_DB_GEN2_SRQ_OFFSET = OCRDMA_DB_GEN2_RQ_OFFSET,
OCRDMA_DB_CQ_OFFSET = 0x120,
OCRDMA_DB_EQ_OFFSET = OCRDMA_DB_CQ_OFFSET,
- OCRDMA_DB_MQ_OFFSET = 0x140
+ OCRDMA_DB_MQ_OFFSET = 0x140,
+
+ OCRDMA_DB_SQ_SHIFT = 16,
+ OCRDMA_DB_RQ_SHIFT = 24
};
#define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */
@@ -138,6 +152,10 @@ enum {
#define OCRDMA_MIN_Q_PAGE_SIZE (4096)
#define OCRDMA_MAX_Q_PAGES (8)
+#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C
+#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF
+#define OCRDMA_SLI_ASIC_GEN_NUM_MASK 0x0000FF00
+#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT 0x08
/*
# 0: 4K Bytes
# 1: 8K Bytes
@@ -562,6 +580,30 @@ enum {
OCRDMA_FN_MODE_RDMA = 0x4
};
+struct ocrdma_get_phy_info_rsp {
+ struct ocrdma_mqe_hdr hdr;
+ struct ocrdma_mbx_rsp rsp;
+
+ u16 phy_type;
+ u16 interface_type;
+ u32 misc_params;
+ u16 ext_phy_details;
+ u16 rsvd;
+ u16 auto_speeds_supported;
+ u16 fixed_speeds_supported;
+ u32 future_use[2];
+};
+
+enum {
+ OCRDMA_PHY_SPEED_ZERO = 0x0,
+ OCRDMA_PHY_SPEED_10MBPS = 0x1,
+ OCRDMA_PHY_SPEED_100MBPS = 0x2,
+ OCRDMA_PHY_SPEED_1GBPS = 0x4,
+ OCRDMA_PHY_SPEED_10GBPS = 0x8,
+ OCRDMA_PHY_SPEED_40GBPS = 0x20
+};
+
+
struct ocrdma_get_link_speed_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
@@ -590,7 +632,7 @@ enum {
enum {
OCRDMA_CREATE_CQ_VER2 = 2,
- OCRDMA_CREATE_CQ_VER3 = 3,
+ OCRDMA_CREATE_CQ_VER3 = 3,
OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF,
OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16,
@@ -1050,6 +1092,7 @@ enum {
OCRDMA_MODIFY_QP_RSP_MAX_ORD_MASK = 0xFFFF <<
OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT
};
+
struct ocrdma_modify_qp_rsp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_rsp rsp;
@@ -1062,8 +1105,8 @@ struct ocrdma_query_qp {
struct ocrdma_mqe_hdr hdr;
struct ocrdma_mbx_hdr req;
-#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0
-#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF
+#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0
+#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF
u32 qp_id;
};
@@ -1694,7 +1737,7 @@ struct ocrdma_grh {
u16 rsvd;
} __packed;
-#define OCRDMA_AV_VALID Bit(0)
+#define OCRDMA_AV_VALID Bit(7)
#define OCRDMA_AV_VLAN_VALID Bit(1)
struct ocrdma_av {
@@ -1703,4 +1746,208 @@ struct ocrdma_av {
u32 valid;
} __packed;
+struct ocrdma_rsrc_stats {
+ u32 dpp_pds;
+ u32 non_dpp_pds;
+ u32 rc_dpp_qps;
+ u32 uc_dpp_qps;
+ u32 ud_dpp_qps;
+ u32 rc_non_dpp_qps;
+ u32 rsvd;
+ u32 uc_non_dpp_qps;
+ u32 ud_non_dpp_qps;
+ u32 rsvd1;
+ u32 srqs;
+ u32 rbqs;
+ u32 r64K_nsmr;
+ u32 r64K_to_2M_nsmr;
+ u32 r2M_to_44M_nsmr;
+ u32 r44M_to_1G_nsmr;
+ u32 r1G_to_4G_nsmr;
+ u32 nsmr_count_4G_to_32G;
+ u32 r32G_to_64G_nsmr;
+ u32 r64G_to_128G_nsmr;
+ u32 r128G_to_higher_nsmr;
+ u32 embedded_nsmr;
+ u32 frmr;
+ u32 prefetch_qps;
+ u32 ondemand_qps;
+ u32 phy_mr;
+ u32 mw;
+ u32 rsvd2[7];
+};
+
+struct ocrdma_db_err_stats {
+ u32 sq_doorbell_errors;
+ u32 cq_doorbell_errors;
+ u32 rq_srq_doorbell_errors;
+ u32 cq_overflow_errors;
+ u32 rsvd[4];
+};
+
+struct ocrdma_wqe_stats {
+ u32 large_send_rc_wqes_lo;
+ u32 large_send_rc_wqes_hi;
+ u32 large_write_rc_wqes_lo;
+ u32 large_write_rc_wqes_hi;
+ u32 rsvd[4];
+ u32 read_wqes_lo;
+ u32 read_wqes_hi;
+ u32 frmr_wqes_lo;
+ u32 frmr_wqes_hi;
+ u32 mw_bind_wqes_lo;
+ u32 mw_bind_wqes_hi;
+ u32 invalidate_wqes_lo;
+ u32 invalidate_wqes_hi;
+ u32 rsvd1[2];
+ u32 dpp_wqe_drops;
+ u32 rsvd2[5];
+};
+
+struct ocrdma_tx_stats {
+ u32 send_pkts_lo;
+ u32 send_pkts_hi;
+ u32 write_pkts_lo;
+ u32 write_pkts_hi;
+ u32 read_pkts_lo;
+ u32 read_pkts_hi;
+ u32 read_rsp_pkts_lo;
+ u32 read_rsp_pkts_hi;
+ u32 ack_pkts_lo;
+ u32 ack_pkts_hi;
+ u32 send_bytes_lo;
+ u32 send_bytes_hi;
+ u32 write_bytes_lo;
+ u32 write_bytes_hi;
+ u32 read_req_bytes_lo;
+ u32 read_req_bytes_hi;
+ u32 read_rsp_bytes_lo;
+ u32 read_rsp_bytes_hi;
+ u32 ack_timeouts;
+ u32 rsvd[5];
+};
+
+
+struct ocrdma_tx_qp_err_stats {
+ u32 local_length_errors;
+ u32 local_protection_errors;
+ u32 local_qp_operation_errors;
+ u32 retry_count_exceeded_errors;
+ u32 rnr_retry_count_exceeded_errors;
+ u32 rsvd[3];
+};
+
+struct ocrdma_rx_stats {
+ u32 roce_frame_bytes_lo;
+ u32 roce_frame_bytes_hi;
+ u32 roce_frame_icrc_drops;
+ u32 roce_frame_payload_len_drops;
+ u32 ud_drops;
+ u32 qp1_drops;
+ u32 psn_error_request_packets;
+ u32 psn_error_resp_packets;
+ u32 rnr_nak_timeouts;
+ u32 rnr_nak_receives;
+ u32 roce_frame_rxmt_drops;
+ u32 nak_count_psn_sequence_errors;
+ u32 rc_drop_count_lookup_errors;
+ u32 rq_rnr_naks;
+ u32 srq_rnr_naks;
+ u32 roce_frames_lo;
+ u32 roce_frames_hi;
+ u32 rsvd;
+};
+
+struct ocrdma_rx_qp_err_stats {
+ u32 nak_invalid_requst_errors;
+ u32 nak_remote_operation_errors;
+ u32 nak_count_remote_access_errors;
+ u32 local_length_errors;
+ u32 local_protection_errors;
+ u32 local_qp_operation_errors;
+ u32 rsvd[2];
+};
+
+struct ocrdma_tx_dbg_stats {
+ u32 data[100];
+};
+
+struct ocrdma_rx_dbg_stats {
+ u32 data[200];
+};
+
+struct ocrdma_rdma_stats_req {
+ struct ocrdma_mbx_hdr hdr;
+ u8 reset_stats;
+ u8 rsvd[3];
+} __packed;
+
+struct ocrdma_rdma_stats_resp {
+ struct ocrdma_mbx_hdr hdr;
+ struct ocrdma_rsrc_stats act_rsrc_stats;
+ struct ocrdma_rsrc_stats th_rsrc_stats;
+ struct ocrdma_db_err_stats db_err_stats;
+ struct ocrdma_wqe_stats wqe_stats;
+ struct ocrdma_tx_stats tx_stats;
+ struct ocrdma_tx_qp_err_stats tx_qp_err_stats;
+ struct ocrdma_rx_stats rx_stats;
+ struct ocrdma_rx_qp_err_stats rx_qp_err_stats;
+ struct ocrdma_tx_dbg_stats tx_dbg_stats;
+ struct ocrdma_rx_dbg_stats rx_dbg_stats;
+} __packed;
+
+
+struct mgmt_hba_attribs {
+ u8 flashrom_version_string[32];
+ u8 manufacturer_name[32];
+ u32 supported_modes;
+ u32 rsvd0[3];
+ u8 ncsi_ver_string[12];
+ u32 default_extended_timeout;
+ u8 controller_model_number[32];
+ u8 controller_description[64];
+ u8 controller_serial_number[32];
+ u8 ip_version_string[32];
+ u8 firmware_version_string[32];
+ u8 bios_version_string[32];
+ u8 redboot_version_string[32];
+ u8 driver_version_string[32];
+ u8 fw_on_flash_version_string[32];
+ u32 functionalities_supported;
+ u16 max_cdblength;
+ u8 asic_revision;
+ u8 generational_guid[16];
+ u8 hba_port_count;
+ u16 default_link_down_timeout;
+ u8 iscsi_ver_min_max;
+ u8 multifunction_device;
+ u8 cache_valid;
+ u8 hba_status;
+ u8 max_domains_supported;
+ u8 phy_port;
+ u32 firmware_post_status;
+ u32 hba_mtu[8];
+ u32 rsvd1[4];
+};
+
+struct mgmt_controller_attrib {
+ struct mgmt_hba_attribs hba_attribs;
+ u16 pci_vendor_id;
+ u16 pci_device_id;
+ u16 pci_sub_vendor_id;
+ u16 pci_sub_system_id;
+ u8 pci_bus_number;
+ u8 pci_device_number;
+ u8 pci_function_number;
+ u8 interface_type;
+ u64 unique_identifier;
+ u32 rsvd0[5];
+};
+
+struct ocrdma_get_ctrl_attribs_rsp {
+ struct ocrdma_mbx_hdr hdr;
+ struct mgmt_controller_attrib ctrl_attribs;
+};
+
+
#endif /* __OCRDMA_SLI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
new file mode 100644
index 00000000000..41a9aec9998
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -0,0 +1,616 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2014 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#include <rdma/ib_addr.h>
+#include "ocrdma_stats.h"
+
+/*
+ * Driver-wide debugfs directory ("ocrdma" in the debugfs root); the
+ * per-device stats directories are created beneath it.
+ */
+static struct dentry *ocrdma_dbgfs_dir;
+
+/*
+ * Append one "<name>: <count>\n" line to the debugfs text buffer.
+ *
+ * @start: beginning of the OCRDMA_MAX_DBGFS_MEM-byte buffer
+ * @pcur:  position inside that buffer at which the line is written
+ * @name:  label printed before the value
+ * @count: value, printed in decimal
+ *
+ * Returns the number of bytes written, or 0 if the line does not fit.
+ * The final byte of the buffer is never written: the formatters zero the
+ * buffer first and the read handler measures it with strlen(), so at least
+ * one terminating NUL has to survive.
+ */
+static int ocrdma_add_stat(char *start, char *pcur,
+			   const char *name, u64 count)
+{
+	char line[128];
+	int len;
+
+	/* scnprintf() returns the length actually stored, even if truncated */
+	len = scnprintf(line, sizeof(line), "%s: %llu\n", name, count);
+
+	if (pcur + len >= start + OCRDMA_MAX_DBGFS_MEM) {
+		pr_err("%s: No space in stats buff\n", __func__);
+		return 0;
+	}
+
+	memcpy(pcur, line, len);
+	return len;
+}
+
+/*
+ * Allocate the per-device statistics buffers: a zeroed coherent DMA buffer
+ * big enough for either the stats mailbox request or its response, and the
+ * text buffer the debugfs formatters write into.
+ *
+ * Returns true on success and false on failure.  Nothing is freed here on
+ * failure; whatever was allocated stays recorded in dev->stats_mem.
+ */
+static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev)
+{
+	struct stats_mem *smem = &dev->stats_mem;
+	u32 req_len = sizeof(struct ocrdma_rdma_stats_req);
+	u32 rsp_len = sizeof(struct ocrdma_rdma_stats_resp);
+
+	/* mailbox command memory: sized for the larger of request/response */
+	smem->size = (req_len > rsp_len) ? req_len : rsp_len;
+	smem->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, smem->size,
+				      &smem->pa, GFP_KERNEL);
+	if (smem->va == NULL) {
+		pr_err("%s: stats mbox allocation failed\n", __func__);
+		return false;
+	}
+	memset(smem->va, 0, smem->size);
+
+	/* text buffer for the debugfs output */
+	smem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL);
+	if (smem->debugfs_mem == NULL) {
+		pr_err("%s: stats debugfs mem allocation failed\n", __func__);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Free the statistics buffers recorded in dev->stats_mem.  The DMA buffer is
+ * only freed if it was allocated; the text buffer is handed to kfree() as is.
+ */
+static void ocrdma_release_stats_mem(struct ocrdma_dev *dev)
+{
+	struct stats_mem *smem = &dev->stats_mem;
+
+	if (smem->va != NULL) {
+		dma_free_coherent(&dev->nic_info.pdev->dev, smem->size,
+				  smem->va, smem->pa);
+	}
+
+	kfree(smem->debugfs_mem);
+}
+
+/*
+ * Format one group of resource counters into the debugfs text buffer, one
+ * line per counter.  Each label is @prefix followed by the counter's name.
+ *
+ * @start:  beginning of the text buffer
+ * @pcur:   position at which to start writing
+ * @prefix: string prepended to every counter name
+ * @rs:     counters to print
+ *
+ * Returns the write position after the last line added.
+ */
+static char *ocrdma_add_rsrc_stats(char *start, char *pcur, const char *prefix,
+				   struct ocrdma_rsrc_stats *rs)
+{
+	/* order here is the order of the lines in the debugfs file */
+	const struct {
+		const char *name;
+		u64 count;
+	} tbl[] = {
+		{ "dpp_pds",			rs->dpp_pds },
+		{ "non_dpp_pds",		rs->non_dpp_pds },
+		{ "rc_dpp_qps",			rs->rc_dpp_qps },
+		{ "uc_dpp_qps",			rs->uc_dpp_qps },
+		{ "ud_dpp_qps",			rs->ud_dpp_qps },
+		{ "rc_non_dpp_qps",		rs->rc_non_dpp_qps },
+		{ "uc_non_dpp_qps",		rs->uc_non_dpp_qps },
+		{ "ud_non_dpp_qps",		rs->ud_non_dpp_qps },
+		{ "srqs",			rs->srqs },
+		{ "rbqs",			rs->rbqs },
+		{ "64K_nsmr",			rs->r64K_nsmr },
+		{ "64K_to_2M_nsmr",		rs->r64K_to_2M_nsmr },
+		{ "2M_to_44M_nsmr",		rs->r2M_to_44M_nsmr },
+		{ "44M_to_1G_nsmr",		rs->r44M_to_1G_nsmr },
+		{ "1G_to_4G_nsmr",		rs->r1G_to_4G_nsmr },
+		{ "nsmr_count_4G_to_32G",	rs->nsmr_count_4G_to_32G },
+		{ "32G_to_64G_nsmr",		rs->r32G_to_64G_nsmr },
+		{ "64G_to_128G_nsmr",		rs->r64G_to_128G_nsmr },
+		{ "128G_to_higher_nsmr",	rs->r128G_to_higher_nsmr },
+		{ "embedded_nsmr",		rs->embedded_nsmr },
+		{ "frmr",			rs->frmr },
+		{ "prefetch_qps",		rs->prefetch_qps },
+		{ "ondemand_qps",		rs->ondemand_qps },
+		{ "phy_mr",			rs->phy_mr },
+		{ "mw",				rs->mw },
+	};
+	char label[64];
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tbl); i++) {
+		snprintf(label, sizeof(label), "%s%s", prefix, tbl[i].name);
+		pcur += ocrdma_add_stat(start, pcur, label, tbl[i].count);
+	}
+
+	return pcur;
+}
+
+/*
+ * Build the text of the "resource_stats" debugfs file: the active resource
+ * counters ("active_*") followed by the threshold counters ("threshold_*"),
+ * both taken from the stats response buffer (dev->stats_mem.va).
+ *
+ * Returns the device's debugfs text buffer, which is cleared and rewritten
+ * on every call.
+ */
+static char *ocrdma_resource_stats(struct ocrdma_dev *dev)
+{
+	char *stats = dev->stats_mem.debugfs_mem, *pcur;
+	struct ocrdma_rdma_stats_resp *rdma_stats =
+			(struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+
+	memset(stats, 0, OCRDMA_MAX_DBGFS_MEM);
+
+	pcur = ocrdma_add_rsrc_stats(stats, stats, "active_",
+				     &rdma_stats->act_rsrc_stats);
+
+	/* Print the threshold stats */
+	ocrdma_add_rsrc_stats(stats, pcur, "threshold_",
+			      &rdma_stats->th_rsrc_stats);
+
+	return stats;
+}
+
+/*
+ * Build the text of the "rx_stats" debugfs file from the rx_stats group of
+ * the stats response buffer (dev->stats_mem.va).
+ *
+ * Returns the device's debugfs text buffer, which is cleared and rewritten
+ * on every call.
+ */
+static char *ocrdma_rx_stats(struct ocrdma_dev *dev)
+{
+	struct ocrdma_rdma_stats_resp *resp =
+			(struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+	struct ocrdma_rx_stats *rx = &resp->rx_stats;
+	char *buf = dev->stats_mem.debugfs_mem;
+	char *pos = buf;
+
+	memset(buf, 0, OCRDMA_MAX_DBGFS_MEM);
+
+	/* 64-bit counters arrive as lo/hi halves */
+	pos += ocrdma_add_stat(buf, pos, "roce_frame_bytes",
+			       convert_to_64bit(rx->roce_frame_bytes_lo,
+						rx->roce_frame_bytes_hi));
+	pos += ocrdma_add_stat(buf, pos, "roce_frame_icrc_drops",
+			       rx->roce_frame_icrc_drops);
+	pos += ocrdma_add_stat(buf, pos, "roce_frame_payload_len_drops",
+			       rx->roce_frame_payload_len_drops);
+	pos += ocrdma_add_stat(buf, pos, "ud_drops", rx->ud_drops);
+	pos += ocrdma_add_stat(buf, pos, "qp1_drops", rx->qp1_drops);
+	pos += ocrdma_add_stat(buf, pos, "psn_error_request_packets",
+			       rx->psn_error_request_packets);
+	pos += ocrdma_add_stat(buf, pos, "psn_error_resp_packets",
+			       rx->psn_error_resp_packets);
+	pos += ocrdma_add_stat(buf, pos, "rnr_nak_timeouts",
+			       rx->rnr_nak_timeouts);
+	pos += ocrdma_add_stat(buf, pos, "rnr_nak_receives",
+			       rx->rnr_nak_receives);
+	pos += ocrdma_add_stat(buf, pos, "roce_frame_rxmt_drops",
+			       rx->roce_frame_rxmt_drops);
+	pos += ocrdma_add_stat(buf, pos, "nak_count_psn_sequence_errors",
+			       rx->nak_count_psn_sequence_errors);
+	pos += ocrdma_add_stat(buf, pos, "rc_drop_count_lookup_errors",
+			       rx->rc_drop_count_lookup_errors);
+	pos += ocrdma_add_stat(buf, pos, "rq_rnr_naks", rx->rq_rnr_naks);
+	pos += ocrdma_add_stat(buf, pos, "srq_rnr_naks", rx->srq_rnr_naks);
+	pos += ocrdma_add_stat(buf, pos, "roce_frames",
+			       convert_to_64bit(rx->roce_frames_lo,
+						rx->roce_frames_hi));
+
+	return buf;
+}
+
+/*
+ * Build the text of the "tx_stats" debugfs file from the tx_stats group of
+ * the stats response buffer (dev->stats_mem.va).
+ *
+ * Returns the device's debugfs text buffer, which is cleared and rewritten
+ * on every call.
+ */
+static char *ocrdma_tx_stats(struct ocrdma_dev *dev)
+{
+	struct ocrdma_rdma_stats_resp *resp =
+			(struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+	struct ocrdma_tx_stats *tx = &resp->tx_stats;
+	char *buf = dev->stats_mem.debugfs_mem;
+	char *pos = buf;
+
+	memset(buf, 0, OCRDMA_MAX_DBGFS_MEM);
+
+	/* packet counters, each split into lo/hi halves */
+	pos += ocrdma_add_stat(buf, pos, "send_pkts",
+			       convert_to_64bit(tx->send_pkts_lo,
+						tx->send_pkts_hi));
+	pos += ocrdma_add_stat(buf, pos, "write_pkts",
+			       convert_to_64bit(tx->write_pkts_lo,
+						tx->write_pkts_hi));
+	pos += ocrdma_add_stat(buf, pos, "read_pkts",
+			       convert_to_64bit(tx->read_pkts_lo,
+						tx->read_pkts_hi));
+	pos += ocrdma_add_stat(buf, pos, "read_rsp_pkts",
+			       convert_to_64bit(tx->read_rsp_pkts_lo,
+						tx->read_rsp_pkts_hi));
+	pos += ocrdma_add_stat(buf, pos, "ack_pkts",
+			       convert_to_64bit(tx->ack_pkts_lo,
+						tx->ack_pkts_hi));
+
+	/* byte counters, same lo/hi split */
+	pos += ocrdma_add_stat(buf, pos, "send_bytes",
+			       convert_to_64bit(tx->send_bytes_lo,
+						tx->send_bytes_hi));
+	pos += ocrdma_add_stat(buf, pos, "write_bytes",
+			       convert_to_64bit(tx->write_bytes_lo,
+						tx->write_bytes_hi));
+	pos += ocrdma_add_stat(buf, pos, "read_req_bytes",
+			       convert_to_64bit(tx->read_req_bytes_lo,
+						tx->read_req_bytes_hi));
+	pos += ocrdma_add_stat(buf, pos, "read_rsp_bytes",
+			       convert_to_64bit(tx->read_rsp_bytes_lo,
+						tx->read_rsp_bytes_hi));
+
+	pos += ocrdma_add_stat(buf, pos, "ack_timeouts", tx->ack_timeouts);
+
+	return buf;
+}
+
+/*
+ * Build the text of the "wqe_stats" debugfs file from the wqe_stats group of
+ * the stats response buffer (dev->stats_mem.va).
+ *
+ * Returns the device's debugfs text buffer, which is cleared and rewritten
+ * on every call.
+ */
+static char *ocrdma_wqe_stats(struct ocrdma_dev *dev)
+{
+	struct ocrdma_rdma_stats_resp *resp =
+			(struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+	struct ocrdma_wqe_stats *wqe = &resp->wqe_stats;
+	char *buf = dev->stats_mem.debugfs_mem;
+	char *pos = buf;
+
+	memset(buf, 0, OCRDMA_MAX_DBGFS_MEM);
+
+	/* all but the last counter are split into lo/hi halves */
+	pos += ocrdma_add_stat(buf, pos, "large_send_rc_wqes",
+			       convert_to_64bit(wqe->large_send_rc_wqes_lo,
+						wqe->large_send_rc_wqes_hi));
+	pos += ocrdma_add_stat(buf, pos, "large_write_rc_wqes",
+			       convert_to_64bit(wqe->large_write_rc_wqes_lo,
+						wqe->large_write_rc_wqes_hi));
+	pos += ocrdma_add_stat(buf, pos, "read_wqes",
+			       convert_to_64bit(wqe->read_wqes_lo,
+						wqe->read_wqes_hi));
+	pos += ocrdma_add_stat(buf, pos, "frmr_wqes",
+			       convert_to_64bit(wqe->frmr_wqes_lo,
+						wqe->frmr_wqes_hi));
+	pos += ocrdma_add_stat(buf, pos, "mw_bind_wqes",
+			       convert_to_64bit(wqe->mw_bind_wqes_lo,
+						wqe->mw_bind_wqes_hi));
+	pos += ocrdma_add_stat(buf, pos, "invalidate_wqes",
+			       convert_to_64bit(wqe->invalidate_wqes_lo,
+						wqe->invalidate_wqes_hi));
+	pos += ocrdma_add_stat(buf, pos, "dpp_wqe_drops", wqe->dpp_wqe_drops);
+
+	return buf;
+}
+
+/*
+ * Build the text of the "db_err_stats" debugfs file from the db_err_stats
+ * group of the stats response buffer (dev->stats_mem.va).
+ *
+ * Returns the device's debugfs text buffer, which is cleared and rewritten
+ * on every call.
+ */
+static char *ocrdma_db_errstats(struct ocrdma_dev *dev)
+{
+	struct ocrdma_rdma_stats_resp *resp =
+			(struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+	struct ocrdma_db_err_stats *db = &resp->db_err_stats;
+	char *buf = dev->stats_mem.debugfs_mem;
+	char *pos = buf;
+
+	memset(buf, 0, OCRDMA_MAX_DBGFS_MEM);
+
+	pos += ocrdma_add_stat(buf, pos, "sq_doorbell_errors",
+			       db->sq_doorbell_errors);
+	pos += ocrdma_add_stat(buf, pos, "cq_doorbell_errors",
+			       db->cq_doorbell_errors);
+	pos += ocrdma_add_stat(buf, pos, "rq_srq_doorbell_errors",
+			       db->rq_srq_doorbell_errors);
+	pos += ocrdma_add_stat(buf, pos, "cq_overflow_errors",
+			       db->cq_overflow_errors);
+
+	return buf;
+}
+
+/*
+ * Build the text of the "rx_qp_err_stats" debugfs file from the
+ * rx_qp_err_stats group of the stats response buffer (dev->stats_mem.va).
+ *
+ * Returns the device's debugfs text buffer, which is cleared and rewritten
+ * on every call.
+ */
+static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
+{
+	struct ocrdma_rdma_stats_resp *resp =
+			(struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+	struct ocrdma_rx_qp_err_stats *err = &resp->rx_qp_err_stats;
+	char *buf = dev->stats_mem.debugfs_mem;
+	char *pos = buf;
+
+	memset(buf, 0, OCRDMA_MAX_DBGFS_MEM);
+
+	/* label spelling ("requst") is kept as is: it is the file's output */
+	pos += ocrdma_add_stat(buf, pos, "nak_invalid_requst_errors",
+			       err->nak_invalid_requst_errors);
+	pos += ocrdma_add_stat(buf, pos, "nak_remote_operation_errors",
+			       err->nak_remote_operation_errors);
+	pos += ocrdma_add_stat(buf, pos, "nak_count_remote_access_errors",
+			       err->nak_count_remote_access_errors);
+	pos += ocrdma_add_stat(buf, pos, "local_length_errors",
+			       err->local_length_errors);
+	pos += ocrdma_add_stat(buf, pos, "local_protection_errors",
+			       err->local_protection_errors);
+	pos += ocrdma_add_stat(buf, pos, "local_qp_operation_errors",
+			       err->local_qp_operation_errors);
+
+	return buf;
+}
+
+/*
+ * Build the text of the "tx_qp_err_stats" debugfs file from the
+ * tx_qp_err_stats group of the stats response buffer (dev->stats_mem.va).
+ *
+ * Returns the device's debugfs text buffer, which is cleared and rewritten
+ * on every call.
+ */
+static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev)
+{
+	struct ocrdma_rdma_stats_resp *resp =
+			(struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+	struct ocrdma_tx_qp_err_stats *err = &resp->tx_qp_err_stats;
+	char *buf = dev->stats_mem.debugfs_mem;
+	char *pos = buf;
+
+	memset(buf, 0, OCRDMA_MAX_DBGFS_MEM);
+
+	pos += ocrdma_add_stat(buf, pos, "local_length_errors",
+			       err->local_length_errors);
+	pos += ocrdma_add_stat(buf, pos, "local_protection_errors",
+			       err->local_protection_errors);
+	pos += ocrdma_add_stat(buf, pos, "local_qp_operation_errors",
+			       err->local_qp_operation_errors);
+	pos += ocrdma_add_stat(buf, pos, "retry_count_exceeded_errors",
+			       err->retry_count_exceeded_errors);
+	pos += ocrdma_add_stat(buf, pos, "rnr_retry_count_exceeded_errors",
+			       err->rnr_retry_count_exceeded_errors);
+
+	return buf;
+}
+
+/*
+ * Build the text of the "tx_dbg_stats" debugfs file: the first 100 words of
+ * the tx_dbg_stats group of the stats response buffer, one "DW[i] = 0x..."
+ * line per word.
+ *
+ * Returns the device's debugfs text buffer, which is cleared and rewritten
+ * on every call.
+ */
+static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev)
+{
+	int i;
+	char *buf = dev->stats_mem.debugfs_mem;
+	char *pos = buf;
+	char *end = buf + OCRDMA_MAX_DBGFS_MEM;
+	struct ocrdma_rdma_stats_resp *rdma_stats =
+			(struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+	struct ocrdma_tx_dbg_stats *tx_dbg_stats =
+			&rdma_stats->tx_dbg_stats;
+
+	memset(buf, 0, OCRDMA_MAX_DBGFS_MEM);
+
+	/*
+	 * Bound every line by the space actually left in the buffer rather
+	 * than by a fixed per-line size; scnprintf() returns what it stored,
+	 * so pos can never move past end.
+	 */
+	for (i = 0; i < 100; i++)
+		pos += scnprintf(pos, end - pos, "DW[%d] = 0x%x\n", i,
+				 tx_dbg_stats->data[i]);
+
+	return buf;
+}
+
+/*
+ * Build the text of the "rx_dbg_stats" debugfs file: the first 200 words of
+ * the rx_dbg_stats group of the stats response buffer, one "DW[i] = 0x..."
+ * line per word.
+ *
+ * Returns the device's debugfs text buffer, which is cleared and rewritten
+ * on every call.
+ */
+static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev)
+{
+	int i;
+	char *buf = dev->stats_mem.debugfs_mem;
+	char *pos = buf;
+	char *end = buf + OCRDMA_MAX_DBGFS_MEM;
+	struct ocrdma_rdma_stats_resp *rdma_stats =
+			(struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
+	struct ocrdma_rx_dbg_stats *rx_dbg_stats =
+			&rdma_stats->rx_dbg_stats;
+
+	memset(buf, 0, OCRDMA_MAX_DBGFS_MEM);
+
+	/*
+	 * 200 lines of up to 21 characters come within a few bytes of the
+	 * 4096-byte buffer, so bound every line by the space actually left;
+	 * scnprintf() returns what it stored, so pos can never pass end.
+	 */
+	for (i = 0; i < 200; i++)
+		pos += scnprintf(pos, end - pos, "DW[%d] = 0x%x\n", i,
+				 rx_dbg_stats->data[i]);
+
+	return buf;
+}
+
+/*
+ * Refresh the stats buffer through the stats mailbox command, unless less
+ * than a full second has passed since dev->last_stats_time.  A mailbox
+ * failure is only logged; the timestamp is updated either way.
+ */
+static void ocrdma_update_stats(struct ocrdma_dev *dev)
+{
+	ulong elapsed = jiffies - dev->last_stats_time;
+	int rc;
+
+	/* whole seconds elapsed; zero means the data is fresh enough */
+	if (jiffies_to_msecs(elapsed) / 1000U == 0)
+		return;
+
+	rc = ocrdma_mbx_rdma_stats(dev, false);
+	if (rc != 0)
+		pr_err("%s: stats mbox failed with status = %d\n",
+		       __func__, rc);
+
+	dev->last_stats_time = jiffies;
+}
+
+/*
+ * read() handler shared by all stats debugfs files.  filp->private_data is
+ * the struct ocrdma_stats registered for the file; its type selects which
+ * formatter builds the text.
+ *
+ * The whole text must be read in one call: a non-zero offset returns 0, and
+ * a user buffer shorter than the text fails with -ENOSPC.  An unknown stats
+ * type fails with -EFAULT.  Otherwise returns the result of
+ * simple_read_from_buffer().  Runs under dev->stats_lock.
+ */
+static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer,
+				     size_t usr_buf_len, loff_t *ppos)
+{
+	struct ocrdma_stats *entry = filp->private_data;
+	struct ocrdma_dev *dev = entry->dev;
+	char *text = NULL;
+	size_t text_len;
+	ssize_t ret = 0;
+
+	/* No partial reads */
+	if (*ppos != 0)
+		return 0;
+
+	mutex_lock(&dev->stats_lock);
+
+	ocrdma_update_stats(dev);
+
+	switch (entry->type) {
+	case OCRDMA_RSRC_STATS:
+		text = ocrdma_resource_stats(dev);
+		break;
+	case OCRDMA_RXSTATS:
+		text = ocrdma_rx_stats(dev);
+		break;
+	case OCRDMA_WQESTATS:
+		text = ocrdma_wqe_stats(dev);
+		break;
+	case OCRDMA_TXSTATS:
+		text = ocrdma_tx_stats(dev);
+		break;
+	case OCRDMA_DB_ERRSTATS:
+		text = ocrdma_db_errstats(dev);
+		break;
+	case OCRDMA_RXQP_ERRSTATS:
+		text = ocrdma_rxqp_errstats(dev);
+		break;
+	case OCRDMA_TXQP_ERRSTATS:
+		text = ocrdma_txqp_errstats(dev);
+		break;
+	case OCRDMA_TX_DBG_STATS:
+		text = ocrdma_tx_dbg_stats(dev);
+		break;
+	case OCRDMA_RX_DBG_STATS:
+		text = ocrdma_rx_dbg_stats(dev);
+		break;
+	default:
+		ret = -EFAULT;
+		goto unlock;
+	}
+
+	text_len = strlen(text);
+	if (usr_buf_len < text_len)
+		ret = -ENOSPC;
+	else
+		ret = simple_read_from_buffer(buffer, usr_buf_len, ppos,
+					      text, text_len);
+unlock:
+	mutex_unlock(&dev->stats_lock);
+	return ret;
+}
+
+/*
+ * File operations shared by every stats debugfs file: simple_open() for
+ * open and the common stats read handler for read.
+ */
+static const struct file_operations ocrdma_dbg_ops = {
+	.read	= ocrdma_dbgfs_ops_read,
+	.open	= simple_open,
+	.owner	= THIS_MODULE,
+};
+
+/*
+ * Register one stats file: record its type and owning device in @st and
+ * create a read-only (owner) debugfs file named @name in the device's stats
+ * directory, with @st as the file's private data.
+ * Returns false if debugfs_create_file() returned NULL.
+ */
+static bool ocrdma_add_stats_file(struct ocrdma_dev *dev, const char *name,
+				  struct ocrdma_stats *st,
+				  enum OCRDMA_STATS_TYPE type)
+{
+	st->type = type;
+	st->dev = dev;
+	return debugfs_create_file(name, S_IRUSR, dev->dir, st,
+				   &ocrdma_dbg_ops) != NULL;
+}
+
+/*
+ * Create the per-device debugfs stats directory, its stats files and the
+ * buffers and lock they rely on.  Does nothing if the driver-wide debugfs
+ * directory does not exist.  On any failure everything is torn down again
+ * and dev->dir is left NULL.
+ *
+ * The stats buffers and the lock are set up before any file is created: a
+ * file is readable as soon as it exists, and the read handler takes
+ * dev->stats_lock and writes into the stats buffers.
+ */
+void ocrdma_add_port_stats(struct ocrdma_dev *dev)
+{
+	if (!ocrdma_dbgfs_dir)
+		return;
+
+	/* Create port stats base dir */
+	dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir);
+	if (!dev->dir)
+		goto err;
+
+	/* dma_mem for the stats mbx command plus the debugfs text buffer */
+	if (!ocrdma_alloc_stats_mem(dev))
+		goto err;
+
+	mutex_init(&dev->stats_lock);
+
+	if (!ocrdma_add_stats_file(dev, "resource_stats", &dev->rsrc_stats,
+				   OCRDMA_RSRC_STATS) ||
+	    !ocrdma_add_stats_file(dev, "rx_stats", &dev->rx_stats,
+				   OCRDMA_RXSTATS) ||
+	    !ocrdma_add_stats_file(dev, "wqe_stats", &dev->wqe_stats,
+				   OCRDMA_WQESTATS) ||
+	    !ocrdma_add_stats_file(dev, "tx_stats", &dev->tx_stats,
+				   OCRDMA_TXSTATS) ||
+	    !ocrdma_add_stats_file(dev, "db_err_stats", &dev->db_err_stats,
+				   OCRDMA_DB_ERRSTATS) ||
+	    !ocrdma_add_stats_file(dev, "tx_qp_err_stats",
+				   &dev->tx_qp_err_stats,
+				   OCRDMA_TXQP_ERRSTATS) ||
+	    !ocrdma_add_stats_file(dev, "rx_qp_err_stats",
+				   &dev->rx_qp_err_stats,
+				   OCRDMA_RXQP_ERRSTATS) ||
+	    !ocrdma_add_stats_file(dev, "tx_dbg_stats", &dev->tx_dbg_stats,
+				   OCRDMA_TX_DBG_STATS) ||
+	    !ocrdma_add_stats_file(dev, "rx_dbg_stats", &dev->rx_dbg_stats,
+				   OCRDMA_RX_DBG_STATS))
+		goto err_files;
+
+	return;
+
+err_files:
+	/* take the files down before the lock and buffers they use go away */
+	debugfs_remove_recursive(dev->dir);
+	dev->dir = NULL;
+	mutex_destroy(&dev->stats_lock);
+err:
+	debugfs_remove_recursive(dev->dir);
+	dev->dir = NULL;
+	ocrdma_release_stats_mem(dev);
+}
+
+/*
+ * Tear down what ocrdma_add_port_stats() set up; a no-op if the device has
+ * no stats directory.
+ *
+ * The directory is removed recursively (it still contains the stats files,
+ * and debugfs_remove() does not remove a non-empty directory), and it is
+ * removed first so that the files are gone before the lock and the buffers
+ * their read handler uses are released.
+ */
+void ocrdma_rem_port_stats(struct ocrdma_dev *dev)
+{
+	if (!dev->dir)
+		return;
+
+	debugfs_remove_recursive(dev->dir);
+	dev->dir = NULL;
+
+	mutex_destroy(&dev->stats_lock);
+	ocrdma_release_stats_mem(dev);
+}
+
+/*
+ * Create the driver-wide "ocrdma" directory in the debugfs root and remember
+ * it in ocrdma_dbgfs_dir.  The result is stored unchecked.
+ */
+void ocrdma_init_debugfs(void)
+{
+	struct dentry *root_dir = debugfs_create_dir("ocrdma", NULL);
+
+	ocrdma_dbgfs_dir = root_dir;
+}
+
+/* Remove the driver-wide "ocrdma" debugfs directory and everything below it. */
+void ocrdma_rem_debugfs(void)
+{
+	struct dentry *root_dir = ocrdma_dbgfs_dir;
+
+	debugfs_remove_recursive(root_dir);
+}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
new file mode 100644
index 00000000000..5f5e20c46d7
--- /dev/null
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
@@ -0,0 +1,54 @@
+/*******************************************************************
+ * This file is part of the Emulex RoCE Device Driver for *
+ * RoCE (RDMA over Converged Ethernet) adapters. *
+ * Copyright (C) 2008-2014 Emulex. All rights reserved. *
+ * EMULEX and SLI are trademarks of Emulex. *
+ * www.emulex.com *
+ * *
+ * This program is free software; you can redistribute it and/or *
+ * modify it under the terms of version 2 of the GNU General *
+ * Public License as published by the Free Software Foundation. *
+ * This program is distributed in the hope that it will be useful. *
+ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
+ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
+ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
+ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
+ * TO BE LEGALLY INVALID. See the GNU General Public License for *
+ * more details, a copy of which can be found in the file COPYING *
+ * included with this package. *
+ *
+ * Contact Information:
+ * linux-drivers@emulex.com
+ *
+ * Emulex
+ * 3333 Susan Street
+ * Costa Mesa, CA 92626
+ *******************************************************************/
+
+#ifndef __OCRDMA_STATS_H__
+#define __OCRDMA_STATS_H__
+
+#include <linux/debugfs.h>
+#include "ocrdma.h"
+#include "ocrdma_hw.h"
+
+/* Size in bytes of the per-device text buffer the stats formatters fill. */
+#define OCRDMA_MAX_DBGFS_MEM 4096
+
+/*
+ * Identifies which statistics group a debugfs stats file reports.  The value
+ * is stored with each file's private data and the shared read handler
+ * switches on it to pick the formatter.
+ */
+enum OCRDMA_STATS_TYPE {
+ OCRDMA_RSRC_STATS,
+ OCRDMA_RXSTATS,
+ OCRDMA_WQESTATS,
+ OCRDMA_TXSTATS,
+ OCRDMA_DB_ERRSTATS,
+ OCRDMA_RXQP_ERRSTATS,
+ OCRDMA_TXQP_ERRSTATS,
+ OCRDMA_TX_DBG_STATS,
+ OCRDMA_RX_DBG_STATS
+};
+
+/* Driver-wide debugfs root: remove / create the top-level "ocrdma" directory. */
+void ocrdma_rem_debugfs(void);
+void ocrdma_init_debugfs(void);
+/* Per-device stats: tear down / set up the device's debugfs files and buffers. */
+void ocrdma_rem_port_stats(struct ocrdma_dev *dev);
+void ocrdma_add_port_stats(struct ocrdma_dev *dev);
+
+#endif /* __OCRDMA_STATS_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 6e982bb43c3..edf6211d84b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -53,7 +53,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
dev = get_ocrdma_dev(ibdev);
memset(sgid, 0, sizeof(*sgid));
- if (index >= OCRDMA_MAX_SGID)
+ if (index > OCRDMA_MAX_SGID)
return -EINVAL;
memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid));
@@ -89,7 +89,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
attr->max_cq = dev->attr.max_cq;
attr->max_cqe = dev->attr.max_cqe;
attr->max_mr = dev->attr.max_mr;
- attr->max_mw = 0;
+ attr->max_mw = dev->attr.max_mw;
attr->max_pd = dev->attr.max_pd;
attr->atomic_cap = 0;
attr->max_fmr = 0;
@@ -141,10 +141,9 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
/* Unsupported */
*ib_speed = IB_SPEED_SDR;
*ib_width = IB_WIDTH_1X;
- };
+ }
}
-
int ocrdma_query_port(struct ib_device *ibdev,
u8 port, struct ib_port_attr *props)
{
@@ -176,7 +175,7 @@ int ocrdma_query_port(struct ib_device *ibdev,
props->port_cap_flags =
IB_PORT_CM_SUP |
IB_PORT_REINIT_SUP |
- IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP;
+ IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_IP_BASED_GIDS;
props->gid_tbl_len = OCRDMA_MAX_SGID;
props->pkey_tbl_len = 1;
props->bad_pkey_cntr = 0;
@@ -267,7 +266,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
if (udata && uctx) {
pd->dpp_enabled =
- dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY;
+ ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
pd->num_dpp_qp =
pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
}
@@ -726,10 +725,10 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
u32 num_pbes)
{
struct ocrdma_pbe *pbe;
- struct ib_umem_chunk *chunk;
+ struct scatterlist *sg;
struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
struct ib_umem *umem = mr->umem;
- int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+ int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0;
if (!mr->hwmr.num_pbes)
return;
@@ -739,39 +738,37 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
shift = ilog2(umem->page_size);
- list_for_each_entry(chunk, &umem->chunk_list, list) {
- /* get all the dma regions from the chunk. */
- for (i = 0; i < chunk->nmap; i++) {
- pages = sg_dma_len(&chunk->page_list[i]) >> shift;
- for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
- /* store the page address in pbe */
- pbe->pa_lo =
- cpu_to_le32(sg_dma_address
- (&chunk->page_list[i]) +
- (umem->page_size * pg_cnt));
- pbe->pa_hi =
- cpu_to_le32(upper_32_bits
- ((sg_dma_address
- (&chunk->page_list[i]) +
- umem->page_size * pg_cnt)));
- pbe_cnt += 1;
- total_num_pbes += 1;
- pbe++;
-
- /* if done building pbes, issue the mbx cmd. */
- if (total_num_pbes == num_pbes)
- return;
-
- /* if the given pbl is full storing the pbes,
- * move to next pbl.
- */
- if (pbe_cnt ==
- (mr->hwmr.pbl_size / sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- pbe_cnt = 0;
- }
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ pages = sg_dma_len(sg) >> shift;
+ for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
+ /* store the page address in pbe */
+ pbe->pa_lo =
+ cpu_to_le32(sg_dma_address
+ (sg) +
+ (umem->page_size * pg_cnt));
+ pbe->pa_hi =
+ cpu_to_le32(upper_32_bits
+ ((sg_dma_address
+ (sg) +
+ umem->page_size * pg_cnt)));
+ pbe_cnt += 1;
+ total_num_pbes += 1;
+ pbe++;
+
+ /* if done building pbes, issue the mbx cmd. */
+ if (total_num_pbes == num_pbes)
+ return;
+
+ /* if the given pbl is full storing the pbes,
+ * move to next pbl.
+ */
+ if (pbe_cnt ==
+ (mr->hwmr.pbl_size / sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ pbe_cnt = 0;
}
+
}
}
}
@@ -840,8 +837,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
- if (mr->hwmr.fr_mr == 0)
- ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
+ ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
/* it could be user registered memory. */
if (mr->umem)
@@ -910,6 +906,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
spin_lock_init(&cq->comp_handler_lock);
INIT_LIST_HEAD(&cq->sq_head);
INIT_LIST_HEAD(&cq->rq_head);
+ cq->first_arm = true;
if (ib_ctx) {
uctx = get_ocrdma_ucontext(ib_ctx);
@@ -927,9 +924,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
goto ctx_err;
}
cq->phase = OCRDMA_CQE_VALID;
- cq->arm_needed = true;
dev->cq_tbl[cq->id] = cq;
-
return &cq->ibcq;
ctx_err:
@@ -952,15 +947,52 @@ int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
return status;
}
+/*
+ * Count the CQEs in the queue that is_cqe_valid() accepts and ring the CQ
+ * doorbell once with that count (both boolean doorbell arguments false).
+ * The scan and the doorbell write run under cq->cq_lock with interrupts
+ * disabled.
+ */
+static void ocrdma_flush_cq(struct ocrdma_cq *cq)
+{
+	struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
+	struct ocrdma_cqe *entry = cq->va;
+	int remaining = cq->cqe_cnt;
+	int num_valid = 0;
+	unsigned long irq_flags;
+
+	/* Last irq might have scheduled a polling thread
+	 * sync-up with it before hard flushing.
+	 */
+	spin_lock_irqsave(&cq->cq_lock, irq_flags);
+	for (; remaining; remaining--, entry++) {
+		if (is_cqe_valid(cq, entry))
+			num_valid++;
+	}
+	ocrdma_ring_cq_db(dev, cq->id, false, false, num_valid);
+	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
+}
+
int ocrdma_destroy_cq(struct ib_cq *ibcq)
{
int status;
struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
+ struct ocrdma_eq *eq = NULL;
struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
int pdid = 0;
+ u32 irq, indx;
- status = ocrdma_mbx_destroy_cq(dev, cq);
+ dev->cq_tbl[cq->id] = NULL;
+ indx = ocrdma_get_eq_table_index(dev, cq->eqn);
+ if (indx == -EINVAL)
+ BUG();
+ eq = &dev->eq_tbl[indx];
+ irq = ocrdma_get_irq(dev, eq);
+ synchronize_irq(irq);
+ ocrdma_flush_cq(cq);
+
+ status = ocrdma_mbx_destroy_cq(dev, cq);
if (cq->ucontext) {
pdid = cq->ucontext->cntxt_pd->id;
ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
@@ -969,7 +1001,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
ocrdma_get_db_addr(dev, pdid),
dev->nic_info.db_page_size);
}
- dev->cq_tbl[cq->id] = NULL;
kfree(cq);
return status;
@@ -1092,15 +1123,9 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
}
uresp.db_page_addr = usr_db;
uresp.db_page_size = dev->nic_info.db_page_size;
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
- uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
- uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
- uresp.db_shift = 24;
- } else {
- uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET;
- uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
- uresp.db_shift = 16;
- }
+ uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
+ uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
+ uresp.db_shift = OCRDMA_DB_RQ_SHIFT;
if (qp->dpp_enabled) {
uresp.dpp_credit = dpp_credit_lmt;
@@ -1132,7 +1157,7 @@ err:
static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
struct ocrdma_pd *pd)
{
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
qp->sq_db = dev->nic_info.db +
(pd->id * dev->nic_info.db_page_size) +
OCRDMA_DB_GEN2_SQ_OFFSET;
@@ -1182,7 +1207,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
}
-
static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
struct ib_qp_init_attr *attrs)
{
@@ -1268,17 +1292,6 @@ gen_err:
return ERR_PTR(status);
}
-
-static void ocrdma_flush_rq_db(struct ocrdma_qp *qp)
-{
- if (qp->db_cache) {
- u32 val = qp->rq.dbid | (qp->db_cache <<
- ocrdma_get_num_posted_shift(qp));
- iowrite32(val, qp->rq_db);
- qp->db_cache = 0;
- }
-}
-
int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask)
{
@@ -1296,9 +1309,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
*/
if (status < 0)
return status;
- status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps);
- if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR)
- ocrdma_flush_rq_db(qp);
+ status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
return status;
}
@@ -1326,7 +1337,8 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_qps = old_qps;
spin_unlock_irqrestore(&qp->q_lock, flags);
- if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
+ if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
+ IB_LINK_LAYER_ETHERNET)) {
pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
"qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
__func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
@@ -1415,7 +1427,7 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
OCRDMA_QP_PARAMS_HOP_LMT_SHIFT;
qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn &
- OCRDMA_QP_PARAMS_SQ_PSN_MASK) >>
+ OCRDMA_QP_PARAMS_TCLASS_MASK) >>
OCRDMA_QP_PARAMS_TCLASS_SHIFT;
qp_attr->ah_attr.ah_flags = IB_AH_GRH;
@@ -1509,7 +1521,7 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
int discard_cnt = 0;
u32 cur_getp, stop_getp;
struct ocrdma_cqe *cqe;
- u32 qpn = 0;
+ u32 qpn = 0, wqe_idx = 0;
spin_lock_irqsave(&cq->cq_lock, cq_flags);
@@ -1538,24 +1550,29 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
if (qpn == 0 || qpn != qp->id)
goto skip_cqe;
- /* mark cqe discarded so that it is not picked up later
- * in the poll_cq().
- */
- discard_cnt += 1;
- cqe->cmn.qpn = 0;
if (is_cqe_for_sq(cqe)) {
ocrdma_hwq_inc_tail(&qp->sq);
} else {
if (qp->srq) {
+ wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
+ OCRDMA_CQE_BUFTAG_SHIFT) &
+ qp->srq->rq.max_wqe_idx;
+ if (wqe_idx < 1)
+ BUG();
spin_lock_irqsave(&qp->srq->q_lock, flags);
ocrdma_hwq_inc_tail(&qp->srq->rq);
- ocrdma_srq_toggle_bit(qp->srq, cur_getp);
+ ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
spin_unlock_irqrestore(&qp->srq->q_lock, flags);
} else {
ocrdma_hwq_inc_tail(&qp->rq);
}
}
+ /* mark cqe discarded so that it is not picked up later
+ * in the poll_cq().
+ */
+ discard_cnt += 1;
+ cqe->cmn.qpn = 0;
skip_cqe:
cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
} while (cur_getp != stop_getp);
@@ -1658,7 +1675,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
(srq->pd->id * dev->nic_info.db_page_size);
uresp.db_page_size = dev->nic_info.db_page_size;
uresp.num_rqe_allocated = srq->rq.max_cnt;
- if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
uresp.db_shift = 24;
} else {
@@ -1981,9 +1998,7 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
- if ((wr->wr.fast_reg.page_list_len >
- qp->dev->attr.max_pages_per_frmr) ||
- (wr->wr.fast_reg.length > 0xffffffffULL))
+ if (wr->wr.fast_reg.page_list_len > qp->dev->attr.max_pages_per_frmr)
return -EINVAL;
hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
@@ -2010,15 +2025,15 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
fast_reg->num_sges = wr->wr.fast_reg.page_list_len;
fast_reg->size_sge =
get_encoded_page_size(1 << wr->wr.fast_reg.page_shift);
- mr = (struct ocrdma_mr *) (unsigned long) qp->dev->stag_arr[(hdr->lkey >> 8) &
- (OCRDMA_MAX_STAG - 1)];
+ mr = (struct ocrdma_mr *) (unsigned long)
+ qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)];
build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr);
return 0;
}
static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
{
- u32 val = qp->sq.dbid | (1 << 16);
+ u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT);
iowrite32(val, qp->sq_db);
}
@@ -2123,12 +2138,9 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
{
- u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp));
+ u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT);
- if (qp->state != OCRDMA_QPS_INIT)
- iowrite32(val, qp->rq_db);
- else
- qp->db_cache++;
+ iowrite32(val, qp->rq_db);
}
static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
@@ -2214,7 +2226,7 @@ static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
if (row == srq->bit_fields_len)
BUG();
- return indx;
+ return indx + 1; /* Use from index 1 */
}
static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
@@ -2331,7 +2343,7 @@ static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
default:
ibwc_status = IB_WC_GENERAL_ERR;
break;
- };
+ }
return ibwc_status;
}
@@ -2370,7 +2382,7 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
pr_err("%s() invalid opcode received = 0x%x\n",
__func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK);
break;
- };
+ }
}
static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
@@ -2551,10 +2563,13 @@ static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
srq = get_ocrdma_srq(qp->ibqp.srq);
wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
- OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
+ OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
+ if (wqe_idx < 1)
+ BUG();
+
ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
spin_lock_irqsave(&srq->q_lock, flags);
- ocrdma_srq_toggle_bit(srq, wqe_idx);
+ ocrdma_srq_toggle_bit(srq, wqe_idx - 1);
spin_unlock_irqrestore(&srq->q_lock, flags);
ocrdma_hwq_inc_tail(&srq->rq);
}
@@ -2706,10 +2721,18 @@ expand_cqe:
}
stop_cqe:
cq->getp = cur_getp;
- if (polled_hw_cqes || expand || stop) {
- ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited,
+ if (cq->deferred_arm) {
+ ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol,
polled_hw_cqes);
+ cq->deferred_arm = false;
+ cq->deferred_sol = false;
+ } else {
+ /* We need to pop the CQE. No need to arm */
+ ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol,
+ polled_hw_cqes);
+ cq->deferred_sol = false;
}
+
return i;
}
@@ -2781,30 +2804,28 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
u16 cq_id;
- u16 cur_getp;
- struct ocrdma_cqe *cqe;
unsigned long flags;
+ bool arm_needed = false, sol_needed = false;
cq_id = cq->id;
spin_lock_irqsave(&cq->cq_lock, flags);
if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
- cq->armed = true;
+ arm_needed = true;
if (cq_flags & IB_CQ_SOLICITED)
- cq->solicited = true;
-
- cur_getp = cq->getp;
- cqe = cq->va + cur_getp;
+ sol_needed = true;
- /* check whether any valid cqe exist or not, if not then safe to
- * arm. If cqe is not yet consumed, then let it get consumed and then
- * we arm it to avoid false interrupts.
- */
- if (!is_cqe_valid(cq, cqe) || cq->arm_needed) {
- cq->arm_needed = false;
- ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0);
+ if (cq->first_arm) {
+ ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
+ cq->first_arm = false;
+ goto skip_defer;
}
+ cq->deferred_arm = true;
+
+skip_defer:
+ cq->deferred_sol = sol_needed;
spin_unlock_irqrestore(&cq->cq_lock, flags);
+
return 0;
}
@@ -2839,7 +2860,8 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len)
goto mbx_err;
mr->ibmr.rkey = mr->hwmr.lkey;
mr->ibmr.lkey = mr->hwmr.lkey;
- dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = (unsigned long) mr;
+ dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] =
+ (unsigned long) mr;
return &mr->ibmr;
mbx_err:
ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 1946101419a..c00ae093b6f 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -868,8 +868,10 @@ struct qib_devdata {
/* last buffer for user use */
u32 lastctxt_piobuf;
- /* saturating counter of (non-port-specific) device interrupts */
- u32 int_counter;
+ /* reset value */
+ u64 z_int_counter;
+ /* percpu intcounter */
+ u64 __percpu *int_counter;
/* pio bufs allocated per ctxt */
u32 pbufsctxt;
@@ -1184,7 +1186,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *);
void qib_set_ctxtcnt(struct qib_devdata *);
int qib_create_ctxts(struct qib_devdata *dd);
struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
-void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
+int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *);
@@ -1449,6 +1451,10 @@ void qib_nomsi(struct qib_devdata *);
void qib_nomsix(struct qib_devdata *);
void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *);
void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8);
+/* interrupts for device */
+u64 qib_int_counter(struct qib_devdata *);
+/* interrupt for all devices */
+u64 qib_sps_ints(void);
/*
* dma_addr wrappers - all 0's invalid for hw
diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c
index 1686fd4bda8..5dfda4c5cc9 100644
--- a/drivers/infiniband/hw/qib/qib_diag.c
+++ b/drivers/infiniband/hw/qib/qib_diag.c
@@ -546,7 +546,7 @@ static ssize_t qib_diagpkt_write(struct file *fp,
size_t count, loff_t *off)
{
u32 __iomem *piobuf;
- u32 plen, clen, pbufn;
+ u32 plen, pbufn, maxlen_reserve;
struct qib_diag_xpkt dp;
u32 *tmpbuf = NULL;
struct qib_devdata *dd;
@@ -590,15 +590,20 @@ static ssize_t qib_diagpkt_write(struct file *fp,
}
ppd = &dd->pport[dp.port - 1];
- /* need total length before first word written */
- /* +1 word is for the qword padding */
- plen = sizeof(u32) + dp.len;
- clen = dp.len >> 2;
-
- if ((plen + 4) > ppd->ibmaxlen) {
+ /*
+ * need total length before first word written, plus 2 Dwords. One Dword
+ * is for padding so we get the full user data when not aligned on
+ * a word boundary. The other Dword is to make sure we have room for the
+ * ICRC which gets tacked on later.
+ */
+ maxlen_reserve = 2 * sizeof(u32);
+ if (dp.len > ppd->ibmaxlen - maxlen_reserve) {
ret = -EINVAL;
- goto bail; /* before writing pbc */
+ goto bail;
}
+
+ plen = sizeof(u32) + dp.len;
+
tmpbuf = vmalloc(plen);
if (!tmpbuf) {
qib_devinfo(dd->pcidev,
@@ -638,11 +643,11 @@ static ssize_t qib_diagpkt_write(struct file *fp,
*/
if (dd->flags & QIB_PIO_FLUSH_WC) {
qib_flush_wc();
- qib_pio_copy(piobuf + 2, tmpbuf, clen - 1);
+ qib_pio_copy(piobuf + 2, tmpbuf, plen - 1);
qib_flush_wc();
- __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
+ __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1);
} else
- qib_pio_copy(piobuf + 2, tmpbuf, clen);
+ qib_pio_copy(piobuf + 2, tmpbuf, plen);
if (dd->flags & QIB_USE_SPCL_TRIG) {
u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
@@ -689,28 +694,23 @@ int qib_register_observer(struct qib_devdata *dd,
const struct diag_observer *op)
{
struct diag_observer_list_elt *olp;
- int ret = -EINVAL;
+ unsigned long flags;
if (!dd || !op)
- goto bail;
- ret = -ENOMEM;
+ return -EINVAL;
olp = vmalloc(sizeof *olp);
if (!olp) {
pr_err("vmalloc for observer failed\n");
- goto bail;
+ return -ENOMEM;
}
- if (olp) {
- unsigned long flags;
- spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
- olp->op = op;
- olp->next = dd->diag_observer_list;
- dd->diag_observer_list = olp;
- spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
- ret = 0;
- }
-bail:
- return ret;
+ spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
+ olp->op = op;
+ olp->next = dd->diag_observer_list;
+ dd->diag_observer_list = olp;
+ spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
+
+ return 0;
}
/* Remove all registered observers when device is closed */
diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c
index 2920bb39a65..59fe092b4b0 100644
--- a/drivers/infiniband/hw/qib/qib_dma.c
+++ b/drivers/infiniband/hw/qib/qib_dma.c
@@ -108,6 +108,10 @@ static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl,
ret = 0;
break;
}
+ sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+ sg->dma_length = sg->length;
+#endif
}
return ret;
}
@@ -119,21 +123,6 @@ static void qib_unmap_sg(struct ib_device *dev,
BUG_ON(!valid_dma_direction(direction));
}
-static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg)
-{
- u64 addr = (u64) page_address(sg_page(sg));
-
- if (addr)
- addr += sg->offset;
- return addr;
-}
-
-static unsigned int qib_sg_dma_len(struct ib_device *dev,
- struct scatterlist *sg)
-{
- return sg->length;
-}
-
static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr,
size_t size, enum dma_data_direction dir)
{
@@ -173,8 +162,6 @@ struct ib_dma_mapping_ops qib_dma_mapping_ops = {
.unmap_page = qib_dma_unmap_page,
.map_sg = qib_map_sg,
.unmap_sg = qib_unmap_sg,
- .dma_address = qib_sg_dma_address,
- .dma_len = qib_sg_dma_len,
.sync_single_for_cpu = qib_sync_single_for_cpu,
.sync_single_for_device = qib_sync_single_for_device,
.alloc_coherent = qib_dma_alloc_coherent,
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 275f247f9fc..b15e34eeef6 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1459,7 +1459,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo,
cused++;
else
cfree++;
- if (pusable && cfree && cused < inuse) {
+ if (cfree && cused < inuse) {
udd = dd;
inuse = cused;
}
@@ -1578,7 +1578,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp)
struct qib_ctxtdata *rcd = fd->rcd;
struct qib_devdata *dd = rcd->dd;
- if (dd->flags & QIB_HAS_SEND_DMA)
+ if (dd->flags & QIB_HAS_SEND_DMA) {
fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
dd->unit,
@@ -1586,6 +1586,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp)
fd->subctxt);
if (!fd->pq)
return -ENOMEM;
+ }
return 0;
}
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index f247fc6e618..cab610ccd50 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -105,6 +105,7 @@ static int create_file(const char *name, umode_t mode,
static ssize_t driver_stats_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
+ qib_stats.sps_ints = qib_sps_ints();
return simple_read_from_buffer(buf, count, ppos, &qib_stats,
sizeof qib_stats);
}
@@ -456,13 +457,13 @@ static int remove_file(struct dentry *parent, char *name)
spin_lock(&tmp->d_lock);
if (!(d_unhashed(tmp) && tmp->d_inode)) {
- dget_dlock(tmp);
__d_drop(tmp);
spin_unlock(&tmp->d_lock);
simple_unlink(parent->d_inode, tmp);
} else {
spin_unlock(&tmp->d_lock);
}
+ dput(tmp);
ret = 0;
bail:
@@ -491,6 +492,7 @@ static int remove_device_files(struct super_block *sb,
goto bail;
}
+ mutex_lock(&dir->d_inode->i_mutex);
remove_file(dir, "counters");
remove_file(dir, "counter_names");
remove_file(dir, "portcounter_names");
@@ -505,8 +507,10 @@ static int remove_device_files(struct super_block *sb,
}
}
remove_file(dir, "flash");
- d_delete(dir);
+ mutex_unlock(&dir->d_inode->i_mutex);
ret = simple_rmdir(root->d_inode, dir);
+ d_delete(dir);
+ dput(dir);
bail:
mutex_unlock(&root->d_inode->i_mutex);
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 84e593d6007..d68266ac761 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1634,9 +1634,7 @@ static irqreturn_t qib_6120intr(int irq, void *data)
goto bail;
}
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
@@ -1808,7 +1806,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
* isn't set.
*/
dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
- dd->int_counter = 0; /* so we check interrupts work again */
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
val = dd->control | QLOGIC_IB_C_RESET;
writeq(val, &dd->kregbase[kr_control]);
mb(); /* prevent compiler re-ordering around actual reset */
@@ -3266,7 +3265,9 @@ static int init_6120_variables(struct qib_devdata *dd)
dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated);
- qib_init_pportdata(ppd, dd, 0, 1);
+ ret = qib_init_pportdata(ppd, dd, 0, 1);
+ if (ret)
+ goto bail;
ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
ppd->link_speed_supported = QIB_IB_SDR;
ppd->link_width_enabled = IB_WIDTH_4X;
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 454c2e7668f..7dec89fdc12 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -1962,10 +1962,7 @@ static irqreturn_t qib_7220intr(int irq, void *data)
goto bail;
}
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
-
+ this_cpu_inc(*dd->int_counter);
if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
unlikely_7220_intr(dd, istat);
@@ -2120,7 +2117,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
* isn't set.
*/
dd->flags &= ~(QIB_INITTED | QIB_PRESENT);
- dd->int_counter = 0; /* so we check interrupts work again */
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
val = dd->control | QLOGIC_IB_C_RESET;
writeq(val, &dd->kregbase[kr_control]);
mb(); /* prevent compiler reordering around actual reset */
@@ -4061,7 +4059,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
init_waitqueue_head(&cpspec->autoneg_wait);
INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work);
- qib_init_pportdata(ppd, dd, 0, 1);
+ ret = qib_init_pportdata(ppd, dd, 0, 1);
+ if (ret)
+ goto bail;
ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 016e7429adf..a7eb32517a0 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2395,6 +2395,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a);
qib_write_kreg(dd, kr_scratch, 0ULL);
+ /* ensure previous Tx parameters are not still forced */
+ qib_write_kreg_port(ppd, krp_tx_deemph_override,
+ SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
+ reset_tx_deemphasis_override));
+
if (qib_compat_ddr_negotiate) {
ppd->cpspec->ibdeltainprog = 1;
ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd,
@@ -3110,9 +3115,7 @@ static irqreturn_t qib_7322intr(int irq, void *data)
goto bail;
}
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* handle "errors" of various kinds first, device ahead of port */
if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO |
@@ -3181,9 +3184,7 @@ static irqreturn_t qib_7322pintr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) |
@@ -3210,9 +3211,7 @@ static irqreturn_t qib_7322bufavail(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL);
@@ -3243,9 +3242,7 @@ static irqreturn_t sdma_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3272,9 +3269,7 @@ static irqreturn_t sdma_idle_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3301,9 +3296,7 @@ static irqreturn_t sdma_progress_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3331,9 +3324,7 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data)
*/
return IRQ_HANDLED;
- qib_stats.sps_ints++;
- if (dd->int_counter != (u32) -1)
- dd->int_counter++;
+ this_cpu_inc(*dd->int_counter);
/* Clear the interrupt bit we expect to be set. */
qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ?
@@ -3718,7 +3709,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
dd->pport->cpspec->ibsymdelta = 0;
dd->pport->cpspec->iblnkerrdelta = 0;
dd->pport->cpspec->ibmalfdelta = 0;
- dd->int_counter = 0; /* so we check interrupts work again */
+ /* so we check interrupts work again */
+ dd->z_int_counter = qib_int_counter(dd);
/*
* Keep chip from being accessed until we are ready. Use
@@ -6190,21 +6182,20 @@ static int setup_txselect(const char *str, struct kernel_param *kp)
{
struct qib_devdata *dd;
unsigned long val;
- int ret;
-
+ char *n;
if (strlen(str) >= MAX_ATTEN_LEN) {
pr_info("txselect_values string too long\n");
return -ENOSPC;
}
- ret = kstrtoul(str, 0, &val);
- if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
+ val = simple_strtoul(str, &n, 0);
+ if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
TXDDS_MFG_SZ)) {
pr_info("txselect_values must start with a number < %d\n",
TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
- return ret ? ret : -EINVAL;
+ return -EINVAL;
}
-
strcpy(txselect_list, str);
+
list_for_each_entry(dd, &qib_dev_list, list)
if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322)
set_no_qsfp_atten(dd, 1);
@@ -6553,7 +6544,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
}
dd->num_pports++;
- qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+ ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports);
+ if (ret) {
+ dd->num_pports--;
+ goto bail;
+ }
ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
ppd->link_width_enabled = IB_WIDTH_4X;
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 24e802f4ea2..8d3c78ddc90 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -130,7 +130,6 @@ void qib_set_ctxtcnt(struct qib_devdata *dd)
int qib_create_ctxts(struct qib_devdata *dd)
{
unsigned i;
- int ret;
int local_node_id = pcibus_to_node(dd->pcidev->bus);
if (local_node_id < 0)
@@ -145,8 +144,7 @@ int qib_create_ctxts(struct qib_devdata *dd)
if (!dd->rcd) {
qib_dev_err(dd,
"Unable to allocate ctxtdata array, failing\n");
- ret = -ENOMEM;
- goto done;
+ return -ENOMEM;
}
/* create (one or more) kctxt */
@@ -163,15 +161,14 @@ int qib_create_ctxts(struct qib_devdata *dd)
if (!rcd) {
qib_dev_err(dd,
"Unable to allocate ctxtdata for Kernel ctxt, failing\n");
- ret = -ENOMEM;
- goto done;
+ kfree(dd->rcd);
+ dd->rcd = NULL;
+ return -ENOMEM;
}
rcd->pkeys[0] = QIB_DEFAULT_P_KEY;
rcd->seq_cnt = 1;
}
- ret = 0;
-done:
- return ret;
+ return 0;
}
/*
@@ -233,7 +230,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
/*
* Common code for initializing the physical port structure.
*/
-void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
+int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
u8 hw_pidx, u8 port)
{
int size;
@@ -243,6 +240,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
spin_lock_init(&ppd->sdma_lock);
spin_lock_init(&ppd->lflags_lock);
+ spin_lock_init(&ppd->cc_shadow_lock);
init_waitqueue_head(&ppd->state_wait);
init_timer(&ppd->symerr_clear_timer);
@@ -250,8 +248,10 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
ppd->symerr_clear_timer.data = (unsigned long)ppd;
ppd->qib_wq = NULL;
-
- spin_lock_init(&ppd->cc_shadow_lock);
+ ppd->ibport_data.pmastats =
+ alloc_percpu(struct qib_pma_counters);
+ if (!ppd->ibport_data.pmastats)
+ return -ENOMEM;
if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
goto bail;
@@ -299,7 +299,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
goto bail_3;
}
- return;
+ return 0;
bail_3:
kfree(ppd->ccti_entries_shadow);
@@ -313,7 +313,7 @@ bail_1:
bail:
/* User is intentionally disabling the congestion control agent */
if (!qib_cc_table_size)
- return;
+ return 0;
if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
qib_cc_table_size = 0;
@@ -324,7 +324,7 @@ bail:
qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
port);
- return;
+ return 0;
}
static int init_pioavailregs(struct qib_devdata *dd)
@@ -525,6 +525,7 @@ static void enable_chip(struct qib_devdata *dd)
static void verify_interrupt(unsigned long opaque)
{
struct qib_devdata *dd = (struct qib_devdata *) opaque;
+ u64 int_counter;
if (!dd)
return; /* being torn down */
@@ -533,7 +534,8 @@ static void verify_interrupt(unsigned long opaque)
* If we don't have a lid or any interrupts, let the user know and
* don't bother checking again.
*/
- if (dd->int_counter == 0) {
+ int_counter = qib_int_counter(dd) - dd->z_int_counter;
+ if (int_counter == 0) {
if (!dd->f_intr_fallback(dd))
dev_err(&dd->pcidev->dev,
"No interrupts detected, not usable.\n");
@@ -633,6 +635,12 @@ wq_error:
return -ENOMEM;
}
+static void qib_free_pportdata(struct qib_pportdata *ppd)
+{
+ free_percpu(ppd->ibport_data.pmastats);
+ ppd->ibport_data.pmastats = NULL;
+}
+
/**
* qib_init - do the actual initialization sequence on the chip
* @dd: the qlogic_ib device
@@ -920,6 +928,7 @@ static void qib_shutdown_device(struct qib_devdata *dd)
destroy_workqueue(ppd->qib_wq);
ppd->qib_wq = NULL;
}
+ qib_free_pportdata(ppd);
}
qib_update_eeprom_log(dd);
@@ -1079,9 +1088,34 @@ void qib_free_devdata(struct qib_devdata *dd)
#ifdef CONFIG_DEBUG_FS
qib_dbg_ibdev_exit(&dd->verbs_dev);
#endif
+ free_percpu(dd->int_counter);
ib_dealloc_device(&dd->verbs_dev.ibdev);
}
+u64 qib_int_counter(struct qib_devdata *dd)
+{
+ int cpu;
+ u64 int_counter = 0;
+
+ for_each_possible_cpu(cpu)
+ int_counter += *per_cpu_ptr(dd->int_counter, cpu);
+ return int_counter;
+}
+
+u64 qib_sps_ints(void)
+{
+ unsigned long flags;
+ struct qib_devdata *dd;
+ u64 sps_ints = 0;
+
+ spin_lock_irqsave(&qib_devs_lock, flags);
+ list_for_each_entry(dd, &qib_dev_list, list) {
+ sps_ints += qib_int_counter(dd);
+ }
+ spin_unlock_irqrestore(&qib_devs_lock, flags);
+ return sps_ints;
+}
+
/*
* Allocate our primary per-unit data structure. Must be done via verbs
* allocator, because the verbs cleanup process both does cleanup and
@@ -1097,14 +1131,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
int ret;
dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
- if (!dd) {
- dd = ERR_PTR(-ENOMEM);
- goto bail;
- }
+ if (!dd)
+ return ERR_PTR(-ENOMEM);
-#ifdef CONFIG_DEBUG_FS
- qib_dbg_ibdev_init(&dd->verbs_dev);
-#endif
+ INIT_LIST_HEAD(&dd->list);
idr_preload(GFP_KERNEL);
spin_lock_irqsave(&qib_devs_lock, flags);
@@ -1121,11 +1151,13 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
if (ret < 0) {
qib_early_err(&pdev->dev,
"Could not allocate unit ID: error %d\n", -ret);
-#ifdef CONFIG_DEBUG_FS
- qib_dbg_ibdev_exit(&dd->verbs_dev);
-#endif
- ib_dealloc_device(&dd->verbs_dev.ibdev);
- dd = ERR_PTR(ret);
+ goto bail;
+ }
+ dd->int_counter = alloc_percpu(u64);
+ if (!dd->int_counter) {
+ ret = -ENOMEM;
+ qib_early_err(&pdev->dev,
+ "Could not allocate per-cpu int_counter\n");
goto bail;
}
@@ -1139,9 +1171,15 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
qib_early_err(&pdev->dev,
"Could not alloc cpulist info, cpu affinity might be wrong\n");
}
-
-bail:
+#ifdef CONFIG_DEBUG_FS
+ qib_dbg_ibdev_init(&dd->verbs_dev);
+#endif
return dd;
+bail:
+ if (!list_empty(&dd->list))
+ list_del_init(&dd->list);
+ ib_dealloc_device(&dd->verbs_dev.ibdev);
+ return ERR_PTR(ret);;
}
/*
@@ -1234,7 +1272,7 @@ static int qib_notify_dca(struct notifier_block *nb, unsigned long event,
* Do all the generic driver unit- and chip-independent memory
* allocation and initialization.
*/
-static int __init qlogic_ib_init(void)
+static int __init qib_ib_init(void)
{
int ret;
@@ -1278,12 +1316,12 @@ bail:
return ret;
}
-module_init(qlogic_ib_init);
+module_init(qib_ib_init);
/*
* Do the non-unit driver cleanup, memory free, etc. at unload.
*/
-static void __exit qlogic_ib_cleanup(void)
+static void __exit qib_ib_cleanup(void)
{
int ret;
@@ -1308,7 +1346,7 @@ static void __exit qlogic_ib_cleanup(void)
qib_dev_cleanup();
}
-module_exit(qlogic_ib_cleanup);
+module_exit(qib_ib_cleanup);
/* this can only be called after a successful initialization */
static void cleanup_device_data(struct qib_devdata *dd)
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index ccb119143d2..22c720e5740 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -1028,7 +1028,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys)
event.event = IB_EVENT_PKEY_CHANGE;
event.device = &dd->verbs_dev.ibdev;
- event.element.port_num = 1;
+ event.element.port_num = port;
ib_dispatch_event(&event);
}
return 0;
@@ -1634,6 +1634,23 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
return reply((struct ib_smp *)pmp);
}
+static void qib_snapshot_pmacounters(
+ struct qib_ibport *ibp,
+ struct qib_pma_counters *pmacounters)
+{
+ struct qib_pma_counters *p;
+ int cpu;
+
+ memset(pmacounters, 0, sizeof(*pmacounters));
+ for_each_possible_cpu(cpu) {
+ p = per_cpu_ptr(ibp->pmastats, cpu);
+ pmacounters->n_unicast_xmit += p->n_unicast_xmit;
+ pmacounters->n_unicast_rcv += p->n_unicast_rcv;
+ pmacounters->n_multicast_xmit += p->n_multicast_xmit;
+ pmacounters->n_multicast_rcv += p->n_multicast_rcv;
+ }
+}
+
static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
struct ib_device *ibdev, u8 port)
{
@@ -1642,6 +1659,7 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
u64 swords, rwords, spkts, rpkts, xwait;
+ struct qib_pma_counters pma;
u8 port_select = p->port_select;
memset(pmp->data, 0, sizeof(pmp->data));
@@ -1664,10 +1682,17 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp,
p->port_rcv_data = cpu_to_be64(rwords);
p->port_xmit_packets = cpu_to_be64(spkts);
p->port_rcv_packets = cpu_to_be64(rpkts);
- p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit);
- p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv);
- p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit);
- p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv);
+
+ qib_snapshot_pmacounters(ibp, &pma);
+
+ p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit
+ - ibp->z_unicast_xmit);
+ p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv
+ - ibp->z_unicast_rcv);
+ p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit
+ - ibp->z_multicast_xmit);
+ p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv
+ - ibp->z_multicast_rcv);
bail:
return reply((struct ib_smp *) pmp);
@@ -1795,6 +1820,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp,
struct qib_ibport *ibp = to_iport(ibdev, port);
struct qib_pportdata *ppd = ppd_from_ibp(ibp);
u64 swords, rwords, spkts, rpkts, xwait;
+ struct qib_pma_counters pma;
qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait);
@@ -1810,17 +1836,19 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp,
if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS)
ibp->z_port_rcv_packets = rpkts;
+ qib_snapshot_pmacounters(ibp, &pma);
+
if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS)
- ibp->n_unicast_xmit = 0;
+ ibp->z_unicast_xmit = pma.n_unicast_xmit;
if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS)
- ibp->n_unicast_rcv = 0;
+ ibp->z_unicast_rcv = pma.n_unicast_rcv;
if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS)
- ibp->n_multicast_xmit = 0;
+ ibp->z_multicast_xmit = pma.n_multicast_xmit;
if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS)
- ibp->n_multicast_rcv = 0;
+ ibp->z_multicast_rcv = pma.n_multicast_rcv;
return pma_get_portcounters_ext(pmp, ibdev, port);
}
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index 28874f8606f..941d4d50d8e 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -54,7 +54,7 @@ struct ib_node_info {
__be32 revision;
u8 local_port_num;
u8 vendor_id[3];
-} __attribute__ ((packed));
+} __packed;
struct ib_mad_notice_attr {
u8 generic_type;
@@ -73,7 +73,7 @@ struct ib_mad_notice_attr {
__be16 reserved;
__be16 lid; /* where violation happened */
u8 port_num; /* where violation happened */
- } __attribute__ ((packed)) ntc_129_131;
+ } __packed ntc_129_131;
struct {
__be16 reserved;
@@ -83,14 +83,14 @@ struct ib_mad_notice_attr {
__be32 new_cap_mask; /* new capability mask */
u8 reserved3;
u8 change_flags; /* low 3 bits only */
- } __attribute__ ((packed)) ntc_144;
+ } __packed ntc_144;
struct {
__be16 reserved;
__be16 lid; /* lid where sys guid changed */
__be16 reserved2;
__be64 new_sys_guid;
- } __attribute__ ((packed)) ntc_145;
+ } __packed ntc_145;
struct {
__be16 reserved;
@@ -104,7 +104,7 @@ struct ib_mad_notice_attr {
u8 reserved3;
u8 dr_trunc_hop;
u8 dr_rtn_path[30];
- } __attribute__ ((packed)) ntc_256;
+ } __packed ntc_256;
struct {
__be16 reserved;
@@ -115,7 +115,7 @@ struct ib_mad_notice_attr {
__be32 qp2; /* high 8 bits reserved */
union ib_gid gid1;
union ib_gid gid2;
- } __attribute__ ((packed)) ntc_257_258;
+ } __packed ntc_257_258;
} details;
};
@@ -209,7 +209,7 @@ struct ib_pma_portcounters_cong {
__be64 port_rcv_packets;
__be64 port_xmit_wait;
__be64 port_adr_events;
-} __attribute__ ((packed));
+} __packed;
#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00
#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index e6687ded821..9bbb55347cc 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -232,8 +232,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct qib_mr *mr;
struct ib_umem *umem;
- struct ib_umem_chunk *chunk;
- int n, m, i;
+ struct scatterlist *sg;
+ int n, m, entry;
struct ib_mr *ret;
if (length == 0) {
@@ -246,9 +246,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (IS_ERR(umem))
return (void *) umem;
- n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list)
- n += chunk->nents;
+ n = umem->nmap;
mr = alloc_mr(n, pd);
if (IS_ERR(mr)) {
@@ -268,11 +266,10 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->mr.page_shift = ilog2(umem->page_size);
m = 0;
n = 0;
- list_for_each_entry(chunk, &umem->chunk_list, list) {
- for (i = 0; i < chunk->nents; i++) {
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
void *vaddr;
- vaddr = page_address(sg_page(&chunk->page_list[i]));
+ vaddr = page_address(sg_page(sg));
if (!vaddr) {
ret = ERR_PTR(-EINVAL);
goto bail;
@@ -284,7 +281,6 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
m++;
n = 0;
}
- }
}
ret = &mr->ibmr;
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 3f14009fb66..61a0046efb7 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -51,8 +51,8 @@
* file calls, even though this violates some
* expectations of harmlessness.
*/
-static int qib_tune_pcie_caps(struct qib_devdata *);
-static int qib_tune_pcie_coalesce(struct qib_devdata *);
+static void qib_tune_pcie_caps(struct qib_devdata *);
+static void qib_tune_pcie_coalesce(struct qib_devdata *);
/*
* Do all the common PCIe setup and initialization.
@@ -197,46 +197,47 @@ static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt,
struct qib_msix_entry *qib_msix_entry)
{
int ret;
- u32 tabsize = 0;
- u16 msix_flags;
+ int nvec = *msixcnt;
struct msix_entry *msix_entry;
int i;
+ ret = pci_msix_vec_count(dd->pcidev);
+ if (ret < 0)
+ goto do_intx;
+
+ nvec = min(nvec, ret);
+
/* We can't pass qib_msix_entry array to qib_msix_setup
* so use a dummy msix_entry array and copy the allocated
* irq back to the qib_msix_entry array. */
- msix_entry = kmalloc(*msixcnt * sizeof(*msix_entry), GFP_KERNEL);
- if (!msix_entry) {
- ret = -ENOMEM;
+ msix_entry = kmalloc(nvec * sizeof(*msix_entry), GFP_KERNEL);
+ if (!msix_entry)
goto do_intx;
- }
- for (i = 0; i < *msixcnt; i++)
+
+ for (i = 0; i < nvec; i++)
msix_entry[i] = qib_msix_entry[i].msix;
- pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags);
- tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE);
- if (tabsize > *msixcnt)
- tabsize = *msixcnt;
- ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize);
- if (ret > 0) {
- tabsize = ret;
- ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize);
- }
-do_intx:
- if (ret) {
- qib_dev_err(dd,
- "pci_enable_msix %d vectors failed: %d, falling back to INTx\n",
- tabsize, ret);
- tabsize = 0;
- }
- for (i = 0; i < tabsize; i++)
+ ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec);
+ if (ret < 0)
+ goto free_msix_entry;
+ else
+ nvec = ret;
+
+ for (i = 0; i < nvec; i++)
qib_msix_entry[i].msix = msix_entry[i];
+
kfree(msix_entry);
- *msixcnt = tabsize;
+ *msixcnt = nvec;
+ return;
- if (ret)
- qib_enable_intx(dd->pcidev);
+free_msix_entry:
+ kfree(msix_entry);
+do_intx:
+ qib_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, "
+ "falling back to INTx\n", nvec, ret);
+ *msixcnt = 0;
+ qib_enable_intx(dd->pcidev);
}
/**
@@ -476,30 +477,6 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline)
"pci_enable_device failed after reset: %d\n", r);
}
-/* code to adjust PCIe capabilities. */
-
-static int fld2val(int wd, int mask)
-{
- int lsbmask;
-
- if (!mask)
- return 0;
- wd &= mask;
- lsbmask = mask ^ (mask & (mask - 1));
- wd /= lsbmask;
- return wd;
-}
-
-static int val2fld(int wd, int mask)
-{
- int lsbmask;
-
- if (!mask)
- return 0;
- lsbmask = mask ^ (mask & (mask - 1));
- wd *= lsbmask;
- return wd;
-}
static int qib_pcie_coalesce;
module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
@@ -511,7 +488,7 @@ MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets");
* of these chipsets, with some BIOS settings, and enabling it on those
* systems may result in the system crashing, and/or data corruption.
*/
-static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
+static void qib_tune_pcie_coalesce(struct qib_devdata *dd)
{
int r;
struct pci_dev *parent;
@@ -519,18 +496,18 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
u32 mask, bits, val;
if (!qib_pcie_coalesce)
- return 0;
+ return;
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
if (parent->bus->parent) {
qib_devinfo(dd->pcidev, "Parent not root\n");
- return 1;
+ return;
}
if (!pci_is_pcie(parent))
- return 1;
+ return;
if (parent->vendor != 0x8086)
- return 1;
+ return;
/*
* - bit 12: Max_rdcmp_Imt_EN: need to set to 1
@@ -563,13 +540,12 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
mask = (3U << 24) | (7U << 10);
} else {
/* not one of the chipsets that we know about */
- return 1;
+ return;
}
pci_read_config_dword(parent, 0x48, &val);
val &= ~mask;
val |= bits;
r = pci_write_config_dword(parent, 0x48, val);
- return 0;
}
/*
@@ -580,55 +556,44 @@ static int qib_pcie_caps;
module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
-static int qib_tune_pcie_caps(struct qib_devdata *dd)
+static void qib_tune_pcie_caps(struct qib_devdata *dd)
{
- int ret = 1; /* Assume the worst */
struct pci_dev *parent;
- u16 pcaps, pctl, ecaps, ectl;
- int rc_sup, ep_sup;
- int rc_cur, ep_cur;
+ u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
+ u16 rc_mrrs, ep_mrrs, max_mrrs;
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
- if (parent->bus->parent) {
+ if (!pci_is_root_bus(parent->bus)) {
qib_devinfo(dd->pcidev, "Parent not root\n");
- goto bail;
+ return;
}
if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
- goto bail;
- pcie_capability_read_word(parent, PCI_EXP_DEVCAP, &pcaps);
- pcie_capability_read_word(parent, PCI_EXP_DEVCTL, &pctl);
+ return;
+
+ rc_mpss = parent->pcie_mpss;
+ rc_mps = ffs(pcie_get_mps(parent)) - 8;
/* Find out supported and configured values for endpoint (us) */
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCAP, &ecaps);
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
+ ep_mpss = dd->pcidev->pcie_mpss;
+ ep_mps = ffs(pcie_get_mps(dd->pcidev)) - 8;
- ret = 0;
/* Find max payload supported by root, endpoint */
- rc_sup = fld2val(pcaps, PCI_EXP_DEVCAP_PAYLOAD);
- ep_sup = fld2val(ecaps, PCI_EXP_DEVCAP_PAYLOAD);
- if (rc_sup > ep_sup)
- rc_sup = ep_sup;
-
- rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_PAYLOAD);
- ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_PAYLOAD);
+ if (rc_mpss > ep_mpss)
+ rc_mpss = ep_mpss;
/* If Supported greater than limit in module param, limit it */
- if (rc_sup > (qib_pcie_caps & 7))
- rc_sup = qib_pcie_caps & 7;
+ if (rc_mpss > (qib_pcie_caps & 7))
+ rc_mpss = qib_pcie_caps & 7;
/* If less than (allowed, supported), bump root payload */
- if (rc_sup > rc_cur) {
- rc_cur = rc_sup;
- pctl = (pctl & ~PCI_EXP_DEVCTL_PAYLOAD) |
- val2fld(rc_cur, PCI_EXP_DEVCTL_PAYLOAD);
- pcie_capability_write_word(parent, PCI_EXP_DEVCTL, pctl);
+ if (rc_mpss > rc_mps) {
+ rc_mps = rc_mpss;
+ pcie_set_mps(parent, 128 << rc_mps);
}
/* If less than (allowed, supported), bump endpoint payload */
- if (rc_sup > ep_cur) {
- ep_cur = rc_sup;
- ectl = (ectl & ~PCI_EXP_DEVCTL_PAYLOAD) |
- val2fld(ep_cur, PCI_EXP_DEVCTL_PAYLOAD);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl);
+ if (rc_mpss > ep_mps) {
+ ep_mps = rc_mpss;
+ pcie_set_mps(dd->pcidev, 128 << ep_mps);
}
/*
@@ -636,26 +601,22 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
* No field for max supported, but PCIe spec limits it to 4096,
* which is code '5' (log2(4096) - 7)
*/
- rc_sup = 5;
- if (rc_sup > ((qib_pcie_caps >> 4) & 7))
- rc_sup = (qib_pcie_caps >> 4) & 7;
- rc_cur = fld2val(pctl, PCI_EXP_DEVCTL_READRQ);
- ep_cur = fld2val(ectl, PCI_EXP_DEVCTL_READRQ);
-
- if (rc_sup > rc_cur) {
- rc_cur = rc_sup;
- pctl = (pctl & ~PCI_EXP_DEVCTL_READRQ) |
- val2fld(rc_cur, PCI_EXP_DEVCTL_READRQ);
- pcie_capability_write_word(parent, PCI_EXP_DEVCTL, pctl);
+ max_mrrs = 5;
+ if (max_mrrs > ((qib_pcie_caps >> 4) & 7))
+ max_mrrs = (qib_pcie_caps >> 4) & 7;
+
+ max_mrrs = 128 << max_mrrs;
+ rc_mrrs = pcie_get_readrq(parent);
+ ep_mrrs = pcie_get_readrq(dd->pcidev);
+
+ if (max_mrrs > rc_mrrs) {
+ rc_mrrs = max_mrrs;
+ pcie_set_readrq(parent, rc_mrrs);
}
- if (rc_sup > ep_cur) {
- ep_cur = rc_sup;
- ectl = (ectl & ~PCI_EXP_DEVCTL_READRQ) |
- val2fld(ep_cur, PCI_EXP_DEVCTL_READRQ);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl);
+ if (max_mrrs > ep_mrrs) {
+ ep_mrrs = max_mrrs;
+ pcie_set_readrq(dd->pcidev, ep_mrrs);
}
-bail:
- return ret;
}
/* End of PCIe capability tuning */
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 3cca55b51e5..7fcc150d603 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -585,7 +585,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask))
+ attr_mask, IB_LINK_LAYER_UNSPECIFIED))
goto inval;
if (attr_mask & IB_QP_AV) {
@@ -985,7 +985,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
struct ib_qp *ret;
if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
- init_attr->cap.max_send_wr > ib_qib_max_qp_wrs) {
+ init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
+ init_attr->create_flags) {
ret = ERR_PTR(-EINVAL);
goto bail;
}
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 3ab341320ea..2f2501890c4 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -752,7 +752,7 @@ void qib_send_rc_ack(struct qib_qp *qp)
qib_flush_wc();
qib_sendbuf_done(dd, pbufn);
- ibp->n_unicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
goto done;
queue_ack:
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 357b6cfcd46..4c07a8b34ff 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -703,6 +703,7 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
ohdr->bth[0] = cpu_to_be32(bth0);
ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
ohdr->bth[2] = cpu_to_be32(bth2);
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
}
/**
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index d6c7fe7f88d..aaf7039f8ed 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -57,13 +57,20 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
struct qib_sge *sge;
struct ib_wc wc;
u32 length;
+ enum ib_qp_type sqptype, dqptype;
qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
if (!qp) {
ibp->n_pkt_drops++;
return;
}
- if (qp->ibqp.qp_type != sqp->ibqp.qp_type ||
+
+ sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
+ IB_QPT_UD : sqp->ibqp.qp_type;
+ dqptype = qp->ibqp.qp_type == IB_QPT_GSI ?
+ IB_QPT_UD : qp->ibqp.qp_type;
+
+ if (dqptype != sqptype ||
!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
ibp->n_pkt_drops++;
goto drop;
@@ -273,11 +280,11 @@ int qib_make_ud_req(struct qib_qp *qp)
ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
if (ah_attr->dlid != QIB_PERMISSIVE_LID)
- ibp->n_multicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_multicast_xmit);
else
- ibp->n_unicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
} else {
- ibp->n_unicast_xmit++;
+ this_cpu_inc(ibp->pmastats->n_unicast_xmit);
lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
if (unlikely(lid == ppd->lid)) {
/*
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index d0a0ea0c14d..d2806cae234 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -52,6 +52,17 @@
/* attempt to drain the queue for 5secs */
#define QIB_USER_SDMA_DRAIN_TIMEOUT 500
+/*
+ * Track how many times a process opens this driver.
+ */
+static struct rb_root qib_user_sdma_rb_root = RB_ROOT;
+
+struct qib_user_sdma_rb_node {
+ struct rb_node node;
+ int refcount;
+ pid_t pid;
+};
+
struct qib_user_sdma_pkt {
struct list_head list; /* list element */
@@ -120,15 +131,60 @@ struct qib_user_sdma_queue {
/* dma page table */
struct rb_root dma_pages_root;
+ struct qib_user_sdma_rb_node *sdma_rb_node;
+
/* protect everything above... */
struct mutex lock;
};
+static struct qib_user_sdma_rb_node *
+qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
+{
+ struct qib_user_sdma_rb_node *sdma_rb_node;
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ sdma_rb_node = container_of(node,
+ struct qib_user_sdma_rb_node, node);
+ if (pid < sdma_rb_node->pid)
+ node = node->rb_left;
+ else if (pid > sdma_rb_node->pid)
+ node = node->rb_right;
+ else
+ return sdma_rb_node;
+ }
+ return NULL;
+}
+
+static int
+qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
+{
+ struct rb_node **node = &(root->rb_node);
+ struct rb_node *parent = NULL;
+ struct qib_user_sdma_rb_node *got;
+
+ while (*node) {
+ got = container_of(*node, struct qib_user_sdma_rb_node, node);
+ parent = *node;
+ if (new->pid < got->pid)
+ node = &((*node)->rb_left);
+ else if (new->pid > got->pid)
+ node = &((*node)->rb_right);
+ else
+ return 0;
+ }
+
+ rb_link_node(&new->node, parent, node);
+ rb_insert_color(&new->node, root);
+ return 1;
+}
+
struct qib_user_sdma_queue *
qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
{
struct qib_user_sdma_queue *pq =
kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
+ struct qib_user_sdma_rb_node *sdma_rb_node;
if (!pq)
goto done;
@@ -138,6 +194,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
pq->num_pending = 0;
pq->num_sending = 0;
pq->added = 0;
+ pq->sdma_rb_node = NULL;
INIT_LIST_HEAD(&pq->sent);
spin_lock_init(&pq->sent_lock);
@@ -163,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
pq->dma_pages_root = RB_ROOT;
+ sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root,
+ current->pid);
+ if (sdma_rb_node) {
+ sdma_rb_node->refcount++;
+ } else {
+ int ret;
+ sdma_rb_node = kmalloc(sizeof(
+ struct qib_user_sdma_rb_node), GFP_KERNEL);
+ if (!sdma_rb_node)
+ goto err_rb;
+
+ sdma_rb_node->refcount = 1;
+ sdma_rb_node->pid = current->pid;
+
+ ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
+ sdma_rb_node);
+ BUG_ON(ret == 0);
+ }
+ pq->sdma_rb_node = sdma_rb_node;
+
goto done;
+err_rb:
+ dma_pool_destroy(pq->header_cache);
err_slab:
kmem_cache_destroy(pq->pkt_slab);
err_kfree:
@@ -594,8 +673,7 @@ static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
else
j = npages;
- ret = get_user_pages(current, current->mm, addr,
- j, 0, 1, pages, NULL);
+ ret = get_user_pages_fast(addr, j, 0, pages);
if (ret != j) {
i = 0;
j = ret;
@@ -1021,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
if (!pq)
return;
- kmem_cache_destroy(pq->pkt_slab);
+ pq->sdma_rb_node->refcount--;
+ if (pq->sdma_rb_node->refcount == 0) {
+ rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root);
+ kfree(pq->sdma_rb_node);
+ }
dma_pool_destroy(pq->header_cache);
+ kmem_cache_destroy(pq->pkt_slab);
kfree(pq);
}
@@ -1242,26 +1325,52 @@ static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
struct qib_user_sdma_queue *pq,
struct list_head *pktlist, int count)
{
- int ret = 0;
unsigned long flags;
if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
return -ECOMM;
- spin_lock_irqsave(&ppd->sdma_lock, flags);
-
- if (unlikely(!__qib_sdma_running(ppd))) {
- ret = -ECOMM;
- goto unlock;
+ /* non-blocking mode */
+ if (pq->sdma_rb_node->refcount > 1) {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return -ECOMM;
+ }
+ pq->num_pending += count;
+ list_splice_tail_init(pktlist, &ppd->sdma_userpending);
+ qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return 0;
}
+ /* In this case, descriptors from this process are not
+ * linked to the ppd pending queue, so the interrupt handler
+ * won't update this process; it is OK to directly
+ * modify num_pending without the sdma lock.
+ */
+
+
pq->num_pending += count;
- list_splice_tail_init(pktlist, &ppd->sdma_userpending);
- qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending);
+ /*
+ * Blocking mode for a single-rail process: we must
+ * release/regain sdma_lock to give other processes
+ * a chance to make progress. This is important for
+ * performance.
+ */
+ do {
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return -ECOMM;
+ }
+ qib_user_sdma_send_desc(ppd, pktlist);
+ if (!list_empty(pktlist))
+ qib_sdma_make_progress(ppd);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ } while (!list_empty(pktlist));
-unlock:
- spin_unlock_irqrestore(&ppd->sdma_lock, flags);
- return ret;
+ return 0;
}
int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
@@ -1291,14 +1400,11 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
qib_user_sdma_queue_clean(ppd, pq);
while (dim) {
- int mxp = 8;
+ int mxp = 1;
int ndesc = 0;
- down_write(&current->mm->mmap_sem);
ret = qib_user_sdma_queue_pkts(dd, ppd, pq,
iov, dim, &list, &mxp, &ndesc);
- up_write(&current->mm->mmap_sem);
-
if (ret < 0)
goto done_unlock;
else {
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 092b0bb1bb7..9bcfbd84298 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -662,7 +662,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
if (mcast == NULL)
goto drop;
- ibp->n_multicast_rcv++;
+ this_cpu_inc(ibp->pmastats->n_multicast_rcv);
list_for_each_entry_rcu(p, &mcast->qp_list, list)
qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
/*
@@ -678,8 +678,8 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
&rcd->lookaside_qp->refcount))
wake_up(
&rcd->lookaside_qp->wait);
- rcd->lookaside_qp = NULL;
- }
+ rcd->lookaside_qp = NULL;
+ }
}
if (!rcd->lookaside_qp) {
qp = qib_lookup_qpn(ibp, qp_num);
@@ -689,7 +689,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
rcd->lookaside_qpn = qp_num;
} else
qp = rcd->lookaside_qp;
- ibp->n_unicast_rcv++;
+ this_cpu_inc(ibp->pmastats->n_unicast_rcv);
qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
}
return;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 012e2c7575a..bfc8948fdd3 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -150,14 +150,14 @@ struct ib_reth {
__be64 vaddr;
__be32 rkey;
__be32 length;
-} __attribute__ ((packed));
+} __packed;
struct ib_atomic_eth {
__be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
__be32 rkey;
__be64 swap_data;
__be64 compare_data;
-} __attribute__ ((packed));
+} __packed;
struct qib_other_headers {
__be32 bth[3];
@@ -178,7 +178,7 @@ struct qib_other_headers {
__be32 aeth;
struct ib_atomic_eth atomic_eth;
} u;
-} __attribute__ ((packed));
+} __packed;
/*
* Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
@@ -195,12 +195,12 @@ struct qib_ib_header {
} l;
struct qib_other_headers oth;
} u;
-} __attribute__ ((packed));
+} __packed;
struct qib_pio_header {
__le32 pbc[2];
struct qib_ib_header hdr;
-} __attribute__ ((packed));
+} __packed;
/*
* There is one struct qib_mcast for each multicast GID.
@@ -664,6 +664,13 @@ struct qib_opcode_stats_perctx {
struct qib_opcode_stats stats[128];
};
+struct qib_pma_counters {
+ u64 n_unicast_xmit; /* total unicast packets sent */
+ u64 n_unicast_rcv; /* total unicast packets received */
+ u64 n_multicast_xmit; /* total multicast packets sent */
+ u64 n_multicast_rcv; /* total multicast packets received */
+};
+
struct qib_ibport {
struct qib_qp __rcu *qp0;
struct qib_qp __rcu *qp1;
@@ -680,10 +687,11 @@ struct qib_ibport {
__be64 mkey;
__be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */
u64 tid; /* TID for traps */
- u64 n_unicast_xmit; /* total unicast packets sent */
- u64 n_unicast_rcv; /* total unicast packets received */
- u64 n_multicast_xmit; /* total multicast packets sent */
- u64 n_multicast_rcv; /* total multicast packets received */
+ struct qib_pma_counters __percpu *pmastats;
+ u64 z_unicast_xmit; /* starting count for PMA */
+ u64 z_unicast_rcv; /* starting count for PMA */
+ u64 z_multicast_xmit; /* starting count for PMA */
+ u64 z_multicast_rcv; /* starting count for PMA */
u64 z_symbol_error_counter; /* starting count for PMA */
u64 z_link_error_recovery_counter; /* starting count for PMA */
u64 z_link_downed_counter; /* starting count for PMA */
diff --git a/drivers/infiniband/hw/usnic/Kconfig b/drivers/infiniband/hw/usnic/Kconfig
new file mode 100644
index 00000000000..29ab11c34f3
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/Kconfig
@@ -0,0 +1,10 @@
+config INFINIBAND_USNIC
+ tristate "Verbs support for Cisco VIC"
+ depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU
+ select ENIC
+ select NET_VENDOR_CISCO
+ select PCI_IOV
+ select INFINIBAND_USER_ACCESS
+ ---help---
+ This is a low-level driver for Cisco's Virtual Interface
+ Cards (VICs), including the VIC 1240 and 1280 cards.
diff --git a/drivers/infiniband/hw/usnic/Makefile b/drivers/infiniband/hw/usnic/Makefile
new file mode 100644
index 00000000000..99fb2db47cd
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/Makefile
@@ -0,0 +1,15 @@
+ccflags-y := -Idrivers/net/ethernet/cisco/enic
+
+obj-$(CONFIG_INFINIBAND_USNIC)+= usnic_verbs.o
+
+usnic_verbs-y=\
+usnic_fwd.o \
+usnic_transport.o \
+usnic_uiom.o \
+usnic_uiom_interval_tree.o \
+usnic_vnic.o \
+usnic_ib_main.o \
+usnic_ib_qp_grp.o \
+usnic_ib_sysfs.o \
+usnic_ib_verbs.o \
+usnic_debugfs.o \
diff --git a/drivers/infiniband/hw/usnic/usnic.h b/drivers/infiniband/hw/usnic/usnic.h
new file mode 100644
index 00000000000..5be13d8991b
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_H_
+#define USNIC_H_
+
+#define DRV_NAME "usnic_verbs"
+
+#define PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC 0x00cf /* User space NIC */
+
+#define DRV_VERSION "1.0.3"
+#define DRV_RELDATE "December 19, 2013"
+
+#endif /* USNIC_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h
new file mode 100644
index 00000000000..04a66229584
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_abi.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#ifndef USNIC_ABI_H
+#define USNIC_ABI_H
+
+/* ABI between userspace and kernel */
+#define USNIC_UVERBS_ABI_VERSION 4
+
+#define USNIC_QP_GRP_MAX_WQS 8
+#define USNIC_QP_GRP_MAX_RQS 8
+#define USNIC_QP_GRP_MAX_CQS 16
+
+enum usnic_transport_type {
+ USNIC_TRANSPORT_UNKNOWN = 0,
+ USNIC_TRANSPORT_ROCE_CUSTOM = 1,
+ USNIC_TRANSPORT_IPV4_UDP = 2,
+ USNIC_TRANSPORT_MAX = 3,
+};
+
+struct usnic_transport_spec {
+ enum usnic_transport_type trans_type;
+ union {
+ struct {
+ uint16_t port_num;
+ } usnic_roce;
+ struct {
+ uint32_t sock_fd;
+ } udp;
+ };
+};
+
+struct usnic_ib_create_qp_cmd {
+ struct usnic_transport_spec spec;
+};
+
+/*TODO: Future - usnic_modify_qp needs to pass in generic filters */
+struct usnic_ib_create_qp_resp {
+ u32 vfid;
+ u32 qp_grp_id;
+ u64 bar_bus_addr;
+ u32 bar_len;
+/*
+ * WQ, RQ, CQ are explicitly specified because exposing a generic resources
+ * interface expands the scope of ABI to many files.
+ */
+ u32 wq_cnt;
+ u32 rq_cnt;
+ u32 cq_cnt;
+ u32 wq_idx[USNIC_QP_GRP_MAX_WQS];
+ u32 rq_idx[USNIC_QP_GRP_MAX_RQS];
+ u32 cq_idx[USNIC_QP_GRP_MAX_CQS];
+ u32 transport;
+ u32 reserved[9];
+};
+
+#endif /* USNIC_ABI_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
new file mode 100644
index 00000000000..39356726614
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_CMN_PKT_HDR_H
+#define USNIC_CMN_PKT_HDR_H
+
+#define USNIC_ROCE_ETHERTYPE (0x8915)
+#define USNIC_ROCE_GRH_VER (8)
+#define USNIC_PROTO_VER (1)
+#define USNIC_ROCE_GRH_VER_SHIFT (4)
+
+#endif /* USNIC_CMN_PKT_HDR_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_common_util.h b/drivers/infiniband/hw/usnic/usnic_common_util.h
new file mode 100644
index 00000000000..9d737ed5e55
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_common_util.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_CMN_UTIL_H
+#define USNIC_CMN_UTIL_H
+
+static inline void
+usnic_mac_to_gid(const char *const mac, char *raw_gid)
+{
+ raw_gid[0] = 0xfe;
+ raw_gid[1] = 0x80;
+ memset(&raw_gid[2], 0, 6);
+ raw_gid[8] = mac[0]^2;
+ raw_gid[9] = mac[1];
+ raw_gid[10] = mac[2];
+ raw_gid[11] = 0xff;
+ raw_gid[12] = 0xfe;
+ raw_gid[13] = mac[3];
+ raw_gid[14] = mac[4];
+ raw_gid[15] = mac[5];
+}
+
+static inline void
+usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid)
+{
+ raw_gid[0] = 0xfe;
+ raw_gid[1] = 0x80;
+ memset(&raw_gid[2], 0, 2);
+ memcpy(&raw_gid[4], &inaddr, 4);
+ raw_gid[8] = mac[0]^2;
+ raw_gid[9] = mac[1];
+ raw_gid[10] = mac[2];
+ raw_gid[11] = 0xff;
+ raw_gid[12] = 0xfe;
+ raw_gid[13] = mac[3];
+ raw_gid[14] = mac[4];
+ raw_gid[15] = mac[5];
+}
+
+static inline void
+usnic_write_gid_if_id_from_mac(char *mac, char *raw_gid)
+{
+ raw_gid[8] = mac[0]^2;
+ raw_gid[9] = mac[1];
+ raw_gid[10] = mac[2];
+ raw_gid[11] = 0xff;
+ raw_gid[12] = 0xfe;
+ raw_gid[13] = mac[3];
+ raw_gid[14] = mac[4];
+ raw_gid[15] = mac[5];
+}
+
+#endif /* USNIC_CMN_UTIL_H */
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
new file mode 100644
index 00000000000..5d13860161a
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "usnic.h"
+#include "usnic_log.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_transport.h"
+
+static struct dentry *debugfs_root;
+static struct dentry *flows_dentry;
+
+/*
+ * Read handler for the "build-info" debugfs file: formats DRV_VERSION and
+ * DRV_RELDATE into a stack buffer and copies the text to user space.
+ * A read at any non-zero offset reports end of file.
+ */
+static ssize_t usnic_debugfs_buildinfo_read(struct file *f, char __user *data,
+						size_t count, loff_t *ppos)
+{
+	char info[500];
+	int len;
+
+	if (*ppos > 0)
+		return 0;
+
+	len = scnprintf(info, sizeof(info), "version: %s\nbuild date: %s\n",
+			DRV_VERSION, DRV_RELDATE);
+
+	return simple_read_from_buffer(data, count, ppos, info, len);
+}
+
+/* File operations for the read-only "build-info" debugfs file. */
+static const struct file_operations usnic_debugfs_buildinfo_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = usnic_debugfs_buildinfo_read
+};
+
+/*
+ * Read handler for a per-flow debugfs file.
+ *
+ * Formats one line describing the flow stored in f->private_data (QP group
+ * id, transport type and the transport-specific port or socket details)
+ * into a stack buffer while holding the QP group's lock, then copies it to
+ * user space.  A read at any non-zero offset reports end of file.
+ */
+static ssize_t flowinfo_read(struct file *f, char __user *data,
+				size_t count, loff_t *ppos)
+{
+	struct usnic_ib_qp_grp_flow *qp_flow;
+	int n;
+	int left;
+	char *ptr;
+	char buf[512];
+
+	qp_flow = f->private_data;
+	ptr = buf;
+	/*
+	 * Bound the formatting by the size of buf, not by the caller-supplied
+	 * count: count can exceed sizeof(buf) (stack overrun) and does not
+	 * necessarily fit in an int.  simple_read_from_buffer() below is what
+	 * applies count when copying out.
+	 */
+	left = sizeof(buf);
+
+	if (*ppos > 0)
+		return 0;
+
+	spin_lock(&qp_flow->qp_grp->lock);
+	n = scnprintf(ptr, left,
+			"QP Grp ID: %d Transport: %s ",
+			qp_flow->qp_grp->grp_id,
+			usnic_transport_to_str(qp_flow->trans_type));
+	UPDATE_PTR_LEFT(n, ptr, left);
+	if (qp_flow->trans_type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+		n = scnprintf(ptr, left, "Port_Num:%hu\n",
+				qp_flow->usnic_roce.port_num);
+		UPDATE_PTR_LEFT(n, ptr, left);
+	} else if (qp_flow->trans_type == USNIC_TRANSPORT_IPV4_UDP) {
+		n = usnic_transport_sock_to_str(ptr, left,
+				qp_flow->udp.sock);
+		UPDATE_PTR_LEFT(n, ptr, left);
+		n = scnprintf(ptr, left, "\n");
+		UPDATE_PTR_LEFT(n, ptr, left);
+	}
+	spin_unlock(&qp_flow->qp_grp->lock);
+
+	/* ptr - buf is the number of bytes actually formatted */
+	return simple_read_from_buffer(data, count, ppos, buf, ptr - buf);
+}
+
+/*
+ * File operations for the per-flow debugfs files created by
+ * usnic_debugfs_flow_add().
+ */
+static const struct file_operations flowinfo_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = flowinfo_read,
+};
+
+/*
+ * Create the driver's debugfs hierarchy: a root directory named DRV_NAME,
+ * a "flows" subdirectory and a read-only "build-info" file.
+ *
+ * Failures are logged and leave debugfs_root NULL, which makes
+ * usnic_debugfs_exit() a no-op.
+ */
+void usnic_debugfs_init(void)
+{
+	debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
+	/*
+	 * Treat NULL as failure as well as an ERR_PTR: with a NULL root the
+	 * "flows" directory and "build-info" file below would be created in
+	 * the top-level debugfs directory, and usnic_debugfs_exit() would
+	 * never remove them because it bails out on a NULL root.
+	 */
+	if (IS_ERR_OR_NULL(debugfs_root)) {
+		usnic_err("Failed to create debugfs root dir, check if debugfs is enabled in kernel configuration\n");
+		goto out_clear_root;
+	}
+
+	flows_dentry = debugfs_create_dir("flows", debugfs_root);
+	if (IS_ERR_OR_NULL(flows_dentry)) {
+		usnic_err("Failed to create debugfs flow dir with err %ld\n",
+				PTR_ERR(flows_dentry));
+		goto out_free_root;
+	}
+
+	debugfs_create_file("build-info", S_IRUGO, debugfs_root,
+				NULL, &usnic_debugfs_buildinfo_ops);
+	return;
+
+out_free_root:
+	debugfs_remove_recursive(debugfs_root);
+out_clear_root:
+	debugfs_root = NULL;
+}
+
+/*
+ * Remove the whole debugfs hierarchy created by usnic_debugfs_init(), if
+ * any, and forget the root dentry.
+ */
+void usnic_debugfs_exit(void)
+{
+	if (debugfs_root) {
+		debugfs_remove_recursive(debugfs_root);
+		debugfs_root = NULL;
+	}
+}
+
+/*
+ * Create a read-only debugfs file for @qp_flow under the "flows" directory.
+ * The file is named after the flow id and served by flowinfo_ops.  Does
+ * nothing when the "flows" directory is not available; a creation failure
+ * is only logged.
+ */
+void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+	if (IS_ERR_OR_NULL(flows_dentry))
+		return;
+
+	scnprintf(qp_flow->dentry_name, sizeof(qp_flow->dentry_name),
+			"%u", qp_flow->flow->flow_id);
+
+	qp_flow->dbgfs_dentry = debugfs_create_file(qp_flow->dentry_name,
+							S_IRUGO, flows_dentry,
+							qp_flow, &flowinfo_ops);
+	if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry))
+		return;
+
+	usnic_err("Failed to create dbg fs entry for flow %u\n",
+			qp_flow->flow->flow_id);
+}
+
+/* Remove the debugfs file of @qp_flow, if one was successfully created. */
+void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+	if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry))
+		return;
+
+	debugfs_remove(qp_flow->dbgfs_dentry);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.h b/drivers/infiniband/hw/usnic/usnic_debugfs.h
new file mode 100644
index 00000000000..4087d24a88f
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef USNIC_DEBUGFS_H_
+#define USNIC_DEBUGFS_H_
+
+#include "usnic_ib_qp_grp.h"
+
+void usnic_debugfs_init(void);
+
+void usnic_debugfs_exit(void);
+void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow);
+void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow);
+
+#endif /*!USNIC_DEBUGFS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c
new file mode 100644
index 00000000000..e3c9bd9d3ba
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+
+#include "enic_api.h"
+#include "usnic_common_pkt_hdr.h"
+#include "usnic_fwd.h"
+#include "usnic_log.h"
+
+/*
+ * Issue device command @cmd for vnic @vnic_idx through the enic proxy API,
+ * with a wait argument of 1000, and log the outcome.  The caller must hold
+ * ufdev->lock.
+ *
+ * Returns the status reported by enic_api_devcmd_proxy_by_index().  An
+ * ERR_EINVAL status for CMD_DEL_FILTER is logged at debug level only.
+ */
+static int usnic_fwd_devcmd_locked(struct usnic_fwd_dev *ufdev, int vnic_idx,
+					enum vnic_devcmd_cmd cmd, u64 *a0,
+					u64 *a1)
+{
+	struct net_device *netdev = ufdev->netdev;
+	int status;
+
+	lockdep_assert_held(&ufdev->lock);
+
+	status = enic_api_devcmd_proxy_by_index(netdev, vnic_idx, cmd,
+						a0, a1, 1000);
+	if (!status) {
+		usnic_dbg("Dev %s vnic idx %u cmd %u success",
+				ufdev->name, vnic_idx, cmd);
+		return status;
+	}
+
+	if (status == ERR_EINVAL && cmd == CMD_DEL_FILTER) {
+		usnic_dbg("Dev %s vnic idx %u cmd %u already deleted",
+				ufdev->name, vnic_idx, cmd);
+	} else {
+		usnic_err("Dev %s vnic idx %u cmd %u failed with status %d\n",
+				ufdev->name, vnic_idx, cmd,
+				status);
+	}
+
+	return status;
+}
+
+/*
+ * Locking wrapper around usnic_fwd_devcmd_locked(): takes ufdev->lock for
+ * the duration of the command and returns its status.
+ */
+static int usnic_fwd_devcmd(struct usnic_fwd_dev *ufdev, int vnic_idx,
+				enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1)
+{
+	int rc;
+
+	spin_lock(&ufdev->lock);
+	rc = usnic_fwd_devcmd_locked(ufdev, vnic_idx, cmd, a0, a1);
+	spin_unlock(&ufdev->lock);
+
+	return rc;
+}
+
+/*
+ * Allocate and initialise a forwarding device for @pdev.  The net_device
+ * is taken from the PCI driver data and its name is copied into
+ * ufdev->name.
+ *
+ * Returns the new device, or NULL if the allocation fails.  Release it
+ * with usnic_fwd_dev_free().
+ */
+struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev)
+{
+	struct usnic_fwd_dev *ufdev = kzalloc(sizeof(*ufdev), GFP_KERNEL);
+
+	if (!ufdev)
+		return NULL;
+
+	spin_lock_init(&ufdev->lock);
+	ufdev->pdev = pdev;
+	ufdev->netdev = pci_get_drvdata(pdev);
+	/*
+	 * The buffer was zeroed by kzalloc() and at most size - 1 bytes are
+	 * copied, so the name always stays NUL-terminated.
+	 */
+	strncpy(ufdev->name, netdev_name(ufdev->netdev),
+			sizeof(ufdev->name) - 1);
+
+	return ufdev;
+}
+
+/* Free a forwarding device obtained from usnic_fwd_dev_alloc(). */
+void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev)
+{
+ kfree(ufdev);
+}
+
+/* Copy @mac into ufdev->mac while holding ufdev->lock. */
+void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN])
+{
+ spin_lock(&ufdev->lock);
+ memcpy(&ufdev->mac, mac, sizeof(ufdev->mac));
+ spin_unlock(&ufdev->lock);
+}
+
+/*
+ * Record @inaddr as the forwarding device's IPv4 address.
+ *
+ * Only one address is tracked: returns 0 when no address was set before,
+ * and -EFAULT (leaving the stored address unchanged) otherwise.
+ */
+int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr)
+{
+	int status = -EFAULT;
+
+	spin_lock(&ufdev->lock);
+	if (ufdev->inaddr == 0) {
+		ufdev->inaddr = inaddr;
+		status = 0;
+	}
+	spin_unlock(&ufdev->lock);
+
+	return status;
+}
+
+/*
+ * Clear the stored IPv4 address (set it to 0) under ufdev->lock, so that a
+ * later usnic_fwd_add_ipaddr() can succeed again.
+ */
+void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->inaddr = 0;
+ spin_unlock(&ufdev->lock);
+}
+
+/* Mark the forwarding device's link as up, under ufdev->lock. */
+void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->link_up = 1;
+ spin_unlock(&ufdev->lock);
+}
+
+/*
+ * Mark the forwarding device's link as down, under ufdev->lock.  While the
+ * link is down usnic_fwd_dev_ready_locked() reports -EPERM.
+ */
+void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->link_up = 0;
+ spin_unlock(&ufdev->lock);
+}
+
+/* Store @mtu in ufdev->mtu while holding ufdev->lock. */
+void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu)
+{
+ spin_lock(&ufdev->lock);
+ ufdev->mtu = mtu;
+ spin_unlock(&ufdev->lock);
+}
+
+/*
+ * Report whether the forwarding device can accept flows: 0 when the link
+ * is up, -EPERM otherwise.  The caller must hold ufdev->lock.
+ */
+static int usnic_fwd_dev_ready_locked(struct usnic_fwd_dev *ufdev)
+{
+	lockdep_assert_held(&ufdev->lock);
+
+	return ufdev->link_up ? 0 : -EPERM;
+}
+
+/*
+ * Sanity-check @filter against the state of @ufdev.  The caller must hold
+ * ufdev->lock.
+ *
+ * Only FILTER_IPV4_5TUPLE filters are inspected; any other type is
+ * accepted.  For a 5-tuple filter the checks run in this order:
+ *   -EACCES  destination address field not flagged
+ *   -EBUSY   destination port field not flagged
+ *   -EINVAL  the device has no IPv4 address
+ *   -ERANGE  destination port is 0
+ *   -EFAULT  destination address differs from ntohl() of the device address
+ * Returns 0 when every check passes.
+ */
+static int validate_filter_locked(struct usnic_fwd_dev *ufdev,
+					struct filter *filter)
+{
+	lockdep_assert_held(&ufdev->lock);
+
+	if (filter->type != FILTER_IPV4_5TUPLE)
+		return 0;
+
+	if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_AD))
+		return -EACCES;
+	if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_PT))
+		return -EBUSY;
+	if (ufdev->inaddr == 0)
+		return -EINVAL;
+	if (filter->u.ipv4.dst_port == 0)
+		return -ERANGE;
+	if (ntohl(ufdev->inaddr) != filter->u.ipv4.dst_addr)
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Serialise @filter and @action as two consecutive TLV records starting at
+ * @tlv: a CLSF_TLV_FILTER record followed by a CLSF_TLV_ACTION record.
+ *
+ * The caller must provide room for both records; usnic_fwd_alloc_flow()
+ * sizes its buffer as 2 * sizeof(struct filter_tlv) + sizeof(struct filter)
+ * + sizeof(struct filter_action).
+ */
+static void fill_tlv(struct filter_tlv *tlv, struct filter *filter,
+ struct filter_action *action)
+{
+ tlv->type = CLSF_TLV_FILTER;
+ tlv->length = sizeof(struct filter);
+ *((struct filter *)&tlv->val) = *filter;
+
+ /* Step over the first record's header and payload to the second one */
+ tlv = (struct filter_tlv *)((char *)tlv + sizeof(struct filter_tlv) +
+ sizeof(struct filter));
+ tlv->type = CLSF_TLV_ACTION;
+ tlv->length = sizeof(struct filter_action);
+ *((struct filter_action *)&tlv->val) = *action;
+}
+
+/*
+ * Install @filter with the action in @uaction on the device and return a
+ * flow handle for it.
+ *
+ * The filter/action pair is serialised into a DMA-coherent TLV buffer and
+ * handed to the device with CMD_ADD_FILTER while ufdev->lock is held.  The
+ * TLV buffer is always freed before returning.
+ *
+ * Returns the new flow on success, or an ERR_PTR():
+ *   -ENOMEM  the flow or the TLV buffer could not be allocated
+ *   the status of usnic_fwd_dev_ready_locked() / validate_filter_locked()
+ *            when the device is not ready or the filter is rejected
+ *   -EFAULT  the device command failed
+ */
+struct usnic_fwd_flow*
+usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
+ struct usnic_filter_action *uaction)
+{
+ struct filter_tlv *tlv;
+ struct pci_dev *pdev;
+ struct usnic_fwd_flow *flow;
+ uint64_t a0, a1;
+ uint64_t tlv_size;
+ dma_addr_t tlv_pa;
+ int status;
+
+ pdev = ufdev->pdev;
+ /* Two TLV headers plus their payloads, matching what fill_tlv() writes */
+ tlv_size = (2*sizeof(struct filter_tlv) + sizeof(struct filter) +
+ sizeof(struct filter_action));
+
+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
+ if (!flow)
+ return ERR_PTR(-ENOMEM);
+
+ tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa);
+ if (!tlv) {
+ usnic_err("Failed to allocate memory\n");
+ status = -ENOMEM;
+ goto out_free_flow;
+ }
+
+ fill_tlv(tlv, filter, &uaction->action);
+
+ spin_lock(&ufdev->lock);
+ status = usnic_fwd_dev_ready_locked(ufdev);
+ if (status) {
+ usnic_err("Forwarding dev %s not ready with status %d\n",
+ ufdev->name, status);
+ goto out_free_tlv;
+ }
+
+ status = validate_filter_locked(ufdev, filter);
+ if (status) {
+ usnic_err("Failed to validate filter with status %d\n",
+ status);
+ goto out_free_tlv;
+ }
+
+ /* Issue Devcmd */
+ /* a0 carries the TLV's DMA address in and is read back as the filter id */
+ a0 = tlv_pa;
+ a1 = tlv_size;
+ status = usnic_fwd_devcmd_locked(ufdev, uaction->vnic_idx,
+ CMD_ADD_FILTER, &a0, &a1);
+ if (status) {
+ usnic_err("VF %s Filter add failed with status:%d",
+ ufdev->name, status);
+ status = -EFAULT;
+ goto out_free_tlv;
+ } else {
+ usnic_dbg("VF %s FILTER ID:%llu", ufdev->name, a0);
+ }
+
+ flow->flow_id = (uint32_t) a0;
+ flow->vnic_idx = uaction->vnic_idx;
+ flow->ufdev = ufdev;
+
+ /* Reached on success as well as on error: status tells the two apart */
+out_free_tlv:
+ spin_unlock(&ufdev->lock);
+ pci_free_consistent(pdev, tlv_size, tlv, tlv_pa);
+ if (!status)
+ return flow;
+out_free_flow:
+ kfree(flow);
+ return ERR_PTR(status);
+}
+
+/*
+ * Remove @flow's filter from the device with CMD_DEL_FILTER and free the
+ * flow handle.
+ *
+ * Always returns 0: a failure to delete the filter is logged but reported
+ * to the caller as success (see the comment in the body).  @flow must not
+ * be used after this call.
+ */
+int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow)
+{
+	int status;
+	u64 a0, a1;
+
+	a0 = flow->flow_id;
+	/*
+	 * Only a0 carries an argument here, but a1 is passed by pointer as
+	 * well; give it a defined value instead of stack garbage.
+	 */
+	a1 = 0;
+
+	status = usnic_fwd_devcmd(flow->ufdev, flow->vnic_idx,
+					CMD_DEL_FILTER, &a0, &a1);
+	if (status) {
+		if (status == ERR_EINVAL) {
+			usnic_dbg("Filter %u already deleted for VF Idx %u pf: %s status: %d",
+					flow->flow_id, flow->vnic_idx,
+					flow->ufdev->name, status);
+		} else {
+			usnic_err("PF %s VF Idx %u Filter: %u FILTER DELETE failed with status %d",
+					flow->ufdev->name, flow->vnic_idx,
+					flow->flow_id, status);
+		}
+		status = 0;
+		/*
+		 * Log the error and fake success to the caller because if
+		 * a flow fails to be deleted in the firmware, it is an
+		 * unrecoverable error.
+		 */
+	} else {
+		usnic_dbg("PF %s VF Idx %u Filter: %u FILTER DELETED",
+				flow->ufdev->name, flow->vnic_idx,
+				flow->flow_id);
+	}
+
+	kfree(flow);
+	return status;
+}
+
+/*
+ * Enable queue @qp_idx of vnic @vnic_idx by issuing CMD_QP_ENABLE with
+ * a0 = @qp_idx and a1 = CMD_QP_RQWQ.  Returns the device command status.
+ */
+int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
+{
+	struct net_device *pf_netdev = ufdev->netdev;
+	u64 a0 = qp_idx;
+	u64 a1 = CMD_QP_RQWQ;
+	int status;
+
+	status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_ENABLE, &a0, &a1);
+	if (!status) {
+		usnic_dbg("PF %s VNIC Index %u RQ Index: %u ENABLED",
+				netdev_name(pf_netdev),
+				vnic_idx, qp_idx);
+		return status;
+	}
+
+	usnic_err("PF %s VNIC Index %u RQ Index: %u ENABLE Failed with status %d",
+			netdev_name(pf_netdev), vnic_idx, qp_idx, status);
+
+	return status;
+}
+
+/*
+ * Disable queue @qp_idx of vnic @vnic_idx by issuing CMD_QP_DISABLE with
+ * a0 = @qp_idx and a1 = CMD_QP_RQWQ.  Returns the device command status.
+ */
+int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
+{
+	struct net_device *pf_netdev = ufdev->netdev;
+	u64 a0 = qp_idx;
+	u64 a1 = CMD_QP_RQWQ;
+	int status;
+
+	status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_DISABLE, &a0, &a1);
+	if (!status) {
+		usnic_dbg("PF %s VNIC Index %u RQ Index: %u DISABLED",
+				netdev_name(pf_netdev),
+				vnic_idx,
+				qp_idx);
+		return status;
+	}
+
+	usnic_err("PF %s VNIC Index %u RQ Index: %u DISABLE Failed with status %d",
+			netdev_name(pf_netdev), vnic_idx, qp_idx, status);
+
+	return status;
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h
new file mode 100644
index 00000000000..93713a2230b
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_FWD_H_
+#define USNIC_FWD_H_
+
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/in.h>
+
+#include "usnic_abi.h"
+#include "usnic_common_pkt_hdr.h"
+#include "vnic_devcmd.h"
+
+/*
+ * Forwarding-plane view of one PCI device: the underlying pci/net devices
+ * plus a cached copy of link, MAC, MTU and IPv4 state.
+ */
+struct usnic_fwd_dev {
+ struct pci_dev *pdev;
+ struct net_device *netdev;
+ /* Taken by the setters below and around device commands */
+ spinlock_t lock;
+ /*
+ * The following fields can be read directly off the device.
+ * However, they should be set by an accessor function, except name,
+ * which cannot be changed.
+ */
+ bool link_up;
+ char mac[ETH_ALEN];
+ unsigned int mtu;
+ /* 0 means "no address assigned" */
+ __be32 inaddr;
+ char name[IFNAMSIZ+1];
+};
+
+/*
+ * Handle for one filter installed by usnic_fwd_alloc_flow(); released by
+ * usnic_fwd_dealloc_flow().
+ */
+struct usnic_fwd_flow {
+ /* Filter id returned by the device for CMD_ADD_FILTER */
+ uint32_t flow_id;
+ struct usnic_fwd_dev *ufdev;
+ unsigned int vnic_idx;
+};
+
+/* A filter action together with the vnic index the device command targets. */
+struct usnic_filter_action {
+ int vnic_idx;
+ struct filter_action action;
+};
+
+struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev);
+void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev);
+
+void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]);
+int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr);
+void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu);
+
+/*
+ * Allocate a flow on this forwarding device. Whoever calls this function,
+ * must monitor netdev events on ufdev's netdevice. If NETDEV_REBOOT or
+ * NETDEV_DOWN is seen, flow will no longer function and must be
+ * immediately freed by calling usnic_fwd_dealloc_flow().
+ */
+struct usnic_fwd_flow*
+usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
+ struct usnic_filter_action *action);
+int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow);
+int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx);
+int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx);
+
+/*
+ * Fill @filter as a FILTER_USNIC_ID filter matching the usNIC RoCE
+ * ethertype, the usNIC protocol version and @usnic_id.
+ */
+static inline void usnic_fwd_init_usnic_filter(struct filter *filter,
+						uint32_t usnic_id)
+{
+	filter->type = FILTER_USNIC_ID;
+
+	filter->u.usnic.ethtype = USNIC_ROCE_ETHERTYPE;
+	filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE |
+				FILTER_FIELD_USNIC_ID |
+				FILTER_FIELD_USNIC_PROTO;
+	filter->u.usnic.proto_version =
+		(USNIC_ROCE_GRH_VER << USNIC_ROCE_GRH_VER_SHIFT) |
+		USNIC_PROTO_VER;
+	filter->u.usnic.usnic_id = usnic_id;
+}
+
+/*
+ * Fill @filter as a FILTER_IPV4_5TUPLE filter for UDP.  The destination
+ * address and port are matched (and their flag bits set) only when @daddr
+ * and @dport are non-zero, respectively.
+ */
+static inline void usnic_fwd_init_udp_filter(struct filter *filter,
+						uint32_t daddr, uint16_t dport)
+{
+	filter->type = FILTER_IPV4_5TUPLE;
+	filter->u.ipv4.flags = FILTER_FIELD_5TUP_PROTO;
+	filter->u.ipv4.protocol = PROTO_UDP;
+
+	if (daddr != 0) {
+		filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_AD;
+		filter->u.ipv4.dst_addr = daddr;
+	}
+
+	if (dport != 0) {
+		filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_PT;
+		filter->u.ipv4.dst_port = dport;
+	}
+}
+
+#endif /* !USNIC_FWD_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib.h b/drivers/infiniband/hw/usnic/usnic_ib.h
new file mode 100644
index 00000000000..e5a9297dd1b
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_H_
+#define USNIC_IB_H_
+
+#include <linux/iommu.h>
+#include <linux/netdevice.h>
+
+#include <rdma/ib_verbs.h>
+
+
+#include "usnic.h"
+#include "usnic_abi.h"
+#include "usnic_vnic.h"
+
+#define USNIC_IB_PORT_CNT 1
+#define USNIC_IB_NUM_COMP_VECTORS 1
+
+extern unsigned int usnic_ib_share_vf;
+
+/* Driver-private user context, embedding the core ib_ucontext. */
+struct usnic_ib_ucontext {
+ struct ib_ucontext ibucontext;
+ /* Protected by usnic_ib_dev->usdev_lock */
+ struct list_head qp_grp_list;
+ /* Entry in usnic_ib_dev->ctx_list */
+ struct list_head link;
+};
+
+/* Driver-private protection domain, embedding the core ib_pd. */
+struct usnic_ib_pd {
+ struct ib_pd ibpd;
+ struct usnic_uiom_pd *umem_pd;
+};
+
+/* Driver-private memory region, embedding the core ib_mr. */
+struct usnic_ib_mr {
+ struct ib_mr ibmr;
+ struct usnic_uiom_reg *umem;
+};
+
+/*
+ * Per-PF driver state, embedding the core ib_device.  Instances are linked
+ * on the module-wide usnic_ib_ibdev_list through ib_dev_link.
+ */
+struct usnic_ib_dev {
+ struct ib_device ib_dev;
+ struct pci_dev *pdev;
+ struct net_device *netdev;
+ struct usnic_fwd_dev *ufdev;
+ struct list_head ib_dev_link;
+ struct list_head vf_dev_list;
+ /* List of usnic_ib_ucontext, linked through their 'link' member */
+ struct list_head ctx_list;
+ /* Held by the netdev/inet event handlers while they update this device */
+ struct mutex usdev_lock;
+
+ /* provisioning information */
+ struct kref vf_cnt;
+ unsigned int vf_res_cnt[USNIC_VNIC_RES_TYPE_MAX];
+
+ /* sysfs vars for QPN reporting */
+ struct kobject *qpn_kobj;
+};
+
+/* Per-VF driver state. */
+struct usnic_ib_vf {
+ /* Owning PF device */
+ struct usnic_ib_dev *pf;
+ spinlock_t lock;
+ struct usnic_vnic *vnic;
+ unsigned int qp_grp_ref_cnt;
+ struct usnic_ib_pd *pd;
+ /* NOTE(review): presumably the entry in pf->vf_dev_list; confirm */
+ struct list_head link;
+};
+
+/* Map a core ib_device back to the usnic_ib_dev that embeds it. */
+static inline
+struct usnic_ib_dev *to_usdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct usnic_ib_dev, ib_dev);
+}
+
+/* Map a core ib_ucontext back to the usnic_ib_ucontext that embeds it. */
+static inline
+struct usnic_ib_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext);
+}
+
+/* Map a core ib_pd back to the usnic_ib_pd that embeds it. */
+static inline
+struct usnic_ib_pd *to_upd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct usnic_ib_pd, ibpd);
+}
+
+/*
+ * Map a core ib_ucontext back to the usnic_ib_ucontext that embeds it.
+ * Same conversion as to_ucontext(), under a second name.
+ */
+static inline
+struct usnic_ib_ucontext *to_uucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext);
+}
+
+/* Map a core ib_mr back to the usnic_ib_mr that embeds it. */
+static inline
+struct usnic_ib_mr *to_umr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct usnic_ib_mr, ibmr);
+}
+void usnic_ib_log_vf(struct usnic_ib_vf *vf);
+
+/*
+ * Bookkeeping after writing N bytes through cursor P with L bytes of room:
+ * advance P by N and shrink L by N.  P and L must be modifiable lvalues;
+ * N is evaluated twice, so it must not have side effects.
+ */
+#define UPDATE_PTR_LEFT(N, P, L) \
+do { \
+ L -= (N); \
+ P += (N); \
+} while (0)
+
+#endif /* USNIC_IB_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
new file mode 100644
index 00000000000..fb6d026f92c
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -0,0 +1,682 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Upinder Malhi <umalhi@cisco.com>
+ * Author: Anant Deepak <anadeepa@cisco.com>
+ * Author: Cesare Cantu' <cantuc@cisco.com>
+ * Author: Jeff Squyres <jsquyres@cisco.com>
+ * Author: Kiran Thirumalai <kithirum@cisco.com>
+ * Author: Xuyang Wang <xuywang@cisco.com>
+ * Author: Reese Faucette <rfaucett@cisco.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_abi.h"
+#include "usnic_common_util.h"
+#include "usnic_ib.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_log.h"
+#include "usnic_fwd.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_verbs.h"
+#include "usnic_transport.h"
+#include "usnic_uiom.h"
+#include "usnic_ib_sysfs.h"
+
+/* Log level used by the usnic logging macros; defaults to errors only */
+unsigned int usnic_log_lvl = USNIC_LOG_LVL_ERR;
+unsigned int usnic_ib_share_vf = 1;
+
+/* Driver banner: name, version and release date */
+static const char usnic_version[] =
+ DRV_NAME ": Cisco VIC (USNIC) Verbs Driver v"
+ DRV_VERSION " (" DRV_RELDATE ")\n";
+
+/*
+ * All usnic_ib_dev instances, linked through ib_dev_link.  The notifier
+ * callbacks in this file hold usnic_ib_ibdev_list_lock while walking it.
+ */
+static DEFINE_MUTEX(usnic_ib_ibdev_list_lock);
+static LIST_HEAD(usnic_ib_ibdev_list);
+
+/* Callback dump funcs */
+/*
+ * Header callback for usnic_vnic_dump(): @obj is a struct usnic_ib_vf.
+ * Writes "PF: <ib device name> " into @buf and returns the number of
+ * characters written, as reported by scnprintf().
+ */
+static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz)
+{
+ struct usnic_ib_vf *vf = obj;
+ return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name);
+}
+/* End callback dump funcs */
+
+/*
+ * Dump @vf's vnic into @buf via usnic_vnic_dump(), using the VF header
+ * callback above and the QP group header/row callbacks.
+ */
+static void usnic_ib_dump_vf(struct usnic_ib_vf *vf, char *buf, int buf_sz)
+{
+ usnic_vnic_dump(vf->vnic, buf, buf_sz, vf,
+ usnic_ib_dump_vf_hdr,
+ usnic_ib_qp_grp_dump_hdr, usnic_ib_qp_grp_dump_rows);
+}
+
+/*
+ * Log a textual dump of @vf at debug level.
+ *
+ * NOTE(review): buf is not initialised here, so this relies on
+ * usnic_ib_dump_vf() always leaving a NUL-terminated string in it; confirm
+ * against usnic_vnic_dump().
+ */
+void usnic_ib_log_vf(struct usnic_ib_vf *vf)
+{
+ char buf[1000];
+ usnic_ib_dump_vf(vf, buf, sizeof(buf));
+ usnic_dbg("%s\n", buf);
+}
+
+/* Start of netdev section */
+/*
+ * Translate a netdevice notifier event code into its symbolic name, for
+ * logging.  Codes without an entry in the table yield
+ * "UNKNOWN_NETDEV_EVENT".
+ */
+static inline const char *usnic_ib_netdev_event_to_string(unsigned long event)
+{
+	/*
+	 * Index by the event constants themselves rather than by position:
+	 * a positional table that skips one code (NETDEV_CHANGENAME) shifts
+	 * every later name by one, e.g. NETDEV_FEAT_CHANGE would be reported
+	 * as NETDEV_BONDING_FAILOVER.  The table is static const so it is not
+	 * rebuilt on the stack on every call.
+	 */
+	static const char * const event2str[] = {
+		[0]				= "NETDEV_NONE",
+		[NETDEV_UP]			= "NETDEV_UP",
+		[NETDEV_DOWN]			= "NETDEV_DOWN",
+		[NETDEV_REBOOT]			= "NETDEV_REBOOT",
+		[NETDEV_CHANGE]			= "NETDEV_CHANGE",
+		[NETDEV_REGISTER]		= "NETDEV_REGISTER",
+		[NETDEV_UNREGISTER]		= "NETDEV_UNREGISTER",
+		[NETDEV_CHANGEMTU]		= "NETDEV_CHANGEMTU",
+		[NETDEV_CHANGEADDR]		= "NETDEV_CHANGEADDR",
+		[NETDEV_GOING_DOWN]		= "NETDEV_GOING_DOWN",
+		[NETDEV_CHANGENAME]		= "NETDEV_CHANGENAME",
+		[NETDEV_FEAT_CHANGE]		= "NETDEV_FEAT_CHANGE",
+		[NETDEV_BONDING_FAILOVER]	= "NETDEV_BONDING_FAILOVER",
+		[NETDEV_PRE_UP]			= "NETDEV_PRE_UP",
+		[NETDEV_PRE_TYPE_CHANGE]	= "NETDEV_PRE_TYPE_CHANGE",
+		[NETDEV_POST_TYPE_CHANGE]	= "NETDEV_POST_TYPE_CHANGE",
+		[NETDEV_POST_INIT]		= "NETDEV_POST_INIT",
+		[NETDEV_UNREGISTER_FINAL]	= "NETDEV_UNREGISTER_FINAL",
+		[NETDEV_RELEASE]		= "NETDEV_RELEASE",
+		[NETDEV_NOTIFY_PEERS]		= "NETDEV_NOTIFY_PEERS",
+		[NETDEV_JOIN]			= "NETDEV_JOIN",
+	};
+
+	/* Designated initialisers leave NULL holes for unlisted codes */
+	if (event >= ARRAY_SIZE(event2str) || !event2str[event])
+		return "UNKNOWN_NETDEV_EVENT";
+
+	return event2str[event];
+}
+
+/*
+ * Move every QP group of @us_ibdev that is in INIT, RTR or RTS state to the
+ * ERR state.  Walks all user contexts on ctx_list and their qp_grp_list.
+ * Failures are logged and the walk continues.
+ *
+ * The caller must hold us_ibdev->usdev_lock (enforced with BUG_ON).
+ */
+static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev)
+{
+ struct usnic_ib_ucontext *ctx;
+ struct usnic_ib_qp_grp *qp_grp;
+ enum ib_qp_state cur_state;
+ int status;
+
+ BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
+
+ list_for_each_entry(ctx, &us_ibdev->ctx_list, link) {
+ list_for_each_entry(qp_grp, &ctx->qp_grp_list, link) {
+ cur_state = qp_grp->state;
+ if (cur_state == IB_QPS_INIT ||
+ cur_state == IB_QPS_RTR ||
+ cur_state == IB_QPS_RTS) {
+ status = usnic_ib_qp_grp_modify(qp_grp,
+ IB_QPS_ERR,
+ NULL);
+ if (status) {
+ usnic_err("Failed to transistion qp grp %u from %s to %s\n",
+ qp_grp->grp_id,
+ usnic_ib_qp_grp_state_to_string
+ (cur_state),
+ usnic_ib_qp_grp_state_to_string
+ (IB_QPS_ERR));
+ }
+ }
+ }
+ }
+}
+
+/*
+ * React to a netdevice notifier @event for @us_ibdev, under usdev_lock.
+ *
+ *   NETDEV_REBOOT       active QP groups go to ERR; IB_EVENT_PORT_ERR
+ *   NETDEV_UP/DOWN/CHANGE
+ *                       compare the cached link state with the netdev
+ *                       carrier; on a change update the cache and dispatch
+ *                       IB_EVENT_PORT_ACTIVE or (after erroring active QP
+ *                       groups) IB_EVENT_PORT_ERR
+ *   NETDEV_CHANGEADDR   if the MAC changed: cache it, error active QP
+ *                       groups, dispatch IB_EVENT_GID_CHANGE
+ *   NETDEV_CHANGEMTU    if the MTU changed: cache it and error active QP
+ *                       groups (no IB event is dispatched)
+ *   anything else       logged at debug level and ignored
+ */
+static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
+ unsigned long event)
+{
+ struct net_device *netdev;
+ struct ib_event ib_event;
+
+ memset(&ib_event, 0, sizeof(ib_event));
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ netdev = us_ibdev->netdev;
+ switch (event) {
+ case NETDEV_REBOOT:
+ usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_PORT_ERR;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ break;
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_CHANGE:
+ /* Act only on a transition between cached and current carrier state */
+ if (!us_ibdev->ufdev->link_up &&
+ netif_carrier_ok(netdev)) {
+ usnic_fwd_carrier_up(us_ibdev->ufdev);
+ usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name);
+ ib_event.event = IB_EVENT_PORT_ACTIVE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ } else if (us_ibdev->ufdev->link_up &&
+ !netif_carrier_ok(netdev)) {
+ usnic_fwd_carrier_down(us_ibdev->ufdev);
+ usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_PORT_ERR;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ } else {
+ usnic_dbg("Ignoring %s on %s\n",
+ usnic_ib_netdev_event_to_string(event),
+ us_ibdev->ib_dev.name);
+ }
+ break;
+ case NETDEV_CHANGEADDR:
+ if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr,
+ sizeof(us_ibdev->ufdev->mac))) {
+ usnic_dbg("Ignoring addr change on %s\n",
+ us_ibdev->ib_dev.name);
+ } else {
+ usnic_info(" %s old mac: %pM new mac: %pM\n",
+ us_ibdev->ib_dev.name,
+ us_ibdev->ufdev->mac,
+ netdev->dev_addr);
+ usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_GID_CHANGE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ }
+
+ break;
+ case NETDEV_CHANGEMTU:
+ if (us_ibdev->ufdev->mtu != netdev->mtu) {
+ usnic_info("MTU Change on %s old: %u new: %u\n",
+ us_ibdev->ib_dev.name,
+ us_ibdev->ufdev->mtu, netdev->mtu);
+ usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu);
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ } else {
+ usnic_dbg("Ignoring MTU change on %s\n",
+ us_ibdev->ib_dev.name);
+ }
+ break;
+ default:
+ usnic_dbg("Ignoring event %s on %s",
+ usnic_ib_netdev_event_to_string(event),
+ us_ibdev->ib_dev.name);
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+}
+
+/*
+ * Netdevice notifier callback: find the usnic_ib_dev whose netdev raised
+ * @event (if any) and hand the event to usnic_ib_handle_usdev_event().
+ * The device list is walked under usnic_ib_ibdev_list_lock.  Always
+ * returns NOTIFY_DONE.
+ */
+static int usnic_ib_netdevice_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->netdev == netdev) {
+ usnic_ib_handle_usdev_event(us_ibdev, event);
+ break;
+ }
+ }
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+
+ return NOTIFY_DONE;
+}
+
+/* Notifier block that routes netdevice events to usnic_ib_netdevice_event() */
+static struct notifier_block usnic_ib_netdevice_notifier = {
+ .notifier_call = usnic_ib_netdevice_event
+};
+/* End of netdev section */
+
+/* Start of inet section */
+/*
+ * React to an inet address notifier @event for @us_ibdev, under usdev_lock.
+ * @ptr is the struct in_ifaddr the event refers to.
+ *
+ *   NETDEV_DOWN  forget the cached IPv4 address, move active QP groups to
+ *                ERR and dispatch IB_EVENT_GID_CHANGE
+ *   NETDEV_UP    cache the new address and dispatch IB_EVENT_GID_CHANGE
+ *   other        logged and ignored
+ *
+ * Always returns NOTIFY_DONE.
+ */
+static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
+					unsigned long event, void *ptr)
+{
+	struct in_ifaddr *ifa = ptr;
+	struct ib_event ib_event;
+
+	/*
+	 * Zero the event before filling it in, as the netdev handler does:
+	 * only some members are assigned below, and the rest would otherwise
+	 * reach the event handlers as uninitialised stack contents.
+	 */
+	memset(&ib_event, 0, sizeof(ib_event));
+
+	mutex_lock(&us_ibdev->usdev_lock);
+
+	switch (event) {
+	case NETDEV_DOWN:
+		usnic_info("%s via ip notifiers",
+				usnic_ib_netdev_event_to_string(event));
+		usnic_fwd_del_ipaddr(us_ibdev->ufdev);
+		usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+		ib_event.event = IB_EVENT_GID_CHANGE;
+		ib_event.device = &us_ibdev->ib_dev;
+		ib_event.element.port_num = 1;
+		ib_dispatch_event(&ib_event);
+		break;
+	case NETDEV_UP:
+		usnic_fwd_add_ipaddr(us_ibdev->ufdev, ifa->ifa_address);
+		usnic_info("%s via ip notifiers: ip %pI4",
+				usnic_ib_netdev_event_to_string(event),
+				&us_ibdev->ufdev->inaddr);
+		ib_event.event = IB_EVENT_GID_CHANGE;
+		ib_event.device = &us_ibdev->ib_dev;
+		ib_event.element.port_num = 1;
+		ib_dispatch_event(&ib_event);
+		break;
+	default:
+		usnic_info("Ignoring event %s on %s",
+				usnic_ib_netdev_event_to_string(event),
+				us_ibdev->ib_dev.name);
+	}
+	mutex_unlock(&us_ibdev->usdev_lock);
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Inet address notifier callback: @ptr is a struct in_ifaddr.  Finds the
+ * usnic_ib_dev whose netdev owns the address (if any) and hands the event
+ * to usnic_ib_handle_inet_event().  The device list is walked under
+ * usnic_ib_ibdev_list_lock.  Always returns NOTIFY_DONE.
+ */
+static int usnic_ib_inetaddr_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct usnic_ib_dev *us_ibdev;
+ struct in_ifaddr *ifa = ptr;
+ struct net_device *netdev = ifa->ifa_dev->dev;
+
+ mutex_lock(&usnic_ib_ibdev_list_lock);
+ list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+ if (us_ibdev->netdev == netdev) {
+ usnic_ib_handle_inet_event(us_ibdev, event, ptr);
+ break;
+ }
+ }
+ mutex_unlock(&usnic_ib_ibdev_list_lock);
+
+ return NOTIFY_DONE;
+}
+/* Notifier block that routes inet address events to usnic_ib_inetaddr_event() */
+static struct notifier_block usnic_ib_inetaddr_notifier = {
+ .notifier_call = usnic_ib_inetaddr_event
+};
+/* End of inet section*/
+
+/* Start of PF discovery section */
+/*
+ * Allocate, populate and register the usnic IB device for the PF @dev.
+ *
+ * The PF's PCI driver data is used as its struct net_device.  After the
+ * ib_device is registered, the forwarding device is seeded with the
+ * netdev's MTU, MAC, carrier state and (if any) first IPv4 address, and
+ * the node GUID is derived from the permanent MAC and that address.
+ *
+ * Returns the new struct usnic_ib_dev on success.  On failure returns an
+ * ERR_PTR() when the ib_device allocation itself failed, or NULL when a
+ * later step failed (everything allocated so far is released first);
+ * callers must therefore test the result with IS_ERR_OR_NULL().
+ */
+static void *usnic_ib_device_add(struct pci_dev *dev)
+{
+	struct usnic_ib_dev *us_ibdev;
+	union ib_gid gid;
+	struct in_device *ind;
+	struct net_device *netdev;
+
+	usnic_dbg("\n");
+	netdev = pci_get_drvdata(dev);
+
+	us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev));
+	if (IS_ERR_OR_NULL(us_ibdev)) {
+		usnic_err("Device %s context alloc failed\n",
+				netdev_name(netdev));
+		return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT);
+	}
+
+	us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
+	if (IS_ERR_OR_NULL(us_ibdev->ufdev)) {
+		usnic_err("Failed to alloc ufdev for %s with err %ld\n",
+				pci_name(dev), PTR_ERR(us_ibdev->ufdev));
+		goto err_dealloc;
+	}
+
+	mutex_init(&us_ibdev->usdev_lock);
+	INIT_LIST_HEAD(&us_ibdev->vf_dev_list);
+	INIT_LIST_HEAD(&us_ibdev->ctx_list);
+
+	us_ibdev->pdev = dev;
+	us_ibdev->netdev = netdev;
+	us_ibdev->ib_dev.owner = THIS_MODULE;
+	us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP;
+	us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT;
+	us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
+	us_ibdev->ib_dev.dma_device = &dev->dev;
+	us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
+	strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
+
+	/* User verbs commands this device accepts. */
+	us_ibdev->ib_dev.uverbs_cmd_mask =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+		(1ull << IB_USER_VERBS_CMD_REG_MR) |
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
+
+	/* Verbs entry points. */
+	us_ibdev->ib_dev.query_device = usnic_ib_query_device;
+	us_ibdev->ib_dev.query_port = usnic_ib_query_port;
+	us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey;
+	us_ibdev->ib_dev.query_gid = usnic_ib_query_gid;
+	us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer;
+	us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd;
+	us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd;
+	us_ibdev->ib_dev.create_qp = usnic_ib_create_qp;
+	us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp;
+	us_ibdev->ib_dev.query_qp = usnic_ib_query_qp;
+	us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp;
+	us_ibdev->ib_dev.create_cq = usnic_ib_create_cq;
+	us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq;
+	us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr;
+	us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr;
+	us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext;
+	us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext;
+	us_ibdev->ib_dev.mmap = usnic_ib_mmap;
+	us_ibdev->ib_dev.create_ah = usnic_ib_create_ah;
+	us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah;
+	us_ibdev->ib_dev.post_send = usnic_ib_post_send;
+	us_ibdev->ib_dev.post_recv = usnic_ib_post_recv;
+	us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq;
+	us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq;
+	us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr;
+
+	if (ib_register_device(&us_ibdev->ib_dev, NULL))
+		goto err_fwd_dealloc;
+
+	usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu);
+	usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr);
+	if (netif_carrier_ok(netdev))
+		usnic_fwd_carrier_up(us_ibdev->ufdev);
+
+	/*
+	 * ip_ptr is not guaranteed to be set on the netdev, so check it
+	 * before walking to the first configured IPv4 address.
+	 */
+	ind = (struct in_device *)netdev->ip_ptr;
+	if (ind != NULL && ind->ifa_list != NULL)
+		usnic_fwd_add_ipaddr(us_ibdev->ufdev,
+					ind->ifa_list->ifa_address);
+
+	usnic_mac_ip_to_gid(netdev->perm_addr,
+				us_ibdev->ufdev->inaddr, &gid.raw[0]);
+	memcpy(&us_ibdev->ib_dev.node_guid, &gid.global.interface_id,
+		sizeof(gid.global.interface_id));
+	kref_init(&us_ibdev->vf_cnt);
+
+	usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n",
+			us_ibdev->ib_dev.name, netdev_name(netdev),
+			us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up,
+			us_ibdev->ufdev->mtu);
+	return us_ibdev;
+
+err_fwd_dealloc:
+	usnic_fwd_dev_free(us_ibdev->ufdev);
+err_dealloc:
+	usnic_err("failed -- deallocing device\n");
+	ib_dealloc_device(&us_ibdev->ib_dev);
+	return NULL;
+}
+
+/*
+ * Tear down a PF's IB device: remove its sysfs entries, free the
+ * forwarding device, then unregister and deallocate the ib_device.
+ *
+ * us_ibdev was obtained from ib_alloc_device() in usnic_ib_device_add(),
+ * so it must not be touched after the final ib_dealloc_device() call.
+ */
+static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev)
+{
+ usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name);
+ usnic_ib_sysfs_unregister_usdev(us_ibdev);
+ usnic_fwd_dev_free(us_ibdev->ufdev);
+ ib_unregister_device(&us_ibdev->ib_dev);
+ ib_dealloc_device(&us_ibdev->ib_dev);
+}
+
+/*
+ * kref release callback for usnic_ib_dev.vf_cnt.
+ *
+ * Looks up, under usnic_ib_ibdev_list_lock, the PF entry whose pci_dev
+ * matches the one owning @kref, unlinks it from usnic_ib_ibdev_list and
+ * removes the device.  Warns if no such entry is on the list.
+ */
+static void usnic_ib_undiscover_pf(struct kref *kref)
+{
+	struct usnic_ib_dev *cur, *match = NULL;
+	struct pci_dev *dev;
+
+	dev = container_of(kref, struct usnic_ib_dev, vf_cnt)->pdev;
+
+	mutex_lock(&usnic_ib_ibdev_list_lock);
+
+	list_for_each_entry(cur, &usnic_ib_ibdev_list, ib_dev_link) {
+		if (cur->pdev == dev) {
+			match = cur;
+			break;
+		}
+	}
+
+	if (match) {
+		list_del(&match->ib_dev_link);
+		/* match is freed here; only its NULL-ness is used below. */
+		usnic_ib_device_remove(match);
+	}
+
+	WARN(!match, "Failed to remove PF %s\n", pci_name(dev));
+
+	mutex_unlock(&usnic_ib_ibdev_list_lock);
+}
+
+/*
+ * Find or create the usnic_ib_dev for the PF that owns @vnic's VF.
+ *
+ * If the PF is already on usnic_ib_ibdev_list its vf_cnt reference is
+ * bumped and it is returned.  Otherwise a new device is added, its sysfs
+ * entries are registered and it is put on the list.  All of this happens
+ * under usnic_ib_ibdev_list_lock.
+ *
+ * Returns the PF device, or an ERR_PTR() on failure (a NULL from
+ * usnic_ib_device_add() is mapped to -EFAULT).
+ */
+static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic)
+{
+	struct usnic_ib_dev *pf;
+	struct pci_dev *vf_pdev, *pf_pdev;
+	int rc;
+
+	vf_pdev = usnic_vnic_get_pdev(vnic);
+	pf_pdev = pci_physfn(vf_pdev);
+
+	BUG_ON(!pf_pdev);
+
+	mutex_lock(&usnic_ib_ibdev_list_lock);
+
+	/* Already known PF: just take another reference. */
+	list_for_each_entry(pf, &usnic_ib_ibdev_list, ib_dev_link) {
+		if (pf->pdev != pf_pdev)
+			continue;
+		kref_get(&pf->vf_cnt);
+		goto unlock;
+	}
+
+	pf = usnic_ib_device_add(pf_pdev);
+	if (IS_ERR_OR_NULL(pf)) {
+		if (!pf)
+			pf = ERR_PTR(-EFAULT);
+		goto unlock;
+	}
+
+	rc = usnic_ib_sysfs_register_usdev(pf);
+	if (rc) {
+		usnic_ib_device_remove(pf);
+		pf = ERR_PTR(rc);
+		goto unlock;
+	}
+
+	list_add(&pf->ib_dev_link, &usnic_ib_ibdev_list);
+unlock:
+	mutex_unlock(&usnic_ib_ibdev_list_lock);
+	return pf;
+}
+/* End of PF discovery section */
+
+/* Start of PCI section */
+
+/*
+ * PCI IDs this driver binds to: the Cisco VIC user-space NIC function.
+ * The all-zero entry terminates the table.
+ */
+static DEFINE_PCI_DEVICE_TABLE(usnic_ib_pci_ids) = {
+ {PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC)},
+ {0,}
+};
+
+/*
+ * PCI probe callback for a usnic VF.
+ *
+ * Enables the device, claims its regions, allocates the vnic, discovers
+ * (or creates) the owning PF and links the new VF onto the PF's
+ * vf_dev_list.  The per-resource-type counts of this VF are recorded on
+ * the PF as well.
+ *
+ * Returns 0 on success or a negative errno; on failure every step that
+ * already succeeded is undone in reverse order.
+ */
+static int usnic_ib_pci_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id)
+{
+	int err;
+	struct usnic_ib_dev *pf;
+	struct usnic_ib_vf *vf;
+	enum usnic_vnic_res_type res_type;
+
+	vf = kzalloc(sizeof(*vf), GFP_KERNEL);
+	if (!vf)
+		return -ENOMEM;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		usnic_err("Failed to enable %s with err %d\n",
+				pci_name(pdev), err);
+		goto out_clean_vf;
+	}
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		usnic_err("Failed to request region for %s with err %d\n",
+				pci_name(pdev), err);
+		goto out_disable_device;
+	}
+
+	pci_set_master(pdev);
+	pci_set_drvdata(pdev, vf);
+
+	vf->vnic = usnic_vnic_alloc(pdev);
+	if (IS_ERR_OR_NULL(vf->vnic)) {
+		err = vf->vnic ? PTR_ERR(vf->vnic) : -ENOMEM;
+		usnic_err("Failed to alloc vnic for %s with err %d\n",
+				pci_name(pdev), err);
+		goto out_release_regions;
+	}
+
+	pf = usnic_ib_discover_pf(vf->vnic);
+	if (IS_ERR_OR_NULL(pf)) {
+		/* Resolve the code first so a NULL pf is not logged as 0. */
+		err = pf ? PTR_ERR(pf) : -EFAULT;
+		usnic_err("Failed to discover pf of vnic %s with err %d\n",
+				pci_name(pdev), err);
+		goto out_clean_vnic;
+	}
+
+	vf->pf = pf;
+	spin_lock_init(&vf->lock);
+	mutex_lock(&pf->usdev_lock);
+	list_add_tail(&vf->link, &pf->vf_dev_list);
+	/*
+	 * Save max settings (will be the same for each VF; it is easier to
+	 * re-write them than to say "if (!set) { set_values(); set = 1; }").
+	 */
+	for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+			res_type < USNIC_VNIC_RES_TYPE_MAX;
+			res_type++) {
+		pf->vf_res_cnt[res_type] = usnic_vnic_res_cnt(vf->vnic,
+								res_type);
+	}
+
+	mutex_unlock(&pf->usdev_lock);
+
+	usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev),
+			pf->ib_dev.name);
+	usnic_ib_log_vf(vf);
+	return 0;
+
+out_clean_vnic:
+	usnic_vnic_free(vf->vnic);
+out_release_regions:
+	pci_set_drvdata(pdev, NULL);
+	pci_clear_master(pdev);
+	pci_release_regions(pdev);
+out_disable_device:
+	pci_disable_device(pdev);
+out_clean_vf:
+	kfree(vf);
+	return err;
+}
+
+/*
+ * PCI remove callback for a usnic VF: unlink the VF from its PF, drop the
+ * VF's reference on the PF, then release the vnic and PCI resources
+ * acquired in usnic_ib_pci_probe() and free the VF.
+ */
+static void usnic_ib_pci_remove(struct pci_dev *pdev)
+{
+ struct usnic_ib_vf *vf = pci_get_drvdata(pdev);
+ struct usnic_ib_dev *pf = vf->pf;
+
+ mutex_lock(&pf->usdev_lock);
+ list_del(&vf->link);
+ mutex_unlock(&pf->usdev_lock);
+
+ /*
+ * Dropping the last reference runs usnic_ib_undiscover_pf(), which
+ * removes the PF device; pf must not be used past this point.
+ */
+ kref_put(&pf->vf_cnt, usnic_ib_undiscover_pf);
+ usnic_vnic_free(vf->vnic);
+ pci_set_drvdata(pdev, NULL);
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ kfree(vf);
+
+ usnic_info("Removed VF %s\n", pci_name(pdev));
+}
+
+/* PCI driver entry points */
+/*
+ * PCI driver descriptor: binds the IDs in usnic_ib_pci_ids to the
+ * probe/remove callbacks above.
+ */
+static struct pci_driver usnic_ib_pci_driver = {
+ .name = DRV_NAME,
+ .id_table = usnic_ib_pci_ids,
+ .probe = usnic_ib_pci_probe,
+ .remove = usnic_ib_pci_remove,
+};
+/* End of PCI section */
+
+/* Start of module section */
+/*
+ * Module entry point.
+ *
+ * Brings up, in order: the uiom layer, the PCI driver, the netdevice and
+ * inetaddr notifiers, the transport layer and debugfs.  If any step
+ * fails, the steps that already succeeded are undone in reverse order
+ * and the failing step's error code is returned.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __init usnic_ib_init(void)
+{
+	int err;
+
+	printk_once(KERN_INFO "%s", usnic_version);
+
+	err = usnic_uiom_init(DRV_NAME);
+	if (err) {
+		usnic_err("Unable to initalize umem with err %d\n", err);
+		return err;
+	}
+
+	/*
+	 * Keep the return code: without it a failed registration would
+	 * unwind and then report success (err still 0 from above).
+	 */
+	err = pci_register_driver(&usnic_ib_pci_driver);
+	if (err) {
+		usnic_err("Unable to register with PCI\n");
+		goto out_umem_fini;
+	}
+
+	err = register_netdevice_notifier(&usnic_ib_netdevice_notifier);
+	if (err) {
+		usnic_err("Failed to register netdev notifier\n");
+		goto out_pci_unreg;
+	}
+
+	err = register_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+	if (err) {
+		usnic_err("Failed to register inet addr notifier\n");
+		goto out_unreg_netdev_notifier;
+	}
+
+	err = usnic_transport_init();
+	if (err) {
+		usnic_err("Failed to initialize transport\n");
+		goto out_unreg_inetaddr_notifier;
+	}
+
+	usnic_debugfs_init();
+
+	return 0;
+
+out_unreg_inetaddr_notifier:
+	unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+out_unreg_netdev_notifier:
+	unregister_netdevice_notifier(&usnic_ib_netdevice_notifier);
+out_pci_unreg:
+	pci_unregister_driver(&usnic_ib_pci_driver);
+out_umem_fini:
+	usnic_uiom_fini();
+
+	return err;
+}
+
+/*
+ * Module exit point: undoes usnic_ib_init() in the reverse order of
+ * setup (debugfs, transport, inetaddr notifier, netdevice notifier,
+ * PCI driver, uiom).
+ */
+static void __exit usnic_ib_destroy(void)
+{
+ usnic_dbg("\n");
+ usnic_debugfs_exit();
+ usnic_transport_fini();
+ unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+ unregister_netdevice_notifier(&usnic_ib_netdevice_notifier);
+ pci_unregister_driver(&usnic_ib_pci_driver);
+ usnic_uiom_fini();
+}
+
+MODULE_DESCRIPTION("Cisco VIC (usNIC) Verbs Driver");
+MODULE_AUTHOR("Upinder Malhi <umalhi@cisco.com>");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+module_param(usnic_log_lvl, uint, S_IRUGO | S_IWUSR);
+module_param(usnic_ib_share_vf, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(usnic_log_lvl, " Off=0, Err=1, Info=2, Debug=3");
+MODULE_PARM_DESC(usnic_ib_share_vf, "Off=0, On=1 VF sharing amongst QPs");
+MODULE_DEVICE_TABLE(pci, usnic_ib_pci_ids);
+
+module_init(usnic_ib_init);
+module_exit(usnic_ib_destroy);
+/* End of module section */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
new file mode 100644
index 00000000000..f8dfd76be89
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -0,0 +1,761 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/bug.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+
+#include "usnic_log.h"
+#include "usnic_vnic.h"
+#include "usnic_fwd.h"
+#include "usnic_uiom.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_ib_sysfs.h"
+#include "usnic_transport.h"
+
+#define DFLT_RQ_IDX 0
+
+/*
+ * Map an ib_qp_state to the short label used in logs and dumps.
+ * States without a dedicated label yield the fallback string.
+ */
+const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state)
+{
+	const char *label = "UNKOWN STATE";
+
+	switch (state) {
+	case IB_QPS_RESET:
+		label = "Rst";
+		break;
+	case IB_QPS_INIT:
+		label = "Init";
+		break;
+	case IB_QPS_RTR:
+		label = "RTR";
+		break;
+	case IB_QPS_RTS:
+		label = "RTS";
+		break;
+	case IB_QPS_SQD:
+		label = "SQD";
+		break;
+	case IB_QPS_SQE:
+		label = "SQE";
+		break;
+	case IB_QPS_ERR:
+		label = "ERR";
+		break;
+	default:
+		break;
+	}
+
+	return label;
+}
+
+/*
+ * Write the column header matching usnic_ib_qp_grp_dump_rows() into
+ * @buf (at most @buf_sz bytes) and return scnprintf()'s result.
+ */
+int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz)
+{
+	static const char hdr[] = "|QPN\t|State\t|PID\t|VF Idx\t|Fil ID";
+
+	return scnprintf(buf, buf_sz, "%s", hdr);
+}
+
+/*
+ * Format one dump row for the QP group @obj into @buf.
+ *
+ * @obj:    a struct usnic_ib_qp_grp *, or NULL for an all-"N/A" row.
+ * @buf:    destination buffer.
+ * @buf_sz: size of @buf in bytes.
+ *
+ * The last column is the flow id of the first flow on the group's
+ * flows_lst.  A group can have no flows at all (usnic_ib_qp_grp_modify()
+ * drops them on some transitions); in that case the column is printed as
+ * "N/A" instead of reading through an entry that does not exist.
+ *
+ * Returns scnprintf()'s result.
+ */
+int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz)
+{
+	struct usnic_ib_qp_grp *qp_grp = obj;
+	struct usnic_ib_qp_grp_flow *default_flow;
+
+	if (!qp_grp)
+		return scnprintf(buf, buf_sz, "|N/A\t|N/A\t|N/A\t|N/A\t|N/A");
+
+	if (list_empty(&qp_grp->flows_lst))
+		return scnprintf(buf, buf_sz, "|%d\t|%s\t|%d\t|%hu\t|N/A",
+					qp_grp->ibqp.qp_num,
+					usnic_ib_qp_grp_state_to_string(
+							qp_grp->state),
+					qp_grp->owner_pid,
+					usnic_vnic_get_index(qp_grp->vf->vnic));
+
+	default_flow = list_first_entry(&qp_grp->flows_lst,
+					struct usnic_ib_qp_grp_flow, link);
+	return scnprintf(buf, buf_sz, "|%d\t|%s\t|%d\t|%hu\t|%d",
+				qp_grp->ibqp.qp_num,
+				usnic_ib_qp_grp_state_to_string(
+						qp_grp->state),
+				qp_grp->owner_pid,
+				usnic_vnic_get_index(qp_grp->vf->vnic),
+				default_flow->flow->flow_id);
+}
+
+/*
+ * Return the resource chunk that stands for the group's QPs.  QP indices
+ * are derived from the RQ indices, so this is simply the RQ chunk.
+ * Caller must hold qp_grp->lock.
+ */
+static struct usnic_vnic_res_chunk *
+get_qp_res_chunk(struct usnic_ib_qp_grp *qp_grp)
+{
+	struct usnic_vnic_res_chunk *rq_chunk;
+
+	lockdep_assert_held(&qp_grp->lock);
+
+	rq_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+	return rq_chunk;
+}
+
+/*
+ * Enable every QP of the group on the forwarding device.
+ *
+ * If enabling one of them fails, the QPs enabled so far are disabled
+ * again (newest first) and that error is returned.  Returns 0 when all
+ * were enabled.  Caller must hold qp_grp->lock.
+ */
+static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
+{
+	struct usnic_vnic_res_chunk *chunk;
+	struct usnic_vnic_res *entry;
+	int vf_idx;
+	int n;
+	int rc;
+
+	lockdep_assert_held(&qp_grp->lock);
+
+	vf_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+
+	chunk = get_qp_res_chunk(qp_grp);
+	if (IS_ERR_OR_NULL(chunk)) {
+		usnic_err("Unable to get qp res with err %ld\n",
+				PTR_ERR(chunk));
+		return chunk ? PTR_ERR(chunk) : -ENOMEM;
+	}
+
+	for (n = 0; n < chunk->cnt; n++) {
+		entry = chunk->res[n];
+		rc = usnic_fwd_enable_qp(qp_grp->ufdev, vf_idx,
+						entry->vnic_idx);
+		if (!rc)
+			continue;
+
+		usnic_err("Failed to enable qp %d of %s:%d\n with err %d\n",
+				entry->vnic_idx, qp_grp->ufdev->name,
+				vf_idx, rc);
+
+		/* Roll back the QPs that were already enabled. */
+		while (--n >= 0) {
+			entry = chunk->res[n];
+			usnic_fwd_disable_qp(qp_grp->ufdev, vf_idx,
+						entry->vnic_idx);
+		}
+		return rc;
+	}
+
+	return 0;
+}
+
+/*
+ * Disable every QP of the group on the forwarding device.
+ *
+ * A failure on one QP is logged and does not stop the loop; the
+ * remaining QPs are still disabled.  The first error seen is what gets
+ * returned, so a later success cannot hide an earlier failure.
+ *
+ * Returns 0 if every QP was disabled, otherwise a negative errno.
+ * Caller must hold qp_grp->lock.
+ */
+static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
+{
+	int i, vnic_idx;
+	struct usnic_vnic_res_chunk *res_chunk;
+	struct usnic_vnic_res *res;
+	int status = 0;
+	int err;
+
+	lockdep_assert_held(&qp_grp->lock);
+	vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+
+	res_chunk = get_qp_res_chunk(qp_grp);
+	if (IS_ERR_OR_NULL(res_chunk)) {
+		usnic_err("Unable to get qp res with err %ld\n",
+				PTR_ERR(res_chunk));
+		return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+	}
+
+	for (i = 0; i < res_chunk->cnt; i++) {
+		res = res_chunk->res[i];
+		err = usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx,
+						res->vnic_idx);
+		if (err) {
+			usnic_err("Failed to disable rq %d of %s:%d\n with err %d\n",
+					res->vnic_idx,
+					qp_grp->ufdev->name,
+					vnic_idx, err);
+			/* Remember only the first failure. */
+			if (!status)
+				status = err;
+		}
+	}
+
+	return status;
+}
+
+/*
+ * Fill @uaction so that matching traffic is steered to the group's
+ * default RQ (entry DFLT_RQ_IDX of its RQ chunk) on the group's VF.
+ *
+ * Returns 0 on success, or a negative errno if the RQ chunk cannot be
+ * obtained.
+ */
+static int init_filter_action(struct usnic_ib_qp_grp *qp_grp,
+				struct usnic_filter_action *uaction)
+{
+	struct usnic_vnic_res_chunk *rq_chunk =
+		usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+
+	if (IS_ERR_OR_NULL(rq_chunk)) {
+		usnic_err("Unable to get %s with err %ld\n",
+			usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
+			PTR_ERR(rq_chunk));
+		if (!rq_chunk)
+			return -ENOMEM;
+		return PTR_ERR(rq_chunk);
+	}
+
+	uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+	uaction->action.type = FILTER_ACTION_RQ_STEERING;
+	uaction->action.u.rq_idx = rq_chunk->res[DFLT_RQ_IDX]->vnic_idx;
+
+	return 0;
+}
+
+/*
+ * Build a flow for the custom-RoCE transport: reserve the requested
+ * port, allocate a forwarding flow that steers it to the group's default
+ * RQ, and wrap both in a newly allocated usnic_ib_qp_grp_flow.
+ *
+ * Returns the flow handle, or an ERR_PTR(); on failure the forwarding
+ * flow and the port reservation are released again.
+ */
+static struct usnic_ib_qp_grp_flow*
+create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp,
+			struct usnic_transport_spec *trans_spec)
+{
+	enum usnic_transport_type type = trans_spec->trans_type;
+	uint16_t port = trans_spec->usnic_roce.port_num;
+	struct usnic_ib_qp_grp_flow *handle;
+	struct usnic_filter_action action;
+	struct usnic_fwd_flow *fwd_flow;
+	struct filter flt;
+	int rc;
+
+	/* The reservation call hands back the port actually reserved. */
+	port = usnic_transport_rsrv_port(type, port);
+	if (!port)
+		return ERR_PTR(-EINVAL);
+
+	usnic_fwd_init_usnic_filter(&flt, port);
+	rc = init_filter_action(qp_grp, &action);
+	if (rc)
+		goto unreserve;
+
+	fwd_flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &flt, &action);
+	if (IS_ERR_OR_NULL(fwd_flow)) {
+		usnic_err("Unable to alloc flow failed with err %ld\n",
+				PTR_ERR(fwd_flow));
+		rc = fwd_flow ? PTR_ERR(fwd_flow) : -EFAULT;
+		goto unreserve;
+	}
+
+	handle = kzalloc(sizeof(*handle), GFP_ATOMIC);
+	if (IS_ERR_OR_NULL(handle)) {
+		rc = handle ? PTR_ERR(handle) : -ENOMEM;
+		goto dealloc;
+	}
+
+	handle->flow = fwd_flow;
+	handle->trans_type = type;
+	handle->usnic_roce.port_num = port;
+	handle->qp_grp = qp_grp;
+	return handle;
+
+dealloc:
+	usnic_fwd_dealloc_flow(fwd_flow);
+unreserve:
+	usnic_transport_unrsrv_port(type, port);
+	return ERR_PTR(rc);
+}
+
+/*
+ * Undo create_roce_custom_flow(): free the forwarding flow, give back
+ * the reserved port and free the handle itself.
+ */
+static void release_roce_custom_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+	enum usnic_transport_type type = qp_flow->trans_type;
+	uint16_t port = qp_flow->usnic_roce.port_num;
+
+	usnic_fwd_dealloc_flow(qp_flow->flow);
+	usnic_transport_unrsrv_port(type, port);
+	kfree(qp_flow);
+}
+
+/*
+ * Build a flow for the IPv4/UDP transport from the socket fd carried in
+ * @trans_spec: take a reference on the socket, check it is UDP, allocate
+ * a forwarding flow for its address/port steered to the group's default
+ * RQ, and wrap it in a newly allocated usnic_ib_qp_grp_flow.
+ *
+ * Returns the flow handle; an ERR_PTR() on failure (or whatever
+ * ERR_CAST() yields for the failed socket lookup).  On failure after the
+ * lookup, the socket reference and any forwarding flow are released.
+ */
+static struct usnic_ib_qp_grp_flow*
+create_udp_flow(struct usnic_ib_qp_grp *qp_grp,
+		struct usnic_transport_spec *trans_spec)
+{
+	enum usnic_transport_type type = trans_spec->trans_type;
+	int fd = trans_spec->udp.sock_fd;
+	struct usnic_ib_qp_grp_flow *handle;
+	struct usnic_filter_action action;
+	struct usnic_fwd_flow *fwd_flow;
+	struct socket *sk;
+	struct filter flt;
+	uint32_t ip;
+	uint16_t port;
+	int l4_proto;
+	int rc;
+
+	sk = usnic_transport_get_socket(fd);
+	if (IS_ERR_OR_NULL(sk))
+		return ERR_CAST(sk);
+
+	rc = usnic_transport_sock_get_addr(sk, &l4_proto, &ip, &port);
+	if (rc)
+		goto put_sock;
+
+	if (l4_proto != IPPROTO_UDP) {
+		usnic_err("Protocol for fd %d is not UDP", fd);
+		rc = -EPERM;
+		goto put_sock;
+	}
+
+	usnic_fwd_init_udp_filter(&flt, ip, port);
+	rc = init_filter_action(qp_grp, &action);
+	if (rc)
+		goto put_sock;
+
+	fwd_flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &flt, &action);
+	if (IS_ERR_OR_NULL(fwd_flow)) {
+		usnic_err("Unable to alloc flow failed with err %ld\n",
+				PTR_ERR(fwd_flow));
+		rc = fwd_flow ? PTR_ERR(fwd_flow) : -EFAULT;
+		goto put_sock;
+	}
+
+	handle = kzalloc(sizeof(*handle), GFP_ATOMIC);
+	if (IS_ERR_OR_NULL(handle)) {
+		rc = handle ? PTR_ERR(handle) : -ENOMEM;
+		goto dealloc;
+	}
+
+	handle->flow = fwd_flow;
+	handle->trans_type = type;
+	handle->udp.sock = sk;
+	handle->qp_grp = qp_grp;
+	return handle;
+
+dealloc:
+	usnic_fwd_dealloc_flow(fwd_flow);
+put_sock:
+	usnic_transport_put_socket(sk);
+	return ERR_PTR(rc);
+}
+
+/*
+ * Undo create_udp_flow(): free the forwarding flow, drop the socket
+ * reference and free the handle itself.
+ */
+static void release_udp_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+	struct socket *sk = qp_flow->udp.sock;
+
+	usnic_fwd_dealloc_flow(qp_flow->flow);
+	usnic_transport_put_socket(sk);
+	kfree(qp_flow);
+}
+
+/*
+ * Create a flow of the transport type named in @trans_spec and, when
+ * creation succeeds, append it to the group's flows_lst and expose it in
+ * debugfs.
+ *
+ * Returns whatever the per-transport constructor returned, or
+ * ERR_PTR(-EINVAL) for an unsupported transport.
+ */
+static struct usnic_ib_qp_grp_flow*
+create_and_add_flow(struct usnic_ib_qp_grp *qp_grp,
+			struct usnic_transport_spec *trans_spec)
+{
+	struct usnic_ib_qp_grp_flow *new_flow;
+
+	switch (trans_spec->trans_type) {
+	case USNIC_TRANSPORT_ROCE_CUSTOM:
+		new_flow = create_roce_custom_flow(qp_grp, trans_spec);
+		break;
+	case USNIC_TRANSPORT_IPV4_UDP:
+		new_flow = create_udp_flow(qp_grp, trans_spec);
+		break;
+	default:
+		usnic_err("Unsupported transport %u\n",
+				trans_spec->trans_type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (IS_ERR_OR_NULL(new_flow))
+		return new_flow;
+
+	list_add_tail(&new_flow->link, &qp_grp->flows_lst);
+	usnic_debugfs_flow_add(new_flow);
+
+	return new_flow;
+}
+
+/*
+ * Remove @qp_flow from debugfs and from its group's flow list, then
+ * release it with the destructor matching its transport type.  An
+ * unknown transport type only triggers a warning.
+ */
+static void release_and_remove_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+	usnic_debugfs_flow_remove(qp_flow);
+	list_del(&qp_flow->link);
+
+	if (qp_flow->trans_type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+		release_roce_custom_flow(qp_flow);
+		return;
+	}
+
+	if (qp_flow->trans_type == USNIC_TRANSPORT_IPV4_UDP) {
+		release_udp_flow(qp_flow);
+		return;
+	}
+
+	WARN(1, "Unsupported transport %u\n",
+			qp_flow->trans_type);
+}
+
+/*
+ * Release every flow currently on the group's flows_lst.
+ */
+static void release_and_remove_all_flows(struct usnic_ib_qp_grp *qp_grp)
+{
+	struct usnic_ib_qp_grp_flow *head;
+
+	/* release_and_remove_flow() unlinks the entry, so the list shrinks. */
+	while (!list_empty(&qp_grp->flows_lst)) {
+		head = list_first_entry(&qp_grp->flows_lst,
+					struct usnic_ib_qp_grp_flow, link);
+		release_and_remove_flow(head);
+	}
+}
+
+/*
+ * Move a QP group from its current state to @new_state.
+ *
+ * @qp_grp:    the group to transition.
+ * @new_state: requested ib_qp_state.
+ * @data:      optional struct usnic_transport_spec * (passed as void *);
+ *             only consulted for transitions into IB_QPS_INIT, where it
+ *             describes a flow to add.
+ *
+ * The per-transition work (adding/releasing flows, enabling/disabling the
+ * QPs, raising IB_EVENT_QP_FATAL) runs with qp_grp->lock held.  Unlisted
+ * (old, new) pairs fail with -EINVAL.
+ *
+ * Returns 0 and records @new_state in qp_grp->state on success; returns a
+ * negative errno and leaves qp_grp->state unchanged otherwise.
+ */
+int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
+ enum ib_qp_state new_state,
+ void *data)
+{
+ int status = 0;
+ int vnic_idx;
+ struct ib_event ib_event;
+ enum ib_qp_state old_state;
+ struct usnic_transport_spec *trans_spec;
+ struct usnic_ib_qp_grp_flow *qp_flow;
+
+ /* NOTE(review): old_state is sampled before the lock is taken. */
+ old_state = qp_grp->state;
+ /* vnic_idx is computed but not used further in this function. */
+ vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+ trans_spec = (struct usnic_transport_spec *) data;
+
+ spin_lock(&qp_grp->lock);
+ switch (new_state) {
+ case IB_QPS_RESET:
+ switch (old_state) {
+ case IB_QPS_RESET:
+ /* NO-OP */
+ break;
+ case IB_QPS_INIT:
+ release_and_remove_all_flows(qp_grp);
+ status = 0;
+ break;
+ case IB_QPS_RTR:
+ case IB_QPS_RTS:
+ case IB_QPS_ERR:
+ /* Flows are released even if disabling reported an error. */
+ status = disable_qp_grp(qp_grp);
+ release_and_remove_all_flows(qp_grp);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_INIT:
+ switch (old_state) {
+ case IB_QPS_RESET:
+ if (trans_spec) {
+ qp_flow = create_and_add_flow(qp_grp,
+ trans_spec);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+ break;
+ }
+ } else {
+ /*
+ * Optional to specify filters.
+ */
+ status = 0;
+ }
+ break;
+ case IB_QPS_INIT:
+ if (trans_spec) {
+ qp_flow = create_and_add_flow(qp_grp,
+ trans_spec);
+ if (IS_ERR_OR_NULL(qp_flow)) {
+ status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+ break;
+ }
+ } else {
+ /*
+ * Doesn't make sense to go into INIT state
+ * from INIT state w/o adding filters.
+ */
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_RTR:
+ status = disable_qp_grp(qp_grp);
+ break;
+ case IB_QPS_RTS:
+ status = disable_qp_grp(qp_grp);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_RTR:
+ switch (old_state) {
+ case IB_QPS_INIT:
+ status = enable_qp_grp(qp_grp);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_RTS:
+ switch (old_state) {
+ case IB_QPS_RTR:
+ /* NO-OP FOR NOW */
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ case IB_QPS_ERR:
+ /* Prepare the fatal event delivered to the QP's event handler. */
+ ib_event.device = &qp_grp->vf->pf->ib_dev;
+ ib_event.element.qp = &qp_grp->ibqp;
+ ib_event.event = IB_EVENT_QP_FATAL;
+
+ switch (old_state) {
+ case IB_QPS_RESET:
+ qp_grp->ibqp.event_handler(&ib_event,
+ qp_grp->ibqp.qp_context);
+ break;
+ case IB_QPS_INIT:
+ release_and_remove_all_flows(qp_grp);
+ qp_grp->ibqp.event_handler(&ib_event,
+ qp_grp->ibqp.qp_context);
+ break;
+ case IB_QPS_RTR:
+ case IB_QPS_RTS:
+ status = disable_qp_grp(qp_grp);
+ release_and_remove_all_flows(qp_grp);
+ qp_grp->ibqp.event_handler(&ib_event,
+ qp_grp->ibqp.qp_context);
+ break;
+ default:
+ status = -EINVAL;
+ }
+ break;
+ default:
+ status = -EINVAL;
+ }
+ spin_unlock(&qp_grp->lock);
+
+ /* The new state is committed only when the transition succeeded. */
+ if (!status) {
+ qp_grp->state = new_state;
+ usnic_info("Transistioned %u from %s to %s",
+ qp_grp->grp_id,
+ usnic_ib_qp_grp_state_to_string(old_state),
+ usnic_ib_qp_grp_state_to_string(new_state));
+ } else {
+ usnic_err("Failed to transistion %u from %s to %s",
+ qp_grp->grp_id,
+ usnic_ib_qp_grp_state_to_string(old_state),
+ usnic_ib_qp_grp_state_to_string(new_state));
+ }
+
+ return status;
+}
+
+/*
+ * Acquire from @vnic every resource listed in @res_spec on behalf of
+ * @owner_obj.
+ *
+ * @res_spec->resources is walked up to its USNIC_VNIC_RES_TYPE_EOL
+ * terminator.  The result is a NULL-terminated array with one chunk per
+ * spec entry, in spec order; release it with free_qp_grp_res().
+ *
+ * Returns the array, or an ERR_PTR() on failure, in which case every
+ * chunk acquired so far (including the first one) is put back.
+ */
+static struct usnic_vnic_res_chunk**
+alloc_res_chunk_list(struct usnic_vnic *vnic,
+			struct usnic_vnic_res_spec *res_spec, void *owner_obj)
+{
+	enum usnic_vnic_res_type res_type;
+	struct usnic_vnic_res_chunk **res_chunk_list;
+	int err, i, res_cnt, res_lst_sz;
+
+	/* Count the spec entries that precede the EOL marker. */
+	for (res_lst_sz = 0;
+		res_spec->resources[res_lst_sz].type != USNIC_VNIC_RES_TYPE_EOL;
+		res_lst_sz++) {
+		/* Do Nothing */
+	}
+
+	/* One extra zeroed slot serves as the NULL terminator. */
+	res_chunk_list = kcalloc(res_lst_sz + 1, sizeof(*res_chunk_list),
+					GFP_ATOMIC);
+	if (!res_chunk_list)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; res_spec->resources[i].type != USNIC_VNIC_RES_TYPE_EOL;
+		i++) {
+		res_type = res_spec->resources[i].type;
+		res_cnt = res_spec->resources[i].cnt;
+
+		res_chunk_list[i] = usnic_vnic_get_resources(vnic, res_type,
+					res_cnt, owner_obj);
+		if (IS_ERR_OR_NULL(res_chunk_list[i])) {
+			err = res_chunk_list[i] ?
+					PTR_ERR(res_chunk_list[i]) : -ENOMEM;
+			usnic_err("Failed to get %s from %s with err %d\n",
+				usnic_vnic_res_type_to_str(res_type),
+				usnic_vnic_pci_name(vnic),
+				err);
+			goto out_free_res;
+		}
+	}
+
+	return res_chunk_list;
+
+out_free_res:
+	/* Entry i failed; put back entries i-1 down to 0 inclusive. */
+	for (i--; i >= 0; i--)
+		usnic_vnic_put_resources(res_chunk_list[i]);
+	kfree(res_chunk_list);
+	return ERR_PTR(err);
+}
+
+/*
+ * Put back every chunk in the NULL-terminated @res_chunk_list and free
+ * the array itself.
+ */
+static void free_qp_grp_res(struct usnic_vnic_res_chunk **res_chunk_list)
+{
+	struct usnic_vnic_res_chunk **cur;
+
+	for (cur = res_chunk_list; *cur; cur++)
+		usnic_vnic_put_resources(*cur);
+
+	kfree(res_chunk_list);
+}
+
+/*
+ * Bind @qp_grp to @vf.  The first group bound to a VF also attaches the
+ * VF's PCI device to @pd's uiom domain and records @pd on the VF; later
+ * groups only bump the VF's group count (and warn if @pd differs).
+ *
+ * Returns 0, or the attach error.  Caller must hold vf->lock.
+ */
+static int qp_grp_and_vf_bind(struct usnic_ib_vf *vf,
+				struct usnic_ib_pd *pd,
+				struct usnic_ib_qp_grp *qp_grp)
+{
+	struct pci_dev *vf_pdev;
+	int rc;
+
+	lockdep_assert_held(&vf->lock);
+
+	vf_pdev = usnic_vnic_get_pdev(vf->vnic);
+
+	if (!vf->qp_grp_ref_cnt) {
+		rc = usnic_uiom_attach_dev_to_pd(pd->umem_pd, &vf_pdev->dev);
+		if (rc) {
+			usnic_err("Failed to attach %s to domain\n",
+					pci_name(vf_pdev));
+			return rc;
+		}
+		vf->pd = pd;
+	}
+
+	vf->qp_grp_ref_cnt += 1;
+	WARN_ON(vf->pd != pd);
+
+	qp_grp->vf = vf;
+	return 0;
+}
+
+/*
+ * Undo qp_grp_and_vf_bind(): drop the group's count on its VF and, when
+ * it was the last group, clear the VF's pd and detach the VF's PCI
+ * device from that pd's uiom domain.  Caller must hold the VF's lock.
+ */
+static void qp_grp_and_vf_unbind(struct usnic_ib_qp_grp *qp_grp)
+{
+	struct usnic_ib_vf *vf = qp_grp->vf;
+	struct usnic_ib_pd *old_pd;
+	struct pci_dev *vf_pdev;
+
+	lockdep_assert_held(&vf->lock);
+
+	old_pd = vf->pd;
+	vf_pdev = usnic_vnic_get_pdev(vf->vnic);
+
+	vf->qp_grp_ref_cnt--;
+	if (vf->qp_grp_ref_cnt == 0) {
+		vf->pd = NULL;
+		usnic_uiom_detach_dev_from_pd(old_pd->umem_pd, &vf_pdev->dev);
+	}
+
+	qp_grp->vf = NULL;
+}
+
+/*
+ * Emit a debug-level dump of @res_spec, rendered through a 512-byte
+ * stack buffer.
+ */
+static void log_spec(struct usnic_vnic_res_spec *res_spec)
+{
+	char spec_str[512];
+
+	usnic_vnic_spec_dump(spec_str, sizeof(spec_str), res_spec);
+	usnic_dbg("%s\n", spec_str);
+}
+
+/*
+ * Derive a QP group id from @qp_flow and store it in *@id: the reserved
+ * port for the custom-RoCE transport, or the socket's port for IPv4/UDP.
+ *
+ * Returns 0 on success, the socket query error, or -EINVAL for an
+ * unsupported transport (in which case *@id is left untouched).
+ */
+static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow,
+				uint32_t *id)
+{
+	uint16_t sock_port = 0;
+	int rc;
+
+	if (qp_flow->trans_type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+		*id = qp_flow->usnic_roce.port_num;
+		return 0;
+	}
+
+	if (qp_flow->trans_type == USNIC_TRANSPORT_IPV4_UDP) {
+		rc = usnic_transport_sock_get_addr(qp_flow->udp.sock,
+							NULL, NULL,
+							&sock_port);
+		if (rc)
+			return rc;
+		/*
+		 * Go through a 16-bit local before widening into *id so the
+		 * short-to-int conversion is right on either endianness.
+		 */
+		*id = sock_port;
+		return 0;
+	}
+
+	usnic_err("Unsupported transport %u\n", qp_flow->trans_type);
+	return -EINVAL;
+}
+
+/*
+ * Create a QP group on @vf.
+ *
+ * @ufdev:          forwarding device the group's flows are installed on.
+ * @vf:             VF supplying the hardware resources; vf->lock must be
+ *                  held by the caller.
+ * @pd:             protection domain the VF is (or gets) attached to.
+ * @res_spec:       resources to acquire; must satisfy the minimum spec of
+ *                  the requested transport.
+ * @transport_spec: transport type and parameters for the initial flow.
+ *
+ * The group starts in IB_QPS_RESET with one flow installed; its id (and
+ * ibqp.qp_num) is derived from that flow.
+ *
+ * Returns the new group; an ERR_PTR() on failure, or NULL if the group
+ * structure itself could not be allocated.  Callers must therefore test
+ * the result with IS_ERR_OR_NULL().
+ */
+struct usnic_ib_qp_grp *
+usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
+			struct usnic_ib_pd *pd,
+			struct usnic_vnic_res_spec *res_spec,
+			struct usnic_transport_spec *transport_spec)
+{
+	struct usnic_ib_qp_grp *qp_grp;
+	int err;
+	enum usnic_transport_type transport = transport_spec->trans_type;
+	struct usnic_ib_qp_grp_flow *qp_flow;
+
+	lockdep_assert_held(&vf->lock);
+
+	/*
+	 * min_transport_spec[] has USNIC_TRANSPORT_MAX entries; refuse any
+	 * transport value that would index outside of it.
+	 */
+	if ((unsigned int)transport >= USNIC_TRANSPORT_MAX) {
+		usnic_err("Unsupported transport %u\n", transport);
+		return ERR_PTR(-EINVAL);
+	}
+
+	err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport],
+						res_spec);
+	if (err) {
+		usnic_err("Spec does not meet miniumum req for transport %d\n",
+				transport);
+		log_spec(res_spec);
+		return ERR_PTR(err);
+	}
+
+	qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC);
+	if (!qp_grp) {
+		usnic_err("Unable to alloc qp_grp - Out of memory\n");
+		return NULL;
+	}
+
+	qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec,
+							qp_grp);
+	if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) {
+		err = qp_grp->res_chunk_list ?
+				PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM;
+		usnic_err("Unable to alloc res for %d with err %d\n",
+				qp_grp->grp_id, err);
+		goto out_free_qp_grp;
+	}
+
+	err = qp_grp_and_vf_bind(vf, pd, qp_grp);
+	if (err)
+		goto out_free_res;
+
+	INIT_LIST_HEAD(&qp_grp->flows_lst);
+	spin_lock_init(&qp_grp->lock);
+	qp_grp->ufdev = ufdev;
+	qp_grp->state = IB_QPS_RESET;
+	qp_grp->owner_pid = current->pid;
+
+	qp_flow = create_and_add_flow(qp_grp, transport_spec);
+	if (IS_ERR_OR_NULL(qp_flow)) {
+		usnic_err("Unable to create and add flow with err %ld\n",
+				PTR_ERR(qp_flow));
+		err = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+		goto out_qp_grp_vf_unbind;
+	}
+
+	err = qp_grp_id_from_flow(qp_flow, &qp_grp->grp_id);
+	if (err)
+		goto out_release_flow;
+	qp_grp->ibqp.qp_num = qp_grp->grp_id;
+
+	usnic_ib_sysfs_qpn_add(qp_grp);
+
+	return qp_grp;
+
+out_release_flow:
+	release_and_remove_flow(qp_flow);
+out_qp_grp_vf_unbind:
+	qp_grp_and_vf_unbind(qp_grp);
+out_free_res:
+	free_qp_grp_res(qp_grp->res_chunk_list);
+out_free_qp_grp:
+	kfree(qp_grp);
+
+	return ERR_PTR(err);
+}
+
+/*
+ * Destroy a QP group created by usnic_ib_qp_grp_create(): release its
+ * remaining flows, remove its sysfs entry, unbind it from its VF, put
+ * back its resources and free it.
+ *
+ * The group is expected to be in IB_QPS_RESET (warned otherwise) and the
+ * caller must hold the VF's lock.
+ */
+void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
+{
+
+ WARN_ON(qp_grp->state != IB_QPS_RESET);
+ lockdep_assert_held(&qp_grp->vf->lock);
+
+ release_and_remove_all_flows(qp_grp);
+ usnic_ib_sysfs_qpn_remove(qp_grp);
+ /* Clears qp_grp->vf, so nothing below may go through it. */
+ qp_grp_and_vf_unbind(qp_grp);
+ free_qp_grp_res(qp_grp->res_chunk_list);
+ kfree(qp_grp);
+}
+
+/*
+ * Return the first chunk of type @res_type in the group's
+ * NULL-terminated res_chunk_list, or ERR_PTR(-EINVAL) if there is none.
+ */
+struct usnic_vnic_res_chunk*
+usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp,
+				enum usnic_vnic_res_type res_type)
+{
+	struct usnic_vnic_res_chunk **cur;
+
+	for (cur = qp_grp->res_chunk_list; *cur; cur++) {
+		if ((*cur)->type != res_type)
+			continue;
+		return *cur;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h
new file mode 100644
index 00000000000..b0aafe8db0c
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_QP_GRP_H_
+#define USNIC_IB_QP_GRP_H_
+
+#include <linux/debugfs.h>
+#include <rdma/ib_verbs.h>
+
+#include "usnic_ib.h"
+#include "usnic_abi.h"
+#include "usnic_fwd.h"
+#include "usnic_vnic.h"
+
+/*
+ * The qp group struct represents all the hw resources needed to present an
+ * ib_qp.
+ */
+struct usnic_ib_qp_grp {
+ /* Embedded verbs QP; to_uqp_grp() maps it back to this struct. */
+ struct ib_qp ibqp;
+ /* Current state, updated by usnic_ib_qp_grp_modify(). */
+ enum ib_qp_state state;
+ /* Id derived from the group's first flow; mirrored in ibqp.qp_num. */
+ int grp_id;
+
+ /* Forwarding device the group's flows and QPs are programmed on. */
+ struct usnic_fwd_dev *ufdev;
+ struct usnic_ib_ucontext *ctx;
+ /* List of struct usnic_ib_qp_grp_flow, linked through ->link. */
+ struct list_head flows_lst;
+
+ /* NULL-terminated array of resource chunks owned by the group. */
+ struct usnic_vnic_res_chunk **res_chunk_list;
+
+ /* pid of the task that created the group. */
+ pid_t owner_pid;
+ /* VF the group is bound to; NULL once unbound. */
+ struct usnic_ib_vf *vf;
+ struct list_head link;
+
+ /* Held around state-transition work in usnic_ib_qp_grp_modify(). */
+ spinlock_t lock;
+
+ struct kobject kobj;
+};
+
+/*
+ * One flow installed on behalf of a QP group, together with the
+ * transport-specific state needed to release it again.
+ */
+struct usnic_ib_qp_grp_flow {
+ /* Forwarding flow allocated with usnic_fwd_alloc_flow(). */
+ struct usnic_fwd_flow *flow;
+ /* Selects which member of the union below is valid. */
+ enum usnic_transport_type trans_type;
+ union {
+ struct {
+ /* Port reserved for the custom-RoCE transport. */
+ uint16_t port_num;
+ } usnic_roce;
+ struct {
+ /* Socket reference held for the IPv4/UDP transport. */
+ struct socket *sock;
+ } udp;
+ };
+ /* Owning group. */
+ struct usnic_ib_qp_grp *qp_grp;
+ /* Entry in the owning group's flows_lst. */
+ struct list_head link;
+
+ /* Debug FS */
+ struct dentry *dbgfs_dentry;
+ char dentry_name[32];
+};
+
+/*
+ * Minimum resources a QP group needs per transport, indexed by
+ * enum usnic_transport_type.  Each resource list ends with a
+ * USNIC_VNIC_RES_TYPE_EOL entry.
+ *
+ * Being a static definition in a header, every file that includes this
+ * header gets its own copy of the table.
+ */
+static const struct
+usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = {
+ { /*USNIC_TRANSPORT_UNKNOWN*/
+ .resources = {
+ {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
+ },
+ },
+ { /*USNIC_TRANSPORT_ROCE_CUSTOM*/
+ .resources = {
+ {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
+ },
+ },
+ { /*USNIC_TRANSPORT_IPV4_UDP*/
+ .resources = {
+ {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,},
+ {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,},
+ },
+ },
+};
+
+const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state);
+int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz);
+int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz);
+struct usnic_ib_qp_grp *
+usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
+ struct usnic_ib_pd *pd,
+ struct usnic_vnic_res_spec *res_spec,
+ struct usnic_transport_spec *trans_spec);
+void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp);
+int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
+ enum ib_qp_state new_state,
+ void *data);
+struct usnic_vnic_res_chunk
+*usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp,
+ enum usnic_vnic_res_type type);
+/* Map an embedded ib_qp back to the usnic_ib_qp_grp that contains it. */
+static inline
+struct usnic_ib_qp_grp *to_uqp_grp(struct ib_qp *ibqp)
+{
+	struct usnic_ib_qp_grp *grp;
+
+	grp = container_of(ibqp, struct usnic_ib_qp_grp, ibqp);
+	return grp;
+}
+#endif /* USNIC_IB_QP_GRP_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
new file mode 100644
index 00000000000..27dc67c1689
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_common_util.h"
+#include "usnic_ib.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_vnic.h"
+#include "usnic_ib_verbs.h"
+#include "usnic_log.h"
+
+/*
+ * sysfs "fw_ver" show handler: prints the fw_version string obtained from
+ * the netdev's ethtool get_drvinfo op.  The op is called with usdev_lock
+ * held.
+ *
+ * NOTE(review): 'info' is not zeroed before the call and neither
+ * ethtool_ops nor get_drvinfo is NULL-checked - confirm the underlying
+ * netdev driver always provides the op and fills fw_version.
+ */
+static ssize_t usnic_ib_show_fw_ver(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ struct ethtool_drvinfo info;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version);
+}
+
+/*
+ * sysfs "board_id" show handler: prints the PCI subsystem device id of the
+ * PF as an unsigned decimal.  The id is sampled under usdev_lock.
+ */
+static ssize_t usnic_ib_show_board(struct device *device,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ unsigned short subsystem_device_id;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ subsystem_device_id = us_ibdev->pdev->subsystem_device;
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id);
+}
+
+/*
+ * Report the configuration for this PF
+ *
+ * With at least one VF the output is a header line (device name, bus name,
+ * slot.function, netdev name, MAC, VF count) followed by one " <cnt> <type>"
+ * item for every resource type whose per-VF count is non-zero.  Without VFs
+ * only "<name>: no VFs" is printed.  Everything is formatted under
+ * usdev_lock and the return value is the number of bytes written to buf.
+ */
+static ssize_t
+usnic_ib_show_config(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev;
+ char *ptr;
+ unsigned left;
+ unsigned n;
+ enum usnic_vnic_res_type res_type;
+
+ us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ /* Buffer space limit is 1 page */
+ ptr = buf;
+ left = PAGE_SIZE;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) {
+ char *busname;
+
+ /*
+ * bus name seems to come with annoying prefix.
+ * Remove it if it is predictable
+ */
+ busname = us_ibdev->pdev->bus->name;
+ if (strncmp(busname, "PCI Bus ", 8) == 0)
+ busname += 8;
+
+ n = scnprintf(ptr, left,
+ "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:",
+ us_ibdev->ib_dev.name,
+ busname,
+ PCI_SLOT(us_ibdev->pdev->devfn),
+ PCI_FUNC(us_ibdev->pdev->devfn),
+ netdev_name(us_ibdev->netdev),
+ us_ibdev->ufdev->mac,
+ atomic_read(&us_ibdev->vf_cnt.refcount));
+ /* presumably advances ptr and shrinks left by n - macro not visible here */
+ UPDATE_PTR_LEFT(n, ptr, left);
+
+ for (res_type = USNIC_VNIC_RES_TYPE_EOL;
+ res_type < USNIC_VNIC_RES_TYPE_MAX;
+ res_type++) {
+ if (us_ibdev->vf_res_cnt[res_type] == 0)
+ continue;
+ /* a "," follows every item except the last resource type */
+ n = scnprintf(ptr, left, " %d %s%s",
+ us_ibdev->vf_res_cnt[res_type],
+ usnic_vnic_res_type_to_str(res_type),
+ (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ?
+ "," : "");
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ n = scnprintf(ptr, left, "\n");
+ UPDATE_PTR_LEFT(n, ptr, left);
+ } else {
+ n = scnprintf(ptr, left, "%s: no VFs\n",
+ us_ibdev->ib_dev.name);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return ptr - buf;
+}
+
+/* sysfs "iface" show handler: prints the name of the backing netdev. */
+static ssize_t
+usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ const char *ifname = netdev_name(us_ibdev->netdev);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", ifname);
+}
+
+/* sysfs "max_vf" show handler: prints the current vf_cnt reference count. */
+static ssize_t
+usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ int nr_vfs = atomic_read(&us_ibdev->vf_cnt.refcount);
+
+ return scnprintf(buf, PAGE_SIZE, "%u\n", nr_vfs);
+}
+
+/*
+ * sysfs "qp_per_vf" show handler: prints the larger of the per-VF WQ and RQ
+ * resource counts.
+ */
+static ssize_t
+usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev.dev);
+ int nr_qps = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", nr_qps);
+}
+
+/* sysfs "cq_per_vf" show handler: prints the per-VF CQ resource count. */
+static ssize_t
+usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
+}
+
+/* Read-only (S_IRUGO) device attributes; none has a store handler. */
+static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
+static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
+static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
+static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL);
+static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
+static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
+
+/*
+ * Attributes created by usnic_ib_sysfs_register_usdev() and removed by
+ * usnic_ib_sysfs_unregister_usdev().
+ */
+static struct device_attribute *usnic_class_attributes[] = {
+ &dev_attr_fw_ver,
+ &dev_attr_board_id,
+ &dev_attr_config,
+ &dev_attr_iface,
+ &dev_attr_max_vf,
+ &dev_attr_qp_per_vf,
+ &dev_attr_cq_per_vf,
+};
+
+/*
+ * sysfs attribute of a QP group: a generic attribute plus a show callback
+ * that receives the owning usnic_ib_qp_grp.
+ */
+struct qpn_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct usnic_ib_qp_grp *, char *buf);
+};
+
+/*
+ * Definitions for supporting QPN entries in sysfs
+ */
+/*
+ * Generic sysfs show entry point for QP group kobjects: recover the QP group
+ * and the qpn_attribute from their embedded members and forward to the
+ * attribute's own show callback.
+ */
+static ssize_t
+usnic_ib_qpn_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct qpn_attribute *qattr =
+ container_of(attr, struct qpn_attribute, attr);
+ struct usnic_ib_qp_grp *grp =
+ container_of(kobj, struct usnic_ib_qp_grp, kobj);
+
+ return qattr->show(grp, buf);
+}
+
+/* QP group attributes are read-only: only a show op is provided. */
+static const struct sysfs_ops usnic_ib_qpn_sysfs_ops = {
+ .show = usnic_ib_qpn_attr_show
+};
+
+/* Declare qpn_attr_<NAME>, a read-only attribute served by <NAME>_show(). */
+#define QPN_ATTR_RO(NAME) \
+struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME)
+
+/* "context" attribute: prints the QP group's ctx pointer. */
+static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx);
+}
+
+/*
+ * "summary" attribute: one line with the QP number, state, owner PID and VF
+ * index, followed by a "<type>[<vnic_idx>]" item for every resource in every
+ * chunk of the group.  Returns the number of bytes written to buf.
+ */
+static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
+{
+ int i, j, n;
+ int left;
+ char *ptr;
+ struct usnic_vnic_res_chunk *res_chunk;
+ struct usnic_vnic_res *vnic_res;
+
+ left = PAGE_SIZE;
+ ptr = buf;
+
+ n = scnprintf(ptr, left,
+ "QPN: %d State: (%s) PID: %u VF Idx: %hu ",
+ qp_grp->ibqp.qp_num,
+ usnic_ib_qp_grp_state_to_string(qp_grp->state),
+ qp_grp->owner_pid,
+ usnic_vnic_get_index(qp_grp->vf->vnic));
+ /* presumably advances ptr and shrinks left by n - macro not visible here */
+ UPDATE_PTR_LEFT(n, ptr, left);
+
+ /* res_chunk_list is walked until its first NULL entry */
+ for (i = 0; qp_grp->res_chunk_list[i]; i++) {
+ res_chunk = qp_grp->res_chunk_list[i];
+ for (j = 0; j < res_chunk->cnt; j++) {
+ vnic_res = res_chunk->res[j];
+ n = scnprintf(ptr, left, "%s[%d] ",
+ usnic_vnic_res_type_to_str(vnic_res->type),
+ vnic_res->vnic_idx);
+ UPDATE_PTR_LEFT(n, ptr, left);
+ }
+ }
+
+ n = scnprintf(ptr, left, "\n");
+ UPDATE_PTR_LEFT(n, ptr, left);
+
+ return ptr - buf;
+}
+
+static QPN_ATTR_RO(context);
+static QPN_ATTR_RO(summary);
+
+/* NULL-terminated list of attributes every QP group kobject gets. */
+static struct attribute *usnic_ib_qpn_default_attrs[] = {
+ &qpn_attr_context.attr,
+ &qpn_attr_summary.attr,
+ NULL
+};
+
+/*
+ * kobj_type used for QP group kobjects (see usnic_ib_sysfs_qpn_add()).
+ * No release callback is set here.
+ */
+static struct kobj_type usnic_ib_qpn_type = {
+ .sysfs_ops = &usnic_ib_qpn_sysfs_ops,
+ .default_attrs = usnic_ib_qpn_default_attrs
+};
+
+/*
+ * Create the sysfs files listed in usnic_class_attributes for the IB device
+ * and add a "qpn" kobject below it (the parent directory for per-QP-group
+ * entries).
+ *
+ * Returns 0 on success, -EINVAL if an attribute file cannot be created and
+ * -ENOMEM if the "qpn" kobject cannot be created.
+ */
+int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
+{
+ int i;
+ int err;
+ for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
+ err = device_create_file(&us_ibdev->ib_dev.dev,
+ usnic_class_attributes[i]);
+ if (err) {
+ usnic_err("Failed to create device file %d for %s with err %d\n",
+ i, us_ibdev->ib_dev.name, err);
+ return -EINVAL;
+ }
+ }
+
+ /* create kernel object for looking at individual QPs */
+ kobject_get(&us_ibdev->ib_dev.dev.kobj);
+ us_ibdev->qpn_kobj = kobject_create_and_add("qpn",
+ &us_ibdev->ib_dev.dev.kobj);
+ if (us_ibdev->qpn_kobj == NULL) {
+ /* drop the reference taken just above */
+ kobject_put(&us_ibdev->ib_dev.dev.kobj);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Remove every file listed in usnic_class_attributes and drop a reference
+ * on the "qpn" kobject.
+ */
+void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
+ device_remove_file(&us_ibdev->ib_dev.dev,
+ usnic_class_attributes[i]);
+ }
+
+ kobject_put(us_ibdev->qpn_kobj);
+}
+
+/*
+ * Add a sysfs directory for a QP group, named after its grp_id, under the
+ * PF's "qpn" kobject.  A reference on the parent is taken for the child and
+ * dropped again if registration fails.  Failure is not reported to the
+ * caller.
+ *
+ * NOTE(review): on failure the parent reference is already released here,
+ * while usnic_ib_sysfs_qpn_remove() puts it unconditionally - confirm
+ * callers never pair a failed add with a remove.
+ */
+void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_dev *us_ibdev;
+ int err;
+
+ us_ibdev = qp_grp->vf->pf;
+
+ err = kobject_init_and_add(&qp_grp->kobj, &usnic_ib_qpn_type,
+ kobject_get(us_ibdev->qpn_kobj),
+ "%d", qp_grp->grp_id);
+ if (err) {
+ kobject_put(us_ibdev->qpn_kobj);
+ return;
+ }
+}
+
+/*
+ * Drop the QP group's kobject reference and the reference on the parent
+ * "qpn" kobject that usnic_ib_sysfs_qpn_add() took.
+ */
+void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp)
+{
+ struct usnic_ib_dev *us_ibdev;
+
+ us_ibdev = qp_grp->vf->pf;
+
+ kobject_put(&qp_grp->kobj);
+ kobject_put(us_ibdev->qpn_kobj);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
new file mode 100644
index 00000000000..0d09b493cd0
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_SYSFS_H_
+#define USNIC_IB_SYSFS_H_
+
+#include "usnic_ib.h"
+
+int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev);
+void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev);
+void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp);
+void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp);
+
+#endif /* !USNIC_IB_SYSFS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
new file mode 100644
index 00000000000..53bd6a2d9cd
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -0,0 +1,768 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_abi.h"
+#include "usnic_ib.h"
+#include "usnic_common_util.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_fwd.h"
+#include "usnic_log.h"
+#include "usnic_uiom.h"
+#include "usnic_transport.h"
+
+#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
+
+/*
+ * Pack the leading bytes of a firmware version string into a u64.
+ *
+ * Up to sizeof(u64) characters of @fw_ver_str are copied into @fw_ver; when
+ * the string is shorter the remaining bytes are zero.  Converting only the
+ * first character (the previous behaviour) reduced the whole version to a
+ * single ASCII code.
+ */
+static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
+{
+ /* strncpy() zero-pads and never reads past the terminating NUL */
+ strncpy((char *)fw_ver, fw_ver_str, sizeof(*fw_ver));
+}
+
+/*
+ * Build the create-QP response for userspace and copy it to @udata.
+ *
+ * The response carries the VF index, the bus address and length of BAR 0,
+ * the vNIC indices of the group's RQ, WQ and CQ resources and the transport
+ * type of the first flow on the group's flow list.
+ *
+ * Returns 0 on success, -EFAULT if the VF's pci_dev or BAR 0 is missing,
+ * the chunk lookup error (or -ENOMEM for a NULL chunk), or the error from
+ * ib_copy_to_udata().
+ *
+ * NOTE(review): the rq_idx/wq_idx/cq_idx arrays are filled for chunk->cnt
+ * entries without a visible bound check - confirm the ABI arrays are sized
+ * for the maximum chunk count.
+ */
+static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_dev *us_ibdev;
+ struct usnic_ib_create_qp_resp resp;
+ struct pci_dev *pdev;
+ struct vnic_dev_bar *bar;
+ struct usnic_vnic_res_chunk *chunk;
+ struct usnic_ib_qp_grp_flow *default_flow;
+ int i, err;
+
+ /* zero the whole struct so no stack bytes reach userspace */
+ memset(&resp, 0, sizeof(resp));
+
+ us_ibdev = qp_grp->vf->pf;
+ pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
+ if (!pdev) {
+ usnic_err("Failed to get pdev of qp_grp %d\n",
+ qp_grp->grp_id);
+ return -EFAULT;
+ }
+
+ bar = usnic_vnic_get_bar(qp_grp->vf->vnic, 0);
+ if (!bar) {
+ usnic_err("Failed to get bar0 of qp_grp %d vf %s\n",
+ qp_grp->grp_id, pci_name(pdev));
+ return -EFAULT;
+ }
+
+ resp.vfid = usnic_vnic_get_index(qp_grp->vf->vnic);
+ resp.bar_bus_addr = bar->bus_addr;
+ resp.bar_len = bar->len;
+
+ chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+ if (IS_ERR_OR_NULL(chunk)) {
+ usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
+ qp_grp->grp_id,
+ PTR_ERR(chunk));
+ return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ }
+
+ WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ);
+ resp.rq_cnt = chunk->cnt;
+ for (i = 0; i < chunk->cnt; i++)
+ resp.rq_idx[i] = chunk->res[i]->vnic_idx;
+
+ chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ);
+ if (IS_ERR_OR_NULL(chunk)) {
+ usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ),
+ qp_grp->grp_id,
+ PTR_ERR(chunk));
+ return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ }
+
+ WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ);
+ resp.wq_cnt = chunk->cnt;
+ for (i = 0; i < chunk->cnt; i++)
+ resp.wq_idx[i] = chunk->res[i]->vnic_idx;
+
+ chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ);
+ if (IS_ERR_OR_NULL(chunk)) {
+ usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+ usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ),
+ qp_grp->grp_id,
+ PTR_ERR(chunk));
+ return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ }
+
+ WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ);
+ resp.cq_cnt = chunk->cnt;
+ for (i = 0; i < chunk->cnt; i++)
+ resp.cq_idx[i] = chunk->res[i]->vnic_idx;
+
+ /* the first flow on the list determines the reported transport */
+ default_flow = list_first_entry(&qp_grp->flows_lst,
+ struct usnic_ib_qp_grp_flow, link);
+ resp.transport = default_flow->trans_type;
+
+ err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (err) {
+ usnic_err("Failed to copy udata for %s\n", us_ibdev->ib_dev.name);
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Pick a VF that has room for @res_spec and create a QP group on it.
+ *
+ * When usnic_ib_share_vf is set, VFs already attached to @pd are tried
+ * first; otherwise (or if none fits) the first VF with no QP groups and
+ * enough room is used.  The caller must hold us_ibdev->usdev_lock.
+ *
+ * Returns the new QP group, NULL if the device has no VFs at all, or an
+ * ERR_PTR (-ENOMEM when no VF fits or creation returned NULL, otherwise the
+ * creation error).
+ */
+static struct usnic_ib_qp_grp*
+find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
+ struct usnic_ib_pd *pd,
+ struct usnic_transport_spec *trans_spec,
+ struct usnic_vnic_res_spec *res_spec)
+{
+ struct usnic_ib_vf *vf;
+ struct usnic_vnic *vnic;
+ struct usnic_ib_qp_grp *qp_grp;
+ struct device *dev, **dev_list;
+ int i, found = 0;
+
+ BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
+
+ if (list_empty(&us_ibdev->vf_dev_list)) {
+ usnic_info("No vfs to allocate\n");
+ return NULL;
+ }
+
+ if (usnic_ib_share_vf) {
+ /* Try to find resources on a used vf which is in pd */
+ dev_list = usnic_uiom_get_dev_list(pd->umem_pd);
+ for (i = 0; dev_list[i]; i++) {
+ dev = dev_list[i];
+ vf = pci_get_drvdata(to_pci_dev(dev));
+ spin_lock(&vf->lock);
+ vnic = vf->vnic;
+ if (!usnic_vnic_check_room(vnic, res_spec)) {
+ usnic_dbg("Found used vnic %s from %s\n",
+ us_ibdev->ib_dev.name,
+ pci_name(usnic_vnic_get_pdev(
+ vnic)));
+ /* leave the loop with vf->lock still held */
+ found = 1;
+ break;
+ }
+ spin_unlock(&vf->lock);
+
+ }
+ usnic_uiom_free_dev_list(dev_list);
+ }
+
+ if (!found) {
+ /* Try to find resources on an unused vf */
+ list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
+ spin_lock(&vf->lock);
+ vnic = vf->vnic;
+ if (vf->qp_grp_ref_cnt == 0 &&
+ usnic_vnic_check_room(vnic, res_spec) == 0) {
+ /* leave the loop with vf->lock still held */
+ found = 1;
+ break;
+ }
+ spin_unlock(&vf->lock);
+ }
+ }
+
+ if (!found) {
+ usnic_info("No free qp grp found on %s\n",
+ us_ibdev->ib_dev.name);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* vf->lock (taken in whichever loop found the VF) covers the creation */
+ qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf, pd, res_spec,
+ trans_spec);
+ spin_unlock(&vf->lock);
+ if (IS_ERR_OR_NULL(qp_grp)) {
+ usnic_err("Failed to allocate qp_grp\n");
+ return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
+ }
+
+ return qp_grp;
+}
+
+/*
+ * Destroy a QP group while holding its VF's spinlock.  Warns if the group
+ * is not in the RESET state.
+ */
+static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
+{
+ /* read vf first: qp_grp must not be touched after the destroy call */
+ struct usnic_ib_vf *vf = qp_grp->vf;
+
+ WARN_ON(qp_grp->state != IB_QPS_RESET);
+
+ spin_lock(&vf->lock);
+ usnic_ib_qp_grp_destroy(qp_grp);
+ spin_unlock(&vf->lock);
+}
+
+/*
+ * Translate an Ethernet link speed into an IB (width, speed) pair.
+ *
+ * @speed is compared against the thresholds 10000/20000/30000/40000 (the
+ * unit is whatever the caller's ethtool speed field uses); anything above
+ * the last threshold is reported as 4X EDR.
+ */
+static void eth_speed_to_ib_speed(int speed, u8 *active_speed,
+ u8 *active_width)
+{
+ if (speed <= 10000) {
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_FDR10;
+ } else if (speed <= 20000) {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_DDR;
+ } else if (speed <= 30000) {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_QDR;
+ } else if (speed <= 40000) {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_FDR10;
+ } else {
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_EDR;
+ }
+}
+
+/*
+ * Check the user-supplied create-QP command: the transport type must lie
+ * strictly between USNIC_TRANSPORT_UNKNOWN and USNIC_TRANSPORT_MAX.
+ * Returns 0 when valid, -EINVAL otherwise.
+ */
+static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd)
+{
+ if (cmd.spec.trans_type > USNIC_TRANSPORT_UNKNOWN &&
+ cmd.spec.trans_type < USNIC_TRANSPORT_MAX)
+ return 0;
+
+ return -EINVAL;
+}
+
+/* Start of ib callback functions */
+
+/* Every port is reported as Ethernet; both arguments are ignored. */
+enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
+ u8 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+/*
+ * Fill @props with the device attributes.  @props is zeroed first, so any
+ * field not assigned below is reported as 0.  The whole query runs under
+ * usdev_lock and always returns 0.
+ *
+ * max_qp and max_cq are the per-VF counts multiplied by the current VF
+ * count; sys_image_guid is the interface id of the GID derived from the
+ * MAC and IP address.
+ *
+ * NOTE(review): 'cmd' is filled by get_settings but not read afterwards in
+ * this function, and 'info'/'cmd' are not zeroed before the ethtool calls.
+ */
+int usnic_ib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ union ib_gid gid;
+ struct ethtool_drvinfo info;
+ struct ethtool_cmd cmd;
+ int qp_per_vf;
+
+ usnic_dbg("\n");
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
+ us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd);
+ memset(props, 0, sizeof(*props));
+ usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
+ &gid.raw[0]);
+ memcpy(&props->sys_image_guid, &gid.global.interface_id,
+ sizeof(gid.global.interface_id));
+ usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver);
+ props->max_mr_size = USNIC_UIOM_MAX_MR_SIZE;
+ props->page_size_cap = USNIC_UIOM_PAGE_SIZE;
+ props->vendor_id = PCI_VENDOR_ID_CISCO;
+ props->vendor_part_id = PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC;
+ props->hw_ver = us_ibdev->pdev->subsystem_device;
+ qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
+ us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
+ props->max_qp = qp_per_vf *
+ atomic_read(&us_ibdev->vf_cnt.refcount);
+ props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
+ IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
+ atomic_read(&us_ibdev->vf_cnt.refcount);
+ props->max_pd = USNIC_UIOM_MAX_PD_CNT;
+ props->max_mr = USNIC_UIOM_MAX_MR_CNT;
+ props->local_ca_ack_delay = 0;
+ props->max_pkeys = 0;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = props->atomic_cap;
+ props->max_qp_rd_atom = 0;
+ props->max_qp_init_rd_atom = 0;
+ props->max_res_rd_atom = 0;
+ props->max_srq = 0;
+ props->max_srq_wr = 0;
+ props->max_srq_sge = 0;
+ props->max_fast_reg_page_list_len = 0;
+ props->max_mcast_grp = 0;
+ props->max_mcast_qp_attach = 0;
+ props->max_total_mcast_qp_attach = 0;
+ props->max_map_per_fmr = 0;
+ /* Owned by Userspace
+ * max_qp_wr, max_sge, max_sge_rd, max_cqe */
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return 0;
+}
+
+/*
+ * Fill @props with the port attributes.  @props is zeroed first; the query
+ * runs under usdev_lock and always returns 0.
+ *
+ * Port state follows the forwarding device: DOWN while the link is down,
+ * INIT while the link is up but no IP address is set, ACTIVE otherwise.
+ * The active speed/width are derived from the netdev's ethtool speed.
+ */
+int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ struct ethtool_cmd cmd;
+
+ usnic_dbg("\n");
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd);
+ memset(props, 0, sizeof(*props));
+
+ props->lid = 0;
+ props->lmc = 1;
+ props->sm_lid = 0;
+ props->sm_sl = 0;
+
+ if (!us_ibdev->ufdev->link_up) {
+ props->state = IB_PORT_DOWN;
+ props->phys_state = 3;
+ } else if (!us_ibdev->ufdev->inaddr) {
+ props->state = IB_PORT_INIT;
+ props->phys_state = 4;
+ } else {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 5;
+ }
+
+ props->port_cap_flags = 0;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ props->bad_pkey_cntr = 0;
+ props->qkey_viol_cntr = 0;
+ /*
+ * cmd.speed holds only the low 16 bits of the link speed;
+ * ethtool_cmd_speed() also folds in speed_hi so that speeds above
+ * 65535 are classified correctly.
+ */
+ eth_speed_to_ib_speed(ethtool_cmd_speed(&cmd), &props->active_speed,
+ &props->active_width);
+ props->max_mtu = IB_MTU_4096;
+ props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu);
+ /* Userspace will adjust for hdrs */
+ props->max_msg_sz = us_ibdev->ufdev->mtu;
+ props->max_vl_num = 1;
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return 0;
+}
+
+/*
+ * Report the state of a QP.  Both output structures are zeroed; only
+ * qp_state, cur_qp_state and (for UD QPs) qkey are then set.
+ * @qp_attr_mask is not consulted.
+ *
+ * Returns 0 on success or -EINVAL for any QP type other than IB_QPT_UD.
+ */
+int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_vf *vf;
+ int err;
+
+ usnic_dbg("\n");
+
+ memset(qp_attr, 0, sizeof(*qp_attr));
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+ qp_grp = to_uqp_grp(qp);
+ vf = qp_grp->vf;
+ mutex_lock(&vf->pf->usdev_lock);
+ usnic_dbg("\n");
+ qp_attr->qp_state = qp_grp->state;
+ qp_attr->cur_qp_state = qp_grp->state;
+
+ switch (qp_grp->ibqp.qp_type) {
+ case IB_QPT_UD:
+ qp_attr->qkey = 0;
+ break;
+ default:
+ usnic_err("Unexpected qp_type %d\n", qp_grp->ibqp.qp_type);
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ mutex_unlock(&vf->pf->usdev_lock);
+ return 0;
+
+err_out:
+ mutex_unlock(&vf->pf->usdev_lock);
+ return err;
+}
+
+/*
+ * Return the port GID, which is derived from the forwarding device's MAC
+ * and IP address under usdev_lock.  @port is ignored.
+ *
+ * Returns 0, or -EINVAL when @index is greater than 1.
+ *
+ * NOTE(review): index 1 is accepted although usnic_ib_query_port() reports
+ * gid_tbl_len = 1 - confirm whether index 1 should be rejected.
+ */
+int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ usnic_dbg("\n");
+
+ if (index > 1)
+ return -EINVAL;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+ usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
+ &gid->raw[0]);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return 0;
+}
+
+/*
+ * Report the P_Key, which is always 0xffff.  Returns -EINVAL when @index is
+ * greater than 1; @ibdev and @port are ignored.
+ *
+ * NOTE(review): index 1 is accepted although usnic_ib_query_port() reports
+ * pkey_tbl_len = 1 - confirm whether index 1 should be rejected.
+ */
+int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ if (index > 1)
+ return -EINVAL;
+
+ *pkey = 0xffff;
+ return 0;
+}
+
+/*
+ * Allocate a protection domain together with its uiom PD.
+ *
+ * Returns the embedded ib_pd, or an ERR_PTR: -ENOMEM if either allocation
+ * yields NULL, otherwise the error reported by usnic_uiom_alloc_pd().
+ * @udata is not used.
+ */
+struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_pd *pd;
+ void *umem_pd;
+
+ usnic_dbg("\n");
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ umem_pd = pd->umem_pd = usnic_uiom_alloc_pd();
+ if (IS_ERR_OR_NULL(umem_pd)) {
+ kfree(pd);
+ return ERR_PTR(umem_pd ? PTR_ERR(umem_pd) : -ENOMEM);
+ }
+
+ usnic_info("domain 0x%p allocated for context 0x%p and device %s\n",
+ pd, context, ibdev->name);
+ return &pd->ibpd;
+}
+
+/*
+ * Release a protection domain allocated by usnic_ib_alloc_pd(): free the
+ * uiom PD, then the containing usnic_ib_pd.  Always returns 0.
+ */
+int usnic_ib_dealloc_pd(struct ib_pd *pd)
+{
+ struct usnic_ib_pd *upd = to_upd(pd);
+
+ usnic_info("freeing domain 0x%p\n", pd);
+
+ usnic_uiom_dealloc_pd(upd->umem_pd);
+ /*
+ * Free the object that was actually allocated rather than the
+ * embedded ib_pd, so this stays correct wherever ibpd sits inside
+ * struct usnic_ib_pd.
+ */
+ kfree(upd);
+ return 0;
+}
+
+/*
+ * Create a UD QP backed by a QP group on one of the device's VFs.
+ *
+ * The transport spec comes from the user command in @udata.  The resource
+ * spec starts from min_transport_spec[] for that transport, with the CQ
+ * count set to 1 when send and receive share a CQ and to 2 otherwise.
+ *
+ * Returns the embedded ib_qp or an ERR_PTR: -EINVAL for create flags, a
+ * failed copy-in, an invalid transport or a non-UD type; the VF/QP-group
+ * allocation error (or -ENOMEM); or -EBUSY when the response cannot be
+ * delivered to userspace.
+ */
+struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ int err;
+ struct usnic_ib_dev *us_ibdev;
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_ucontext *ucontext;
+ int cq_cnt;
+ struct usnic_vnic_res_spec res_spec;
+ struct usnic_ib_create_qp_cmd cmd;
+ struct usnic_transport_spec trans_spec;
+
+ usnic_dbg("\n");
+
+ ucontext = to_uucontext(pd->uobject->context);
+ us_ibdev = to_usdev(pd->device);
+
+ if (init_attr->create_flags)
+ return ERR_PTR(-EINVAL);
+
+ err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
+ if (err) {
+ usnic_err("%s: cannot copy udata for create_qp\n",
+ us_ibdev->ib_dev.name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* trans_type indexes min_transport_spec[] below, so range-check it */
+ err = create_qp_validate_user_data(cmd);
+ if (err) {
+ usnic_err("%s: Failed to validate user data\n",
+ us_ibdev->ib_dev.name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (init_attr->qp_type != IB_QPT_UD) {
+ usnic_err("%s asked to make a non-UD QP: %d\n",
+ us_ibdev->ib_dev.name, init_attr->qp_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ trans_spec = cmd.spec;
+ mutex_lock(&us_ibdev->usdev_lock);
+ cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2;
+ /* work on a private copy; the table itself is const */
+ res_spec = min_transport_spec[trans_spec.trans_type];
+ usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt);
+ qp_grp = find_free_vf_and_create_qp_grp(us_ibdev, to_upd(pd),
+ &trans_spec,
+ &res_spec);
+ if (IS_ERR_OR_NULL(qp_grp)) {
+ err = qp_grp ? PTR_ERR(qp_grp) : -ENOMEM;
+ goto out_release_mutex;
+ }
+
+ err = usnic_ib_fill_create_qp_resp(qp_grp, udata);
+ if (err) {
+ /* the specific error is replaced by -EBUSY */
+ err = -EBUSY;
+ goto out_release_qp_grp;
+ }
+
+ qp_grp->ctx = ucontext;
+ list_add_tail(&qp_grp->link, &ucontext->qp_grp_list);
+ usnic_ib_log_vf(qp_grp->vf);
+ mutex_unlock(&us_ibdev->usdev_lock);
+ return &qp_grp->ibqp;
+
+out_release_qp_grp:
+ qp_grp_destroy(qp_grp);
+out_release_mutex:
+ mutex_unlock(&us_ibdev->usdev_lock);
+ return ERR_PTR(err);
+}
+
+/*
+ * Destroy a QP: move its group to RESET, unlink it from the owning
+ * context's list and destroy the group, all under the PF's usdev_lock.
+ * A failed transition to RESET is only logged.  Always returns 0.
+ */
+int usnic_ib_destroy_qp(struct ib_qp *qp)
+{
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_vf *vf;
+
+ usnic_dbg("\n");
+
+ qp_grp = to_uqp_grp(qp);
+ /* keep vf: it is needed to unlock after qp_grp has been destroyed */
+ vf = qp_grp->vf;
+ mutex_lock(&vf->pf->usdev_lock);
+ if (usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RESET, NULL)) {
+ usnic_err("Failed to move qp grp %u to reset\n",
+ qp_grp->grp_id);
+ }
+
+ list_del(&qp_grp->link);
+ qp_grp_destroy(qp_grp);
+ mutex_unlock(&vf->pf->usdev_lock);
+
+ return 0;
+}
+
+/*
+ * Change the state of a QP.  Only requests that carry IB_QP_STATE and ask
+ * for INIT, RTR or RTS are forwarded to the QP group; anything else is
+ * logged and rejected with -EINVAL.  Runs under the PF's usdev_lock.
+ * @udata is not used.
+ */
+int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct usnic_ib_qp_grp *qp_grp = to_uqp_grp(ibqp);
+ int status = -EINVAL;
+
+ usnic_dbg("\n");
+
+ /* TODO: Future Support All States */
+ mutex_lock(&qp_grp->vf->pf->usdev_lock);
+ if (attr_mask & IB_QP_STATE) {
+ switch (attr->qp_state) {
+ case IB_QPS_INIT:
+ case IB_QPS_RTR:
+ case IB_QPS_RTS:
+ status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state,
+ NULL);
+ goto out_unlock;
+ default:
+ break;
+ }
+ }
+
+ usnic_err("Unexpected combination mask: %u state: %u\n",
+ attr_mask & IB_QP_STATE, attr->qp_state);
+
+out_unlock:
+ mutex_unlock(&qp_grp->vf->pf->usdev_lock);
+ return status;
+}
+
+/*
+ * Allocate a zeroed ib_cq object.  None of the arguments is consulted.
+ * Returns the CQ, or ERR_PTR(-EBUSY) if the allocation fails.
+ *
+ * NOTE(review): an allocation failure is reported as -EBUSY rather than
+ * -ENOMEM - confirm this is what userspace expects.
+ */
+struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries,
+ int vector, struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct ib_cq *cq;
+
+ usnic_dbg("\n");
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq)
+ return ERR_PTR(-EBUSY);
+
+ return cq;
+}
+
+/* Free a CQ allocated by usnic_ib_create_cq().  Always returns 0. */
+int usnic_ib_destroy_cq(struct ib_cq *cq)
+{
+ usnic_dbg("\n");
+ kfree(cq);
+ return 0;
+}
+
+/*
+ * Register a user memory region of @length bytes at @start with the PD's
+ * uiom domain.  lkey and rkey are both set to 0.  @virt_addr is only
+ * logged and @udata is not used.
+ *
+ * Returns the embedded ib_mr or an ERR_PTR: -ENOMEM if the MR object cannot
+ * be allocated, the usnic_uiom_reg_get() error, or -EFAULT if that call
+ * returns NULL.
+ */
+struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_mr *mr;
+ int err;
+
+ usnic_dbg("start 0x%llx va 0x%llx length 0x%llx\n", start,
+ virt_addr, length);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (IS_ERR_OR_NULL(mr))
+ return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM);
+
+ mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
+ access_flags, 0);
+ if (IS_ERR_OR_NULL(mr->umem)) {
+ err = mr->umem ? PTR_ERR(mr->umem) : -EFAULT;
+ goto err_free;
+ }
+
+ mr->ibmr.lkey = mr->ibmr.rkey = 0;
+ return &mr->ibmr;
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+/*
+ * Release a memory region registered by usnic_ib_reg_mr().  The owning
+ * context's 'closing' flag is passed on to the uiom layer.  Always
+ * returns 0.
+ */
+int usnic_ib_dereg_mr(struct ib_mr *ibmr)
+{
+ struct usnic_ib_mr *mr = to_umr(ibmr);
+
+ usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length);
+
+ usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing);
+ kfree(mr);
+ return 0;
+}
+
+/*
+ * Allocate a user context, start it with an empty QP group list and link
+ * it into the device's context list under usdev_lock.  @udata is not used.
+ *
+ * Returns the embedded ib_ucontext, or ERR_PTR(-ENOMEM).
+ *
+ * NOTE(review): the context comes from kmalloc(), so fields other than
+ * qp_grp_list and link are not initialised here.
+ */
+struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct usnic_ib_ucontext *context;
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+ usnic_dbg("\n");
+
+ context = kmalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&context->qp_grp_list);
+ mutex_lock(&us_ibdev->usdev_lock);
+ list_add_tail(&context->link, &us_ibdev->ctx_list);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return &context->ibucontext;
+}
+
+/*
+ * Unlink a user context from the device's context list and free it.  The
+ * context must have no QP groups left (BUG otherwise).  Always returns 0.
+ */
+int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct usnic_ib_ucontext *context = to_uucontext(ibcontext);
+ struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device);
+ usnic_dbg("\n");
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ BUG_ON(!list_empty(&context->qp_grp_list));
+ list_del(&context->link);
+ mutex_unlock(&us_ibdev->usdev_lock);
+ kfree(context);
+ return 0;
+}
+
+/*
+ * Map BAR 0 of a VF into userspace, uncached.
+ *
+ * The page offset of @vma is interpreted as the VF index.  The VF must back
+ * one of the QP groups owned by @context and the requested length must
+ * equal the BAR length.
+ *
+ * Returns the remap_pfn_range() result, or -EINVAL for a length mismatch or
+ * when no QP group of this context lives on the requested VF.
+ */
+int usnic_ib_mmap(struct ib_ucontext *context,
+ struct vm_area_struct *vma)
+{
+ struct usnic_ib_ucontext *uctx = to_ucontext(context);
+ struct usnic_ib_dev *us_ibdev;
+ struct usnic_ib_qp_grp *qp_grp;
+ struct usnic_ib_vf *vf;
+ struct vnic_dev_bar *bar;
+ dma_addr_t bus_addr;
+ unsigned int len;
+ unsigned int vfid;
+
+ usnic_dbg("\n");
+
+ us_ibdev = to_usdev(context->device);
+ vma->vm_flags |= VM_IO;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ /* userspace passes the VF index as the mmap page offset */
+ vfid = vma->vm_pgoff;
+ usnic_dbg("Page Offset %lu PAGE_SHIFT %u VFID %u\n",
+ vma->vm_pgoff, PAGE_SHIFT, vfid);
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ list_for_each_entry(qp_grp, &uctx->qp_grp_list, link) {
+ vf = qp_grp->vf;
+ if (usnic_vnic_get_index(vf->vnic) == vfid) {
+ bar = usnic_vnic_get_bar(vf->vnic, 0);
+ if ((vma->vm_end - vma->vm_start) != bar->len) {
+ usnic_err("Bar0 Len %lu - Request map %lu\n",
+ bar->len,
+ vma->vm_end - vma->vm_start);
+ mutex_unlock(&us_ibdev->usdev_lock);
+ return -EINVAL;
+ }
+ /* copy what the remap needs, then drop the lock first */
+ bus_addr = bar->bus_addr;
+ len = bar->len;
+ usnic_dbg("bus: %pa vaddr: %p size: %ld\n",
+ &bus_addr, bar->vaddr, bar->len);
+ mutex_unlock(&us_ibdev->usdev_lock);
+
+ return remap_pfn_range(vma,
+ vma->vm_start,
+ bus_addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ }
+ }
+
+ mutex_unlock(&us_ibdev->usdev_lock);
+ usnic_err("No VF %u found\n", vfid);
+ return -EINVAL;
+}
+
+/* In ib callbacks section - Start of stub funcs */
+/* Stub: address handles cannot be created; always ERR_PTR(-EPERM). */
+struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ usnic_dbg("\n");
+ return ERR_PTR(-EPERM);
+}
+
+/* Stub: always fails with -EINVAL. */
+int usnic_ib_destroy_ah(struct ib_ah *ah)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+/* Stub: posting sends through the kernel always fails with -EINVAL. */
+int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+/* Stub: posting receives through the kernel always fails with -EINVAL. */
+int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+/* Stub: polling a CQ through the kernel always fails with -EINVAL. */
+int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+/* Stub: CQ notification requests always fail with -EINVAL. */
+int usnic_ib_req_notify_cq(struct ib_cq *cq,
+ enum ib_cq_notify_flags flags)
+{
+ usnic_dbg("\n");
+ return -EINVAL;
+}
+
+/* Stub: DMA MRs are not provided; always ERR_PTR(-ENOMEM). */
+struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ usnic_dbg("\n");
+ return ERR_PTR(-ENOMEM);
+}
+
+
+/* In ib callbacks section - End of stub funcs */
+/* End of ib callbacks section */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
new file mode 100644
index 00000000000..bb864f5aed7
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_VERBS_H_
+#define USNIC_IB_VERBS_H_
+
+#include "usnic_ib.h"
+
+/*
+ * Verb entry points implemented in usnic_ib_verbs.c.  Signatures follow
+ * the ib_device callback conventions (presumably wired up by the driver's
+ * registration code -- confirm against the caller).
+ */
+
+/* Device, port and QP queries */
+enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
+ u8 port_num);
+int usnic_ib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props);
+int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props);
+int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid);
+int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey);
+/* Object lifetime: PD, QP, CQ, MR, user context */
+struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int usnic_ib_dealloc_pd(struct ib_pd *pd);
+struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int usnic_ib_destroy_qp(struct ib_qp *qp);
+int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries,
+ int vector, struct ib_ucontext *context,
+ struct ib_udata *udata);
+int usnic_ib_destroy_cq(struct ib_cq *cq);
+struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata);
+int usnic_ib_dereg_mr(struct ib_mr *ibmr);
+struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata);
+int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
+int usnic_ib_mmap(struct ib_ucontext *context,
+ struct vm_area_struct *vma);
+/*
+ * Stub verbs: the implementations only emit a debug trace and return an
+ * error (an ERR_PTR for the pointer-returning ones).
+ */
+struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr);
+int usnic_ib_destroy_ah(struct ib_ah *ah);
+int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc);
+int usnic_ib_req_notify_cq(struct ib_cq *cq,
+ enum ib_cq_notify_flags flags);
+struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc);
+#endif /* !USNIC_IB_VERBS_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_log.h b/drivers/infiniband/hw/usnic/usnic_log.h
new file mode 100644
index 00000000000..75777a66c68
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_log.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_LOG_H_
+#define USNIC_LOG_H_
+
+#include "usnic.h"
+
+/*
+ * Current logging threshold; defined outside this header.  The
+ * usnic_dbg/usnic_info/usnic_err macros emit only when this value is at
+ * least the matching USNIC_LOG_LVL_* constant.
+ */
+extern unsigned int usnic_log_lvl;
+
+/* Thresholds in increasing verbosity: NONE silences all three macros. */
+#define USNIC_LOG_LVL_NONE (0)
+#define USNIC_LOG_LVL_ERR (1)
+#define USNIC_LOG_LVL_INFO (2)
+#define USNIC_LOG_LVL_DBG (3)
+
+/*
+ * Unconditional logging primitive.  Issues two printk() calls: first a
+ * "<DRV_NAME>:<function>:<line>: " prefix at level @lvl, then the
+ * caller's format and arguments.  @lvl must be a string literal (it is
+ * pasted onto the prefix format); the second printk() carries no level
+ * of its own.
+ * NOTE(review): the two calls are separate, so the prefix and message
+ * may presumably be split by output from other contexts -- confirm.
+ */
+#define usnic_printk(lvl, args...) \
+ do { \
+ printk(lvl "%s:%s:%d: ", DRV_NAME, __func__, \
+ __LINE__); \
+ printk(args); \
+ } while (0)
+
+/*
+ * Debug trace: emitted only when usnic_log_lvl >= USNIC_LOG_LVL_DBG.
+ * The check is marked unlikely().  Note that the message is printed at
+ * KERN_INFO, not KERN_DEBUG.
+ */
+#define usnic_dbg(args...) \
+ do { \
+ if (unlikely(usnic_log_lvl >= USNIC_LOG_LVL_DBG)) { \
+ usnic_printk(KERN_INFO, args); \
+ } \
+} while (0)
+
+/*
+ * Informational message: printed at KERN_INFO when
+ * usnic_log_lvl >= USNIC_LOG_LVL_INFO.
+ */
+#define usnic_info(args...) \
+do { \
+ if (usnic_log_lvl >= USNIC_LOG_LVL_INFO) { \
+ usnic_printk(KERN_INFO, args); \
+ } \
+} while (0)
+
+/*
+ * Error message: printed at KERN_ERR when
+ * usnic_log_lvl >= USNIC_LOG_LVL_ERR.
+ */
+#define usnic_err(args...) \
+ do { \
+ if (usnic_log_lvl >= USNIC_LOG_LVL_ERR) { \
+ usnic_printk(KERN_ERR, args); \
+ } \
+ } while (0)
+#endif /* !USNIC_LOG_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c
new file mode 100644
index 00000000000..ddef6f77a78
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_transport.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/bitmap.h>
+#include <linux/file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <net/inet_sock.h>
+
+#include "usnic_transport.h"
+#include "usnic_log.h"
+
+/*
+ * ROCE port reservation state.
+ *
+ * roce_bitmap holds one bit per reservable port and is allocated in
+ * usnic_transport_init().  roce_next_port is the bit at which the next
+ * automatic search starts.  ROCE_BITMAP_SZ is the bitmap size in BYTES:
+ * (1 << 16) / 8 = 8192, i.e. one bit for every u16 value.
+ * roce_bitmap_lock is taken around the bitmap accesses in the
+ * reserve/unreserve paths.
+ */
+static unsigned long *roce_bitmap;
+static u16 roce_next_port = 1;
+#define ROCE_BITMAP_SZ ((1 << (8 /*CHAR_BIT*/ * sizeof(u16)))/8 /*CHAR BIT*/)
+static DEFINE_SPINLOCK(roce_bitmap_lock);
+
+/*
+ * Map a transport type to a static, human-readable name.  Values that
+ * are not one of the known enumerators yield "Not known".
+ */
+const char *usnic_transport_to_str(enum usnic_transport_type type)
+{
+	const char *name = "Not known";
+
+	switch (type) {
+	case USNIC_TRANSPORT_UNKNOWN:
+		name = "Unknown";
+		break;
+	case USNIC_TRANSPORT_ROCE_CUSTOM:
+		name = "roce custom";
+		break;
+	case USNIC_TRANSPORT_IPV4_UDP:
+		name = "IPv4 UDP";
+		break;
+	case USNIC_TRANSPORT_MAX:
+		name = "Max?";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
+/*
+ * Render "Proto:<p> Addr:<ipv4> Port:<port>" for @sock into @buf.
+ *
+ * @buf is zeroed first.  Returns the number of characters written
+ * (excluding the terminator), or 0 if the socket address could not be
+ * obtained, in which case @buf is left as an empty string.
+ */
+int usnic_transport_sock_to_str(char *buf, int buf_sz,
+					struct socket *sock)
+{
+	uint32_t ip_addr;
+	uint16_t ip_port;
+	int protocol;
+
+	memset(buf, 0, buf_sz);
+
+	if (usnic_transport_sock_get_addr(sock, &protocol, &ip_addr, &ip_port))
+		return 0;
+
+	return scnprintf(buf, buf_sz, "Proto:%u Addr:%pI4h Port:%hu",
+			 protocol, &ip_addr, ip_port);
+}
+
+/*
+ * Reserve a port number for @type.
+ *
+ * Only USNIC_TRANSPORT_ROCE_CUSTOM is supported.  If @port_num is 0, the
+ * first free bit at or after roce_next_port is picked, and roce_next_port
+ * becomes (port & 4095) + 1, so it takes on the values 1..4096.  If
+ * @port_num is non-zero, that exact port is reserved unless it is
+ * already taken.
+ *
+ * Returns the reserved port, or 0 on failure (unsupported transport,
+ * requested port busy, or no free port found by the search).
+ */
+u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num)
+{
+	unsigned long bit;
+
+	if (type != USNIC_TRANSPORT_ROCE_CUSTOM) {
+		usnic_err("Failed to allocate port - transport %s unsupported\n",
+				usnic_transport_to_str(type));
+		return 0;
+	}
+
+	spin_lock(&roce_bitmap_lock);
+	if (!port_num) {
+		/*
+		 * ROCE_BITMAP_SZ is passed as the search limit, so only
+		 * bits below that value are considered.  The search does
+		 * not wrap around to bits below roce_next_port.
+		 */
+		bit = bitmap_find_next_zero_area(roce_bitmap,
+					ROCE_BITMAP_SZ,
+					roce_next_port /* start */,
+					1 /* nr */,
+					0 /* align */);
+		if (bit >= ROCE_BITMAP_SZ) {
+			/*
+			 * Nothing free in the searched window: the helper
+			 * returned an index past the limit.  Do not mark
+			 * or hand out that index.
+			 */
+			spin_unlock(&roce_bitmap_lock);
+			usnic_err("Failed to allocate port for %s\n",
+					usnic_transport_to_str(type));
+			return 0;
+		}
+		port_num = bit;
+		roce_next_port = (port_num & 4095) + 1;
+	} else if (test_bit(port_num, roce_bitmap)) {
+		usnic_err("Failed to allocate port for %s\n",
+				usnic_transport_to_str(type));
+		spin_unlock(&roce_bitmap_lock);
+		return 0;
+	}
+	bitmap_set(roce_bitmap, port_num, 1);
+	spin_unlock(&roce_bitmap_lock);
+
+	usnic_dbg("Allocating port %hu for %s\n", port_num,
+			usnic_transport_to_str(type));
+	return port_num;
+}
+
+/*
+ * Release a port previously reserved with usnic_transport_rsrv_port().
+ *
+ * Only USNIC_TRANSPORT_ROCE_CUSTOM is handled; any other type is logged
+ * as an error.  Port 0 and ports whose bit is not currently set are
+ * rejected with an error message and leave the bitmap unchanged.
+ */
+void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num)
+{
+	if (type != USNIC_TRANSPORT_ROCE_CUSTOM) {
+		usnic_err("Freeing invalid port %hu for %d\n", port_num, type);
+		return;
+	}
+
+	spin_lock(&roce_bitmap_lock);
+	if (!port_num) {
+		usnic_err("Unreserved unvalid port num 0 for %s\n",
+				usnic_transport_to_str(type));
+	} else if (!test_bit(port_num, roce_bitmap)) {
+		usnic_err("Unreserving invalid %hu for %s\n",
+				port_num,
+				usnic_transport_to_str(type));
+	} else {
+		bitmap_clear(roce_bitmap, port_num, 1);
+		usnic_dbg("Freeing port %hu for %s\n", port_num,
+				usnic_transport_to_str(type));
+	}
+	spin_unlock(&roce_bitmap_lock);
+}
+
+/*
+ * Look up the socket behind @sock_fd and take a reference on it.
+ *
+ * Returns the socket on success; the caller must drop the reference
+ * with usnic_transport_put_socket().  Returns ERR_PTR(-ENOENT) if the
+ * descriptor does not resolve to a socket (the lookup's own error code
+ * is only logged).
+ */
+struct socket *usnic_transport_get_socket(int sock_fd)
+{
+	struct socket *sock;
+	int err;
+	/*
+	 * Same size as in usnic_transport_put_socket().  The rendered
+	 * "Proto:.. Addr:.. Port:.." text is longer than 24 characters
+	 * even for short addresses, so a 25-byte buffer always truncated it.
+	 */
+	char buf[100];
+
+	/* sockfd_lookup will internally do a fget */
+	sock = sockfd_lookup(sock_fd, &err);
+	if (!sock) {
+		usnic_err("Unable to lookup socket for fd %d with err %d\n",
+				sock_fd, err);
+		return ERR_PTR(-ENOENT);
+	}
+
+	usnic_transport_sock_to_str(buf, sizeof(buf), sock);
+	usnic_dbg("Get sock %s\n", buf);
+
+	return sock;
+}
+
+/*
+ * Log a description of @sock at debug level, then drop the reference
+ * taken by usnic_transport_get_socket().
+ */
+void usnic_transport_put_socket(struct socket *sock)
+{
+	char desc[100];
+
+	usnic_transport_sock_to_str(desc, sizeof(desc), sock);
+	usnic_dbg("Put sock %s\n", desc);
+
+	sockfd_put(sock);
+}
+
+/*
+ * Fetch the local IPv4 address, port and protocol of @sock.
+ *
+ * @proto, @addr and @port are optional outputs; NULL pointers are
+ * skipped.  @addr and @port are returned in host byte order.
+ *
+ * Returns 0 on success, the getname() error if the name cannot be read,
+ * or -EINVAL if the socket's address family is not AF_INET.
+ */
+int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
+					uint32_t *addr, uint16_t *port)
+{
+	int len;
+	int err;
+	/*
+	 * getname() writes the socket's native address type; use a buffer
+	 * large enough for any family so that a non-IPv4 socket (for
+	 * example AF_INET6) cannot write past a struct sockaddr_in.
+	 */
+	struct sockaddr_storage ss;
+	struct sockaddr_in *sin = (struct sockaddr_in *)&ss;
+
+	err = sock->ops->getname(sock, (struct sockaddr *)&ss, &len, 0);
+	if (err)
+		return err;
+
+	if (ss.ss_family != AF_INET)
+		return -EINVAL;
+
+	if (proto)
+		*proto = sock->sk->sk_protocol;
+	if (port)
+		*port = ntohs(sin->sin_port);
+	if (addr)
+		*addr = ntohl(sin->sin_addr.s_addr);
+
+	return 0;
+}
+
+/*
+ * Allocate the zeroed ROCE port bitmap and permanently mark bit 0 as
+ * used so that port 0 is never handed out.
+ *
+ * Returns 0 on success or -ENOMEM if the bitmap cannot be allocated.
+ */
+int usnic_transport_init(void)
+{
+	roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL);
+	if (!roce_bitmap) {
+		/* terminate the line like every other message in this file */
+		usnic_err("Failed to allocate bit map\n");
+		return -ENOMEM;
+	}
+
+	/* Do not ever allocate bit 0, hence set it here */
+	bitmap_set(roce_bitmap, 0, 1);
+	return 0;
+}
+
+/* Release the ROCE port bitmap allocated by usnic_transport_init(). */
+void usnic_transport_fini(void)
+{
+ kfree(roce_bitmap);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_transport.h b/drivers/infiniband/hw/usnic/usnic_transport.h
new file mode 100644
index 00000000000..7e5dc6d9f46
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_transport.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_TRANSPORT_H_
+#define USNIC_TRANSPORT_H_
+
+#include "usnic_abi.h"
+
+/* Returns a static, human-readable name for the transport type. */
+const char *usnic_transport_to_str(enum usnic_transport_type trans_type);
+/*
+ * Writes a textual description of the socket (protocol, IPv4 address,
+ * port) into buf.  Returns the number of bytes written, excluding the
+ * null terminator.  If nothing was written, the function returns 0.
+ */
+int usnic_transport_sock_to_str(char *buf, int buf_sz,
+ struct socket *sock);
+/*
+ * Reserve a port. If "port_num" is non-zero, the function tries to
+ * reserve that particular port; if it is 0, a free port is chosen.
+ * Returns the reserved port number, or 0 on failure.
+ */
+u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num);
+/* Release a port obtained from usnic_transport_rsrv_port. */
+void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num);
+/*
+ * Do a fget on the socket referred to by sock_fd and return the socket,
+ * or an ERR_PTR if the lookup fails.  The socket will not be destroyed
+ * before usnic_transport_put_socket has been called.
+ */
+struct socket *usnic_transport_get_socket(int sock_fd);
+void usnic_transport_put_socket(struct socket *sock);
+/*
+ * Call usnic_transport_get_socket before calling *_sock_get_addr.
+ * proto, addr and port are optional outputs (NULL is skipped); addr and
+ * port are returned in host byte order.  Returns 0 on success and a
+ * negative errno otherwise (-EINVAL for non-AF_INET sockets).
+ */
+int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
+ uint32_t *addr, uint16_t *port);
+/* Module-level setup/teardown of the port reservation bitmap. */
+int usnic_transport_init(void);
+void usnic_transport_fini(void);
+#endif /* !USNIC_TRANSPORT_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
new file mode 100644
index 00000000000..801a1d6937e
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -0,0 +1,604 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013 Cisco Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/sched.h>
+#include <linux/hugetlb.h>
+#include <linux/dma-attrs.h>
+#include <linux/iommu.h>
+#include <linux/workqueue.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+
+#include "usnic_log.h"
+#include "usnic_uiom.h"
+#include "usnic_uiom_interval_tree.h"
+
+/*
+ * Workqueue created in usnic_uiom_init(); used by
+ * usnic_uiom_reg_release() to defer locked_vm accounting.
+ */
+static struct workqueue_struct *usnic_uiom_wq;
+
+/*
+ * Number of scatterlist entries that fit in one page together with the
+ * usnic_uiom_chunk header: (PAGE_SIZE - offset of page_list) divided by
+ * the size of one page_list element.  The element size is computed as
+ * the distance between &page_list[1] and &page_list[0] on a null base
+ * pointer.
+ */
+#define USNIC_UIOM_PAGE_CHUNK \
+ ((PAGE_SIZE - offsetof(struct usnic_uiom_chunk, page_list)) /\
+ ((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \
+ (void *) &((struct usnic_uiom_chunk *) 0)->page_list[0]))
+
+/*
+ * Deferred accounting work item, queued by usnic_uiom_reg_release()
+ * when mmap_sem could not be taken immediately.  Subtracts the region's
+ * page count from the mm's locked_vm under mmap_sem, then drops the mm
+ * reference and frees the region.
+ */
+static void usnic_uiom_reg_account(struct work_struct *work)
+{
+	struct usnic_uiom_reg *uiomr;
+	struct mm_struct *mm;
+
+	uiomr = container_of(work, struct usnic_uiom_reg, work);
+	mm = uiomr->mm;
+
+	down_write(&mm->mmap_sem);
+	mm->locked_vm -= uiomr->diff;
+	up_write(&mm->mmap_sem);
+
+	mmput(mm);
+	kfree(uiomr);
+}
+
+/*
+ * IOMMU fault handler installed by usnic_uiom_alloc_pd().  Logs the
+ * faulting device, domain, address and flags, and returns -ENOSYS.
+ * @token is unused.
+ */
+static int usnic_uiom_dma_fault(struct iommu_domain *domain,
+ struct device *dev,
+ unsigned long iova, int flags,
+ void *token)
+{
+ usnic_err("Device %s iommu fault domain 0x%pK va 0x%lx flags 0x%x\n",
+ dev_name(dev),
+ domain, iova, flags);
+ return -ENOSYS;
+}
+
+/*
+ * Drop the page reference held by every scatterlist entry of every
+ * chunk on @chunk_list and free the chunks.  When @dirty is non-zero
+ * each page is marked dirty before its reference is dropped.  The list
+ * head itself is not reinitialised.
+ */
+static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
+{
+	struct usnic_uiom_chunk *cur, *next;
+
+	list_for_each_entry_safe(cur, next, chunk_list, list) {
+		struct scatterlist *ent;
+		int idx;
+
+		for_each_sg(cur->page_list, ent, cur->nents, idx) {
+			struct page *pg = sg_page(ent);
+			dma_addr_t phys = sg_phys(ent);
+
+			if (dirty)
+				set_page_dirty_lock(pg);
+			put_page(pg);
+			usnic_dbg("pa: %pa\n", &phys);
+		}
+		kfree(cur);
+	}
+}
+
+/*
+ * Pin the user pages backing [addr, addr + size) and record them on
+ * @chunk_list as a sequence of usnic_uiom_chunk scatterlists.
+ *
+ * The page count is charged against current->mm->locked_vm; the request
+ * fails with -ENOMEM if that would exceed RLIMIT_MEMLOCK and the caller
+ * lacks CAP_IPC_LOCK.  On failure, pages already placed on @chunk_list
+ * are released again.
+ *
+ * Returns 0 on success, -EPERM if can_do_mlock() refuses, -ENOMEM on
+ * allocation/limit failure, or the negative value returned by
+ * get_user_pages().
+ */
+static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
+ int dmasync, struct list_head *chunk_list)
+{
+ struct page **page_list;
+ struct scatterlist *sg;
+ struct usnic_uiom_chunk *chunk;
+ unsigned long locked;
+ unsigned long lock_limit;
+ unsigned long cur_base;
+ unsigned long npages;
+ int ret;
+ int off;
+ int i;
+ int flags;
+ dma_addr_t pa;
+ DEFINE_DMA_ATTRS(attrs);
+
+ /* attrs is set up here but not consumed anywhere else in this function */
+ if (dmasync)
+ dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
+
+ if (!can_do_mlock())
+ return -EPERM;
+
+ INIT_LIST_HEAD(chunk_list);
+
+ /* One scratch page used as an array of struct page pointers */
+ page_list = (struct page **) __get_free_page(GFP_KERNEL);
+ if (!page_list)
+ return -ENOMEM;
+
+ /* Pages spanned, accounting for the offset of addr within its page */
+ npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
+
+ down_write(&current->mm->mmap_sem);
+
+ locked = npages + current->mm->locked_vm;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+ if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* flags is computed but not read again in this function */
+ flags = IOMMU_READ | IOMMU_CACHE;
+ flags |= (writable) ? IOMMU_WRITE : 0;
+ cur_base = addr & PAGE_MASK;
+ ret = 0;
+
+ while (npages) {
+ /*
+ * Pin at most as many pages as page pointers fit in the
+ * scratch page.  The literal 1 and !writable are the fifth
+ * and sixth arguments (presumably write and force -- confirm
+ * against this kernel's get_user_pages() prototype).
+ */
+ ret = get_user_pages(current, current->mm, cur_base,
+ min_t(unsigned long, npages,
+ PAGE_SIZE / sizeof(struct page *)),
+ 1, !writable, page_list, NULL);
+
+ if (ret < 0)
+ goto out;
+
+ npages -= ret;
+ off = 0;
+
+ /* Split this batch into chunks of up to USNIC_UIOM_PAGE_CHUNK entries */
+ while (ret) {
+ chunk = kmalloc(sizeof(*chunk) +
+ sizeof(struct scatterlist) *
+ min_t(int, ret, USNIC_UIOM_PAGE_CHUNK),
+ GFP_KERNEL);
+ if (!chunk) {
+ /*
+ * NOTE(review): pages of this batch that are not
+ * yet on chunk_list are not visited by the
+ * cleanup below, which walks chunk_list only --
+ * confirm whether they stay pinned.
+ */
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ chunk->nents = min_t(int, ret, USNIC_UIOM_PAGE_CHUNK);
+ sg_init_table(chunk->page_list, chunk->nents);
+ for_each_sg(chunk->page_list, sg, chunk->nents, i) {
+ sg_set_page(sg, page_list[i + off],
+ PAGE_SIZE, 0);
+ pa = sg_phys(sg);
+ usnic_dbg("va: 0x%lx pa: %pa\n",
+ cur_base + i*PAGE_SIZE, &pa);
+ }
+ cur_base += chunk->nents * PAGE_SIZE;
+ ret -= chunk->nents;
+ off += chunk->nents;
+ list_add_tail(&chunk->list, chunk_list);
+ }
+
+ /* ret is already 0 here; keeps the success value explicit */
+ ret = 0;
+ }
+
+out:
+ /* Failure: unpin what was recorded.  Success: commit the new locked_vm. */
+ if (ret < 0)
+ usnic_uiom_put_pages(chunk_list, 0);
+ else
+ current->mm->locked_vm = locked;
+
+ up_write(&current->mm->mmap_sem);
+ free_page((unsigned long) page_list);
+ return ret;
+}
+
+/*
+ * Remove the IOMMU mappings of @pd's domain for every page covered by
+ * the interval nodes on @intervals.  Interval bounds are page numbers;
+ * each page is unmapped with its own iommu_unmap() call.  The list
+ * itself is not modified.
+ */
+static void usnic_uiom_unmap_sorted_intervals(struct list_head *intervals,
+						struct usnic_uiom_pd *pd)
+{
+	struct usnic_uiom_interval_node *interval;
+	long unsigned va, size;
+
+	/* Nothing is removed from the list, so the plain iterator suffices */
+	list_for_each_entry(interval, intervals, link) {
+		va = interval->start << PAGE_SHIFT;
+		size = ((interval->last - interval->start) + 1) << PAGE_SHIFT;
+		while (size > 0) {
+			/* Workaround for RH 970401 */
+			usnic_dbg("va 0x%lx size 0x%lx\n", va, PAGE_SIZE);
+			iommu_unmap(pd->domain, va, PAGE_SIZE);
+			va += PAGE_SIZE;
+			size -= PAGE_SIZE;
+		}
+	}
+}
+
+/*
+ * Tear down the mappings of @uiomr within @pd: remove its page-number
+ * range from the PD's interval tree, unmap whatever intervals that
+ * removal hands back, free those interval nodes and release the pinned
+ * pages.  Pages are marked dirty only if @dirty is set and at least one
+ * removed interval carried IOMMU_WRITE.  Runs under pd->lock.
+ */
+static void __usnic_uiom_reg_release(struct usnic_uiom_pd *pd,
+					struct usnic_uiom_reg *uiomr,
+					int dirty)
+{
+	struct usnic_uiom_interval_node *node, *next;
+	unsigned long first_vpn, last_vpn;
+	int page_cnt;
+	int was_writable = 0;
+	LIST_HEAD(removed);
+
+	page_cnt = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+	first_vpn = (uiomr->va & PAGE_MASK) >> PAGE_SHIFT;
+	last_vpn = first_vpn + page_cnt - 1;
+
+	spin_lock(&pd->lock);
+
+	usnic_uiom_remove_interval(&pd->rb_root, first_vpn,
+					last_vpn, &removed);
+	usnic_uiom_unmap_sorted_intervals(&removed, pd);
+
+	list_for_each_entry_safe(node, next, &removed, link) {
+		if (node->flags & IOMMU_WRITE)
+			was_writable = 1;
+		list_del(&node->link);
+		kfree(node);
+	}
+
+	usnic_uiom_put_pages(&uiomr->chunk_list, dirty & was_writable);
+
+	spin_unlock(&pd->lock);
+}
+
+/*
+ * Install IOMMU mappings for the page-number intervals on @intervals,
+ * using the pinned pages recorded in @uiomr's chunk list.
+ *
+ * The chunk pages are walked in step with a running virtual address
+ * that starts at the page-aligned uiomr->va.  Pages below the current
+ * interval's start are skipped.  Within an interval, physically
+ * contiguous pages are accumulated and mapped with one iommu_map() call
+ * per contiguous run.
+ *
+ * Returns 0 on success.  On an iommu_map() failure, calls
+ * usnic_uiom_unmap_sorted_intervals() on the whole list and returns the
+ * error.
+ */
+static int usnic_uiom_map_sorted_intervals(struct list_head *intervals,
+ struct usnic_uiom_reg *uiomr)
+{
+ int i, err;
+ size_t size;
+ struct usnic_uiom_chunk *chunk;
+ struct usnic_uiom_interval_node *interval_node;
+ dma_addr_t pa;
+ dma_addr_t pa_start = 0;
+ dma_addr_t pa_end = 0;
+ /* -EINVAL doubles as a "no run started yet" marker, see WARN_ON below */
+ long int va_start = -EINVAL;
+ struct usnic_uiom_pd *pd = uiomr->pd;
+ long int va = uiomr->va & PAGE_MASK;
+ int flags = IOMMU_READ | IOMMU_CACHE;
+
+ flags |= (uiomr->writable) ? IOMMU_WRITE : 0;
+ chunk = list_first_entry(&uiomr->chunk_list, struct usnic_uiom_chunk,
+ list);
+ list_for_each_entry(interval_node, intervals, link) {
+ /*
+ * Re-entered via goto when a chunk is exhausted before the current
+ * interval's last page has been reached.
+ */
+iter_chunk:
+ for (i = 0; i < chunk->nents; i++, va += PAGE_SIZE) {
+ pa = sg_phys(&chunk->page_list[i]);
+ /* Page lies before this interval: nothing to map */
+ if ((va >> PAGE_SHIFT) < interval_node->start)
+ continue;
+
+ if ((va >> PAGE_SHIFT) == interval_node->start) {
+ /* First page of the interval */
+ va_start = va;
+ pa_start = pa;
+ pa_end = pa;
+ }
+
+ WARN_ON(va_start == -EINVAL);
+
+ if ((pa_end + PAGE_SIZE != pa) &&
+ (pa != pa_start)) {
+ /* PAs are not contiguous */
+ size = pa_end - pa_start + PAGE_SIZE;
+ /* NOTE(review): this debug format has no trailing newline */
+ usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
+ va_start, &pa_start, size, flags);
+ err = iommu_map(pd->domain, va_start, pa_start,
+ size, flags);
+ if (err) {
+ usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
+ va_start, &pa_start, size, err);
+ goto err_out;
+ }
+ /* Start a new run at the current page */
+ va_start = va;
+ pa_start = pa;
+ pa_end = pa;
+ }
+
+ if ((va >> PAGE_SHIFT) == interval_node->last) {
+ /* Last page of the interval */
+ size = pa - pa_start + PAGE_SIZE;
+ usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
+ va_start, &pa_start, size, flags);
+ err = iommu_map(pd->domain, va_start, pa_start,
+ size, flags);
+ if (err) {
+ usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
+ va_start, &pa_start, size, err);
+ goto err_out;
+ }
+ /*
+ * Leaves i and va pointing at this page; the loop
+ * increment is skipped by break.
+ */
+ break;
+ }
+
+ /* Extend the current run by one page */
+ if (pa != pa_start)
+ pa_end += PAGE_SIZE;
+ }
+
+ if (i == chunk->nents) {
+ /*
+ * Hit last entry of the chunk,
+ * hence advance to next chunk
+ */
+ /* list_first_entry() on chunk->list yields the entry after chunk */
+ chunk = list_first_entry(&chunk->list,
+ struct usnic_uiom_chunk,
+ list);
+ goto iter_chunk;
+ }
+ }
+
+ return 0;
+
+err_out:
+ /*
+ * NOTE(review): this unmaps every interval on the list, including
+ * ones that may not have been mapped yet -- confirm that is harmless.
+ */
+ usnic_uiom_unmap_sorted_intervals(intervals, pd);
+ return err;
+}
+
+/*
+ * Register the user range [addr, addr + size) with @pd: pin its pages,
+ * map the parts of the range not already covered in the PD's interval
+ * tree into the IOMMU domain, and record the range in that tree.
+ *
+ * The incoming @writable value is overridden to 1 (see comment below).
+ *
+ * Returns the new region descriptor, to be released with
+ * usnic_uiom_reg_release(), or an ERR_PTR on failure.
+ */
+struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
+ unsigned long addr, size_t size,
+ int writable, int dmasync)
+{
+ struct usnic_uiom_reg *uiomr;
+ unsigned long va_base, vpn_start, vpn_last;
+ unsigned long npages;
+ int offset, err;
+ LIST_HEAD(sorted_diff_intervals);
+
+ /*
+ * Intel IOMMU map throws an error if a translation entry is
+ * changed from read to write. This module may not unmap
+ * and then remap the entry after fixing the permission
+ * b/c this open up a small windows where hw DMA may page fault
+ * Hence, make all entries to be writable.
+ */
+ writable = 1;
+
+ /* Page-aligned base, in-page offset, and inclusive page-number range */
+ va_base = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ npages = PAGE_ALIGN(size + offset) >> PAGE_SHIFT;
+ vpn_start = (addr & PAGE_MASK) >> PAGE_SHIFT;
+ vpn_last = vpn_start + npages - 1;
+
+ uiomr = kmalloc(sizeof(*uiomr), GFP_KERNEL);
+ if (!uiomr)
+ return ERR_PTR(-ENOMEM);
+
+ uiomr->va = va_base;
+ uiomr->offset = offset;
+ uiomr->length = size;
+ uiomr->writable = writable;
+ uiomr->pd = pd;
+
+ /* Pins the pages and charges them to locked_vm; cleans up after itself on error */
+ err = usnic_uiom_get_pages(addr, size, writable, dmasync,
+ &uiomr->chunk_list);
+ if (err) {
+ usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_free_uiomr;
+ }
+
+ /* pd->lock is held from here until the success return or out_put_pages */
+ spin_lock(&pd->lock);
+ err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last,
+ (writable) ? IOMMU_WRITE : 0,
+ IOMMU_WRITE,
+ &pd->rb_root,
+ &sorted_diff_intervals);
+ if (err) {
+ usnic_err("Failed disjoint interval vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_put_pages;
+ }
+
+ /* On failure this helper has already unmapped the intervals */
+ err = usnic_uiom_map_sorted_intervals(&sorted_diff_intervals, uiomr);
+ if (err) {
+ usnic_err("Failed map interval vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_put_intervals;
+
+ }
+
+ err = usnic_uiom_insert_interval(&pd->rb_root, vpn_start, vpn_last,
+ (writable) ? IOMMU_WRITE : 0);
+ if (err) {
+ usnic_err("Failed insert interval vpn [0x%lx,0x%lx] err %d\n",
+ vpn_start, vpn_last, err);
+ goto out_unmap_intervals;
+ }
+
+ /* The temporary diff list is no longer needed once the tree is updated */
+ usnic_uiom_put_interval_set(&sorted_diff_intervals);
+ spin_unlock(&pd->lock);
+
+ return uiomr;
+
+ /*
+ * Error unwinding, in reverse order of setup.
+ * NOTE(review): usnic_uiom_get_pages() raised current->mm->locked_vm
+ * on success; none of the labels below lowers it again -- confirm
+ * whether the accounting is rolled back elsewhere.
+ */
+out_unmap_intervals:
+ usnic_uiom_unmap_sorted_intervals(&sorted_diff_intervals, pd);
+out_put_intervals:
+ usnic_uiom_put_interval_set(&sorted_diff_intervals);
+out_put_pages:
+ usnic_uiom_put_pages(&uiomr->chunk_list, 0);
+ spin_unlock(&pd->lock);
+out_free_uiomr:
+ kfree(uiomr);
+ return ERR_PTR(err);
+}
+
+/*
+ * Release a region obtained from usnic_uiom_reg_get(): unmap and unpin
+ * its pages, give its page count back to the mm's locked_vm, and free
+ * the descriptor.
+ *
+ * @closing: non-zero when mmap_sem may already be held by the caller's
+ * context; in that case the semaphore is only try-locked and, if that
+ * fails, the accounting (and the final free) is handed to the
+ * workqueue.
+ */
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
+{
+	struct mm_struct *mm;
+	unsigned long diff;
+
+	__usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
+
+	mm = get_task_mm(current);
+	if (!mm) {
+		/* No mm to account against; just drop the descriptor */
+		kfree(uiomr);
+		return;
+	}
+
+	diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+
+	/*
+	 * We may be called with the mm's mmap_sem already held.  This
+	 * can happen when a userspace munmap() is the call that drops
+	 * the last reference to our file and calls our release
+	 * method.  If there are memory regions to destroy, we'll end
+	 * up here and not be able to take the mmap_sem.  In that case
+	 * we defer the vm_locked accounting to the system workqueue.
+	 */
+	if (!closing) {
+		down_write(&mm->mmap_sem);
+	} else if (!down_write_trylock(&mm->mmap_sem)) {
+		INIT_WORK(&uiomr->work, usnic_uiom_reg_account);
+		uiomr->mm = mm;
+		uiomr->diff = diff;
+
+		/* The work item owns the mm reference and uiomr from here */
+		queue_work(usnic_uiom_wq, &uiomr->work);
+		return;
+	}
+
+	current->mm->locked_vm -= diff;
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+	kfree(uiomr);
+}
+
+/*
+ * Allocate a protection domain: a zeroed usnic_uiom_pd with a fresh
+ * IOMMU domain on the PCI bus, the usnic fault handler installed, and
+ * its lock and device list initialised.
+ *
+ * Returns the new PD, or an ERR_PTR (-ENOMEM when either allocation
+ * yields NULL, otherwise the domain allocator's error).
+ */
+struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
+{
+	struct usnic_uiom_pd *new_pd;
+	void *dom;
+
+	new_pd = kzalloc(sizeof(*new_pd), GFP_KERNEL);
+	if (!new_pd)
+		return ERR_PTR(-ENOMEM);
+
+	dom = iommu_domain_alloc(&pci_bus_type);
+	new_pd->domain = dom;
+	if (IS_ERR_OR_NULL(dom)) {
+		long rc = dom ? PTR_ERR(dom) : -ENOMEM;
+
+		usnic_err("Failed to allocate IOMMU domain with err %ld\n",
+				PTR_ERR(new_pd->domain));
+		kfree(new_pd);
+		return ERR_PTR(rc);
+	}
+
+	iommu_set_fault_handler(new_pd->domain, usnic_uiom_dma_fault, NULL);
+
+	spin_lock_init(&new_pd->lock);
+	INIT_LIST_HEAD(&new_pd->devs);
+
+	return new_pd;
+}
+
+/*
+ * Free a protection domain from usnic_uiom_alloc_pd(): releases its
+ * IOMMU domain, then the descriptor.  Entries still on pd->devs are not
+ * touched here.
+ */
+void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd)
+{
+ iommu_domain_free(pd->domain);
+ kfree(pd);
+}
+
+/*
+ * Attach @dev to @pd's IOMMU domain and record it on the PD's device
+ * list.  The attachment is undone again if the domain does not report
+ * IOMMU_CAP_CACHE_COHERENCY.
+ *
+ * Returns 0 on success, -ENOMEM if the tracking entry cannot be
+ * allocated, the iommu_attach_device() error, or -EINVAL when cache
+ * coherency is unsupported.
+ */
+int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev)
+{
+	struct usnic_uiom_dev *entry;
+	int rc;
+
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		return -ENOMEM;
+	entry->dev = dev;
+
+	rc = iommu_attach_device(pd->domain, dev);
+	if (rc) {
+		kfree(entry);
+		return rc;
+	}
+
+	if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) {
+		usnic_err("IOMMU of %s does not support cache coherency\n",
+				dev_name(dev));
+		iommu_detach_device(pd->domain, dev);
+		kfree(entry);
+		return -EINVAL;
+	}
+
+	spin_lock(&pd->lock);
+	list_add_tail(&entry->link, &pd->devs);
+	pd->dev_cnt++;
+	spin_unlock(&pd->lock);
+
+	return 0;
+}
+
+/*
+ * Undo usnic_uiom_attach_dev_to_pd(): remove @dev from @pd's device
+ * list, detach it from the IOMMU domain and free its tracking entry.
+ * Logs an error and does nothing else if @dev is not on the list.
+ */
+void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, struct device *dev)
+{
+	struct usnic_uiom_dev *uiom_dev;
+	int found = 0;
+
+	spin_lock(&pd->lock);
+	list_for_each_entry(uiom_dev, &pd->devs, link) {
+		if (uiom_dev->dev == dev) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found) {
+		usnic_err("Unable to free dev %s - not found\n",
+				dev_name(dev));
+		spin_unlock(&pd->lock);
+		return;
+	}
+
+	list_del(&uiom_dev->link);
+	pd->dev_cnt--;
+	spin_unlock(&pd->lock);
+
+	iommu_detach_device(pd->domain, dev);
+	/*
+	 * The entry was allocated by usnic_uiom_attach_dev_to_pd() and is
+	 * no longer reachable once unlinked; free it to avoid leaking it.
+	 */
+	kfree(uiom_dev);
+}
+
+/*
+ * Snapshot the devices attached to @pd into a newly allocated,
+ * NULL-terminated array (dev_cnt + 1 zeroed slots).  The array is built
+ * under pd->lock.
+ *
+ * Returns the array, to be freed with usnic_uiom_free_dev_list(), or
+ * ERR_PTR(-ENOMEM).
+ */
+struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd)
+{
+	struct usnic_uiom_dev *entry;
+	struct device **list;
+	int n = 0;
+
+	spin_lock(&pd->lock);
+	list = kcalloc(pd->dev_cnt + 1, sizeof(*list), GFP_ATOMIC);
+	if (list) {
+		list_for_each_entry(entry, &pd->devs, link)
+			list[n++] = entry->dev;
+	} else {
+		list = ERR_PTR(-ENOMEM);
+	}
+	spin_unlock(&pd->lock);
+
+	return list;
+}
+
+/* Free an array returned by usnic_uiom_get_dev_list(). */
+void usnic_uiom_free_dev_list(struct device **devs)
+{
+ kfree(devs);
+}
+
+/*
+ * Module-level setup: require an IOMMU on the PCI bus and create the
+ * workqueue (named @drv_name) used for deferred locked_vm accounting.
+ *
+ * Returns 0 on success, -EPERM if no IOMMU is present, or -ENOMEM if
+ * the workqueue cannot be created.
+ */
+int usnic_uiom_init(char *drv_name)
+{
+	struct workqueue_struct *wq;
+
+	if (!iommu_present(&pci_bus_type)) {
+		usnic_err("IOMMU required but not present or enabled.  USNIC QPs will not function w/o enabling IOMMU\n");
+		return -EPERM;
+	}
+
+	wq = create_workqueue(drv_name);
+	usnic_uiom_wq = wq;
+	if (!wq) {
+		usnic_err("Unable to alloc wq for drv %s\n", drv_name);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Module-level teardown: flush the accounting workqueue, then destroy
+ * it.
+ */
+void usnic_uiom_fini(void)
+{
+ flush_workqueue(usnic_uiom_wq);
+ destroy_workqueue(usnic_uiom_wq);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
new file mode 100644
index 00000000000..70440996e8f
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_UIOM_H_
+#define USNIC_UIOM_H_
+
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+
+#include "usnic_uiom_interval_tree.h"
+
+/*
+ * Access flag values.  Not referenced by usnic_uiom.c itself --
+ * presumably for callers of usnic_uiom_reg_get(); confirm.
+ */
+#define USNIC_UIOM_READ (1)
+#define USNIC_UIOM_WRITE (2)
+
+/*
+ * Limits and page size.  Not referenced by usnic_uiom.c itself --
+ * presumably reported as device attributes elsewhere; confirm.
+ */
+#define USNIC_UIOM_MAX_PD_CNT (1000)
+#define USNIC_UIOM_MAX_MR_CNT (1000000)
+#define USNIC_UIOM_MAX_MR_SIZE (~0UL)
+#define USNIC_UIOM_PAGE_SIZE (PAGE_SIZE)
+
+/* One device attached to a protection domain. */
+struct usnic_uiom_dev {
+ /* The attached device */
+ struct device *dev;
+ /* Entry on usnic_uiom_pd.devs */
+ struct list_head link;
+};
+
+/* Protection domain: an IOMMU domain plus the ranges mapped into it. */
+struct usnic_uiom_pd {
+ /* IOMMU domain allocated in usnic_uiom_alloc_pd() */
+ struct iommu_domain *domain;
+ /* Taken around accesses to rb_root, devs and dev_cnt in usnic_uiom.c */
+ spinlock_t lock;
+ /* Interval tree of registered page-number ranges */
+ struct rb_root rb_root;
+ /* List of usnic_uiom_dev entries attached to this domain */
+ struct list_head devs;
+ /* Number of entries on devs */
+ int dev_cnt;
+};
+
+/* One registered user memory region. */
+struct usnic_uiom_reg {
+ /* Protection domain the region is mapped into */
+ struct usnic_uiom_pd *pd;
+ /* Page-aligned start address of the region */
+ unsigned long va;
+ /* Length in bytes as requested by the caller */
+ size_t length;
+ /* Offset of the requested start address within its first page */
+ int offset;
+ /* Not written by usnic_uiom.c */
+ int page_size;
+ /* Non-zero if mapped with IOMMU_WRITE */
+ int writable;
+ /* List of usnic_uiom_chunk holding the pinned pages */
+ struct list_head chunk_list;
+ /* work, mm and diff are used only for deferred locked_vm accounting */
+ struct work_struct work;
+ struct mm_struct *mm;
+ unsigned long diff;
+};
+
+/* A batch of pinned pages, stored as a scatterlist. */
+struct usnic_uiom_chunk {
+ /* Entry on usnic_uiom_reg.chunk_list */
+ struct list_head list;
+ /* Number of valid entries in page_list */
+ int nents;
+ /* Trailing variable-length array; storage is allocated with the chunk */
+ struct scatterlist page_list[0];
+};
+
+/* Protection domain lifetime; alloc returns an ERR_PTR on failure. */
+struct usnic_uiom_pd *usnic_uiom_alloc_pd(void);
+void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd);
+/* Attach/detach a device to/from the PD's IOMMU domain. */
+int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev);
+void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd,
+ struct device *dev);
+/*
+ * Returns a NULL-terminated array of the PD's devices (or an ERR_PTR);
+ * release it with usnic_uiom_free_dev_list().
+ */
+struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd);
+void usnic_uiom_free_dev_list(struct device **devs);
+/*
+ * Pin and map a user memory range into the PD; returns an ERR_PTR on
+ * failure.  Release the result with usnic_uiom_reg_release().
+ */
+struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
+ unsigned long addr, size_t size,
+ int access, int dmasync);
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing);
+/* Module-level setup and teardown. */
+int usnic_uiom_init(char *drv_name);
+void usnic_uiom_fini(void);
+#endif /* USNIC_UIOM_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
new file mode 100644
index 00000000000..3a4288e0fba
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/list_sort.h>
+
+#include <linux/interval_tree_generic.h>
+#include "usnic_uiom_interval_tree.h"
+
+/* Accessors handed to INTERVAL_TREE_DEFINE() at the end of this file. */
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+
+/*
+ * Allocate an interval node into @node. On allocation failure @err is set
+ * to -ENOMEM and control jumps to the label @err_out -- mind the hidden
+ * goto when reading the callers.
+ */
+#define MAKE_NODE(node, start, end, ref_cnt, flags, err, err_out) \
+		do { \
+			(node) = usnic_uiom_interval_node_alloc(start, \
+					end, ref_cnt, flags); \
+			if (!(node)) { \
+				(err) = -ENOMEM; \
+				goto err_out; \
+			} \
+		} while (0)
+
+/* Queue @node at the tail of @list through its 'link' member. */
+#define MARK_FOR_ADD(node, list) (list_add_tail(&(node)->link, list))
+
+/* MAKE_NODE() followed by MARK_FOR_ADD(); same hidden goto on failure. */
+#define MAKE_NODE_AND_APPEND(node, start, end, ref_cnt, flags, err, \
+				err_out, list) \
+				do { \
+					MAKE_NODE(node, start, end, \
+						ref_cnt, flags, err, \
+						err_out); \
+					MARK_FOR_ADD(node, list); \
+				} while (0)
+
+/* True when @flags1 and @flags2 agree on every bit selected by @mask. */
+#define FLAGS_EQUAL(flags1, flags2, mask) \
+			(((flags1) & (mask)) == ((flags2) & (mask)))
+
+/*
+ * Allocate and initialise one interval node covering [start, last].
+ *
+ * The node is zeroed and allocated with GFP_ATOMIC. The bounds are taken as
+ * unsigned long so they match the node's start/last fields and the public
+ * API of this file; the previous 'long int' parameters forced every value
+ * through a signed round trip.
+ *
+ * Returns the new node, or NULL when the allocation fails.
+ */
+static struct usnic_uiom_interval_node*
+usnic_uiom_interval_node_alloc(unsigned long start, unsigned long last,
+				int ref_cnt, int flags)
+{
+	struct usnic_uiom_interval_node *interval;
+
+	interval = kzalloc(sizeof(*interval), GFP_ATOMIC);
+	if (!interval)
+		return NULL;
+
+	interval->start = start;
+	interval->last = last;
+	interval->flags = flags;
+	interval->ref_cnt = ref_cnt;
+
+	return interval;
+}
+
+/*
+ * list_sort() comparator: orders interval nodes by ascending start.
+ * @priv is unused.
+ */
+static int interval_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	const struct usnic_uiom_interval_node *lhs =
+		list_entry(a, struct usnic_uiom_interval_node, link);
+	const struct usnic_uiom_interval_node *rhs =
+		list_entry(b, struct usnic_uiom_interval_node, link);
+
+	/*
+	 * Compare explicitly rather than subtracting: the starts are
+	 * unsigned long while the result has to fit an int.
+	 */
+	if (lhs->start == rhs->start)
+		return 0;
+
+	return (lhs->start < rhs->start) ? -1 : 1;
+}
+
+/*
+ * Initialise @list and fill it with every node the interval-tree iterators
+ * report for [start, last] in @root, then sort it by ascending start.
+ * The nodes stay in the tree; only their 'link' member is used.
+ */
+static void
+find_intervals_intersection_sorted(struct rb_root *root, unsigned long start,
+					unsigned long last,
+					struct list_head *list)
+{
+	struct usnic_uiom_interval_node *cur;
+
+	INIT_LIST_HEAD(list);
+
+	cur = usnic_uiom_interval_tree_iter_first(root, start, last);
+	while (cur) {
+		list_add_tail(&cur->link, list);
+		cur = usnic_uiom_interval_tree_iter_next(cur, start, last);
+	}
+
+	list_sort(NULL, list, interval_cmp);
+}
+
+/*
+ * Compute the parts of [start, last] that are not yet covered by @root.
+ *
+ * An existing interval only counts as covering its range when its flags
+ * equal @flags under @flag_mask. Each uncovered range becomes a newly
+ * allocated node (ref_cnt 1, flags = @flags) appended to @diff_set in
+ * ascending order. @diff_set is initialised here.
+ *
+ * Returns 0 on success. On allocation failure returns -ENOMEM after freeing
+ * every node already placed on @diff_set.
+ */
+int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last,
+ int flags, int flag_mask,
+ struct rb_root *root,
+ struct list_head *diff_set)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ int err = 0;
+ /* Left edge of the range that still has to be classified. */
+ long int pivot = start;
+ LIST_HEAD(intersection_set);
+
+ INIT_LIST_HEAD(diff_set);
+
+ find_intervals_intersection_sorted(root, start, last,
+ &intersection_set);
+
+ list_for_each_entry(interval, &intersection_set, link) {
+ /* Gap before this interval: report it (may goto err_out). */
+ if (pivot < interval->start) {
+ MAKE_NODE_AND_APPEND(tmp, pivot, interval->start - 1,
+ 1, flags, err, err_out,
+ diff_set);
+ pivot = interval->start;
+ }
+
+ /*
+ * Invariant: Set [start, pivot] is either in diff_set or root,
+ * but not in both.
+ */
+
+ if (pivot > interval->last) {
+ continue;
+ } else if (pivot <= interval->last &&
+ FLAGS_EQUAL(interval->flags, flags,
+ flag_mask)) {
+ /* Covered with matching flags: skip past this interval. */
+ pivot = interval->last + 1;
+ }
+ }
+
+ /* Whatever is left after the last interval is uncovered as well. */
+ if (pivot <= last)
+ MAKE_NODE_AND_APPEND(tmp, pivot, last, 1, flags, err, err_out,
+ diff_set);
+
+ return 0;
+
+err_out:
+ list_for_each_entry_safe(interval, tmp, diff_set, link) {
+ list_del(&interval->link);
+ kfree(interval);
+ }
+
+ return err;
+}
+
+/*
+ * Free every node on @intervals.
+ *
+ * Each node is unlinked before it is freed, matching the error path of
+ * usnic_uiom_get_intervals_diff(), so that @intervals ends up as a valid
+ * empty list instead of pointing at freed memory.
+ */
+void usnic_uiom_put_interval_set(struct list_head *intervals)
+{
+	struct usnic_uiom_interval_node *interval, *tmp;
+
+	list_for_each_entry_safe(interval, tmp, intervals, link) {
+		list_del(&interval->link);
+		kfree(interval);
+	}
+}
+
+/*
+ * Insert [start, last] with @flags into @root, splitting overlapped nodes.
+ *
+ * Every node intersecting the range is replaced by freshly allocated nodes:
+ * parts of an old node outside the range keep its ref_cnt and flags, parts
+ * inside the range get ref_cnt + 1 and the OR of the old flags and @flags,
+ * and gaps that were not covered get ref_cnt 1 and @flags. The tree itself
+ * is only modified after all allocations have succeeded.
+ *
+ * Returns 0 on success. On allocation failure returns -ENOMEM, frees the
+ * nodes prepared so far and leaves the tree untouched.
+ */
+int usnic_uiom_insert_interval(struct rb_root *root, unsigned long start,
+ unsigned long last, int flags)
+{
+ struct usnic_uiom_interval_node *interval, *tmp;
+ unsigned long istart, ilast;
+ int iref_cnt, iflags;
+ unsigned long lpivot = start;
+ int err = 0;
+ LIST_HEAD(to_add);
+ LIST_HEAD(intersection_set);
+
+ find_intervals_intersection_sorted(root, start, last,
+ &intersection_set);
+
+ list_for_each_entry(interval, &intersection_set, link) {
+ /*
+ * Invariant - lpivot is the left edge of next interval to be
+ * inserted
+ */
+ istart = interval->start;
+ ilast = interval->last;
+ iref_cnt = interval->ref_cnt;
+ iflags = interval->flags;
+
+ if (istart < lpivot) {
+ /* Old node sticks out to the left: keep that part as is. */
+ MAKE_NODE_AND_APPEND(tmp, istart, lpivot - 1, iref_cnt,
+ iflags, err, err_out, &to_add);
+ } else if (istart > lpivot) {
+ /* Uncovered gap before the old node: new node, one ref. */
+ MAKE_NODE_AND_APPEND(tmp, lpivot, istart - 1, 1, flags,
+ err, err_out, &to_add);
+ lpivot = istart;
+ } else {
+ lpivot = istart;
+ }
+
+ if (ilast > last) {
+ /* Overlap, then the old node's tail right of the range. */
+ MAKE_NODE_AND_APPEND(tmp, lpivot, last, iref_cnt + 1,
+ iflags | flags, err, err_out,
+ &to_add);
+ MAKE_NODE_AND_APPEND(tmp, last + 1, ilast, iref_cnt,
+ iflags, err, err_out, &to_add);
+ } else {
+ MAKE_NODE_AND_APPEND(tmp, lpivot, ilast, iref_cnt + 1,
+ iflags | flags, err, err_out,
+ &to_add);
+ }
+
+ lpivot = ilast + 1;
+ }
+
+ /* Remainder of the range right of the last overlapped node. */
+ if (lpivot <= last)
+ MAKE_NODE_AND_APPEND(tmp, lpivot, last, 1, flags, err, err_out,
+ &to_add);
+
+ /* All allocations succeeded: swap the old nodes for the new ones. */
+ list_for_each_entry_safe(interval, tmp, &intersection_set, link) {
+ usnic_uiom_interval_tree_remove(interval, root);
+ kfree(interval);
+ }
+
+ list_for_each_entry(interval, &to_add, link)
+ usnic_uiom_interval_tree_insert(interval, root);
+
+ return 0;
+
+err_out:
+ list_for_each_entry_safe(interval, tmp, &to_add, link)
+ kfree(interval);
+
+ return err;
+}
+
+/*
+ * Drop one reference from every node of @root that the interval-tree
+ * iterators report for [start, last]. Nodes whose ref_cnt reaches zero are
+ * appended to @removed and afterwards unlinked from the tree; freeing them
+ * is left to the caller. @removed is not initialised here.
+ */
+void usnic_uiom_remove_interval(struct rb_root *root, unsigned long start,
+				unsigned long last, struct list_head *removed)
+{
+	struct usnic_uiom_interval_node *node;
+
+	node = usnic_uiom_interval_tree_iter_first(root, start, last);
+	while (node) {
+		node->ref_cnt--;
+		if (!node->ref_cnt)
+			list_add_tail(&node->link, removed);
+
+		node = usnic_uiom_interval_tree_iter_next(node, start, last);
+	}
+
+	/* Unlink only after the walk so the iteration never sees a hole. */
+	list_for_each_entry(node, removed, link)
+		usnic_uiom_interval_tree_remove(node, root);
+}
+
+/*
+ * Instantiate the generic interval tree for usnic_uiom_interval_node. This
+ * generates the usnic_uiom_interval_tree_{insert,remove,iter_first,iter_next}
+ * functions declared in usnic_uiom_interval_tree.h; the empty argument is
+ * the storage-class slot, so the functions get external linkage.
+ */
+INTERVAL_TREE_DEFINE(struct usnic_uiom_interval_node, rb,
+ unsigned long, __subtree_last,
+ START, LAST, , usnic_uiom_interval_tree)
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h
new file mode 100644
index 00000000000..d4f752e258f
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_UIOM_INTERVAL_TREE_H_
+#define USNIC_UIOM_INTERVAL_TREE_H_
+
+#include <linux/rbtree.h>
+
+/*
+ * One closed interval [start, last] stored in the usnic_uiom interval tree.
+ */
+struct usnic_uiom_interval_node {
+ /* Tree linkage used by the generated interval-tree functions. */
+ struct rb_node rb;
+ /* Scratch list linkage for the temporary lists built in the .c file. */
+ struct list_head link;
+ unsigned long start;
+ unsigned long last;
+ /* Augmented value maintained by the generic interval-tree code. */
+ unsigned long __subtree_last;
+ /* Incremented on overlapping insert, decremented on remove. */
+ unsigned int ref_cnt;
+ int flags;
+};
+
+extern void
+usnic_uiom_interval_tree_insert(struct usnic_uiom_interval_node *node,
+ struct rb_root *root);
+extern void
+usnic_uiom_interval_tree_remove(struct usnic_uiom_interval_node *node,
+ struct rb_root *root);
+extern struct usnic_uiom_interval_node *
+usnic_uiom_interval_tree_iter_first(struct rb_root *root,
+ unsigned long start,
+ unsigned long last);
+extern struct usnic_uiom_interval_node *
+usnic_uiom_interval_tree_iter_next(struct usnic_uiom_interval_node *node,
+ unsigned long start, unsigned long last);
+/*
+ * Inserts {start...last} into {root}. If there are overlaps,
+ * nodes will be broken up and merged
+ */
+int usnic_uiom_insert_interval(struct rb_root *root,
+ unsigned long start, unsigned long last,
+ int flags);
+/*
+ * Removes {start...last} from {root}. The nodes removed are returned in
+ * 'removed'. The caller is responsible for freeing the memory of the nodes
+ * in 'removed'.
+ */
+void usnic_uiom_remove_interval(struct rb_root *root,
+ unsigned long start, unsigned long last,
+ struct list_head *removed);
+/*
+ * Returns {start...last} - {root} (relative complement of {start...last} in
+ * {root}) in diff_set sorted ascendingly
+ */
+int usnic_uiom_get_intervals_diff(unsigned long start,
+ unsigned long last, int flags,
+ int flag_mask,
+ struct rb_root *root,
+ struct list_head *diff_set);
+/* Call this to free diff_set returned by usnic_uiom_get_intervals_diff */
+void usnic_uiom_put_interval_set(struct list_head *intervals);
+#endif /* USNIC_UIOM_INTERVAL_TREE_H_ */
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c
new file mode 100644
index 00000000000..656b88c39ed
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.c
@@ -0,0 +1,467 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "usnic_ib.h"
+#include "vnic_resource.h"
+#include "usnic_log.h"
+#include "usnic_vnic.h"
+
+/*
+ * Per-PCI-function state: the registered vnic device, its mapped BARs and
+ * one resource chunk per usnic resource type.
+ */
+struct usnic_vnic {
+ struct vnic_dev *vdev;
+ struct vnic_dev_bar bar[PCI_NUM_RESOURCES];
+ /* Indexed by enum usnic_vnic_res_type. */
+ struct usnic_vnic_res_chunk chunks[USNIC_VNIC_RES_TYPE_MAX];
+ /* Taken around resource ownership changes and while dumping them. */
+ spinlock_t res_lock;
+};
+
+/*
+ * Translate a usnic resource type into the matching vnic resource type.
+ *
+ * The table is generated from USNIC_VNIC_RES_TYPES so it always follows the
+ * enum order. It is never written, hence 'static const' like the string
+ * table in usnic_vnic_res_type_to_str().
+ *
+ * Returns RES_TYPE_MAX for values at or beyond USNIC_VNIC_RES_TYPE_MAX.
+ */
+static enum vnic_res_type _to_vnic_res_type(enum usnic_vnic_res_type res_type)
+{
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+		vnic_res_type,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+		vnic_res_type,
+	static const enum vnic_res_type usnic_vnic_type_2_vnic_type[] = {
+						USNIC_VNIC_RES_TYPES};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+	if (res_type >= USNIC_VNIC_RES_TYPE_MAX)
+		return RES_TYPE_MAX;
+
+	return usnic_vnic_type_2_vnic_type[res_type];
+}
+
+/*
+ * Return the description string of @res_type as listed in
+ * USNIC_VNIC_RES_TYPES, or "unknown" for values at or beyond
+ * USNIC_VNIC_RES_TYPE_MAX.
+ */
+const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type)
+{
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+		desc,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+		desc,
+	static const char * const res_type_names[] = {
+		USNIC_VNIC_RES_TYPES
+	};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+	return (res_type < USNIC_VNIC_RES_TYPE_MAX) ?
+		res_type_names[res_type] : "unknown";
+}
+
+/* Return the PCI name of the device backing @vnic. */
+const char *usnic_vnic_pci_name(struct usnic_vnic *vnic)
+{
+	struct pci_dev *pdev = usnic_vnic_get_pdev(vnic);
+
+	return pci_name(pdev);
+}
+
+/*
+ * Format a textual table describing @vnic and all of its resources into
+ * @buf (at most @buf_sz bytes, via scnprintf).
+ *
+ * The optional callbacks extend the output: @printtitle(@hdr_obj, ...) adds
+ * to the title line, @printcols adds column headers and @printrow is called
+ * once per resource with that resource's owner (NULL when unowned). Each
+ * callback's return value is added to the running offset.
+ *
+ * The resource rows are produced with vnic->res_lock held, so the callbacks
+ * run under that spinlock.
+ *
+ * Returns the accumulated offset into @buf.
+ */
+int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf,
+ int buf_sz,
+ void *hdr_obj,
+ int (*printtitle)(void *, char*, int),
+ int (*printcols)(char *, int),
+ int (*printrow)(void *, char *, int))
+{
+ struct usnic_vnic_res_chunk *chunk;
+ struct usnic_vnic_res *res;
+ struct vnic_dev_bar *bar0;
+ int i, j, offset;
+
+ offset = 0;
+ bar0 = usnic_vnic_get_bar(vnic, 0);
+ /* Title line: VF index and BAR0 location. */
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "VF:%hu BAR0 bus_addr=%pa vaddr=0x%p size=%ld ",
+ usnic_vnic_get_index(vnic),
+ &bar0->bus_addr,
+ bar0->vaddr, bar0->len);
+ if (printtitle)
+ offset += printtitle(hdr_obj, buf + offset, buf_sz - offset);
+ offset += scnprintf(buf + offset, buf_sz - offset, "\n");
+ /* Column header line. */
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "|RES\t|CTRL_PIN\t\t|IN_USE\t");
+ if (printcols)
+ offset += printcols(buf + offset, buf_sz - offset);
+ offset += scnprintf(buf + offset, buf_sz - offset, "\n");
+
+ /* One row per resource; ownership must not change while printing. */
+ spin_lock(&vnic->res_lock);
+ for (i = 0; i < ARRAY_SIZE(vnic->chunks); i++) {
+ chunk = &vnic->chunks[i];
+ for (j = 0; j < chunk->cnt; j++) {
+ res = chunk->res[j];
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "|%s[%u]\t|0x%p\t|%u\t",
+ usnic_vnic_res_type_to_str(res->type),
+ res->vnic_idx, res->ctrl, !!res->owner);
+ if (printrow) {
+ offset += printrow(res->owner, buf + offset,
+ buf_sz - offset);
+ }
+ offset += scnprintf(buf + offset, buf_sz - offset,
+ "\n");
+ }
+ }
+ spin_unlock(&vnic->res_lock);
+ return offset;
+}
+
+/*
+ * Set the count of the first entry of type @trgt_type in @spec to @cnt.
+ * Triggers WARN_ON() when @spec has no entry of that type.
+ */
+void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
+				enum usnic_vnic_res_type trgt_type,
+				u16 cnt)
+{
+	int idx = 0;
+
+	while (idx < USNIC_VNIC_RES_TYPE_MAX) {
+		if (spec->resources[idx].type != trgt_type) {
+			idx++;
+			continue;
+		}
+
+		spec->resources[idx].cnt = cnt;
+		return;
+	}
+
+	WARN_ON(1);
+}
+
+/*
+ * Check that @res_spec provides at least what @min_spec asks for.
+ *
+ * For every entry of @min_spec the entry of the same type is searched in
+ * @res_spec. The search is indexed by 'j': the previous code indexed
+ * @res_spec with 'i', so the inner loop never searched and the two specs
+ * only matched when their entries happened to be in the same order.
+ *
+ * Returns 0 when every type of @min_spec is present in @res_spec with a
+ * count that is not smaller, -EINVAL otherwise.
+ */
+int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
+					struct usnic_vnic_res_spec *res_spec)
+{
+	int found, i, j;
+
+	for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+		found = 0;
+
+		for (j = 0; j < USNIC_VNIC_RES_TYPE_MAX; j++) {
+			if (res_spec->resources[j].type !=
+				min_spec->resources[i].type)
+				continue;
+			found = 1;
+			if (min_spec->resources[i].cnt >
+					res_spec->resources[j].cnt)
+				return -EINVAL;
+			break;
+		}
+
+		if (!found)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Format all USNIC_VNIC_RES_TYPE_MAX entries of @res_spec into @buf (at
+ * most @buf_sz bytes) as "Res: <type> Cnt: <n> " records.
+ *
+ * Returns the number of characters written.
+ */
+int usnic_vnic_spec_dump(char *buf, int buf_sz,
+				struct usnic_vnic_res_spec *res_spec)
+{
+	int written = 0;
+	int idx;
+
+	for (idx = 0; idx < USNIC_VNIC_RES_TYPE_MAX; idx++) {
+		enum usnic_vnic_res_type res_type =
+			res_spec->resources[idx].type;
+		int res_cnt = res_spec->resources[idx].cnt;
+
+		written += scnprintf(buf + written, buf_sz - written,
+				"Res: %s Cnt: %d ",
+				usnic_vnic_res_type_to_str(res_type),
+				res_cnt);
+	}
+
+	return written;
+}
+
+/*
+ * Check whether @vnic currently has enough free resources for @res_spec.
+ * Entries are examined in order until the first USNIC_VNIC_RES_TYPE_EOL.
+ *
+ * Returns 0 when every requested count fits, -EBUSY otherwise.
+ */
+int usnic_vnic_check_room(struct usnic_vnic *vnic,
+				struct usnic_vnic_res_spec *res_spec)
+{
+	int idx;
+
+	for (idx = 0; idx < USNIC_VNIC_RES_TYPE_MAX; idx++) {
+		enum usnic_vnic_res_type wanted_type =
+			res_spec->resources[idx].type;
+		int wanted_cnt = res_spec->resources[idx].cnt;
+
+		if (wanted_type == USNIC_VNIC_RES_TYPE_EOL)
+			return 0;
+
+		if (wanted_cnt > usnic_vnic_res_free_cnt(vnic, wanted_type))
+			return -EBUSY;
+	}
+
+	return 0;
+}
+
+/* Return the total number of resources of @type on @vnic. */
+int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
+				enum usnic_vnic_res_type type)
+{
+	const struct usnic_vnic_res_chunk *chunk = &vnic->chunks[type];
+
+	return chunk->cnt;
+}
+
+/* Return the number of currently unowned resources of @type on @vnic. */
+int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
+				enum usnic_vnic_res_type type)
+{
+	const struct usnic_vnic_res_chunk *chunk = &vnic->chunks[type];
+
+	return chunk->free_cnt;
+}
+
+/*
+ * Reserve @cnt free resources of @type on @vnic for @owner.
+ *
+ * A new chunk is allocated (GFP_ATOMIC) whose res[] array points at the
+ * reserved resources; each of them gets its owner set under vnic->res_lock.
+ * The pointer array is allocated with kcalloc() so the element-count
+ * multiplication is overflow checked.
+ *
+ * The free-count check at the top runs without the lock; a mismatch between
+ * the requested and the reserved count is reported through WARN_ON().
+ *
+ * Returns the chunk, ERR_PTR(-EINVAL) for a bad request (too few free
+ * resources, @cnt < 1 or NULL @owner) or ERR_PTR(-ENOMEM). Release the
+ * chunk with usnic_vnic_put_resources().
+ */
+struct usnic_vnic_res_chunk *
+usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
+				int cnt, void *owner)
+{
+	struct usnic_vnic_res_chunk *src, *ret;
+	struct usnic_vnic_res *res;
+	int i;
+
+	if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
+		return ERR_PTR(-EINVAL);
+
+	ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
+	if (!ret) {
+		usnic_err("Failed to allocate chunk for %s - Out of memory\n",
+				usnic_vnic_pci_name(vnic));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
+	if (!ret->res) {
+		usnic_err("Failed to allocate resources for %s. Out of memory\n",
+				usnic_vnic_pci_name(vnic));
+		kfree(ret);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	spin_lock(&vnic->res_lock);
+	src = &vnic->chunks[type];
+	/* Hand out the first @cnt unowned resources of this type. */
+	for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+		res = src->res[i];
+		if (!res->owner) {
+			src->free_cnt--;
+			res->owner = owner;
+			ret->res[ret->cnt++] = res;
+		}
+	}
+
+	spin_unlock(&vnic->res_lock);
+	ret->type = type;
+	ret->vnic = vnic;
+	WARN_ON(ret->cnt != cnt);
+
+	return ret;
+}
+
+/*
+ * Give back every resource held by @chunk and free the chunk itself.
+ * Under vnic->res_lock each resource loses its owner and the free count of
+ * its type on the owning vnic is incremented.
+ */
+void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
+{
+	struct usnic_vnic *vnic = chunk->vnic;
+	struct usnic_vnic_res *res;
+	int i;
+
+	spin_lock(&vnic->res_lock);
+	for (;;) {
+		/* Walk from the last entry down, consuming chunk->cnt. */
+		i = --chunk->cnt;
+		if (i < 0)
+			break;
+
+		res = chunk->res[i];
+		chunk->res[i] = NULL;
+		res->owner = NULL;
+		vnic->chunks[res->type].free_cnt++;
+	}
+	spin_unlock(&vnic->res_lock);
+
+	kfree(chunk->res);
+	kfree(chunk);
+}
+
+/* Return the index of @vnic, computed as its PCI devfn minus one. */
+u16 usnic_vnic_get_index(struct usnic_vnic *vnic)
+{
+	struct pci_dev *pdev = usnic_vnic_get_pdev(vnic);
+
+	return pdev->devfn - 1;
+}
+
+/*
+ * Populate @chunk with one usnic_vnic_res per resource of @type that the
+ * vnic device reports.
+ *
+ * The pointer array is allocated with kcalloc() so the size multiplication
+ * is overflow checked. The counts are only published once every allocation
+ * has succeeded, and on failure chunk->res is reset to NULL, so a failed
+ * chunk never describes freed memory.
+ *
+ * Returns 0 on success, -EINVAL when the device reports no resource of
+ * @type, or -ENOMEM.
+ */
+static int usnic_vnic_alloc_res_chunk(struct usnic_vnic *vnic,
+					enum usnic_vnic_res_type type,
+					struct usnic_vnic_res_chunk *chunk)
+{
+	int cnt, err, i;
+	struct usnic_vnic_res *res;
+
+	cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type));
+	if (cnt < 1)
+		return -EINVAL;
+
+	chunk->res = kcalloc(cnt, sizeof(*(chunk->res)), GFP_KERNEL);
+	if (!chunk->res)
+		return -ENOMEM;
+
+	for (i = 0; i < cnt; i++) {
+		res = kzalloc(sizeof(*res), GFP_KERNEL);
+		if (!res) {
+			err = -ENOMEM;
+			goto fail;
+		}
+		res->type = type;
+		res->vnic_idx = i;
+		res->vnic = vnic;
+		res->ctrl = vnic_dev_get_res(vnic->vdev,
+				_to_vnic_res_type(type), i);
+		chunk->res[i] = res;
+	}
+
+	chunk->cnt = chunk->free_cnt = cnt;
+	chunk->vnic = vnic;
+	return 0;
+fail:
+	/* Entry i failed; free entries i-1 .. 0 and the array itself. */
+	for (i--; i >= 0; i--)
+		kfree(chunk->res[i]);
+	kfree(chunk->res);
+	chunk->res = NULL;
+	return err;
+}
+
+/*
+ * Free every resource descriptor of @chunk and the pointer array holding
+ * them. @chunk itself is not freed.
+ */
+static void usnic_vnic_free_res_chunk(struct usnic_vnic_res_chunk *chunk)
+{
+	int idx = 0;
+
+	while (idx < chunk->cnt) {
+		kfree(chunk->res[idx]);
+		idx++;
+	}
+
+	kfree(chunk->res);
+}
+
+/*
+ * Map all memory BARs of @pdev, register the vnic device and build one
+ * resource chunk per usnic resource type.
+ *
+ * On failure everything set up so far is undone. BARs were mapped with
+ * pci_iomap(), so they are released with the matching pci_iounmap() rather
+ * than a bare iounmap().
+ *
+ * Returns 0 on success, -ENODEV when a BAR cannot be mapped, -EINVAL when
+ * device registration fails, or the error of the chunk allocation.
+ */
+static int usnic_vnic_discover_resources(struct pci_dev *pdev,
+						struct usnic_vnic *vnic)
+{
+	enum usnic_vnic_res_type res_type;
+	int i;
+	int err = 0;
+
+	for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+			continue;
+		vnic->bar[i].len = pci_resource_len(pdev, i);
+		vnic->bar[i].vaddr = pci_iomap(pdev, i, vnic->bar[i].len);
+		if (!vnic->bar[i].vaddr) {
+			usnic_err("Cannot memory-map BAR %d, aborting\n",
+					i);
+			err = -ENODEV;
+			goto out_clean_bar;
+		}
+		vnic->bar[i].bus_addr = pci_resource_start(pdev, i);
+	}
+
+	vnic->vdev = vnic_dev_register(NULL, pdev, pdev, vnic->bar,
+			ARRAY_SIZE(vnic->bar));
+	if (!vnic->vdev) {
+		usnic_err("Failed to register device %s\n",
+				pci_name(pdev));
+		err = -EINVAL;
+		goto out_clean_bar;
+	}
+
+	/* Slot 0 (EOL) is only a terminator and gets no chunk. */
+	for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+			res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) {
+		err = usnic_vnic_alloc_res_chunk(vnic, res_type,
+						&vnic->chunks[res_type]);
+		if (err) {
+			usnic_err("Failed to alloc res %s with err %d\n",
+					usnic_vnic_res_type_to_str(res_type),
+					err);
+			goto out_clean_chunks;
+		}
+	}
+
+	return 0;
+
+out_clean_chunks:
+	/* The failing chunk cleaned up after itself; free the earlier ones. */
+	for (res_type--; res_type > USNIC_VNIC_RES_TYPE_EOL; res_type--)
+		usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
+	vnic_dev_unregister(vnic->vdev);
+out_clean_bar:
+	for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+			continue;
+		/* First unmapped memory BAR marks where mapping stopped. */
+		if (!vnic->bar[i].vaddr)
+			break;
+
+		pci_iounmap(pdev, vnic->bar[i].vaddr);
+	}
+
+	return err;
+}
+
+/* Return the PCI device behind @vnic's registered vnic device. */
+struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic)
+{
+	struct vnic_dev *vdev = vnic->vdev;
+
+	return vnic_dev_get_pdev(vdev);
+}
+
+/*
+ * Return the descriptor of BAR @bar_num of @vnic, or NULL when @bar_num is
+ * outside the bar[] array.
+ */
+struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
+				int bar_num)
+{
+	if (!(bar_num < ARRAY_SIZE(vnic->bar)))
+		return NULL;
+
+	return &vnic->bar[bar_num];
+}
+
+/*
+ * Undo usnic_vnic_discover_resources(): free all resource chunks,
+ * unregister the vnic device and unmap every memory BAR.
+ *
+ * The BARs were mapped with pci_iomap(), so they are released with the
+ * matching pci_iounmap() rather than a bare iounmap().
+ */
+static void usnic_vnic_release_resources(struct usnic_vnic *vnic)
+{
+	int i;
+	struct pci_dev *pdev;
+	enum usnic_vnic_res_type res_type;
+
+	/* Fetch pdev first: it is looked up through vdev, which goes away. */
+	pdev = usnic_vnic_get_pdev(vnic);
+
+	for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+			res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++)
+		usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
+
+	vnic_dev_unregister(vnic->vdev);
+
+	for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+			continue;
+		pci_iounmap(pdev, vnic->bar[i].vaddr);
+	}
+}
+
+/*
+ * Allocate a usnic_vnic for the enabled PCI device @pdev and discover its
+ * resources.
+ *
+ * Returns the new vnic, ERR_PTR(-EINVAL) when @pdev is not enabled,
+ * ERR_PTR(-ENOMEM) on allocation failure, or the ERR_PTR-encoded error of
+ * the resource discovery. Free with usnic_vnic_free().
+ */
+struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev)
+{
+	struct usnic_vnic *vnic;
+	int err;
+
+	if (!pci_is_enabled(pdev)) {
+		usnic_err("PCI dev %s is disabled\n", pci_name(pdev));
+		return ERR_PTR(-EINVAL);
+	}
+
+	vnic = kzalloc(sizeof(*vnic), GFP_KERNEL);
+	if (!vnic) {
+		usnic_err("Failed to alloc vnic for %s - out of memory\n",
+				pci_name(pdev));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	spin_lock_init(&vnic->res_lock);
+
+	err = usnic_vnic_discover_resources(pdev, vnic);
+	if (!err) {
+		usnic_dbg("Allocated vnic for %s\n",
+				usnic_vnic_pci_name(vnic));
+		return vnic;
+	}
+
+	usnic_err("Failed to discover %s resources with err %d\n",
+			pci_name(pdev), err);
+	kfree(vnic);
+
+	return ERR_PTR(err);
+}
+
+/*
+ * Release everything usnic_vnic_alloc() set up for @vnic (resource chunks,
+ * vnic device registration, BAR mappings) and free @vnic itself.
+ */
+void usnic_vnic_free(struct usnic_vnic *vnic)
+{
+ usnic_vnic_release_resources(vnic);
+ kfree(vnic);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.h b/drivers/infiniband/hw/usnic/usnic_vnic.h
new file mode 100644
index 00000000000..14d931a8829
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_VNIC_H_
+#define USNIC_VNIC_H_
+
+#include <linux/pci.h>
+
+#include "vnic_dev.h"
+
+/* =USNIC_VNIC_RES_TYPE= =VNIC_RES= =DESC= */
+#define USNIC_VNIC_RES_TYPES \
+ DEFINE_USNIC_VNIC_RES_AT(EOL, RES_TYPE_EOL, "EOL", 0) \
+ DEFINE_USNIC_VNIC_RES(WQ, RES_TYPE_WQ, "WQ") \
+ DEFINE_USNIC_VNIC_RES(RQ, RES_TYPE_RQ, "RQ") \
+ DEFINE_USNIC_VNIC_RES(CQ, RES_TYPE_CQ, "CQ") \
+ DEFINE_USNIC_VNIC_RES(INTR, RES_TYPE_INTR_CTRL, "INT") \
+ DEFINE_USNIC_VNIC_RES(MAX, RES_TYPE_MAX, "MAX")\
+
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+ USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t = val,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+ USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t,
+enum usnic_vnic_res_type {
+ USNIC_VNIC_RES_TYPES
+};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+struct usnic_vnic_res {
+ enum usnic_vnic_res_type type;
+ unsigned int vnic_idx;
+ struct usnic_vnic *vnic;
+ void __iomem *ctrl;
+ void *owner;
+};
+
+struct usnic_vnic_res_chunk {
+ enum usnic_vnic_res_type type;
+ int cnt;
+ int free_cnt;
+ struct usnic_vnic_res **res;
+ struct usnic_vnic *vnic;
+};
+
+struct usnic_vnic_res_desc {
+ enum usnic_vnic_res_type type;
+ uint16_t cnt;
+};
+
+struct usnic_vnic_res_spec {
+ struct usnic_vnic_res_desc resources[USNIC_VNIC_RES_TYPE_MAX];
+};
+
+const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type);
+const char *usnic_vnic_pci_name(struct usnic_vnic *vnic);
+int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf, int buf_sz,
+ void *hdr_obj,
+ int (*printtitle)(void *, char*, int),
+ int (*printcols)(char *, int),
+ int (*printrow)(void *, char *, int));
+void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
+ enum usnic_vnic_res_type trgt_type,
+ u16 cnt);
+int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
+ struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_spec_dump(char *buf, int buf_sz,
+ struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_check_room(struct usnic_vnic *vnic,
+ struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type);
+int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type);
+struct usnic_vnic_res_chunk *
+usnic_vnic_get_resources(struct usnic_vnic *vnic,
+ enum usnic_vnic_res_type type,
+ int cnt,
+ void *owner);
+void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk);
+struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic);
+struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
+ int bar_num);
+struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev);
+void usnic_vnic_free(struct usnic_vnic *vnic);
+u16 usnic_vnic_get_index(struct usnic_vnic *vnic);
+
+#endif /*!USNIC_VNIC_H_*/
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile
new file mode 100644
index 00000000000..f3c7dcf0309
--- /dev/null
+++ b/drivers/infiniband/ulp/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_INFINIBAND_IPOIB) += ipoib/
+obj-$(CONFIG_INFINIBAND_SRP) += srp/
+obj-$(CONFIG_INFINIBAND_SRPT) += srpt/
+obj-$(CONFIG_INFINIBAND_ISER) += iser/
+obj-$(CONFIG_INFINIBAND_ISERT) += isert/
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index eb71aaa26a9..c639f90cfda 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -101,6 +101,7 @@ enum {
IPOIB_MCAST_FLAG_SENDONLY = 1,
IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
IPOIB_MCAST_FLAG_ATTACHED = 3,
+ IPOIB_MCAST_JOIN_STARTED = 4,
MAX_SEND_CQE = 16,
IPOIB_CM_COPYBREAK = 256,
@@ -151,6 +152,7 @@ struct ipoib_mcast {
struct sk_buff_head pkt_queue;
struct net_device *dev;
+ struct completion done;
};
struct ipoib_rx_buf {
@@ -299,7 +301,7 @@ struct ipoib_dev_priv {
unsigned long flags;
- struct mutex vlan_mutex;
+ struct rw_semaphore vlan_rwsem;
struct rb_root path_tree;
struct list_head path_list;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 7a3175400b2..933efcea0d0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -140,7 +140,8 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
struct ipoib_cm_rx_buf *rx_ring,
int id, int frags,
- u64 mapping[IPOIB_CM_RX_SG])
+ u64 mapping[IPOIB_CM_RX_SG],
+ gfp_t gfp)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
@@ -164,7 +165,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
}
for (i = 0; i < frags; i++) {
- struct page *page = alloc_page(GFP_ATOMIC);
+ struct page *page = alloc_page(gfp);
if (!page)
goto partial_error;
@@ -382,7 +383,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
for (i = 0; i < ipoib_recvq_size; ++i) {
if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1,
- rx->rx_ring[i].mapping)) {
+ rx->rx_ring[i].mapping,
+ GFP_KERNEL)) {
ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
ret = -ENOMEM;
goto err_count;
@@ -639,7 +641,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
(unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
- newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping);
+ newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
+ mapping, GFP_ATOMIC);
if (unlikely(!newskb)) {
/*
* If we can't allocate a new RX buffer, dump
@@ -1027,10 +1030,20 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
.cap.max_send_sge = 1,
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_RC,
- .qp_context = tx
+ .qp_context = tx,
+ .create_flags = IB_QP_CREATE_USE_GFP_NOIO
};
- return ib_create_qp(priv->pd, &attr);
+ struct ib_qp *tx_qp;
+
+ tx_qp = ib_create_qp(priv->pd, &attr);
+ if (PTR_ERR(tx_qp) == -EINVAL) {
+ ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n",
+ priv->ca->name);
+ attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
+ tx_qp = ib_create_qp(priv->pd, &attr);
+ }
+ return tx_qp;
}
static int ipoib_cm_send_req(struct net_device *dev,
@@ -1101,12 +1114,14 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
struct ipoib_dev_priv *priv = netdev_priv(p->dev);
int ret;
- p->tx_ring = vzalloc(ipoib_sendq_size * sizeof *p->tx_ring);
+ p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
+ GFP_NOIO, PAGE_KERNEL);
if (!p->tx_ring) {
ipoib_warn(priv, "failed to allocate tx ring\n");
ret = -ENOMEM;
goto err_tx;
}
+ memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
p->qp = ipoib_cm_create_tx_qp(p->dev, p);
if (IS_ERR(p->qp)) {
@@ -1556,7 +1571,8 @@ int ipoib_cm_dev_init(struct net_device *dev)
for (i = 0; i < ipoib_recvq_size; ++i) {
if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
priv->cm.num_frags - 1,
- priv->cm.srq_ring[i].mapping)) {
+ priv->cm.srq_ring[i].mapping,
+ GFP_KERNEL)) {
ipoib_warn(priv, "failed to allocate "
"receive buffer %d\n", i);
ipoib_cm_dev_cleanup(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index c4b3940845e..078cadd6c79 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -105,5 +105,5 @@ static const struct ethtool_ops ipoib_ethtool_ops = {
void ipoib_set_ethtool_ops(struct net_device *dev)
{
- SET_ETHTOOL_OPS(dev, &ipoib_ethtool_ops);
+ dev->ethtool_ops = &ipoib_ethtool_ops;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 196b1d13cbc..6a7003ddb0b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -685,15 +685,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- ipoib_ib_dev_stop(dev, 1);
- return -1;
+ goto dev_stop;
}
ret = ipoib_cm_dev_open(dev);
if (ret) {
ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
- ipoib_ib_dev_stop(dev, 1);
- return -1;
+ goto dev_stop;
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
@@ -704,6 +702,11 @@ int ipoib_ib_dev_open(struct net_device *dev)
napi_enable(&priv->napi);
return 0;
+dev_stop:
+ if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+ napi_enable(&priv->napi);
+ ipoib_ib_dev_stop(dev, 1);
+ return -1;
}
static void ipoib_pkey_dev_check_presence(struct net_device *dev)
@@ -746,10 +749,8 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
mutex_lock(&pkey_mutex);
set_bit(IPOIB_PKEY_STOP, &priv->flags);
- cancel_delayed_work(&priv->pkey_poll_task);
+ cancel_delayed_work_sync(&priv->pkey_poll_task);
mutex_unlock(&pkey_mutex);
- if (flush)
- flush_workqueue(ipoib_workqueue);
}
ipoib_mcast_stop_thread(dev, flush);
@@ -974,7 +975,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
u16 new_index;
int result;
- mutex_lock(&priv->vlan_mutex);
+ down_read(&priv->vlan_rwsem);
/*
* Flush any child interfaces too -- they might be up even if
@@ -983,7 +984,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
list_for_each_entry(cpriv, &priv->child_intfs, list)
__ipoib_ib_dev_flush(cpriv, level);
- mutex_unlock(&priv->vlan_mutex);
+ up_read(&priv->vlan_rwsem);
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
/* for non-child devices must check/update the pkey value here */
@@ -1081,6 +1082,11 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
ipoib_dbg(priv, "cleaning up ib_dev\n");
+ /*
+ * We must make sure there are no more (path) completions
+ * that may wish to touch priv fields that are no longer valid
+ */
+ ipoib_flush_paths(dev);
ipoib_mcast_stop_thread(dev, 1);
ipoib_mcast_dev_flush(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 82cec1af902..5786a78ff8b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -104,6 +104,8 @@ int ipoib_open(struct net_device *dev)
ipoib_dbg(priv, "bringing up interface\n");
+ netif_carrier_off(dev);
+
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
if (ipoib_pkey_dev_delay_open(dev))
@@ -119,7 +121,7 @@ int ipoib_open(struct net_device *dev)
struct ipoib_dev_priv *cpriv;
/* Bring up any child interfaces too */
- mutex_lock(&priv->vlan_mutex);
+ down_read(&priv->vlan_rwsem);
list_for_each_entry(cpriv, &priv->child_intfs, list) {
int flags;
@@ -129,7 +131,7 @@ int ipoib_open(struct net_device *dev)
dev_change_flags(cpriv->dev, flags | IFF_UP);
}
- mutex_unlock(&priv->vlan_mutex);
+ up_read(&priv->vlan_rwsem);
}
netif_start_queue(dev);
@@ -162,7 +164,7 @@ static int ipoib_stop(struct net_device *dev)
struct ipoib_dev_priv *cpriv;
/* Bring down any child interfaces too */
- mutex_lock(&priv->vlan_mutex);
+ down_read(&priv->vlan_rwsem);
list_for_each_entry(cpriv, &priv->child_intfs, list) {
int flags;
@@ -172,7 +174,7 @@ static int ipoib_stop(struct net_device *dev)
dev_change_flags(cpriv->dev, flags & ~IFF_UP);
}
- mutex_unlock(&priv->vlan_mutex);
+ up_read(&priv->vlan_rwsem);
}
return 0;
@@ -1350,7 +1352,7 @@ void ipoib_setup(struct net_device *dev)
ipoib_set_ethtool_ops(dev);
- netif_napi_add(dev, &priv->napi, ipoib_poll, 100);
+ netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
dev->watchdog_timeo = HZ;
@@ -1366,13 +1368,11 @@ void ipoib_setup(struct net_device *dev)
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
- netif_carrier_off(dev);
-
priv->dev = dev;
spin_lock_init(&priv->lock);
- mutex_init(&priv->vlan_mutex);
+ init_rwsem(&priv->vlan_rwsem);
INIT_LIST_HEAD(&priv->path_list);
INIT_LIST_HEAD(&priv->child_intfs);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index cecb98a4c66..d4e005720d0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -386,8 +386,10 @@ static int ipoib_mcast_join_complete(int status,
mcast->mcmember.mgid.raw, status);
/* We trap for port events ourselves. */
- if (status == -ENETRESET)
- return 0;
+ if (status == -ENETRESET) {
+ status = 0;
+ goto out;
+ }
if (!status)
status = ipoib_mcast_join_finish(mcast, &multicast->rec);
@@ -407,7 +409,8 @@ static int ipoib_mcast_join_complete(int status,
if (mcast == priv->broadcast)
queue_work(ipoib_workqueue, &priv->carrier_on_task);
- return 0;
+ status = 0;
+ goto out;
}
if (mcast->logcount++ < 20) {
@@ -434,7 +437,8 @@ static int ipoib_mcast_join_complete(int status,
mcast->backoff * HZ);
spin_unlock_irq(&priv->lock);
mutex_unlock(&mcast_mutex);
-
+out:
+ complete(&mcast->done);
return status;
}
@@ -484,11 +488,15 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
}
set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ init_completion(&mcast->done);
+ set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
+
mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
&rec, comp_mask, GFP_KERNEL,
ipoib_mcast_join_complete, mcast);
if (IS_ERR(mcast->mc)) {
clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+ complete(&mcast->done);
ret = PTR_ERR(mcast->mc);
ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
@@ -510,10 +518,18 @@ void ipoib_mcast_join_task(struct work_struct *work)
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, mcast_task.work);
struct net_device *dev = priv->dev;
+ struct ib_port_attr port_attr;
if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
return;
+ if (ib_query_port(priv->ca, priv->port, &port_attr) ||
+ port_attr.state != IB_PORT_ACTIVE) {
+ ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n",
+ port_attr.state);
+ return;
+ }
+
if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
ipoib_warn(priv, "ib_query_gid() failed\n");
else
@@ -751,6 +767,11 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
spin_unlock_irqrestore(&priv->lock, flags);
+ /* separate the wait from the leave */
+ list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+ if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
+ wait_for_completion(&mcast->done);
+
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
ipoib_mcast_leave(dev, mcast);
ipoib_mcast_free(mcast);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index f81abe16cf0..cdc7df4fdb8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -31,6 +31,7 @@
*/
#include <linux/netdevice.h>
+#include <linux/if_arp.h> /* For ARPHRD_xxx */
#include <linux/module.h>
#include <net/rtnetlink.h>
#include "ipoib.h"
@@ -103,7 +104,7 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
return -EINVAL;
pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
- if (!pdev)
+ if (!pdev || pdev->type != ARPHRD_INFINIBAND)
return -ENODEV;
ppriv = netdev_priv(pdev);
@@ -142,10 +143,10 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head
priv = netdev_priv(dev);
ppriv = netdev_priv(priv->parent);
- mutex_lock(&ppriv->vlan_mutex);
+ down_write(&ppriv->vlan_rwsem);
unregister_netdevice_queue(dev, head);
list_del(&priv->list);
- mutex_unlock(&ppriv->vlan_mutex);
+ up_write(&ppriv->vlan_rwsem);
}
static size_t ipoib_get_size(const struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 049a997caff..c56d5d44c53 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -192,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+ if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING)
+ init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
+
if (dev->features & NETIF_F_SG)
init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 8292554bccb..9fad7b5ac8b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -140,7 +140,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
if (!rtnl_trylock())
return restart_syscall();
- mutex_lock(&ppriv->vlan_mutex);
+ down_write(&ppriv->vlan_rwsem);
/*
* First ensure this isn't a duplicate. We check the parent device and
@@ -163,7 +163,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
out:
- mutex_unlock(&ppriv->vlan_mutex);
+ up_write(&ppriv->vlan_rwsem);
if (result)
free_netdev(priv->dev);
@@ -185,7 +185,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
if (!rtnl_trylock())
return restart_syscall();
- mutex_lock(&ppriv->vlan_mutex);
+
+ down_write(&ppriv->vlan_rwsem);
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
if (priv->pkey == pkey &&
priv->child_type == IPOIB_LEGACY_CHILD) {
@@ -195,7 +196,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
break;
}
}
- mutex_unlock(&ppriv->vlan_mutex);
+ up_write(&ppriv->vlan_rwsem);
+
rtnl_unlock();
if (dev) {
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index dd03cfe596d..eb7973957a6 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -5,7 +5,7 @@
* Copyright (C) 2004 Alex Aizman
* Copyright (C) 2005 Mike Christie
* Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
* maintained by openib-general@openib.org
*
* This software is available to you under a choice of one of two
@@ -82,6 +82,8 @@ static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
int iser_debug_level = 0;
+bool iser_pi_enable = false;
+int iser_pi_guard = 0;
MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
MODULE_LICENSE("Dual BSD/GPL");
@@ -91,6 +93,13 @@ MODULE_VERSION(DRV_VER);
module_param_named(debug_level, iser_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)");
+module_param_named(pi_enable, iser_pi_enable, bool, 0644);
+MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
+
+module_param_named(pi_guard, iser_pi_guard, int, 0644);
+MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)");
+
+static struct workqueue_struct *release_wq;
struct iser_global ig;
void
@@ -138,8 +147,8 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc)
{
- struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
- struct iser_device *device = iser_conn->ib_conn->device;
+ struct iser_conn *ib_conn = task->conn->dd_data;
+ struct iser_device *device = ib_conn->device;
struct iscsi_iser_task *iser_task = task->dd_data;
u64 dma_addr;
@@ -153,7 +162,7 @@ int iser_initialize_task_headers(struct iscsi_task *task,
tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
tx_desc->tx_sg[0].lkey = device->mr->lkey;
- iser_task->iser_conn = iser_conn;
+ iser_task->ib_conn = ib_conn;
return 0;
}
/**
@@ -176,6 +185,8 @@ iscsi_iser_task_init(struct iscsi_task *task)
iser_task->command_sent = 0;
iser_task_rdma_init(iser_task);
+ iser_task->sc = task->sc;
+
return 0;
}
@@ -278,10 +289,9 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
static void iscsi_iser_cleanup_task(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_tx_desc *tx_desc = &iser_task->desc;
-
- struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
- struct iser_device *device = iser_conn->ib_conn->device;
+ struct iser_tx_desc *tx_desc = &iser_task->desc;
+ struct iser_conn *ib_conn = task->conn->dd_data;
+ struct iser_device *device = ib_conn->device;
ib_dma_unmap_single(device->ib_device,
tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
@@ -296,14 +306,25 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task)
}
}
+static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
+{
+ struct iscsi_iser_task *iser_task = task->dd_data;
+
+ if (iser_task->dir[ISER_DIR_IN])
+ return iser_check_task_pi_status(iser_task, ISER_DIR_IN,
+ sector);
+ else
+ return iser_check_task_pi_status(iser_task, ISER_DIR_OUT,
+ sector);
+}
+
static struct iscsi_cls_conn *
iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
{
struct iscsi_conn *conn;
struct iscsi_cls_conn *cls_conn;
- struct iscsi_iser_conn *iser_conn;
- cls_conn = iscsi_conn_setup(cls_session, sizeof(*iser_conn), conn_idx);
+ cls_conn = iscsi_conn_setup(cls_session, 0, conn_idx);
if (!cls_conn)
return NULL;
conn = cls_conn->dd_data;
@@ -314,39 +335,15 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
*/
conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN;
- iser_conn = conn->dd_data;
- conn->dd_data = iser_conn;
- iser_conn->iscsi_conn = conn;
-
return cls_conn;
}
-static void
-iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn)
-{
- struct iscsi_conn *conn = cls_conn->dd_data;
- struct iscsi_iser_conn *iser_conn = conn->dd_data;
- struct iser_conn *ib_conn = iser_conn->ib_conn;
-
- iscsi_conn_teardown(cls_conn);
- /*
- * Userspace will normally call the stop callback and
- * already have freed the ib_conn, but if it goofed up then
- * we free it here.
- */
- if (ib_conn) {
- ib_conn->iser_conn = NULL;
- iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
- }
-}
-
static int
iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
int is_leading)
{
struct iscsi_conn *conn = cls_conn->dd_data;
- struct iscsi_iser_conn *iser_conn;
struct iscsi_session *session;
struct iser_conn *ib_conn;
struct iscsi_endpoint *ep;
@@ -373,35 +370,44 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
/* binds the iSER connection retrieved from the previously
* connected ep_handle to the iSCSI layer connection. exchanges
* connection pointers */
- iser_info("binding iscsi/iser conn %p %p to ib_conn %p\n",
- conn, conn->dd_data, ib_conn);
- iser_conn = conn->dd_data;
- ib_conn->iser_conn = iser_conn;
- iser_conn->ib_conn = ib_conn;
- iser_conn_get(ib_conn); /* ref iscsi/ib conn binding */
+ iser_info("binding iscsi conn %p to ib_conn %p\n", conn, ib_conn);
+
+ conn->dd_data = ib_conn;
+ ib_conn->iscsi_conn = conn;
+
return 0;
}
+static int
+iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
+{
+ struct iscsi_conn *iscsi_conn;
+ struct iser_conn *ib_conn;
+
+ iscsi_conn = cls_conn->dd_data;
+ ib_conn = iscsi_conn->dd_data;
+ reinit_completion(&ib_conn->stop_completion);
+
+ return iscsi_conn_start(cls_conn);
+}
+
static void
iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
{
struct iscsi_conn *conn = cls_conn->dd_data;
- struct iscsi_iser_conn *iser_conn = conn->dd_data;
- struct iser_conn *ib_conn = iser_conn->ib_conn;
+ struct iser_conn *ib_conn = conn->dd_data;
+
+ iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn);
+ iscsi_conn_stop(cls_conn, flag);
/*
* Userspace may have goofed up and not bound the connection or
* might have only partially setup the connection.
*/
if (ib_conn) {
- iscsi_conn_stop(cls_conn, flag);
- /*
- * There is no unbind event so the stop callback
- * must release the ref from the bind.
- */
- iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
+ conn->dd_data = NULL;
+ complete(&ib_conn->stop_completion);
}
- iser_conn->ib_conn = NULL;
}
static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
@@ -413,6 +419,17 @@ static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
iscsi_host_free(shost);
}
+static inline unsigned int
+iser_dif_prot_caps(int prot_caps)
+{
+ return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION |
+ SHOST_DIX_TYPE1_PROTECTION : 0) |
+ ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION |
+ SHOST_DIX_TYPE2_PROTECTION : 0) |
+ ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION |
+ SHOST_DIX_TYPE3_PROTECTION : 0);
+}
+
static struct iscsi_cls_session *
iscsi_iser_session_create(struct iscsi_endpoint *ep,
uint16_t cmds_max, uint16_t qdepth,
@@ -437,8 +454,18 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
* older userspace tools (before 2.0-870) did not pass us
* the leading conn's ep so this will be NULL;
*/
- if (ep)
+ if (ep) {
ib_conn = ep->dd_data;
+ if (ib_conn->pi_support) {
+ u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
+
+ scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
+ if (iser_pi_guard)
+ scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP);
+ else
+ scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC);
+ }
+ }
if (iscsi_host_add(shost,
ep ? ib_conn->device->ib_device->dma_device : NULL))
@@ -481,28 +508,28 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
case ISCSI_PARAM_HDRDGST_EN:
sscanf(buf, "%d", &value);
if (value) {
- iser_err("DataDigest wasn't negotiated to None");
+ iser_err("DataDigest wasn't negotiated to None\n");
return -EPROTO;
}
break;
case ISCSI_PARAM_DATADGST_EN:
sscanf(buf, "%d", &value);
if (value) {
- iser_err("DataDigest wasn't negotiated to None");
+ iser_err("DataDigest wasn't negotiated to None\n");
return -EPROTO;
}
break;
case ISCSI_PARAM_IFMARKER_EN:
sscanf(buf, "%d", &value);
if (value) {
- iser_err("IFMarker wasn't negotiated to No");
+ iser_err("IFMarker wasn't negotiated to No\n");
return -EPROTO;
}
break;
case ISCSI_PARAM_OFMARKER_EN:
sscanf(buf, "%d", &value);
if (value) {
- iser_err("OFMarker wasn't negotiated to No");
+ iser_err("OFMarker wasn't negotiated to No\n");
return -EPROTO;
}
break;
@@ -618,19 +645,20 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
struct iser_conn *ib_conn;
ib_conn = ep->dd_data;
- if (ib_conn->iser_conn)
- /*
- * Must suspend xmit path if the ep is bound to the
- * iscsi_conn, so we know we are not accessing the ib_conn
- * when we free it.
- *
- * This may not be bound if the ep poll failed.
- */
- iscsi_suspend_tx(ib_conn->iser_conn->iscsi_conn);
-
-
- iser_info("ib conn %p state %d\n", ib_conn, ib_conn->state);
+ iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
iser_conn_terminate(ib_conn);
+
+ /*
+ * If iser_conn and iscsi_conn are bound, we must wait for the
+ * iscsi_conn_stop call and the ISER_CONN_DOWN state before freeing the
+ * iser resources. Otherwise we are safe to free resources immediately.
+ */
+ if (ib_conn->iscsi_conn) {
+ INIT_WORK(&ib_conn->release_work, iser_release_work);
+ queue_work(release_wq, &ib_conn->release_work);
+ } else {
+ iser_conn_release(ib_conn);
+ }
}
static umode_t iser_attr_is_visible(int param_type, int param)
@@ -714,13 +742,13 @@ static struct iscsi_transport iscsi_iser_transport = {
/* connection management */
.create_conn = iscsi_iser_conn_create,
.bind_conn = iscsi_iser_conn_bind,
- .destroy_conn = iscsi_iser_conn_destroy,
+ .destroy_conn = iscsi_conn_teardown,
.attr_is_visible = iser_attr_is_visible,
.set_param = iscsi_iser_set_param,
.get_conn_param = iscsi_conn_get_param,
.get_ep_param = iscsi_iser_get_ep_param,
.get_session_param = iscsi_session_get_param,
- .start_conn = iscsi_conn_start,
+ .start_conn = iscsi_iser_conn_start,
.stop_conn = iscsi_iser_conn_stop,
/* iscsi host params */
.get_host_param = iscsi_host_get_param,
@@ -732,6 +760,7 @@ static struct iscsi_transport iscsi_iser_transport = {
.xmit_task = iscsi_iser_task_xmit,
.cleanup_task = iscsi_iser_cleanup_task,
.alloc_pdu = iscsi_iser_pdu_alloc,
+ .check_protection = iscsi_iser_check_protection,
/* recovery */
.session_recovery_timedout = iscsi_session_recovery_timedout,
@@ -766,6 +795,12 @@ static int __init iser_init(void)
mutex_init(&ig.connlist_mutex);
INIT_LIST_HEAD(&ig.connlist);
+ release_wq = alloc_workqueue("release workqueue", 0, 0);
+ if (!release_wq) {
+ iser_err("failed to allocate release workqueue\n");
+ return -ENOMEM;
+ }
+
iscsi_iser_scsi_transport = iscsi_register_transport(
&iscsi_iser_transport);
if (!iscsi_iser_scsi_transport) {
@@ -784,7 +819,24 @@ register_transport_failure:
static void __exit iser_exit(void)
{
+ struct iser_conn *ib_conn, *n;
+ int connlist_empty;
+
iser_dbg("Removing iSER datamover...\n");
+ destroy_workqueue(release_wq);
+
+ mutex_lock(&ig.connlist_mutex);
+ connlist_empty = list_empty(&ig.connlist);
+ mutex_unlock(&ig.connlist_mutex);
+
+ if (!connlist_empty) {
+ iser_err("Error cleanup stage completed but we still have iser "
+ "connections, destroying them anyway.\n");
+ list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) {
+ iser_conn_release(ib_conn);
+ }
+ }
+
iscsi_unregister_transport(&iscsi_iser_transport);
kmem_cache_destroy(ig.desc_cache);
}
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 67914027c61..97cd385bf7f 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -8,7 +8,7 @@
*
* Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -46,6 +46,8 @@
#include <linux/printk.h>
#include <scsi/libiscsi.h>
#include <scsi/scsi_transport_iscsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
#include <linux/interrupt.h>
#include <linux/wait.h>
@@ -67,7 +69,7 @@
#define DRV_NAME "iser"
#define PFX DRV_NAME ": "
-#define DRV_VER "1.1"
+#define DRV_VER "1.4"
#define iser_dbg(fmt, arg...) \
do { \
@@ -134,10 +136,21 @@
ISER_MAX_TX_MISC_PDUS + \
ISER_MAX_RX_MISC_PDUS)
+/* Max registration work requests per command */
+#define ISER_MAX_REG_WR_PER_CMD 5
+
+/* For Signature we don't support DATAOUTs so no need to make room for them */
+#define ISER_QP_SIG_MAX_REQ_DTOS (ISER_DEF_XMIT_CMDS_MAX * \
+ (1 + ISER_MAX_REG_WR_PER_CMD) + \
+ ISER_MAX_TX_MISC_PDUS + \
+ ISER_MAX_RX_MISC_PDUS)
+
#define ISER_VER 0x10
#define ISER_WSV 0x08
#define ISER_RSV 0x04
+#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
+
struct iser_hdr {
u8 flags;
u8 rsvd[3];
@@ -201,7 +214,6 @@ struct iser_data_buf {
/* fwd declarations */
struct iser_device;
struct iser_cq_desc;
-struct iscsi_iser_conn;
struct iscsi_iser_task;
struct iscsi_endpoint;
@@ -258,6 +270,7 @@ struct iscsi_iser_task;
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
+ struct ib_device_attr dev_attr;
struct ib_cq *rx_cq[ISER_MAX_CQ];
struct ib_cq *tx_cq[ISER_MAX_CQ];
struct ib_mr *mr;
@@ -277,17 +290,35 @@ struct iser_device {
enum iser_data_dir cmd_dir);
};
+#define ISER_CHECK_GUARD 0xc0
+#define ISER_CHECK_REFTAG 0x0f
+#define ISER_CHECK_APPTAG 0x30
+
+enum iser_reg_indicator {
+ ISER_DATA_KEY_VALID = 1 << 0,
+ ISER_PROT_KEY_VALID = 1 << 1,
+ ISER_SIG_KEY_VALID = 1 << 2,
+ ISER_FASTREG_PROTECTED = 1 << 3,
+};
+
+struct iser_pi_context {
+ struct ib_mr *prot_mr;
+ struct ib_fast_reg_page_list *prot_frpl;
+ struct ib_mr *sig_mr;
+};
+
struct fast_reg_descriptor {
struct list_head list;
/* For fast registration - FRWR */
struct ib_mr *data_mr;
struct ib_fast_reg_page_list *data_frpl;
- /* Valid for fast registration flag */
- bool valid;
+ struct iser_pi_context *pi_ctx;
+ /* registration indicators container */
+ u8 reg_indicators;
};
struct iser_conn {
- struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */
+ struct iscsi_conn *iscsi_conn;
struct iscsi_endpoint *ep;
enum iser_ib_conn_state state; /* rdma connection state */
atomic_t refcount;
@@ -302,6 +333,8 @@ struct iser_conn {
int post_recv_buf_count; /* posted rx count */
atomic_t post_send_buf_count; /* posted tx count */
char name[ISER_OBJECT_NAME_SIZE];
+ struct work_struct release_work;
+ struct completion stop_completion;
struct list_head conn_list; /* entry in ig conn list */
char *login_buf;
@@ -310,6 +343,9 @@ struct iser_conn {
unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
+ bool pi_support;
+
+ /* Connection memory registration pool */
union {
struct {
struct ib_fmr_pool *pool; /* pool of IB FMRs */
@@ -319,24 +355,22 @@ struct iser_conn {
struct {
struct list_head pool;
int pool_size;
- } frwr;
- } fastreg;
-};
-
-struct iscsi_iser_conn {
- struct iscsi_conn *iscsi_conn;/* ptr to iscsi conn */
- struct iser_conn *ib_conn; /* iSER IB conn */
+ } fastreg;
+ };
};
struct iscsi_iser_task {
struct iser_tx_desc desc;
- struct iscsi_iser_conn *iser_conn;
+ struct iser_conn *ib_conn;
enum iser_task_status status;
+ struct scsi_cmnd *sc;
int command_sent; /* set if command sent */
int dir[ISER_DIRS_NUM]; /* set if dir use*/
struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/
struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */
+ struct iser_data_buf prot[ISER_DIRS_NUM]; /* prot desc */
+ struct iser_data_buf prot_copy[ISER_DIRS_NUM];/* prot copy */
};
struct iser_page_vec {
@@ -362,6 +396,8 @@ struct iser_global {
extern struct iser_global ig;
extern int iser_debug_level;
+extern bool iser_pi_enable;
+extern int iser_pi_guard;
/* allocate connection resources needed for rdma functionality */
int iser_conn_set_full_featured_mode(struct iscsi_conn *conn);
@@ -383,12 +419,12 @@ void iscsi_iser_recv(struct iscsi_conn *conn,
void iser_conn_init(struct iser_conn *ib_conn);
-void iser_conn_get(struct iser_conn *ib_conn);
-
-int iser_conn_put(struct iser_conn *ib_conn, int destroy_cma_id_allowed);
+void iser_conn_release(struct iser_conn *ib_conn);
void iser_conn_terminate(struct iser_conn *ib_conn);
+void iser_release_work(struct work_struct *work);
+
void iser_rcv_completion(struct iser_rx_desc *desc,
unsigned long dto_xfer_len,
struct iser_conn *ib_conn);
@@ -401,13 +437,15 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *task);
void iser_free_rx_descriptors(struct iser_conn *ib_conn);
-void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
- enum iser_data_dir cmd_dir);
+void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
+ struct iser_data_buf *mem,
+ struct iser_data_buf *mem_copy,
+ enum iser_data_dir cmd_dir);
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);
-int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task,
- enum iser_data_dir cmd_dir);
+int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task,
+ enum iser_data_dir cmd_dir);
int iser_connect(struct iser_conn *ib_conn,
struct sockaddr_in *src_addr,
@@ -420,8 +458,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
-void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir);
+void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
int iser_post_recvl(struct iser_conn *ib_conn);
int iser_post_recvm(struct iser_conn *ib_conn, int count);
@@ -432,12 +470,15 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
enum iser_data_dir iser_dir,
enum dma_data_direction dma_dir);
-void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
+void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
+ struct iser_data_buf *data);
int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc);
int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
void iser_free_fmr_pool(struct iser_conn *ib_conn);
-int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
-void iser_free_frwr_pool(struct iser_conn *ib_conn);
+int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_fastreg_pool(struct iser_conn *ib_conn);
+u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir, sector_t *sector);
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 538822684d5..8d44a406063 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -41,15 +41,15 @@
#include "iscsi_iser.h"
/* Register user buffer memory and initialize passive rdma
- * dto descriptor. Total data size is stored in
- * iser_task->data[ISER_DIR_IN].data_len
+ * dto descriptor. Data size is stored in
+ * task->data[ISER_DIR_IN].data_len, Protection size
+ * is stored in task->prot[ISER_DIR_IN].data_len
*/
-static int iser_prepare_read_cmd(struct iscsi_task *task,
- unsigned int edtl)
+static int iser_prepare_read_cmd(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_device *device = iser_task->iser_conn->ib_conn->device;
+ struct iser_device *device = iser_task->ib_conn->device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -62,12 +62,15 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
if (err)
return err;
- if (edtl > iser_task->data[ISER_DIR_IN].data_len) {
- iser_err("Total data length: %ld, less than EDTL: "
- "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
- iser_task->data[ISER_DIR_IN].data_len, edtl,
- task->itt, iser_task->iser_conn);
- return -EINVAL;
+ if (scsi_prot_sg_count(iser_task->sc)) {
+ struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN];
+
+ err = iser_dma_map_task_data(iser_task,
+ pbuf_in,
+ ISER_DIR_IN,
+ DMA_FROM_DEVICE);
+ if (err)
+ return err;
}
err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
@@ -89,8 +92,9 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
}
/* Register user buffer memory and initialize passive rdma
- * dto descriptor. Total data size is stored in
- * task->data[ISER_DIR_OUT].data_len
+ * dto descriptor. Data size is stored in
+ * task->data[ISER_DIR_OUT].data_len, Protection size
+ * is stored at task->prot[ISER_DIR_OUT].data_len
*/
static int
iser_prepare_write_cmd(struct iscsi_task *task,
@@ -99,7 +103,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
unsigned int edtl)
{
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_device *device = iser_task->iser_conn->ib_conn->device;
+ struct iser_device *device = iser_task->ib_conn->device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -113,12 +117,15 @@ iser_prepare_write_cmd(struct iscsi_task *task,
if (err)
return err;
- if (edtl > iser_task->data[ISER_DIR_OUT].data_len) {
- iser_err("Total data length: %ld, less than EDTL: %d, "
- "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
- iser_task->data[ISER_DIR_OUT].data_len,
- edtl, task->itt, task->conn);
- return -EINVAL;
+ if (scsi_prot_sg_count(iser_task->sc)) {
+ struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT];
+
+ err = iser_dma_map_task_data(iser_task,
+ pbuf_out,
+ ISER_DIR_OUT,
+ DMA_TO_DEVICE);
+ if (err)
+ return err;
}
err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
@@ -327,7 +334,7 @@ free_login_buf:
static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
{
- struct iscsi_iser_conn *iser_conn = conn->dd_data;
+ struct iser_conn *ib_conn = conn->dd_data;
struct iscsi_session *session = conn->session;
iser_dbg("req op %x flags %x\n", req->opcode, req->flags);
@@ -340,19 +347,18 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
* response) and no posted send buffers left - they must have been
* consumed during previous login phases.
*/
- WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1);
- WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
+ WARN_ON(ib_conn->post_recv_buf_count != 1);
+ WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0);
if (session->discovery_sess) {
iser_info("Discovery session, re-using login RX buffer\n");
return 0;
} else
iser_info("Normal session, posting batch of RX %d buffers\n",
- iser_conn->ib_conn->min_posted_rx);
+ ib_conn->min_posted_rx);
/* Initial post receive buffers */
- if (iser_post_recvm(iser_conn->ib_conn,
- iser_conn->ib_conn->min_posted_rx))
+ if (iser_post_recvm(ib_conn, ib_conn->min_posted_rx))
return -ENOMEM;
return 0;
@@ -364,11 +370,11 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
int iser_send_command(struct iscsi_conn *conn,
struct iscsi_task *task)
{
- struct iscsi_iser_conn *iser_conn = conn->dd_data;
+ struct iser_conn *ib_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
unsigned long edtl;
int err;
- struct iser_data_buf *data_buf;
+ struct iser_data_buf *data_buf, *prot_buf;
struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
struct scsi_cmnd *sc = task->sc;
struct iser_tx_desc *tx_desc = &iser_task->desc;
@@ -377,22 +383,31 @@ int iser_send_command(struct iscsi_conn *conn,
/* build the tx desc regd header and add it to the tx desc dto */
tx_desc->type = ISCSI_TX_SCSI_COMMAND;
- iser_create_send_desc(iser_conn->ib_conn, tx_desc);
+ iser_create_send_desc(ib_conn, tx_desc);
- if (hdr->flags & ISCSI_FLAG_CMD_READ)
+ if (hdr->flags & ISCSI_FLAG_CMD_READ) {
data_buf = &iser_task->data[ISER_DIR_IN];
- else
+ prot_buf = &iser_task->prot[ISER_DIR_IN];
+ } else {
data_buf = &iser_task->data[ISER_DIR_OUT];
+ prot_buf = &iser_task->prot[ISER_DIR_OUT];
+ }
if (scsi_sg_count(sc)) { /* using a scatter list */
data_buf->buf = scsi_sglist(sc);
data_buf->size = scsi_sg_count(sc);
}
-
data_buf->data_len = scsi_bufflen(sc);
+ if (scsi_prot_sg_count(sc)) {
+ prot_buf->buf = scsi_prot_sglist(sc);
+ prot_buf->size = scsi_prot_sg_count(sc);
+ prot_buf->data_len = data_buf->data_len >>
+ ilog2(sc->device->sector_size) * 8;
+ }
+
if (hdr->flags & ISCSI_FLAG_CMD_READ) {
- err = iser_prepare_read_cmd(task, edtl);
+ err = iser_prepare_read_cmd(task);
if (err)
goto send_command_error;
}
@@ -408,7 +423,7 @@ int iser_send_command(struct iscsi_conn *conn,
iser_task->status = ISER_TASK_STATUS_STARTED;
- err = iser_post_send(iser_conn->ib_conn, tx_desc);
+ err = iser_post_send(ib_conn, tx_desc);
if (!err)
return 0;
@@ -424,7 +439,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
struct iscsi_task *task,
struct iscsi_data *hdr)
{
- struct iscsi_iser_conn *iser_conn = conn->dd_data;
+ struct iser_conn *ib_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_tx_desc *tx_desc = NULL;
struct iser_regd_buf *regd_buf;
@@ -473,7 +488,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
itt, buf_offset, data_seg_len);
- err = iser_post_send(iser_conn->ib_conn, tx_desc);
+ err = iser_post_send(ib_conn, tx_desc);
if (!err)
return 0;
@@ -486,19 +501,18 @@ send_data_out_error:
int iser_send_control(struct iscsi_conn *conn,
struct iscsi_task *task)
{
- struct iscsi_iser_conn *iser_conn = conn->dd_data;
+ struct iser_conn *ib_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_tx_desc *mdesc = &iser_task->desc;
unsigned long data_seg_len;
int err = 0;
struct iser_device *device;
- struct iser_conn *ib_conn = iser_conn->ib_conn;
/* build the tx desc regd header and add it to the tx desc dto */
mdesc->type = ISCSI_TX_CONTROL;
- iser_create_send_desc(iser_conn->ib_conn, mdesc);
+ iser_create_send_desc(ib_conn, mdesc);
- device = iser_conn->ib_conn->device;
+ device = ib_conn->device;
data_seg_len = ntoh24(task->hdr->dlength);
@@ -513,14 +527,13 @@ int iser_send_control(struct iscsi_conn *conn,
ib_conn->login_req_dma, task->data_count,
DMA_TO_DEVICE);
- memcpy(iser_conn->ib_conn->login_req_buf, task->data,
- task->data_count);
+ memcpy(ib_conn->login_req_buf, task->data, task->data_count);
ib_dma_sync_single_for_device(device->ib_device,
ib_conn->login_req_dma, task->data_count,
DMA_TO_DEVICE);
- tx_dsg->addr = iser_conn->ib_conn->login_req_dma;
+ tx_dsg->addr = ib_conn->login_req_dma;
tx_dsg->length = task->data_count;
tx_dsg->lkey = device->mr->lkey;
mdesc->num_sge = 2;
@@ -529,7 +542,7 @@ int iser_send_control(struct iscsi_conn *conn,
if (task == conn->login_task) {
iser_dbg("op %x dsl %lx, posting login rx buffer\n",
task->hdr->opcode, data_seg_len);
- err = iser_post_recvl(iser_conn->ib_conn);
+ err = iser_post_recvl(ib_conn);
if (err)
goto send_control_error;
err = iser_post_rx_bufs(conn, task->hdr);
@@ -537,7 +550,7 @@ int iser_send_control(struct iscsi_conn *conn,
goto send_control_error;
}
- err = iser_post_send(iser_conn->ib_conn, mdesc);
+ err = iser_post_send(ib_conn, mdesc);
if (!err)
return 0;
@@ -553,7 +566,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
unsigned long rx_xfer_len,
struct iser_conn *ib_conn)
{
- struct iscsi_iser_conn *conn = ib_conn->iser_conn;
struct iscsi_hdr *hdr;
u64 rx_dma;
int rx_buflen, outstanding, count, err;
@@ -575,17 +587,17 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
- iscsi_iser_recv(conn->iscsi_conn, hdr,
- rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);
+ iscsi_iser_recv(ib_conn->iscsi_conn, hdr, rx_desc->data,
+ rx_xfer_len - ISER_HEADERS_LEN);
ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
- rx_buflen, DMA_FROM_DEVICE);
+ rx_buflen, DMA_FROM_DEVICE);
/* decrementing conn->post_recv_buf_count only --after-- freeing the *
* task eliminates the need to worry on tasks which are completed in *
* parallel to the execution of iser_conn_term. So the code that waits *
* for the posted rx bufs refcount to become zero handles everything */
- conn->ib_conn->post_recv_buf_count--;
+ ib_conn->post_recv_buf_count--;
if (rx_dma == ib_conn->login_resp_dma)
return;
@@ -610,11 +622,12 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc,
ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
ISER_HEADERS_LEN, DMA_TO_DEVICE);
kmem_cache_free(ig.desc_cache, tx_desc);
+ tx_desc = NULL;
}
atomic_dec(&ib_conn->post_send_buf_count);
- if (tx_desc->type == ISCSI_TX_CONTROL) {
+ if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
/* this arithmetic is legal by libiscsi dd_data allocation */
task = (void *) ((long)(void *)tx_desc -
sizeof(struct iscsi_task));
@@ -634,6 +647,9 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
iser_task->data[ISER_DIR_IN].data_len = 0;
iser_task->data[ISER_DIR_OUT].data_len = 0;
+ iser_task->prot[ISER_DIR_IN].data_len = 0;
+ iser_task->prot[ISER_DIR_OUT].data_len = 0;
+
memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,
sizeof(struct iser_regd_buf));
memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0,
@@ -642,28 +658,63 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
- struct iser_device *device = iser_task->iser_conn->ib_conn->device;
- int is_rdma_aligned = 1;
+ struct iser_device *device = iser_task->ib_conn->device;
+ int is_rdma_data_aligned = 1;
+ int is_rdma_prot_aligned = 1;
+ int prot_count = scsi_prot_sg_count(iser_task->sc);
/* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy
*/
if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) {
- is_rdma_aligned = 0;
- iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN);
+ is_rdma_data_aligned = 0;
+ iser_finalize_rdma_unaligned_sg(iser_task,
+ &iser_task->data[ISER_DIR_IN],
+ &iser_task->data_copy[ISER_DIR_IN],
+ ISER_DIR_IN);
}
+
if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
- is_rdma_aligned = 0;
- iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT);
+ is_rdma_data_aligned = 0;
+ iser_finalize_rdma_unaligned_sg(iser_task,
+ &iser_task->data[ISER_DIR_OUT],
+ &iser_task->data_copy[ISER_DIR_OUT],
+ ISER_DIR_OUT);
+ }
+
+ if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) {
+ is_rdma_prot_aligned = 0;
+ iser_finalize_rdma_unaligned_sg(iser_task,
+ &iser_task->prot[ISER_DIR_IN],
+ &iser_task->prot_copy[ISER_DIR_IN],
+ ISER_DIR_IN);
}
- if (iser_task->dir[ISER_DIR_IN])
+ if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) {
+ is_rdma_prot_aligned = 0;
+ iser_finalize_rdma_unaligned_sg(iser_task,
+ &iser_task->prot[ISER_DIR_OUT],
+ &iser_task->prot_copy[ISER_DIR_OUT],
+ ISER_DIR_OUT);
+ }
+
+ if (iser_task->dir[ISER_DIR_IN]) {
device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
+ if (is_rdma_data_aligned)
+ iser_dma_unmap_task_data(iser_task,
+ &iser_task->data[ISER_DIR_IN]);
+ if (prot_count && is_rdma_prot_aligned)
+ iser_dma_unmap_task_data(iser_task,
+ &iser_task->prot[ISER_DIR_IN]);
+ }
- if (iser_task->dir[ISER_DIR_OUT])
+ if (iser_task->dir[ISER_DIR_OUT]) {
device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
-
- /* if the data was unaligned, it was already unmapped and then copied */
- if (is_rdma_aligned)
- iser_dma_unmap_task_data(iser_task);
+ if (is_rdma_data_aligned)
+ iser_dma_unmap_task_data(iser_task,
+ &iser_task->data[ISER_DIR_OUT]);
+ if (prot_count && is_rdma_prot_aligned)
+ iser_dma_unmap_task_data(iser_task,
+ &iser_task->prot[ISER_DIR_OUT]);
+ }
}
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 1ce0c97d2cc..47acd3ad3a1 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -45,13 +45,19 @@
* iser_start_rdma_unaligned_sg
*/
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
+ struct iser_data_buf *data,
+ struct iser_data_buf *data_copy,
enum iser_data_dir cmd_dir)
{
- int dma_nents;
- struct ib_device *dev;
+ struct ib_device *dev = iser_task->ib_conn->device->ib_device;
+ struct scatterlist *sgl = (struct scatterlist *)data->buf;
+ struct scatterlist *sg;
char *mem = NULL;
- struct iser_data_buf *data = &iser_task->data[cmd_dir];
- unsigned long cmd_data_len = data->data_len;
+ unsigned long cmd_data_len = 0;
+ int dma_nents, i;
+
+ for_each_sg(sgl, sg, data->size, i)
+ cmd_data_len += ib_sg_dma_len(dev, sg);
if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
mem = (void *)__get_free_pages(GFP_ATOMIC,
@@ -61,17 +67,16 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
if (mem == NULL) {
iser_err("Failed to allocate mem size %d %d for copying sglist\n",
- data->size,(int)cmd_data_len);
+ data->size, (int)cmd_data_len);
return -ENOMEM;
}
if (cmd_dir == ISER_DIR_OUT) {
/* copy the unaligned sg the buffer which is used for RDMA */
- struct scatterlist *sgl = (struct scatterlist *)data->buf;
- struct scatterlist *sg;
int i;
char *p, *from;
+ sgl = (struct scatterlist *)data->buf;
p = mem;
for_each_sg(sgl, sg, data->size, i) {
from = kmap_atomic(sg_page(sg));
@@ -83,39 +88,37 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
}
}
- sg_init_one(&iser_task->data_copy[cmd_dir].sg_single, mem, cmd_data_len);
- iser_task->data_copy[cmd_dir].buf =
- &iser_task->data_copy[cmd_dir].sg_single;
- iser_task->data_copy[cmd_dir].size = 1;
+ sg_init_one(&data_copy->sg_single, mem, cmd_data_len);
+ data_copy->buf = &data_copy->sg_single;
+ data_copy->size = 1;
+ data_copy->copy_buf = mem;
- iser_task->data_copy[cmd_dir].copy_buf = mem;
-
- dev = iser_task->iser_conn->ib_conn->device->ib_device;
- dma_nents = ib_dma_map_sg(dev,
- &iser_task->data_copy[cmd_dir].sg_single,
- 1,
+ dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1,
(cmd_dir == ISER_DIR_OUT) ?
DMA_TO_DEVICE : DMA_FROM_DEVICE);
BUG_ON(dma_nents == 0);
- iser_task->data_copy[cmd_dir].dma_nents = dma_nents;
+ data_copy->dma_nents = dma_nents;
+ data_copy->data_len = cmd_data_len;
+
return 0;
}
/**
* iser_finalize_rdma_unaligned_sg
*/
+
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir)
+ struct iser_data_buf *data,
+ struct iser_data_buf *data_copy,
+ enum iser_data_dir cmd_dir)
{
struct ib_device *dev;
- struct iser_data_buf *mem_copy;
unsigned long cmd_data_len;
- dev = iser_task->iser_conn->ib_conn->device->ib_device;
- mem_copy = &iser_task->data_copy[cmd_dir];
+ dev = iser_task->ib_conn->device->ib_device;
- ib_dma_unmap_sg(dev, &mem_copy->sg_single, 1,
+ ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,
(cmd_dir == ISER_DIR_OUT) ?
DMA_TO_DEVICE : DMA_FROM_DEVICE);
@@ -127,10 +130,10 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
int i;
/* copy back read RDMA to unaligned sg */
- mem = mem_copy->copy_buf;
+ mem = data_copy->copy_buf;
- sgl = (struct scatterlist *)iser_task->data[ISER_DIR_IN].buf;
- sg_size = iser_task->data[ISER_DIR_IN].size;
+ sgl = (struct scatterlist *)data->buf;
+ sg_size = data->size;
p = mem;
for_each_sg(sgl, sg, sg_size, i) {
@@ -143,15 +146,15 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
}
}
- cmd_data_len = iser_task->data[cmd_dir].data_len;
+ cmd_data_len = data->data_len;
if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
- free_pages((unsigned long)mem_copy->copy_buf,
+ free_pages((unsigned long)data_copy->copy_buf,
ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
else
- kfree(mem_copy->copy_buf);
+ kfree(data_copy->copy_buf);
- mem_copy->copy_buf = NULL;
+ data_copy->copy_buf = NULL;
}
#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
@@ -319,7 +322,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
struct ib_device *dev;
iser_task->dir[iser_dir] = 1;
- dev = iser_task->iser_conn->ib_conn->device->ib_device;
+ dev = iser_task->ib_conn->device->ib_device;
data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);
if (data->dma_nents == 0) {
@@ -329,31 +332,23 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
return 0;
}
-void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task)
+void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
+ struct iser_data_buf *data)
{
struct ib_device *dev;
- struct iser_data_buf *data;
- dev = iser_task->iser_conn->ib_conn->device->ib_device;
-
- if (iser_task->dir[ISER_DIR_IN]) {
- data = &iser_task->data[ISER_DIR_IN];
- ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE);
- }
-
- if (iser_task->dir[ISER_DIR_OUT]) {
- data = &iser_task->data[ISER_DIR_OUT];
- ib_dma_unmap_sg(dev, data->buf, data->size, DMA_TO_DEVICE);
- }
+ dev = iser_task->ib_conn->device->ib_device;
+ ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE);
}
static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
struct ib_device *ibdev,
+ struct iser_data_buf *mem,
+ struct iser_data_buf *mem_copy,
enum iser_data_dir cmd_dir,
int aligned_len)
{
- struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
- struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+ struct iscsi_conn *iscsi_conn = iser_task->ib_conn->iscsi_conn;
iscsi_conn->fmr_unalign_cnt++;
iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
@@ -363,12 +358,12 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
iser_data_buf_dump(mem, ibdev);
/* unmap the command data before accessing it */
- iser_dma_unmap_task_data(iser_task);
+ iser_dma_unmap_task_data(iser_task, mem);
/* allocate copy buf, if we are writing, copy the */
/* unaligned scatterlist, dma map the copy */
- if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
- return -ENOMEM;
+ if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0)
+ return -ENOMEM;
return 0;
}
@@ -382,7 +377,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
- struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+ struct iser_conn *ib_conn = iser_task->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct iser_data_buf *mem = &iser_task->data[cmd_dir];
@@ -396,7 +391,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents) {
- err = fall_to_bounce_buf(iser_task, ibdev,
+ err = fall_to_bounce_buf(iser_task, ibdev, mem,
+ &iser_task->data_copy[cmd_dir],
cmd_dir, aligned_len);
if (err) {
iser_err("failed to allocate bounce buffer\n");
@@ -422,8 +418,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
(unsigned long)regd_buf->reg.va,
(unsigned long)regd_buf->reg.len);
} else { /* use FMR for multiple dma entries */
- iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev);
- err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec,
+ iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev);
+ err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec,
&regd_buf->reg);
if (err && err != -EAGAIN) {
iser_data_buf_dump(mem, ibdev);
@@ -431,12 +427,12 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
mem->dma_nents,
ntoh24(iser_task->desc.iscsi_header.dlength));
iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
- ib_conn->fastreg.fmr.page_vec->data_size,
- ib_conn->fastreg.fmr.page_vec->length,
- ib_conn->fastreg.fmr.page_vec->offset);
- for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++)
+ ib_conn->fmr.page_vec->data_size,
+ ib_conn->fmr.page_vec->length,
+ ib_conn->fmr.page_vec->offset);
+ for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
iser_err("page_vec[%d] = 0x%llx\n", i,
- (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]);
+ (unsigned long long) ib_conn->fmr.page_vec->pages[i]);
}
if (err)
return err;
@@ -444,94 +440,280 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
return 0;
}
-static int iser_fast_reg_mr(struct fast_reg_descriptor *desc,
- struct iser_conn *ib_conn,
+static inline enum ib_t10_dif_type
+scsi2ib_prot_type(unsigned char prot_type)
+{
+ switch (prot_type) {
+ case SCSI_PROT_DIF_TYPE0:
+ return IB_T10DIF_NONE;
+ case SCSI_PROT_DIF_TYPE1:
+ return IB_T10DIF_TYPE1;
+ case SCSI_PROT_DIF_TYPE2:
+ return IB_T10DIF_TYPE2;
+ case SCSI_PROT_DIF_TYPE3:
+ return IB_T10DIF_TYPE3;
+ default:
+ return IB_T10DIF_NONE;
+ }
+}
+
+
+static int
+iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
+{
+ unsigned char scsi_ptype = scsi_get_prot_type(sc);
+
+ sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF;
+ sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
+ sig_attrs->mem.sig.dif.pi_interval = sc->device->sector_size;
+ sig_attrs->wire.sig.dif.pi_interval = sc->device->sector_size;
+
+ switch (scsi_get_prot_op(sc)) {
+ case SCSI_PROT_WRITE_INSERT:
+ case SCSI_PROT_READ_STRIP:
+ sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE;
+ sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+ sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
+ sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) &
+ 0xffffffff;
+ break;
+ case SCSI_PROT_READ_INSERT:
+ case SCSI_PROT_WRITE_STRIP:
+ sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+ sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
+ sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) &
+ 0xffffffff;
+ sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE;
+ break;
+ case SCSI_PROT_READ_PASS:
+ case SCSI_PROT_WRITE_PASS:
+ sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+ sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
+ sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) &
+ 0xffffffff;
+ sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+ sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
+ sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) &
+ 0xffffffff;
+ break;
+ default:
+ iser_err("Unsupported PI operation %d\n",
+ scsi_get_prot_op(sc));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+
+static int
+iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
+{
+ switch (scsi_get_prot_type(sc)) {
+ case SCSI_PROT_DIF_TYPE0:
+ *mask = 0x0;
+ break;
+ case SCSI_PROT_DIF_TYPE1:
+ case SCSI_PROT_DIF_TYPE2:
+ *mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG;
+ break;
+ case SCSI_PROT_DIF_TYPE3:
+ *mask = ISER_CHECK_GUARD;
+ break;
+ default:
+ iser_err("Unsupported protection type %d\n",
+ scsi_get_prot_type(sc));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
+ struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
+ struct ib_sge *prot_sge, struct ib_sge *sig_sge)
+{
+ struct iser_conn *ib_conn = iser_task->ib_conn;
+ struct iser_pi_context *pi_ctx = desc->pi_ctx;
+ struct ib_send_wr sig_wr, inv_wr;
+ struct ib_send_wr *bad_wr, *wr = NULL;
+ struct ib_sig_attrs sig_attrs;
+ int ret;
+ u32 key;
+
+ memset(&sig_attrs, 0, sizeof(sig_attrs));
+ ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
+ if (ret)
+ goto err;
+
+ ret = iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
+ if (ret)
+ goto err;
+
+ if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
+ memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.wr_id = ISER_FASTREG_LI_WRID;
+ inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey;
+ wr = &inv_wr;
+ /* Bump the key */
+ key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(pi_ctx->sig_mr, ++key);
+ }
+
+ memset(&sig_wr, 0, sizeof(sig_wr));
+ sig_wr.opcode = IB_WR_REG_SIG_MR;
+ sig_wr.wr_id = ISER_FASTREG_LI_WRID;
+ sig_wr.sg_list = data_sge;
+ sig_wr.num_sge = 1;
+ sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
+ sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
+ if (scsi_prot_sg_count(iser_task->sc))
+ sig_wr.wr.sig_handover.prot = prot_sge;
+ sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE;
+
+ if (!wr)
+ wr = &sig_wr;
+ else
+ wr->next = &sig_wr;
+
+ ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
+ if (ret) {
+ iser_err("reg_sig_mr failed, ret:%d\n", ret);
+ goto err;
+ }
+ desc->reg_indicators &= ~ISER_SIG_KEY_VALID;
+
+ sig_sge->lkey = pi_ctx->sig_mr->lkey;
+ sig_sge->addr = 0;
+ sig_sge->length = data_sge->length + prot_sge->length;
+ if (scsi_get_prot_op(iser_task->sc) == SCSI_PROT_WRITE_INSERT ||
+ scsi_get_prot_op(iser_task->sc) == SCSI_PROT_READ_STRIP) {
+ sig_sge->length += (data_sge->length /
+ iser_task->sc->device->sector_size) * 8;
+ }
+
+ iser_dbg("sig_sge: addr: 0x%llx length: %u lkey: 0x%x\n",
+ sig_sge->addr, sig_sge->length,
+ sig_sge->lkey);
+err:
+ return ret;
+}
+
+static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_regd_buf *regd_buf,
- u32 offset, unsigned int data_size,
- unsigned int page_list_len)
+ struct iser_data_buf *mem,
+ enum iser_reg_indicator ind,
+ struct ib_sge *sge)
{
+ struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
+ struct iser_conn *ib_conn = iser_task->ib_conn;
+ struct iser_device *device = ib_conn->device;
+ struct ib_device *ibdev = device->ib_device;
+ struct ib_mr *mr;
+ struct ib_fast_reg_page_list *frpl;
struct ib_send_wr fastreg_wr, inv_wr;
struct ib_send_wr *bad_wr, *wr = NULL;
u8 key;
- int ret;
+ int ret, offset, size, plen;
+
+ /* if there a single dma entry, dma mr suffices */
+ if (mem->dma_nents == 1) {
+ struct scatterlist *sg = (struct scatterlist *)mem->buf;
- if (!desc->valid) {
+ sge->lkey = device->mr->lkey;
+ sge->addr = ib_sg_dma_address(ibdev, &sg[0]);
+ sge->length = ib_sg_dma_len(ibdev, &sg[0]);
+
+ iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
+ sge->lkey, sge->addr, sge->length);
+ return 0;
+ }
+
+ if (ind == ISER_DATA_KEY_VALID) {
+ mr = desc->data_mr;
+ frpl = desc->data_frpl;
+ } else {
+ mr = desc->pi_ctx->prot_mr;
+ frpl = desc->pi_ctx->prot_frpl;
+ }
+
+ plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
+ &offset, &size);
+ if (plen * SIZE_4K < size) {
+ iser_err("fast reg page_list too short to hold this SG\n");
+ return -EINVAL;
+ }
+
+ if (!(desc->reg_indicators & ind)) {
memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.wr_id = ISER_FASTREG_LI_WRID;
inv_wr.opcode = IB_WR_LOCAL_INV;
- inv_wr.send_flags = IB_SEND_SIGNALED;
- inv_wr.ex.invalidate_rkey = desc->data_mr->rkey;
+ inv_wr.ex.invalidate_rkey = mr->rkey;
wr = &inv_wr;
/* Bump the key */
- key = (u8)(desc->data_mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(desc->data_mr, ++key);
+ key = (u8)(mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(mr, ++key);
}
/* Prepare FASTREG WR */
memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+ fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
fastreg_wr.opcode = IB_WR_FAST_REG_MR;
- fastreg_wr.send_flags = IB_SEND_SIGNALED;
- fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset;
- fastreg_wr.wr.fast_reg.page_list = desc->data_frpl;
- fastreg_wr.wr.fast_reg.page_list_len = page_list_len;
+ fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
+ fastreg_wr.wr.fast_reg.page_list = frpl;
+ fastreg_wr.wr.fast_reg.page_list_len = plen;
fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
- fastreg_wr.wr.fast_reg.length = data_size;
- fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey;
+ fastreg_wr.wr.fast_reg.length = size;
+ fastreg_wr.wr.fast_reg.rkey = mr->rkey;
fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ);
- if (!wr) {
+ if (!wr)
wr = &fastreg_wr;
- atomic_inc(&ib_conn->post_send_buf_count);
- } else {
+ else
wr->next = &fastreg_wr;
- atomic_add(2, &ib_conn->post_send_buf_count);
- }
ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
if (ret) {
- if (bad_wr->next)
- atomic_sub(2, &ib_conn->post_send_buf_count);
- else
- atomic_dec(&ib_conn->post_send_buf_count);
iser_err("fast registration failed, ret:%d\n", ret);
return ret;
}
- desc->valid = false;
+ desc->reg_indicators &= ~ind;
- regd_buf->reg.mem_h = desc;
- regd_buf->reg.lkey = desc->data_mr->lkey;
- regd_buf->reg.rkey = desc->data_mr->rkey;
- regd_buf->reg.va = desc->data_frpl->page_list[0] + offset;
- regd_buf->reg.len = data_size;
- regd_buf->reg.is_mr = 1;
+ sge->lkey = mr->lkey;
+ sge->addr = frpl->page_list[0] + offset;
+ sge->length = size;
return ret;
}
/**
- * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA,
+ * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,
* using Fast Registration WR (if possible) obtaining rkey and va
*
* returns 0 on success, errno code on failure
*/
-int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir)
+int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
{
- struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+ struct iser_conn *ib_conn = iser_task->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
struct iser_data_buf *mem = &iser_task->data[cmd_dir];
struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
- struct fast_reg_descriptor *desc;
- unsigned int data_size, page_list_len;
+ struct fast_reg_descriptor *desc = NULL;
+ struct ib_sge data_sge;
int err, aligned_len;
unsigned long flags;
- u32 offset;
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents) {
- err = fall_to_bounce_buf(iser_task, ibdev,
+ err = fall_to_bounce_buf(iser_task, ibdev, mem,
+ &iser_task->data_copy[cmd_dir],
cmd_dir, aligned_len);
if (err) {
iser_err("failed to allocate bounce buffer\n");
@@ -540,41 +722,79 @@ int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,
mem = &iser_task->data_copy[cmd_dir];
}
- /* if there a single dma entry, dma mr suffices */
- if (mem->dma_nents == 1) {
- struct scatterlist *sg = (struct scatterlist *)mem->buf;
-
- regd_buf->reg.lkey = device->mr->lkey;
- regd_buf->reg.rkey = device->mr->rkey;
- regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
- regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
- regd_buf->reg.is_mr = 0;
- } else {
+ if (mem->dma_nents != 1 ||
+ scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
spin_lock_irqsave(&ib_conn->lock, flags);
- desc = list_first_entry(&ib_conn->fastreg.frwr.pool,
+ desc = list_first_entry(&ib_conn->fastreg.pool,
struct fast_reg_descriptor, list);
list_del(&desc->list);
spin_unlock_irqrestore(&ib_conn->lock, flags);
- page_list_len = iser_sg_to_page_vec(mem, device->ib_device,
- desc->data_frpl->page_list,
- &offset, &data_size);
-
- if (page_list_len * SIZE_4K < data_size) {
- iser_err("fast reg page_list too short to hold this SG\n");
- err = -EINVAL;
- goto err_reg;
+ regd_buf->reg.mem_h = desc;
+ }
+
+ err = iser_fast_reg_mr(iser_task, regd_buf, mem,
+ ISER_DATA_KEY_VALID, &data_sge);
+ if (err)
+ goto err_reg;
+
+ if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
+ struct ib_sge prot_sge, sig_sge;
+
+ memset(&prot_sge, 0, sizeof(prot_sge));
+ if (scsi_prot_sg_count(iser_task->sc)) {
+ mem = &iser_task->prot[cmd_dir];
+ aligned_len = iser_data_buf_aligned_len(mem, ibdev);
+ if (aligned_len != mem->dma_nents) {
+ err = fall_to_bounce_buf(iser_task, ibdev, mem,
+ &iser_task->prot_copy[cmd_dir],
+ cmd_dir, aligned_len);
+ if (err) {
+ iser_err("failed to allocate bounce buffer\n");
+ return err;
+ }
+ mem = &iser_task->prot_copy[cmd_dir];
+ }
+
+ err = iser_fast_reg_mr(iser_task, regd_buf, mem,
+ ISER_PROT_KEY_VALID, &prot_sge);
+ if (err)
+ goto err_reg;
}
- err = iser_fast_reg_mr(desc, ib_conn, regd_buf,
- offset, data_size, page_list_len);
- if (err)
- goto err_reg;
+ err = iser_reg_sig_mr(iser_task, desc, &data_sge,
+ &prot_sge, &sig_sge);
+ if (err) {
+ iser_err("Failed to register signature mr\n");
+ return err;
+ }
+ desc->reg_indicators |= ISER_FASTREG_PROTECTED;
+
+ regd_buf->reg.lkey = sig_sge.lkey;
+ regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
+ regd_buf->reg.va = sig_sge.addr;
+ regd_buf->reg.len = sig_sge.length;
+ regd_buf->reg.is_mr = 1;
+ } else {
+ if (desc) {
+ regd_buf->reg.rkey = desc->data_mr->rkey;
+ regd_buf->reg.is_mr = 1;
+ } else {
+ regd_buf->reg.rkey = device->mr->rkey;
+ regd_buf->reg.is_mr = 0;
+ }
+
+ regd_buf->reg.lkey = data_sge.lkey;
+ regd_buf->reg.va = data_sge.addr;
+ regd_buf->reg.len = data_sge.length;
}
return 0;
err_reg:
- spin_lock_irqsave(&ib_conn->lock, flags);
- list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
- spin_unlock_irqrestore(&ib_conn->lock, flags);
+ if (desc) {
+ spin_lock_irqsave(&ib_conn->lock, flags);
+ list_add_tail(&desc->list, &ib_conn->fastreg.pool);
+ spin_unlock_irqrestore(&ib_conn->lock, flags);
+ }
+
return err;
}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index afe95674008..ea01075f9f9 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -71,17 +71,14 @@ static void iser_event_handler(struct ib_event_handler *handler,
*/
static int iser_create_device_ib_res(struct iser_device *device)
{
- int i, j;
struct iser_cq_desc *cq_desc;
- struct ib_device_attr *dev_attr;
+ struct ib_device_attr *dev_attr = &device->dev_attr;
+ int ret, i, j;
- dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL);
- if (!dev_attr)
- return -ENOMEM;
-
- if (ib_query_device(device->ib_device, dev_attr)) {
+ ret = ib_query_device(device->ib_device, dev_attr);
+ if (ret) {
pr_warn("Query device failed for %s\n", device->ib_device->name);
- goto dev_attr_err;
+ return ret;
}
/* Assign function handles - based on FMR support */
@@ -94,14 +91,14 @@ static int iser_create_device_ib_res(struct iser_device *device)
device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
} else
if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
- iser_info("FRWR supported, using FRWR for registration\n");
- device->iser_alloc_rdma_reg_res = iser_create_frwr_pool;
- device->iser_free_rdma_reg_res = iser_free_frwr_pool;
- device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr;
- device->iser_unreg_rdma_mem = iser_unreg_mem_frwr;
+ iser_info("FastReg supported, using FastReg for registration\n");
+ device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool;
+ device->iser_free_rdma_reg_res = iser_free_fastreg_pool;
+ device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg;
+ device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg;
} else {
- iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n");
- goto dev_attr_err;
+ iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
+ return -1;
}
device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
@@ -158,7 +155,6 @@ static int iser_create_device_ib_res(struct iser_device *device)
if (ib_register_event_handler(&device->event_handler))
goto handler_err;
- kfree(dev_attr);
return 0;
handler_err:
@@ -178,8 +174,6 @@ pd_err:
kfree(device->cq_desc);
cq_desc_err:
iser_err("failed to allocate an IB resource\n");
-dev_attr_err:
- kfree(dev_attr);
return -1;
}
@@ -221,13 +215,13 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
struct ib_fmr_pool_param params;
int ret = -ENOMEM;
- ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) +
- (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)),
- GFP_KERNEL);
- if (!ib_conn->fastreg.fmr.page_vec)
+ ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) +
+ (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)),
+ GFP_KERNEL);
+ if (!ib_conn->fmr.page_vec)
return ret;
- ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1);
+ ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1);
params.page_shift = SHIFT_4K;
/* when the first/last SG element are not start/end *
@@ -243,16 +237,16 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ);
- ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, &params);
- if (!IS_ERR(ib_conn->fastreg.fmr.pool))
+ ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, &params);
+ if (!IS_ERR(ib_conn->fmr.pool))
return 0;
/* no FMR => no need for page_vec */
- kfree(ib_conn->fastreg.fmr.page_vec);
- ib_conn->fastreg.fmr.page_vec = NULL;
+ kfree(ib_conn->fmr.page_vec);
+ ib_conn->fmr.page_vec = NULL;
- ret = PTR_ERR(ib_conn->fastreg.fmr.pool);
- ib_conn->fastreg.fmr.pool = NULL;
+ ret = PTR_ERR(ib_conn->fmr.pool);
+ ib_conn->fmr.pool = NULL;
if (ret != -ENOSYS) {
iser_err("FMR allocation failed, err %d\n", ret);
return ret;
@@ -268,93 +262,173 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
void iser_free_fmr_pool(struct iser_conn *ib_conn)
{
iser_info("freeing conn %p fmr pool %p\n",
- ib_conn, ib_conn->fastreg.fmr.pool);
+ ib_conn, ib_conn->fmr.pool);
+
+ if (ib_conn->fmr.pool != NULL)
+ ib_destroy_fmr_pool(ib_conn->fmr.pool);
+
+ ib_conn->fmr.pool = NULL;
+
+ kfree(ib_conn->fmr.page_vec);
+ ib_conn->fmr.page_vec = NULL;
+}
+
+static int
+iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
+ bool pi_enable, struct fast_reg_descriptor *desc)
+{
+ int ret;
+
+ desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
+ ISCSI_ISER_SG_TABLESIZE + 1);
+ if (IS_ERR(desc->data_frpl)) {
+ ret = PTR_ERR(desc->data_frpl);
+ iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
+ ret);
+ return PTR_ERR(desc->data_frpl);
+ }
- if (ib_conn->fastreg.fmr.pool != NULL)
- ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool);
+ desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);
+ if (IS_ERR(desc->data_mr)) {
+ ret = PTR_ERR(desc->data_mr);
+ iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
+ goto fast_reg_mr_failure;
+ }
+ desc->reg_indicators |= ISER_DATA_KEY_VALID;
+
+ if (pi_enable) {
+ struct ib_mr_init_attr mr_init_attr = {0};
+ struct iser_pi_context *pi_ctx = NULL;
+
+ desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
+ if (!desc->pi_ctx) {
+ iser_err("Failed to allocate pi context\n");
+ ret = -ENOMEM;
+ goto pi_ctx_alloc_failure;
+ }
+ pi_ctx = desc->pi_ctx;
+
+ pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
+ ISCSI_ISER_SG_TABLESIZE);
+ if (IS_ERR(pi_ctx->prot_frpl)) {
+ ret = PTR_ERR(pi_ctx->prot_frpl);
+ iser_err("Failed to allocate prot frpl ret=%d\n",
+ ret);
+ goto prot_frpl_failure;
+ }
- ib_conn->fastreg.fmr.pool = NULL;
+ pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
+ ISCSI_ISER_SG_TABLESIZE + 1);
+ if (IS_ERR(pi_ctx->prot_mr)) {
+ ret = PTR_ERR(pi_ctx->prot_mr);
+ iser_err("Failed to allocate prot frmr ret=%d\n",
+ ret);
+ goto prot_mr_failure;
+ }
+ desc->reg_indicators |= ISER_PROT_KEY_VALID;
+
+ mr_init_attr.max_reg_descriptors = 2;
+ mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
+ pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
+ if (IS_ERR(pi_ctx->sig_mr)) {
+ ret = PTR_ERR(pi_ctx->sig_mr);
+ iser_err("Failed to allocate signature enabled mr err=%d\n",
+ ret);
+ goto sig_mr_failure;
+ }
+ desc->reg_indicators |= ISER_SIG_KEY_VALID;
+ }
+ desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
+
+ iser_dbg("Create fr_desc %p page_list %p\n",
+ desc, desc->data_frpl->page_list);
+
+ return 0;
+sig_mr_failure:
+ ib_dereg_mr(desc->pi_ctx->prot_mr);
+prot_mr_failure:
+ ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
+prot_frpl_failure:
+ kfree(desc->pi_ctx);
+pi_ctx_alloc_failure:
+ ib_dereg_mr(desc->data_mr);
+fast_reg_mr_failure:
+ ib_free_fast_reg_page_list(desc->data_frpl);
- kfree(ib_conn->fastreg.fmr.page_vec);
- ib_conn->fastreg.fmr.page_vec = NULL;
+ return ret;
}
/**
- * iser_create_frwr_pool - Creates pool of fast_reg descriptors
+ * iser_create_fastreg_pool - Creates pool of fast_reg descriptors
* for fast registration work requests.
* returns 0 on success, or errno code on failure
*/
-int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max)
{
struct iser_device *device = ib_conn->device;
struct fast_reg_descriptor *desc;
int i, ret;
- INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool);
- ib_conn->fastreg.frwr.pool_size = 0;
+ INIT_LIST_HEAD(&ib_conn->fastreg.pool);
+ ib_conn->fastreg.pool_size = 0;
for (i = 0; i < cmds_max; i++) {
- desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
if (!desc) {
iser_err("Failed to allocate a new fast_reg descriptor\n");
ret = -ENOMEM;
goto err;
}
- desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device,
- ISCSI_ISER_SG_TABLESIZE + 1);
- if (IS_ERR(desc->data_frpl)) {
- ret = PTR_ERR(desc->data_frpl);
- iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret);
- goto fast_reg_page_failure;
+ ret = iser_create_fastreg_desc(device->ib_device, device->pd,
+ ib_conn->pi_support, desc);
+ if (ret) {
+ iser_err("Failed to create fastreg descriptor err=%d\n",
+ ret);
+ kfree(desc);
+ goto err;
}
- desc->data_mr = ib_alloc_fast_reg_mr(device->pd,
- ISCSI_ISER_SG_TABLESIZE + 1);
- if (IS_ERR(desc->data_mr)) {
- ret = PTR_ERR(desc->data_mr);
- iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
- goto fast_reg_mr_failure;
- }
- desc->valid = true;
- list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
- ib_conn->fastreg.frwr.pool_size++;
+ list_add_tail(&desc->list, &ib_conn->fastreg.pool);
+ ib_conn->fastreg.pool_size++;
}
return 0;
-fast_reg_mr_failure:
- ib_free_fast_reg_page_list(desc->data_frpl);
-fast_reg_page_failure:
- kfree(desc);
err:
- iser_free_frwr_pool(ib_conn);
+ iser_free_fastreg_pool(ib_conn);
return ret;
}
/**
- * iser_free_frwr_pool - releases the pool of fast_reg descriptors
+ * iser_free_fastreg_pool - releases the pool of fast_reg descriptors
*/
-void iser_free_frwr_pool(struct iser_conn *ib_conn)
+void iser_free_fastreg_pool(struct iser_conn *ib_conn)
{
struct fast_reg_descriptor *desc, *tmp;
int i = 0;
- if (list_empty(&ib_conn->fastreg.frwr.pool))
+ if (list_empty(&ib_conn->fastreg.pool))
return;
- iser_info("freeing conn %p frwr pool\n", ib_conn);
+ iser_info("freeing conn %p fr pool\n", ib_conn);
- list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) {
+ list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
list_del(&desc->list);
ib_free_fast_reg_page_list(desc->data_frpl);
ib_dereg_mr(desc->data_mr);
+ if (desc->pi_ctx) {
+ ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
+ ib_dereg_mr(desc->pi_ctx->prot_mr);
+ ib_destroy_mr(desc->pi_ctx->sig_mr);
+ kfree(desc->pi_ctx);
+ }
kfree(desc);
++i;
}
- if (i < ib_conn->fastreg.frwr.pool_size)
+ if (i < ib_conn->fastreg.pool_size)
iser_warn("pool still has %d regions registered\n",
- ib_conn->fastreg.frwr.pool_size - i);
+ ib_conn->fastreg.pool_size - i);
}
/**
@@ -389,12 +463,17 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
init_attr.qp_context = (void *)ib_conn;
init_attr.send_cq = device->tx_cq[min_index];
init_attr.recv_cq = device->rx_cq[min_index];
- init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
init_attr.cap.max_send_sge = 2;
init_attr.cap.max_recv_sge = 1;
init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
init_attr.qp_type = IB_QPT_RC;
+ if (ib_conn->pi_support) {
+ init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS;
+ init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
+ } else {
+ init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
+ }
ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
if (ret)
@@ -502,14 +581,30 @@ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
return ret;
}
+void iser_release_work(struct work_struct *work)
+{
+ struct iser_conn *ib_conn;
+
+ ib_conn = container_of(work, struct iser_conn, release_work);
+
+ /* wait for .conn_stop callback */
+ wait_for_completion(&ib_conn->stop_completion);
+
+ /* wait for the qp`s post send and post receive buffers to empty */
+ wait_event_interruptible(ib_conn->wait,
+ ib_conn->state == ISER_CONN_DOWN);
+
+ iser_conn_release(ib_conn);
+}
+
/**
* Frees all conn objects and deallocs conn descriptor
*/
-static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)
+void iser_conn_release(struct iser_conn *ib_conn)
{
struct iser_device *device = ib_conn->device;
- BUG_ON(ib_conn->state != ISER_CONN_DOWN);
+ BUG_ON(ib_conn->state == ISER_CONN_UP);
mutex_lock(&ig.connlist_mutex);
list_del(&ib_conn->conn_list);
@@ -521,27 +616,13 @@ static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)
if (device != NULL)
iser_device_try_release(device);
/* if cma handler context, the caller actually destroy the id */
- if (ib_conn->cma_id != NULL && can_destroy_id) {
+ if (ib_conn->cma_id != NULL) {
rdma_destroy_id(ib_conn->cma_id);
ib_conn->cma_id = NULL;
}
iscsi_destroy_endpoint(ib_conn->ep);
}
-void iser_conn_get(struct iser_conn *ib_conn)
-{
- atomic_inc(&ib_conn->refcount);
-}
-
-int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id)
-{
- if (atomic_dec_and_test(&ib_conn->refcount)) {
- iser_conn_release(ib_conn, can_destroy_id);
- return 1;
- }
- return 0;
-}
-
/**
* triggers start of the disconnect procedures and wait for them to be done
*/
@@ -559,24 +640,19 @@ void iser_conn_terminate(struct iser_conn *ib_conn)
if (err)
iser_err("Failed to disconnect, conn: 0x%p err %d\n",
ib_conn,err);
-
- wait_event_interruptible(ib_conn->wait,
- ib_conn->state == ISER_CONN_DOWN);
-
- iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
}
-static int iser_connect_error(struct rdma_cm_id *cma_id)
+static void iser_connect_error(struct rdma_cm_id *cma_id)
{
struct iser_conn *ib_conn;
+
ib_conn = (struct iser_conn *)cma_id->context;
ib_conn->state = ISER_CONN_DOWN;
wake_up_interruptible(&ib_conn->wait);
- return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
}
-static int iser_addr_handler(struct rdma_cm_id *cma_id)
+static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
struct iser_device *device;
struct iser_conn *ib_conn;
@@ -585,22 +661,35 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id)
device = iser_device_find_by_ib_device(cma_id);
if (!device) {
iser_err("device lookup/creation failed\n");
- return iser_connect_error(cma_id);
+ iser_connect_error(cma_id);
+ return;
}
ib_conn = (struct iser_conn *)cma_id->context;
ib_conn->device = device;
+ /* connection T10-PI support */
+ if (iser_pi_enable) {
+ if (!(device->dev_attr.device_cap_flags &
+ IB_DEVICE_SIGNATURE_HANDOVER)) {
+ iser_warn("T10-PI requested but not supported on %s, "
+ "continue without T10-PI\n",
+ ib_conn->device->ib_device->name);
+ ib_conn->pi_support = false;
+ } else {
+ ib_conn->pi_support = true;
+ }
+ }
+
ret = rdma_resolve_route(cma_id, 1000);
if (ret) {
iser_err("resolve route failed: %d\n", ret);
- return iser_connect_error(cma_id);
+ iser_connect_error(cma_id);
+ return;
}
-
- return 0;
}
-static int iser_route_handler(struct rdma_cm_id *cma_id)
+static void iser_route_handler(struct rdma_cm_id *cma_id)
{
struct rdma_conn_param conn_param;
int ret;
@@ -628,33 +717,40 @@ static int iser_route_handler(struct rdma_cm_id *cma_id)
goto failure;
}
- return 0;
+ return;
failure:
- return iser_connect_error(cma_id);
+ iser_connect_error(cma_id);
}
static void iser_connected_handler(struct rdma_cm_id *cma_id)
{
struct iser_conn *ib_conn;
+ struct ib_qp_attr attr;
+ struct ib_qp_init_attr init_attr;
+
+ (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
+ iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
ib_conn = (struct iser_conn *)cma_id->context;
- ib_conn->state = ISER_CONN_UP;
- wake_up_interruptible(&ib_conn->wait);
+ if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP))
+ wake_up_interruptible(&ib_conn->wait);
}
-static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
+static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
struct iser_conn *ib_conn;
- int ret;
ib_conn = (struct iser_conn *)cma_id->context;
/* getting here when the state is UP means that the conn is being *
* terminated asynchronously from the iSCSI layer's perspective. */
if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
- ISER_CONN_TERMINATING))
- iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
- ISCSI_ERR_CONN_FAILED);
+ ISER_CONN_TERMINATING)){
+ if (ib_conn->iscsi_conn)
+ iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED);
+ else
+ iser_err("iscsi_iser connection isn't bound\n");
+ }
/* Complete the termination process if no posts are pending */
if (ib_conn->post_recv_buf_count == 0 &&
@@ -662,24 +758,19 @@ static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
ib_conn->state = ISER_CONN_DOWN;
wake_up_interruptible(&ib_conn->wait);
}
-
- ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
- return ret;
}
static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
- int ret = 0;
-
iser_info("event %d status %d conn %p id %p\n",
event->event, event->status, cma_id->context, cma_id);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
- ret = iser_addr_handler(cma_id);
+ iser_addr_handler(cma_id);
break;
case RDMA_CM_EVENT_ROUTE_RESOLVED:
- ret = iser_route_handler(cma_id);
+ iser_route_handler(cma_id);
break;
case RDMA_CM_EVENT_ESTABLISHED:
iser_connected_handler(cma_id);
@@ -689,18 +780,18 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_REJECTED:
- ret = iser_connect_error(cma_id);
+ iser_connect_error(cma_id);
break;
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_ADDR_CHANGE:
- ret = iser_disconnected_handler(cma_id);
+ iser_disconnected_handler(cma_id);
break;
default:
iser_err("Unexpected RDMA CM event (%d)\n", event->event);
break;
}
- return ret;
+ return 0;
}
void iser_conn_init(struct iser_conn *ib_conn)
@@ -709,7 +800,7 @@ void iser_conn_init(struct iser_conn *ib_conn)
init_waitqueue_head(&ib_conn->wait);
ib_conn->post_recv_buf_count = 0;
atomic_set(&ib_conn->post_send_buf_count, 0);
- atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */
+ init_completion(&ib_conn->stop_completion);
INIT_LIST_HEAD(&ib_conn->conn_list);
spin_lock_init(&ib_conn->lock);
}
@@ -737,7 +828,6 @@ int iser_connect(struct iser_conn *ib_conn,
ib_conn->state = ISER_CONN_PENDING;
- iser_conn_get(ib_conn); /* ref ib conn's cma id */
ib_conn->cma_id = rdma_create_id(iser_cma_handler,
(void *)ib_conn,
RDMA_PS_TCP, IB_QPT_RC);
@@ -774,9 +864,8 @@ id_failure:
ib_conn->cma_id = NULL;
addr_failure:
ib_conn->state = ISER_CONN_DOWN;
- iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */
connect_failure:
- iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
+ iser_conn_release(ib_conn);
return err;
}
@@ -797,7 +886,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
page_list = page_vec->pages;
io_addr = page_list[0];
- mem = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool,
+ mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
page_list,
page_vec->length,
io_addr);
@@ -851,11 +940,11 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
reg->mem_h = NULL;
}
-void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir)
+void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
- struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+ struct iser_conn *ib_conn = iser_task->ib_conn;
struct fast_reg_descriptor *desc = reg->mem_h;
if (!reg->is_mr)
@@ -864,7 +953,7 @@ void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
reg->mem_h = NULL;
reg->is_mr = 0;
spin_lock_bh(&ib_conn->lock);
- list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ list_add_tail(&desc->list, &ib_conn->fastreg.pool);
spin_unlock_bh(&ib_conn->lock);
}
@@ -965,7 +1054,7 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
* perspective. */
if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
ISER_CONN_TERMINATING))
- iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
+ iscsi_conn_failure(ib_conn->iscsi_conn,
ISCSI_ERR_CONN_FAILED);
/* no more non completed posts to the QP, complete the
@@ -989,18 +1078,16 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
if (wc.status == IB_WC_SUCCESS) {
if (wc.opcode == IB_WC_SEND)
iser_snd_completion(tx_desc, ib_conn);
- else if (wc.opcode == IB_WC_LOCAL_INV ||
- wc.opcode == IB_WC_FAST_REG_MR) {
- atomic_dec(&ib_conn->post_send_buf_count);
- continue;
- } else
+ else
iser_err("expected opcode %d got %d\n",
IB_WC_SEND, wc.opcode);
} else {
iser_err("tx id %llx status %d vend_err %x\n",
- wc.wr_id, wc.status, wc.vendor_err);
- atomic_dec(&ib_conn->post_send_buf_count);
- iser_handle_comp_error(tx_desc, ib_conn);
+ wc.wr_id, wc.status, wc.vendor_err);
+ if (wc.wr_id != ISER_FASTREG_LI_WRID) {
+ atomic_dec(&ib_conn->post_send_buf_count);
+ iser_handle_comp_error(tx_desc, ib_conn);
+ }
}
completed_tx++;
}
@@ -1018,8 +1105,12 @@ static void iser_cq_tasklet_fn(unsigned long data)
struct iser_rx_desc *desc;
unsigned long xfer_len;
struct iser_conn *ib_conn;
- int completed_tx, completed_rx;
- completed_tx = completed_rx = 0;
+ int completed_tx, completed_rx = 0;
+
+ /* First do tx drain, so in a case where we have rx flushes and a successful
+ * tx completion we will still go through completion error handling.
+ */
+ completed_tx = iser_drain_tx_cq(device, cq_index);
while (ib_poll_cq(cq, 1, &wc) == 1) {
desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
@@ -1047,7 +1138,6 @@ static void iser_cq_tasklet_fn(unsigned long data)
* " would not cause interrupts to be missed" */
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- completed_tx += iser_drain_tx_cq(device, cq_index);
iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
}
@@ -1059,3 +1149,51 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
tasklet_schedule(&device->cq_tasklet[cq_index]);
}
+
+u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir, sector_t *sector)
+{
+ struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
+ struct fast_reg_descriptor *desc = reg->mem_h;
+ unsigned long sector_size = iser_task->sc->device->sector_size;
+ struct ib_mr_status mr_status;
+ int ret;
+
+ if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) {
+ desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
+ ret = ib_check_mr_status(desc->pi_ctx->sig_mr,
+ IB_MR_CHECK_SIG_STATUS, &mr_status);
+ if (ret) {
+ pr_err("ib_check_mr_status failed, ret %d\n", ret);
+ goto err;
+ }
+
+ if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
+ sector_t sector_off = mr_status.sig_err.sig_err_offset;
+
+ do_div(sector_off, sector_size + 8);
+ *sector = scsi_get_lba(iser_task->sc) + sector_off;
+
+ pr_err("PI error found type %d at sector %llx "
+ "expected %x vs actual %x\n",
+ mr_status.sig_err.err_type,
+ (unsigned long long)*sector,
+ mr_status.sig_err.expected,
+ mr_status.sig_err.actual);
+
+ switch (mr_status.sig_err.err_type) {
+ case IB_SIG_BAD_GUARD:
+ return 0x1;
+ case IB_SIG_BAD_REFTAG:
+ return 0x3;
+ case IB_SIG_BAD_APPTAG:
+ return 0x2;
+ }
+ }
+ }
+
+ return 0;
+err:
+ /* Not alot we can do here, return ambiguous guard error */
+ return 0x1;
+}
diff --git a/drivers/infiniband/ulp/isert/Kconfig b/drivers/infiniband/ulp/isert/Kconfig
index ce3fd32167d..02f9759ebb1 100644
--- a/drivers/infiniband/ulp/isert/Kconfig
+++ b/drivers/infiniband/ulp/isert/Kconfig
@@ -1,5 +1,5 @@
config INFINIBAND_ISERT
- tristate "iSCSI Extentions for RDMA (iSER) target support"
+ tristate "iSCSI Extensions for RDMA (iSER) target support"
depends on INET && INFINIBAND_ADDR_TRANS && TARGET_CORE && ISCSI_TARGET
---help---
- Support for iSCSI Extentions for RDMA (iSER) Target on Infiniband fabrics.
+ Support for iSCSI Extensions for RDMA (iSER) Target on Infiniband fabrics.
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 3591855cc5b..d4c7928a0f3 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -22,11 +22,13 @@
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/llist.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <target/target_core_base.h>
#include <target/target_core_fabric.h>
#include <target/iscsi/iscsi_transport.h>
+#include <linux/semaphore.h>
#include "isert_proto.h"
#include "ib_isert.h"
@@ -46,10 +48,12 @@ static int
isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
struct isert_rdma_wr *wr);
static void
-isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
+isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
static int
-isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
- struct isert_rdma_wr *wr);
+isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+ struct isert_rdma_wr *wr);
+static int
+isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd);
static void
isert_qp_event_callback(struct ib_event *e, void *context)
@@ -86,7 +90,8 @@ isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
}
static int
-isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
+isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id,
+ u8 protection)
{
struct isert_device *device = isert_conn->conn_device;
struct ib_qp_init_attr attr;
@@ -118,6 +123,8 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
attr.cap.max_recv_sge = 1;
attr.sq_sig_type = IB_SIGNAL_REQ_WR;
attr.qp_type = IB_QPT_RC;
+ if (protection)
+ attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
pr_debug("isert_conn_setup_qp cma_id->device: %p\n",
cma_id->device);
@@ -206,7 +213,9 @@ isert_free_rx_descriptors(struct isert_conn *isert_conn)
isert_conn->conn_rx_descs = NULL;
}
+static void isert_cq_tx_work(struct work_struct *);
static void isert_cq_tx_callback(struct ib_cq *, void *);
+static void isert_cq_rx_work(struct work_struct *);
static void isert_cq_rx_callback(struct ib_cq *, void *);
static int
@@ -223,22 +232,29 @@ isert_create_device_ib_res(struct isert_device *device)
return ret;
/* asign function handlers */
- if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
- device->use_frwr = 1;
- device->reg_rdma_mem = isert_reg_rdma_frwr;
- device->unreg_rdma_mem = isert_unreg_rdma_frwr;
+ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
+ dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
+ device->use_fastreg = 1;
+ device->reg_rdma_mem = isert_reg_rdma;
+ device->unreg_rdma_mem = isert_unreg_rdma;
} else {
- device->use_frwr = 0;
+ device->use_fastreg = 0;
device->reg_rdma_mem = isert_map_rdma;
device->unreg_rdma_mem = isert_unmap_cmd;
}
+ /* Check signature cap */
+ device->pi_capable = dev_attr->device_cap_flags &
+ IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
+
device->cqs_used = min_t(int, num_online_cpus(),
device->ib_device->num_comp_vectors);
device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used);
- pr_debug("Using %d CQs, device %s supports %d vectors support FRWR %d\n",
+ pr_debug("Using %d CQs, device %s supports %d vectors support "
+ "Fast registration %d pi_capable %d\n",
device->cqs_used, device->ib_device->name,
- device->ib_device->num_comp_vectors, device->use_frwr);
+ device->ib_device->num_comp_vectors, device->use_fastreg,
+ device->pi_capable);
device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) *
device->cqs_used, GFP_KERNEL);
if (!device->cq_desc) {
@@ -247,47 +263,43 @@ isert_create_device_ib_res(struct isert_device *device)
}
cq_desc = device->cq_desc;
- device->dev_pd = ib_alloc_pd(ib_dev);
- if (IS_ERR(device->dev_pd)) {
- ret = PTR_ERR(device->dev_pd);
- pr_err("ib_alloc_pd failed for dev_pd: %d\n", ret);
- goto out_cq_desc;
- }
-
for (i = 0; i < device->cqs_used; i++) {
cq_desc[i].device = device;
cq_desc[i].cq_index = i;
+ INIT_WORK(&cq_desc[i].cq_rx_work, isert_cq_rx_work);
device->dev_rx_cq[i] = ib_create_cq(device->ib_device,
isert_cq_rx_callback,
isert_cq_event_callback,
(void *)&cq_desc[i],
ISER_MAX_RX_CQ_LEN, i);
- if (IS_ERR(device->dev_rx_cq[i]))
+ if (IS_ERR(device->dev_rx_cq[i])) {
+ ret = PTR_ERR(device->dev_rx_cq[i]);
+ device->dev_rx_cq[i] = NULL;
goto out_cq;
+ }
+ INIT_WORK(&cq_desc[i].cq_tx_work, isert_cq_tx_work);
device->dev_tx_cq[i] = ib_create_cq(device->ib_device,
isert_cq_tx_callback,
isert_cq_event_callback,
(void *)&cq_desc[i],
ISER_MAX_TX_CQ_LEN, i);
- if (IS_ERR(device->dev_tx_cq[i]))
+ if (IS_ERR(device->dev_tx_cq[i])) {
+ ret = PTR_ERR(device->dev_tx_cq[i]);
+ device->dev_tx_cq[i] = NULL;
goto out_cq;
+ }
- if (ib_req_notify_cq(device->dev_rx_cq[i], IB_CQ_NEXT_COMP))
+ ret = ib_req_notify_cq(device->dev_rx_cq[i], IB_CQ_NEXT_COMP);
+ if (ret)
goto out_cq;
- if (ib_req_notify_cq(device->dev_tx_cq[i], IB_CQ_NEXT_COMP))
+ ret = ib_req_notify_cq(device->dev_tx_cq[i], IB_CQ_NEXT_COMP);
+ if (ret)
goto out_cq;
}
- device->dev_mr = ib_get_dma_mr(device->dev_pd, IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(device->dev_mr)) {
- ret = PTR_ERR(device->dev_mr);
- pr_err("ib_get_dma_mr failed for dev_mr: %d\n", ret);
- goto out_cq;
- }
-
return 0;
out_cq:
@@ -303,9 +315,6 @@ out_cq:
ib_destroy_cq(device->dev_tx_cq[j]);
}
}
- ib_dealloc_pd(device->dev_pd);
-
-out_cq_desc:
kfree(device->cq_desc);
return ret;
@@ -328,8 +337,6 @@ isert_free_device_ib_res(struct isert_device *device)
device->dev_tx_cq[i] = NULL;
}
- ib_dereg_mr(device->dev_mr);
- ib_dealloc_pd(device->dev_pd);
kfree(device->cq_desc);
}
@@ -385,40 +392,136 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id)
}
static void
-isert_conn_free_frwr_pool(struct isert_conn *isert_conn)
+isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
{
struct fast_reg_descriptor *fr_desc, *tmp;
int i = 0;
- if (list_empty(&isert_conn->conn_frwr_pool))
+ if (list_empty(&isert_conn->conn_fr_pool))
return;
- pr_debug("Freeing conn %p frwr pool", isert_conn);
+ pr_debug("Freeing conn %p fastreg pool", isert_conn);
list_for_each_entry_safe(fr_desc, tmp,
- &isert_conn->conn_frwr_pool, list) {
+ &isert_conn->conn_fr_pool, list) {
list_del(&fr_desc->list);
ib_free_fast_reg_page_list(fr_desc->data_frpl);
ib_dereg_mr(fr_desc->data_mr);
+ if (fr_desc->pi_ctx) {
+ ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl);
+ ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
+ ib_destroy_mr(fr_desc->pi_ctx->sig_mr);
+ kfree(fr_desc->pi_ctx);
+ }
kfree(fr_desc);
++i;
}
- if (i < isert_conn->conn_frwr_pool_size)
+ if (i < isert_conn->conn_fr_pool_size)
pr_warn("Pool still has %d regions registered\n",
- isert_conn->conn_frwr_pool_size - i);
+ isert_conn->conn_fr_pool_size - i);
+}
+
+static int
+isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd,
+ struct fast_reg_descriptor *fr_desc, u8 protection)
+{
+ int ret;
+
+ fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
+ ISCSI_ISER_SG_TABLESIZE);
+ if (IS_ERR(fr_desc->data_frpl)) {
+ pr_err("Failed to allocate data frpl err=%ld\n",
+ PTR_ERR(fr_desc->data_frpl));
+ return PTR_ERR(fr_desc->data_frpl);
+ }
+
+ fr_desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
+ if (IS_ERR(fr_desc->data_mr)) {
+ pr_err("Failed to allocate data frmr err=%ld\n",
+ PTR_ERR(fr_desc->data_mr));
+ ret = PTR_ERR(fr_desc->data_mr);
+ goto err_data_frpl;
+ }
+ pr_debug("Create fr_desc %p page_list %p\n",
+ fr_desc, fr_desc->data_frpl->page_list);
+ fr_desc->ind |= ISERT_DATA_KEY_VALID;
+
+ if (protection) {
+ struct ib_mr_init_attr mr_init_attr = {0};
+ struct pi_context *pi_ctx;
+
+ fr_desc->pi_ctx = kzalloc(sizeof(*fr_desc->pi_ctx), GFP_KERNEL);
+ if (!fr_desc->pi_ctx) {
+ pr_err("Failed to allocate pi context\n");
+ ret = -ENOMEM;
+ goto err_data_mr;
+ }
+ pi_ctx = fr_desc->pi_ctx;
+
+ pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
+ ISCSI_ISER_SG_TABLESIZE);
+ if (IS_ERR(pi_ctx->prot_frpl)) {
+ pr_err("Failed to allocate prot frpl err=%ld\n",
+ PTR_ERR(pi_ctx->prot_frpl));
+ ret = PTR_ERR(pi_ctx->prot_frpl);
+ goto err_pi_ctx;
+ }
+
+ pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
+ if (IS_ERR(pi_ctx->prot_mr)) {
+ pr_err("Failed to allocate prot frmr err=%ld\n",
+ PTR_ERR(pi_ctx->prot_mr));
+ ret = PTR_ERR(pi_ctx->prot_mr);
+ goto err_prot_frpl;
+ }
+ fr_desc->ind |= ISERT_PROT_KEY_VALID;
+
+ mr_init_attr.max_reg_descriptors = 2;
+ mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
+ pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
+ if (IS_ERR(pi_ctx->sig_mr)) {
+ pr_err("Failed to allocate signature enabled mr err=%ld\n",
+ PTR_ERR(pi_ctx->sig_mr));
+ ret = PTR_ERR(pi_ctx->sig_mr);
+ goto err_prot_mr;
+ }
+ fr_desc->ind |= ISERT_SIG_KEY_VALID;
+ }
+ fr_desc->ind &= ~ISERT_PROTECTED;
+
+ return 0;
+err_prot_mr:
+ ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
+err_prot_frpl:
+ ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl);
+err_pi_ctx:
+ kfree(fr_desc->pi_ctx);
+err_data_mr:
+ ib_dereg_mr(fr_desc->data_mr);
+err_data_frpl:
+ ib_free_fast_reg_page_list(fr_desc->data_frpl);
+
+ return ret;
}
static int
-isert_conn_create_frwr_pool(struct isert_conn *isert_conn)
+isert_conn_create_fastreg_pool(struct isert_conn *isert_conn, u8 pi_support)
{
struct fast_reg_descriptor *fr_desc;
struct isert_device *device = isert_conn->conn_device;
- int i, ret;
+ struct se_session *se_sess = isert_conn->conn->sess->se_sess;
+ struct se_node_acl *se_nacl = se_sess->se_node_acl;
+ int i, ret, tag_num;
+ /*
+ * Setup the number of FRMRs based upon the number of tags
+ * available to session in iscsi_target_locate_portal().
+ */
+ tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth);
+ tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS;
- INIT_LIST_HEAD(&isert_conn->conn_frwr_pool);
- isert_conn->conn_frwr_pool_size = 0;
- for (i = 0; i < ISCSI_DEF_XMIT_CMDS_MAX; i++) {
+ isert_conn->conn_fr_pool_size = 0;
+ for (i = 0; i < tag_num; i++) {
fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
if (!fr_desc) {
pr_err("Failed to allocate fast_reg descriptor\n");
@@ -426,40 +529,27 @@ isert_conn_create_frwr_pool(struct isert_conn *isert_conn)
goto err;
}
- fr_desc->data_frpl =
- ib_alloc_fast_reg_page_list(device->ib_device,
- ISCSI_ISER_SG_TABLESIZE);
- if (IS_ERR(fr_desc->data_frpl)) {
- pr_err("Failed to allocate fr_pg_list err=%ld\n",
- PTR_ERR(fr_desc->data_frpl));
- ret = PTR_ERR(fr_desc->data_frpl);
- goto err;
- }
-
- fr_desc->data_mr = ib_alloc_fast_reg_mr(device->dev_pd,
- ISCSI_ISER_SG_TABLESIZE);
- if (IS_ERR(fr_desc->data_mr)) {
- pr_err("Failed to allocate frmr err=%ld\n",
- PTR_ERR(fr_desc->data_mr));
- ret = PTR_ERR(fr_desc->data_mr);
- ib_free_fast_reg_page_list(fr_desc->data_frpl);
+ ret = isert_create_fr_desc(device->ib_device,
+ isert_conn->conn_pd, fr_desc,
+ pi_support);
+ if (ret) {
+ pr_err("Failed to create fastreg descriptor err=%d\n",
+ ret);
+ kfree(fr_desc);
goto err;
}
- pr_debug("Create fr_desc %p page_list %p\n",
- fr_desc, fr_desc->data_frpl->page_list);
- fr_desc->valid = true;
- list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool);
- isert_conn->conn_frwr_pool_size++;
+ list_add_tail(&fr_desc->list, &isert_conn->conn_fr_pool);
+ isert_conn->conn_fr_pool_size++;
}
- pr_debug("Creating conn %p frwr pool size=%d",
- isert_conn, isert_conn->conn_frwr_pool_size);
+ pr_debug("Creating conn %p fastreg pool size=%d",
+ isert_conn, isert_conn->conn_fr_pool_size);
return 0;
err:
- isert_conn_free_frwr_pool(isert_conn);
+ isert_conn_free_fastreg_pool(isert_conn);
return ret;
}
@@ -472,6 +562,15 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
struct isert_device *device;
struct ib_device *ib_dev = cma_id->device;
int ret = 0;
+ u8 pi_support;
+
+ spin_lock_bh(&np->np_thread_lock);
+ if (!np->enabled) {
+ spin_unlock_bh(&np->np_thread_lock);
+ pr_debug("iscsi_np is not enabled, reject connect request\n");
+ return rdma_reject(cma_id, NULL, 0);
+ }
+ spin_unlock_bh(&np->np_thread_lock);
pr_debug("Entering isert_connect_request cma_id: %p, context: %p\n",
cma_id, cma_id->context);
@@ -484,12 +583,13 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
isert_conn->state = ISER_CONN_INIT;
INIT_LIST_HEAD(&isert_conn->conn_accept_node);
init_completion(&isert_conn->conn_login_comp);
- init_waitqueue_head(&isert_conn->conn_wait);
- init_waitqueue_head(&isert_conn->conn_wait_comp_err);
+ init_completion(&isert_conn->conn_wait);
+ init_completion(&isert_conn->conn_wait_comp_err);
kref_init(&isert_conn->conn_kref);
kref_get(&isert_conn->conn_kref);
mutex_init(&isert_conn->conn_mutex);
spin_lock_init(&isert_conn->conn_lock);
+ INIT_LIST_HEAD(&isert_conn->conn_fr_pool);
cma_id->context = isert_conn;
isert_conn->conn_cm_id = cma_id;
@@ -544,33 +644,48 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
}
isert_conn->conn_device = device;
- isert_conn->conn_pd = device->dev_pd;
- isert_conn->conn_mr = device->dev_mr;
+ isert_conn->conn_pd = ib_alloc_pd(isert_conn->conn_device->ib_device);
+ if (IS_ERR(isert_conn->conn_pd)) {
+ ret = PTR_ERR(isert_conn->conn_pd);
+ pr_err("ib_alloc_pd failed for conn %p: ret=%d\n",
+ isert_conn, ret);
+ goto out_pd;
+ }
- if (device->use_frwr) {
- ret = isert_conn_create_frwr_pool(isert_conn);
- if (ret) {
- pr_err("Conn: %p failed to create frwr_pool\n", isert_conn);
- goto out_frwr;
- }
+ isert_conn->conn_mr = ib_get_dma_mr(isert_conn->conn_pd,
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(isert_conn->conn_mr)) {
+ ret = PTR_ERR(isert_conn->conn_mr);
+ pr_err("ib_get_dma_mr failed for conn %p: ret=%d\n",
+ isert_conn, ret);
+ goto out_mr;
+ }
+
+ pi_support = np->tpg_np->tpg->tpg_attrib.t10_pi;
+ if (pi_support && !device->pi_capable) {
+ pr_err("Protection information requested but not supported, "
+ "rejecting connect request\n");
+ ret = rdma_reject(cma_id, NULL, 0);
+ goto out_mr;
}
- ret = isert_conn_setup_qp(isert_conn, cma_id);
+ ret = isert_conn_setup_qp(isert_conn, cma_id, pi_support);
if (ret)
goto out_conn_dev;
mutex_lock(&isert_np->np_accept_mutex);
- list_add_tail(&isert_np->np_accept_list, &isert_conn->conn_accept_node);
+ list_add_tail(&isert_conn->conn_accept_node, &isert_np->np_accept_list);
mutex_unlock(&isert_np->np_accept_mutex);
- pr_debug("isert_connect_request() waking up np_accept_wq: %p\n", np);
- wake_up(&isert_np->np_accept_wq);
+ pr_debug("isert_connect_request() up np_sem np: %p\n", np);
+ up(&isert_np->np_sem);
return 0;
out_conn_dev:
- if (device->use_frwr)
- isert_conn_free_frwr_pool(isert_conn);
-out_frwr:
+ ib_dereg_mr(isert_conn->conn_mr);
+out_mr:
+ ib_dealloc_pd(isert_conn->conn_pd);
+out_pd:
isert_device_try_release(device);
out_rsp_dma_map:
ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
@@ -594,8 +709,8 @@ isert_connect_release(struct isert_conn *isert_conn)
pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
- if (device->use_frwr)
- isert_conn_free_frwr_pool(isert_conn);
+ if (device && device->use_fastreg)
+ isert_conn_free_fastreg_pool(isert_conn);
if (isert_conn->conn_qp) {
cq_index = ((struct isert_cq_desc *)
@@ -609,6 +724,9 @@ isert_connect_release(struct isert_conn *isert_conn)
isert_free_rx_descriptors(isert_conn);
rdma_destroy_id(isert_conn->conn_cm_id);
+ ib_dereg_mr(isert_conn->conn_mr);
+ ib_dealloc_pd(isert_conn->conn_pd);
+
if (isert_conn->login_buf) {
ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE);
@@ -657,11 +775,11 @@ isert_disconnect_work(struct work_struct *work)
pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
mutex_lock(&isert_conn->conn_mutex);
- isert_conn->state = ISER_CONN_DOWN;
+ if (isert_conn->state == ISER_CONN_UP)
+ isert_conn->state = ISER_CONN_TERMINATING;
if (isert_conn->post_recv_buf_count == 0 &&
atomic_read(&isert_conn->post_send_buf_count) == 0) {
- pr_debug("Calling wake_up(&isert_conn->conn_wait);\n");
mutex_unlock(&isert_conn->conn_mutex);
goto wake_up;
}
@@ -670,26 +788,25 @@ isert_disconnect_work(struct work_struct *work)
isert_put_conn(isert_conn);
return;
}
- if (!isert_conn->logout_posted) {
- pr_debug("Calling rdma_disconnect for !logout_posted from"
- " isert_disconnect_work\n");
+
+ if (isert_conn->disconnect) {
+ /* Send DREQ/DREP towards our initiator */
rdma_disconnect(isert_conn->conn_cm_id);
- mutex_unlock(&isert_conn->conn_mutex);
- iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
- goto wake_up;
}
+
mutex_unlock(&isert_conn->conn_mutex);
wake_up:
- wake_up(&isert_conn->conn_wait);
+ complete(&isert_conn->conn_wait);
isert_put_conn(isert_conn);
}
static void
-isert_disconnected_handler(struct rdma_cm_id *cma_id)
+isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect)
{
struct isert_conn *isert_conn = (struct isert_conn *)cma_id->context;
+ isert_conn->disconnect = disconnect;
INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work);
schedule_work(&isert_conn->conn_logout_work);
}
@@ -698,29 +815,28 @@ static int
isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
int ret = 0;
+ bool disconnect = false;
pr_debug("isert_cma_handler: event %d status %d conn %p id %p\n",
event->event, event->status, cma_id->context, cma_id);
switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST:
- pr_debug("RDMA_CM_EVENT_CONNECT_REQUEST: >>>>>>>>>>>>>>>\n");
ret = isert_connect_request(cma_id, event);
break;
case RDMA_CM_EVENT_ESTABLISHED:
- pr_debug("RDMA_CM_EVENT_ESTABLISHED >>>>>>>>>>>>>>\n");
isert_connected_handler(cma_id);
break;
- case RDMA_CM_EVENT_DISCONNECTED:
- pr_debug("RDMA_CM_EVENT_DISCONNECTED: >>>>>>>>>>>>>>\n");
- isert_disconnected_handler(cma_id);
- break;
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- case RDMA_CM_EVENT_ADDR_CHANGE:
+ case RDMA_CM_EVENT_ADDR_CHANGE: /* FALLTHRU */
+ case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */
+ case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */
+ disconnect = true;
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */
+ isert_disconnected_handler(cma_id, disconnect);
break;
case RDMA_CM_EVENT_CONNECT_ERROR:
default:
- pr_err("Unknown RDMA CMA event: %d\n", event->event);
+ pr_err("Unhandled RDMA CMA event: %d\n", event->event);
break;
}
@@ -843,14 +959,33 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn,
}
static void
-isert_init_send_wr(struct isert_cmd *isert_cmd, struct ib_send_wr *send_wr)
+isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
+ struct ib_send_wr *send_wr, bool coalesce)
{
+ struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc;
+
isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
send_wr->opcode = IB_WR_SEND;
- send_wr->send_flags = IB_SEND_SIGNALED;
- send_wr->sg_list = &isert_cmd->tx_desc.tx_sg[0];
+ send_wr->sg_list = &tx_desc->tx_sg[0];
send_wr->num_sge = isert_cmd->tx_desc.num_sge;
+ /*
+ * Coalesce send completion interrupts by only setting IB_SEND_SIGNALED
+ * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls.
+ */
+ mutex_lock(&isert_conn->conn_mutex);
+ if (coalesce && isert_conn->state == ISER_CONN_UP &&
+ ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) {
+ tx_desc->llnode_active = true;
+ llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist);
+ mutex_unlock(&isert_conn->conn_mutex);
+ return;
+ }
+ isert_conn->conn_comp_batch = 0;
+ tx_desc->comp_llnode_batch = llist_del_all(&isert_conn->conn_comp_llist);
+ mutex_unlock(&isert_conn->conn_mutex);
+
+ send_wr->send_flags = IB_SEND_SIGNALED;
}
static int
@@ -918,6 +1053,20 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
}
if (!login->login_failed) {
if (login->login_complete) {
+ if (!conn->sess->sess_ops->SessionType &&
+ isert_conn->conn_device->use_fastreg) {
+ /* Normal Session and fastreg is used */
+ u8 pi_support = login->np->tpg_np->tpg->tpg_attrib.t10_pi;
+
+ ret = isert_conn_create_fastreg_pool(isert_conn,
+ pi_support);
+ if (ret) {
+ pr_err("Conn: %p failed to create"
+ " fastreg pool\n", isert_conn);
+ return ret;
+ }
+ }
+
ret = isert_alloc_rx_descriptors(isert_conn);
if (ret)
return ret;
@@ -992,13 +1141,13 @@ isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen,
}
static struct iscsi_cmd
-*isert_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp)
+*isert_allocate_cmd(struct iscsi_conn *conn)
{
struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
struct isert_cmd *isert_cmd;
struct iscsi_cmd *cmd;
- cmd = iscsit_allocate_cmd(conn, gfp);
+ cmd = iscsit_allocate_cmd(conn, TASK_INTERRUPTIBLE);
if (!cmd) {
pr_err("Unable to allocate iscsi_cmd + isert_cmd\n");
return NULL;
@@ -1062,6 +1211,8 @@ sequence_cmd:
if (!rc && dump_payload == false && unsol_data)
iscsit_set_unsoliticed_dataout(cmd);
+ else if (dump_payload && imm_data)
+ target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd);
return 0;
}
@@ -1187,7 +1338,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
switch (opcode) {
case ISCSI_OP_SCSI_CMD:
- cmd = isert_allocate_cmd(conn, GFP_KERNEL);
+ cmd = isert_allocate_cmd(conn);
if (!cmd)
break;
@@ -1201,7 +1352,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
rx_desc, (unsigned char *)hdr);
break;
case ISCSI_OP_NOOP_OUT:
- cmd = isert_allocate_cmd(conn, GFP_KERNEL);
+ cmd = isert_allocate_cmd(conn);
if (!cmd)
break;
@@ -1214,7 +1365,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
(unsigned char *)hdr);
break;
case ISCSI_OP_SCSI_TMFUNC:
- cmd = isert_allocate_cmd(conn, GFP_KERNEL);
+ cmd = isert_allocate_cmd(conn);
if (!cmd)
break;
@@ -1222,7 +1373,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
(unsigned char *)hdr);
break;
case ISCSI_OP_LOGOUT:
- cmd = isert_allocate_cmd(conn, GFP_KERNEL);
+ cmd = isert_allocate_cmd(conn);
if (!cmd)
break;
@@ -1233,7 +1384,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
HZ);
break;
case ISCSI_OP_TEXT:
- cmd = isert_allocate_cmd(conn, GFP_KERNEL);
+ cmd = isert_allocate_cmd(conn);
if (!cmd)
break;
@@ -1343,19 +1494,60 @@ isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,
}
}
+static int
+isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
+ struct scatterlist *sg, u32 nents, u32 length, u32 offset,
+ enum iser_ib_op_code op, struct isert_data_buf *data)
+{
+ struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+
+ data->dma_dir = op == ISER_IB_RDMA_WRITE ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE;
+
+ data->len = length - offset;
+ data->offset = offset;
+ data->sg_off = data->offset / PAGE_SIZE;
+
+ data->sg = &sg[data->sg_off];
+ data->nents = min_t(unsigned int, nents - data->sg_off,
+ ISCSI_ISER_SG_TABLESIZE);
+ data->len = min_t(unsigned int, data->len, ISCSI_ISER_SG_TABLESIZE *
+ PAGE_SIZE);
+
+ data->dma_nents = ib_dma_map_sg(ib_dev, data->sg, data->nents,
+ data->dma_dir);
+ if (unlikely(!data->dma_nents)) {
+ pr_err("Cmd: unable to dma map SGs %p\n", sg);
+ return -EINVAL;
+ }
+
+ pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
+ isert_cmd, data->dma_nents, data->sg, data->nents, data->len);
+
+ return 0;
+}
+
+static void
+isert_unmap_data_buf(struct isert_conn *isert_conn, struct isert_data_buf *data)
+{
+ struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+
+ ib_dma_unmap_sg(ib_dev, data->sg, data->nents, data->dma_dir);
+ memset(data, 0, sizeof(*data));
+}
+
+
+
static void
isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
{
struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
pr_debug("isert_unmap_cmd: %p\n", isert_cmd);
- if (wr->sge) {
+
+ if (wr->data.sg) {
pr_debug("isert_unmap_cmd: %p unmap_sg op\n", isert_cmd);
- ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge,
- (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- wr->sge = NULL;
+ isert_unmap_data_buf(isert_conn, &wr->data);
}
if (wr->send_wr) {
@@ -1372,29 +1564,29 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
}
static void
-isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
+isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
{
struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
LIST_HEAD(unmap_list);
- pr_debug("unreg_frwr_cmd: %p\n", isert_cmd);
+ pr_debug("unreg_fastreg_cmd: %p\n", isert_cmd);
if (wr->fr_desc) {
- pr_debug("unreg_frwr_cmd: %p free fr_desc %p\n",
+ pr_debug("unreg_fastreg_cmd: %p free fr_desc %p\n",
isert_cmd, wr->fr_desc);
+ if (wr->fr_desc->ind & ISERT_PROTECTED) {
+ isert_unmap_data_buf(isert_conn, &wr->prot);
+ wr->fr_desc->ind &= ~ISERT_PROTECTED;
+ }
spin_lock_bh(&isert_conn->conn_lock);
- list_add_tail(&wr->fr_desc->list, &isert_conn->conn_frwr_pool);
+ list_add_tail(&wr->fr_desc->list, &isert_conn->conn_fr_pool);
spin_unlock_bh(&isert_conn->conn_lock);
wr->fr_desc = NULL;
}
- if (wr->sge) {
- pr_debug("unreg_frwr_cmd: %p unmap_sg op\n", isert_cmd);
- ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge,
- (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- wr->sge = NULL;
+ if (wr->data.sg) {
+ pr_debug("unreg_fastreg_cmd: %p unmap_sg op\n", isert_cmd);
+ isert_unmap_data_buf(isert_conn, &wr->data);
}
wr->ib_sge = NULL;
@@ -1402,7 +1594,7 @@ isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn
}
static void
-isert_put_cmd(struct isert_cmd *isert_cmd)
+isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
{
struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
struct isert_conn *isert_conn = isert_cmd->conn;
@@ -1415,11 +1607,24 @@ isert_put_cmd(struct isert_cmd *isert_cmd)
case ISCSI_OP_SCSI_CMD:
spin_lock_bh(&conn->cmd_lock);
if (!list_empty(&cmd->i_conn_node))
- list_del(&cmd->i_conn_node);
+ list_del_init(&cmd->i_conn_node);
spin_unlock_bh(&conn->cmd_lock);
- if (cmd->data_direction == DMA_TO_DEVICE)
+ if (cmd->data_direction == DMA_TO_DEVICE) {
iscsit_stop_dataout_timer(cmd);
+ /*
+ * Check for special case during comp_err where
+ * WRITE_PENDING has been handed off from core,
+ * but requires an extra target_put_sess_cmd()
+ * before transport_generic_free_cmd() below.
+ */
+ if (comp_err &&
+ cmd->se_cmd.t_state == TRANSPORT_WRITE_PENDING) {
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+
+ target_put_sess_cmd(se_cmd->se_sess, se_cmd);
+ }
+ }
device->unreg_rdma_mem(isert_cmd, isert_conn);
transport_generic_free_cmd(&cmd->se_cmd, 0);
@@ -1427,7 +1632,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd)
case ISCSI_OP_SCSI_TMFUNC:
spin_lock_bh(&conn->cmd_lock);
if (!list_empty(&cmd->i_conn_node))
- list_del(&cmd->i_conn_node);
+ list_del_init(&cmd->i_conn_node);
spin_unlock_bh(&conn->cmd_lock);
transport_generic_free_cmd(&cmd->se_cmd, 0);
@@ -1437,7 +1642,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd)
case ISCSI_OP_TEXT:
spin_lock_bh(&conn->cmd_lock);
if (!list_empty(&cmd->i_conn_node))
- list_del(&cmd->i_conn_node);
+ list_del_init(&cmd->i_conn_node);
spin_unlock_bh(&conn->cmd_lock);
/*
@@ -1474,7 +1679,7 @@ isert_unmap_tx_desc(struct iser_tx_desc *tx_desc, struct ib_device *ib_dev)
static void
isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd,
- struct ib_device *ib_dev)
+ struct ib_device *ib_dev, bool comp_err)
{
if (isert_cmd->pdu_buf_dma != 0) {
pr_debug("Calling ib_dma_unmap_single for isert_cmd->pdu_buf_dma\n");
@@ -1484,7 +1689,77 @@ isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd,
}
isert_unmap_tx_desc(tx_desc, ib_dev);
- isert_put_cmd(isert_cmd);
+ isert_put_cmd(isert_cmd, comp_err);
+}
+
+static int
+isert_check_pi_status(struct se_cmd *se_cmd, struct ib_mr *sig_mr)
+{
+ struct ib_mr_status mr_status;
+ int ret;
+
+ ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
+ if (ret) {
+ pr_err("ib_check_mr_status failed, ret %d\n", ret);
+ goto fail_mr_status;
+ }
+
+ if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
+ u64 sec_offset_err;
+ u32 block_size = se_cmd->se_dev->dev_attrib.block_size + 8;
+
+ switch (mr_status.sig_err.err_type) {
+ case IB_SIG_BAD_GUARD:
+ se_cmd->pi_err = TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
+ break;
+ case IB_SIG_BAD_REFTAG:
+ se_cmd->pi_err = TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
+ break;
+ case IB_SIG_BAD_APPTAG:
+ se_cmd->pi_err = TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED;
+ break;
+ }
+ sec_offset_err = mr_status.sig_err.sig_err_offset;
+ do_div(sec_offset_err, block_size);
+ se_cmd->bad_sector = sec_offset_err + se_cmd->t_task_lba;
+
+ pr_err("isert: PI error found type %d at sector 0x%llx "
+ "expected 0x%x vs actual 0x%x\n",
+ mr_status.sig_err.err_type,
+ (unsigned long long)se_cmd->bad_sector,
+ mr_status.sig_err.expected,
+ mr_status.sig_err.actual);
+ ret = 1;
+ }
+
+fail_mr_status:
+ return ret;
+}
+
+static void
+isert_completion_rdma_write(struct iser_tx_desc *tx_desc,
+ struct isert_cmd *isert_cmd)
+{
+ struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+ struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+ struct isert_conn *isert_conn = isert_cmd->conn;
+ struct isert_device *device = isert_conn->conn_device;
+ int ret = 0;
+
+ if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) {
+ ret = isert_check_pi_status(se_cmd,
+ wr->fr_desc->pi_ctx->sig_mr);
+ wr->fr_desc->ind &= ~ISERT_PROTECTED;
+ }
+
+ device->unreg_rdma_mem(isert_cmd, isert_conn);
+ wr->send_wr_num = 0;
+ if (ret)
+ transport_send_check_condition_and_sense(se_cmd,
+ se_cmd->pi_err, 0);
+ else
+ isert_put_response(isert_conn->conn, cmd);
}
static void
@@ -1496,10 +1771,18 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_conn *isert_conn = isert_cmd->conn;
struct isert_device *device = isert_conn->conn_device;
+ int ret = 0;
+
+ if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) {
+ ret = isert_check_pi_status(se_cmd,
+ wr->fr_desc->pi_ctx->sig_mr);
+ wr->fr_desc->ind &= ~ISERT_PROTECTED;
+ }
iscsit_stop_dataout_timer(cmd);
device->unreg_rdma_mem(isert_cmd, isert_conn);
- cmd->write_data_done = wr->cur_rdma_length;
+ cmd->write_data_done = wr->data.len;
+ wr->send_wr_num = 0;
pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
spin_lock_bh(&cmd->istate_lock);
@@ -1507,7 +1790,11 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
spin_unlock_bh(&cmd->istate_lock);
- target_execute_cmd(se_cmd);
+ if (ret)
+ transport_send_check_condition_and_sense(se_cmd,
+ se_cmd->pi_err, 0);
+ else
+ target_execute_cmd(se_cmd);
}
static void
@@ -1527,28 +1814,25 @@ isert_do_control_comp(struct work_struct *work)
iscsit_tmr_post_handler(cmd, cmd->conn);
cmd->i_state = ISTATE_SENT_STATUS;
- isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev);
+ isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
break;
case ISTATE_SEND_REJECT:
pr_debug("Got isert_do_control_comp ISTATE_SEND_REJECT: >>>\n");
atomic_dec(&isert_conn->post_send_buf_count);
cmd->i_state = ISTATE_SENT_STATUS;
- isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev);
+ isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
break;
case ISTATE_SEND_LOGOUTRSP:
pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n");
- /*
- * Call atomic_dec(&isert_conn->post_send_buf_count)
- * from isert_free_conn()
- */
- isert_conn->logout_posted = true;
+
+ atomic_dec(&isert_conn->post_send_buf_count);
iscsit_logout_post_handler(cmd, cmd->conn);
break;
case ISTATE_SEND_TEXTRSP:
atomic_dec(&isert_conn->post_send_buf_count);
cmd->i_state = ISTATE_SENT_STATUS;
- isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev);
+ isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
break;
default:
pr_err("Unknown do_control_comp i_state %d\n", cmd->i_state);
@@ -1564,6 +1848,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc,
struct ib_device *ib_dev)
{
struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
+ struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
if (cmd->i_state == ISTATE_SEND_TASKMGTRSP ||
cmd->i_state == ISTATE_SEND_LOGOUTRSP ||
@@ -1575,15 +1860,26 @@ isert_response_completion(struct iser_tx_desc *tx_desc,
queue_work(isert_comp_wq, &isert_cmd->comp_work);
return;
}
- atomic_dec(&isert_conn->post_send_buf_count);
+
+ /**
+ * If send_wr_num is 0 this means that we got
+ * RDMA completion and we cleared it and we should
+ * simply decrement the response post. else the
+ * response is incorporated in send_wr_num, just
+ * sub it.
+ **/
+ if (wr->send_wr_num)
+ atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
+ else
+ atomic_dec(&isert_conn->post_send_buf_count);
cmd->i_state = ISTATE_SENT_STATUS;
- isert_completion_put(tx_desc, isert_cmd, ib_dev);
+ isert_completion_put(tx_desc, isert_cmd, ib_dev, false);
}
static void
-isert_send_completion(struct iser_tx_desc *tx_desc,
- struct isert_conn *isert_conn)
+__isert_send_completion(struct iser_tx_desc *tx_desc,
+ struct isert_conn *isert_conn)
{
struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
@@ -1607,13 +1903,14 @@ isert_send_completion(struct iser_tx_desc *tx_desc,
isert_conn, ib_dev);
break;
case ISER_IB_RDMA_WRITE:
- pr_err("isert_send_completion: Got ISER_IB_RDMA_WRITE\n");
- dump_stack();
+ pr_debug("isert_send_completion: Got ISER_IB_RDMA_WRITE\n");
+ atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
+ isert_completion_rdma_write(tx_desc, isert_cmd);
break;
case ISER_IB_RDMA_READ:
pr_debug("isert_send_completion: Got ISER_IB_RDMA_READ:\n");
- atomic_dec(&isert_conn->post_send_buf_count);
+ atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
isert_completion_rdma_read(tx_desc, isert_cmd);
break;
default:
@@ -1624,31 +1921,120 @@ isert_send_completion(struct iser_tx_desc *tx_desc,
}
static void
-isert_cq_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn)
+isert_send_completion(struct iser_tx_desc *tx_desc,
+ struct isert_conn *isert_conn)
+{
+ struct llist_node *llnode = tx_desc->comp_llnode_batch;
+ struct iser_tx_desc *t;
+ /*
+ * Drain coalesced completion llist starting from comp_llnode_batch
+ * setup in isert_init_send_wr(), and then complete trailing tx_desc.
+ */
+ while (llnode) {
+ t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
+ llnode = llist_next(llnode);
+ __isert_send_completion(t, isert_conn);
+ }
+ __isert_send_completion(tx_desc, isert_conn);
+}
+
+static void
+isert_cq_drain_comp_llist(struct isert_conn *isert_conn, struct ib_device *ib_dev)
{
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct llist_node *llnode;
+ struct isert_rdma_wr *wr;
+ struct iser_tx_desc *t;
+
+ mutex_lock(&isert_conn->conn_mutex);
+ llnode = llist_del_all(&isert_conn->conn_comp_llist);
+ isert_conn->conn_comp_batch = 0;
+ mutex_unlock(&isert_conn->conn_mutex);
+
+ while (llnode) {
+ t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
+ llnode = llist_next(llnode);
+ wr = &t->isert_cmd->rdma_wr;
+
+ /**
+ * If send_wr_num is 0 this means that we got
+ * RDMA completion and we cleared it and we should
+ * simply decrement the response post. else the
+ * response is incorporated in send_wr_num, just
+ * sub it.
+ **/
+ if (wr->send_wr_num)
+ atomic_sub(wr->send_wr_num,
+ &isert_conn->post_send_buf_count);
+ else
+ atomic_dec(&isert_conn->post_send_buf_count);
- if (tx_desc) {
- struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
+ isert_completion_put(t, t->isert_cmd, ib_dev, true);
+ }
+}
- if (!isert_cmd)
- isert_unmap_tx_desc(tx_desc, ib_dev);
+static void
+isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn)
+{
+ struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
+ struct llist_node *llnode = tx_desc->comp_llnode_batch;
+ struct isert_rdma_wr *wr;
+ struct iser_tx_desc *t;
+
+ while (llnode) {
+ t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
+ llnode = llist_next(llnode);
+ wr = &t->isert_cmd->rdma_wr;
+
+ /**
+ * If send_wr_num is 0 this means that we got
+ * RDMA completion and we cleared it and we should
+ * simply decrement the response post. else the
+ * response is incorporated in send_wr_num, just
+ * sub it.
+ **/
+ if (wr->send_wr_num)
+ atomic_sub(wr->send_wr_num,
+ &isert_conn->post_send_buf_count);
else
- isert_completion_put(tx_desc, isert_cmd, ib_dev);
+ atomic_dec(&isert_conn->post_send_buf_count);
+
+ isert_completion_put(t, t->isert_cmd, ib_dev, true);
}
+ tx_desc->comp_llnode_batch = NULL;
- if (isert_conn->post_recv_buf_count == 0 &&
- atomic_read(&isert_conn->post_send_buf_count) == 0) {
- pr_debug("isert_cq_comp_err >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
- pr_debug("Calling wake_up from isert_cq_comp_err\n");
+ if (!isert_cmd)
+ isert_unmap_tx_desc(tx_desc, ib_dev);
+ else
+ isert_completion_put(tx_desc, isert_cmd, ib_dev, true);
+}
- mutex_lock(&isert_conn->conn_mutex);
- if (isert_conn->state != ISER_CONN_DOWN)
- isert_conn->state = ISER_CONN_TERMINATING;
- mutex_unlock(&isert_conn->conn_mutex);
+static void
+isert_cq_rx_comp_err(struct isert_conn *isert_conn)
+{
+ struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct iscsi_conn *conn = isert_conn->conn;
+
+ if (isert_conn->post_recv_buf_count)
+ return;
+
+ isert_cq_drain_comp_llist(isert_conn, ib_dev);
- wake_up(&isert_conn->conn_wait_comp_err);
+ if (conn->sess) {
+ target_sess_cmd_list_set_waiting(conn->sess->se_sess);
+ target_wait_for_sess_cmds(conn->sess->se_sess);
}
+
+ while (atomic_read(&isert_conn->post_send_buf_count))
+ msleep(3000);
+
+ mutex_lock(&isert_conn->conn_mutex);
+ isert_conn->state = ISER_CONN_DOWN;
+ mutex_unlock(&isert_conn->conn_mutex);
+
+ iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+
+ complete(&isert_conn->conn_wait_comp_err);
}
static void
@@ -1673,8 +2059,14 @@ isert_cq_tx_work(struct work_struct *work)
pr_debug("TX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n");
pr_debug("TX wc.status: 0x%08x\n", wc.status);
pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err);
- atomic_dec(&isert_conn->post_send_buf_count);
- isert_cq_comp_err(tx_desc, isert_conn);
+
+ if (wc.wr_id != ISER_FASTREG_LI_WRID) {
+ if (tx_desc->llnode_active)
+ continue;
+
+ atomic_dec(&isert_conn->post_send_buf_count);
+ isert_cq_tx_comp_err(tx_desc, isert_conn);
+ }
}
}
@@ -1686,7 +2078,6 @@ isert_cq_tx_callback(struct ib_cq *cq, void *context)
{
struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context;
- INIT_WORK(&cq_desc->cq_tx_work, isert_cq_tx_work);
queue_work(isert_comp_wq, &cq_desc->cq_tx_work);
}
@@ -1718,7 +2109,7 @@ isert_cq_rx_work(struct work_struct *work)
wc.vendor_err);
}
isert_conn->post_recv_buf_count--;
- isert_cq_comp_err(NULL, isert_conn);
+ isert_cq_rx_comp_err(isert_conn);
}
}
@@ -1730,7 +2121,6 @@ isert_cq_rx_callback(struct ib_cq *cq, void *context)
{
struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context;
- INIT_WORK(&cq_desc->cq_rx_work, isert_cq_rx_work);
queue_work(isert_rx_wq, &cq_desc->cq_rx_work);
}
@@ -1793,13 +2183,43 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
isert_cmd->tx_desc.num_sge = 2;
}
- isert_init_send_wr(isert_cmd, send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, true);
pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
return isert_post_response(isert_conn, isert_cmd);
}
+static void
+isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
+{
+ struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+ struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_device *device = isert_conn->conn_device;
+
+ spin_lock_bh(&conn->cmd_lock);
+ if (!list_empty(&cmd->i_conn_node))
+ list_del_init(&cmd->i_conn_node);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ if (cmd->data_direction == DMA_TO_DEVICE)
+ iscsit_stop_dataout_timer(cmd);
+
+ device->unreg_rdma_mem(isert_cmd, isert_conn);
+}
+
+static enum target_prot_op
+isert_get_sup_prot_ops(struct iscsi_conn *conn)
+{
+ struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+ struct isert_device *device = isert_conn->conn_device;
+
+ if (device->pi_capable)
+ return TARGET_PROT_ALL;
+
+ return TARGET_PROT_NORMAL;
+}
+
static int
isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
bool nopout_response)
@@ -1813,7 +2233,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
&isert_cmd->tx_desc.iscsi_header,
nopout_response);
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
- isert_init_send_wr(isert_cmd, send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
pr_debug("Posting NOPIN Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -1831,7 +2251,7 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
iscsit_build_logout_rsp(cmd, conn, (struct iscsi_logout_rsp *)
&isert_cmd->tx_desc.iscsi_header);
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
- isert_init_send_wr(isert_cmd, send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
pr_debug("Posting Logout Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -1849,7 +2269,7 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
iscsit_build_task_mgt_rsp(cmd, conn, (struct iscsi_tm_rsp *)
&isert_cmd->tx_desc.iscsi_header);
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
- isert_init_send_wr(isert_cmd, send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
pr_debug("Posting Task Management Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -1881,7 +2301,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
tx_dsg->lkey = isert_conn->conn_mr->lkey;
isert_cmd->tx_desc.num_sge = 2;
- isert_init_send_wr(isert_cmd, send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
pr_debug("Posting Reject IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -1900,7 +2320,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
int rc;
isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
- rc = iscsit_build_text_rsp(cmd, conn, hdr);
+ rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_INFINIBAND);
if (rc < 0)
return rc;
@@ -1921,7 +2341,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
tx_dsg->lkey = isert_conn->conn_mr->lkey;
isert_cmd->tx_desc.num_sge = 2;
}
- isert_init_send_wr(isert_cmd, send_wr);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
pr_debug("Posting Text Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
@@ -1981,56 +2401,39 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct isert_data_buf *data = &wr->data;
struct ib_send_wr *send_wr;
struct ib_sge *ib_sge;
- struct scatterlist *sg_start;
- u32 sg_off = 0, sg_nents;
- u32 offset = 0, data_len, data_left, rdma_write_max, va_offset = 0;
- int ret = 0, count, i, ib_sge_cnt;
+ u32 offset, data_len, data_left, rdma_write_max, va_offset = 0;
+ int ret = 0, i, ib_sge_cnt;
- if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
- data_left = se_cmd->data_length;
- iscsit_increment_maxcmdsn(cmd, conn->sess);
- cmd->stat_sn = conn->stat_sn++;
- } else {
- sg_off = cmd->write_data_done / PAGE_SIZE;
- data_left = se_cmd->data_length - cmd->write_data_done;
- offset = cmd->write_data_done;
- isert_cmd->tx_desc.isert_cmd = isert_cmd;
- }
+ isert_cmd->tx_desc.isert_cmd = isert_cmd;
- sg_start = &cmd->se_cmd.t_data_sg[sg_off];
- sg_nents = se_cmd->t_data_nents - sg_off;
+ offset = wr->iser_ib_op == ISER_IB_RDMA_READ ? cmd->write_data_done : 0;
+ ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg,
+ se_cmd->t_data_nents, se_cmd->data_length,
+ offset, wr->iser_ib_op, &wr->data);
+ if (ret)
+ return ret;
- count = ib_dma_map_sg(ib_dev, sg_start, sg_nents,
- (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (unlikely(!count)) {
- pr_err("Cmd: %p unrable to map SGs\n", isert_cmd);
- return -EINVAL;
- }
- wr->sge = sg_start;
- wr->num_sge = sg_nents;
- wr->cur_rdma_length = data_left;
- pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
- isert_cmd, count, sg_start, sg_nents, data_left);
+ data_left = data->len;
+ offset = data->offset;
- ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL);
+ ib_sge = kzalloc(sizeof(struct ib_sge) * data->nents, GFP_KERNEL);
if (!ib_sge) {
pr_warn("Unable to allocate ib_sge\n");
ret = -ENOMEM;
- goto unmap_sg;
+ goto unmap_cmd;
}
wr->ib_sge = ib_sge;
- wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge);
+ wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
GFP_KERNEL);
if (!wr->send_wr) {
pr_debug("Unable to allocate wr->send_wr\n");
ret = -ENOMEM;
- goto unmap_sg;
+ goto unmap_cmd;
}
wr->isert_cmd = isert_cmd;
@@ -2069,10 +2472,9 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
}
return 0;
-unmap_sg:
- ib_dma_unmap_sg(ib_dev, sg_start, sg_nents,
- (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
+unmap_cmd:
+ isert_unmap_data_buf(isert_conn, data);
+
return ret;
}
@@ -2116,51 +2518,70 @@ isert_map_fr_pagelist(struct ib_device *ib_dev,
}
static int
-isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc,
- struct isert_cmd *isert_cmd, struct isert_conn *isert_conn,
- struct ib_sge *ib_sge, u32 offset, unsigned int data_len)
+isert_fast_reg_mr(struct isert_conn *isert_conn,
+ struct fast_reg_descriptor *fr_desc,
+ struct isert_data_buf *mem,
+ enum isert_indicator ind,
+ struct ib_sge *sge)
{
- struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
- struct scatterlist *sg_start;
- u32 sg_off, page_off;
+ struct ib_mr *mr;
+ struct ib_fast_reg_page_list *frpl;
struct ib_send_wr fr_wr, inv_wr;
struct ib_send_wr *bad_wr, *wr = NULL;
+ int ret, pagelist_len;
+ u32 page_off;
u8 key;
- int ret, sg_nents, pagelist_len;
- sg_off = offset / PAGE_SIZE;
- sg_start = &cmd->se_cmd.t_data_sg[sg_off];
- sg_nents = min_t(unsigned int, cmd->se_cmd.t_data_nents - sg_off,
- ISCSI_ISER_SG_TABLESIZE);
- page_off = offset % PAGE_SIZE;
+ if (mem->dma_nents == 1) {
+ sge->lkey = isert_conn->conn_mr->lkey;
+ sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]);
+ sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]);
+ pr_debug("%s:%d sge: addr: 0x%llx length: %u lkey: %x\n",
+ __func__, __LINE__, sge->addr, sge->length,
+ sge->lkey);
+ return 0;
+ }
+
+ if (ind == ISERT_DATA_KEY_VALID) {
+ /* Registering data buffer */
+ mr = fr_desc->data_mr;
+ frpl = fr_desc->data_frpl;
+ } else {
+ /* Registering protection buffer */
+ mr = fr_desc->pi_ctx->prot_mr;
+ frpl = fr_desc->pi_ctx->prot_frpl;
+ }
- pr_debug("Cmd: %p use fr_desc %p sg_nents %d sg_off %d offset %u\n",
- isert_cmd, fr_desc, sg_nents, sg_off, offset);
+ page_off = mem->offset % PAGE_SIZE;
- pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents,
- &fr_desc->data_frpl->page_list[0]);
+ pr_debug("Use fr_desc %p sg_nents %d offset %u\n",
+ fr_desc, mem->nents, mem->offset);
- if (!fr_desc->valid) {
+ pagelist_len = isert_map_fr_pagelist(ib_dev, mem->sg, mem->nents,
+ &frpl->page_list[0]);
+
+ if (!(fr_desc->ind & ISERT_DATA_KEY_VALID)) {
memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.wr_id = ISER_FASTREG_LI_WRID;
inv_wr.opcode = IB_WR_LOCAL_INV;
- inv_wr.ex.invalidate_rkey = fr_desc->data_mr->rkey;
+ inv_wr.ex.invalidate_rkey = mr->rkey;
wr = &inv_wr;
/* Bump the key */
- key = (u8)(fr_desc->data_mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(fr_desc->data_mr, ++key);
+ key = (u8)(mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(mr, ++key);
}
/* Prepare FASTREG WR */
memset(&fr_wr, 0, sizeof(fr_wr));
+ fr_wr.wr_id = ISER_FASTREG_LI_WRID;
fr_wr.opcode = IB_WR_FAST_REG_MR;
- fr_wr.wr.fast_reg.iova_start =
- fr_desc->data_frpl->page_list[0] + page_off;
- fr_wr.wr.fast_reg.page_list = fr_desc->data_frpl;
+ fr_wr.wr.fast_reg.iova_start = frpl->page_list[0] + page_off;
+ fr_wr.wr.fast_reg.page_list = frpl;
fr_wr.wr.fast_reg.page_list_len = pagelist_len;
fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
- fr_wr.wr.fast_reg.length = data_len;
- fr_wr.wr.fast_reg.rkey = fr_desc->data_mr->rkey;
+ fr_wr.wr.fast_reg.length = mem->len;
+ fr_wr.wr.fast_reg.rkey = mr->rkey;
fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE;
if (!wr)
@@ -2173,82 +2594,242 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc,
pr_err("fast registration failed, ret:%d\n", ret);
return ret;
}
- fr_desc->valid = false;
+ fr_desc->ind &= ~ind;
+
+ sge->lkey = mr->lkey;
+ sge->addr = frpl->page_list[0] + page_off;
+ sge->length = mem->len;
+
+ pr_debug("%s:%d sge: addr: 0x%llx length: %u lkey: %x\n",
+ __func__, __LINE__, sge->addr, sge->length,
+ sge->lkey);
+
+ return ret;
+}
+
+static inline enum ib_t10_dif_type
+se2ib_prot_type(enum target_prot_type prot_type)
+{
+ switch (prot_type) {
+ case TARGET_DIF_TYPE0_PROT:
+ return IB_T10DIF_NONE;
+ case TARGET_DIF_TYPE1_PROT:
+ return IB_T10DIF_TYPE1;
+ case TARGET_DIF_TYPE2_PROT:
+ return IB_T10DIF_TYPE2;
+ case TARGET_DIF_TYPE3_PROT:
+ return IB_T10DIF_TYPE3;
+ default:
+ return IB_T10DIF_NONE;
+ }
+}
+
+static int
+isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
+{
+ enum ib_t10_dif_type ib_prot_type = se2ib_prot_type(se_cmd->prot_type);
+
+ sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF;
+ sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
+ sig_attrs->mem.sig.dif.pi_interval =
+ se_cmd->se_dev->dev_attrib.block_size;
+ sig_attrs->wire.sig.dif.pi_interval =
+ se_cmd->se_dev->dev_attrib.block_size;
+
+ switch (se_cmd->prot_op) {
+ case TARGET_PROT_DIN_INSERT:
+ case TARGET_PROT_DOUT_STRIP:
+ sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE;
+ sig_attrs->wire.sig.dif.type = ib_prot_type;
+ sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
+ sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed;
+ break;
+ case TARGET_PROT_DOUT_INSERT:
+ case TARGET_PROT_DIN_STRIP:
+ sig_attrs->mem.sig.dif.type = ib_prot_type;
+ sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
+ sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed;
+ sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE;
+ break;
+ case TARGET_PROT_DIN_PASS:
+ case TARGET_PROT_DOUT_PASS:
+ sig_attrs->mem.sig.dif.type = ib_prot_type;
+ sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
+ sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed;
+ sig_attrs->wire.sig.dif.type = ib_prot_type;
+ sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
+ sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed;
+ break;
+ default:
+ pr_err("Unsupported PI operation %d\n", se_cmd->prot_op);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static inline u8
+isert_set_prot_checks(u8 prot_checks)
+{
+ return (prot_checks & TARGET_DIF_CHECK_GUARD ? 0xc0 : 0) |
+ (prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) |
+ (prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x0f : 0);
+}
+
+static int
+isert_reg_sig_mr(struct isert_conn *isert_conn, struct se_cmd *se_cmd,
+ struct fast_reg_descriptor *fr_desc,
+ struct ib_sge *data_sge, struct ib_sge *prot_sge,
+ struct ib_sge *sig_sge)
+{
+ struct ib_send_wr sig_wr, inv_wr;
+ struct ib_send_wr *bad_wr, *wr = NULL;
+ struct pi_context *pi_ctx = fr_desc->pi_ctx;
+ struct ib_sig_attrs sig_attrs;
+ int ret;
+ u32 key;
+
+ memset(&sig_attrs, 0, sizeof(sig_attrs));
+ ret = isert_set_sig_attrs(se_cmd, &sig_attrs);
+ if (ret)
+ goto err;
+
+ sig_attrs.check_mask = isert_set_prot_checks(se_cmd->prot_checks);
+
+ if (!(fr_desc->ind & ISERT_SIG_KEY_VALID)) {
+ memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.wr_id = ISER_FASTREG_LI_WRID;
+ inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey;
+ wr = &inv_wr;
+ /* Bump the key */
+ key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(pi_ctx->sig_mr, ++key);
+ }
+
+ memset(&sig_wr, 0, sizeof(sig_wr));
+ sig_wr.opcode = IB_WR_REG_SIG_MR;
+ sig_wr.wr_id = ISER_FASTREG_LI_WRID;
+ sig_wr.sg_list = data_sge;
+ sig_wr.num_sge = 1;
+ sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE;
+ sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
+ sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
+ if (se_cmd->t_prot_sg)
+ sig_wr.wr.sig_handover.prot = prot_sge;
+
+ if (!wr)
+ wr = &sig_wr;
+ else
+ wr->next = &sig_wr;
- ib_sge->lkey = fr_desc->data_mr->lkey;
- ib_sge->addr = fr_desc->data_frpl->page_list[0] + page_off;
- ib_sge->length = data_len;
+ ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
+ if (ret) {
+ pr_err("fast registration failed, ret:%d\n", ret);
+ goto err;
+ }
+ fr_desc->ind &= ~ISERT_SIG_KEY_VALID;
- pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n",
- ib_sge->addr, ib_sge->length, ib_sge->lkey);
+ sig_sge->lkey = pi_ctx->sig_mr->lkey;
+ sig_sge->addr = 0;
+ sig_sge->length = se_cmd->data_length;
+ if (se_cmd->prot_op != TARGET_PROT_DIN_STRIP &&
+ se_cmd->prot_op != TARGET_PROT_DOUT_INSERT)
+ /*
+ * We have protection guards on the wire
+ * so we need to set a larger transfer
+ */
+ sig_sge->length += se_cmd->prot_length;
+ pr_debug("sig_sge: addr: 0x%llx length: %u lkey: %x\n",
+ sig_sge->addr, sig_sge->length,
+ sig_sge->lkey);
+err:
return ret;
}
static int
-isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
- struct isert_rdma_wr *wr)
+isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+ struct isert_rdma_wr *wr)
{
struct se_cmd *se_cmd = &cmd->se_cmd;
struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
- struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
- struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+ struct isert_conn *isert_conn = conn->context;
+ struct ib_sge data_sge;
struct ib_send_wr *send_wr;
- struct ib_sge *ib_sge;
- struct scatterlist *sg_start;
- struct fast_reg_descriptor *fr_desc;
- u32 sg_off = 0, sg_nents;
- u32 offset = 0, data_len, data_left, rdma_write_max;
- int ret = 0, count;
+ struct fast_reg_descriptor *fr_desc = NULL;
+ u32 offset;
+ int ret = 0;
unsigned long flags;
- if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
- data_left = se_cmd->data_length;
- iscsit_increment_maxcmdsn(cmd, conn->sess);
- cmd->stat_sn = conn->stat_sn++;
- } else {
- sg_off = cmd->write_data_done / PAGE_SIZE;
- data_left = se_cmd->data_length - cmd->write_data_done;
- offset = cmd->write_data_done;
- isert_cmd->tx_desc.isert_cmd = isert_cmd;
- }
+ isert_cmd->tx_desc.isert_cmd = isert_cmd;
- sg_start = &cmd->se_cmd.t_data_sg[sg_off];
- sg_nents = se_cmd->t_data_nents - sg_off;
+ offset = wr->iser_ib_op == ISER_IB_RDMA_READ ? cmd->write_data_done : 0;
+ ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg,
+ se_cmd->t_data_nents, se_cmd->data_length,
+ offset, wr->iser_ib_op, &wr->data);
+ if (ret)
+ return ret;
- count = ib_dma_map_sg(ib_dev, sg_start, sg_nents,
- (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (unlikely(!count)) {
- pr_err("Cmd: %p unrable to map SGs\n", isert_cmd);
- return -EINVAL;
+ if (wr->data.dma_nents != 1 ||
+ se_cmd->prot_op != TARGET_PROT_NORMAL) {
+ spin_lock_irqsave(&isert_conn->conn_lock, flags);
+ fr_desc = list_first_entry(&isert_conn->conn_fr_pool,
+ struct fast_reg_descriptor, list);
+ list_del(&fr_desc->list);
+ spin_unlock_irqrestore(&isert_conn->conn_lock, flags);
+ wr->fr_desc = fr_desc;
}
- wr->sge = sg_start;
- wr->num_sge = sg_nents;
- pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
- isert_cmd, count, sg_start, sg_nents, data_left);
- memset(&wr->s_ib_sge, 0, sizeof(*ib_sge));
- ib_sge = &wr->s_ib_sge;
- wr->ib_sge = ib_sge;
+ ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->data,
+ ISERT_DATA_KEY_VALID, &data_sge);
+ if (ret)
+ goto unmap_cmd;
+
+ if (se_cmd->prot_op != TARGET_PROT_NORMAL) {
+ struct ib_sge prot_sge, sig_sge;
+
+ if (se_cmd->t_prot_sg) {
+ ret = isert_map_data_buf(isert_conn, isert_cmd,
+ se_cmd->t_prot_sg,
+ se_cmd->t_prot_nents,
+ se_cmd->prot_length,
+ 0, wr->iser_ib_op, &wr->prot);
+ if (ret)
+ goto unmap_cmd;
+
+ ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->prot,
+ ISERT_PROT_KEY_VALID, &prot_sge);
+ if (ret)
+ goto unmap_prot_cmd;
+ }
+
+ ret = isert_reg_sig_mr(isert_conn, se_cmd, fr_desc,
+ &data_sge, &prot_sge, &sig_sge);
+ if (ret)
+ goto unmap_prot_cmd;
+
+ fr_desc->ind |= ISERT_PROTECTED;
+ memcpy(&wr->s_ib_sge, &sig_sge, sizeof(sig_sge));
+ } else
+ memcpy(&wr->s_ib_sge, &data_sge, sizeof(data_sge));
+ wr->ib_sge = &wr->s_ib_sge;
wr->send_wr_num = 1;
memset(&wr->s_send_wr, 0, sizeof(*send_wr));
wr->send_wr = &wr->s_send_wr;
-
wr->isert_cmd = isert_cmd;
- rdma_write_max = ISCSI_ISER_SG_TABLESIZE * PAGE_SIZE;
send_wr = &isert_cmd->rdma_wr.s_send_wr;
- send_wr->sg_list = ib_sge;
+ send_wr->sg_list = &wr->s_ib_sge;
send_wr->num_sge = 1;
send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
send_wr->opcode = IB_WR_RDMA_WRITE;
send_wr->wr.rdma.remote_addr = isert_cmd->read_va;
send_wr->wr.rdma.rkey = isert_cmd->read_stag;
- send_wr->send_flags = 0;
- send_wr->next = &isert_cmd->tx_desc.send_wr;
+ send_wr->send_flags = se_cmd->prot_op == TARGET_PROT_NORMAL ?
+ 0 : IB_SEND_SIGNALED;
} else {
send_wr->opcode = IB_WR_RDMA_READ;
send_wr->wr.rdma.remote_addr = isert_cmd->write_va;
@@ -2256,29 +2837,18 @@ isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
send_wr->send_flags = IB_SEND_SIGNALED;
}
- data_len = min(data_left, rdma_write_max);
- wr->cur_rdma_length = data_len;
-
- spin_lock_irqsave(&isert_conn->conn_lock, flags);
- fr_desc = list_first_entry(&isert_conn->conn_frwr_pool,
- struct fast_reg_descriptor, list);
- list_del(&fr_desc->list);
- spin_unlock_irqrestore(&isert_conn->conn_lock, flags);
- wr->fr_desc = fr_desc;
-
- ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn,
- ib_sge, offset, data_len);
- if (ret) {
- list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool);
- goto unmap_sg;
- }
-
return 0;
+unmap_prot_cmd:
+ if (se_cmd->t_prot_sg)
+ isert_unmap_data_buf(isert_conn, &wr->prot);
+unmap_cmd:
+ if (fr_desc) {
+ spin_lock_irqsave(&isert_conn->conn_lock, flags);
+ list_add_tail(&fr_desc->list, &isert_conn->conn_fr_pool);
+ spin_unlock_irqrestore(&isert_conn->conn_lock, flags);
+ }
+ isert_unmap_data_buf(isert_conn, &wr->data);
-unmap_sg:
- ib_dma_unmap_sg(ib_dev, sg_start, sg_nents,
- (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
- DMA_TO_DEVICE : DMA_FROM_DEVICE);
return ret;
}
@@ -2302,24 +2872,35 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
return rc;
}
- /*
- * Build isert_conn->tx_desc for iSCSI response PDU and attach
- */
- isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
- iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *)
- &isert_cmd->tx_desc.iscsi_header);
- isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
- isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr);
+ if (se_cmd->prot_op == TARGET_PROT_NORMAL) {
+ /*
+ * Build isert_conn->tx_desc for iSCSI response PDU and attach
+ */
+ isert_create_send_desc(isert_conn, isert_cmd,
+ &isert_cmd->tx_desc);
+ iscsit_build_rsp_pdu(cmd, conn, true, (struct iscsi_scsi_rsp *)
+ &isert_cmd->tx_desc.iscsi_header);
+ isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
+ isert_init_send_wr(isert_conn, isert_cmd,
+ &isert_cmd->tx_desc.send_wr, true);
+ isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr;
+ wr->send_wr_num += 1;
+ }
- atomic_inc(&isert_conn->post_send_buf_count);
+ atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count);
rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
if (rc) {
pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
- atomic_dec(&isert_conn->post_send_buf_count);
+ atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
}
- pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data READ\n",
- isert_cmd);
+
+ if (se_cmd->prot_op == TARGET_PROT_NORMAL)
+ pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data "
+ "READ\n", isert_cmd);
+ else
+ pr_debug("Cmd: %p posted RDMA_WRITE for iSER Data READ\n",
+ isert_cmd);
return 1;
}
@@ -2344,12 +2925,12 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
return rc;
}
- atomic_inc(&isert_conn->post_send_buf_count);
+ atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count);
rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
if (rc) {
pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
- atomic_dec(&isert_conn->post_send_buf_count);
+ atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
}
pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
isert_cmd);
@@ -2430,7 +3011,7 @@ isert_setup_np(struct iscsi_np *np,
pr_err("Unable to allocate struct isert_np\n");
return -ENOMEM;
}
- init_waitqueue_head(&isert_np->np_accept_wq);
+ sema_init(&isert_np->np_sem, 0);
mutex_init(&isert_np->np_accept_mutex);
INIT_LIST_HEAD(&isert_np->np_accept_list);
init_completion(&isert_np->np_login_comp);
@@ -2479,18 +3060,6 @@ out:
}
static int
-isert_check_accept_queue(struct isert_np *isert_np)
-{
- int empty;
-
- mutex_lock(&isert_np->np_accept_mutex);
- empty = list_empty(&isert_np->np_accept_list);
- mutex_unlock(&isert_np->np_accept_mutex);
-
- return empty;
-}
-
-static int
isert_rdma_accept(struct isert_conn *isert_conn)
{
struct rdma_cm_id *cm_id = isert_conn->conn_cm_id;
@@ -2582,16 +3151,19 @@ isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
int max_accept = 0, ret;
accept_wait:
- ret = wait_event_interruptible(isert_np->np_accept_wq,
- !isert_check_accept_queue(isert_np) ||
- np->np_thread_state == ISCSI_NP_THREAD_RESET);
+ ret = down_interruptible(&isert_np->np_sem);
if (max_accept > 5)
return -ENODEV;
spin_lock_bh(&np->np_thread_lock);
- if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+ if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) {
spin_unlock_bh(&np->np_thread_lock);
- pr_err("ISCSI_NP_THREAD_RESET for isert_accept_np\n");
+ pr_debug("np_thread_state %d for isert_accept_np\n",
+ np->np_thread_state);
+ /**
+ * No point in stalling here when np_thread
+ * is in state RESET/SHUTDOWN/EXIT - bail
+ **/
return -ENODEV;
}
spin_unlock_bh(&np->np_thread_lock);
@@ -2636,63 +3208,37 @@ isert_free_np(struct iscsi_np *np)
kfree(isert_np);
}
-static int isert_check_state(struct isert_conn *isert_conn, int state)
-{
- int ret;
-
- mutex_lock(&isert_conn->conn_mutex);
- ret = (isert_conn->state == state);
- mutex_unlock(&isert_conn->conn_mutex);
-
- return ret;
-}
-
-static void isert_free_conn(struct iscsi_conn *conn)
+static void isert_wait_conn(struct iscsi_conn *conn)
{
struct isert_conn *isert_conn = conn->context;
- pr_debug("isert_free_conn: Starting \n");
- /*
- * Decrement post_send_buf_count for special case when called
- * from isert_do_control_comp() -> iscsit_logout_post_handler()
- */
- mutex_lock(&isert_conn->conn_mutex);
- if (isert_conn->logout_posted)
- atomic_dec(&isert_conn->post_send_buf_count);
+ pr_debug("isert_wait_conn: Starting \n");
- if (isert_conn->conn_cm_id && isert_conn->state != ISER_CONN_DOWN) {
- pr_debug("Calling rdma_disconnect from isert_free_conn\n");
+ mutex_lock(&isert_conn->conn_mutex);
+ if (isert_conn->conn_cm_id) {
+ pr_debug("Calling rdma_disconnect from isert_wait_conn\n");
rdma_disconnect(isert_conn->conn_cm_id);
}
/*
* Only wait for conn_wait_comp_err if the isert_conn made it
* into full feature phase..
*/
- if (isert_conn->state == ISER_CONN_UP) {
- pr_debug("isert_free_conn: Before wait_event comp_err %d\n",
- isert_conn->state);
- mutex_unlock(&isert_conn->conn_mutex);
-
- wait_event(isert_conn->conn_wait_comp_err,
- (isert_check_state(isert_conn, ISER_CONN_TERMINATING)));
-
- wait_event(isert_conn->conn_wait,
- (isert_check_state(isert_conn, ISER_CONN_DOWN)));
-
- isert_put_conn(isert_conn);
- return;
- }
if (isert_conn->state == ISER_CONN_INIT) {
mutex_unlock(&isert_conn->conn_mutex);
- isert_put_conn(isert_conn);
return;
}
- pr_debug("isert_free_conn: wait_event conn_wait %d\n",
- isert_conn->state);
+ if (isert_conn->state == ISER_CONN_UP)
+ isert_conn->state = ISER_CONN_TERMINATING;
mutex_unlock(&isert_conn->conn_mutex);
- wait_event(isert_conn->conn_wait,
- (isert_check_state(isert_conn, ISER_CONN_DOWN)));
+ wait_for_completion(&isert_conn->conn_wait_comp_err);
+
+ wait_for_completion(&isert_conn->conn_wait);
+}
+
+static void isert_free_conn(struct iscsi_conn *conn)
+{
+ struct isert_conn *isert_conn = conn->context;
isert_put_conn(isert_conn);
}
@@ -2705,6 +3251,7 @@ static struct iscsit_transport iser_target_transport = {
.iscsit_setup_np = isert_setup_np,
.iscsit_accept_np = isert_accept_np,
.iscsit_free_np = isert_free_np,
+ .iscsit_wait_conn = isert_wait_conn,
.iscsit_free_conn = isert_free_conn,
.iscsit_get_login_rx = isert_get_login_rx,
.iscsit_put_login_tx = isert_put_login_tx,
@@ -2713,6 +3260,8 @@ static struct iscsit_transport iser_target_transport = {
.iscsit_get_dataout = isert_get_dataout,
.iscsit_queue_data_in = isert_put_datain,
.iscsit_queue_status = isert_put_response,
+ .iscsit_aborted_task = isert_aborted_task,
+ .iscsit_get_sup_prot_ops = isert_get_sup_prot_ops,
};
static int __init isert_init(void)
@@ -2743,6 +3292,7 @@ destroy_rx_wq:
static void __exit isert_exit(void)
{
+ flush_scheduled_work();
destroy_workqueue(isert_comp_wq);
destroy_workqueue(isert_rx_wq);
iscsit_unregister_transport(&iser_target_transport);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 631f2090f0b..04f51f7bf61 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -6,6 +6,7 @@
#define ISERT_RDMA_LISTEN_BACKLOG 10
#define ISCSI_ISER_SG_TABLESIZE 256
+#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
enum isert_desc_type {
ISCSI_TX_CONTROL,
@@ -43,14 +44,41 @@ struct iser_tx_desc {
struct ib_sge tx_sg[2];
int num_sge;
struct isert_cmd *isert_cmd;
+ struct llist_node *comp_llnode_batch;
+ struct llist_node comp_llnode;
+ bool llnode_active;
struct ib_send_wr send_wr;
} __packed;
+enum isert_indicator {
+ ISERT_PROTECTED = 1 << 0,
+ ISERT_DATA_KEY_VALID = 1 << 1,
+ ISERT_PROT_KEY_VALID = 1 << 2,
+ ISERT_SIG_KEY_VALID = 1 << 3,
+};
+
+struct pi_context {
+ struct ib_mr *prot_mr;
+ struct ib_fast_reg_page_list *prot_frpl;
+ struct ib_mr *sig_mr;
+};
+
struct fast_reg_descriptor {
- struct list_head list;
- struct ib_mr *data_mr;
- struct ib_fast_reg_page_list *data_frpl;
- bool valid;
+ struct list_head list;
+ struct ib_mr *data_mr;
+ struct ib_fast_reg_page_list *data_frpl;
+ u8 ind;
+ struct pi_context *pi_ctx;
+};
+
+struct isert_data_buf {
+ struct scatterlist *sg;
+ int nents;
+ u32 sg_off;
+ u32 len; /* cur_rdma_length */
+ u32 offset;
+ unsigned int dma_nents;
+ enum dma_data_direction dma_dir;
};
struct isert_rdma_wr {
@@ -59,12 +87,11 @@ struct isert_rdma_wr {
enum iser_ib_op_code iser_ib_op;
struct ib_sge *ib_sge;
struct ib_sge s_ib_sge;
- int num_sge;
- struct scatterlist *sge;
int send_wr_num;
struct ib_send_wr *send_wr;
struct ib_send_wr s_send_wr;
- u32 cur_rdma_length;
+ struct isert_data_buf data;
+ struct isert_data_buf prot;
struct fast_reg_descriptor *fr_desc;
};
@@ -89,7 +116,6 @@ struct isert_device;
struct isert_conn {
enum iser_conn_state state;
- bool logout_posted;
int post_recv_buf_count;
atomic_t post_send_buf_count;
u32 responder_resources;
@@ -114,13 +140,17 @@ struct isert_conn {
struct isert_device *conn_device;
struct work_struct conn_logout_work;
struct mutex conn_mutex;
- wait_queue_head_t conn_wait;
- wait_queue_head_t conn_wait_comp_err;
+ struct completion conn_wait;
+ struct completion conn_wait_comp_err;
struct kref conn_kref;
- struct list_head conn_frwr_pool;
- int conn_frwr_pool_size;
- /* lock to protect frwr_pool */
+ struct list_head conn_fr_pool;
+ int conn_fr_pool_size;
+ /* lock to protect fastreg pool */
spinlock_t conn_lock;
+#define ISERT_COMP_BATCH_COUNT 8
+ int conn_comp_batch;
+ struct llist_head conn_comp_llist;
+ bool disconnect;
};
#define ISERT_MAX_CQ 64
@@ -133,13 +163,12 @@ struct isert_cq_desc {
};
struct isert_device {
- int use_frwr;
+ int use_fastreg;
+ bool pi_capable;
int cqs_used;
int refcount;
int cq_active_qps[ISERT_MAX_CQ];
struct ib_device *ib_device;
- struct ib_pd *dev_pd;
- struct ib_mr *dev_mr;
struct ib_cq *dev_rx_cq[ISERT_MAX_CQ];
struct ib_cq *dev_tx_cq[ISERT_MAX_CQ];
struct isert_cq_desc *cq_desc;
@@ -153,7 +182,7 @@ struct isert_device {
};
struct isert_np {
- wait_queue_head_t np_accept_wq;
+ struct semaphore np_sem;
struct rdma_cm_id *np_cm_id;
struct mutex np_accept_mutex;
struct list_head np_accept_list;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index f93baf8254c..e3c2c5b4297 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -30,7 +30,7 @@
* SOFTWARE.
*/
-#define pr_fmt(fmt) PFX fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
@@ -46,6 +46,7 @@
#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
+#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>
@@ -65,6 +66,8 @@ static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
+static bool prefer_fr;
+static bool register_always;
static int topspin_workarounds = 1;
module_param(srp_sg_tablesize, uint, 0444);
@@ -86,6 +89,40 @@ module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
"Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
+module_param(prefer_fr, bool, 0444);
+MODULE_PARM_DESC(prefer_fr,
+"Whether to use fast registration if both FMR and fast registration are supported");
+
+module_param(register_always, bool, 0444);
+MODULE_PARM_DESC(register_always,
+ "Use memory registration even for contiguous memory regions");
+
+static struct kernel_param_ops srp_tmo_ops;
+
+static int srp_reconnect_delay = 10;
+module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
+
+static int srp_fast_io_fail_tmo = 15;
+module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(fast_io_fail_tmo,
+ "Number of seconds between the observation of a transport"
+ " layer error and failing all I/O. \"off\" means that this"
+ " functionality is disabled.");
+
+static int srp_dev_loss_tmo = 600;
+module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
+ S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dev_loss_tmo,
+ "Maximum number of seconds that the SRP transport should"
+ " insulate transport layer errors. After this time has been"
+ " exceeded the SCSI host is removed. Should be"
+ " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
+ " if fast_io_fail_tmo has not been set. \"off\" means that"
+ " this functionality is disabled.");
+
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device);
static void srp_recv_completion(struct ib_cq *cq, void *target_ptr);
@@ -102,6 +139,48 @@ static struct ib_client srp_client = {
static struct ib_sa_client srp_sa_client;
+static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
+{
+ int tmo = *(int *)kp->arg;
+
+ if (tmo >= 0)
+ return sprintf(buffer, "%d", tmo);
+ else
+ return sprintf(buffer, "off");
+}
+
+static int srp_tmo_set(const char *val, const struct kernel_param *kp)
+{
+ int tmo, res;
+
+ if (strncmp(val, "off", 3) != 0) {
+ res = kstrtoint(val, 0, &tmo);
+ if (res)
+ goto out;
+ } else {
+ tmo = -1;
+ }
+ if (kp->arg == &srp_reconnect_delay)
+ res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
+ srp_dev_loss_tmo);
+ else if (kp->arg == &srp_fast_io_fail_tmo)
+ res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
+ else
+ res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
+ tmo);
+ if (res)
+ goto out;
+ *(int *)kp->arg = tmo;
+
+out:
+ return res;
+}
+
+static struct kernel_param_ops srp_tmo_ops = {
+ .get = srp_tmo_get,
+ .set = srp_tmo_set,
+};
+
static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
return (struct srp_target_port *) host->hostdata;
@@ -219,28 +298,174 @@ static int srp_new_cm_id(struct srp_target_port *target)
return 0;
}
+/*
+ * Create an FMR pool for @target on the protection domain of its SRP device.
+ * The pool holds one entry per command the SCSI host can queue. Returns
+ * whatever ib_create_fmr_pool() returns.
+ */
+static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
+{
+	struct srp_device *srp_dev = target->srp_host->srp_dev;
+	struct ib_fmr_pool_param params;
+
+	memset(&params, 0, sizeof(params));
+
+	/* Geometry and access rights of each FMR. */
+	params.max_pages_per_fmr = srp_dev->max_pages_per_mr;
+	params.page_shift = ilog2(srp_dev->mr_page_size);
+	params.access = (IB_ACCESS_LOCAL_WRITE |
+			 IB_ACCESS_REMOTE_WRITE |
+			 IB_ACCESS_REMOTE_READ);
+
+	/* Pool sizing. */
+	params.pool_size = target->scsi_host->can_queue;
+	params.dirty_watermark = params.pool_size / 4;
+	params.cache = 1;
+
+	return ib_create_fmr_pool(srp_dev->pd, &params);
+}
+
+/**
+ * srp_destroy_fr_pool() - release a fast registration pool
+ * @pool: Fast registration pool to be destroyed; may be NULL.
+ *
+ * Frees the page list and deregisters the MR of every descriptor that has
+ * one, then frees the pool itself.
+ */
+static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
+{
+	int idx;
+
+	if (pool == NULL)
+		return;
+
+	for (idx = 0; idx < pool->size; idx++) {
+		struct srp_fr_desc *desc = &pool->desc[idx];
+
+		if (desc->frpl)
+			ib_free_fast_reg_page_list(desc->frpl);
+		if (desc->mr)
+			ib_dereg_mr(desc->mr);
+	}
+
+	kfree(pool);
+}
+
+/**
+ * srp_create_fr_pool() - allocate and initialize a pool for fast registration
+ * @device: IB device to allocate fast registration descriptors for.
+ * @pd: Protection domain associated with the FR descriptors.
+ * @pool_size: Number of descriptors to allocate.
+ * @max_page_list_len: Maximum fast registration work request page list length.
+ *
+ * Returns the new pool with all descriptors on its free list, or an ERR_PTR():
+ * -EINVAL if @pool_size is not positive, -ENOMEM if the pool itself cannot be
+ * allocated, or the error reported while allocating an MR or a page list.
+ */
+static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
+					      struct ib_pd *pd, int pool_size,
+					      int max_page_list_len)
+{
+	struct srp_fr_pool *pool;
+	int idx, err;
+
+	if (pool_size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	pool = kzalloc(sizeof(struct srp_fr_pool) +
+		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
+	if (!pool)
+		return ERR_PTR(-ENOMEM);
+
+	pool->size = pool_size;
+	pool->max_page_list_len = max_page_list_len;
+	spin_lock_init(&pool->lock);
+	INIT_LIST_HEAD(&pool->free_list);
+
+	for (idx = 0; idx < pool->size; idx++) {
+		struct srp_fr_desc *desc = &pool->desc[idx];
+		struct ib_fast_reg_page_list *page_list;
+		struct ib_mr *fr_mr;
+
+		fr_mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
+		if (IS_ERR(fr_mr)) {
+			err = PTR_ERR(fr_mr);
+			goto destroy_pool;
+		}
+		desc->mr = fr_mr;
+
+		page_list = ib_alloc_fast_reg_page_list(device,
+							max_page_list_len);
+		if (IS_ERR(page_list)) {
+			err = PTR_ERR(page_list);
+			goto destroy_pool;
+		}
+		desc->frpl = page_list;
+
+		list_add_tail(&desc->entry, &pool->free_list);
+	}
+
+	return pool;
+
+destroy_pool:
+	/* The pool was zero-allocated, so partially set up entries are safe. */
+	srp_destroy_fr_pool(pool);
+	return ERR_PTR(err);
+}
+
+/**
+ * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
+ * @pool: Pool to obtain descriptor from.
+ *
+ * Removes the first descriptor from the free list under the pool lock.
+ * Returns NULL if the free list is empty.
+ */
+static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
+{
+	struct srp_fr_desc *desc;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&pool->lock, irq_flags);
+	if (list_empty(&pool->free_list)) {
+		desc = NULL;
+	} else {
+		desc = list_first_entry(&pool->free_list, struct srp_fr_desc,
+					entry);
+		list_del(&desc->entry);
+	}
+	spin_unlock_irqrestore(&pool->lock, irq_flags);
+
+	return desc;
+}
+
+/**
+ * srp_fr_pool_put() - put an FR descriptor back in the free list
+ * @pool: Pool the descriptor was allocated from.
+ * @desc: Pointer to an array of fast registration descriptor pointers.
+ * @n: Number of descriptors to put back.
+ *
+ * Each descriptor is added at the head of the free list, in array order,
+ * while holding the pool lock.
+ *
+ * Note: The caller must already have queued an invalidation request for
+ * desc->mr->rkey before calling this function.
+ */
+static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
+			    int n)
+{
+	struct srp_fr_desc **next = desc;
+	unsigned long irq_flags;
+	int remaining = n;
+
+	spin_lock_irqsave(&pool->lock, irq_flags);
+	while (remaining-- > 0) {
+		list_add(&(*next)->entry, &pool->free_list);
+		next++;
+	}
+	spin_unlock_irqrestore(&pool->lock, irq_flags);
+}
+
+/*
+ * Create a fast registration pool for @target with one descriptor per
+ * command the SCSI host can queue. Returns what srp_create_fr_pool() returns.
+ */
+static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
+{
+	struct srp_device *srp_dev = target->srp_host->srp_dev;
+	struct Scsi_Host *shost = target->scsi_host;
+
+	return srp_create_fr_pool(srp_dev->dev, srp_dev->pd, shost->can_queue,
+				  srp_dev->max_pages_per_mr);
+}
+
static int srp_create_target_ib(struct srp_target_port *target)
{
+ struct srp_device *dev = target->srp_host->srp_dev;
struct ib_qp_init_attr *init_attr;
struct ib_cq *recv_cq, *send_cq;
struct ib_qp *qp;
+ struct ib_fmr_pool *fmr_pool = NULL;
+ struct srp_fr_pool *fr_pool = NULL;
+ const int m = 1 + dev->use_fast_reg;
int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
if (!init_attr)
return -ENOMEM;
- recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
- srp_recv_completion, NULL, target, SRP_RQ_SIZE,
- target->comp_vector);
+ recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, target,
+ target->queue_size, target->comp_vector);
if (IS_ERR(recv_cq)) {
ret = PTR_ERR(recv_cq);
goto err;
}
- send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
- srp_send_completion, NULL, target, SRP_SQ_SIZE,
- target->comp_vector);
+ send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target,
+ m * target->queue_size, target->comp_vector);
if (IS_ERR(send_cq)) {
ret = PTR_ERR(send_cq);
goto err_recv_cq;
@@ -249,16 +474,16 @@ static int srp_create_target_ib(struct srp_target_port *target)
ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
init_attr->event_handler = srp_qp_event;
- init_attr->cap.max_send_wr = SRP_SQ_SIZE;
- init_attr->cap.max_recv_wr = SRP_RQ_SIZE;
+ init_attr->cap.max_send_wr = m * target->queue_size;
+ init_attr->cap.max_recv_wr = target->queue_size;
init_attr->cap.max_recv_sge = 1;
init_attr->cap.max_send_sge = 1;
- init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
init_attr->qp_type = IB_QPT_RC;
init_attr->send_cq = send_cq;
init_attr->recv_cq = recv_cq;
- qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
+ qp = ib_create_qp(dev->pd, init_attr);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_send_cq;
@@ -268,6 +493,30 @@ static int srp_create_target_ib(struct srp_target_port *target)
if (ret)
goto err_qp;
+ if (dev->use_fast_reg && dev->has_fr) {
+ fr_pool = srp_alloc_fr_pool(target);
+ if (IS_ERR(fr_pool)) {
+ ret = PTR_ERR(fr_pool);
+ shost_printk(KERN_WARNING, target->scsi_host, PFX
+ "FR pool allocation failed (%d)\n", ret);
+ goto err_qp;
+ }
+ if (target->fr_pool)
+ srp_destroy_fr_pool(target->fr_pool);
+ target->fr_pool = fr_pool;
+ } else if (!dev->use_fast_reg && dev->has_fmr) {
+ fmr_pool = srp_alloc_fmr_pool(target);
+ if (IS_ERR(fmr_pool)) {
+ ret = PTR_ERR(fmr_pool);
+ shost_printk(KERN_WARNING, target->scsi_host, PFX
+ "FMR pool allocation failed (%d)\n", ret);
+ goto err_qp;
+ }
+ if (target->fmr_pool)
+ ib_destroy_fmr_pool(target->fmr_pool);
+ target->fmr_pool = fmr_pool;
+ }
+
if (target->qp)
ib_destroy_qp(target->qp);
if (target->recv_cq)
@@ -296,10 +545,22 @@ err:
return ret;
}
+/*
+ * Note: this function may be called without srp_alloc_iu_bufs() having been
+ * invoked. Hence the target->[rt]x_ring checks.
+ */
static void srp_free_target_ib(struct srp_target_port *target)
{
+ struct srp_device *dev = target->srp_host->srp_dev;
int i;
+ if (dev->use_fast_reg) {
+ if (target->fr_pool)
+ srp_destroy_fr_pool(target->fr_pool);
+ } else {
+ if (target->fmr_pool)
+ ib_destroy_fmr_pool(target->fmr_pool);
+ }
ib_destroy_qp(target->qp);
ib_destroy_cq(target->send_cq);
ib_destroy_cq(target->recv_cq);
@@ -307,10 +568,18 @@ static void srp_free_target_ib(struct srp_target_port *target)
target->qp = NULL;
target->send_cq = target->recv_cq = NULL;
- for (i = 0; i < SRP_RQ_SIZE; ++i)
- srp_free_iu(target->srp_host, target->rx_ring[i]);
- for (i = 0; i < SRP_SQ_SIZE; ++i)
- srp_free_iu(target->srp_host, target->tx_ring[i]);
+ if (target->rx_ring) {
+ for (i = 0; i < target->queue_size; ++i)
+ srp_free_iu(target->srp_host, target->rx_ring[i]);
+ kfree(target->rx_ring);
+ target->rx_ring = NULL;
+ }
+ if (target->tx_ring) {
+ for (i = 0; i < target->queue_size; ++i)
+ srp_free_iu(target->srp_host, target->tx_ring[i]);
+ kfree(target->tx_ring);
+ target->tx_ring = NULL;
+ }
}
static void srp_path_rec_completion(int status,
@@ -330,6 +599,8 @@ static void srp_path_rec_completion(int status,
static int srp_lookup_path(struct srp_target_port *target)
{
+ int ret;
+
target->path.numb_path = 1;
init_completion(&target->done);
@@ -350,7 +621,9 @@ static int srp_lookup_path(struct srp_target_port *target)
if (target->path_query_id < 0)
return target->path_query_id;
- wait_for_completion(&target->done);
+ ret = wait_for_completion_interruptible(&target->done);
+ if (ret < 0)
+ return ret;
if (target->status < 0)
shost_printk(KERN_WARNING, target->scsi_host,
@@ -390,7 +663,7 @@ static int srp_send_req(struct srp_target_port *target)
req->param.responder_resources = 4;
req->param.remote_cm_response_timeout = 20;
req->param.local_cm_response_timeout = 20;
- req->param.retry_count = 7;
+ req->param.retry_count = target->tl_retry_count;
req->param.rnr_retry_count = 7;
req->param.max_cm_retries = 15;
@@ -492,12 +765,20 @@ static void srp_disconnect_target(struct srp_target_port *target)
static void srp_free_req_data(struct srp_target_port *target)
{
- struct ib_device *ibdev = target->srp_host->srp_dev->dev;
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct ib_device *ibdev = dev->dev;
struct srp_request *req;
int i;
- for (i = 0, req = target->req_ring; i < SRP_CMD_SQ_SIZE; ++i, ++req) {
- kfree(req->fmr_list);
+ if (!target->req_ring)
+ return;
+
+ for (i = 0; i < target->req_ring_size; ++i) {
+ req = &target->req_ring[i];
+ if (dev->use_fast_reg)
+ kfree(req->fr_list);
+ else
+ kfree(req->fmr_list);
kfree(req->map_page);
if (req->indirect_dma_addr) {
ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
@@ -506,6 +787,59 @@ static void srp_free_req_data(struct srp_target_port *target)
}
kfree(req->indirect_desc);
}
+
+ kfree(target->req_ring);
+ target->req_ring = NULL;
+}
+
+/*
+ * Allocate target->req_ring (target->req_ring_size entries) and, for each
+ * request, its memory-registration list, page array and DMA-mapped indirect
+ * descriptor buffer. Every fully set up request is added to
+ * target->free_reqs.
+ *
+ * Returns 0 on success and -ENOMEM on any allocation or DMA mapping failure.
+ * Nothing is released here on failure; whatever was allocated so far stays
+ * attached to target->req_ring (NOTE(review): presumably the caller cleans
+ * up via srp_free_req_data() - confirm).
+ */
+static int srp_alloc_req_data(struct srp_target_port *target)
+{
+ struct srp_device *srp_dev = target->srp_host->srp_dev;
+ struct ib_device *ibdev = srp_dev->dev;
+ struct srp_request *req;
+ void *mr_list;
+ dma_addr_t dma_addr;
+ int i, ret = -ENOMEM;
+
+ INIT_LIST_HEAD(&target->free_reqs);
+
+ target->req_ring = kzalloc(target->req_ring_size *
+ sizeof(*target->req_ring), GFP_KERNEL);
+ if (!target->req_ring)
+ goto out;
+
+ for (i = 0; i < target->req_ring_size; ++i) {
+ req = &target->req_ring[i];
+ /* One pointer slot per S/G entry; used as FR or FMR list. */
+ mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
+ GFP_KERNEL);
+ if (!mr_list)
+ goto out;
+ if (srp_dev->use_fast_reg)
+ req->fr_list = mr_list;
+ else
+ req->fmr_list = mr_list;
+ req->map_page = kmalloc(srp_dev->max_pages_per_mr *
+ sizeof(void *), GFP_KERNEL);
+ if (!req->map_page)
+ goto out;
+ req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
+ if (!req->indirect_desc)
+ goto out;
+
+ dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
+ target->indirect_size,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(ibdev, dma_addr))
+ goto out;
+
+ /* Only publish the DMA address once the mapping succeeded. */
+ req->indirect_dma_addr = dma_addr;
+ req->index = i;
+ list_add_tail(&req->list, &target->free_reqs);
+ }
+ ret = 0;
+
+out:
+ return ret;
 }
/**
@@ -528,12 +862,21 @@ static void srp_remove_target(struct srp_target_port *target)
WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
srp_del_scsi_host_attr(target->scsi_host);
+ srp_rport_get(target->rport);
srp_remove_host(target->scsi_host);
scsi_remove_host(target->scsi_host);
+ srp_stop_rport_timers(target->rport);
srp_disconnect_target(target);
ib_destroy_cm_id(target->cm_id);
srp_free_target_ib(target);
+ cancel_work_sync(&target->tl_err_work);
+ srp_rport_put(target->rport);
srp_free_req_data(target);
+
+ spin_lock(&target->srp_host->target_lock);
+ list_del(&target->list);
+ spin_unlock(&target->srp_host->target_lock);
+
scsi_host_put(target->scsi_host);
}
@@ -545,10 +888,6 @@ static void srp_remove_work(struct work_struct *work)
WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
srp_remove_target(target);
-
- spin_lock(&target->srp_host->target_lock);
- list_del(&target->list);
- spin_unlock(&target->srp_host->target_lock);
}
static void srp_rport_delete(struct srp_rport *rport)
@@ -576,7 +915,9 @@ static int srp_connect_target(struct srp_target_port *target)
ret = srp_send_req(target);
if (ret)
return ret;
- wait_for_completion(&target->done);
+ ret = wait_for_completion_interruptible(&target->done);
+ if (ret < 0)
+ return ret;
/*
* The CM event handling code will set status to
@@ -619,21 +960,56 @@ static int srp_connect_target(struct srp_target_port *target)
}
}
+/*
+ * Post an unsignaled IB_WR_LOCAL_INV work request for @rkey on the QP of
+ * @target. The work request is tagged with LOCAL_INV_WR_ID_MASK in wr_id.
+ * Returns the result of ib_post_send().
+ */
+static int srp_inv_rkey(struct srp_target_port *target, u32 rkey)
+{
+	struct ib_send_wr *failed_wr;
+	struct ib_send_wr inv_wr;
+
+	/* next, num_sge and send_flags all stay zero. */
+	memset(&inv_wr, 0, sizeof(inv_wr));
+	inv_wr.opcode = IB_WR_LOCAL_INV;
+	inv_wr.wr_id = LOCAL_INV_WR_ID_MASK;
+	inv_wr.ex.invalidate_rkey = rkey;
+
+	return ib_post_send(target->qp, &inv_wr, &failed_wr);
+}
+
static void srp_unmap_data(struct scsi_cmnd *scmnd,
struct srp_target_port *target,
struct srp_request *req)
{
- struct ib_device *ibdev = target->srp_host->srp_dev->dev;
- struct ib_pool_fmr **pfmr;
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct ib_device *ibdev = dev->dev;
+ int i, res;
if (!scsi_sglist(scmnd) ||
(scmnd->sc_data_direction != DMA_TO_DEVICE &&
scmnd->sc_data_direction != DMA_FROM_DEVICE))
return;
- pfmr = req->fmr_list;
- while (req->nfmr--)
- ib_fmr_pool_unmap(*pfmr++);
+ if (dev->use_fast_reg) {
+ struct srp_fr_desc **pfr;
+
+ for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
+ res = srp_inv_rkey(target, (*pfr)->mr->rkey);
+ if (res < 0) {
+ shost_printk(KERN_ERR, target->scsi_host, PFX
+ "Queueing INV WR for rkey %#x failed (%d)\n",
+ (*pfr)->mr->rkey, res);
+ queue_work(system_long_wq,
+ &target->tl_err_work);
+ }
+ }
+ if (req->nmdesc)
+ srp_fr_pool_put(target->fr_pool, req->fr_list,
+ req->nmdesc);
+ } else {
+ struct ib_pool_fmr **pfmr;
+
+ for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
+ ib_fmr_pool_unmap(*pfmr);
+ }
ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
scmnd->sc_data_direction);
@@ -643,6 +1019,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
* srp_claim_req - Take ownership of the scmnd associated with a request.
* @target: SRP target port.
* @req: SRP request.
+ * @sdev: If not NULL, only take ownership for this SCSI device.
* @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
* ownership of @req->scmnd if it equals @scmnd.
*
@@ -651,16 +1028,17 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
*/
static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
struct srp_request *req,
+ struct scsi_device *sdev,
struct scsi_cmnd *scmnd)
{
unsigned long flags;
spin_lock_irqsave(&target->lock, flags);
- if (!scmnd) {
+ if (req->scmnd &&
+ (!sdev || req->scmnd->device == sdev) &&
+ (!scmnd || req->scmnd == scmnd)) {
scmnd = req->scmnd;
req->scmnd = NULL;
- } else if (req->scmnd == scmnd) {
- req->scmnd = NULL;
} else {
scmnd = NULL;
}
@@ -671,6 +1049,10 @@ static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
/**
* srp_free_req() - Unmap data and add request to the free request list.
+ * @target: SRP target port.
+ * @req: Request to be freed.
+ * @scmnd: SCSI command associated with @req.
+ * @req_lim_delta: Amount to be added to @target->req_lim.
*/
static void srp_free_req(struct srp_target_port *target,
struct srp_request *req, struct scsi_cmnd *scmnd,
@@ -686,23 +1068,52 @@ static void srp_free_req(struct srp_target_port *target,
spin_unlock_irqrestore(&target->lock, flags);
}
-static void srp_reset_req(struct srp_target_port *target, struct srp_request *req)
+static void srp_finish_req(struct srp_target_port *target,
+ struct srp_request *req, struct scsi_device *sdev,
+ int result)
{
- struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL);
+ struct scsi_cmnd *scmnd = srp_claim_req(target, req, sdev, NULL);
if (scmnd) {
srp_free_req(target, req, scmnd, 0);
- scmnd->result = DID_RESET << 16;
+ scmnd->result = result;
scmnd->scsi_done(scmnd);
}
}
-static int srp_reconnect_target(struct srp_target_port *target)
+static void srp_terminate_io(struct srp_rport *rport)
{
+ struct srp_target_port *target = rport->lld_data;
struct Scsi_Host *shost = target->scsi_host;
- int i, ret;
+ struct scsi_device *sdev;
+ int i;
- scsi_target_block(&shost->shost_gendev);
+ /*
+ * Invoking srp_terminate_io() while srp_queuecommand() is running
+ * is not safe. Hence the warning statement below.
+ */
+ shost_for_each_device(sdev, shost)
+ WARN_ON_ONCE(sdev->request_queue->request_fn_active);
+
+ for (i = 0; i < target->req_ring_size; ++i) {
+ struct srp_request *req = &target->req_ring[i];
+ srp_finish_req(target, req, NULL, DID_TRANSPORT_FAILFAST << 16);
+ }
+}
+
+/*
+ * It is up to the caller to ensure that srp_rport_reconnect() calls are
+ * serialized and that no concurrent srp_queuecommand(), srp_abort(),
+ * srp_reset_device() or srp_reset_host() calls will occur while this function
+ * is in progress. One way to realize that is not to call this function
+ * directly but to call srp_reconnect_rport() instead since that last function
+ * serializes calls of this function via rport->mutex and also blocks
+ * srp_queuecommand() calls before invoking this function.
+ */
+static int srp_rport_reconnect(struct srp_rport *rport)
+{
+ struct srp_target_port *target = rport->lld_data;
+ int i, ret;
srp_disconnect_target(target);
/*
@@ -711,51 +1122,29 @@ static int srp_reconnect_target(struct srp_target_port *target)
* callbacks will have finished before a new QP is allocated.
*/
ret = srp_new_cm_id(target);
- /*
- * Whether or not creating a new CM ID succeeded, create a new
- * QP. This guarantees that all completion callback function
- * invocations have finished before request resetting starts.
- */
- if (ret == 0)
- ret = srp_create_target_ib(target);
- else
- srp_create_target_ib(target);
- for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
+ for (i = 0; i < target->req_ring_size; ++i) {
struct srp_request *req = &target->req_ring[i];
- if (req->scmnd)
- srp_reset_req(target, req);
+ srp_finish_req(target, req, NULL, DID_RESET << 16);
}
+ /*
+ * Whether or not creating a new CM ID succeeded, create a new
+ * QP. This guarantees that all callback functions for the old QP have
+ * finished before any send requests are posted on the new QP.
+ */
+ ret += srp_create_target_ib(target);
+
INIT_LIST_HEAD(&target->free_tx);
- for (i = 0; i < SRP_SQ_SIZE; ++i)
+ for (i = 0; i < target->queue_size; ++i)
list_add(&target->tx_ring[i]->list, &target->free_tx);
if (ret == 0)
ret = srp_connect_target(target);
- scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING :
- SDEV_TRANSPORT_OFFLINE);
- target->transport_offline = !!ret;
-
- if (ret)
- goto err;
-
- shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n");
-
- return ret;
-
-err:
- shost_printk(KERN_ERR, target->scsi_host,
- PFX "reconnect failed (%d), removing target port.\n", ret);
-
- /*
- * We couldn't reconnect, so kill our target port off.
- * However, we have to defer the real removal because we
- * are in the context of the SCSI error handler now, which
- * will deadlock if we call scsi_remove_host().
- */
- srp_queue_remove_work(target);
+ if (ret == 0)
+ shost_printk(KERN_INFO, target->scsi_host,
+ PFX "reconnect succeeded\n");
return ret;
}
@@ -777,33 +1166,87 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
static int srp_map_finish_fmr(struct srp_map_state *state,
struct srp_target_port *target)
{
- struct srp_device *dev = target->srp_host->srp_dev;
struct ib_pool_fmr *fmr;
u64 io_addr = 0;
- if (!state->npages)
- return 0;
-
- if (state->npages == 1) {
- srp_map_desc(state, state->base_dma_addr, state->fmr_len,
- target->rkey);
- state->npages = state->fmr_len = 0;
- return 0;
- }
-
- fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages,
+ fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages,
state->npages, io_addr);
if (IS_ERR(fmr))
return PTR_ERR(fmr);
*state->next_fmr++ = fmr;
- state->nfmr++;
+ state->nmdesc++;
+
+ srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey);
- srp_map_desc(state, 0, state->fmr_len, fmr->fmr->rkey);
- state->npages = state->fmr_len = 0;
return 0;
}
+/*
+ * srp_map_finish_fr() - register the pages collected in @state through a
+ * fast registration work request.
+ * @state: Mapping state; pages/npages, base_dma_addr and dma_len describe
+ *	the region to register.
+ * @target: SRP target port whose FR pool and QP are used.
+ *
+ * Takes a descriptor from the FR pool, bumps its rkey, copies the page list
+ * and posts an IB_WR_FAST_REG_MR work request. Only after the work request
+ * has been posted successfully is the descriptor recorded in the request
+ * (state->next_fr / state->nmdesc) and a memory descriptor for it added via
+ * srp_map_desc(). Recording it earlier would leave a descriptor with an
+ * unregistered rkey in the table when the caller falls back to direct
+ * mapping after a failure, and would later trigger a LOCAL_INV for it.
+ *
+ * Returns 0 on success, -ENOMEM if the pool is empty, or the error returned
+ * by ib_post_send(). On failure @state is left unchanged.
+ */
+static int srp_map_finish_fr(struct srp_map_state *state,
+			     struct srp_target_port *target)
+{
+	struct srp_device *dev = target->srp_host->srp_dev;
+	struct ib_send_wr *bad_wr;
+	struct ib_send_wr wr;
+	struct srp_fr_desc *desc;
+	u32 rkey;
+	int ret;
+
+	desc = srp_fr_pool_get(target->fr_pool);
+	if (!desc)
+		return -ENOMEM;
+
+	rkey = ib_inc_rkey(desc->mr->rkey);
+	ib_update_fast_reg_key(desc->mr, rkey);
+
+	memcpy(desc->frpl->page_list, state->pages,
+	       sizeof(state->pages[0]) * state->npages);
+
+	memset(&wr, 0, sizeof(wr));
+	wr.opcode = IB_WR_FAST_REG_MR;
+	wr.wr_id = FAST_REG_WR_ID_MASK;
+	wr.wr.fast_reg.iova_start = state->base_dma_addr;
+	wr.wr.fast_reg.page_list = desc->frpl;
+	wr.wr.fast_reg.page_list_len = state->npages;
+	wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
+	wr.wr.fast_reg.length = state->dma_len;
+	wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
+				       IB_ACCESS_REMOTE_READ |
+				       IB_ACCESS_REMOTE_WRITE);
+	wr.wr.fast_reg.rkey = desc->mr->lkey;
+
+	ret = ib_post_send(target->qp, &wr, &bad_wr);
+	if (ret) {
+		/*
+		 * The registration was never queued, so there is nothing to
+		 * invalidate: hand the descriptor straight back to the pool.
+		 */
+		srp_fr_pool_put(target->fr_pool, &desc, 1);
+		return ret;
+	}
+
+	*state->next_fr++ = desc;
+	state->nmdesc++;
+
+	srp_map_desc(state, state->base_dma_addr, state->dma_len,
+		     desc->mr->rkey);
+
+	return 0;
+}
+
+/*
+ * Close out the pages accumulated in @state, if any.
+ *
+ * A single page is described directly with target->rkey unless
+ * register_always is set; otherwise the pages are registered through fast
+ * registration or FMR, depending on the device's use_fast_reg setting.
+ * On success the accumulated page count and length are reset. Returns 0 on
+ * success or the error from the registration helper.
+ */
+static int srp_finish_mapping(struct srp_map_state *state,
+			      struct srp_target_port *target)
+{
+	int err;
+
+	if (!state->npages)
+		return 0;
+
+	if (state->npages == 1 && !register_always) {
+		srp_map_desc(state, state->base_dma_addr, state->dma_len,
+			     target->rkey);
+		err = 0;
+	} else if (target->srp_host->srp_dev->use_fast_reg) {
+		err = srp_map_finish_fr(state, target);
+	} else {
+		err = srp_map_finish_fmr(state, target);
+	}
+	if (err)
+		return err;
+
+	state->npages = 0;
+	state->dma_len = 0;
+
+	return 0;
+}
+
static void srp_map_update_start(struct srp_map_state *state,
struct scatterlist *sg, int sg_index,
dma_addr_t dma_addr)
@@ -816,7 +1259,7 @@ static void srp_map_update_start(struct srp_map_state *state,
static int srp_map_sg_entry(struct srp_map_state *state,
struct srp_target_port *target,
struct scatterlist *sg, int sg_index,
- int use_fmr)
+ bool use_mr)
{
struct srp_device *dev = target->srp_host->srp_dev;
struct ib_device *ibdev = dev->dev;
@@ -828,23 +1271,25 @@ static int srp_map_sg_entry(struct srp_map_state *state,
if (!dma_len)
return 0;
- if (use_fmr == SRP_MAP_NO_FMR) {
- /* Once we're in direct map mode for a request, we don't
- * go back to FMR mode, so no need to update anything
+ if (!use_mr) {
+ /*
+ * Once we're in direct map mode for a request, we don't
+ * go back to FMR or FR mode, so no need to update anything
* other than the descriptor.
*/
srp_map_desc(state, dma_addr, dma_len, target->rkey);
return 0;
}
- /* If we start at an offset into the FMR page, don't merge into
- * the current FMR. Finish it out, and use the kernel's MR for this
- * sg entry. This is to avoid potential bugs on some SRP targets
- * that were never quite defined, but went away when the initiator
- * avoided using FMR on such page fragments.
+ /*
+ * Since not all RDMA HW drivers support non-zero page offsets for
+ * FMR, if we start at an offset into a page, don't merge into the
+ * current FMR mapping. Finish it out, and use the kernel's MR for
+ * this sg entry.
*/
- if (dma_addr & ~dev->fmr_page_mask || dma_len > dev->fmr_max_size) {
- ret = srp_map_finish_fmr(state, target);
+ if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
+ dma_len > dev->mr_max_size) {
+ ret = srp_finish_mapping(state, target);
if (ret)
return ret;
@@ -853,52 +1298,106 @@ static int srp_map_sg_entry(struct srp_map_state *state,
return 0;
}
- /* If this is the first sg to go into the FMR, save our position.
- * We need to know the first unmapped entry, its index, and the
- * first unmapped address within that entry to be able to restart
- * mapping after an error.
+ /*
+ * If this is the first sg that will be mapped via FMR or via FR, save
+ * our position. We need to know the first unmapped entry, its index,
+ * and the first unmapped address within that entry to be able to
+ * restart mapping after an error.
*/
if (!state->unmapped_sg)
srp_map_update_start(state, sg, sg_index, dma_addr);
while (dma_len) {
- if (state->npages == SRP_FMR_SIZE) {
- ret = srp_map_finish_fmr(state, target);
+ unsigned offset = dma_addr & ~dev->mr_page_mask;
+ if (state->npages == dev->max_pages_per_mr || offset != 0) {
+ ret = srp_finish_mapping(state, target);
if (ret)
return ret;
srp_map_update_start(state, sg, sg_index, dma_addr);
}
- len = min_t(unsigned int, dma_len, dev->fmr_page_size);
+ len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
if (!state->npages)
state->base_dma_addr = dma_addr;
- state->pages[state->npages++] = dma_addr;
- state->fmr_len += len;
+ state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
+ state->dma_len += len;
dma_addr += len;
dma_len -= len;
}
- /* If the last entry of the FMR wasn't a full page, then we need to
+ /*
+ * If the last entry of the MR wasn't a full page, then we need to
* close it out and start a new one -- we can only merge at page
* boundries.
*/
ret = 0;
- if (len != dev->fmr_page_size) {
- ret = srp_map_finish_fmr(state, target);
+ if (len != dev->mr_page_size) {
+ ret = srp_finish_mapping(state, target);
if (!ret)
srp_map_update_start(state, NULL, 0, 0);
}
return ret;
}
+/*
+ * Build the memory descriptors for the @count entries of scatterlist @scat.
+ *
+ * Memory registration (FR or FMR, per dev->use_fast_reg) is used as long as
+ * the matching pool exists on @target and no registration has failed. After
+ * the first failure the remainder of the list, starting at the first
+ * unmapped address recorded in @state, is described with direct descriptors
+ * using target->rkey. The number of registrations made is stored in
+ * req->nmdesc. Always returns 0.
+ */
+static int srp_map_sg(struct srp_map_state *state,
+ struct srp_target_port *target, struct srp_request *req,
+ struct scatterlist *scat, int count)
+{
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct ib_device *ibdev = dev->dev;
+ struct scatterlist *sg;
+ int i;
+ bool use_mr;
+
+ state->desc = req->indirect_desc;
+ state->pages = req->map_page;
+ if (dev->use_fast_reg) {
+ state->next_fr = req->fr_list;
+ use_mr = !!target->fr_pool;
+ } else {
+ state->next_fmr = req->fmr_list;
+ use_mr = !!target->fmr_pool;
+ }
+
+ for_each_sg(scat, sg, count, i) {
+ if (srp_map_sg_entry(state, target, sg, i, use_mr)) {
+ /*
+ * Memory registration failed, so backtrack to the
+ * first unmapped entry and continue on without using
+ * memory registration.
+ */
+ dma_addr_t dma_addr;
+ unsigned int dma_len;
+
+ /* Also entered by the goto after the loop (see below). */
+backtrack:
+ sg = state->unmapped_sg;
+ i = state->unmapped_index;
+
+ dma_addr = ib_sg_dma_address(ibdev, sg);
+ dma_len = ib_sg_dma_len(ibdev, sg);
+ /* Skip the part of this entry that was already mapped. */
+ dma_len -= (state->unmapped_addr - dma_addr);
+ dma_addr = state->unmapped_addr;
+ use_mr = false;
+ srp_map_desc(state, dma_addr, dma_len, target->rkey);
+ }
+ }
+
+ /*
+ * Close out the last pending registration. If that fails, jump back
+ * into the loop body so the remaining entries get direct descriptors.
+ */
+ if (use_mr && srp_finish_mapping(state, target))
+ goto backtrack;
+
+ req->nmdesc = state->nmdesc;
+
+ return 0;
+}
+
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
struct srp_request *req)
{
- struct scatterlist *scat, *sg;
+ struct scatterlist *scat;
struct srp_cmd *cmd = req->cmd->buf;
- int i, len, nents, count, use_fmr;
+ int len, nents, count;
struct srp_device *dev;
struct ib_device *ibdev;
struct srp_map_state state;
@@ -930,7 +1429,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
fmt = SRP_DATA_DESC_DIRECT;
len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
- if (count == 1) {
+ if (count == 1 && !register_always) {
/*
* The midlayer only generated a single gather/scatter
* entry, or DMA mapping coalesced everything to a
@@ -943,13 +1442,13 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
buf->key = cpu_to_be32(target->rkey);
buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
- req->nfmr = 0;
+ req->nmdesc = 0;
goto map_complete;
}
- /* We have more than one scatter/gather entry, so build our indirect
- * descriptor table, trying to merge as many entries with FMR as we
- * can.
+ /*
+ * We have more than one scatter/gather entry, so build our indirect
+ * descriptor table, trying to merge as many entries as we can.
*/
indirect_hdr = (void *) cmd->add_data;
@@ -957,35 +1456,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
target->indirect_size, DMA_TO_DEVICE);
memset(&state, 0, sizeof(state));
- state.desc = req->indirect_desc;
- state.pages = req->map_page;
- state.next_fmr = req->fmr_list;
-
- use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR;
-
- for_each_sg(scat, sg, count, i) {
- if (srp_map_sg_entry(&state, target, sg, i, use_fmr)) {
- /* FMR mapping failed, so backtrack to the first
- * unmapped entry and continue on without using FMR.
- */
- dma_addr_t dma_addr;
- unsigned int dma_len;
-
-backtrack:
- sg = state.unmapped_sg;
- i = state.unmapped_index;
-
- dma_addr = ib_sg_dma_address(ibdev, sg);
- dma_len = ib_sg_dma_len(ibdev, sg);
- dma_len -= (state.unmapped_addr - dma_addr);
- dma_addr = state.unmapped_addr;
- use_fmr = SRP_MAP_NO_FMR;
- srp_map_desc(&state, dma_addr, dma_len, target->rkey);
- }
- }
-
- if (use_fmr == SRP_MAP_ALLOW_FMR && srp_map_finish_fmr(&state, target))
- goto backtrack;
+ srp_map_sg(&state, target, req, scat, count);
/* We've mapped the request, now pull as much of the indirect
* descriptor table as we can into the command buffer. If this
@@ -993,9 +1464,9 @@ backtrack:
* guaranteed to fit into the command, as the SCSI layer won't
* give us more S/G entries than we allow.
*/
- req->nfmr = state.nfmr;
if (state.ndesc == 1) {
- /* FMR mapping was able to collapse this to one entry,
+ /*
+ * Memory registration collapsed the sg-list into one entry,
* so use a direct descriptor.
*/
struct srp_direct_buf *buf = (void *) cmd->add_data;
@@ -1151,7 +1622,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
complete(&target->tsk_mgmt_done);
} else {
req = &target->req_ring[rsp->tag];
- scmnd = srp_claim_req(target, req, NULL);
+ scmnd = srp_claim_req(target, req, NULL, NULL);
if (!scmnd) {
shost_printk(KERN_ERR, target->scsi_host,
"Null scmnd for RSP w/tag %016llx\n",
@@ -1302,15 +1773,41 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
PFX "Recv failed with error code %d\n", res);
}
-static void srp_handle_qp_err(enum ib_wc_status wc_status,
- enum ib_wc_opcode wc_opcode,
- struct srp_target_port *target)
+/**
+ * srp_tl_err_work() - handle a transport layer error
+ * @work: Work structure embedded in an SRP target port.
+ *
+ * Starts the transport layer failure timers of the target's rport.
+ *
+ * Note: This function may get invoked before the rport has been created,
+ * hence the target->rport test.
+ */
+static void srp_tl_err_work(struct work_struct *work)
+{
+	struct srp_target_port *target =
+		container_of(work, struct srp_target_port, tl_err_work);
+
+	if (!target->rport)
+		return;
+
+	srp_start_tl_fail_timers(target->rport);
+}
+
+static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
+ bool send_err, struct srp_target_port *target)
{
if (target->connected && !target->qp_in_error) {
- shost_printk(KERN_ERR, target->scsi_host,
- PFX "failed %s status %d\n",
- wc_opcode & IB_WC_RECV ? "receive" : "send",
- wc_status);
+ if (wr_id & LOCAL_INV_WR_ID_MASK) {
+ shost_printk(KERN_ERR, target->scsi_host, PFX
+ "LOCAL_INV failed with status %d\n",
+ wc_status);
+ } else if (wr_id & FAST_REG_WR_ID_MASK) {
+ shost_printk(KERN_ERR, target->scsi_host, PFX
+ "FAST_REG_MR failed status %d\n",
+ wc_status);
+ } else {
+ shost_printk(KERN_ERR, target->scsi_host,
+ PFX "failed %s status %d for iu %p\n",
+ send_err ? "send" : "receive",
+ wc_status, (void *)(uintptr_t)wr_id);
+ }
+ queue_work(system_long_wq, &target->tl_err_work);
}
target->qp_in_error = true;
}
@@ -1325,7 +1822,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
if (likely(wc.status == IB_WC_SUCCESS)) {
srp_handle_recv(target, &wc);
} else {
- srp_handle_qp_err(wc.status, wc.opcode, target);
+ srp_handle_qp_err(wc.wr_id, wc.status, false, target);
}
}
}
@@ -1341,7 +1838,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
list_add(&iu->list, &target->free_tx);
} else {
- srp_handle_qp_err(wc.status, wc.opcode, target);
+ srp_handle_qp_err(wc.wr_id, wc.status, true, target);
}
}
}
@@ -1349,18 +1846,27 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(shost);
+ struct srp_rport *rport = target->rport;
struct srp_request *req;
struct srp_iu *iu;
struct srp_cmd *cmd;
struct ib_device *dev;
unsigned long flags;
- int len;
+ int len, ret;
+ const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
- if (unlikely(target->transport_offline)) {
- scmnd->result = DID_NO_CONNECT << 16;
- scmnd->scsi_done(scmnd);
- return 0;
- }
+ /*
+ * The SCSI EH thread is the only context from which srp_queuecommand()
+ * can get invoked for blocked devices (SDEV_BLOCK /
+ * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
+ * locking the rport mutex if invoked from inside the SCSI EH.
+ */
+ if (in_scsi_eh)
+ mutex_lock(&rport->mutex);
+
+ scmnd->result = srp_chkready(target->rport);
+ if (unlikely(scmnd->result))
+ goto err;
spin_lock_irqsave(&target->lock, flags);
iu = __srp_get_tx_iu(target, SRP_IU_CMD);
@@ -1375,7 +1881,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
DMA_TO_DEVICE);
- scmnd->result = 0;
scmnd->host_scribble = (void *) req;
cmd = iu->buf;
@@ -1392,7 +1897,15 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
len = srp_map_data(scmnd, target, req);
if (len < 0) {
shost_printk(KERN_ERR, target->scsi_host,
- PFX "Failed to map data\n");
+ PFX "Failed to map data (%d)\n", len);
+ /*
+ * If we ran out of memory descriptors (-ENOMEM) because an
+ * application is queuing many requests with more than
+ * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
+ * to reduce queue depth temporarily.
+ */
+ scmnd->result = len == -ENOMEM ?
+ DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
goto err_iu;
}
@@ -1404,7 +1917,13 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
goto err_unmap;
}
- return 0;
+ ret = 0;
+
+unlock_rport:
+ if (in_scsi_eh)
+ mutex_unlock(&rport->mutex);
+
+ return ret;
err_unmap:
srp_unmap_data(scmnd, target, req);
@@ -1412,20 +1931,47 @@ err_unmap:
err_iu:
srp_put_tx_iu(target, iu, SRP_IU_CMD);
+ /*
+ * Avoid that the loops that iterate over the request ring can
+ * encounter a dangling SCSI command pointer.
+ */
+ req->scmnd = NULL;
+
spin_lock_irqsave(&target->lock, flags);
list_add(&req->list, &target->free_reqs);
err_unlock:
spin_unlock_irqrestore(&target->lock, flags);
- return SCSI_MLQUEUE_HOST_BUSY;
+err:
+ if (scmnd->result) {
+ scmnd->scsi_done(scmnd);
+ ret = 0;
+ } else {
+ ret = SCSI_MLQUEUE_HOST_BUSY;
+ }
+
+ goto unlock_rport;
}
+/*
+ * Note: the resources allocated in this function are freed in
+ * srp_free_target_ib().
+ */
static int srp_alloc_iu_bufs(struct srp_target_port *target)
{
int i;
- for (i = 0; i < SRP_RQ_SIZE; ++i) {
+ target->rx_ring = kzalloc(target->queue_size * sizeof(*target->rx_ring),
+ GFP_KERNEL);
+ if (!target->rx_ring)
+ goto err_no_ring;
+ target->tx_ring = kzalloc(target->queue_size * sizeof(*target->tx_ring),
+ GFP_KERNEL);
+ if (!target->tx_ring)
+ goto err_no_ring;
+
+ for (i = 0; i < target->queue_size; ++i) {
target->rx_ring[i] = srp_alloc_iu(target->srp_host,
target->max_ti_iu_len,
GFP_KERNEL, DMA_FROM_DEVICE);
@@ -1433,7 +1979,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
goto err;
}
- for (i = 0; i < SRP_SQ_SIZE; ++i) {
+ for (i = 0; i < target->queue_size; ++i) {
target->tx_ring[i] = srp_alloc_iu(target->srp_host,
target->max_iu_len,
GFP_KERNEL, DMA_TO_DEVICE);
@@ -1446,16 +1992,18 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
return 0;
err:
- for (i = 0; i < SRP_RQ_SIZE; ++i) {
+ for (i = 0; i < target->queue_size; ++i) {
srp_free_iu(target->srp_host, target->rx_ring[i]);
- target->rx_ring[i] = NULL;
- }
-
- for (i = 0; i < SRP_SQ_SIZE; ++i) {
srp_free_iu(target->srp_host, target->tx_ring[i]);
- target->tx_ring[i] = NULL;
}
+
+err_no_ring:
+ kfree(target->tx_ring);
+ target->tx_ring = NULL;
+ kfree(target->rx_ring);
+ target->rx_ring = NULL;
+
return -ENOMEM;
}
@@ -1506,6 +2054,9 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
target->scsi_host->can_queue
= min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,
target->scsi_host->can_queue);
+ target->scsi_host->cmd_per_lun
+ = min_t(int, target->scsi_host->can_queue,
+ target->scsi_host->cmd_per_lun);
} else {
shost_printk(KERN_WARNING, target->scsi_host,
PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
@@ -1513,7 +2064,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
goto error;
}
- if (!target->rx_ring[0]) {
+ if (!target->rx_ring) {
ret = srp_alloc_iu_bufs(target);
if (ret)
goto error;
@@ -1533,7 +2084,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
if (ret)
goto error_free;
- for (i = 0; i < SRP_RQ_SIZE; i++) {
+ for (i = 0; i < target->queue_size; i++) {
struct srp_iu *iu = target->rx_ring[i];
ret = srp_post_recv(target, iu);
if (ret)
@@ -1619,8 +2170,10 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
shost_printk(KERN_WARNING, shost,
PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
else
- shost_printk(KERN_WARNING, shost,
- PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
+ shost_printk(KERN_WARNING, shost, PFX
+ "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
+ target->path.sgid.raw,
+ target->orig_dgid, reason);
} else
shost_printk(KERN_WARNING, shost,
" REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
@@ -1672,11 +2225,13 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
if (ib_send_cm_drep(cm_id, NULL, 0))
shost_printk(KERN_ERR, target->scsi_host,
PFX "Sending CM DREP failed\n");
+ queue_work(system_long_wq, &target->tl_err_work);
break;
case IB_CM_TIMEWAIT_EXIT:
shost_printk(KERN_ERR, target->scsi_host,
PFX "connection closed\n");
+ comp = 1;
target->status = 0;
break;
@@ -1698,9 +2253,61 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
return 0;
}
+/**
+ * srp_change_queue_type - changing device queue tag type
+ * @sdev: scsi device struct
+ * @tag_type: requested tag type
+ *
+ * Returns queue tag type.
+ */
+static int
+srp_change_queue_type(struct scsi_device *sdev, int tag_type)
+{
+ if (sdev->tagged_supported) {
+ scsi_set_tag_type(sdev, tag_type);
+ if (tag_type)
+ scsi_activate_tcq(sdev, sdev->queue_depth);
+ else
+ scsi_deactivate_tcq(sdev, sdev->queue_depth);
+ } else
+ tag_type = 0;
+
+ return tag_type;
+}
+
+/**
+ * srp_change_queue_depth - setting device queue depth
+ * @sdev: scsi device struct
+ * @qdepth: requested queue depth
+ * @reason: SCSI_QDEPTH_DEFAULT/SCSI_QDEPTH_QFULL/SCSI_QDEPTH_RAMP_UP
+ * (see include/scsi/scsi_host.h for definition)
+ *
+ * Returns queue depth.
+ */
+static int
+srp_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason)
+{
+ struct Scsi_Host *shost = sdev->host;
+ int max_depth;
+ if (reason == SCSI_QDEPTH_DEFAULT || reason == SCSI_QDEPTH_RAMP_UP) {
+ max_depth = shost->can_queue;
+ if (!sdev->tagged_supported)
+ max_depth = 1;
+ if (qdepth > max_depth)
+ qdepth = max_depth;
+ scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth);
+ } else if (reason == SCSI_QDEPTH_QFULL)
+ scsi_track_queue_full(sdev, qdepth);
+ else
+ return -EOPNOTSUPP;
+
+ return sdev->queue_depth;
+}
+
static int srp_send_tsk_mgmt(struct srp_target_port *target,
u64 req_tag, unsigned int lun, u8 func)
{
+ struct srp_rport *rport = target->rport;
struct ib_device *dev = target->srp_host->srp_dev->dev;
struct srp_iu *iu;
struct srp_tsk_mgmt *tsk_mgmt;
@@ -1710,12 +2317,20 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
init_completion(&target->tsk_mgmt_done);
+ /*
+ * Lock the rport mutex to avoid that srp_create_target_ib() is
+ * invoked while a task management function is being sent.
+ */
+ mutex_lock(&rport->mutex);
spin_lock_irq(&target->lock);
iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);
spin_unlock_irq(&target->lock);
- if (!iu)
+ if (!iu) {
+ mutex_unlock(&rport->mutex);
+
return -1;
+ }
ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
DMA_TO_DEVICE);
@@ -1732,8 +2347,11 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
DMA_TO_DEVICE);
if (srp_post_send(target, iu, sizeof *tsk_mgmt)) {
srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT);
+ mutex_unlock(&rport->mutex);
+
return -1;
}
+ mutex_unlock(&rport->mutex);
if (!wait_for_completion_timeout(&target->tsk_mgmt_done,
msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
@@ -1750,12 +2368,12 @@ static int srp_abort(struct scsi_cmnd *scmnd)
shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
- if (!req || !srp_claim_req(target, req, scmnd))
- return FAILED;
+ if (!req || !srp_claim_req(target, req, NULL, scmnd))
+ return SUCCESS;
if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
SRP_TSK_ABORT_TASK) == 0)
ret = SUCCESS;
- else if (target->transport_offline)
+ else if (target->rport->state == SRP_RPORT_LOST)
ret = FAST_IO_FAIL;
else
ret = FAILED;
@@ -1779,10 +2397,9 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
if (target->tsk_mgmt_status)
return FAILED;
- for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
+ for (i = 0; i < target->req_ring_size; ++i) {
struct srp_request *req = &target->req_ring[i];
- if (req->scmnd && req->scmnd->device == scmnd->device)
- srp_reset_req(target, req);
+ srp_finish_req(target, req, scmnd->device, DID_RESET << 16);
}
return SUCCESS;
@@ -1791,14 +2408,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
static int srp_reset_host(struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(scmnd->device->host);
- int ret = FAILED;
shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
- if (!srp_reconnect_target(target))
- ret = SUCCESS;
-
- return ret;
+ return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
}
static int srp_slave_configure(struct scsi_device *sdev)
@@ -1851,6 +2464,14 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));
}
+static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct srp_target_port *target = host_to_target(class_to_shost(dev));
+
+ return sprintf(buf, "%pI6\n", target->path.sgid.raw);
+}
+
static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -1907,6 +2528,14 @@ static ssize_t show_comp_vector(struct device *dev,
return sprintf(buf, "%d\n", target->comp_vector);
}
+static ssize_t show_tl_retry_count(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct srp_target_port *target = host_to_target(class_to_shost(dev));
+
+ return sprintf(buf, "%d\n", target->tl_retry_count);
+}
+
static ssize_t show_cmd_sg_entries(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1927,6 +2556,7 @@ static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
+static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
@@ -1934,6 +2564,7 @@ static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
+static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
@@ -1942,6 +2573,7 @@ static struct device_attribute *srp_host_attrs[] = {
&dev_attr_ioc_guid,
&dev_attr_service_id,
&dev_attr_pkey,
+ &dev_attr_sgid,
&dev_attr_dgid,
&dev_attr_orig_dgid,
&dev_attr_req_lim,
@@ -1949,6 +2581,7 @@ static struct device_attribute *srp_host_attrs[] = {
&dev_attr_local_ib_port,
&dev_attr_local_ib_device,
&dev_attr_comp_vector,
+ &dev_attr_tl_retry_count,
&dev_attr_cmd_sg_entries,
&dev_attr_allow_ext_sg,
NULL
@@ -1961,14 +2594,16 @@ static struct scsi_host_template srp_template = {
.slave_configure = srp_slave_configure,
.info = srp_target_info,
.queuecommand = srp_queuecommand,
+ .change_queue_depth = srp_change_queue_depth,
+ .change_queue_type = srp_change_queue_type,
.eh_abort_handler = srp_abort,
.eh_device_reset_handler = srp_reset_device,
.eh_host_reset_handler = srp_reset_host,
.skip_settle_delay = true,
.sg_tablesize = SRP_DEF_SG_TABLESIZE,
- .can_queue = SRP_CMD_SQ_SIZE,
+ .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
.this_id = -1,
- .cmd_per_lun = SRP_CMD_SQ_SIZE,
+ .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
.use_clustering = ENABLE_CLUSTERING,
.shost_attrs = srp_host_attrs
};
@@ -1994,6 +2629,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
}
rport->lld_data = target;
+ target->rport = rport;
spin_lock(&host->target_lock);
list_add_tail(&target->list, &host->target_list);
@@ -2022,6 +2658,8 @@ static struct class srp_class = {
/**
* srp_conn_unique() - check whether the connection to a target is unique
+ * @host: SRP host.
+ * @target: SRP target port.
*/
static bool srp_conn_unique(struct srp_host *host,
struct srp_target_port *target)
@@ -2073,6 +2711,8 @@ enum {
SRP_OPT_ALLOW_EXT_SG = 1 << 10,
SRP_OPT_SG_TABLESIZE = 1 << 11,
SRP_OPT_COMP_VECTOR = 1 << 12,
+ SRP_OPT_TL_RETRY_COUNT = 1 << 13,
+ SRP_OPT_QUEUE_SIZE = 1 << 14,
SRP_OPT_ALL = (SRP_OPT_ID_EXT |
SRP_OPT_IOC_GUID |
SRP_OPT_DGID |
@@ -2094,6 +2734,8 @@ static const match_table_t srp_opt_tokens = {
{ SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
{ SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
{ SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
+ { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
+ { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
{ SRP_OPT_ERR, NULL }
};
@@ -2188,13 +2830,25 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
target->scsi_host->max_sectors = token;
break;
+ case SRP_OPT_QUEUE_SIZE:
+ if (match_int(args, &token) || token < 1) {
+ pr_warn("bad queue_size parameter '%s'\n", p);
+ goto out;
+ }
+ target->scsi_host->can_queue = token;
+ target->queue_size = token + SRP_RSP_SQ_SIZE +
+ SRP_TSK_MGMT_SQ_SIZE;
+ if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
+ target->scsi_host->cmd_per_lun = token;
+ break;
+
case SRP_OPT_MAX_CMD_PER_LUN:
- if (match_int(args, &token)) {
+ if (match_int(args, &token) || token < 1) {
pr_warn("bad max cmd_per_lun parameter '%s'\n",
p);
goto out;
}
- target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE);
+ target->scsi_host->cmd_per_lun = token;
break;
case SRP_OPT_IO_CLASS:
@@ -2257,6 +2911,15 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
target->comp_vector = token;
break;
+ case SRP_OPT_TL_RETRY_COUNT:
+ if (match_int(args, &token) || token < 2 || token > 7) {
+ pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
+ p);
+ goto out;
+ }
+ target->tl_retry_count = token;
+ break;
+
default:
pr_warn("unknown parameter or missing value '%s' in target creation request\n",
p);
@@ -2273,6 +2936,12 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
pr_warn("target creation request is missing parameter '%s'\n",
srp_opt_tokens[i].pattern);
+ if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
+ && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
+ pr_warn("cmd_per_lun = %d > queue_size = %d\n",
+ target->scsi_host->cmd_per_lun,
+ target->scsi_host->can_queue);
+
out:
kfree(options);
return ret;
@@ -2286,9 +2955,9 @@ static ssize_t srp_create_target(struct device *dev,
container_of(dev, struct srp_host, dev);
struct Scsi_Host *target_host;
struct srp_target_port *target;
- struct ib_device *ibdev = host->srp_dev->dev;
- dma_addr_t dma_addr;
- int i, ret;
+ struct srp_device *srp_dev = host->srp_dev;
+ struct ib_device *ibdev = srp_dev->dev;
+ int ret;
target_host = scsi_host_alloc(&srp_template,
sizeof (struct srp_target_port));
@@ -2311,11 +2980,17 @@ static ssize_t srp_create_target(struct device *dev,
target->cmd_sg_cnt = cmd_sg_entries;
target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
target->allow_ext_sg = allow_ext_sg;
+ target->tl_retry_count = 7;
+ target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
+
+ mutex_lock(&host->add_target_mutex);
ret = srp_parse_options(buf, target);
if (ret)
goto err;
+ target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
+
if (!srp_conn_unique(target->srp_host, target)) {
shost_printk(KERN_INFO, target->scsi_host,
PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
@@ -2326,9 +3001,9 @@ static ssize_t srp_create_target(struct device *dev,
goto err;
}
- if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
- target->cmd_sg_cnt < target->sg_tablesize) {
- pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
+ if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
+ target->cmd_sg_cnt < target->sg_tablesize) {
+ pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
target->sg_tablesize = target->cmd_sg_cnt;
}
@@ -2339,42 +3014,17 @@ static ssize_t srp_create_target(struct device *dev,
sizeof (struct srp_indirect_buf) +
target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
+ INIT_WORK(&target->tl_err_work, srp_tl_err_work);
INIT_WORK(&target->remove_work, srp_remove_work);
spin_lock_init(&target->lock);
INIT_LIST_HEAD(&target->free_tx);
- INIT_LIST_HEAD(&target->free_reqs);
- for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
- struct srp_request *req = &target->req_ring[i];
-
- req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof (void *),
- GFP_KERNEL);
- req->map_page = kmalloc(SRP_FMR_SIZE * sizeof (void *),
- GFP_KERNEL);
- req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
- if (!req->fmr_list || !req->map_page || !req->indirect_desc)
- goto err_free_mem;
-
- dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
- target->indirect_size,
- DMA_TO_DEVICE);
- if (ib_dma_mapping_error(ibdev, dma_addr))
- goto err_free_mem;
-
- req->indirect_dma_addr = dma_addr;
- req->index = i;
- list_add_tail(&req->list, &target->free_reqs);
- }
-
- ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
+ ret = srp_alloc_req_data(target);
+ if (ret)
+ goto err_free_mem;
- shost_printk(KERN_DEBUG, target->scsi_host, PFX
- "new target: id_ext %016llx ioc_guid %016llx pkey %04x "
- "service_id %016llx dgid %pI6\n",
- (unsigned long long) be64_to_cpu(target->id_ext),
- (unsigned long long) be64_to_cpu(target->ioc_guid),
- be16_to_cpu(target->path.pkey),
- (unsigned long long) be64_to_cpu(target->service_id),
- target->path.dgid.raw);
+ ret = ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
+ if (ret)
+ goto err_free_mem;
ret = srp_create_target_ib(target);
if (ret)
@@ -2395,7 +3045,19 @@ static ssize_t srp_create_target(struct device *dev,
if (ret)
goto err_disconnect;
- return count;
+ shost_printk(KERN_DEBUG, target->scsi_host, PFX
+ "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
+ be64_to_cpu(target->id_ext),
+ be64_to_cpu(target->ioc_guid),
+ be16_to_cpu(target->path.pkey),
+ be64_to_cpu(target->service_id),
+ target->path.sgid.raw, target->path.dgid.raw);
+
+ ret = count;
+
+out:
+ mutex_unlock(&host->add_target_mutex);
+ return ret;
err_disconnect:
srp_disconnect_target(target);
@@ -2411,8 +3073,7 @@ err_free_mem:
err:
scsi_host_put(target_host);
-
- return ret;
+ goto out;
}
static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
@@ -2448,6 +3109,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
INIT_LIST_HEAD(&host->target_list);
spin_lock_init(&host->target_lock);
init_completion(&host->released);
+ mutex_init(&host->add_target_mutex);
host->srp_dev = device;
host->port = port;
@@ -2479,9 +3141,9 @@ static void srp_add_one(struct ib_device *device)
{
struct srp_device *srp_dev;
struct ib_device_attr *dev_attr;
- struct ib_fmr_pool_param fmr_param;
struct srp_host *host;
- int max_pages_per_fmr, fmr_page_shift, s, e, p;
+ int mr_page_shift, s, e, p;
+ u64 max_pages_per_mr;
dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
if (!dev_attr)
@@ -2496,15 +3158,39 @@ static void srp_add_one(struct ib_device *device)
if (!srp_dev)
goto free_attr;
+ srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+ device->map_phys_fmr && device->unmap_fmr);
+ srp_dev->has_fr = (dev_attr->device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS);
+ if (!srp_dev->has_fmr && !srp_dev->has_fr)
+ dev_warn(&device->dev, "neither FMR nor FR is supported\n");
+
+ srp_dev->use_fast_reg = (srp_dev->has_fr &&
+ (!srp_dev->has_fmr || prefer_fr));
+
/*
* Use the smallest page size supported by the HCA, down to a
* minimum of 4096 bytes. We're unlikely to build large sglists
* out of smaller entries.
*/
- fmr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
- srp_dev->fmr_page_size = 1 << fmr_page_shift;
- srp_dev->fmr_page_mask = ~((u64) srp_dev->fmr_page_size - 1);
- srp_dev->fmr_max_size = srp_dev->fmr_page_size * SRP_FMR_SIZE;
+ mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
+ srp_dev->mr_page_size = 1 << mr_page_shift;
+ srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
+ max_pages_per_mr = dev_attr->max_mr_size;
+ do_div(max_pages_per_mr, srp_dev->mr_page_size);
+ srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
+ max_pages_per_mr);
+ if (srp_dev->use_fast_reg) {
+ srp_dev->max_pages_per_mr =
+ min_t(u32, srp_dev->max_pages_per_mr,
+ dev_attr->max_fast_reg_page_list_len);
+ }
+ srp_dev->mr_max_size = srp_dev->mr_page_size *
+ srp_dev->max_pages_per_mr;
+ pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
+ device->name, mr_page_shift, dev_attr->max_mr_size,
+ dev_attr->max_fast_reg_page_list_len,
+ srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
INIT_LIST_HEAD(&srp_dev->dev_list);
@@ -2520,27 +3206,6 @@ static void srp_add_one(struct ib_device *device)
if (IS_ERR(srp_dev->mr))
goto err_pd;
- for (max_pages_per_fmr = SRP_FMR_SIZE;
- max_pages_per_fmr >= SRP_FMR_MIN_SIZE;
- max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) {
- memset(&fmr_param, 0, sizeof fmr_param);
- fmr_param.pool_size = SRP_FMR_POOL_SIZE;
- fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE;
- fmr_param.cache = 1;
- fmr_param.max_pages_per_fmr = max_pages_per_fmr;
- fmr_param.page_shift = fmr_page_shift;
- fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ);
-
- srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
- if (!IS_ERR(srp_dev->fmr_pool))
- break;
- }
-
- if (IS_ERR(srp_dev->fmr_pool))
- srp_dev->fmr_pool = NULL;
-
if (device->node_type == RDMA_NODE_IB_SWITCH) {
s = 0;
e = 0;
@@ -2603,8 +3268,6 @@ static void srp_remove_one(struct ib_device *device)
kfree(host);
}
- if (srp_dev->fmr_pool)
- ib_destroy_fmr_pool(srp_dev->fmr_pool);
ib_dereg_mr(srp_dev->mr);
ib_dealloc_pd(srp_dev->pd);
@@ -2612,7 +3275,14 @@ static void srp_remove_one(struct ib_device *device)
}
static struct srp_function_template ib_srp_transport_functions = {
+ .has_rport_state = true,
+ .reset_timer_if_blocked = true,
+ .reconnect_delay = &srp_reconnect_delay,
+ .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
+ .dev_loss_tmo = &srp_dev_loss_tmo,
+ .reconnect = srp_rport_reconnect,
.rport_delete = srp_rport_delete,
+ .terminate_rport_io = srp_terminate_io,
};
static int __init srp_init_module(void)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index e641088c14d..e46ecb15aa0 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -57,25 +57,19 @@ enum {
SRP_MAX_LUN = 512,
SRP_DEF_SG_TABLESIZE = 12,
- SRP_RQ_SHIFT = 6,
- SRP_RQ_SIZE = 1 << SRP_RQ_SHIFT,
-
- SRP_SQ_SIZE = SRP_RQ_SIZE,
+ SRP_DEFAULT_QUEUE_SIZE = 1 << 6,
SRP_RSP_SQ_SIZE = 1,
- SRP_REQ_SQ_SIZE = SRP_SQ_SIZE - SRP_RSP_SQ_SIZE,
SRP_TSK_MGMT_SQ_SIZE = 1,
- SRP_CMD_SQ_SIZE = SRP_REQ_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE,
+ SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE -
+ SRP_TSK_MGMT_SQ_SIZE,
SRP_TAG_NO_REQ = ~0U,
SRP_TAG_TSK_MGMT = 1U << 31,
- SRP_FMR_SIZE = 512,
- SRP_FMR_MIN_SIZE = 128,
- SRP_FMR_POOL_SIZE = 1024,
- SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4,
+ SRP_MAX_PAGES_PER_MR = 512,
- SRP_MAP_ALLOW_FMR = 0,
- SRP_MAP_NO_FMR = 1,
+ LOCAL_INV_WR_ID_MASK = 1,
+ FAST_REG_WR_ID_MASK = 2,
};
enum srp_target_state {
@@ -89,15 +83,24 @@ enum srp_iu_type {
SRP_IU_RSP,
};
+/*
+ * @mr_page_mask: HCA memory registration page mask.
+ * @mr_page_size: HCA memory registration page size.
+ * @mr_max_size: Maximum size in bytes of a single FMR / FR registration
+ * request.
+ */
struct srp_device {
struct list_head dev_list;
struct ib_device *dev;
struct ib_pd *pd;
struct ib_mr *mr;
- struct ib_fmr_pool *fmr_pool;
- u64 fmr_page_mask;
- int fmr_page_size;
- int fmr_max_size;
+ u64 mr_page_mask;
+ int mr_page_size;
+ int mr_max_size;
+ int max_pages_per_mr;
+ bool has_fmr;
+ bool has_fr;
+ bool use_fast_reg;
};
struct srp_host {
@@ -108,17 +111,21 @@ struct srp_host {
spinlock_t target_lock;
struct completion released;
struct list_head list;
+ struct mutex add_target_mutex;
};
struct srp_request {
struct list_head list;
struct scsi_cmnd *scmnd;
struct srp_iu *cmd;
- struct ib_pool_fmr **fmr_list;
+ union {
+ struct ib_pool_fmr **fmr_list;
+ struct srp_fr_desc **fr_list;
+ };
u64 *map_page;
struct srp_direct_buf *indirect_desc;
dma_addr_t indirect_dma_addr;
- short nfmr;
+ short nmdesc;
short index;
};
@@ -133,6 +140,10 @@ struct srp_target_port {
struct ib_cq *send_cq ____cacheline_aligned_in_smp;
struct ib_cq *recv_cq;
struct ib_qp *qp;
+ union {
+ struct ib_fmr_pool *fmr_pool;
+ struct srp_fr_pool *fr_pool;
+ };
u32 lkey;
u32 rkey;
enum srp_target_state state;
@@ -140,7 +151,6 @@ struct srp_target_port {
unsigned int cmd_sg_cnt;
unsigned int indirect_size;
bool allow_ext_sg;
- bool transport_offline;
/* Everything above this point is used in the hot path of
* command processing. Try to keep them packed into cachelines.
@@ -153,10 +163,14 @@ struct srp_target_port {
u16 io_class;
struct srp_host *srp_host;
struct Scsi_Host *scsi_host;
+ struct srp_rport *rport;
char target_name[32];
unsigned int scsi_id;
unsigned int sg_tablesize;
+ int queue_size;
+ int req_ring_size;
int comp_vector;
+ int tl_retry_count;
struct ib_sa_path_rec path;
__be16 orig_dgid[8];
@@ -172,10 +186,11 @@ struct srp_target_port {
int zero_req_lim;
- struct srp_iu *tx_ring[SRP_SQ_SIZE];
- struct srp_iu *rx_ring[SRP_RQ_SIZE];
- struct srp_request req_ring[SRP_CMD_SQ_SIZE];
+ struct srp_iu **tx_ring;
+ struct srp_iu **rx_ring;
+ struct srp_request *req_ring;
+ struct work_struct tl_err_work;
struct work_struct remove_work;
struct list_head list;
@@ -195,15 +210,66 @@ struct srp_iu {
enum dma_data_direction direction;
};
+/**
+ * struct srp_fr_desc - fast registration work request arguments
+ * @entry: Entry in srp_fr_pool.free_list.
+ * @mr: Memory region.
+ * @frpl: Fast registration page list.
+ */
+struct srp_fr_desc {
+ struct list_head entry;
+ struct ib_mr *mr;
+ struct ib_fast_reg_page_list *frpl;
+};
+
+/**
+ * struct srp_fr_pool - pool of fast registration descriptors
+ *
+ * An entry is available for allocation if and only if it occurs in @free_list.
+ *
+ * @size: Number of descriptors in this pool.
+ * @max_page_list_len: Maximum fast registration work request page list length.
+ * @lock: Protects free_list.
+ * @free_list: List of free descriptors.
+ * @desc: Fast registration descriptor pool.
+ */
+struct srp_fr_pool {
+ int size;
+ int max_page_list_len;
+ spinlock_t lock;
+ struct list_head free_list;
+ struct srp_fr_desc desc[0];
+};
+
+/**
+ * struct srp_map_state - per-request DMA memory mapping state
+ * @desc: Pointer to the element of the SRP buffer descriptor array
+ * that is being filled in.
+ * @pages: Array with DMA addresses of pages being considered for
+ * memory registration.
+ * @base_dma_addr: DMA address of the first page that has not yet been mapped.
+ * @dma_len: Number of bytes that will be registered with the next
+ * FMR or FR memory registration call.
+ * @total_len: Total number of bytes in the sg-list being mapped.
+ * @npages: Number of page addresses in the pages[] array.
+ * @nmdesc: Number of FMR or FR memory descriptors used for mapping.
+ * @ndesc: Number of SRP buffer descriptors that have been filled in.
+ * @unmapped_sg: First element of the sg-list that is mapped via FMR or FR.
+ * @unmapped_index: Index of the first element mapped via FMR or FR.
+ * @unmapped_addr: DMA address of the first element mapped via FMR or FR.
+ */
struct srp_map_state {
- struct ib_pool_fmr **next_fmr;
+ union {
+ struct ib_pool_fmr **next_fmr;
+ struct srp_fr_desc **next_fr;
+ };
struct srp_direct_buf *desc;
u64 *pages;
dma_addr_t base_dma_addr;
- u32 fmr_len;
+ u32 dma_len;
u32 total_len;
unsigned int npages;
- unsigned int nfmr;
+ unsigned int nmdesc;
unsigned int ndesc;
struct scatterlist *unmapped_sg;
int unmapped_index;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 653ac6bfc57..fe09f2788b1 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1078,6 +1078,7 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
struct srpt_send_ioctx *ioctx)
{
+ struct ib_device *dev = ch->sport->sdev->device;
struct se_cmd *cmd;
struct scatterlist *sg, *sg_orig;
int sg_cnt;
@@ -1124,7 +1125,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
db = ioctx->rbufs;
tsize = cmd->data_length;
- dma_len = sg_dma_len(&sg[0]);
+ dma_len = ib_sg_dma_len(dev, &sg[0]);
riu = ioctx->rdma_ius;
/*
@@ -1155,7 +1156,8 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
++j;
if (j < count) {
sg = sg_next(sg);
- dma_len = sg_dma_len(sg);
+ dma_len = ib_sg_dma_len(
+ dev, sg);
}
}
} else {
@@ -1192,8 +1194,8 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
tsize = cmd->data_length;
riu = ioctx->rdma_ius;
sg = sg_orig;
- dma_len = sg_dma_len(&sg[0]);
- dma_addr = sg_dma_address(&sg[0]);
+ dma_len = ib_sg_dma_len(dev, &sg[0]);
+ dma_addr = ib_sg_dma_address(dev, &sg[0]);
/* this second loop is really mapped sg_addres to rdma_iu->ib_sge */
for (i = 0, j = 0;
@@ -1216,8 +1218,10 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
++j;
if (j < count) {
sg = sg_next(sg);
- dma_len = sg_dma_len(sg);
- dma_addr = sg_dma_address(sg);
+ dma_len = ib_sg_dma_len(
+ dev, sg);
+ dma_addr = ib_sg_dma_address(
+ dev, sg);
}
}
} else {
@@ -1352,11 +1356,8 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
/* XXX(hch): this is a horrible layering violation.. */
spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
- ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
ioctx->cmd.transport_state &= ~CMD_T_ACTIVE;
spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
-
- complete(&ioctx->cmd.transport_lun_stop_comp);
break;
case SRPT_STATE_CMD_RSP_SENT:
/*
@@ -1364,9 +1365,6 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
* not been received in time.
*/
srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
- spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
- ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
- spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd);
break;
case SRPT_STATE_MGMT_RSP_SENT:
@@ -1476,7 +1474,6 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
{
struct se_cmd *cmd;
enum srpt_command_state state;
- unsigned long flags;
cmd = &ioctx->cmd;
state = srpt_get_cmd_state(ioctx);
@@ -1496,9 +1493,6 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
__func__, __LINE__, state);
break;
case SRPT_RDMA_WRITE_LAST:
- spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
- ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
- spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
break;
default:
printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__,
@@ -1588,7 +1582,7 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
int resp_data_len;
int resp_len;
- resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4;
+ resp_data_len = 4;
resp_len = sizeof(*srp_rsp) + resp_data_len;
srp_rsp = ioctx->ioctx.buf;
@@ -1600,11 +1594,9 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
+ atomic_xchg(&ch->req_lim_delta, 0));
srp_rsp->tag = tag;
- if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
- srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
- srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
- srp_rsp->data[3] = rsp_code;
- }
+ srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
+ srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
+ srp_rsp->data[3] = rsp_code;
return resp_len;
}
@@ -2358,6 +2350,8 @@ static void srpt_release_channel_work(struct work_struct *w)
transport_deregister_session(se_sess);
ch->sess = NULL;
+ ib_destroy_cm_id(ch->cm_id);
+
srpt_destroy_ch_ib(ch);
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
@@ -2368,8 +2362,6 @@ static void srpt_release_channel_work(struct work_struct *w)
list_del(&ch->list);
spin_unlock_irq(&sdev->spinlock);
- ib_destroy_cm_id(ch->cm_id);
-
if (ch->release_done)
complete(ch->release_done);
@@ -2592,7 +2584,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
goto destroy_ib;
}
- ch->sess = transport_init_session();
+ ch->sess = transport_init_session(TARGET_PROT_NORMAL);
if (IS_ERR(ch->sess)) {
rej->reason = __constant_cpu_to_be32(
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
@@ -3093,6 +3085,14 @@ static void srpt_queue_tm_rsp(struct se_cmd *cmd)
srpt_queue_response(cmd);
}
+static void srpt_aborted_task(struct se_cmd *cmd)
+{
+ struct srpt_send_ioctx *ioctx = container_of(cmd,
+ struct srpt_send_ioctx, cmd);
+
+ srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
+}
+
static int srpt_queue_status(struct se_cmd *cmd)
{
struct srpt_send_ioctx *ioctx;
@@ -3678,9 +3678,9 @@ static ssize_t srpt_tpg_attrib_store_srp_max_rdma_size(
unsigned long val;
int ret;
- ret = strict_strtoul(page, 0, &val);
+ ret = kstrtoul(page, 0, &val);
if (ret < 0) {
- pr_err("strict_strtoul() failed with ret: %d\n", ret);
+ pr_err("kstrtoul() failed with ret: %d\n", ret);
return -EINVAL;
}
if (val > MAX_SRPT_RDMA_SIZE) {
@@ -3718,9 +3718,9 @@ static ssize_t srpt_tpg_attrib_store_srp_max_rsp_size(
unsigned long val;
int ret;
- ret = strict_strtoul(page, 0, &val);
+ ret = kstrtoul(page, 0, &val);
if (ret < 0) {
- pr_err("strict_strtoul() failed with ret: %d\n", ret);
+ pr_err("kstrtoul() failed with ret: %d\n", ret);
return -EINVAL;
}
if (val > MAX_SRPT_RSP_SIZE) {
@@ -3758,9 +3758,9 @@ static ssize_t srpt_tpg_attrib_store_srp_sq_size(
unsigned long val;
int ret;
- ret = strict_strtoul(page, 0, &val);
+ ret = kstrtoul(page, 0, &val);
if (ret < 0) {
- pr_err("strict_strtoul() failed with ret: %d\n", ret);
+ pr_err("kstrtoul() failed with ret: %d\n", ret);
return -EINVAL;
}
if (val > MAX_SRPT_SRQ_SIZE) {
@@ -3805,7 +3805,7 @@ static ssize_t srpt_tpg_store_enable(
unsigned long tmp;
int ret;
- ret = strict_strtoul(page, 0, &tmp);
+ ret = kstrtoul(page, 0, &tmp);
if (ret < 0) {
printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n");
return -EINVAL;
@@ -3940,6 +3940,7 @@ static struct target_core_fabric_ops srpt_template = {
.queue_data_in = srpt_queue_data_in,
.queue_status = srpt_queue_status,
.queue_tm_rsp = srpt_queue_tm_rsp,
+ .aborted_task = srpt_aborted_task,
/*
* Setup function pointers for generic logic in
* target_core_fabric_configfs.c