aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/mlx4/mad.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx4/mad.c')
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c266
1 files changed, 188 insertions, 78 deletions
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 21a794152d1..287ad0564ac 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -93,7 +93,7 @@ static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
__be64 mlx4_ib_gen_node_guid(void)
{
#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
- return cpu_to_be64(NODE_GUID_HI | random32());
+ return cpu_to_be64(NODE_GUID_HI | prandom_u32());
}
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
@@ -409,38 +409,45 @@ int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
}
-static int get_pkey_phys_indices(struct mlx4_ib_dev *ibdev, u8 port, u8 ph_pkey_ix,
- u8 *full_pk_ix, u8 *partial_pk_ix,
- int *is_full_member)
+static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
+ u8 port, u16 pkey, u16 *ix)
{
- u16 search_pkey;
- int fm;
- int err = 0;
- u16 pk;
+ int i, ret;
+ u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF;
+ u16 slot_pkey;
- err = ib_get_cached_pkey(&ibdev->ib_dev, port, ph_pkey_ix, &search_pkey);
- if (err)
- return err;
+ if (slave == mlx4_master_func_num(dev->dev))
+ return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix);
- fm = (search_pkey & 0x8000) ? 1 : 0;
- if (fm) {
- *full_pk_ix = ph_pkey_ix;
- search_pkey &= 0x7FFF;
- } else {
- *partial_pk_ix = ph_pkey_ix;
- search_pkey |= 0x8000;
- }
+ unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1;
- if (ib_find_exact_cached_pkey(&ibdev->ib_dev, port, search_pkey, &pk))
- pk = 0xFFFF;
+ for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
+ if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix)
+ continue;
- if (fm)
- *partial_pk_ix = (pk & 0xFF);
- else
- *full_pk_ix = (pk & 0xFF);
+ pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i];
- *is_full_member = fm;
- return err;
+ ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey);
+ if (ret)
+ continue;
+ if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) {
+ if (slot_pkey & 0x8000) {
+ *ix = (u16) pkey_ix;
+ return 0;
+ } else {
+ /* take first partial pkey index found */
+ if (partial_ix == 0xFF)
+ partial_ix = pkey_ix;
+ }
+ }
+ }
+
+ if (partial_ix < 0xFF) {
+ *ix = (u16) partial_ix;
+ return 0;
+ }
+
+ return -EINVAL;
}
int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
@@ -458,10 +465,9 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
unsigned tun_tx_ix = 0;
int dqpn;
int ret = 0;
- int i;
- int is_full_member = 0;
u16 tun_pkey_ix;
- u8 ph_pkey_ix, full_pk_ix = 0, partial_pk_ix = 0;
+ u16 cached_pkey;
+ u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
if (dest_qpt > IB_QPT_GSI)
return -EINVAL;
@@ -472,36 +478,22 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
return -EAGAIN;
- /* QP0 forwarding only for Dom0 */
- if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
- return -EINVAL;
-
if (!dest_qpt)
tun_qp = &tun_ctx->qp[0];
else
tun_qp = &tun_ctx->qp[1];
- /* compute pkey index for slave */
- /* get physical pkey -- virtualized Dom0 pkey to phys*/
+ /* compute P_Key index to put in tunnel header for slave */
if (dest_qpt) {
- ph_pkey_ix =
- dev->pkeys.virt2phys_pkey[mlx4_master_func_num(dev->dev)][port - 1][wc->pkey_index];
-
- /* now, translate this to the slave pkey index */
- ret = get_pkey_phys_indices(dev, port, ph_pkey_ix, &full_pk_ix,
- &partial_pk_ix, &is_full_member);
+ u16 pkey_ix;
+ ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
if (ret)
return -EINVAL;
- for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
- if ((dev->pkeys.virt2phys_pkey[slave][port - 1][i] == full_pk_ix) ||
- (is_full_member &&
- (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == partial_pk_ix)))
- break;
- }
- if (i == dev->dev->caps.pkey_table_len[port])
+ ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
+ if (ret)
return -EINVAL;
- tun_pkey_ix = i;
+ tun_pkey_ix = pkey_ix;
} else
tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
@@ -514,6 +506,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
* The driver will set the force loopback bit in post_send */
memset(&attr, 0, sizeof attr);
attr.port_num = port;
+ if (is_eth) {
+ memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
+ attr.ah_flags = IB_AH_GRH;
+ }
ah = ib_create_ah(tun_ctx->pd, &attr);
if (IS_ERR(ah))
return -ENOMEM;
@@ -545,11 +541,36 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
/* adjust tunnel data */
tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
- tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
- tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
+ if (is_eth) {
+ u16 vlan = 0;
+ if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan,
+ NULL)) {
+ /* VST mode */
+ if (vlan != wc->vlan_id)
+ /* Packet vlan is not the VST-assigned vlan.
+ * Drop the packet.
+ */
+ goto out;
+ else
+ /* Remove the vlan tag before forwarding
+ * the packet to the VF.
+ */
+ vlan = 0xffff;
+ } else {
+ vlan = wc->vlan_id;
+ }
+
+ tun_mad->hdr.sl_vid = cpu_to_be16(vlan);
+ memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4);
+ memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
+ } else {
+ tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
+ tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
+ }
+
ib_dma_sync_single_for_device(&dev->ib_dev,
tun_qp->tx_ring[tun_tx_ix].buf.map,
sizeof (struct mlx4_rcv_tunnel_mad),
@@ -585,6 +606,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
int err;
int slave;
u8 *slave_id;
+ int is_eth = 0;
+
+ if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
+ is_eth = 0;
+ else
+ is_eth = 1;
+
+ if (is_eth) {
+ if (!(wc->wc_flags & IB_WC_GRH)) {
+ mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
+ return -EINVAL;
+ }
+ if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
+ mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
+ return -EINVAL;
+ }
+ if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
+ mlx4_ib_warn(ibdev, "failed matching grh\n");
+ return -ENOENT;
+ }
+ if (slave >= dev->dev->caps.sqp_demux) {
+ mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
+ slave, dev->dev->caps.sqp_demux);
+ return -ENOENT;
+ }
+
+ if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
+ return 0;
+
+ err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
+ if (err)
+ pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ slave, err);
+ return 0;
+ }
/* Initially assume that this mad is for us */
slave = mlx4_master_func_num(dev->dev);
@@ -607,6 +663,21 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
}
/* Class-specific handling */
switch (mad->mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ /* 255 indicates the dom0 */
+ if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) {
+ if (!mlx4_vf_smi_enabled(dev->dev, slave, port))
+ return -EPERM;
+ /* for a VF. drop unsolicited MADs */
+ if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) {
+ mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n",
+ slave, mad->mad_hdr.mgmt_class,
+ mad->mad_hdr.method);
+ return -EINVAL;
+ }
+ }
+ break;
case IB_MGMT_CLASS_SUBN_ADM:
if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
(struct ib_sa_mad *) mad))
@@ -1081,8 +1152,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
- enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
- u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
+ enum ib_qp_type dest_qpt, u16 pkey_index,
+ u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
+ u8 *s_mac, struct ib_mad *mad)
{
struct ib_sge list;
struct ib_send_wr wr, *bad_wr;
@@ -1104,10 +1176,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
return -EAGAIN;
- /* QP0 forwarding only for Dom0 */
- if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
- return -EINVAL;
-
if (dest_qpt == IB_QPT_SMI) {
src_qpnum = 0;
sqp = &sqp_ctx->qp[0];
@@ -1171,6 +1239,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
+ if (s_mac)
+ memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
+
ret = ib_post_send(send_qp, &wr, &bad_wr);
out:
@@ -1179,6 +1250,22 @@ out:
return ret;
}
+static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
+{
+ if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
+ return slave;
+ return mlx4_get_base_gid_ix(dev->dev, slave, port);
+}
+
+static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
+ struct ib_ah_attr *ah_attr)
+{
+ if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
+ ah_attr->grh.sgid_index = slave;
+ else
+ ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
+}
+
static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
{
struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
@@ -1189,6 +1276,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
struct ib_ah_attr ah_attr;
u8 *slave_id;
int slave;
+ int port;
/* Get slave that sent this packet */
if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1204,11 +1292,6 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
"belongs to another slave\n", wc->src_qp);
return;
}
- if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
- mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
- "non-master trying to send QP0 packets\n", wc->src_qp);
- return;
- }
/* Map transaction ID */
ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
@@ -1236,6 +1319,12 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
/* Class-specific handling */
switch (tunnel->mad.mad_hdr.mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ if (slave != mlx4_master_func_num(dev->dev) &&
+ !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port))
+ return;
+ break;
case IB_MGMT_CLASS_SUBN_ADM:
if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
(struct ib_sa_mad *) &tunnel->mad))
@@ -1265,12 +1354,18 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
ah.ibah.device = ctx->ib_dev;
mlx4_ib_query_ah(&ah.ibah, &ah_attr);
- if ((ah_attr.ah_flags & IB_AH_GRH) &&
- (ah_attr.grh.sgid_index != slave)) {
- mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
- slave, ah_attr.grh.sgid_index);
+ if (ah_attr.ah_flags & IB_AH_GRH)
+ fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
+
+ port = mlx4_slave_convert_port(dev->dev, slave, ah_attr.port_num);
+ if (port < 0)
return;
- }
+ ah_attr.port_num = port;
+ memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
+ ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan);
+ /* if slave have default vlan use it */
+ mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
+ &ah_attr.vlan_id, &ah_attr.sl);
mlx4_ib_send_to_wire(dev, slave, ctx->port,
is_proxy_qp0(dev, wc->src_qp, slave) ?
@@ -1278,7 +1373,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
be16_to_cpu(tunnel->hdr.pkey_index),
be32_to_cpu(tunnel->hdr.remote_qpn),
be32_to_cpu(tunnel->hdr.qkey),
- &ah_attr, &tunnel->mad);
+ &ah_attr, wc->smac, &tunnel->mad);
}
static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
@@ -1516,8 +1611,14 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
memset(&attr, 0, sizeof attr);
attr.qp_state = IB_QPS_INIT;
- attr.pkey_index =
- to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
+ ret = 0;
+ if (create_tun)
+ ret = find_slave_port_pkey_ix(to_mdev(ctx->ib_dev), ctx->slave,
+ ctx->port, IB_DEFAULT_PKEY_FULL,
+ &attr.pkey_index);
+ if (ret || !create_tun)
+ attr.pkey_index =
+ to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
attr.qkey = IB_QP1_QKEY;
attr.port_num = ctx->port;
ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
@@ -1656,9 +1757,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
return -EEXIST;
ctx->state = DEMUX_PV_STATE_STARTING;
- /* have QP0 only on port owner, and only if link layer is IB */
- if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
- rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
+ /* have QP0 only if link layer is IB */
+ if (rdma_port_get_link_layer(ibdev, ctx->port) ==
+ IB_LINK_LAYER_INFINIBAND)
ctx->has_smi = 1;
if (ctx->has_smi) {
@@ -1849,7 +1950,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
ctx->port = port;
ctx->ib_dev = &dev->ib_dev;
- for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
+ for (i = 0;
+ i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1));
+ i++) {
+ struct mlx4_active_ports actv_ports =
+ mlx4_get_active_ports(dev->dev, i);
+
+ if (!test_bit(port - 1, actv_ports.ports))
+ continue;
+
ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
if (ret) {
ret = -ENOMEM;
@@ -2004,16 +2113,17 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
goto demux_err;
err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
if (err)
- goto demux_err;
+ goto free_pv;
}
mlx4_ib_master_tunnels(dev, 1);
return 0;
+free_pv:
+ free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
demux_err:
- while (i > 0) {
+ while (--i >= 0) {
free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
- --i;
}
mlx4_ib_device_unregister_sysfs(dev);