diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx4/mad.c')
| -rw-r--r-- | drivers/infiniband/hw/mlx4/mad.c | 266 |
1 files changed, 188 insertions, 78 deletions
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 21a794152d1..287ad0564ac 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -93,7 +93,7 @@ static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, __be64 mlx4_ib_gen_node_guid(void) { #define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40)) - return cpu_to_be64(NODE_GUID_HI | random32()); + return cpu_to_be64(NODE_GUID_HI | prandom_u32()); } __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx) @@ -409,38 +409,45 @@ int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid) } -static int get_pkey_phys_indices(struct mlx4_ib_dev *ibdev, u8 port, u8 ph_pkey_ix, - u8 *full_pk_ix, u8 *partial_pk_ix, - int *is_full_member) +static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave, + u8 port, u16 pkey, u16 *ix) { - u16 search_pkey; - int fm; - int err = 0; - u16 pk; + int i, ret; + u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF; + u16 slot_pkey; - err = ib_get_cached_pkey(&ibdev->ib_dev, port, ph_pkey_ix, &search_pkey); - if (err) - return err; + if (slave == mlx4_master_func_num(dev->dev)) + return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix); - fm = (search_pkey & 0x8000) ? 1 : 0; - if (fm) { - *full_pk_ix = ph_pkey_ix; - search_pkey &= 0x7FFF; - } else { - *partial_pk_ix = ph_pkey_ix; - search_pkey |= 0x8000; - } + unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1; - if (ib_find_exact_cached_pkey(&ibdev->ib_dev, port, search_pkey, &pk)) - pk = 0xFFFF; + for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) { + if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix) + continue; - if (fm) - *partial_pk_ix = (pk & 0xFF); - else - *full_pk_ix = (pk & 0xFF); + pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i]; - *is_full_member = fm; - return err; + ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey); + if (ret) + continue; + if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) { + if (slot_pkey & 0x8000) { + *ix = (u16) pkey_ix; + return 0; + } else { + /* take first partial pkey index found */ + if (partial_ix == 0xFF) + partial_ix = pkey_ix; + } + } + } + + if (partial_ix < 0xFF) { + *ix = (u16) partial_ix; + return 0; + } + + return -EINVAL; } int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, @@ -458,10 +465,9 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, unsigned tun_tx_ix = 0; int dqpn; int ret = 0; - int i; - int is_full_member = 0; u16 tun_pkey_ix; - u8 ph_pkey_ix, full_pk_ix = 0, partial_pk_ix = 0; + u16 cached_pkey; + u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; if (dest_qpt > IB_QPT_GSI) return -EINVAL; @@ -472,36 +478,22 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE) return -EAGAIN; - /* QP0 forwarding only for Dom0 */ - if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave)) - return -EINVAL; - if (!dest_qpt) tun_qp = &tun_ctx->qp[0]; else tun_qp = &tun_ctx->qp[1]; - /* compute pkey index for slave */ - /* get physical pkey -- virtualized Dom0 pkey to phys*/ + /* compute P_Key index to put in tunnel header for slave */ if (dest_qpt) { - ph_pkey_ix = - dev->pkeys.virt2phys_pkey[mlx4_master_func_num(dev->dev)][port - 1][wc->pkey_index]; - - /* now, translate this to the slave pkey index */ - ret = get_pkey_phys_indices(dev, port, ph_pkey_ix, &full_pk_ix, - &partial_pk_ix, &is_full_member); + u16 pkey_ix; + ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey); if (ret) return -EINVAL; - for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) { - if ((dev->pkeys.virt2phys_pkey[slave][port - 1][i] == full_pk_ix) || - (is_full_member && - (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == partial_pk_ix))) - break; - } - if (i == dev->dev->caps.pkey_table_len[port]) + ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix); + if (ret) return -EINVAL; - tun_pkey_ix = i; + tun_pkey_ix = pkey_ix; } else tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; @@ -514,6 +506,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, * The driver will set the force loopback bit in post_send */ memset(&attr, 0, sizeof attr); attr.port_num = port; + if (is_eth) { + memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16); + attr.ah_flags = IB_AH_GRH; + } ah = ib_create_ah(tun_ctx->pd, &attr); if (IS_ERR(ah)) return -ENOMEM; @@ -545,11 +541,36 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, /* adjust tunnel data */ tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix); - tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); - tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF); tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0; + if (is_eth) { + u16 vlan = 0; + if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan, + NULL)) { + /* VST mode */ + if (vlan != wc->vlan_id) + /* Packet vlan is not the VST-assigned vlan. + * Drop the packet. + */ + goto out; + else + /* Remove the vlan tag before forwarding + * the packet to the VF. + */ + vlan = 0xffff; + } else { + vlan = wc->vlan_id; + } + + tun_mad->hdr.sl_vid = cpu_to_be16(vlan); + memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4); + memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2); + } else { + tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); + tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); + } + ib_dma_sync_single_for_device(&dev->ib_dev, tun_qp->tx_ring[tun_tx_ix].buf.map, sizeof (struct mlx4_rcv_tunnel_mad), @@ -585,6 +606,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, int err; int slave; u8 *slave_id; + int is_eth = 0; + + if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) + is_eth = 0; + else + is_eth = 1; + + if (is_eth) { + if (!(wc->wc_flags & IB_WC_GRH)) { + mlx4_ib_warn(ibdev, "RoCE grh not present.\n"); + return -EINVAL; + } + if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) { + mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n"); + return -EINVAL; + } + if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) { + mlx4_ib_warn(ibdev, "failed matching grh\n"); + return -ENOENT; + } + if (slave >= dev->dev->caps.sqp_demux) { + mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n", + slave, dev->dev->caps.sqp_demux); + return -ENOENT; + } + + if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad)) + return 0; + + err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); + if (err) + pr_debug("failed sending to slave %d via tunnel qp (%d)\n", + slave, err); + return 0; + } /* Initially assume that this mad is for us */ slave = mlx4_master_func_num(dev->dev); @@ -607,6 +663,21 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, } /* Class-specific handling */ switch (mad->mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + /* 255 indicates the dom0 */ + if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) { + if (!mlx4_vf_smi_enabled(dev->dev, slave, port)) + return -EPERM; + /* for a VF. drop unsolicited MADs */ + if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) { + mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n", + slave, mad->mad_hdr.mgmt_class, + mad->mad_hdr.method); + return -EINVAL; + } + } + break; case IB_MGMT_CLASS_SUBN_ADM: if (mlx4_ib_demux_sa_handler(ibdev, port, slave, (struct ib_sa_mad *) mad)) @@ -1081,8 +1152,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, - enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, - u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad) + enum ib_qp_type dest_qpt, u16 pkey_index, + u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, + u8 *s_mac, struct ib_mad *mad) { struct ib_sge list; struct ib_send_wr wr, *bad_wr; @@ -1104,10 +1176,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE) return -EAGAIN; - /* QP0 forwarding only for Dom0 */ - if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave)) - return -EINVAL; - if (dest_qpt == IB_QPT_SMI) { src_qpnum = 0; sqp = &sqp_ctx->qp[0]; @@ -1171,6 +1239,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, wr.num_sge = 1; wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; + if (s_mac) + memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); + ret = ib_post_send(send_qp, &wr, &bad_wr); out: @@ -1179,6 +1250,22 @@ out: return ret; } +static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port) +{ + if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) + return slave; + return mlx4_get_base_gid_ix(dev->dev, slave, port); +} + +static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port, + struct ib_ah_attr *ah_attr) +{ + if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) + ah_attr->grh.sgid_index = slave; + else + ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port); +} + static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc) { struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); @@ -1189,6 +1276,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc struct ib_ah_attr ah_attr; u8 *slave_id; int slave; + int port; /* Get slave that sent this packet */ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || @@ -1204,11 +1292,6 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc "belongs to another slave\n", wc->src_qp); return; } - if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) { - mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: " - "non-master trying to send QP0 packets\n", wc->src_qp); - return; - } /* Map transaction ID */ ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map, @@ -1236,6 +1319,12 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc /* Class-specific handling */ switch (tunnel->mad.mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + if (slave != mlx4_master_func_num(dev->dev) && + !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port)) + return; + break; case IB_MGMT_CLASS_SUBN_ADM: if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave, (struct ib_sa_mad *) &tunnel->mad)) @@ -1265,12 +1354,18 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av)); ah.ibah.device = ctx->ib_dev; mlx4_ib_query_ah(&ah.ibah, &ah_attr); - if ((ah_attr.ah_flags & IB_AH_GRH) && - (ah_attr.grh.sgid_index != slave)) { - mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n", - slave, ah_attr.grh.sgid_index); + if (ah_attr.ah_flags & IB_AH_GRH) + fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); + + port = mlx4_slave_convert_port(dev->dev, slave, ah_attr.port_num); + if (port < 0) return; - } + ah_attr.port_num = port; + memcpy(ah_attr.dmac, tunnel->hdr.mac, 6); + ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan); + /* if slave have default vlan use it */ + mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave, + &ah_attr.vlan_id, &ah_attr.sl); mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? @@ -1278,7 +1373,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc be16_to_cpu(tunnel->hdr.pkey_index), be32_to_cpu(tunnel->hdr.remote_qpn), be32_to_cpu(tunnel->hdr.qkey), - &ah_attr, &tunnel->mad); + &ah_attr, wc->smac, &tunnel->mad); } static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, @@ -1516,8 +1611,14 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, memset(&attr, 0, sizeof attr); attr.qp_state = IB_QPS_INIT; - attr.pkey_index = - to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0]; + ret = 0; + if (create_tun) + ret = find_slave_port_pkey_ix(to_mdev(ctx->ib_dev), ctx->slave, + ctx->port, IB_DEFAULT_PKEY_FULL, + &attr.pkey_index); + if (ret || !create_tun) + attr.pkey_index = + to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0]; attr.qkey = IB_QP1_QKEY; attr.port_num = ctx->port; ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT); @@ -1656,9 +1757,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, return -EEXIST; ctx->state = DEMUX_PV_STATE_STARTING; - /* have QP0 only on port owner, and only if link layer is IB */ - if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) && - rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND) + /* have QP0 only if link layer is IB */ + if (rdma_port_get_link_layer(ibdev, ctx->port) == + IB_LINK_LAYER_INFINIBAND) ctx->has_smi = 1; if (ctx->has_smi) { @@ -1849,7 +1950,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, ctx->port = port; ctx->ib_dev = &dev->ib_dev; - for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + for (i = 0; + i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1)); + i++) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev->dev, i); + + if (!test_bit(port - 1, actv_ports.ports)) + continue; + ret = alloc_pv_object(dev, i, port, &ctx->tun[i]); if (ret) { ret = -ENOMEM; @@ -2004,16 +2113,17 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) goto demux_err; err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1); if (err) - goto demux_err; + goto free_pv; } mlx4_ib_master_tunnels(dev, 1); return 0; +free_pv: + free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1); demux_err: - while (i > 0) { + while (--i >= 0) { free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1); mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); - --i; } mlx4_ib_device_unregister_sysfs(dev); |
