Diffstat (limited to 'drivers/infiniband/hw/mlx4')
-rw-r--r--  drivers/infiniband/hw/mlx4/ah.c          |   2
-rw-r--r--  drivers/infiniband/hw/mlx4/alias_GUID.c  |   2
-rw-r--r--  drivers/infiniband/hw/mlx4/cm.c          |  80
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c          |  48
-rw-r--r--  drivers/infiniband/hw/mlx4/doorbell.c    |   4
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c         | 162
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c        | 139
-rw-r--r--  drivers/infiniband/hw/mlx4/mcg.c         |   5
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h     |  28
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c          |  39
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c          | 431
-rw-r--r--  drivers/infiniband/hw/mlx4/srq.c         |   7
-rw-r--r--  drivers/infiniband/hw/mlx4/sysfs.c       | 110
13 files changed, 839 insertions(+), 218 deletions(-)
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 170dca60804..2d8c3397774 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -73,7 +73,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr { struct mlx4_ib_dev *ibdev = to_mdev(pd->device); struct mlx4_dev *dev = ibdev->dev; - int is_mcast; + int is_mcast = 0; struct in6_addr in6; u16 vlan_tag; diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 2f215b93db6..0eb141c4141 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, continue; slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; - if (slave_id >= dev->dev->num_slaves) + if (slave_id >= dev->dev->num_vfs + 1) return; tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; form_cache_ag = get_cached_alias_guid(dev, port_num, diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index d1f5f1dd77b..56a593e0ae5 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -61,6 +61,11 @@ struct cm_generic_msg { __be32 remote_comm_id; }; +struct cm_sidr_generic_msg { + struct ib_mad_hdr hdr; + __be32 request_id; +}; + struct cm_req_msg { unsigned char unused[0x60]; union ib_gid primary_path_sgid; @@ -69,28 +74,62 @@ struct cm_req_msg { static void set_local_comm_id(struct ib_mad *mad, u32 cm_id) { - struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; - msg->local_comm_id = cpu_to_be32(cm_id); + if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + msg->request_id = cpu_to_be32(cm_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + pr_err("trying to set local_comm_id in SIDR_REP\n"); + return; + } else { + struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; + msg->local_comm_id = cpu_to_be32(cm_id); + } } static u32 get_local_comm_id(struct ib_mad *mad) { - struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; - - return be32_to_cpu(msg->local_comm_id); + if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + return be32_to_cpu(msg->request_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + pr_err("trying to set local_comm_id in SIDR_REP\n"); + return -1; + } else { + struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; + return be32_to_cpu(msg->local_comm_id); + } } static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id) { - struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; - msg->remote_comm_id = cpu_to_be32(cm_id); + if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + msg->request_id = cpu_to_be32(cm_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + pr_err("trying to set remote_comm_id in SIDR_REQ\n"); + return; + } else { + struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; + msg->remote_comm_id = cpu_to_be32(cm_id); + } } static u32 get_remote_comm_id(struct ib_mad *mad) { - struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; - - return be32_to_cpu(msg->remote_comm_id); + if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + return be32_to_cpu(msg->request_id); + } else if 
(mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + pr_err("trying to set remote_comm_id in SIDR_REQ\n"); + return -1; + } else { + struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; + return be32_to_cpu(msg->remote_comm_id); + } } static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad) @@ -282,19 +321,21 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id u32 sl_cm_id; int pv_cm_id = -1; - sl_cm_id = get_local_comm_id(mad); - if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || - mad->mad_hdr.attr_id == CM_REP_ATTR_ID) { + mad->mad_hdr.attr_id == CM_REP_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + sl_cm_id = get_local_comm_id(mad); id = id_map_alloc(ibdev, slave_id, sl_cm_id); if (IS_ERR(id)) { mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n", __func__, slave_id, sl_cm_id); return PTR_ERR(id); } - } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) { + } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { return 0; } else { + sl_cm_id = get_local_comm_id(mad); id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id); } @@ -315,14 +356,18 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id } int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, - struct ib_mad *mad) + struct ib_mad *mad) { u32 pv_cm_id; struct id_map_entry *id; - if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) { + if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { union ib_gid gid; + if (!slave) + return 0; + gid = gid_from_req_msg(ibdev, mad); *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id); if (*slave < 0) { @@ -341,7 +386,8 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, return -ENOENT; } - *slave = id->slave_id; + if (slave) + *slave = id->slave_id; set_remote_comm_id(mad, id->sl_cm_id); if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index cc40f08ca8f..1066eec854a 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * int err; err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, - PAGE_SIZE * 2, &buf->buf); + PAGE_SIZE * 2, &buf->buf, GFP_KERNEL); if (err) goto out; @@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf); + err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL); if (err) goto err_mtt; @@ -209,7 +209,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector uar = &to_mucontext(context)->uar; } else { - err = mlx4_db_alloc(dev->dev, &cq->db, 1); + err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL); if (err) goto err_cq; @@ -564,7 +564,7 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) } static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, - unsigned tail, struct mlx4_cqe *cqe) + unsigned tail, struct mlx4_cqe *cqe, int is_eth) { struct mlx4_ib_proxy_sqp_hdr *hdr; @@ -574,12 +574,20 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct DMA_FROM_DEVICE); hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr); wc->pkey_index = 
be16_to_cpu(hdr->tun.pkey_index); - wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); - wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF; wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0; wc->dlid_path_bits = 0; + if (is_eth) { + wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid); + memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4); + memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2); + wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); + } else { + wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); + wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); + } + return 0; } @@ -594,6 +602,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_srq *msrq = NULL; int is_send; int is_error; + int is_eth; u32 g_mlpath_rqpn; u16 wqe_ctr; unsigned tail = 0; @@ -778,11 +787,15 @@ repoll: break; } + is_eth = (rdma_port_get_link_layer(wc->qp->device, + (*cur_qp)->port) == + IB_LINK_LAYER_ETHERNET); if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) { if ((*cur_qp)->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) - return use_tunnel_data(*cur_qp, cq, wc, tail, cqe); + return use_tunnel_data(*cur_qp, cq, wc, tail, + cqe, is_eth); } wc->slid = be16_to_cpu(cqe->rlid); @@ -793,20 +806,21 @@ repoll: wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum) ? IB_WC_IP_CSUM_OK : 0; - if (rdma_port_get_link_layer(wc->qp->device, - (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET) + if (is_eth) { wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; - else - wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; - if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) { - wc->vlan_id = be16_to_cpu(cqe->sl_vid) & - MLX4_CQE_VID_MASK; + if (be32_to_cpu(cqe->vlan_my_qpn) & + MLX4_CQE_VLAN_PRESENT_MASK) { + wc->vlan_id = be16_to_cpu(cqe->sl_vid) & + MLX4_CQE_VID_MASK; + } else { + wc->vlan_id = 0xffff; + } + memcpy(wc->smac, cqe->smac, ETH_ALEN); + wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); } else { + wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; wc->vlan_id = 0xffff; } - wc->wc_flags |= IB_WC_WITH_VLAN; - memcpy(wc->smac, cqe->smac, ETH_ALEN); - wc->wc_flags |= IB_WC_WITH_SMAC; } return 0; diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 8aee4233b38..c5174098636 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, struct mlx4_db *db) { struct mlx4_ib_user_db_page *page; - struct ib_umem_chunk *chunk; int err = 0; mutex_lock(&context->db_page_mutex); @@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, list_add(&page->list, &context->db_page_list); found: - chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); - db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); db->u.user_page = page; ++page->refcnt; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index f2a3f48107e..287ad0564ac 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -467,6 +467,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, int ret = 0; u16 tun_pkey_ix; u16 cached_pkey; + u8 is_eth = dev->dev->caps.port_type[port] == 
MLX4_PORT_TYPE_ETH; if (dest_qpt > IB_QPT_GSI) return -EINVAL; @@ -477,10 +478,6 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE) return -EAGAIN; - /* QP0 forwarding only for Dom0 */ - if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave)) - return -EINVAL; - if (!dest_qpt) tun_qp = &tun_ctx->qp[0]; else @@ -509,6 +506,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, * The driver will set the force loopback bit in post_send */ memset(&attr, 0, sizeof attr); attr.port_num = port; + if (is_eth) { + memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16); + attr.ah_flags = IB_AH_GRH; + } ah = ib_create_ah(tun_ctx->pd, &attr); if (IS_ERR(ah)) return -ENOMEM; @@ -540,11 +541,36 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, /* adjust tunnel data */ tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix); - tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); - tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF); tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0; + if (is_eth) { + u16 vlan = 0; + if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan, + NULL)) { + /* VST mode */ + if (vlan != wc->vlan_id) + /* Packet vlan is not the VST-assigned vlan. + * Drop the packet. + */ + goto out; + else + /* Remove the vlan tag before forwarding + * the packet to the VF. + */ + vlan = 0xffff; + } else { + vlan = wc->vlan_id; + } + + tun_mad->hdr.sl_vid = cpu_to_be16(vlan); + memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4); + memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2); + } else { + tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); + tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); + } + ib_dma_sync_single_for_device(&dev->ib_dev, tun_qp->tx_ring[tun_tx_ix].buf.map, sizeof (struct mlx4_rcv_tunnel_mad), @@ -580,6 +606,41 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, int err; int slave; u8 *slave_id; + int is_eth = 0; + + if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) + is_eth = 0; + else + is_eth = 1; + + if (is_eth) { + if (!(wc->wc_flags & IB_WC_GRH)) { + mlx4_ib_warn(ibdev, "RoCE grh not present.\n"); + return -EINVAL; + } + if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) { + mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n"); + return -EINVAL; + } + if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) { + mlx4_ib_warn(ibdev, "failed matching grh\n"); + return -ENOENT; + } + if (slave >= dev->dev->caps.sqp_demux) { + mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n", + slave, dev->dev->caps.sqp_demux); + return -ENOENT; + } + + if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad)) + return 0; + + err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); + if (err) + pr_debug("failed sending to slave %d via tunnel qp (%d)\n", + slave, err); + return 0; + } /* Initially assume that this mad is for us */ slave = mlx4_master_func_num(dev->dev); @@ -602,6 +663,21 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, } /* Class-specific handling */ switch (mad->mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + /* 255 indicates the dom0 */ + if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) { + if (!mlx4_vf_smi_enabled(dev->dev, slave, port)) + return -EPERM; + /* for a 
VF. drop unsolicited MADs */ + if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) { + mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n", + slave, mad->mad_hdr.mgmt_class, + mad->mad_hdr.method); + return -EINVAL; + } + } + break; case IB_MGMT_CLASS_SUBN_ADM: if (mlx4_ib_demux_sa_handler(ibdev, port, slave, (struct ib_sa_mad *) mad)) @@ -1076,8 +1152,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, - enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, - u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad) + enum ib_qp_type dest_qpt, u16 pkey_index, + u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, + u8 *s_mac, struct ib_mad *mad) { struct ib_sge list; struct ib_send_wr wr, *bad_wr; @@ -1099,10 +1176,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE) return -EAGAIN; - /* QP0 forwarding only for Dom0 */ - if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave)) - return -EINVAL; - if (dest_qpt == IB_QPT_SMI) { src_qpnum = 0; sqp = &sqp_ctx->qp[0]; @@ -1166,6 +1239,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, wr.num_sge = 1; wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; + if (s_mac) + memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); + ret = ib_post_send(send_qp, &wr, &bad_wr); out: @@ -1174,6 +1250,22 @@ out: return ret; } +static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port) +{ + if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) + return slave; + return mlx4_get_base_gid_ix(dev->dev, slave, port); +} + +static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port, + struct ib_ah_attr *ah_attr) +{ + if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) + ah_attr->grh.sgid_index = slave; + else + ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port); +} + static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc) { struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); @@ -1184,6 +1276,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc struct ib_ah_attr ah_attr; u8 *slave_id; int slave; + int port; /* Get slave that sent this packet */ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || @@ -1199,11 +1292,6 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc "belongs to another slave\n", wc->src_qp); return; } - if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) { - mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: " - "non-master trying to send QP0 packets\n", wc->src_qp); - return; - } /* Map transaction ID */ ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map, @@ -1231,6 +1319,12 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc /* Class-specific handling */ switch (tunnel->mad.mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + if (slave != mlx4_master_func_num(dev->dev) && + !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port)) + return; + break; case IB_MGMT_CLASS_SUBN_ADM: if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave, (struct ib_sa_mad *) &tunnel->mad)) @@ -1260,12 +1354,18 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc memcpy(&ah.av, 
&tunnel->hdr.av, sizeof (struct mlx4_av)); ah.ibah.device = ctx->ib_dev; mlx4_ib_query_ah(&ah.ibah, &ah_attr); - if ((ah_attr.ah_flags & IB_AH_GRH) && - (ah_attr.grh.sgid_index != slave)) { - mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n", - slave, ah_attr.grh.sgid_index); + if (ah_attr.ah_flags & IB_AH_GRH) + fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); + + port = mlx4_slave_convert_port(dev->dev, slave, ah_attr.port_num); + if (port < 0) return; - } + ah_attr.port_num = port; + memcpy(ah_attr.dmac, tunnel->hdr.mac, 6); + ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan); + /* if slave have default vlan use it */ + mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave, + &ah_attr.vlan_id, &ah_attr.sl); mlx4_ib_send_to_wire(dev, slave, ctx->port, is_proxy_qp0(dev, wc->src_qp, slave) ? @@ -1273,7 +1373,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc be16_to_cpu(tunnel->hdr.pkey_index), be32_to_cpu(tunnel->hdr.remote_qpn), be32_to_cpu(tunnel->hdr.qkey), - &ah_attr, &tunnel->mad); + &ah_attr, wc->smac, &tunnel->mad); } static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, @@ -1657,9 +1757,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, return -EEXIST; ctx->state = DEMUX_PV_STATE_STARTING; - /* have QP0 only on port owner, and only if link layer is IB */ - if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) && - rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND) + /* have QP0 only if link layer is IB */ + if (rdma_port_get_link_layer(ibdev, ctx->port) == + IB_LINK_LAYER_INFINIBAND) ctx->has_smi = 1; if (ctx->has_smi) { @@ -1850,7 +1950,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, ctx->port = port; ctx->ib_dev = &dev->ib_dev; - for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + for (i = 0; + i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1)); + i++) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev->dev, i); + + if (!test_bit(port - 1, actv_ports.ports)) + continue; + ret = alloc_pv_object(dev, i, port, &ctx->tun[i]); if (ret) { ret = -ENOMEM; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e81c5547e64..0f7027e7db1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -48,13 +48,14 @@ #include <linux/mlx4/driver.h> #include <linux/mlx4/cmd.h> +#include <linux/mlx4/qp.h> #include "mlx4_ib.h" #include "user.h" #define DRV_NAME MLX4_IB_DRV_NAME -#define DRV_VERSION "1.0" -#define DRV_RELDATE "April 4, 2008" +#define DRV_VERSION "2.2-1" +#define DRV_RELDATE "Feb 2014" #define MLX4_IB_FLOW_MAX_PRIO 0xFFF #define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF @@ -544,12 +545,11 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, return 0; } -static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, - u32 cap_mask) +static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, + u32 cap_mask) { struct mlx4_cmd_mailbox *mailbox; int err; - u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; mailbox = mlx4_alloc_cmd_mailbox(dev->dev); if (IS_ERR(mailbox)) @@ -563,8 +563,8 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); } - err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + err = mlx4_cmd(dev->dev, 
mailbox->dma, port, 0, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev->dev, mailbox); return err; @@ -573,11 +573,20 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { + struct mlx4_ib_dev *mdev = to_mdev(ibdev); + u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; struct ib_port_attr attr; u32 cap_mask; int err; - mutex_lock(&to_mdev(ibdev)->cap_mask_mutex); + /* return OK if this is RoCE. CM calls ib_modify_port() regardless + * of whether port link layer is ETH or IB. For ETH ports, qkey + * violations and port capabilities are not meaningful. + */ + if (is_eth) + return 0; + + mutex_lock(&mdev->cap_mask_mutex); err = mlx4_ib_query_port(ibdev, port, &attr); if (err) @@ -586,9 +595,9 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & ~props->clr_port_cap_mask; - err = mlx4_SET_PORT(to_mdev(ibdev), port, - !!(mask & IB_PORT_RESET_QKEY_CNTR), - cap_mask); + err = mlx4_ib_SET_PORT(mdev, port, + !!(mask & IB_PORT_RESET_QKEY_CNTR), + cap_mask); out: mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); @@ -1546,7 +1555,7 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev, iboe = &ibdev->iboe; spin_lock(&iboe->lock); - for (port = 1; port <= MLX4_MAX_PORTS; ++port) + for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) if ((netif_is_bond_master(real_dev) && (real_dev == iboe->masters[port - 1])) || (!netif_is_bond_master(real_dev) && @@ -1569,14 +1578,14 @@ static u8 mlx4_ib_get_dev_port(struct net_device *dev, iboe = &ibdev->iboe; - for (port = 1; port <= MLX4_MAX_PORTS; ++port) + for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) if ((netif_is_bond_master(real_dev) && (real_dev == iboe->masters[port - 1])) || (!netif_is_bond_master(real_dev) && (real_dev == iboe->netdevs[port - 1]))) break; - if ((port == 0) || (port > MLX4_MAX_PORTS)) + if ((port == 0) || (port > ibdev->dev->caps.num_ports)) return 0; else return port; @@ -1614,6 +1623,53 @@ static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event, } #endif +#define MLX4_IB_INVALID_MAC ((u64)-1) +static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, + struct net_device *dev, + int port) +{ + u64 new_smac = 0; + u64 release_mac = MLX4_IB_INVALID_MAC; + struct mlx4_ib_qp *qp; + + read_lock(&dev_base_lock); + new_smac = mlx4_mac_to_u64(dev->dev_addr); + read_unlock(&dev_base_lock); + + mutex_lock(&ibdev->qp1_proxy_lock[port - 1]); + qp = ibdev->qp1_proxy[port - 1]; + if (qp) { + int new_smac_index; + u64 old_smac = qp->pri.smac; + struct mlx4_update_qp_params update_params; + + if (new_smac == old_smac) + goto unlock; + + new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac); + + if (new_smac_index < 0) + goto unlock; + + update_params.smac_index = new_smac_index; + if (mlx4_update_qp(ibdev->dev, &qp->mqp, MLX4_UPDATE_QP_SMAC, + &update_params)) { + release_mac = new_smac; + goto unlock; + } + + qp->pri.smac = new_smac; + qp->pri.smac_index = new_smac_index; + + release_mac = old_smac; + } + +unlock: + mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]); + if (release_mac != MLX4_IB_INVALID_MAC) + mlx4_unregister_mac(ibdev->dev, port, release_mac); +} + static void mlx4_ib_get_dev_addr(struct net_device *dev, struct mlx4_ib_dev *ibdev, u8 port) { @@ -1626,7 +1682,7 @@ static void 
mlx4_ib_get_dev_addr(struct net_device *dev, union ib_gid gid; - if ((port == 0) || (port > MLX4_MAX_PORTS)) + if ((port == 0) || (port > ibdev->dev->caps.num_ports)) return; /* IPv4 gids */ @@ -1689,9 +1745,13 @@ static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) return 0; } -static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) +static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, + struct net_device *dev, + unsigned long event) + { struct mlx4_ib_iboe *iboe; + int update_qps_port = -1; int port; iboe = &ibdev->iboe; @@ -1719,6 +1779,11 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) } curr_master = iboe->masters[port - 1]; + if (dev == iboe->netdevs[port - 1] && + (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER || + event == NETDEV_UP || event == NETDEV_CHANGE)) + update_qps_port = port; + if (curr_netdev) { port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; @@ -1752,6 +1817,9 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) } spin_unlock(&iboe->lock); + + if (update_qps_port > 0) + mlx4_ib_update_qps(ibdev, dev, update_qps_port); } static int mlx4_ib_netdev_event(struct notifier_block *this, @@ -1764,7 +1832,7 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, return NOTIFY_DONE; ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); - mlx4_ib_scan_netdevs(ibdev); + mlx4_ib_scan_netdevs(ibdev, dev, event); return NOTIFY_DONE; } @@ -1803,7 +1871,7 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev) static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { - char name[32]; + char name[80]; int eq_per_port = 0; int added_eqs = 0; int total_eqs = 0; @@ -1833,8 +1901,8 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) eq = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { - sprintf(name, "mlx4-ib-%d-%d@%s", - i, j, dev->pdev->bus->name); + snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", + i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, &ibdev->eq_table[eq])) { @@ -1888,14 +1956,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) pr_info_once("%s", mlx4_ib_version); - mlx4_foreach_non_ib_transport_port(i, dev) - num_ports++; - - if (mlx4_is_mfunc(dev) && num_ports) { - dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n"); - return NULL; - } - num_ports = 0; mlx4_foreach_ib_transport_port(i, dev) num_ports++; @@ -2051,13 +2111,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_map; for (i = 0; i < ibdev->num_ports; ++i) { + mutex_init(&ibdev->qp1_proxy_lock[i]); if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]); if (err) ibdev->counters[i] = -1; - } else - ibdev->counters[i] = -1; + } else { + ibdev->counters[i] = -1; + } } mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) @@ -2133,7 +2195,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) for (i = 1 ; i <= ibdev->num_ports ; ++i) reset_gid_table(ibdev, i); rtnl_lock(); - mlx4_ib_scan_netdevs(ibdev); + mlx4_ib_scan_netdevs(ibdev, NULL, 0); rtnl_unlock(); mlx4_ib_init_gid_table(ibdev); } @@ -2331,17 +2393,24 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) struct mlx4_dev *dev = ibdev->dev; int i; unsigned long flags; + struct mlx4_active_ports actv_ports; + unsigned int ports; + unsigned int first_port; if 
(!mlx4_is_master(dev)) return; - dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC); + actv_ports = mlx4_get_active_ports(dev, slave); + ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports); + first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports); + + dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC); if (!dm) { pr_err("failed to allocate memory for tunneling qp update\n"); goto out; } - for (i = 0; i < dev->caps.num_ports; i++) { + for (i = 0; i < ports; i++) { dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); if (!dm[i]) { pr_err("failed to allocate memory for tunneling qp update work struct\n"); @@ -2353,9 +2422,9 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) } } /* initialize or tear down tunnel QPs for the slave */ - for (i = 0; i < dev->caps.num_ports; i++) { + for (i = 0; i < ports; i++) { INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); - dm[i]->port = i + 1; + dm[i]->port = first_port + i + 1; dm[i]->slave = slave; dm[i]->do_init = do_init; dm[i]->dev = ibdev; diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 25b2cdff00f..ed327e6c8fd 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -215,8 +215,9 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) } mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); spin_unlock(&dev->sm_lock); - return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port, - IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad); + return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), + ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY, + &ah_attr, NULL, mad); } static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx, diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index a230683af94..369da3ca5d6 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -156,6 +156,7 @@ enum mlx4_ib_qp_flags { MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, + MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO, MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, MLX4_IB_SRIOV_SQP = 1 << 31, }; @@ -241,6 +242,22 @@ struct mlx4_ib_proxy_sqp_hdr { struct mlx4_rcv_tunnel_hdr tun; } __packed; +struct mlx4_roce_smac_vlan_info { + u64 smac; + int smac_index; + int smac_port; + u64 candidate_smac; + int candidate_smac_index; + int candidate_smac_port; + u16 vid; + int vlan_index; + int vlan_port; + u16 candidate_vid; + int candidate_vlan_index; + int candidate_vlan_port; + int update_vid; +}; + struct mlx4_ib_qp { struct ib_qp ibqp; struct mlx4_qp mqp; @@ -273,8 +290,9 @@ struct mlx4_ib_qp { struct list_head gid_list; struct list_head steering_rules; struct mlx4_ib_buf *sqp_proxy_rcv; + struct mlx4_roce_smac_vlan_info pri; + struct mlx4_roce_smac_vlan_info alt; u64 reg_id; - }; struct mlx4_ib_srq { @@ -505,6 +523,9 @@ struct mlx4_ib_dev { int steer_qpn_count; int steer_qpn_base; int steering_support; + struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS]; + /* lock when destroying qp1_proxy and getting netdev events */ + struct mutex qp1_proxy_lock[MLX4_MAX_PORTS]; }; struct ib_event_work { @@ -720,9 +741,12 @@ void mlx4_ib_tunnels_update_work(struct work_struct *work); int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type qpt, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad 
*mad); + int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, - u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad); + u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, + struct ib_mad *mad); + __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx); int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index e471f089ff0..cb2a8727f3f 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -90,11 +90,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem) { u64 *pages; - struct ib_umem_chunk *chunk; - int i, j, k; + int i, k, entry; int n; int len; int err = 0; + struct scatterlist *sg; pages = (u64 *) __get_free_page(GFP_KERNEL); if (!pages) @@ -102,26 +102,25 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, i = n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift; - for (k = 0; k < len; ++k) { - pages[i++] = sg_dma_address(&chunk->page_list[j]) + - umem->page_size * k; - /* - * Be friendly to mlx4_write_mtt() and - * pass it chunks of appropriate size. - */ - if (i == PAGE_SIZE / sizeof (u64)) { - err = mlx4_write_mtt(dev->dev, mtt, n, - i, pages); - if (err) - goto out; - n += i; - i = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> mtt->page_shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(sg) + + umem->page_size * k; + /* + * Be friendly to mlx4_write_mtt() and + * pass it chunks of appropriate size. 
+ */ + if (i == PAGE_SIZE / sizeof (u64)) { + err = mlx4_write_mtt(dev->dev, mtt, n, + i, pages); + if (err) + goto out; + n += i; + i = 0; } } + } if (i) err = mlx4_write_mtt(dev->dev, mtt, n, i, pages); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index d8f4d1fe849..67780452f0c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -608,9 +608,20 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) return !attr->srq; } +static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn) +{ + int i; + for (i = 0; i < dev->caps.num_ports; i++) { + if (qpn == dev->caps.qp0_proxy[i]) + return !!dev->caps.qp0_qkey[i]; + } + return 0; +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) + struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, + gfp_t gfp) { int qpn; int err; @@ -625,10 +636,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) { if (init_attr->qp_type == IB_QPT_GSI) qp_type = MLX4_IB_QPT_PROXY_GSI; - else if (mlx4_is_master(dev->dev)) - qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; - else - qp_type = MLX4_IB_QPT_PROXY_SMI; + else { + if (mlx4_is_master(dev->dev) || + qp0_enabled_vf(dev->dev, sqpn)) + qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; + else + qp_type = MLX4_IB_QPT_PROXY_SMI; + } } qpn = sqpn; /* add extra sg entry for tunneling */ @@ -643,7 +657,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, return -EINVAL; if (tnl_init->proxy_qp_type == IB_QPT_GSI) qp_type = MLX4_IB_QPT_TUN_GSI; - else if (tnl_init->slave == mlx4_master_func_num(dev->dev)) + else if (tnl_init->slave == mlx4_master_func_num(dev->dev) || + mlx4_vf_smi_enabled(dev->dev, tnl_init->slave, + tnl_init->port)) qp_type = MLX4_IB_QPT_TUN_SMI_OWNER; else qp_type = MLX4_IB_QPT_TUN_SMI; @@ -658,14 +674,18 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { - sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL); + sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp); if (!sqp) return -ENOMEM; qp = &sqp->qp; + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; } else { - qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL); + qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp); if (!qp) return -ENOMEM; + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; } } else qp = *caller_qp; @@ -744,14 +764,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; if (qp_has_rq(init_attr)) { - err = mlx4_db_alloc(dev->dev, &qp->db, 0); + err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp); if (err) goto err; *qp->db.db = 0; } - if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { + if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) { err = -ENOMEM; goto err_db; } @@ -761,13 +781,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); + err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp); if (err) goto err_mtt; - qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL); - qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL); - + qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof 
(u64), gfp); + qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; @@ -797,7 +816,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_proxy; } - err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); if (err) goto err_qpn; @@ -940,11 +959,32 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, { struct mlx4_ib_cq *send_cq, *recv_cq; - if (qp->state != IB_QPS_RESET) + if (qp->state != IB_QPS_RESET) { if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) pr_warn("modify QP %06x to RESET failed.\n", qp->mqp.qpn); + if (qp->pri.smac) { + mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); + qp->pri.smac = 0; + } + if (qp->alt.smac) { + mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); + qp->alt.smac = 0; + } + if (qp->pri.vid < 0x1000) { + mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); + qp->pri.vid = 0xFFFF; + qp->pri.candidate_vid = 0xFFFF; + qp->pri.update_vid = 0; + } + if (qp->alt.vid < 0x1000) { + mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); + qp->alt.vid = 0xFFFF; + qp->alt.candidate_vid = 0xFFFF; + qp->alt.update_vid = 0; + } + } get_cqs(qp, &send_cq, &recv_cq); @@ -1015,7 +1055,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp = NULL; int err; u16 xrcdn = 0; + gfp_t gfp; + gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ? + GFP_NOIO : GFP_KERNEL; /* * We only support LSO, vendor flag1, and multicast loopback blocking, * and only for kernel UD QPs. @@ -1024,7 +1067,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP | - MLX4_IB_QP_NETIF)) + MLX4_IB_QP_NETIF | + MLX4_IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { @@ -1034,7 +1078,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, if (init_attr->create_flags && (udata || - ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) && + ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) && init_attr->qp_type != IB_QPT_UD) || ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && init_attr->qp_type > IB_QPT_GSI))) @@ -1054,14 +1098,16 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_RAW_PACKET: - qp = kzalloc(sizeof *qp, GFP_KERNEL); + qp = kzalloc(sizeof *qp, gfp); if (!qp) return ERR_PTR(-ENOMEM); + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; /* fall through */ case IB_QPT_UD: { err = create_qp_common(to_mdev(pd->device), pd, init_attr, - udata, 0, &qp); + udata, 0, &qp, gfp); if (err) return ERR_PTR(err); @@ -1079,7 +1125,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, get_sqp_num(to_mdev(pd->device), init_attr), - &qp); + &qp, gfp); if (err) return ERR_PTR(err); @@ -1105,6 +1151,12 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); + if (dev->qp1_proxy[mqp->port - 1] == mqp) { + mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]); + dev->qp1_proxy[mqp->port - 1] = NULL; + mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); + } + pd = get_pd(mqp); destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); @@ -1188,12 +1240,13 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 
port) static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, u64 smac, u16 vlan_tag, struct mlx4_qp_path *path, - u8 port) + struct mlx4_roce_smac_vlan_info *smac_info, u8 port) { int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_ETHERNET; int vidx; int smac_index; + int err; path->grh_mylmc = ah->src_path_bits & 0x7f; @@ -1223,61 +1276,103 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, } if (is_eth) { - path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | - ((port - 1) << 6) | ((ah->sl & 7) << 3); - if (!(ah->ah_flags & IB_AH_GRH)) return -1; - memcpy(path->dmac, ah->dmac, ETH_ALEN); - path->ackto = MLX4_IB_LINK_TYPE_ETH; - /* find the index into MAC table for IBoE */ - if (!is_zero_ether_addr((const u8 *)&smac)) { - if (mlx4_find_cached_mac(dev->dev, port, smac, - &smac_index)) - return -ENOENT; - } else { - smac_index = 0; - } - - path->grh_mylmc &= 0x80 | smac_index; + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 7) << 3); path->feup |= MLX4_FEUP_FORCE_ETH_UP; if (vlan_tag < 0x1000) { - if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx)) - return -ENOENT; - - path->vlan_index = vidx; - path->fl = 1 << 6; + if (smac_info->vid < 0x1000) { + /* both valid vlan ids */ + if (smac_info->vid != vlan_tag) { + /* different VIDs. unreg old and reg new */ + err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx); + if (err) + return err; + smac_info->candidate_vid = vlan_tag; + smac_info->candidate_vlan_index = vidx; + smac_info->candidate_vlan_port = port; + smac_info->update_vid = 1; + path->vlan_index = vidx; + } else { + path->vlan_index = smac_info->vlan_index; + } + } else { + /* no current vlan tag in qp */ + err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx); + if (err) + return err; + smac_info->candidate_vid = vlan_tag; + smac_info->candidate_vlan_index = vidx; + smac_info->candidate_vlan_port = port; + smac_info->update_vid = 1; + path->vlan_index = vidx; + } path->feup |= MLX4_FVL_FORCE_ETH_VLAN; + path->fl = 1 << 6; + } else { + /* have current vlan tag. unregister it at modify-qp success */ + if (smac_info->vid < 0x1000) { + smac_info->candidate_vid = 0xFFFF; + smac_info->update_vid = 1; + } } - } else + + /* get smac_index for RoCE use. + * If no smac was yet assigned, register one. + * If one was already assigned, but the new mac differs, + * unregister the old one and register the new one. + */ + if (!smac_info->smac || smac_info->smac != smac) { + /* register candidate now, unreg if needed, after success */ + smac_index = mlx4_register_mac(dev->dev, port, smac); + if (smac_index >= 0) { + smac_info->candidate_smac_index = smac_index; + smac_info->candidate_smac = smac; + smac_info->candidate_smac_port = port; + } else { + return -EINVAL; + } + } else { + smac_index = smac_info->smac_index; + } + + memcpy(path->dmac, ah->dmac, 6); + path->ackto = MLX4_IB_LINK_TYPE_ETH; + /* put MAC table smac index for IBoE */ + path->grh_mylmc = (u8) (smac_index) | 0x80; + } else { path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((port - 1) << 6) | ((ah->sl & 0xf) << 2); + } return 0; } static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, enum ib_qp_attr_mask qp_attr_mask, + struct mlx4_ib_qp *mqp, struct mlx4_qp_path *path, u8 port) { return _mlx4_set_path(dev, &qp->ah_attr, mlx4_mac_to_u64((u8 *)qp->smac), (qp_attr_mask & IB_QP_VID) ? 
qp->vlan_id : 0xffff, - path, port); + path, &mqp->pri, port); } static int mlx4_set_alt_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, enum ib_qp_attr_mask qp_attr_mask, + struct mlx4_ib_qp *mqp, struct mlx4_qp_path *path, u8 port) { return _mlx4_set_path(dev, &qp->alt_ah_attr, mlx4_mac_to_u64((u8 *)qp->alt_smac), (qp_attr_mask & IB_QP_ALT_VID) ? qp->alt_vlan_id : 0xffff, - path, port); + path, &mqp->alt, port); } static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) @@ -1292,6 +1387,37 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } } +static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac, + struct mlx4_qp_context *context) +{ + struct net_device *ndev; + u64 u64_mac; + int smac_index; + + + ndev = dev->iboe.netdevs[qp->port - 1]; + if (ndev) { + smac = ndev->dev_addr; + u64_mac = mlx4_mac_to_u64(smac); + } else { + u64_mac = dev->dev->caps.def_mac[qp->port]; + } + + context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6); + if (!qp->pri.smac) { + smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac); + if (smac_index >= 0) { + qp->pri.candidate_smac_index = smac_index; + qp->pri.candidate_smac = u64_mac; + qp->pri.candidate_smac_port = qp->port; + context->pri_path.grh_mylmc = 0x80 | (u8) smac_index; + } else { + return -ENOENT; + } + } + return 0; +} + static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -1403,7 +1529,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_AV) { - if (mlx4_set_path(dev, attr, attr_mask, &context->pri_path, + if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) goto out; @@ -1426,7 +1552,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, dev->dev->caps.pkey_table_len[attr->alt_port_num]) goto out; - if (mlx4_set_alt_path(dev, attr, attr_mask, &context->alt_path, + if (mlx4_set_alt_path(dev, attr, attr_mask, qp, + &context->alt_path, attr->alt_port_num)) goto out; @@ -1532,6 +1659,22 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->pri_path.fl = 0x80; context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; } + if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) == + IB_LINK_LAYER_ETHERNET) { + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI || + qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) + context->pri_path.feup = 1 << 7; /* don't fsm */ + /* handle smac_index */ + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD || + qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI || + qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) { + err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context); + if (err) + return -EINVAL; + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) + dev->qp1_proxy[qp->port - 1] = qp; + } + } } if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) @@ -1619,28 +1762,113 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, * If we moved a kernel QP to RESET, clean up all old CQ * entries and reinitialize the QP. */ - if (new_state == IB_QPS_RESET && !ibqp->uobject) { - mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, - ibqp->srq ? to_msrq(ibqp->srq): NULL); - if (send_cq != recv_cq) - mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + if (new_state == IB_QPS_RESET) { + if (!ibqp->uobject) { + mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, + ibqp->srq ? 
to_msrq(ibqp->srq) : NULL); + if (send_cq != recv_cq) + mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + + qp->rq.head = 0; + qp->rq.tail = 0; + qp->sq.head = 0; + qp->sq.tail = 0; + qp->sq_next_wqe = 0; + if (qp->rq.wqe_cnt) + *qp->db.db = 0; - qp->rq.head = 0; - qp->rq.tail = 0; - qp->sq.head = 0; - qp->sq.tail = 0; - qp->sq_next_wqe = 0; - if (qp->rq.wqe_cnt) - *qp->db.db = 0; + if (qp->flags & MLX4_IB_QP_NETIF) + mlx4_ib_steer_qp_reg(dev, qp, 0); + } + if (qp->pri.smac) { + mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); + qp->pri.smac = 0; + } + if (qp->alt.smac) { + mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); + qp->alt.smac = 0; + } + if (qp->pri.vid < 0x1000) { + mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); + qp->pri.vid = 0xFFFF; + qp->pri.candidate_vid = 0xFFFF; + qp->pri.update_vid = 0; + } - if (qp->flags & MLX4_IB_QP_NETIF) - mlx4_ib_steer_qp_reg(dev, qp, 0); + if (qp->alt.vid < 0x1000) { + mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); + qp->alt.vid = 0xFFFF; + qp->alt.candidate_vid = 0xFFFF; + qp->alt.update_vid = 0; + } } - out: if (err && steer_qp) mlx4_ib_steer_qp_reg(dev, qp, 0); kfree(context); + if (qp->pri.candidate_smac) { + if (err) { + mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac); + } else { + if (qp->pri.smac) + mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); + qp->pri.smac = qp->pri.candidate_smac; + qp->pri.smac_index = qp->pri.candidate_smac_index; + qp->pri.smac_port = qp->pri.candidate_smac_port; + } + qp->pri.candidate_smac = 0; + qp->pri.candidate_smac_index = 0; + qp->pri.candidate_smac_port = 0; + } + if (qp->alt.candidate_smac) { + if (err) { + mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->alt.candidate_smac); + } else { + if (qp->alt.smac) + mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); + qp->alt.smac = qp->alt.candidate_smac; + qp->alt.smac_index = qp->alt.candidate_smac_index; + qp->alt.smac_port = qp->alt.candidate_smac_port; + } + qp->alt.candidate_smac = 0; + qp->alt.candidate_smac_index = 0; + qp->alt.candidate_smac_port = 0; + } + + if (qp->pri.update_vid) { + if (err) { + if (qp->pri.candidate_vid < 0x1000) + mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port, + qp->pri.candidate_vid); + } else { + if (qp->pri.vid < 0x1000) + mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, + qp->pri.vid); + qp->pri.vid = qp->pri.candidate_vid; + qp->pri.vlan_port = qp->pri.candidate_vlan_port; + qp->pri.vlan_index = qp->pri.candidate_vlan_index; + } + qp->pri.candidate_vid = 0xFFFF; + qp->pri.update_vid = 0; + } + + if (qp->alt.update_vid) { + if (err) { + if (qp->alt.candidate_vid < 0x1000) + mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port, + qp->alt.candidate_vid); + } else { + if (qp->alt.vid < 0x1000) + mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, + qp->alt.vid); + qp->alt.vid = qp->alt.candidate_vid; + qp->alt.vlan_port = qp->alt.candidate_vlan_port; + qp->alt.vlan_index = qp->alt.candidate_vlan_index; + } + qp->alt.candidate_vid = 0xFFFF; + qp->alt.update_vid = 0; + } + return err; } @@ -1729,6 +1957,19 @@ out: return err; } +static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) +{ + int i; + for (i = 0; i < dev->caps.num_ports; i++) { + if (qpn == dev->caps.qp0_proxy[i] || + qpn == dev->caps.qp0_tunnel[i]) { + *qkey = dev->caps.qp0_qkey[i]; + return 0; + } + } + return -EINVAL; +} + static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr 
*wr, void *wqe, unsigned *mlx_seg_len) @@ -1786,8 +2027,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); - if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) - return -EINVAL; + if (mlx4_is_master(mdev->dev)) { + if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + return -EINVAL; + } else { + if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + return -EINVAL; + } sqp->ud_header.deth.qkey = cpu_to_be32(qkey); sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); @@ -1842,9 +2088,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, { struct ib_device *ib_dev = sqp->qp.ibqp.device; struct mlx4_wqe_mlx_seg *mlx = wqe; + struct mlx4_wqe_ctrl_seg *ctrl = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); - struct net_device *ndev; union ib_gid sgid; u16 pkey; int send_size; @@ -1868,12 +2114,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so * we must use our own cache */ - sgid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; - sgid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; + err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid.raw[0]); + if (err) + return err; } else { err = ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, @@ -1882,7 +2127,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, return err; } - if (ah->av.eth.vlan != 0xffff) { + if (ah->av.eth.vlan != cpu_to_be16(0xffff)) { vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; is_vlan = 1; } @@ -1902,6 +2147,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; + if (is_eth) + memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); + else { if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { /* When multi-function is enabled, the ib_core gid * indexes don't necessarily match the hw ones, so @@ -1917,6 +2165,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, be32_to_cpu(ah->av.ib.port_pd) >> 24, ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); + } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); } @@ -1949,16 +2198,23 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, if (is_eth) { u8 *smac; + struct in6_addr in6; + u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; mlx->sched_prio = cpu_to_be16(pcp); memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); /* FIXME: cache smac value? 
*/ - ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]; - if (!ndev) - return -ENODEV; - smac = ndev->dev_addr; + memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2); + memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); + memcpy(&in6, sgid.raw, sizeof(in6)); + + if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) + smac = to_mdev(sqp->qp.ibqp.device)-> + iboe.netdevs[sqp->qp.port - 1]->dev_addr; + else /* use the src mac of the tunnel */ + smac = ah->av.eth.s_mac; memcpy(sqp->ud_header.eth.smac_h, smac, 6); if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); @@ -2159,7 +2415,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, struct mlx4_wqe_datagram_seg *dseg, - struct ib_send_wr *wr, enum ib_qp_type qpt) + struct ib_send_wr *wr, + enum mlx4_ib_qp_type qpt) { union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av; struct mlx4_av sqp_av = {0}; @@ -2172,8 +2429,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, cpu_to_be32(0xf0000000); memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av)); - /* This function used only for sending on QP1 proxies */ - dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); + if (qpt == MLX4_IB_QPT_PROXY_GSI) + dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); + else + dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]); /* Use QKEY from the QP context, which is set by master */ dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } @@ -2190,6 +2449,8 @@ static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_ hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index); hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + memcpy(hdr.mac, ah->av.eth.mac, 6); + hdr.vlan = ah->av.eth.vlan; spc = MLX4_INLINE_ALIGN - ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); @@ -2466,11 +2727,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case MLX4_IB_QPT_PROXY_SMI_OWNER: - if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) { - err = -ENOSYS; - *bad_wr = wr; - goto out; - } err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; @@ -2487,16 +2743,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += seglen / 16; break; case MLX4_IB_QPT_PROXY_SMI: - /* don't allow QP0 sends on guests */ - err = -ENOSYS; - *bad_wr = wr; - goto out; case MLX4_IB_QPT_PROXY_GSI: /* If we are tunneling special qps, this is a UD qp. 
* In this case we first add a UD segment targeting * the tunnel qp, and then add a header with address * information */ - set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type); + set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, + qp->mlx4_ib_qp_type); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; build_tunnel_header(wr, wqe, &seglen); diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 60c5fb025fc..62d9285300a 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -134,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; } else { - err = mlx4_db_alloc(dev->dev, &srq->db, 0); + err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL); if (err) goto err_srq; *srq->db.db = 0; - if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf, + GFP_KERNEL)) { err = -ENOMEM; goto err_db; } @@ -165,7 +166,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf); + err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL); if (err) goto err_mtt; diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index db2ea31df83..cb4c66e723b 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -389,8 +389,10 @@ struct mlx4_port { struct mlx4_ib_dev *dev; struct attribute_group pkey_group; struct attribute_group gid_group; - u8 port_num; + struct device_attribute enable_smi_admin; + struct device_attribute smi_enabled; int slave; + u8 port_num; }; @@ -558,6 +560,101 @@ err: return NULL; } +static ssize_t sysfs_show_smi_enabled(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, smi_enabled); + ssize_t len = 0; + + if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num)) + len = sprintf(buf, "%d\n", 1); + else + len = sprintf(buf, "%d\n", 0); + + return len; +} + +static ssize_t sysfs_show_enable_smi_admin(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, enable_smi_admin); + ssize_t len = 0; + + if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num)) + len = sprintf(buf, "%d\n", 1); + else + len = sprintf(buf, "%d\n", 0); + + return len; +} + +static ssize_t sysfs_store_enable_smi_admin(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, enable_smi_admin); + int enable; + + if (sscanf(buf, "%i", &enable) != 1 || + enable < 0 || enable > 1) + return -EINVAL; + + if (mlx4_vf_set_enable_smi_admin(p->dev->dev, p->slave, p->port_num, enable)) + return -EINVAL; + return count; +} + +static int add_vf_smi_entries(struct mlx4_port *p) +{ + int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) == + IB_LINK_LAYER_ETHERNET; + int ret; + + /* do not display entries if eth transport, or if master */ + if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev)) + return 0; + + sysfs_attr_init(&p->smi_enabled.attr); + p->smi_enabled.show = sysfs_show_smi_enabled; + p->smi_enabled.store = NULL; + p->smi_enabled.attr.name = "smi_enabled"; + p->smi_enabled.attr.mode = 0444; + ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr); + if 
(ret) { + pr_err("failed to create smi_enabled\n"); + return ret; + } + + sysfs_attr_init(&p->enable_smi_admin.attr); + p->enable_smi_admin.show = sysfs_show_enable_smi_admin; + p->enable_smi_admin.store = sysfs_store_enable_smi_admin; + p->enable_smi_admin.attr.name = "enable_smi_admin"; + p->enable_smi_admin.attr.mode = 0644; + ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr); + if (ret) { + pr_err("failed to create enable_smi_admin\n"); + sysfs_remove_file(&p->kobj, &p->smi_enabled.attr); + return ret; + } + return 0; +} + +static void remove_vf_smi_entries(struct mlx4_port *p) +{ + int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) == + IB_LINK_LAYER_ETHERNET; + + if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev)) + return; + + sysfs_remove_file(&p->kobj, &p->smi_enabled.attr); + sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr); +} + static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) { struct mlx4_port *p; @@ -602,6 +699,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) if (ret) goto err_free_gid; + ret = add_vf_smi_entries(p); + if (ret) + goto err_free_gid; + list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]); return 0; @@ -627,6 +728,7 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave) int port; struct kobject *p, *t; struct mlx4_port *mport; + struct mlx4_active_ports actv_ports; get_name(dev, name, slave, sizeof name); @@ -649,7 +751,11 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave) goto err_ports; } + actv_ports = mlx4_get_active_ports(dev->dev, slave); + for (port = 1; port <= dev->dev->caps.num_ports; ++port) { + if (!test_bit(port - 1, actv_ports.ports)) + continue; err = add_port(dev, port, slave); if (err) goto err_add; @@ -664,6 +770,7 @@ err_add: mport = container_of(p, struct mlx4_port, kobj); sysfs_remove_group(p, &mport->pkey_group); sysfs_remove_group(p, &mport->gid_group); + remove_vf_smi_entries(mport); kobject_put(p); } kobject_put(dev->dev_ports_parent[slave]); @@ -708,6 +815,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device) port = container_of(p, struct mlx4_port, kobj); sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->gid_group); + remove_vf_smi_entries(port); kobject_put(p); kobject_put(device->dev_ports_parent[slave]); } |
