diff options
Diffstat (limited to 'drivers/infiniband/hw/mlx5')
| -rw-r--r-- | drivers/infiniband/hw/mlx5/Kconfig | 2 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/cq.c | 410 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/doorbell.c | 4 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 72 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/mem.c | 80 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/mlx5_ib.h | 35 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/mr.c | 461 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/qp.c | 806 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/srq.c | 26 | ||||
| -rw-r--r-- | drivers/infiniband/hw/mlx5/user.h | 12 |
10 files changed, 1553 insertions, 355 deletions
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig index 8e6aebfaf8a..10df386c634 100644 --- a/drivers/infiniband/hw/mlx5/Kconfig +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -1,6 +1,6 @@ config MLX5_INFINIBAND tristate "Mellanox Connect-IB HCA support" - depends on NETDEVICES && ETHERNET && PCI && X86 + depends on NETDEVICES && ETHERNET && PCI select NET_VENDOR_MELLANOX select MLX5_CORE ---help--- diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 344ab03948a..8ae4f896cb4 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -32,6 +32,7 @@ #include <linux/kref.h> #include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h> #include "mlx5_ib.h" #include "user.h" @@ -73,14 +74,24 @@ static void *get_cqe(struct mlx5_ib_cq *cq, int n) return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz); } +static u8 sw_ownership_bit(int n, int nent) +{ + return (n & nent) ? 1 : 0; +} + static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) { void *cqe = get_cqe(cq, n & cq->ibcq.cqe); struct mlx5_cqe64 *cqe64; cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; - return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ - !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; + + if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) && + !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { + return cqe; + } else { + return NULL; + } } static void *next_cqe_sw(struct mlx5_ib_cq *cq) @@ -351,6 +362,43 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, qp->sq.last_poll = tail; } +static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) +{ + mlx5_buf_free(&dev->mdev, &buf->buf); +} + +static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, + struct ib_sig_err *item) +{ + u16 syndrome = be16_to_cpu(cqe->syndrome); + +#define GUARD_ERR (1 << 13) +#define APPTAG_ERR (1 << 12) +#define REFTAG_ERR (1 << 11) + + if (syndrome & GUARD_ERR) { + item->err_type = IB_SIG_BAD_GUARD; + item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16; + item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16; + } else + if (syndrome & REFTAG_ERR) { + item->err_type = IB_SIG_BAD_REFTAG; + item->expected = be32_to_cpu(cqe->expected_reftag); + item->actual = be32_to_cpu(cqe->actual_reftag); + } else + if (syndrome & APPTAG_ERR) { + item->err_type = IB_SIG_BAD_APPTAG; + item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff; + item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff; + } else { + pr_err("Got signature completion error with bad syndrome %04x\n", + syndrome); + } + + item->sig_err_offset = be64_to_cpu(cqe->err_offset); + item->key = be32_to_cpu(cqe->mkey); +} + static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_ib_qp **cur_qp, struct ib_wc *wc) @@ -360,12 +408,16 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_cqe64 *cqe64; struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; + struct mlx5_sig_err_cqe *sig_err_cqe; + struct mlx5_core_mr *mmr; + struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; u16 wqe_ctr; void *cqe; int idx; +repoll: cqe = next_cqe_sw(cq); if (!cqe) return -EAGAIN; @@ -379,7 +431,18 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, */ rmb(); - /* TBD: resize CQ */ + opcode = cqe64->op_own >> 4; + if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { + if (likely(cq->resize_buf)) { + free_cq_buf(dev, &cq->buf); + cq->buf = *cq->resize_buf; + kfree(cq->resize_buf); + cq->resize_buf = NULL; + goto repoll; + } else { + mlx5_ib_warn(dev, "unexpected resize cqe\n"); + } + } qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { @@ -398,7 +461,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, } wc->qp = &(*cur_qp)->ibqp; - opcode = cqe64->op_own >> 4; switch (opcode) { case MLX5_CQE_REQ: wq = &(*cur_qp)->sq; @@ -449,6 +511,33 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, } } break; + case MLX5_CQE_SIG_ERR: + sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; + + read_lock(&dev->mdev.priv.mr_table.lock); + mmr = __mlx5_mr_lookup(&dev->mdev, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + if (unlikely(!mmr)) { + read_unlock(&dev->mdev.priv.mr_table.lock); + mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", + cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); + return -EINVAL; + } + + mr = to_mibmr(mmr); + get_sig_err_item(sig_err_cqe, &mr->sig->err_item); + mr->sig->sig_err_exists = true; + mr->sig->sigerr_count++; + + mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", + cq->mcq.cqn, mr->sig->err_item.key, + mr->sig->err_item.err_type, + mr->sig->err_item.sig_err_offset, + mr->sig->err_item.expected, + mr->sig->err_item.actual); + + read_unlock(&dev->mdev.priv.mr_table.lock); + goto repoll; } return 0; @@ -503,29 +592,35 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, return err; buf->cqe_size = cqe_size; + buf->nent = nent; return 0; } -static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) -{ - mlx5_buf_free(&dev->mdev, &buf->buf); -} - static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct ib_ucontext *context, struct mlx5_ib_cq *cq, int entries, struct mlx5_create_cq_mbox_in **cqb, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd; + size_t ucmdlen; int page_shift; int npages; int ncont; int err; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + ucmdlen = + (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < + sizeof(ucmd)) ? (sizeof(ucmd) - + sizeof(ucmd.reserved)) : sizeof(ucmd); + + if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) return -EFAULT; + if (ucmdlen == sizeof(ucmd) && + ucmd.reserved != 0) + return -EINVAL; + if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) return -EINVAL; @@ -556,7 +651,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, goto err_db; } mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0); - (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; + (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; *index = to_mucontext(context)->uuari.uars[0].index; @@ -576,16 +671,16 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context) ib_umem_release(cq->buf.umem); } -static void init_cq_buf(struct mlx5_ib_cq *cq, int nent) +static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf) { int i; void *cqe; struct mlx5_cqe64 *cqe64; - for (i = 0; i < nent; i++) { - cqe = get_cqe(cq, i); - cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64; - cqe64->op_own = 0xf1; + for (i = 0; i < buf->nent; i++) { + cqe = get_cqe_from_buf(buf, i, buf->cqe_size); + cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64; + cqe64->op_own = MLX5_CQE_INVALID << 4; } } @@ -610,7 +705,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, if (err) goto err_db; - init_cq_buf(cq, entries); + init_cq_buf(cq, &cq->buf); *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages; *cqb = mlx5_vzalloc(*inlen); @@ -620,7 +715,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, } mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas); - (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT; + (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; *index = dev->mdev.priv.uuari.uars[0].index; return 0; @@ -653,8 +748,11 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, int eqn; int err; + if (entries < 0) + return ERR_PTR(-EINVAL); + entries = roundup_pow_of_two(entries + 1); - if (entries < 1 || entries > dev->mdev.caps.max_cqes) + if (entries > dev->mdev.caps.max_cqes) return ERR_PTR(-EINVAL); cq = kzalloc(sizeof(*cq), GFP_KERNEL); @@ -747,17 +845,9 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq) return 0; } -static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq, - u32 rsn) +static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn) { - u32 lrsn; - - if (srq) - lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff; - else - lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff; - - return rsn == lrsn; + return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff); } void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq) @@ -787,8 +877,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq) while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; - if (is_equal_rsn(cqe64, srq, rsn)) { - if (srq) + if (is_equal_rsn(cqe64, rsn)) { + if (srq && (ntohl(cqe64->srqn) & 0xffffff)) mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter)); ++nfreed; } else if (nfreed) { @@ -823,12 +913,266 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { - return -ENOSYS; + struct mlx5_modify_cq_mbox_in *in; + struct mlx5_ib_dev *dev = to_mdev(cq->device); + struct mlx5_ib_cq *mcq = to_mcq(cq); + int err; + u32 fsel; + + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) + return -ENOSYS; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->cqn = cpu_to_be32(mcq->mcq.cqn); + fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); + in->ctx.cq_period = cpu_to_be16(cq_period); + in->ctx.cq_max_count = cpu_to_be16(cq_count); + in->field_select = cpu_to_be32(fsel); + err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in)); + kfree(in); + + if (err) + mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn); + + return err; +} + +static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, + int entries, struct ib_udata *udata, int *npas, + int *page_shift, int *cqe_size) +{ + struct mlx5_ib_resize_cq ucmd; + struct ib_umem *umem; + int err; + int npages; + struct ib_ucontext *context = cq->buf.umem->context; + + err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + if (err) + return err; + + if (ucmd.reserved0 || ucmd.reserved1) + return -EINVAL; + + umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size, + IB_ACCESS_LOCAL_WRITE, 1); + if (IS_ERR(umem)) { + err = PTR_ERR(umem); + return err; + } + + mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift, + npas, NULL); + + cq->resize_umem = umem; + *cqe_size = ucmd.cqe_size; + + return 0; +} + +static void un_resize_user(struct mlx5_ib_cq *cq) +{ + ib_umem_release(cq->resize_umem); +} + +static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, + int entries, int cqe_size) +{ + int err; + + cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL); + if (!cq->resize_buf) + return -ENOMEM; + + err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size); + if (err) + goto ex; + + init_cq_buf(cq, cq->resize_buf); + + return 0; + +ex: + kfree(cq->resize_buf); + return err; +} + +static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) +{ + free_cq_buf(dev, cq->resize_buf); + cq->resize_buf = NULL; +} + +static int copy_resize_cqes(struct mlx5_ib_cq *cq) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_cqe64 *scqe64; + struct mlx5_cqe64 *dcqe64; + void *start_cqe; + void *scqe; + void *dcqe; + int ssize; + int dsize; + int i; + u8 sw_own; + + ssize = cq->buf.cqe_size; + dsize = cq->resize_buf->cqe_size; + if (ssize != dsize) { + mlx5_ib_warn(dev, "resize from different cqe size is not supported\n"); + return -EINVAL; + } + + i = cq->mcq.cons_index; + scqe = get_sw_cqe(cq, i); + scqe64 = ssize == 64 ? scqe : scqe + 64; + start_cqe = scqe; + if (!scqe) { + mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); + return -EINVAL; + } + + while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) { + dcqe = get_cqe_from_buf(cq->resize_buf, + (i + 1) & (cq->resize_buf->nent), + dsize); + dcqe64 = dsize == 64 ? dcqe : dcqe + 64; + sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent); + memcpy(dcqe, scqe, dsize); + dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own; + + ++i; + scqe = get_sw_cqe(cq, i); + scqe64 = ssize == 64 ? scqe : scqe + 64; + if (!scqe) { + mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); + return -EINVAL; + } + + if (scqe == start_cqe) { + pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", + cq->mcq.cqn); + return -ENOMEM; + } + } + ++cq->mcq.cons_index; + return 0; } int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { - return -ENOSYS; + struct mlx5_ib_dev *dev = to_mdev(ibcq->device); + struct mlx5_ib_cq *cq = to_mcq(ibcq); + struct mlx5_modify_cq_mbox_in *in; + int err; + int npas; + int page_shift; + int inlen; + int uninitialized_var(cqe_size); + unsigned long flags; + + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { + pr_info("Firmware does not support resize CQ\n"); + return -ENOSYS; + } + + if (entries < 1) + return -EINVAL; + + entries = roundup_pow_of_two(entries + 1); + if (entries > dev->mdev.caps.max_cqes + 1) + return -EINVAL; + + if (entries == ibcq->cqe + 1) + return 0; + + mutex_lock(&cq->resize_mutex); + if (udata) { + err = resize_user(dev, cq, entries, udata, &npas, &page_shift, + &cqe_size); + } else { + cqe_size = 64; + err = resize_kernel(dev, cq, entries, cqe_size); + if (!err) { + npas = cq->resize_buf->buf.npages; + page_shift = cq->resize_buf->buf.page_shift; + } + } + + if (err) + goto ex; + + inlen = sizeof(*in) + npas * sizeof(in->pas[0]); + in = mlx5_vzalloc(inlen); + if (!in) { + err = -ENOMEM; + goto ex_resize; + } + + if (udata) + mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, + in->pas, 0); + else + mlx5_fill_page_array(&cq->resize_buf->buf, in->pas); + + in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE | + MLX5_MODIFY_CQ_MASK_PG_OFFSET | + MLX5_MODIFY_CQ_MASK_PG_SIZE); + in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; + in->ctx.page_offset = 0; + in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24); + in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE); + in->cqn = cpu_to_be32(cq->mcq.cqn); + + err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen); + if (err) + goto ex_alloc; + + if (udata) { + cq->ibcq.cqe = entries - 1; + ib_umem_release(cq->buf.umem); + cq->buf.umem = cq->resize_umem; + cq->resize_umem = NULL; + } else { + struct mlx5_ib_cq_buf tbuf; + int resized = 0; + + spin_lock_irqsave(&cq->lock, flags); + if (cq->resize_buf) { + err = copy_resize_cqes(cq); + if (!err) { + tbuf = cq->buf; + cq->buf = *cq->resize_buf; + kfree(cq->resize_buf); + cq->resize_buf = NULL; + resized = 1; + } + } + cq->ibcq.cqe = entries - 1; + spin_unlock_irqrestore(&cq->lock, flags); + if (resized) + free_cq_buf(dev, &tbuf); + } + mutex_unlock(&cq->resize_mutex); + + mlx5_vfree(in); + return 0; + +ex_alloc: + mlx5_vfree(in); + +ex_resize: + if (udata) + un_resize_user(cq); + else + un_resize_kernel(dev, cq); +ex: + mutex_unlock(&cq->resize_mutex); + return err; } int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 256a23344f2..ece028fc47d 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -47,7 +47,6 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db) { struct mlx5_ib_user_db_page *page; - struct ib_umem_chunk *chunk; int err = 0; mutex_lock(&context->db_page_mutex); @@ -75,8 +74,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, list_add(&page->list, &context->db_page_list); found: - chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); - db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); db->u.user_page = page; ++page->refcnt; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3f831de9a4d..364d4b6937f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -46,8 +46,8 @@ #include "mlx5_ib.h" #define DRIVER_NAME "mlx5_ib" -#define DRIVER_VERSION "1.0" -#define DRIVER_RELDATE "June 2013" +#define DRIVER_VERSION "2.2-1" +#define DRIVER_RELDATE "Feb 2014" MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); @@ -164,6 +164,7 @@ int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn) static int alloc_comp_eqs(struct mlx5_ib_dev *dev) { struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + char name[MLX5_MAX_EQ_NAME]; struct mlx5_eq *eq, *n; int ncomp_vec; int nent; @@ -180,11 +181,10 @@ static int alloc_comp_eqs(struct mlx5_ib_dev *dev) goto clean; } - snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); + snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(&dev->mdev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, - eq->name, - &dev->mdev.priv.uuari.uars[0]); + name, &dev->mdev.priv.uuari.uars[0]); if (err) { kfree(eq); goto clean; @@ -261,8 +261,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_RC_RNR_NAK_GEN | - IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + IB_DEVICE_RC_RNR_NAK_GEN; flags = dev->mdev.caps.flags; if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR) props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; @@ -274,6 +273,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (flags & MLX5_DEV_CAP_FLAG_XRC) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) { + props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; + /* At this stage no support for signature handover */ + props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | + IB_PROT_T10DIF_TYPE_2 | + IB_PROT_T10DIF_TYPE_3; + props->sig_guard_cap = IB_GUARD_T10DIF_CRC | + IB_GUARD_T10DIF_CSUM; + } + if (flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST) + props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & 0xffffff; @@ -301,9 +311,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = (unsigned int)-1; props->local_ca_ack_delay = dev->mdev.caps.local_ca_ack_delay; - props->atomic_cap = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ? - IB_ATOMIC_HCA : IB_ATOMIC_NONE; - props->masked_atomic_cap = IB_ATOMIC_HCA; + props->atomic_cap = IB_ATOMIC_NONE; + props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); props->max_mcast_grp = 1 << dev->mdev.caps.log_max_mcg; props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg; @@ -537,34 +546,51 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_alloc_ucontext_req req; + struct mlx5_ib_alloc_ucontext_req_v2 req; struct mlx5_ib_alloc_ucontext_resp resp; struct mlx5_ib_ucontext *context; struct mlx5_uuar_info *uuari; struct mlx5_uar *uars; + int gross_uuars; int num_uars; + int ver; int uuarn; int err; int i; + int reqlen; if (!dev->ib_active) return ERR_PTR(-EAGAIN); - err = ib_copy_from_udata(&req, udata, sizeof(req)); + memset(&req, 0, sizeof(req)); + reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); + if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) + ver = 0; + else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2)) + ver = 2; + else + return ERR_PTR(-EINVAL); + + err = ib_copy_from_udata(&req, udata, reqlen); if (err) return ERR_PTR(err); + if (req.flags || req.reserved) + return ERR_PTR(-EINVAL); + if (req.total_num_uuars > MLX5_MAX_UUARS) return ERR_PTR(-ENOMEM); if (req.total_num_uuars == 0) return ERR_PTR(-EINVAL); - req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE); + req.total_num_uuars = ALIGN(req.total_num_uuars, + MLX5_NON_FP_BF_REGS_PER_PAGE); if (req.num_low_latency_uuars > req.total_num_uuars - 1) return ERR_PTR(-EINVAL); - num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE; + num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; + gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp; resp.bf_reg_size = dev->mdev.caps.bf_reg_size; resp.cache_line_size = L1_CACHE_BYTES; @@ -586,7 +612,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, goto out_ctx; } - uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars), + uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars), sizeof(*uuari->bitmap), GFP_KERNEL); if (!uuari->bitmap) { @@ -596,13 +622,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, /* * clear all fast path uuars */ - for (i = 0; i < req.total_num_uuars; i++) { + for (i = 0; i < gross_uuars; i++) { uuarn = i & 3; if (uuarn == 2 || uuarn == 3) set_bit(i, uuari->bitmap); } - uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL); + uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL); if (!uuari->count) { err = -ENOMEM; goto out_bitmap; @@ -624,6 +650,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (err) goto out_uars; + uuari->ver = ver; uuari->num_low_latency_uuars = req.num_low_latency_uuars; uuari->uars = uars; uuari->num_uars = num_uars; @@ -746,7 +773,8 @@ static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn) seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); seg->start_addr = 0; - err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in)); + err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in), + NULL, NULL, NULL); if (err) { mlx5_ib_warn(dev, "failed to create mkey, %d\n", err); goto err_in; @@ -1006,6 +1034,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, ibev.device = &ibdev->ib_dev; ibev.element.port_num = port; + if (port < 1 || port > ibdev->num_ports) { + mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); + return; + } + if (ibdev->ib_active) ib_dispatch_event(&ibev); } @@ -1401,12 +1434,15 @@ static int init_one(struct pci_dev *pdev, dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; + dev->ib_dev.destroy_mr = mlx5_ib_destroy_mr; dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; dev->ib_dev.process_mad = mlx5_ib_process_mad; + dev->ib_dev.create_mr = mlx5_ib_create_mr; dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr; dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; + dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 3a5322870b9..8499aec94db 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -44,16 +44,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, int *ncont, int *order) { - struct ib_umem_chunk *chunk; unsigned long tmp; unsigned long m; - int i, j, k; + int i, k; u64 base = 0; int p = 0; int skip; int mask; u64 len; u64 pfn; + struct scatterlist *sg; + int entry; addr = addr >> PAGE_SHIFT; tmp = (unsigned long)addr; @@ -61,32 +62,31 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, skip = 1 << m; mask = skip - 1; i = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; j++) { - len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; - pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT; - for (k = 0; k < len; k++) { - if (!(i & mask)) { - tmp = (unsigned long)pfn; - m = min(m, find_first_bit(&tmp, sizeof(tmp))); + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + pfn = sg_dma_address(sg) >> PAGE_SHIFT; + for (k = 0; k < len; k++) { + if (!(i & mask)) { + tmp = (unsigned long)pfn; + m = min(m, find_first_bit(&tmp, sizeof(tmp))); + skip = 1 << m; + mask = skip - 1; + base = pfn; + p = 0; + } else { + if (base + p != pfn) { + tmp = (unsigned long)p; + m = find_first_bit(&tmp, sizeof(tmp)); skip = 1 << m; mask = skip - 1; base = pfn; p = 0; - } else { - if (base + p != pfn) { - tmp = (unsigned long)p; - m = find_first_bit(&tmp, sizeof(tmp)); - skip = 1 << m; - mask = skip - 1; - base = pfn; - p = 0; - } } - p++; - i++; } + p++; + i++; } + } if (i) { m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); @@ -112,32 +112,32 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, { int shift = page_shift - PAGE_SHIFT; int mask = (1 << shift) - 1; - struct ib_umem_chunk *chunk; - int i, j, k; + int i, k; u64 cur = 0; u64 base; int len; + struct scatterlist *sg; + int entry; i = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; j++) { - len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; - base = sg_dma_address(&chunk->page_list[j]); - for (k = 0; k < len; k++) { - if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); - if (umr) - cur |= 3; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + base = sg_dma_address(sg); + for (k = 0; k < len; k++) { + if (!(i & mask)) { + cur = base + (k << PAGE_SHIFT); + if (umr) + cur |= 3; - pas[i >> shift] = cpu_to_be64(cur); - mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", - i >> shift, be64_to_cpu(pas[i >> shift])); - } else - mlx5_ib_dbg(dev, "=====> 0x%llx\n", - base + (k << PAGE_SHIFT)); - i++; - } + pas[i >> shift] = cpu_to_be64(cur); + mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", + i >> shift, be64_to_cpu(pas[i >> shift])); + } else + mlx5_ib_dbg(dev, "=====> 0x%llx\n", + base + (k << PAGE_SHIFT)); + i++; } + } } int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 836be915724..f2ccf1a5a29 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -189,12 +189,16 @@ struct mlx5_ib_qp { int create_type; u32 pa_lkey; + + /* Store signature errors */ + bool signature_en; }; struct mlx5_ib_cq_buf { struct mlx5_buf buf; struct ib_umem *umem; int cqe_size; + int nent; }; enum mlx5_ib_qp_flags { @@ -220,7 +224,7 @@ struct mlx5_ib_cq { /* protect resize cq */ struct mutex resize_mutex; - struct mlx5_ib_cq_resize *resize_buf; + struct mlx5_ib_cq_buf *resize_buf; struct ib_umem *resize_umem; int cqe_size; }; @@ -260,8 +264,9 @@ struct mlx5_ib_mr { __be64 *pas; dma_addr_t dma; int npages; - struct completion done; - enum ib_wc_status status; + struct mlx5_ib_dev *dev; + struct mlx5_create_mkey_mbox_out out; + struct mlx5_core_sig_ctx *sig; }; struct mlx5_ib_fast_reg_page_list { @@ -270,6 +275,17 @@ struct mlx5_ib_fast_reg_page_list { dma_addr_t map; }; +struct mlx5_ib_umr_context { + enum ib_wc_status status; + struct completion done; +}; + +static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) +{ + context->status = -1; + init_completion(&context->done); +} + struct umr_common { struct ib_pd *pd; struct ib_cq *cq; @@ -323,6 +339,7 @@ struct mlx5_cache_ent { struct mlx5_ib_dev *dev; struct work_struct work; struct delayed_work dwork; + int pending; }; struct mlx5_mr_cache { @@ -358,6 +375,8 @@ struct mlx5_ib_dev { spinlock_t mr_lock; struct mlx5_ib_resources devr; struct mlx5_mr_cache cache; + struct timer_list delay_timer; + int fill_delay; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -390,6 +409,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) return container_of(mqp, struct mlx5_ib_qp, mqp); } +static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) +{ + return container_of(mmr, struct mlx5_ib_mr, mmr); +} + static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) { return container_of(ibpd, struct mlx5_ib_pd, ibpd); @@ -489,6 +513,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr); +int mlx5_ib_destroy_mr(struct ib_mr *ibmr); +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, @@ -524,6 +551,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, + struct ib_mr_status *mr_status); static inline void init_query_mad(struct ib_smp *mad) { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bd41df95b6f..afa873bd028 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -35,11 +35,16 @@ #include <linux/random.h> #include <linux/debugfs.h> #include <linux/export.h> +#include <linux/delay.h> #include <rdma/ib_umem.h> #include "mlx5_ib.h" enum { - DEF_CACHE_SIZE = 10, + MAX_PENDING_REG_MR = 8, +}; + +enum { + MLX5_UMR_ALIGN = 2048 }; static __be64 *mr_align(__be64 *ptr, int align) @@ -59,15 +64,67 @@ static int order2idx(struct mlx5_ib_dev *dev, int order) return order - cache->ent[0].order; } +static void reg_mr_callback(int status, void *context) +{ + struct mlx5_ib_mr *mr = context; + struct mlx5_ib_dev *dev = mr->dev; + struct mlx5_mr_cache *cache = &dev->cache; + int c = order2idx(dev, mr->order); + struct mlx5_cache_ent *ent = &cache->ent[c]; + u8 key; + unsigned long flags; + struct mlx5_mr_table *table = &dev->mdev.priv.mr_table; + int err; + + spin_lock_irqsave(&ent->lock, flags); + ent->pending--; + spin_unlock_irqrestore(&ent->lock, flags); + if (status) { + mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); + kfree(mr); + dev->fill_delay = 1; + mod_timer(&dev->delay_timer, jiffies + HZ); + return; + } + + if (mr->out.hdr.status) { + mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n", + mr->out.hdr.status, + be32_to_cpu(mr->out.hdr.syndrome)); + kfree(mr); + dev->fill_delay = 1; + mod_timer(&dev->delay_timer, jiffies + HZ); + return; + } + + spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags); + key = dev->mdev.priv.mkey_key++; + spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags); + mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; + + cache->last_add = jiffies; + + spin_lock_irqsave(&ent->lock, flags); + list_add_tail(&mr->list, &ent->head); + ent->cur++; + ent->size++; + spin_unlock_irqrestore(&ent->lock, flags); + + write_lock_irqsave(&table->lock, flags); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key), + &mr->mmr); + if (err) + pr_err("Error inserting to mr tree. 0x%x\n", -err); + write_unlock_irqrestore(&table->lock, flags); +} + static int add_keys(struct mlx5_ib_dev *dev, int c, int num) { - struct device *ddev = dev->ib_dev.dma_device; struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; struct mlx5_create_mkey_mbox_in *in; struct mlx5_ib_mr *mr; int npages = 1 << ent->order; - int size = sizeof(u64) * npages; int err = 0; int i; @@ -76,87 +133,66 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) return -ENOMEM; for (i = 0; i < num; i++) { + if (ent->pending >= MAX_PENDING_REG_MR) { + err = -EAGAIN; + break; + } + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) { err = -ENOMEM; - goto out; + break; } mr->order = ent->order; mr->umred = 1; - mr->pas = kmalloc(size + 0x3f, GFP_KERNEL); - if (!mr->pas) { - kfree(mr); - err = -ENOMEM; - goto out; - } - mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size, - DMA_TO_DEVICE); - if (dma_mapping_error(ddev, mr->dma)) { - kfree(mr->pas); - kfree(mr); - err = -ENOMEM; - goto out; - } - + mr->dev = dev; in->seg.status = 1 << 6; in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2); in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN; in->seg.log2_page_size = 12; + spin_lock_irq(&ent->lock); + ent->pending++; + spin_unlock_irq(&ent->lock); err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, - sizeof(*in)); + sizeof(*in), reg_mr_callback, + mr, &mr->out); if (err) { mlx5_ib_warn(dev, "create mkey failed %d\n", err); - dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); - kfree(mr->pas); kfree(mr); - goto out; + break; } - cache->last_add = jiffies; - - spin_lock(&ent->lock); - list_add_tail(&mr->list, &ent->head); - ent->cur++; - ent->size++; - spin_unlock(&ent->lock); } -out: kfree(in); return err; } static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) { - struct device *ddev = dev->ib_dev.dma_device; struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; struct mlx5_ib_mr *mr; - int size; int err; int i; for (i = 0; i < num; i++) { - spin_lock(&ent->lock); + spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { - spin_unlock(&ent->lock); + spin_unlock_irq(&ent->lock); return; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); ent->cur--; ent->size--; - spin_unlock(&ent->lock); + spin_unlock_irq(&ent->lock); err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); - if (err) { + if (err) mlx5_ib_warn(dev, "failed destroy mkey\n"); - } else { - size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40); - dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); - kfree(mr->pas); + else kfree(mr); - } } } @@ -183,9 +219,13 @@ static ssize_t size_write(struct file *filp, const char __user *buf, return -EINVAL; if (var > ent->size) { - err = add_keys(dev, c, var - ent->size); - if (err) - return err; + do { + err = add_keys(dev, c, var - ent->size); + if (err && err != -EAGAIN) + return err; + + usleep_range(3000, 5000); + } while (err); } else if (var < ent->size) { remove_keys(dev, c, ent->size - var); } @@ -301,23 +341,37 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) struct mlx5_ib_dev *dev = ent->dev; struct mlx5_mr_cache *cache = &dev->cache; int i = order2idx(dev, ent->order); + int err; if (cache->stopped) return; ent = &dev->cache.ent[i]; - if (ent->cur < 2 * ent->limit) { - add_keys(dev, i, 1); - if (ent->cur < 2 * ent->limit) - queue_work(cache->wq, &ent->work); + if (ent->cur < 2 * ent->limit && !dev->fill_delay) { + err = add_keys(dev, i, 1); + if (ent->cur < 2 * ent->limit) { + if (err == -EAGAIN) { + mlx5_ib_dbg(dev, "returned eagain, order %d\n", + i + 2); + queue_delayed_work(cache->wq, &ent->dwork, + msecs_to_jiffies(3)); + } else if (err) { + mlx5_ib_warn(dev, "command failed order %d, err %d\n", + i + 2, err); + queue_delayed_work(cache->wq, &ent->dwork, + msecs_to_jiffies(1000)); + } else { + queue_work(cache->wq, &ent->work); + } + } } else if (ent->cur > 2 * ent->limit) { if (!someone_adding(cache) && - time_after(jiffies, cache->last_add + 60 * HZ)) { + time_after(jiffies, cache->last_add + 300 * HZ)) { remove_keys(dev, i, 1); if (ent->cur > ent->limit) queue_work(cache->wq, &ent->work); } else { - queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ); + queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); } } } @@ -357,18 +411,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); - spin_lock(&ent->lock); + spin_lock_irq(&ent->lock); if (!list_empty(&ent->head)) { mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); ent->cur--; - spin_unlock(&ent->lock); + spin_unlock_irq(&ent->lock); if (ent->cur < ent->limit) queue_work(cache->wq, &ent->work); break; } - spin_unlock(&ent->lock); + spin_unlock_irq(&ent->lock); queue_work(cache->wq, &ent->work); @@ -395,12 +449,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return; } ent = &cache->ent[c]; - spin_lock(&ent->lock); + spin_lock_irq(&ent->lock); list_add_tail(&mr->list, &ent->head); ent->cur++; if (ent->cur > 2 * ent->limit) shrink = 1; - spin_unlock(&ent->lock); + spin_unlock_irq(&ent->lock); if (shrink) queue_work(cache->wq, &ent->work); @@ -408,33 +462,28 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) static void clean_keys(struct mlx5_ib_dev *dev, int c) { - struct device *ddev = dev->ib_dev.dma_device; struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; struct mlx5_ib_mr *mr; - int size; int err; + cancel_delayed_work(&ent->dwork); while (1) { - spin_lock(&ent->lock); + spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { - spin_unlock(&ent->lock); + spin_unlock_irq(&ent->lock); return; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); list_del(&mr->list); ent->cur--; ent->size--; - spin_unlock(&ent->lock); + spin_unlock_irq(&ent->lock); err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); - if (err) { + if (err) mlx5_ib_warn(dev, "failed destroy mkey\n"); - } else { - size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40); - dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); - kfree(mr->pas); + else kfree(mr); - } } } @@ -490,12 +539,18 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) debugfs_remove_recursive(dev->cache.root); } +static void delay_time_func(unsigned long ctx) +{ + struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx; + + dev->fill_delay = 0; +} + int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; int limit; - int size; int err; int i; @@ -505,6 +560,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return -ENOMEM; } + setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { INIT_LIST_HEAD(&cache->ent[i].head); spin_lock_init(&cache->ent[i].lock); @@ -515,13 +571,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->order = i + 2; ent->dev = dev; - if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) { - size = dev->mdev.profile->mr_cache[i].size; + if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) limit = dev->mdev.profile->mr_cache[i].limit; - } else { - size = DEF_CACHE_SIZE; + else limit = 0; - } + INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); ent->limit = limit; @@ -540,13 +594,16 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) int i; dev->cache.stopped = 1; - destroy_workqueue(dev->cache.wq); + flush_workqueue(dev->cache.wq); mlx5_mr_cache_debugfs_cleanup(dev); for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) clean_keys(dev, i); + destroy_workqueue(dev->cache.wq); + del_timer_sync(&dev->delay_timer); + return 0; } @@ -575,7 +632,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); seg->start_addr = 0; - err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in)); + err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL, + NULL); if (err) goto err_in; @@ -650,7 +708,7 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) { - struct mlx5_ib_mr *mr; + struct mlx5_ib_umr_context *context; struct ib_wc wc; int err; @@ -663,9 +721,9 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) if (err == 0) break; - mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id; - mr->status = wc.status; - complete(&mr->done); + context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id; + context->status = wc.status; + complete(&context->done); } ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); } @@ -675,21 +733,24 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, int page_shift, int order, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct device *ddev = dev->ib_dev.dma_device; struct umr_common *umrc = &dev->umrc; + struct mlx5_ib_umr_context umr_context; struct ib_send_wr wr, *bad; struct mlx5_ib_mr *mr; struct ib_sge sg; - int err; + int size = sizeof(u64) * npages; + int err = 0; int i; - for (i = 0; i < 10; i++) { + for (i = 0; i < 1; i++) { mr = alloc_cached_mr(dev, order); if (mr) break; err = add_keys(dev, order2idx(dev, order), 1); - if (err) { - mlx5_ib_warn(dev, "add_keys failed\n"); + if (err && err != -EAGAIN) { + mlx5_ib_warn(dev, "add_keys failed, err %d\n", err); break; } } @@ -697,38 +758,58 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, if (!mr) return ERR_PTR(-EAGAIN); - mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1); + mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); + if (!mr->pas) { + err = -ENOMEM; + goto free_mr; + } + + mlx5_ib_populate_pas(dev, umem, page_shift, + mr_align(mr->pas, MLX5_UMR_ALIGN), 1); + + mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size, + DMA_TO_DEVICE); + if (dma_mapping_error(ddev, mr->dma)) { + err = -ENOMEM; + goto free_pas; + } memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)mr; + wr.wr_id = (u64)(unsigned long)&umr_context; prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags); - /* We serialize polls so one process does not kidnap another's - * completion. This is not a problem since wr is completed in - * around 1 usec - */ + mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - init_completion(&mr->done); err = ib_post_send(umrc->qp, &wr, &bad); if (err) { mlx5_ib_warn(dev, "post send failed, err %d\n", err); - up(&umrc->sem); - goto error; + goto unmap_dma; + } else { + wait_for_completion(&umr_context.done); + if (umr_context.status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "reg umr failed\n"); + err = -EFAULT; + } } - wait_for_completion(&mr->done); + + mr->mmr.iova = virt_addr; + mr->mmr.size = len; + mr->mmr.pd = to_mpd(pd)->pdn; + +unmap_dma: up(&umrc->sem); + dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); - if (mr->status != IB_WC_SUCCESS) { - mlx5_ib_warn(dev, "reg umr failed\n"); - err = -EFAULT; - goto error; +free_pas: + kfree(mr->pas); + +free_mr: + if (err) { + free_cached_mr(dev, mr); + return ERR_PTR(err); } return mr; - -error: - free_cached_mr(dev, mr); - return ERR_PTR(err); } static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, @@ -763,8 +844,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); in->seg.log2_page_size = page_shift; in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); - err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen); + in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, + 1 << page_shift)); + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL, + NULL, NULL); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); goto err_2; @@ -855,24 +938,26 @@ error: static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct umr_common *umrc = &dev->umrc; + struct mlx5_ib_umr_context umr_context; struct ib_send_wr wr, *bad; int err; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (u64)(unsigned long)mr; + wr.wr_id = (u64)(unsigned long)&umr_context; prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); + mlx5_ib_init_umr_context(&umr_context); down(&umrc->sem); - init_completion(&mr->done); err = ib_post_send(umrc->qp, &wr, &bad); if (err) { up(&umrc->sem); mlx5_ib_dbg(dev, "err %d\n", err); goto error; + } else { + wait_for_completion(&umr_context.done); + up(&umrc->sem); } - wait_for_completion(&mr->done); - up(&umrc->sem); - if (mr->status != IB_WC_SUCCESS) { + if (umr_context.status != IB_WC_SUCCESS) { mlx5_ib_warn(dev, "unreg umr failed\n"); err = -EFAULT; goto error; @@ -921,6 +1006,122 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) return 0; } +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int access_mode, err; + int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + in->seg.status = 1 << 6; /* free */ + in->seg.xlt_oct_size = cpu_to_be32(ndescs); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + access_mode = MLX5_ACCESS_MODE_MTT; + + if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) { + u32 psv_index[2]; + + in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) | + MLX5_MKEY_BSF_EN); + in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); + mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); + if (!mr->sig) { + err = -ENOMEM; + goto err_free_in; + } + + /* create mem & wire PSVs */ + err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn, + 2, psv_index); + if (err) + goto err_free_sig; + + access_mode = MLX5_ACCESS_MODE_KLM; + mr->sig->psv_memory.psv_idx = psv_index[0]; + mr->sig->psv_wire.psv_idx = psv_index[1]; + + mr->sig->sig_status_checked = true; + mr->sig->sig_err_exists = false; + /* Next UMR, Arm SIGERR */ + ++mr->sig->sigerr_count; + } + + in->seg.flags = MLX5_PERM_UMR_EN | access_mode; + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), + NULL, NULL, NULL); + if (err) + goto err_destroy_psv; + + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->umem = NULL; + kfree(in); + + return &mr->ibmr; + +err_destroy_psv: + if (mr->sig) { + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + } +err_free_sig: + kfree(mr->sig); +err_free_in: + kfree(in); +err_free: + kfree(mr); + return ERR_PTR(err); +} + +int mlx5_ib_destroy_mr(struct ib_mr *ibmr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibmr->device); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + int err; + + if (mr->sig) { + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + kfree(mr->sig); + } + + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", + mr->mmr.key, err); + return err; + } + + kfree(mr); + + return err; +} + struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { @@ -948,7 +1149,8 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, * TBD not needed - issue 197292 */ in->seg.log2_page_size = PAGE_SHIFT; - err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in)); + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL, + NULL, NULL); kfree(in); if (err) goto err_free; @@ -1005,3 +1207,44 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); } + +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, + struct ib_mr_status *mr_status) +{ + struct mlx5_ib_mr *mmr = to_mmr(ibmr); + int ret = 0; + + if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { + pr_err("Invalid status check mask\n"); + ret = -EINVAL; + goto done; + } + + mr_status->fail_status = 0; + if (check_mask & IB_MR_CHECK_SIG_STATUS) { + if (!mmr->sig) { + ret = -EINVAL; + pr_err("signature status check requested on a non-signature enabled MR\n"); + goto done; + } + + mmr->sig->sig_status_checked = true; + if (!mmr->sig->sig_err_exists) + goto done; + + if (ibmr->lkey == mmr->sig->err_item.key) + memcpy(&mr_status->sig_err, &mmr->sig->err_item, + sizeof(mr_status->sig_err)); + else { + mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; + mr_status->sig_err.sig_err_offset = 0; + mr_status->sig_err.key = mmr->sig->err_item.key; + } + + mmr->sig->sig_err_exists = false; + mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; + } + +done: + return ret; +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 045f8cdbd30..bbbcf389272 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -203,7 +203,7 @@ static int sq_overhead(enum ib_qp_type qp_type) switch (qp_type) { case IB_QPT_XRC_INI: - size = sizeof(struct mlx5_wqe_xrc_seg); + size += sizeof(struct mlx5_wqe_xrc_seg); /* fall through */ case IB_QPT_RC: size += sizeof(struct mlx5_wqe_ctrl_seg) + @@ -211,20 +211,25 @@ static int sq_overhead(enum ib_qp_type qp_type) sizeof(struct mlx5_wqe_raddr_seg); break; + case IB_QPT_XRC_TGT: + return 0; + case IB_QPT_UC: - size = sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg); + size += sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_raddr_seg) + + sizeof(struct mlx5_wqe_umr_ctrl_seg) + + sizeof(struct mlx5_mkey_seg); break; case IB_QPT_UD: case IB_QPT_SMI: case IB_QPT_GSI: - size = sizeof(struct mlx5_wqe_ctrl_seg) + + size += sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_datagram_seg); break; case MLX5_IB_QPT_REG_UMR: - size = sizeof(struct mlx5_wqe_ctrl_seg) + + size += sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_umr_ctrl_seg) + sizeof(struct mlx5_mkey_seg); break; @@ -251,8 +256,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) } size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); - - return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN && + ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE) + return MLX5_SIG_WQE_SIZE; + else + return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, @@ -270,7 +278,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, return wqe_size; if (wqe_size > dev->mdev.caps.max_sq_desc_sz) { - mlx5_ib_dbg(dev, "\n"); + mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n", + wqe_size, dev->mdev.caps.max_sq_desc_sz); return -EINVAL; } @@ -278,11 +287,20 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) + qp->signature_en = true; + wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; + if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { + mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", + qp->sq.wqe_cnt, dev->mdev.caps.max_wqes); + return -ENOMEM; + } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); qp->sq.max_gs = attr->cap.max_send_sge; - qp->sq.max_post = 1 << ilog2(wq_size / wqe_size); + qp->sq.max_post = wq_size / wqe_size; + attr->cap.max_send_wr = qp->sq.max_post; return wq_size; } @@ -330,14 +348,57 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) return 1; } +static int first_med_uuar(void) +{ + return 1; +} + +static int next_uuar(int n) +{ + n++; + + while (((n % 4) & 2)) + n++; + + return n; +} + +static int num_med_uuar(struct mlx5_uuar_info *uuari) +{ + int n; + + n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE - + uuari->num_low_latency_uuars - 1; + + return n >= 0 ? n : 0; +} + +static int max_uuari(struct mlx5_uuar_info *uuari) +{ + return uuari->num_uars * 4; +} + +static int first_hi_uuar(struct mlx5_uuar_info *uuari) +{ + int med; + int i; + int t; + + med = num_med_uuar(uuari); + for (t = 0, i = first_med_uuar();; i = next_uuar(i)) { + t++; + if (t == med) + return next_uuar(i); + } + + return 0; +} + static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari) { - int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; - int start_uuar; int i; - start_uuar = nuuars - uuari->num_low_latency_uuars; - for (i = start_uuar; i < nuuars; i++) { + for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) { if (!test_bit(i, uuari->bitmap)) { set_bit(i, uuari->bitmap); uuari->count[i]++; @@ -350,19 +411,10 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari) static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari) { - int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; - int minidx = 1; - int uuarn; - int end; + int minidx = first_med_uuar(); int i; - end = nuuars - uuari->num_low_latency_uuars; - - for (i = 1; i < end; i++) { - uuarn = i & 3; - if (uuarn == 2 || uuarn == 3) - continue; - + for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) { if (uuari->count[i] < uuari->count[minidx]) minidx = i; } @@ -384,11 +436,17 @@ static int alloc_uuar(struct mlx5_uuar_info *uuari, break; case MLX5_IB_LATENCY_CLASS_MEDIUM: - uuarn = alloc_med_class_uuar(uuari); + if (uuari->ver < 2) + uuarn = -ENOMEM; + else + uuarn = alloc_med_class_uuar(uuari); break; case MLX5_IB_LATENCY_CLASS_HIGH: - uuarn = alloc_high_class_uuar(uuari); + if (uuari->ver < 2) + uuarn = -ENOMEM; + else + uuarn = alloc_high_class_uuar(uuari); break; case MLX5_IB_LATENCY_CLASS_FAST_PATH: @@ -479,12 +537,12 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, { struct mlx5_ib_ucontext *context; struct mlx5_ib_create_qp ucmd; - int page_shift; + int page_shift = 0; int uar_index; int npages; - u32 offset; + u32 offset = 0; int uuarn; - int ncont; + int ncont = 0; int err; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); @@ -500,38 +558,53 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH); if (uuarn < 0) { mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); - mlx5_ib_dbg(dev, "reverting to high latency\n"); - uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); + mlx5_ib_dbg(dev, "reverting to medium latency\n"); + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM); if (uuarn < 0) { - mlx5_ib_dbg(dev, "uuar allocation failed\n"); - return uuarn; + mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n"); + mlx5_ib_dbg(dev, "reverting to high latency\n"); + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); + if (uuarn < 0) { + mlx5_ib_warn(dev, "uuar allocation failed\n"); + return uuarn; + } } } uar_index = uuarn_to_uar_index(&context->uuari, uuarn); mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index); + qp->rq.offset = 0; + qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); + qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; + err = set_user_buf_size(dev, qp, &ucmd); if (err) goto err_uuar; - qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - qp->buf_size, 0, 0); - if (IS_ERR(qp->umem)) { - mlx5_ib_dbg(dev, "umem_get failed\n"); - err = PTR_ERR(qp->umem); - goto err_uuar; + if (ucmd.buf_addr && qp->buf_size) { + qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, + qp->buf_size, 0, 0); + if (IS_ERR(qp->umem)) { + mlx5_ib_dbg(dev, "umem_get failed\n"); + err = PTR_ERR(qp->umem); + goto err_uuar; + } + } else { + qp->umem = NULL; } - mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, - &ncont, NULL); - err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); - if (err) { - mlx5_ib_warn(dev, "bad offset\n"); - goto err_umem; + if (qp->umem) { + mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, + &ncont, NULL); + err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); + if (err) { + mlx5_ib_warn(dev, "bad offset\n"); + goto err_umem; + } + mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n", + ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset); } - mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n", - ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset); *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; *in = mlx5_vzalloc(*inlen); @@ -539,9 +612,10 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, err = -ENOMEM; goto err_umem; } - mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); + if (qp->umem) + mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); (*in)->ctx.log_pg_sz_remote_qpn = - cpu_to_be32((page_shift - PAGE_SHIFT) << 24); + cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); (*in)->ctx.params2 = cpu_to_be32(offset << 6); (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); @@ -570,7 +644,8 @@ err_free: mlx5_vfree(*in); err_umem: - ib_umem_release(qp->umem); + if (qp->umem) + ib_umem_release(qp->umem); err_uuar: free_uuar(&context->uuari, uuarn); @@ -583,7 +658,8 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp) context = to_mucontext(pd->uobject->context); mlx5_ib_db_unmap_user(context, &qp->db); - ib_umem_release(qp->umem); + if (qp->umem) + ib_umem_release(qp->umem); free_uuar(&context->uuari, qp->uuarn); } @@ -599,8 +675,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, int err; uuari = &dev->mdev.priv.uuari; - if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) - qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; + if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) + return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) lc = MLX5_IB_LATENCY_CLASS_FAST_PATH; @@ -638,7 +714,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, goto err_buf; } (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); - (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24); + (*in)->ctx.log_pg_sz_remote_qpn = + cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); /* Set "fast registration enabled" for all kernel QPs */ (*in)->ctx.params1 |= cpu_to_be32(1 << 11); (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); @@ -734,6 +811,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { + mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); + return -EINVAL; + } else { + qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; + } + } + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; @@ -805,6 +891,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (qp->wq_sig) in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG); + if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) + in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST); + if (qp->scat_cqe && is_connected(init_attr->qp_type)) { int rcqe_sz; int scqe_sz; @@ -1280,6 +1369,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_Q_KEY, [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX, }, }, [MLX5_QP_STATE_RTR] = { @@ -1302,9 +1396,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RNR_TIMEOUT | - MLX5_QP_OPTPAR_PM_STATE, + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_ALT_ADDR_PATH, [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | - MLX5_QP_OPTPAR_PM_STATE, + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_ALT_ADDR_PATH, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_SRQN | MLX5_QP_OPTPAR_CQN_RCV, @@ -1314,6 +1410,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q [MLX5_QP_STATE_RTS] = { [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE, + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RRE, }, }, }; @@ -1530,7 +1631,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, mlx5_cur = to_mlx5_state(cur_state); mlx5_new = to_mlx5_state(new_state); mlx5_st = to_mlx5_st(ibqp->qp_type); - if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0) + if (mlx5_st < 0) goto out; optpar = ib_mask_to_mlx5_opt(attr_mask); @@ -1593,7 +1694,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && - !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) + !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, + IB_LINK_LAYER_UNSPECIFIED)) goto out; if ((attr_mask & IB_QP_PORT) && @@ -1651,29 +1753,6 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, rseg->reserved = 0; } -static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr) -{ - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); - } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); - } else { - aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); - aseg->compare = 0; - } -} - -static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg, - struct ib_send_wr *wr) -{ - aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); - aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); - aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); - aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); -} - static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { @@ -1714,6 +1793,27 @@ static __be64 frwr_mkey_mask(void) return cpu_to_be64(result); } +static __be64 sig_mkey_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_SIGERR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE | + MLX5_MKEY_MASK_BSF_EN; + + return cpu_to_be64(result); +} + static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr, int li) { @@ -1747,6 +1847,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, MLX5_MKEY_MASK_PD | MLX5_MKEY_MASK_LR | MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_RR | MLX5_MKEY_MASK_RW | MLX5_MKEY_MASK_A | @@ -1768,7 +1869,7 @@ static u8 get_umr_flags(int acc) (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | - MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; + MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; } static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, @@ -1780,7 +1881,8 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, return; } - seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags); + seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) | + MLX5_ACCESS_MODE_MTT; *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); @@ -1803,7 +1905,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); seg->len = cpu_to_be64(wr->wr.fast_reg.length); seg->log2_page_size = wr->wr.fast_reg.page_shift; - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | + mlx5_mkey_variant(wr->wr.fast_reg.rkey)); } static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, @@ -1895,6 +1998,342 @@ static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, return 0; } +static u16 prot_field_size(enum ib_signature_type type) +{ + switch (type) { + case IB_SIG_TYPE_T10_DIF: + return MLX5_DIF_SIZE; + default: + return 0; + } +} + +static u8 bs_selector(int block_size) +{ + switch (block_size) { + case 512: return 0x1; + case 520: return 0x2; + case 4096: return 0x3; + case 4160: return 0x4; + case 1073741824: return 0x5; + default: return 0; + } +} + +static int format_selector(struct ib_sig_attrs *attr, + struct ib_sig_domain *domain, + int *selector) +{ + +#define FORMAT_DIF_NONE 0 +#define FORMAT_DIF_CRC_INC 8 +#define FORMAT_DIF_CRC_NO_INC 12 +#define FORMAT_DIF_CSUM_INC 13 +#define FORMAT_DIF_CSUM_NO_INC 14 + + switch (domain->sig.dif.type) { + case IB_T10DIF_NONE: + /* No DIF */ + *selector = FORMAT_DIF_NONE; + break; + case IB_T10DIF_TYPE1: /* Fall through */ + case IB_T10DIF_TYPE2: + switch (domain->sig.dif.bg_type) { + case IB_T10DIF_CRC: + *selector = FORMAT_DIF_CRC_INC; + break; + case IB_T10DIF_CSUM: + *selector = FORMAT_DIF_CSUM_INC; + break; + default: + return 1; + } + break; + case IB_T10DIF_TYPE3: + switch (domain->sig.dif.bg_type) { + case IB_T10DIF_CRC: + *selector = domain->sig.dif.type3_inc_reftag ? + FORMAT_DIF_CRC_INC : + FORMAT_DIF_CRC_NO_INC; + break; + case IB_T10DIF_CSUM: + *selector = domain->sig.dif.type3_inc_reftag ? + FORMAT_DIF_CSUM_INC : + FORMAT_DIF_CSUM_NO_INC; + break; + default: + return 1; + } + break; + default: + return 1; + } + + return 0; +} + +static int mlx5_set_bsf(struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_bsf *bsf, u32 data_size) +{ + struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; + struct mlx5_bsf_basic *basic = &bsf->basic; + struct ib_sig_domain *mem = &sig_attrs->mem; + struct ib_sig_domain *wire = &sig_attrs->wire; + int ret, selector; + + memset(bsf, 0, sizeof(*bsf)); + switch (sig_attrs->mem.sig_type) { + case IB_SIG_TYPE_T10_DIF: + if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) + return -EINVAL; + + /* Input domain check byte mask */ + basic->check_byte_mask = sig_attrs->check_mask; + if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && + mem->sig.dif.type == wire->sig.dif.type) { + /* Same block structure */ + basic->bsf_size_sbs = 1 << 4; + if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) + basic->wire.copy_byte_mask |= 0xc0; + if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) + basic->wire.copy_byte_mask |= 0x30; + if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) + basic->wire.copy_byte_mask |= 0x0f; + } else + basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); + + basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); + basic->raw_data_size = cpu_to_be32(data_size); + + ret = format_selector(sig_attrs, mem, &selector); + if (ret) + return -EINVAL; + basic->m_bfs_psv = cpu_to_be32(selector << 24 | + msig->psv_memory.psv_idx); + + ret = format_selector(sig_attrs, wire, &selector); + if (ret) + return -EINVAL; + basic->w_bfs_psv = cpu_to_be32(selector << 24 | + msig->psv_wire.psv_idx); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size) +{ + struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs; + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + struct mlx5_bsf *bsf; + u32 data_len = wr->sg_list->length; + u32 data_key = wr->sg_list->lkey; + u64 data_va = wr->sg_list->addr; + int ret; + int wqe_size; + + if (!wr->wr.sig_handover.prot || + (data_key == wr->wr.sig_handover.prot->lkey && + data_va == wr->wr.sig_handover.prot->addr && + data_len == wr->wr.sig_handover.prot->length)) { + /** + * Source domain doesn't contain signature information + * or data and protection are interleaved in memory. + * So need construct: + * ------------------ + * | data_klm | + * ------------------ + * | BSF | + * ------------------ + **/ + struct mlx5_klm *data_klm = *seg; + + data_klm->bcount = cpu_to_be32(data_len); + data_klm->key = cpu_to_be32(data_key); + data_klm->va = cpu_to_be64(data_va); + wqe_size = ALIGN(sizeof(*data_klm), 64); + } else { + /** + * Source domain contains signature information + * So need construct a strided block format: + * --------------------------- + * | stride_block_ctrl | + * --------------------------- + * | data_klm | + * --------------------------- + * | prot_klm | + * --------------------------- + * | BSF | + * --------------------------- + **/ + struct mlx5_stride_block_ctrl_seg *sblock_ctrl; + struct mlx5_stride_block_entry *data_sentry; + struct mlx5_stride_block_entry *prot_sentry; + u32 prot_key = wr->wr.sig_handover.prot->lkey; + u64 prot_va = wr->wr.sig_handover.prot->addr; + u16 block_size = sig_attrs->mem.sig.dif.pi_interval; + int prot_size; + + sblock_ctrl = *seg; + data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); + prot_sentry = (void *)data_sentry + sizeof(*data_sentry); + + prot_size = prot_field_size(sig_attrs->mem.sig_type); + if (!prot_size) { + pr_err("Bad block size given: %u\n", block_size); + return -EINVAL; + } + sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + + prot_size); + sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); + sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); + sblock_ctrl->num_entries = cpu_to_be16(2); + + data_sentry->bcount = cpu_to_be16(block_size); + data_sentry->key = cpu_to_be32(data_key); + data_sentry->va = cpu_to_be64(data_va); + data_sentry->stride = cpu_to_be16(block_size); + + prot_sentry->bcount = cpu_to_be16(prot_size); + prot_sentry->key = cpu_to_be32(prot_key); + prot_sentry->va = cpu_to_be64(prot_va); + prot_sentry->stride = cpu_to_be16(prot_size); + + wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + + sizeof(*prot_sentry), 64); + } + + *seg += wqe_size; + *size += wqe_size / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + bsf = *seg; + ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); + if (ret) + return -EINVAL; + + *seg += sizeof(*bsf); + *size += sizeof(*bsf) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + return 0; +} + +static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, + struct ib_send_wr *wr, u32 nelements, + u32 length, u32 pdn) +{ + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + u32 sig_key = sig_mr->rkey; + u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; + + memset(seg, 0, sizeof(*seg)); + + seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | + MLX5_ACCESS_MODE_KLM; + seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | + MLX5_MKEY_BSF_EN | pdn); + seg->len = cpu_to_be64(length); + seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements))); + seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); +} + +static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr, u32 nelements) +{ + memset(umr, 0, sizeof(*umr)); + + umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; + umr->klm_octowords = get_klm_octo(nelements); + umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); + umr->mkey_mask = sig_mkey_mask(); +} + + +static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size) +{ + struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr); + u32 pdn = get_pd(qp)->pdn; + u32 klm_oct_size; + int region_len, ret; + + if (unlikely(wr->num_sge != 1) || + unlikely(wr->wr.sig_handover.access_flags & + IB_ACCESS_REMOTE_ATOMIC) || + unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || + unlikely(!sig_mr->sig->sig_status_checked)) + return -EINVAL; + + /* length of the protected region, data + protection */ + region_len = wr->sg_list->length; + if (wr->wr.sig_handover.prot && + (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey || + wr->wr.sig_handover.prot->addr != wr->sg_list->addr || + wr->wr.sig_handover.prot->length != wr->sg_list->length)) + region_len += wr->wr.sig_handover.prot->length; + + /** + * KLM octoword size - if protection was provided + * then we use strided block format (3 octowords), + * else we use single KLM (1 octoword) + **/ + klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1; + + set_sig_umr_segment(*seg, wr, klm_oct_size); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + ret = set_sig_data_segment(wr, qp, seg, size); + if (ret) + return ret; + + sig_mr->sig->sig_status_checked = false; + return 0; +} + +static int set_psv_wr(struct ib_sig_domain *domain, + u32 psv_idx, void **seg, int *size) +{ + struct mlx5_seg_set_psv *psv_seg = *seg; + + memset(psv_seg, 0, sizeof(*psv_seg)); + psv_seg->psv_num = cpu_to_be32(psv_idx); + switch (domain->sig_type) { + case IB_SIG_TYPE_T10_DIF: + psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | + domain->sig.dif.app_tag); + psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); + + *seg += sizeof(*psv_seg); + *size += sizeof(*psv_seg) / 16; + break; + + default: + pr_err("Bad signature type given.\n"); + return 1; + } + + return 0; +} + static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) { @@ -1916,6 +2355,10 @@ static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); if (!li) { + if (unlikely(wr->wr.fast_reg.page_list_len > + wr->wr.fast_reg.page_list->max_page_list_len)) + return -ENOMEM; + set_frwr_pages(*seg, wr, mdev, pd, writ); *seg += sizeof(struct mlx5_wqe_data_seg); *size += (sizeof(struct mlx5_wqe_data_seg) / 16); @@ -1978,6 +2421,59 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr) } } +static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + struct ib_send_wr *wr, int *idx, + int *size, int nreq) +{ + int err = 0; + + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { + err = -ENOMEM; + return err; + } + + *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + *seg = mlx5_get_send_wqe(qp, *idx); + *ctrl = *seg; + *(uint32_t *)(*seg + 8) = 0; + (*ctrl)->imm = send_ieth(wr); + (*ctrl)->fm_ce_se = qp->sq_signal_bits | + (wr->send_flags & IB_SEND_SIGNALED ? + MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? + MLX5_WQE_CTRL_SOLICITED : 0); + + *seg += sizeof(**ctrl); + *size = sizeof(**ctrl) / 16; + + return err; +} + +static void finish_wqe(struct mlx5_ib_qp *qp, + struct mlx5_wqe_ctrl_seg *ctrl, + u8 size, unsigned idx, u64 wr_id, + int nreq, u8 fence, u8 next_fence, + u32 mlx5_opcode) +{ + u8 opmod = 0; + + ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | + mlx5_opcode | ((u32)opmod << 24)); + ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); + ctrl->fm_ce_se |= fence; + qp->fm_cache = next_fence; + if (unlikely(qp->wq_sig)) + ctrl->signature = wq_sig(ctrl); + + qp->sq.wrid[idx] = wr_id; + qp->sq.w_list[idx].opcode = mlx5_opcode; + qp->sq.wqe_head[idx] = qp->sq.head + nreq; + qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; +} + + int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1985,13 +2481,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = &dev->mdev; struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf = qp->bf; int uninitialized_var(size); void *qend = qp->sq.qend; unsigned long flags; - u32 mlx5_opcode; unsigned idx; int err = 0; int inl = 0; @@ -2000,7 +2496,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq; int i; u8 next_fence = 0; - u8 opmod = 0; u8 fence; spin_lock_irqsave(&qp->sq.lock, flags); @@ -2013,36 +2508,23 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { + fence = qp->fm_cache; + num_sge = wr->num_sge; + if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; *bad_wr = wr; goto out; } - fence = qp->fm_cache; - num_sge = wr->num_sge; - if (unlikely(num_sge > qp->sq.max_gs)) { + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); + if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; *bad_wr = wr; goto out; } - idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - seg = mlx5_get_send_wqe(qp, idx); - ctrl = seg; - *(uint32_t *)(seg + 8) = 0; - ctrl->imm = send_ieth(wr); - ctrl->fm_ce_se = qp->sq_signal_bits | - (wr->send_flags & IB_SEND_SIGNALED ? - MLX5_WQE_CTRL_CQ_UPDATE : 0) | - (wr->send_flags & IB_SEND_SOLICITED ? - MLX5_WQE_CTRL_SOLICITED : 0); - - seg += sizeof(*ctrl); - size = sizeof(*ctrl) / 16; - switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; @@ -2063,28 +2545,11 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: - set_raddr_seg(seg, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); - seg += sizeof(struct mlx5_wqe_raddr_seg); - - set_atomic_seg(seg, wr); - seg += sizeof(struct mlx5_wqe_atomic_seg); - - size += (sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_atomic_seg)) / 16; - break; - case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: - set_raddr_seg(seg, wr->wr.atomic.remote_addr, - wr->wr.atomic.rkey); - seg += sizeof(struct mlx5_wqe_raddr_seg); - - set_masked_atomic_seg(seg, wr); - seg += sizeof(struct mlx5_wqe_masked_atomic_seg); - - size += (sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16; - break; + mlx5_ib_warn(dev, "Atomic operations are not supported yet\n"); + err = -ENOSYS; + *bad_wr = wr; + goto out; case IB_WR_LOCAL_INV: next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; @@ -2112,6 +2577,73 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, num_sge = 0; break; + case IB_WR_REG_SIG_MR: + qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; + mr = to_mmr(wr->wr.sig_handover.sig_mr); + + ctrl->imm = cpu_to_be32(mr->ibmr.rkey); + err = set_sig_umr_wr(wr, qp, &seg, &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_UMR); + /* + * SET_PSV WQEs are not signaled and solicited + * on error + */ + wr->send_flags &= ~IB_SEND_SIGNALED; + wr->send_flags |= IB_SEND_SOLICITED; + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem, + mr->sig->psv_memory.psv_idx, &seg, + &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_SET_PSV); + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire, + mr->sig->psv_wire.psv_idx, &seg, + &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_SET_PSV); + num_sge = 0; + goto skip_psv; + default: break; } @@ -2192,22 +2724,10 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } } - mlx5_opcode = mlx5_ib_opcode[wr->opcode]; - ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | - mlx5_opcode | - ((u32)opmod << 24)); - ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); - ctrl->fm_ce_se |= get_fence(fence, wr); - qp->fm_cache = next_fence; - if (unlikely(qp->wq_sig)) - ctrl->signature = wq_sig(ctrl); - - qp->sq.wrid[idx] = wr->wr_id; - qp->sq.w_list[idx].opcode = mlx5_opcode; - qp->sq.wqe_head[idx] = qp->sq.head + nreq; - qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); - qp->sq.w_list[idx].next = qp->sq.cur_post; - + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + get_fence(fence, wr), next_fence, + mlx5_ib_opcode[wr->opcode]); +skip_psv: if (0) dump_wqe(qp, idx, size); } @@ -2223,6 +2743,10 @@ out: qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + /* Make sure doorbell record is visible to the HCA before + * we hit doorbell */ + wmb(); + if (bf->need_lock) spin_lock(&bf->lock); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 84d297afd6a..384af6dec5e 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -35,6 +35,7 @@ #include <linux/mlx5/srq.h> #include <linux/slab.h> #include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h> #include "mlx5_ib.h" #include "user.h" @@ -78,16 +79,27 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_create_srq ucmd; + size_t ucmdlen; int err; int npages; int page_shift; int ncont; u32 offset; - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ucmdlen = + (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < + sizeof(ucmd)) ? (sizeof(ucmd) - + sizeof(ucmd.reserved)) : sizeof(ucmd); + + if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) { mlx5_ib_dbg(dev, "failed copy udata\n"); return -EFAULT; } + + if (ucmdlen == sizeof(ucmd) && + ucmd.reserved != 0) + return -EINVAL; + srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, @@ -123,7 +135,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, goto err_in; } - (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; + (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); return 0; @@ -192,7 +204,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, } srq->wq_sig = !!srq_signature; - (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; + (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; return 0; @@ -295,7 +307,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, mlx5_vfree(in); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); - goto err_srq; + goto err_usr_kern_srq; } mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn); @@ -316,6 +328,8 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, err_core: mlx5_core_destroy_srq(&dev->mdev, &srq->msrq); + +err_usr_kern_srq: if (pd->uobject) destroy_srq_user(pd, srq); else @@ -388,9 +402,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq) mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); ib_umem_release(msrq->umem); } else { - kfree(msrq->wrid); - mlx5_buf_free(&dev->mdev, &msrq->buf); - mlx5_db_free(&dev->mdev, &msrq->db); + destroy_srq_kernel(dev, msrq); } kfree(srq); diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index a886de3e593..d0ba264ac1e 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -62,6 +62,13 @@ struct mlx5_ib_alloc_ucontext_req { __u32 num_low_latency_uuars; }; +struct mlx5_ib_alloc_ucontext_req_v2 { + __u32 total_num_uuars; + __u32 num_low_latency_uuars; + __u32 flags; + __u32 reserved; +}; + struct mlx5_ib_alloc_ucontext_resp { __u32 qp_tab_size; __u32 bf_reg_size; @@ -84,6 +91,7 @@ struct mlx5_ib_create_cq { __u64 buf_addr; __u64 db_addr; __u32 cqe_size; + __u32 reserved; /* explicit padding (optional on i386) */ }; struct mlx5_ib_create_cq_resp { @@ -93,12 +101,16 @@ struct mlx5_ib_create_cq_resp { struct mlx5_ib_resize_cq { __u64 buf_addr; + __u16 cqe_size; + __u16 reserved0; + __u32 reserved1; }; struct mlx5_ib_create_srq { __u64 buf_addr; __u64 db_addr; __u32 flags; + __u32 reserved; /* explicit padding (optional on i386) */ }; struct mlx5_ib_create_srq_resp { |
