Diffstat (limited to 'drivers/infiniband/hw/mlx5')
-rw-r--r--  drivers/infiniband/hw/mlx5/Kconfig    |   2
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c       | 410
-rw-r--r--  drivers/infiniband/hw/mlx5/doorbell.c |   4
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c     |  72
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c      |  80
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h  |  35
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c       | 461
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c       | 806
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c      |  26
-rw-r--r--  drivers/infiniband/hw/mlx5/user.h     |  12
10 files changed, 1553 insertions(+), 355 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig index 8e6aebfaf8a..10df386c634 100644 --- a/drivers/infiniband/hw/mlx5/Kconfig +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -1,6 +1,6 @@  config MLX5_INFINIBAND  	tristate "Mellanox Connect-IB HCA support" -	depends on NETDEVICES && ETHERNET && PCI && X86 +	depends on NETDEVICES && ETHERNET && PCI  	select NET_VENDOR_MELLANOX  	select MLX5_CORE  	---help--- diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 344ab03948a..8ae4f896cb4 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -32,6 +32,7 @@  #include <linux/kref.h>  #include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h>  #include "mlx5_ib.h"  #include "user.h" @@ -73,14 +74,24 @@ static void *get_cqe(struct mlx5_ib_cq *cq, int n)  	return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);  } +static u8 sw_ownership_bit(int n, int nent) +{ +	return (n & nent) ? 1 : 0; +} +  static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)  {  	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);  	struct mlx5_cqe64 *cqe64;  	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; -	return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ -		!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; + +	if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) && +	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { +		return cqe; +	} else { +		return NULL; +	}  }  static void *next_cqe_sw(struct mlx5_ib_cq *cq) @@ -351,6 +362,43 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,  	qp->sq.last_poll = tail;  } +static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) +{ +	mlx5_buf_free(&dev->mdev, &buf->buf); +} + +static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, +			     struct ib_sig_err *item) +{ +	u16 syndrome = be16_to_cpu(cqe->syndrome); + +#define GUARD_ERR   (1 << 13) +#define APPTAG_ERR  (1 << 12) +#define REFTAG_ERR  (1 << 11) + +	if (syndrome & GUARD_ERR) { +		item->err_type = IB_SIG_BAD_GUARD; +		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16; +		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16; +	} else +	if (syndrome & REFTAG_ERR) { +		item->err_type = IB_SIG_BAD_REFTAG; +		item->expected = be32_to_cpu(cqe->expected_reftag); +		item->actual = be32_to_cpu(cqe->actual_reftag); +	} else +	if (syndrome & APPTAG_ERR) { +		item->err_type = IB_SIG_BAD_APPTAG; +		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff; +		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff; +	} else { +		pr_err("Got signature completion error with bad syndrome %04x\n", +		       syndrome); +	} + +	item->sig_err_offset = be64_to_cpu(cqe->err_offset); +	item->key = be32_to_cpu(cqe->mkey); +} +  static int mlx5_poll_one(struct mlx5_ib_cq *cq,  			 struct mlx5_ib_qp **cur_qp,  			 struct ib_wc *wc) @@ -360,12 +408,16 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,  	struct mlx5_cqe64 *cqe64;  	struct mlx5_core_qp *mqp;  	struct mlx5_ib_wq *wq; +	struct mlx5_sig_err_cqe *sig_err_cqe; +	struct mlx5_core_mr *mmr; +	struct mlx5_ib_mr *mr;  	uint8_t opcode;  	uint32_t qpn;  	u16 wqe_ctr;  	void *cqe;  	int idx; +repoll:  	cqe = next_cqe_sw(cq);  	if (!cqe)  		return -EAGAIN; @@ -379,7 +431,18 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,  	 */  	rmb(); -	/* TBD: resize CQ */ +	opcode = cqe64->op_own >> 4; +	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { +		if (likely(cq->resize_buf)) { +			free_cq_buf(dev, &cq->buf); +			cq->buf = 
*cq->resize_buf; +			kfree(cq->resize_buf); +			cq->resize_buf = NULL; +			goto repoll; +		} else { +			mlx5_ib_warn(dev, "unexpected resize cqe\n"); +		} +	}  	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;  	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { @@ -398,7 +461,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,  	}  	wc->qp  = &(*cur_qp)->ibqp; -	opcode = cqe64->op_own >> 4;  	switch (opcode) {  	case MLX5_CQE_REQ:  		wq = &(*cur_qp)->sq; @@ -449,6 +511,33 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,  			}  		}  		break; +	case MLX5_CQE_SIG_ERR: +		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; + +		read_lock(&dev->mdev.priv.mr_table.lock); +		mmr = __mlx5_mr_lookup(&dev->mdev, +				       mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); +		if (unlikely(!mmr)) { +			read_unlock(&dev->mdev.priv.mr_table.lock); +			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", +				     cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); +			return -EINVAL; +		} + +		mr = to_mibmr(mmr); +		get_sig_err_item(sig_err_cqe, &mr->sig->err_item); +		mr->sig->sig_err_exists = true; +		mr->sig->sigerr_count++; + +		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", +			     cq->mcq.cqn, mr->sig->err_item.key, +			     mr->sig->err_item.err_type, +			     mr->sig->err_item.sig_err_offset, +			     mr->sig->err_item.expected, +			     mr->sig->err_item.actual); + +		read_unlock(&dev->mdev.priv.mr_table.lock); +		goto repoll;  	}  	return 0; @@ -503,29 +592,35 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,  		return err;  	buf->cqe_size = cqe_size; +	buf->nent = nent;  	return 0;  } -static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) -{ -	mlx5_buf_free(&dev->mdev, &buf->buf); -} -  static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,  			  struct ib_ucontext *context, struct mlx5_ib_cq *cq,  			  int entries, struct mlx5_create_cq_mbox_in **cqb,  			  int *cqe_size, int *index, int *inlen)  {  	struct mlx5_ib_create_cq ucmd; +	size_t ucmdlen;  	int page_shift;  	int npages;  	int ncont;  	int err; -	if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) +	ucmdlen = +		(udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < +		 sizeof(ucmd)) ? (sizeof(ucmd) - +				  sizeof(ucmd.reserved)) : sizeof(ucmd); + +	if (ib_copy_from_udata(&ucmd, udata, ucmdlen))  		return -EFAULT; +	if (ucmdlen == sizeof(ucmd) && +	    ucmd.reserved != 0) +		return -EINVAL; +  	if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)  		return -EINVAL; @@ -556,7 +651,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,  		goto err_db;  	}  	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0); -	(*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; +	(*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;  	*index = to_mucontext(context)->uuari.uars[0].index; @@ -576,16 +671,16 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)  	ib_umem_release(cq->buf.umem);  } -static void init_cq_buf(struct mlx5_ib_cq *cq, int nent) +static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)  {  	int i;  	void *cqe;  	struct mlx5_cqe64 *cqe64; -	for (i = 0; i < nent; i++) { -		cqe = get_cqe(cq, i); -		cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64; -		cqe64->op_own = 0xf1; +	for (i = 0; i < buf->nent; i++) { +		cqe = get_cqe_from_buf(buf, i, buf->cqe_size); +		cqe64 = buf->cqe_size == 64 ? 
cqe : cqe + 64; +		cqe64->op_own = MLX5_CQE_INVALID << 4;  	}  } @@ -610,7 +705,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,  	if (err)  		goto err_db; -	init_cq_buf(cq, entries); +	init_cq_buf(cq, &cq->buf);  	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;  	*cqb = mlx5_vzalloc(*inlen); @@ -620,7 +715,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,  	}  	mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas); -	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT; +	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;  	*index = dev->mdev.priv.uuari.uars[0].index;  	return 0; @@ -653,8 +748,11 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,  	int eqn;  	int err; +	if (entries < 0) +		return ERR_PTR(-EINVAL); +  	entries = roundup_pow_of_two(entries + 1); -	if (entries < 1 || entries > dev->mdev.caps.max_cqes) +	if (entries > dev->mdev.caps.max_cqes)  		return ERR_PTR(-EINVAL);  	cq = kzalloc(sizeof(*cq), GFP_KERNEL); @@ -747,17 +845,9 @@ int mlx5_ib_destroy_cq(struct ib_cq *cq)  	return 0;  } -static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq, -			u32 rsn) +static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)  { -	u32 lrsn; - -	if (srq) -		lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff; -	else -		lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff; - -	return rsn == lrsn; +	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);  }  void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq) @@ -787,8 +877,8 @@ void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)  	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {  		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);  		cqe64 = (cq->mcq.cqe_sz == 64) ? 
cqe : cqe + 64; -		if (is_equal_rsn(cqe64, srq, rsn)) { -			if (srq) +		if (is_equal_rsn(cqe64, rsn)) { +			if (srq && (ntohl(cqe64->srqn) & 0xffffff))  				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));  			++nfreed;  		} else if (nfreed) { @@ -823,12 +913,266 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)  int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)  { -	return -ENOSYS; +	struct mlx5_modify_cq_mbox_in *in; +	struct mlx5_ib_dev *dev = to_mdev(cq->device); +	struct mlx5_ib_cq *mcq = to_mcq(cq); +	int err; +	u32 fsel; + +	if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) +		return -ENOSYS; + +	in = kzalloc(sizeof(*in), GFP_KERNEL); +	if (!in) +		return -ENOMEM; + +	in->cqn = cpu_to_be32(mcq->mcq.cqn); +	fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); +	in->ctx.cq_period = cpu_to_be16(cq_period); +	in->ctx.cq_max_count = cpu_to_be16(cq_count); +	in->field_select = cpu_to_be32(fsel); +	err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in)); +	kfree(in); + +	if (err) +		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn); + +	return err; +} + +static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, +		       int entries, struct ib_udata *udata, int *npas, +		       int *page_shift, int *cqe_size) +{ +	struct mlx5_ib_resize_cq ucmd; +	struct ib_umem *umem; +	int err; +	int npages; +	struct ib_ucontext *context = cq->buf.umem->context; + +	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); +	if (err) +		return err; + +	if (ucmd.reserved0 || ucmd.reserved1) +		return -EINVAL; + +	umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size, +			   IB_ACCESS_LOCAL_WRITE, 1); +	if (IS_ERR(umem)) { +		err = PTR_ERR(umem); +		return err; +	} + +	mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift, +			   npas, NULL); + +	cq->resize_umem = umem; +	*cqe_size = ucmd.cqe_size; + +	return 0; +} + +static void un_resize_user(struct mlx5_ib_cq *cq) +{ +	ib_umem_release(cq->resize_umem); +} + +static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, +			 int entries, int cqe_size) +{ +	int err; + +	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL); +	if (!cq->resize_buf) +		return -ENOMEM; + +	err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size); +	if (err) +		goto ex; + +	init_cq_buf(cq, cq->resize_buf); + +	return 0; + +ex: +	kfree(cq->resize_buf); +	return err; +} + +static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) +{ +	free_cq_buf(dev, cq->resize_buf); +	cq->resize_buf = NULL; +} + +static int copy_resize_cqes(struct mlx5_ib_cq *cq) +{ +	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); +	struct mlx5_cqe64 *scqe64; +	struct mlx5_cqe64 *dcqe64; +	void *start_cqe; +	void *scqe; +	void *dcqe; +	int ssize; +	int dsize; +	int i; +	u8 sw_own; + +	ssize = cq->buf.cqe_size; +	dsize = cq->resize_buf->cqe_size; +	if (ssize != dsize) { +		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n"); +		return -EINVAL; +	} + +	i = cq->mcq.cons_index; +	scqe = get_sw_cqe(cq, i); +	scqe64 = ssize == 64 ? scqe : scqe + 64; +	start_cqe = scqe; +	if (!scqe) { +		mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); +		return -EINVAL; +	} + +	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) { +		dcqe = get_cqe_from_buf(cq->resize_buf, +					(i + 1) & (cq->resize_buf->nent), +					dsize); +		dcqe64 = dsize == 64 ? 
dcqe : dcqe + 64; +		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent); +		memcpy(dcqe, scqe, dsize); +		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own; + +		++i; +		scqe = get_sw_cqe(cq, i); +		scqe64 = ssize == 64 ? scqe : scqe + 64; +		if (!scqe) { +			mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); +			return -EINVAL; +		} + +		if (scqe == start_cqe) { +			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", +				cq->mcq.cqn); +			return -ENOMEM; +		} +	} +	++cq->mcq.cons_index; +	return 0;  }  int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)  { -	return -ENOSYS; +	struct mlx5_ib_dev *dev = to_mdev(ibcq->device); +	struct mlx5_ib_cq *cq = to_mcq(ibcq); +	struct mlx5_modify_cq_mbox_in *in; +	int err; +	int npas; +	int page_shift; +	int inlen; +	int uninitialized_var(cqe_size); +	unsigned long flags; + +	if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { +		pr_info("Firmware does not support resize CQ\n"); +		return -ENOSYS; +	} + +	if (entries < 1) +		return -EINVAL; + +	entries = roundup_pow_of_two(entries + 1); +	if (entries > dev->mdev.caps.max_cqes + 1) +		return -EINVAL; + +	if (entries == ibcq->cqe + 1) +		return 0; + +	mutex_lock(&cq->resize_mutex); +	if (udata) { +		err = resize_user(dev, cq, entries, udata, &npas, &page_shift, +				  &cqe_size); +	} else { +		cqe_size = 64; +		err = resize_kernel(dev, cq, entries, cqe_size); +		if (!err) { +			npas = cq->resize_buf->buf.npages; +			page_shift = cq->resize_buf->buf.page_shift; +		} +	} + +	if (err) +		goto ex; + +	inlen = sizeof(*in) + npas * sizeof(in->pas[0]); +	in = mlx5_vzalloc(inlen); +	if (!in) { +		err = -ENOMEM; +		goto ex_resize; +	} + +	if (udata) +		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, +				     in->pas, 0); +	else +		mlx5_fill_page_array(&cq->resize_buf->buf, in->pas); + +	in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE  | +				       MLX5_MODIFY_CQ_MASK_PG_OFFSET | +				       MLX5_MODIFY_CQ_MASK_PG_SIZE); +	in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; +	in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; +	in->ctx.page_offset = 0; +	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24); +	in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE); +	in->cqn = cpu_to_be32(cq->mcq.cqn); + +	err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen); +	if (err) +		goto ex_alloc; + +	if (udata) { +		cq->ibcq.cqe = entries - 1; +		ib_umem_release(cq->buf.umem); +		cq->buf.umem = cq->resize_umem; +		cq->resize_umem = NULL; +	} else { +		struct mlx5_ib_cq_buf tbuf; +		int resized = 0; + +		spin_lock_irqsave(&cq->lock, flags); +		if (cq->resize_buf) { +			err = copy_resize_cqes(cq); +			if (!err) { +				tbuf = cq->buf; +				cq->buf = *cq->resize_buf; +				kfree(cq->resize_buf); +				cq->resize_buf = NULL; +				resized = 1; +			} +		} +		cq->ibcq.cqe = entries - 1; +		spin_unlock_irqrestore(&cq->lock, flags); +		if (resized) +			free_cq_buf(dev, &tbuf); +	} +	mutex_unlock(&cq->resize_mutex); + +	mlx5_vfree(in); +	return 0; + +ex_alloc: +	mlx5_vfree(in); + +ex_resize: +	if (udata) +		un_resize_user(cq); +	else +		un_resize_kernel(dev, cq); +ex: +	mutex_unlock(&cq->resize_mutex); +	return err;  }  int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 256a23344f2..ece028fc47d 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -47,7 +47,6 @@ int 
mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,  			struct mlx5_db *db)  {  	struct mlx5_ib_user_db_page *page; -	struct ib_umem_chunk *chunk;  	int err = 0;  	mutex_lock(&context->db_page_mutex); @@ -75,8 +74,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,  	list_add(&page->list, &context->db_page_list);  found: -	chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); -	db->dma		= sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); +	db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);  	db->u.user_page = page;  	++page->refcnt; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3f831de9a4d..364d4b6937f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -46,8 +46,8 @@  #include "mlx5_ib.h"  #define DRIVER_NAME "mlx5_ib" -#define DRIVER_VERSION "1.0" -#define DRIVER_RELDATE	"June 2013" +#define DRIVER_VERSION "2.2-1" +#define DRIVER_RELDATE	"Feb 2014"  MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");  MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); @@ -164,6 +164,7 @@ int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)  static int alloc_comp_eqs(struct mlx5_ib_dev *dev)  {  	struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; +	char name[MLX5_MAX_EQ_NAME];  	struct mlx5_eq *eq, *n;  	int ncomp_vec;  	int nent; @@ -180,11 +181,10 @@ static int alloc_comp_eqs(struct mlx5_ib_dev *dev)  			goto clean;  		} -		snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); +		snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);  		err = mlx5_create_map_eq(&dev->mdev, eq,  					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0, -					 eq->name, -					 &dev->mdev.priv.uuari.uars[0]); +					 name, &dev->mdev.priv.uuari.uars[0]);  		if (err) {  			kfree(eq);  			goto clean; @@ -261,8 +261,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,  	props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |  		IB_DEVICE_PORT_ACTIVE_EVENT		|  		IB_DEVICE_SYS_IMAGE_GUID		| -		IB_DEVICE_RC_RNR_NAK_GEN		| -		IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; +		IB_DEVICE_RC_RNR_NAK_GEN;  	flags = dev->mdev.caps.flags;  	if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR)  		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; @@ -274,6 +273,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,  	if (flags & MLX5_DEV_CAP_FLAG_XRC)  		props->device_cap_flags |= IB_DEVICE_XRC;  	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; +	if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) { +		props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; +		/* At this stage no support for signature handover */ +		props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | +				      IB_PROT_T10DIF_TYPE_2 | +				      IB_PROT_T10DIF_TYPE_3; +		props->sig_guard_cap = IB_GUARD_T10DIF_CRC | +				       IB_GUARD_T10DIF_CSUM; +	} +	if (flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST) +		props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;  	props->vendor_id	   = be32_to_cpup((__be32 *)(out_mad->data + 36)) &  		0xffffff; @@ -301,9 +311,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,  	props->max_srq_sge	   = max_rq_sg - 1;  	props->max_fast_reg_page_list_len = (unsigned int)-1;  	props->local_ca_ack_delay  = dev->mdev.caps.local_ca_ack_delay; -	props->atomic_cap	   = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ? 
-		IB_ATOMIC_HCA : IB_ATOMIC_NONE; -	props->masked_atomic_cap   = IB_ATOMIC_HCA; +	props->atomic_cap	   = IB_ATOMIC_NONE; +	props->masked_atomic_cap   = IB_ATOMIC_NONE;  	props->max_pkeys	   = be16_to_cpup((__be16 *)(out_mad->data + 28));  	props->max_mcast_grp	   = 1 << dev->mdev.caps.log_max_mcg;  	props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg; @@ -537,34 +546,51 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,  						  struct ib_udata *udata)  {  	struct mlx5_ib_dev *dev = to_mdev(ibdev); -	struct mlx5_ib_alloc_ucontext_req req; +	struct mlx5_ib_alloc_ucontext_req_v2 req;  	struct mlx5_ib_alloc_ucontext_resp resp;  	struct mlx5_ib_ucontext *context;  	struct mlx5_uuar_info *uuari;  	struct mlx5_uar *uars; +	int gross_uuars;  	int num_uars; +	int ver;  	int uuarn;  	int err;  	int i; +	int reqlen;  	if (!dev->ib_active)  		return ERR_PTR(-EAGAIN); -	err = ib_copy_from_udata(&req, udata, sizeof(req)); +	memset(&req, 0, sizeof(req)); +	reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); +	if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) +		ver = 0; +	else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2)) +		ver = 2; +	else +		return ERR_PTR(-EINVAL); + +	err = ib_copy_from_udata(&req, udata, reqlen);  	if (err)  		return ERR_PTR(err); +	if (req.flags || req.reserved) +		return ERR_PTR(-EINVAL); +  	if (req.total_num_uuars > MLX5_MAX_UUARS)  		return ERR_PTR(-ENOMEM);  	if (req.total_num_uuars == 0)  		return ERR_PTR(-EINVAL); -	req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE); +	req.total_num_uuars = ALIGN(req.total_num_uuars, +				    MLX5_NON_FP_BF_REGS_PER_PAGE);  	if (req.num_low_latency_uuars > req.total_num_uuars - 1)  		return ERR_PTR(-EINVAL); -	num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE; +	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; +	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;  	resp.qp_tab_size      = 1 << dev->mdev.caps.log_max_qp;  	resp.bf_reg_size      = dev->mdev.caps.bf_reg_size;  	resp.cache_line_size  = L1_CACHE_BYTES; @@ -586,7 +612,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,  		goto out_ctx;  	} -	uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars), +	uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),  				sizeof(*uuari->bitmap),  				GFP_KERNEL);  	if (!uuari->bitmap) { @@ -596,13 +622,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,  	/*  	 * clear all fast path uuars  	 */ -	for (i = 0; i < req.total_num_uuars; i++) { +	for (i = 0; i < gross_uuars; i++) {  		uuarn = i & 3;  		if (uuarn == 2 || uuarn == 3)  			set_bit(i, uuari->bitmap);  	} -	uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL); +	uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);  	if (!uuari->count) {  		err = -ENOMEM;  		goto out_bitmap; @@ -624,6 +650,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,  	if (err)  		goto out_uars; +	uuari->ver = ver;  	uuari->num_low_latency_uuars = req.num_low_latency_uuars;  	uuari->uars = uars;  	uuari->num_uars = num_uars; @@ -746,7 +773,8 @@ static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)  	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);  	seg->start_addr = 0; -	err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in)); +	err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in), +				    NULL, NULL, NULL);  	if (err) {  		mlx5_ib_warn(dev, "failed to create mkey, 
%d\n", err);  		goto err_in; @@ -1006,6 +1034,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,  	ibev.device	      = &ibdev->ib_dev;  	ibev.element.port_num = port; +	if (port < 1 || port > ibdev->num_ports) { +		mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); +		return; +	} +  	if (ibdev->ib_active)  		ib_dispatch_event(&ibev);  } @@ -1401,12 +1434,15 @@ static int init_one(struct pci_dev *pdev,  	dev->ib_dev.get_dma_mr		= mlx5_ib_get_dma_mr;  	dev->ib_dev.reg_user_mr		= mlx5_ib_reg_user_mr;  	dev->ib_dev.dereg_mr		= mlx5_ib_dereg_mr; +	dev->ib_dev.destroy_mr		= mlx5_ib_destroy_mr;  	dev->ib_dev.attach_mcast	= mlx5_ib_mcg_attach;  	dev->ib_dev.detach_mcast	= mlx5_ib_mcg_detach;  	dev->ib_dev.process_mad		= mlx5_ib_process_mad; +	dev->ib_dev.create_mr		= mlx5_ib_create_mr;  	dev->ib_dev.alloc_fast_reg_mr	= mlx5_ib_alloc_fast_reg_mr;  	dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;  	dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list; +	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;  	if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {  		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 3a5322870b9..8499aec94db 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -44,16 +44,17 @@  void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,  			int *ncont, int *order)  { -	struct ib_umem_chunk *chunk;  	unsigned long tmp;  	unsigned long m; -	int i, j, k; +	int i, k;  	u64 base = 0;  	int p = 0;  	int skip;  	int mask;  	u64 len;  	u64 pfn; +	struct scatterlist *sg; +	int entry;  	addr = addr >> PAGE_SHIFT;  	tmp = (unsigned long)addr; @@ -61,32 +62,31 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,  	skip = 1 << m;  	mask = skip - 1;  	i = 0; -	list_for_each_entry(chunk, &umem->chunk_list, list) -		for (j = 0; j < chunk->nmap; j++) { -			len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; -			pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT; -			for (k = 0; k < len; k++) { -				if (!(i & mask)) { -					tmp = (unsigned long)pfn; -					m = min(m, find_first_bit(&tmp, sizeof(tmp))); +	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { +		len = sg_dma_len(sg) >> PAGE_SHIFT; +		pfn = sg_dma_address(sg) >> PAGE_SHIFT; +		for (k = 0; k < len; k++) { +			if (!(i & mask)) { +				tmp = (unsigned long)pfn; +				m = min(m, find_first_bit(&tmp, sizeof(tmp))); +				skip = 1 << m; +				mask = skip - 1; +				base = pfn; +				p = 0; +			} else { +				if (base + p != pfn) { +					tmp = (unsigned long)p; +					m = find_first_bit(&tmp, sizeof(tmp));  					skip = 1 << m;  					mask = skip - 1;  					base = pfn;  					p = 0; -				} else { -					if (base + p != pfn) { -						tmp = (unsigned long)p; -						m = find_first_bit(&tmp, sizeof(tmp)); -						skip = 1 << m; -						mask = skip - 1; -						base = pfn; -						p = 0; -					}  				} -				p++; -				i++;  			} +			p++; +			i++;  		} +	}  	if (i) {  		m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); @@ -112,32 +112,32 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,  {  	int shift = page_shift - PAGE_SHIFT;  	int mask = (1 << shift) - 1; -	struct ib_umem_chunk *chunk; -	int i, j, k; +	int i, k;  	u64 cur = 0;  	u64 base;  	int len; +	struct scatterlist *sg; +	int entry;  	i = 0; -	list_for_each_entry(chunk, &umem->chunk_list, list) -		for (j = 0; 
j < chunk->nmap; j++) { -			len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; -			base = sg_dma_address(&chunk->page_list[j]); -			for (k = 0; k < len; k++) { -				if (!(i & mask)) { -					cur = base + (k << PAGE_SHIFT); -					if (umr) -						cur |= 3; +	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { +		len = sg_dma_len(sg) >> PAGE_SHIFT; +		base = sg_dma_address(sg); +		for (k = 0; k < len; k++) { +			if (!(i & mask)) { +				cur = base + (k << PAGE_SHIFT); +				if (umr) +					cur |= 3; -					pas[i >> shift] = cpu_to_be64(cur); -					mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", -						    i >> shift, be64_to_cpu(pas[i >> shift])); -				}  else -					mlx5_ib_dbg(dev, "=====> 0x%llx\n", -						    base + (k << PAGE_SHIFT)); -				i++; -			} +				pas[i >> shift] = cpu_to_be64(cur); +				mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", +					    i >> shift, be64_to_cpu(pas[i >> shift])); +			}  else +				mlx5_ib_dbg(dev, "=====> 0x%llx\n", +					    base + (k << PAGE_SHIFT)); +			i++;  		} +	}  }  int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 836be915724..f2ccf1a5a29 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -189,12 +189,16 @@ struct mlx5_ib_qp {  	int			create_type;  	u32			pa_lkey; + +	/* Store signature errors */ +	bool			signature_en;  };  struct mlx5_ib_cq_buf {  	struct mlx5_buf		buf;  	struct ib_umem		*umem;  	int			cqe_size; +	int			nent;  };  enum mlx5_ib_qp_flags { @@ -220,7 +224,7 @@ struct mlx5_ib_cq {  	/* protect resize cq  	 */  	struct mutex		resize_mutex; -	struct mlx5_ib_cq_resize *resize_buf; +	struct mlx5_ib_cq_buf  *resize_buf;  	struct ib_umem	       *resize_umem;  	int			cqe_size;  }; @@ -260,8 +264,9 @@ struct mlx5_ib_mr {  	__be64			*pas;  	dma_addr_t		dma;  	int			npages; -	struct completion	done; -	enum ib_wc_status	status; +	struct mlx5_ib_dev     *dev; +	struct mlx5_create_mkey_mbox_out out; +	struct mlx5_core_sig_ctx    *sig;  };  struct mlx5_ib_fast_reg_page_list { @@ -270,6 +275,17 @@ struct mlx5_ib_fast_reg_page_list {  	dma_addr_t			map;  }; +struct mlx5_ib_umr_context { +	enum ib_wc_status	status; +	struct completion	done; +}; + +static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) +{ +	context->status = -1; +	init_completion(&context->done); +} +  struct umr_common {  	struct ib_pd	*pd;  	struct ib_cq	*cq; @@ -323,6 +339,7 @@ struct mlx5_cache_ent {  	struct mlx5_ib_dev     *dev;  	struct work_struct	work;  	struct delayed_work	dwork; +	int			pending;  };  struct mlx5_mr_cache { @@ -358,6 +375,8 @@ struct mlx5_ib_dev {  	spinlock_t			mr_lock;  	struct mlx5_ib_resources	devr;  	struct mlx5_mr_cache		cache; +	struct timer_list		delay_timer; +	int				fill_delay;  };  static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -390,6 +409,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)  	return container_of(mqp, struct mlx5_ib_qp, mqp);  } +static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) +{ +	return container_of(mmr, struct mlx5_ib_mr, mmr); +} +  static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)  {  	return container_of(ibpd, struct mlx5_ib_pd, ibpd); @@ -489,6 +513,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,  				  u64 virt_addr, int access_flags,  				  struct ib_udata *udata);  int mlx5_ib_dereg_mr(struct ib_mr *ibmr); +int mlx5_ib_destroy_mr(struct ib_mr *ibmr); +struct 
ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, +				struct ib_mr_init_attr *mr_init_attr);  struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,  					int max_page_list_len);  struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, @@ -524,6 +551,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);  int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);  int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);  void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, +			    struct ib_mr_status *mr_status);  static inline void init_query_mad(struct ib_smp *mad)  { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bd41df95b6f..afa873bd028 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -35,11 +35,16 @@  #include <linux/random.h>  #include <linux/debugfs.h>  #include <linux/export.h> +#include <linux/delay.h>  #include <rdma/ib_umem.h>  #include "mlx5_ib.h"  enum { -	DEF_CACHE_SIZE	= 10, +	MAX_PENDING_REG_MR = 8, +}; + +enum { +	MLX5_UMR_ALIGN	= 2048  };  static __be64 *mr_align(__be64 *ptr, int align) @@ -59,15 +64,67 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)  		return order - cache->ent[0].order;  } +static void reg_mr_callback(int status, void *context) +{ +	struct mlx5_ib_mr *mr = context; +	struct mlx5_ib_dev *dev = mr->dev; +	struct mlx5_mr_cache *cache = &dev->cache; +	int c = order2idx(dev, mr->order); +	struct mlx5_cache_ent *ent = &cache->ent[c]; +	u8 key; +	unsigned long flags; +	struct mlx5_mr_table *table = &dev->mdev.priv.mr_table; +	int err; + +	spin_lock_irqsave(&ent->lock, flags); +	ent->pending--; +	spin_unlock_irqrestore(&ent->lock, flags); +	if (status) { +		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); +		kfree(mr); +		dev->fill_delay = 1; +		mod_timer(&dev->delay_timer, jiffies + HZ); +		return; +	} + +	if (mr->out.hdr.status) { +		mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n", +			     mr->out.hdr.status, +			     be32_to_cpu(mr->out.hdr.syndrome)); +		kfree(mr); +		dev->fill_delay = 1; +		mod_timer(&dev->delay_timer, jiffies + HZ); +		return; +	} + +	spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags); +	key = dev->mdev.priv.mkey_key++; +	spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags); +	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; + +	cache->last_add = jiffies; + +	spin_lock_irqsave(&ent->lock, flags); +	list_add_tail(&mr->list, &ent->head); +	ent->cur++; +	ent->size++; +	spin_unlock_irqrestore(&ent->lock, flags); + +	write_lock_irqsave(&table->lock, flags); +	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key), +				&mr->mmr); +	if (err) +		pr_err("Error inserting to mr tree. 
0x%x\n", -err); +	write_unlock_irqrestore(&table->lock, flags); +} +  static int add_keys(struct mlx5_ib_dev *dev, int c, int num)  { -	struct device *ddev = dev->ib_dev.dma_device;  	struct mlx5_mr_cache *cache = &dev->cache;  	struct mlx5_cache_ent *ent = &cache->ent[c];  	struct mlx5_create_mkey_mbox_in *in;  	struct mlx5_ib_mr *mr;  	int npages = 1 << ent->order; -	int size = sizeof(u64) * npages;  	int err = 0;  	int i; @@ -76,87 +133,66 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)  		return -ENOMEM;  	for (i = 0; i < num; i++) { +		if (ent->pending >= MAX_PENDING_REG_MR) { +			err = -EAGAIN; +			break; +		} +  		mr = kzalloc(sizeof(*mr), GFP_KERNEL);  		if (!mr) {  			err = -ENOMEM; -			goto out; +			break;  		}  		mr->order = ent->order;  		mr->umred = 1; -		mr->pas = kmalloc(size + 0x3f, GFP_KERNEL); -		if (!mr->pas) { -			kfree(mr); -			err = -ENOMEM; -			goto out; -		} -		mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size, -					 DMA_TO_DEVICE); -		if (dma_mapping_error(ddev, mr->dma)) { -			kfree(mr->pas); -			kfree(mr); -			err = -ENOMEM; -			goto out; -		} - +		mr->dev = dev;  		in->seg.status = 1 << 6;  		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);  		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);  		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;  		in->seg.log2_page_size = 12; +		spin_lock_irq(&ent->lock); +		ent->pending++; +		spin_unlock_irq(&ent->lock);  		err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, -					    sizeof(*in)); +					    sizeof(*in), reg_mr_callback, +					    mr, &mr->out);  		if (err) {  			mlx5_ib_warn(dev, "create mkey failed %d\n", err); -			dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); -			kfree(mr->pas);  			kfree(mr); -			goto out; +			break;  		} -		cache->last_add = jiffies; - -		spin_lock(&ent->lock); -		list_add_tail(&mr->list, &ent->head); -		ent->cur++; -		ent->size++; -		spin_unlock(&ent->lock);  	} -out:  	kfree(in);  	return err;  }  static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)  { -	struct device *ddev = dev->ib_dev.dma_device;  	struct mlx5_mr_cache *cache = &dev->cache;  	struct mlx5_cache_ent *ent = &cache->ent[c];  	struct mlx5_ib_mr *mr; -	int size;  	int err;  	int i;  	for (i = 0; i < num; i++) { -		spin_lock(&ent->lock); +		spin_lock_irq(&ent->lock);  		if (list_empty(&ent->head)) { -			spin_unlock(&ent->lock); +			spin_unlock_irq(&ent->lock);  			return;  		}  		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);  		list_del(&mr->list);  		ent->cur--;  		ent->size--; -		spin_unlock(&ent->lock); +		spin_unlock_irq(&ent->lock);  		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); -		if (err) { +		if (err)  			mlx5_ib_warn(dev, "failed destroy mkey\n"); -		} else { -			size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40); -			dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); -			kfree(mr->pas); +		else  			kfree(mr); -		}  	}  } @@ -183,9 +219,13 @@ static ssize_t size_write(struct file *filp, const char __user *buf,  		return -EINVAL;  	if (var > ent->size) { -		err = add_keys(dev, c, var - ent->size); -		if (err) -			return err; +		do { +			err = add_keys(dev, c, var - ent->size); +			if (err && err != -EAGAIN) +				return err; + +			usleep_range(3000, 5000); +		} while (err);  	} else if (var < ent->size) {  		remove_keys(dev, c, ent->size - var);  	} @@ -301,23 +341,37 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)  	struct mlx5_ib_dev *dev = ent->dev;  	struct mlx5_mr_cache *cache = &dev->cache;  	int i = 
order2idx(dev, ent->order); +	int err;  	if (cache->stopped)  		return;  	ent = &dev->cache.ent[i]; -	if (ent->cur < 2 * ent->limit) { -		add_keys(dev, i, 1); -		if (ent->cur < 2 * ent->limit) -			queue_work(cache->wq, &ent->work); +	if (ent->cur < 2 * ent->limit && !dev->fill_delay) { +		err = add_keys(dev, i, 1); +		if (ent->cur < 2 * ent->limit) { +			if (err == -EAGAIN) { +				mlx5_ib_dbg(dev, "returned eagain, order %d\n", +					    i + 2); +				queue_delayed_work(cache->wq, &ent->dwork, +						   msecs_to_jiffies(3)); +			} else if (err) { +				mlx5_ib_warn(dev, "command failed order %d, err %d\n", +					     i + 2, err); +				queue_delayed_work(cache->wq, &ent->dwork, +						   msecs_to_jiffies(1000)); +			} else { +				queue_work(cache->wq, &ent->work); +			} +		}  	} else if (ent->cur > 2 * ent->limit) {  		if (!someone_adding(cache) && -		    time_after(jiffies, cache->last_add + 60 * HZ)) { +		    time_after(jiffies, cache->last_add + 300 * HZ)) {  			remove_keys(dev, i, 1);  			if (ent->cur > ent->limit)  				queue_work(cache->wq, &ent->work);  		} else { -			queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ); +			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);  		}  	}  } @@ -357,18 +411,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)  		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); -		spin_lock(&ent->lock); +		spin_lock_irq(&ent->lock);  		if (!list_empty(&ent->head)) {  			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,  					      list);  			list_del(&mr->list);  			ent->cur--; -			spin_unlock(&ent->lock); +			spin_unlock_irq(&ent->lock);  			if (ent->cur < ent->limit)  				queue_work(cache->wq, &ent->work);  			break;  		} -		spin_unlock(&ent->lock); +		spin_unlock_irq(&ent->lock);  		queue_work(cache->wq, &ent->work); @@ -395,12 +449,12 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)  		return;  	}  	ent = &cache->ent[c]; -	spin_lock(&ent->lock); +	spin_lock_irq(&ent->lock);  	list_add_tail(&mr->list, &ent->head);  	ent->cur++;  	if (ent->cur > 2 * ent->limit)  		shrink = 1; -	spin_unlock(&ent->lock); +	spin_unlock_irq(&ent->lock);  	if (shrink)  		queue_work(cache->wq, &ent->work); @@ -408,33 +462,28 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)  static void clean_keys(struct mlx5_ib_dev *dev, int c)  { -	struct device *ddev = dev->ib_dev.dma_device;  	struct mlx5_mr_cache *cache = &dev->cache;  	struct mlx5_cache_ent *ent = &cache->ent[c];  	struct mlx5_ib_mr *mr; -	int size;  	int err; +	cancel_delayed_work(&ent->dwork);  	while (1) { -		spin_lock(&ent->lock); +		spin_lock_irq(&ent->lock);  		if (list_empty(&ent->head)) { -			spin_unlock(&ent->lock); +			spin_unlock_irq(&ent->lock);  			return;  		}  		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);  		list_del(&mr->list);  		ent->cur--;  		ent->size--; -		spin_unlock(&ent->lock); +		spin_unlock_irq(&ent->lock);  		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); -		if (err) { +		if (err)  			mlx5_ib_warn(dev, "failed destroy mkey\n"); -		} else { -			size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40); -			dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); -			kfree(mr->pas); +		else  			kfree(mr); -		}  	}  } @@ -490,12 +539,18 @@ static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)  	debugfs_remove_recursive(dev->cache.root);  } +static void delay_time_func(unsigned long ctx) +{ +	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx; + +	dev->fill_delay = 0; +} 
+  int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)  {  	struct mlx5_mr_cache *cache = &dev->cache;  	struct mlx5_cache_ent *ent;  	int limit; -	int size;  	int err;  	int i; @@ -505,6 +560,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)  		return -ENOMEM;  	} +	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);  	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {  		INIT_LIST_HEAD(&cache->ent[i].head);  		spin_lock_init(&cache->ent[i].lock); @@ -515,13 +571,11 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)  		ent->order = i + 2;  		ent->dev = dev; -		if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) { -			size = dev->mdev.profile->mr_cache[i].size; +		if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)  			limit = dev->mdev.profile->mr_cache[i].limit; -		} else { -			size = DEF_CACHE_SIZE; +		else  			limit = 0; -		} +  		INIT_WORK(&ent->work, cache_work_func);  		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);  		ent->limit = limit; @@ -540,13 +594,16 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)  	int i;  	dev->cache.stopped = 1; -	destroy_workqueue(dev->cache.wq); +	flush_workqueue(dev->cache.wq);  	mlx5_mr_cache_debugfs_cleanup(dev);  	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)  		clean_keys(dev, i); +	destroy_workqueue(dev->cache.wq); +	del_timer_sync(&dev->delay_timer); +  	return 0;  } @@ -575,7 +632,8 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)  	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);  	seg->start_addr = 0; -	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in)); +	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL, +				    NULL);  	if (err)  		goto err_in; @@ -650,7 +708,7 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,  void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)  { -	struct mlx5_ib_mr *mr; +	struct mlx5_ib_umr_context *context;  	struct ib_wc wc;  	int err; @@ -663,9 +721,9 @@ void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)  		if (err == 0)  			break; -		mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id; -		mr->status = wc.status; -		complete(&mr->done); +		context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id; +		context->status = wc.status; +		complete(&context->done);  	}  	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);  } @@ -675,21 +733,24 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,  				  int page_shift, int order, int access_flags)  {  	struct mlx5_ib_dev *dev = to_mdev(pd->device); +	struct device *ddev = dev->ib_dev.dma_device;  	struct umr_common *umrc = &dev->umrc; +	struct mlx5_ib_umr_context umr_context;  	struct ib_send_wr wr, *bad;  	struct mlx5_ib_mr *mr;  	struct ib_sge sg; -	int err; +	int size = sizeof(u64) * npages; +	int err = 0;  	int i; -	for (i = 0; i < 10; i++) { +	for (i = 0; i < 1; i++) {  		mr = alloc_cached_mr(dev, order);  		if (mr)  			break;  		err = add_keys(dev, order2idx(dev, order), 1); -		if (err) { -			mlx5_ib_warn(dev, "add_keys failed\n"); +		if (err && err != -EAGAIN) { +			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);  			break;  		}  	} @@ -697,38 +758,58 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,  	if (!mr)  		return ERR_PTR(-EAGAIN); -	mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1); +	mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); +	if (!mr->pas) { +		err = -ENOMEM; +		goto free_mr; +	} + +	mlx5_ib_populate_pas(dev, umem, page_shift, +			     mr_align(mr->pas, MLX5_UMR_ALIGN), 1); + 
+	mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size, +				 DMA_TO_DEVICE); +	if (dma_mapping_error(ddev, mr->dma)) { +		err = -ENOMEM; +		goto free_pas; +	}  	memset(&wr, 0, sizeof(wr)); -	wr.wr_id = (u64)(unsigned long)mr; +	wr.wr_id = (u64)(unsigned long)&umr_context;  	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags); -	/* We serialize polls so one process does not kidnap another's -	 * completion. This is not a problem since wr is completed in -	 * around 1 usec -	 */ +	mlx5_ib_init_umr_context(&umr_context);  	down(&umrc->sem); -	init_completion(&mr->done);  	err = ib_post_send(umrc->qp, &wr, &bad);  	if (err) {  		mlx5_ib_warn(dev, "post send failed, err %d\n", err); -		up(&umrc->sem); -		goto error; +		goto unmap_dma; +	} else { +		wait_for_completion(&umr_context.done); +		if (umr_context.status != IB_WC_SUCCESS) { +			mlx5_ib_warn(dev, "reg umr failed\n"); +			err = -EFAULT; +		}  	} -	wait_for_completion(&mr->done); + +	mr->mmr.iova = virt_addr; +	mr->mmr.size = len; +	mr->mmr.pd = to_mpd(pd)->pdn; + +unmap_dma:  	up(&umrc->sem); +	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); -	if (mr->status != IB_WC_SUCCESS) { -		mlx5_ib_warn(dev, "reg umr failed\n"); -		err = -EFAULT; -		goto error; +free_pas: +	kfree(mr->pas); + +free_mr: +	if (err) { +		free_cached_mr(dev, mr); +		return ERR_PTR(err);  	}  	return mr; - -error: -	free_cached_mr(dev, mr); -	return ERR_PTR(err);  }  static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, @@ -763,8 +844,10 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,  	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));  	in->seg.log2_page_size = page_shift;  	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); -	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); -	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen); +	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, +							 1 << page_shift)); +	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL, +				    NULL, NULL);  	if (err) {  		mlx5_ib_warn(dev, "create mkey failed\n");  		goto err_2; @@ -855,24 +938,26 @@ error:  static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)  {  	struct umr_common *umrc = &dev->umrc; +	struct mlx5_ib_umr_context umr_context;  	struct ib_send_wr wr, *bad;  	int err;  	memset(&wr, 0, sizeof(wr)); -	wr.wr_id = (u64)(unsigned long)mr; +	wr.wr_id = (u64)(unsigned long)&umr_context;  	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); +	mlx5_ib_init_umr_context(&umr_context);  	down(&umrc->sem); -	init_completion(&mr->done);  	err = ib_post_send(umrc->qp, &wr, &bad);  	if (err) {  		up(&umrc->sem);  		mlx5_ib_dbg(dev, "err %d\n", err);  		goto error; +	} else { +		wait_for_completion(&umr_context.done); +		up(&umrc->sem);  	} -	wait_for_completion(&mr->done); -	up(&umrc->sem); -	if (mr->status != IB_WC_SUCCESS) { +	if (umr_context.status != IB_WC_SUCCESS) {  		mlx5_ib_warn(dev, "unreg umr failed\n");  		err = -EFAULT;  		goto error; @@ -921,6 +1006,122 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)  	return 0;  } +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, +				struct ib_mr_init_attr *mr_init_attr) +{ +	struct mlx5_ib_dev *dev = to_mdev(pd->device); +	struct mlx5_create_mkey_mbox_in *in; +	struct mlx5_ib_mr *mr; +	int access_mode, err; +	int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4); + +	mr = kzalloc(sizeof(*mr), GFP_KERNEL); +	
if (!mr) +		return ERR_PTR(-ENOMEM); + +	in = kzalloc(sizeof(*in), GFP_KERNEL); +	if (!in) { +		err = -ENOMEM; +		goto err_free; +	} + +	in->seg.status = 1 << 6; /* free */ +	in->seg.xlt_oct_size = cpu_to_be32(ndescs); +	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); +	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); +	access_mode = MLX5_ACCESS_MODE_MTT; + +	if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) { +		u32 psv_index[2]; + +		in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) | +							   MLX5_MKEY_BSF_EN); +		in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); +		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); +		if (!mr->sig) { +			err = -ENOMEM; +			goto err_free_in; +		} + +		/* create mem & wire PSVs */ +		err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn, +					   2, psv_index); +		if (err) +			goto err_free_sig; + +		access_mode = MLX5_ACCESS_MODE_KLM; +		mr->sig->psv_memory.psv_idx = psv_index[0]; +		mr->sig->psv_wire.psv_idx = psv_index[1]; + +		mr->sig->sig_status_checked = true; +		mr->sig->sig_err_exists = false; +		/* Next UMR, Arm SIGERR */ +		++mr->sig->sigerr_count; +	} + +	in->seg.flags = MLX5_PERM_UMR_EN | access_mode; +	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), +				    NULL, NULL, NULL); +	if (err) +		goto err_destroy_psv; + +	mr->ibmr.lkey = mr->mmr.key; +	mr->ibmr.rkey = mr->mmr.key; +	mr->umem = NULL; +	kfree(in); + +	return &mr->ibmr; + +err_destroy_psv: +	if (mr->sig) { +		if (mlx5_core_destroy_psv(&dev->mdev, +					  mr->sig->psv_memory.psv_idx)) +			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", +				     mr->sig->psv_memory.psv_idx); +		if (mlx5_core_destroy_psv(&dev->mdev, +					  mr->sig->psv_wire.psv_idx)) +			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", +				     mr->sig->psv_wire.psv_idx); +	} +err_free_sig: +	kfree(mr->sig); +err_free_in: +	kfree(in); +err_free: +	kfree(mr); +	return ERR_PTR(err); +} + +int mlx5_ib_destroy_mr(struct ib_mr *ibmr) +{ +	struct mlx5_ib_dev *dev = to_mdev(ibmr->device); +	struct mlx5_ib_mr *mr = to_mmr(ibmr); +	int err; + +	if (mr->sig) { +		if (mlx5_core_destroy_psv(&dev->mdev, +					  mr->sig->psv_memory.psv_idx)) +			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", +				     mr->sig->psv_memory.psv_idx); +		if (mlx5_core_destroy_psv(&dev->mdev, +					  mr->sig->psv_wire.psv_idx)) +			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", +				     mr->sig->psv_wire.psv_idx); +		kfree(mr->sig); +	} + +	err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); +	if (err) { +		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", +			     mr->mmr.key, err); +		return err; +	} + +	kfree(mr); + +	return err; +} +  struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,  					int max_page_list_len)  { @@ -948,7 +1149,8 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,  	 * TBD not needed - issue 197292 */  	in->seg.log2_page_size = PAGE_SHIFT; -	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in)); +	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL, +				    NULL, NULL);  	kfree(in);  	if (err)  		goto err_free; @@ -1005,3 +1207,44 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)  	kfree(mfrpl->ibfrpl.page_list);  	kfree(mfrpl);  } + +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, +			    struct ib_mr_status *mr_status) +{ +	struct mlx5_ib_mr *mmr = to_mmr(ibmr); +	int ret = 0; + +	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { +		pr_err("Invalid status check 
mask\n"); +		ret = -EINVAL; +		goto done; +	} + +	mr_status->fail_status = 0; +	if (check_mask & IB_MR_CHECK_SIG_STATUS) { +		if (!mmr->sig) { +			ret = -EINVAL; +			pr_err("signature status check requested on a non-signature enabled MR\n"); +			goto done; +		} + +		mmr->sig->sig_status_checked = true; +		if (!mmr->sig->sig_err_exists) +			goto done; + +		if (ibmr->lkey == mmr->sig->err_item.key) +			memcpy(&mr_status->sig_err, &mmr->sig->err_item, +			       sizeof(mr_status->sig_err)); +		else { +			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; +			mr_status->sig_err.sig_err_offset = 0; +			mr_status->sig_err.key = mmr->sig->err_item.key; +		} + +		mmr->sig->sig_err_exists = false; +		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; +	} + +done: +	return ret; +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 045f8cdbd30..bbbcf389272 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -203,7 +203,7 @@ static int sq_overhead(enum ib_qp_type qp_type)  	switch (qp_type) {  	case IB_QPT_XRC_INI: -		size = sizeof(struct mlx5_wqe_xrc_seg); +		size += sizeof(struct mlx5_wqe_xrc_seg);  		/* fall through */  	case IB_QPT_RC:  		size += sizeof(struct mlx5_wqe_ctrl_seg) + @@ -211,20 +211,25 @@ static int sq_overhead(enum ib_qp_type qp_type)  			sizeof(struct mlx5_wqe_raddr_seg);  		break; +	case IB_QPT_XRC_TGT: +		return 0; +  	case IB_QPT_UC: -		size = sizeof(struct mlx5_wqe_ctrl_seg) + -			sizeof(struct mlx5_wqe_raddr_seg); +		size += sizeof(struct mlx5_wqe_ctrl_seg) + +			sizeof(struct mlx5_wqe_raddr_seg) + +			sizeof(struct mlx5_wqe_umr_ctrl_seg) + +			sizeof(struct mlx5_mkey_seg);  		break;  	case IB_QPT_UD:  	case IB_QPT_SMI:  	case IB_QPT_GSI: -		size = sizeof(struct mlx5_wqe_ctrl_seg) + +		size += sizeof(struct mlx5_wqe_ctrl_seg) +  			sizeof(struct mlx5_wqe_datagram_seg);  		break;  	case MLX5_IB_QPT_REG_UMR: -		size = sizeof(struct mlx5_wqe_ctrl_seg) + +		size += sizeof(struct mlx5_wqe_ctrl_seg) +  			sizeof(struct mlx5_wqe_umr_ctrl_seg) +  			sizeof(struct mlx5_mkey_seg);  		break; @@ -251,8 +256,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)  	}  	size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); - -	return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); +	if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN && +	    ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE) +			return MLX5_SIG_WQE_SIZE; +	else +		return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);  }  static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, @@ -270,7 +278,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,  		return wqe_size;  	if (wqe_size > dev->mdev.caps.max_sq_desc_sz) { -		mlx5_ib_dbg(dev, "\n"); +		mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n", +			    wqe_size, dev->mdev.caps.max_sq_desc_sz);  		return -EINVAL;  	} @@ -278,11 +287,20 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,  		sizeof(struct mlx5_wqe_inline_seg);  	attr->cap.max_inline_data = qp->max_inline_data; +	if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) +		qp->signature_en = true; +  	wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);  	qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; +	if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { +		mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", +			    qp->sq.wqe_cnt, dev->mdev.caps.max_wqes); +		return -ENOMEM; +	}  	qp->sq.wqe_shift = 
ilog2(MLX5_SEND_WQE_BB);  	qp->sq.max_gs = attr->cap.max_send_sge; -	qp->sq.max_post = 1 << ilog2(wq_size / wqe_size); +	qp->sq.max_post = wq_size / wqe_size; +	attr->cap.max_send_wr = qp->sq.max_post;  	return wq_size;  } @@ -330,14 +348,57 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)  	return 1;  } +static int first_med_uuar(void) +{ +	return 1; +} + +static int next_uuar(int n) +{ +	n++; + +	while (((n % 4) & 2)) +		n++; + +	return n; +} + +static int num_med_uuar(struct mlx5_uuar_info *uuari) +{ +	int n; + +	n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE - +		uuari->num_low_latency_uuars - 1; + +	return n >= 0 ? n : 0; +} + +static int max_uuari(struct mlx5_uuar_info *uuari) +{ +	return uuari->num_uars * 4; +} + +static int first_hi_uuar(struct mlx5_uuar_info *uuari) +{ +	int med; +	int i; +	int t; + +	med = num_med_uuar(uuari); +	for (t = 0, i = first_med_uuar();; i = next_uuar(i)) { +		t++; +		if (t == med) +			return next_uuar(i); +	} + +	return 0; +} +  static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)  { -	int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; -	int start_uuar;  	int i; -	start_uuar = nuuars - uuari->num_low_latency_uuars; -	for (i = start_uuar; i < nuuars; i++) { +	for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {  		if (!test_bit(i, uuari->bitmap)) {  			set_bit(i, uuari->bitmap);  			uuari->count[i]++; @@ -350,19 +411,10 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)  static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)  { -	int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; -	int minidx = 1; -	int uuarn; -	int end; +	int minidx = first_med_uuar();  	int i; -	end = nuuars - uuari->num_low_latency_uuars; - -	for (i = 1; i < end; i++) { -		uuarn = i & 3; -		if (uuarn == 2 || uuarn == 3) -			continue; - +	for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {  		if (uuari->count[i] < uuari->count[minidx])  			minidx = i;  	} @@ -384,11 +436,17 @@ static int alloc_uuar(struct mlx5_uuar_info *uuari,  		break;  	case MLX5_IB_LATENCY_CLASS_MEDIUM: -		uuarn = alloc_med_class_uuar(uuari); +		if (uuari->ver < 2) +			uuarn = -ENOMEM; +		else +			uuarn = alloc_med_class_uuar(uuari);  		break;  	case MLX5_IB_LATENCY_CLASS_HIGH: -		uuarn = alloc_high_class_uuar(uuari); +		if (uuari->ver < 2) +			uuarn = -ENOMEM; +		else +			uuarn = alloc_high_class_uuar(uuari);  		break;  	case MLX5_IB_LATENCY_CLASS_FAST_PATH: @@ -479,12 +537,12 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,  {  	struct mlx5_ib_ucontext *context;  	struct mlx5_ib_create_qp ucmd; -	int page_shift; +	int page_shift = 0;  	int uar_index;  	int npages; -	u32 offset; +	u32 offset = 0;  	int uuarn; -	int ncont; +	int ncont = 0;  	int err;  	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); @@ -500,38 +558,53 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,  	uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);  	if (uuarn < 0) {  		mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); -		mlx5_ib_dbg(dev, "reverting to high latency\n"); -		uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); +		mlx5_ib_dbg(dev, "reverting to medium latency\n"); +		uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);  		if (uuarn < 0) { -			mlx5_ib_dbg(dev, "uuar allocation failed\n"); -			return uuarn; +			mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n"); +			mlx5_ib_dbg(dev, "reverting to high latency\n"); +			uuarn = 
alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); +			if (uuarn < 0) { +				mlx5_ib_warn(dev, "uuar allocation failed\n"); +				return uuarn; +			}  		}  	}  	uar_index = uuarn_to_uar_index(&context->uuari, uuarn);  	mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index); +	qp->rq.offset = 0; +	qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); +	qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; +  	err = set_user_buf_size(dev, qp, &ucmd);  	if (err)  		goto err_uuar; -	qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, -			       qp->buf_size, 0, 0); -	if (IS_ERR(qp->umem)) { -		mlx5_ib_dbg(dev, "umem_get failed\n"); -		err = PTR_ERR(qp->umem); -		goto err_uuar; +	if (ucmd.buf_addr && qp->buf_size) { +		qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, +				       qp->buf_size, 0, 0); +		if (IS_ERR(qp->umem)) { +			mlx5_ib_dbg(dev, "umem_get failed\n"); +			err = PTR_ERR(qp->umem); +			goto err_uuar; +		} +	} else { +		qp->umem = NULL;  	} -	mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, -			   &ncont, NULL); -	err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); -	if (err) { -		mlx5_ib_warn(dev, "bad offset\n"); -		goto err_umem; +	if (qp->umem) { +		mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, +				   &ncont, NULL); +		err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); +		if (err) { +			mlx5_ib_warn(dev, "bad offset\n"); +			goto err_umem; +		} +		mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n", +			    ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);  	} -	mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n", -		    ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);  	*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;  	*in = mlx5_vzalloc(*inlen); @@ -539,9 +612,10 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,  		err = -ENOMEM;  		goto err_umem;  	} -	mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); +	if (qp->umem) +		mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);  	(*in)->ctx.log_pg_sz_remote_qpn = -		cpu_to_be32((page_shift - PAGE_SHIFT) << 24); +		cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);  	(*in)->ctx.params2 = cpu_to_be32(offset << 6);  	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); @@ -570,7 +644,8 @@ err_free:  	mlx5_vfree(*in);  err_umem: -	ib_umem_release(qp->umem); +	if (qp->umem) +		ib_umem_release(qp->umem);  err_uuar:  	free_uuar(&context->uuari, uuarn); @@ -583,7 +658,8 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)  	context = to_mucontext(pd->uobject->context);  	mlx5_ib_db_unmap_user(context, &qp->db); -	ib_umem_release(qp->umem); +	if (qp->umem) +		ib_umem_release(qp->umem);  	free_uuar(&context->uuari, qp->uuarn);  } @@ -599,8 +675,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,  	int err;  	uuari = &dev->mdev.priv.uuari; -	if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) -		qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; +	if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) +		return -EINVAL;  	if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)  		lc = MLX5_IB_LATENCY_CLASS_FAST_PATH; @@ -638,7 +714,8 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,  		goto err_buf;  	}  	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); -	(*in)->ctx.log_pg_sz_remote_qpn = 
cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24); +	(*in)->ctx.log_pg_sz_remote_qpn = +		cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);  	/* Set "fast registration enabled" for all kernel QPs */  	(*in)->ctx.params1 |= cpu_to_be32(1 << 11);  	(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); @@ -734,6 +811,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,  	spin_lock_init(&qp->sq.lock);  	spin_lock_init(&qp->rq.lock); +	if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { +		if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { +			mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); +			return -EINVAL; +		} else { +			qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; +		} +	} +  	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)  		qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; @@ -805,6 +891,9 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,  	if (qp->wq_sig)  		in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG); +	if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) +		in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST); +  	if (qp->scat_cqe && is_connected(init_attr->qp_type)) {  		int rcqe_sz;  		int scqe_sz; @@ -1280,6 +1369,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q  					  MLX5_QP_OPTPAR_Q_KEY,  			[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX	|  					   MLX5_QP_OPTPAR_Q_KEY, +			[MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | +					  MLX5_QP_OPTPAR_RRE            | +					  MLX5_QP_OPTPAR_RAE            | +					  MLX5_QP_OPTPAR_RWE            | +					  MLX5_QP_OPTPAR_PKEY_INDEX,  		},  	},  	[MLX5_QP_STATE_RTR] = { @@ -1302,9 +1396,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q  					  MLX5_QP_OPTPAR_RAE		|  					  MLX5_QP_OPTPAR_RWE		|  					  MLX5_QP_OPTPAR_RNR_TIMEOUT	| -					  MLX5_QP_OPTPAR_PM_STATE, +					  MLX5_QP_OPTPAR_PM_STATE	| +					  MLX5_QP_OPTPAR_ALT_ADDR_PATH,  			[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE		| -					  MLX5_QP_OPTPAR_PM_STATE, +					  MLX5_QP_OPTPAR_PM_STATE	| +					  MLX5_QP_OPTPAR_ALT_ADDR_PATH,  			[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY		|  					  MLX5_QP_OPTPAR_SRQN		|  					  MLX5_QP_OPTPAR_CQN_RCV, @@ -1314,6 +1410,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q  		[MLX5_QP_STATE_RTS] = {  			[MLX5_QP_ST_UD]	 = MLX5_QP_OPTPAR_Q_KEY,  			[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY, +			[MLX5_QP_ST_UC]	 = MLX5_QP_OPTPAR_RWE, +			[MLX5_QP_ST_RC]	 = MLX5_QP_OPTPAR_RNR_TIMEOUT	| +					   MLX5_QP_OPTPAR_RWE		| +					   MLX5_QP_OPTPAR_RAE		| +					   MLX5_QP_OPTPAR_RRE,  		},  	},  }; @@ -1530,7 +1631,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,  	mlx5_cur = to_mlx5_state(cur_state);  	mlx5_new = to_mlx5_state(new_state);  	mlx5_st = to_mlx5_st(ibqp->qp_type); -	if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0) +	if (mlx5_st < 0)  		goto out;  	optpar = ib_mask_to_mlx5_opt(attr_mask); @@ -1593,7 +1694,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,  	new_state = attr_mask & IB_QP_STATE ? 
attr->qp_state : cur_state;  	if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && -	    !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) +	    !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, +				IB_LINK_LAYER_UNSPECIFIED))  		goto out;  	if ((attr_mask & IB_QP_PORT) && @@ -1651,29 +1753,6 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,  	rseg->reserved = 0;  } -static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr) -{ -	if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { -		aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); -		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add); -	} else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { -		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); -		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add_mask); -	} else { -		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); -		aseg->compare  = 0; -	} -} - -static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg, -				  struct ib_send_wr *wr) -{ -	aseg->swap_add		= cpu_to_be64(wr->wr.atomic.swap); -	aseg->swap_add_mask	= cpu_to_be64(wr->wr.atomic.swap_mask); -	aseg->compare		= cpu_to_be64(wr->wr.atomic.compare_add); -	aseg->compare_mask	= cpu_to_be64(wr->wr.atomic.compare_add_mask); -} -  static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,  			     struct ib_send_wr *wr)  { @@ -1714,6 +1793,27 @@ static __be64 frwr_mkey_mask(void)  	return cpu_to_be64(result);  } +static __be64 sig_mkey_mask(void) +{ +	u64 result; + +	result = MLX5_MKEY_MASK_LEN		| +		MLX5_MKEY_MASK_PAGE_SIZE	| +		MLX5_MKEY_MASK_START_ADDR	| +		MLX5_MKEY_MASK_EN_SIGERR	| +		MLX5_MKEY_MASK_EN_RINVAL	| +		MLX5_MKEY_MASK_KEY		| +		MLX5_MKEY_MASK_LR		| +		MLX5_MKEY_MASK_LW		| +		MLX5_MKEY_MASK_RR		| +		MLX5_MKEY_MASK_RW		| +		MLX5_MKEY_MASK_SMALL_FENCE	| +		MLX5_MKEY_MASK_FREE		| +		MLX5_MKEY_MASK_BSF_EN; + +	return cpu_to_be64(result); +} +  static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,  				 struct ib_send_wr *wr, int li)  { @@ -1747,6 +1847,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,  			MLX5_MKEY_MASK_PD		|  			MLX5_MKEY_MASK_LR		|  			MLX5_MKEY_MASK_LW		| +			MLX5_MKEY_MASK_KEY		|  			MLX5_MKEY_MASK_RR		|  			MLX5_MKEY_MASK_RW		|  			MLX5_MKEY_MASK_A		| @@ -1768,7 +1869,7 @@ static u8 get_umr_flags(int acc)  	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX5_PERM_REMOTE_WRITE : 0) |  	       (acc & IB_ACCESS_REMOTE_READ   ? MLX5_PERM_REMOTE_READ  : 0) |  	       (acc & IB_ACCESS_LOCAL_WRITE   ? 
MLX5_PERM_LOCAL_WRITE  : 0) | -		MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; +		MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;  }  static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, @@ -1780,7 +1881,8 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,  		return;  	} -	seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags); +	seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) | +		     MLX5_ACCESS_MODE_MTT;  	*writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);  	seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);  	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); @@ -1803,7 +1905,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w  	seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);  	seg->len = cpu_to_be64(wr->wr.fast_reg.length);  	seg->log2_page_size = wr->wr.fast_reg.page_shift; -	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); +	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | +				       mlx5_mkey_variant(wr->wr.fast_reg.rkey));  }  static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, @@ -1895,6 +1998,342 @@ static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,  	return 0;  } +static u16 prot_field_size(enum ib_signature_type type) +{ +	switch (type) { +	case IB_SIG_TYPE_T10_DIF: +		return MLX5_DIF_SIZE; +	default: +		return 0; +	} +} + +static u8 bs_selector(int block_size) +{ +	switch (block_size) { +	case 512:	    return 0x1; +	case 520:	    return 0x2; +	case 4096:	    return 0x3; +	case 4160:	    return 0x4; +	case 1073741824:    return 0x5; +	default:	    return 0; +	} +} + +static int format_selector(struct ib_sig_attrs *attr, +			   struct ib_sig_domain *domain, +			   int *selector) +{ + +#define FORMAT_DIF_NONE		0 +#define FORMAT_DIF_CRC_INC	8 +#define FORMAT_DIF_CRC_NO_INC	12 +#define FORMAT_DIF_CSUM_INC	13 +#define FORMAT_DIF_CSUM_NO_INC	14 + +	switch (domain->sig.dif.type) { +	case IB_T10DIF_NONE: +		/* No DIF */ +		*selector = FORMAT_DIF_NONE; +		break; +	case IB_T10DIF_TYPE1: /* Fall through */ +	case IB_T10DIF_TYPE2: +		switch (domain->sig.dif.bg_type) { +		case IB_T10DIF_CRC: +			*selector = FORMAT_DIF_CRC_INC; +			break; +		case IB_T10DIF_CSUM: +			*selector = FORMAT_DIF_CSUM_INC; +			break; +		default: +			return 1; +		} +		break; +	case IB_T10DIF_TYPE3: +		switch (domain->sig.dif.bg_type) { +		case IB_T10DIF_CRC: +			*selector = domain->sig.dif.type3_inc_reftag ? +					   FORMAT_DIF_CRC_INC : +					   FORMAT_DIF_CRC_NO_INC; +			break; +		case IB_T10DIF_CSUM: +			*selector = domain->sig.dif.type3_inc_reftag ? 
+					   FORMAT_DIF_CSUM_INC : +					   FORMAT_DIF_CSUM_NO_INC; +			break; +		default: +			return 1; +		} +		break; +	default: +		return 1; +	} + +	return 0; +} + +static int mlx5_set_bsf(struct ib_mr *sig_mr, +			struct ib_sig_attrs *sig_attrs, +			struct mlx5_bsf *bsf, u32 data_size) +{ +	struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; +	struct mlx5_bsf_basic *basic = &bsf->basic; +	struct ib_sig_domain *mem = &sig_attrs->mem; +	struct ib_sig_domain *wire = &sig_attrs->wire; +	int ret, selector; + +	memset(bsf, 0, sizeof(*bsf)); +	switch (sig_attrs->mem.sig_type) { +	case IB_SIG_TYPE_T10_DIF: +		if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) +			return -EINVAL; + +		/* Input domain check byte mask */ +		basic->check_byte_mask = sig_attrs->check_mask; +		if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && +		    mem->sig.dif.type == wire->sig.dif.type) { +			/* Same block structure */ +			basic->bsf_size_sbs = 1 << 4; +			if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) +				basic->wire.copy_byte_mask |= 0xc0; +			if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) +				basic->wire.copy_byte_mask |= 0x30; +			if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) +				basic->wire.copy_byte_mask |= 0x0f; +		} else +			basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); + +		basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); +		basic->raw_data_size = cpu_to_be32(data_size); + +		ret = format_selector(sig_attrs, mem, &selector); +		if (ret) +			return -EINVAL; +		basic->m_bfs_psv = cpu_to_be32(selector << 24 | +					       msig->psv_memory.psv_idx); + +		ret = format_selector(sig_attrs, wire, &selector); +		if (ret) +			return -EINVAL; +		basic->w_bfs_psv = cpu_to_be32(selector << 24 | +					       msig->psv_wire.psv_idx); +		break; + +	default: +		return -EINVAL; +	} + +	return 0; +} + +static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, +				void **seg, int *size) +{ +	struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs; +	struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; +	struct mlx5_bsf *bsf; +	u32 data_len = wr->sg_list->length; +	u32 data_key = wr->sg_list->lkey; +	u64 data_va = wr->sg_list->addr; +	int ret; +	int wqe_size; + +	if (!wr->wr.sig_handover.prot || +	    (data_key == wr->wr.sig_handover.prot->lkey && +	     data_va == wr->wr.sig_handover.prot->addr && +	     data_len == wr->wr.sig_handover.prot->length)) { +		/** +		 * Source domain doesn't contain signature information +		 * or data and protection are interleaved in memory. 
+		 * So need construct: +		 *                  ------------------ +		 *                 |     data_klm     | +		 *                  ------------------ +		 *                 |       BSF        | +		 *                  ------------------ +		 **/ +		struct mlx5_klm *data_klm = *seg; + +		data_klm->bcount = cpu_to_be32(data_len); +		data_klm->key = cpu_to_be32(data_key); +		data_klm->va = cpu_to_be64(data_va); +		wqe_size = ALIGN(sizeof(*data_klm), 64); +	} else { +		/** +		 * Source domain contains signature information +		 * So need construct a strided block format: +		 *               --------------------------- +		 *              |     stride_block_ctrl     | +		 *               --------------------------- +		 *              |          data_klm         | +		 *               --------------------------- +		 *              |          prot_klm         | +		 *               --------------------------- +		 *              |             BSF           | +		 *               --------------------------- +		 **/ +		struct mlx5_stride_block_ctrl_seg *sblock_ctrl; +		struct mlx5_stride_block_entry *data_sentry; +		struct mlx5_stride_block_entry *prot_sentry; +		u32 prot_key = wr->wr.sig_handover.prot->lkey; +		u64 prot_va = wr->wr.sig_handover.prot->addr; +		u16 block_size = sig_attrs->mem.sig.dif.pi_interval; +		int prot_size; + +		sblock_ctrl = *seg; +		data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); +		prot_sentry = (void *)data_sentry + sizeof(*data_sentry); + +		prot_size = prot_field_size(sig_attrs->mem.sig_type); +		if (!prot_size) { +			pr_err("Bad block size given: %u\n", block_size); +			return -EINVAL; +		} +		sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + +							    prot_size); +		sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); +		sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); +		sblock_ctrl->num_entries = cpu_to_be16(2); + +		data_sentry->bcount = cpu_to_be16(block_size); +		data_sentry->key = cpu_to_be32(data_key); +		data_sentry->va = cpu_to_be64(data_va); +		data_sentry->stride = cpu_to_be16(block_size); + +		prot_sentry->bcount = cpu_to_be16(prot_size); +		prot_sentry->key = cpu_to_be32(prot_key); +		prot_sentry->va = cpu_to_be64(prot_va); +		prot_sentry->stride = cpu_to_be16(prot_size); + +		wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + +				 sizeof(*prot_sentry), 64); +	} + +	*seg += wqe_size; +	*size += wqe_size / 16; +	if (unlikely((*seg == qp->sq.qend))) +		*seg = mlx5_get_send_wqe(qp, 0); + +	bsf = *seg; +	ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); +	if (ret) +		return -EINVAL; + +	*seg += sizeof(*bsf); +	*size += sizeof(*bsf) / 16; +	if (unlikely((*seg == qp->sq.qend))) +		*seg = mlx5_get_send_wqe(qp, 0); + +	return 0; +} + +static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, +				 struct ib_send_wr *wr, u32 nelements, +				 u32 length, u32 pdn) +{ +	struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; +	u32 sig_key = sig_mr->rkey; +	u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; + +	memset(seg, 0, sizeof(*seg)); + +	seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | +				   MLX5_ACCESS_MODE_KLM; +	seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); +	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | +				    MLX5_MKEY_BSF_EN | pdn); +	seg->len = cpu_to_be64(length); +	seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements))); +	seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); +} + +static void set_sig_umr_segment(struct 
mlx5_wqe_umr_ctrl_seg *umr, +				struct ib_send_wr *wr, u32 nelements) +{ +	memset(umr, 0, sizeof(*umr)); + +	umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; +	umr->klm_octowords = get_klm_octo(nelements); +	umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); +	umr->mkey_mask = sig_mkey_mask(); +} + + +static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, +			  void **seg, int *size) +{ +	struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr); +	u32 pdn = get_pd(qp)->pdn; +	u32 klm_oct_size; +	int region_len, ret; + +	if (unlikely(wr->num_sge != 1) || +	    unlikely(wr->wr.sig_handover.access_flags & +		     IB_ACCESS_REMOTE_ATOMIC) || +	    unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || +	    unlikely(!sig_mr->sig->sig_status_checked)) +		return -EINVAL; + +	/* length of the protected region, data + protection */ +	region_len = wr->sg_list->length; +	if (wr->wr.sig_handover.prot && +	    (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey  || +	     wr->wr.sig_handover.prot->addr != wr->sg_list->addr  || +	     wr->wr.sig_handover.prot->length != wr->sg_list->length)) +		region_len += wr->wr.sig_handover.prot->length; + +	/** +	 * KLM octoword size - if protection was provided +	 * then we use strided block format (3 octowords), +	 * else we use single KLM (1 octoword) +	 **/ +	klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1; + +	set_sig_umr_segment(*seg, wr, klm_oct_size); +	*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); +	*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; +	if (unlikely((*seg == qp->sq.qend))) +		*seg = mlx5_get_send_wqe(qp, 0); + +	set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn); +	*seg += sizeof(struct mlx5_mkey_seg); +	*size += sizeof(struct mlx5_mkey_seg) / 16; +	if (unlikely((*seg == qp->sq.qend))) +		*seg = mlx5_get_send_wqe(qp, 0); + +	ret = set_sig_data_segment(wr, qp, seg, size); +	if (ret) +		return ret; + +	sig_mr->sig->sig_status_checked = false; +	return 0; +} + +static int set_psv_wr(struct ib_sig_domain *domain, +		      u32 psv_idx, void **seg, int *size) +{ +	struct mlx5_seg_set_psv *psv_seg = *seg; + +	memset(psv_seg, 0, sizeof(*psv_seg)); +	psv_seg->psv_num = cpu_to_be32(psv_idx); +	switch (domain->sig_type) { +	case IB_SIG_TYPE_T10_DIF: +		psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | +						     domain->sig.dif.app_tag); +		psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); + +		*seg += sizeof(*psv_seg); +		*size += sizeof(*psv_seg) / 16; +		break; + +	default: +		pr_err("Bad signature type given.\n"); +		return 1; +	} + +	return 0; +} +  static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,  			  struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)  { @@ -1916,6 +2355,10 @@ static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,  	if (unlikely((*seg == qp->sq.qend)))  		*seg = mlx5_get_send_wqe(qp, 0);  	if (!li) { +		if (unlikely(wr->wr.fast_reg.page_list_len > +			     wr->wr.fast_reg.page_list->max_page_list_len)) +			return	-ENOMEM; +  		set_frwr_pages(*seg, wr, mdev, pd, writ);  		*seg += sizeof(struct mlx5_wqe_data_seg);  		*size += (sizeof(struct mlx5_wqe_data_seg) / 16); @@ -1978,6 +2421,59 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr)  	}  } +static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, +		     struct mlx5_wqe_ctrl_seg **ctrl, +		     struct ib_send_wr *wr, int *idx, +		     int *size, int nreq) +{ +	int err = 0; + +	if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, 
qp->ibqp.send_cq))) { +		err = -ENOMEM; +		return err; +	} + +	*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); +	*seg = mlx5_get_send_wqe(qp, *idx); +	*ctrl = *seg; +	*(uint32_t *)(*seg + 8) = 0; +	(*ctrl)->imm = send_ieth(wr); +	(*ctrl)->fm_ce_se = qp->sq_signal_bits | +		(wr->send_flags & IB_SEND_SIGNALED ? +		 MLX5_WQE_CTRL_CQ_UPDATE : 0) | +		(wr->send_flags & IB_SEND_SOLICITED ? +		 MLX5_WQE_CTRL_SOLICITED : 0); + +	*seg += sizeof(**ctrl); +	*size = sizeof(**ctrl) / 16; + +	return err; +} + +static void finish_wqe(struct mlx5_ib_qp *qp, +		       struct mlx5_wqe_ctrl_seg *ctrl, +		       u8 size, unsigned idx, u64 wr_id, +		       int nreq, u8 fence, u8 next_fence, +		       u32 mlx5_opcode) +{ +	u8 opmod = 0; + +	ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | +					     mlx5_opcode | ((u32)opmod << 24)); +	ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); +	ctrl->fm_ce_se |= fence; +	qp->fm_cache = next_fence; +	if (unlikely(qp->wq_sig)) +		ctrl->signature = wq_sig(ctrl); + +	qp->sq.wrid[idx] = wr_id; +	qp->sq.w_list[idx].opcode = mlx5_opcode; +	qp->sq.wqe_head[idx] = qp->sq.head + nreq; +	qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); +	qp->sq.w_list[idx].next = qp->sq.cur_post; +} + +  int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,  		      struct ib_send_wr **bad_wr)  { @@ -1985,13 +2481,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,  	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);  	struct mlx5_core_dev *mdev = &dev->mdev;  	struct mlx5_ib_qp *qp = to_mqp(ibqp); +	struct mlx5_ib_mr *mr;  	struct mlx5_wqe_data_seg *dpseg;  	struct mlx5_wqe_xrc_seg *xrc;  	struct mlx5_bf *bf = qp->bf;  	int uninitialized_var(size);  	void *qend = qp->sq.qend;  	unsigned long flags; -	u32 mlx5_opcode;  	unsigned idx;  	int err = 0;  	int inl = 0; @@ -2000,7 +2496,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,  	int nreq;  	int i;  	u8 next_fence = 0; -	u8 opmod = 0;  	u8 fence;  	spin_lock_irqsave(&qp->sq.lock, flags); @@ -2013,36 +2508,23 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,  			goto out;  		} -		if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { +		fence = qp->fm_cache; +		num_sge = wr->num_sge; +		if (unlikely(num_sge > qp->sq.max_gs)) {  			mlx5_ib_warn(dev, "\n");  			err = -ENOMEM;  			*bad_wr = wr;  			goto out;  		} -		fence = qp->fm_cache; -		num_sge = wr->num_sge; -		if (unlikely(num_sge > qp->sq.max_gs)) { +		err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); +		if (err) {  			mlx5_ib_warn(dev, "\n");  			err = -ENOMEM;  			*bad_wr = wr;  			goto out;  		} -		idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); -		seg = mlx5_get_send_wqe(qp, idx); -		ctrl = seg; -		*(uint32_t *)(seg + 8) = 0; -		ctrl->imm = send_ieth(wr); -		ctrl->fm_ce_se = qp->sq_signal_bits | -			(wr->send_flags & IB_SEND_SIGNALED ? -			 MLX5_WQE_CTRL_CQ_UPDATE : 0) | -			(wr->send_flags & IB_SEND_SOLICITED ? 
-			 MLX5_WQE_CTRL_SOLICITED : 0); - -		seg += sizeof(*ctrl); -		size = sizeof(*ctrl) / 16; -  		switch (ibqp->qp_type) {  		case IB_QPT_XRC_INI:  			xrc = seg; @@ -2063,28 +2545,11 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,  			case IB_WR_ATOMIC_CMP_AND_SWP:  			case IB_WR_ATOMIC_FETCH_AND_ADD: -				set_raddr_seg(seg, wr->wr.atomic.remote_addr, -					      wr->wr.atomic.rkey); -				seg  += sizeof(struct mlx5_wqe_raddr_seg); - -				set_atomic_seg(seg, wr); -				seg  += sizeof(struct mlx5_wqe_atomic_seg); - -				size += (sizeof(struct mlx5_wqe_raddr_seg) + -					 sizeof(struct mlx5_wqe_atomic_seg)) / 16; -				break; -  			case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: -				set_raddr_seg(seg, wr->wr.atomic.remote_addr, -					      wr->wr.atomic.rkey); -				seg  += sizeof(struct mlx5_wqe_raddr_seg); - -				set_masked_atomic_seg(seg, wr); -				seg  += sizeof(struct mlx5_wqe_masked_atomic_seg); - -				size += (sizeof(struct mlx5_wqe_raddr_seg) + -					 sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16; -				break; +				mlx5_ib_warn(dev, "Atomic operations are not supported yet\n"); +				err = -ENOSYS; +				*bad_wr = wr; +				goto out;  			case IB_WR_LOCAL_INV:  				next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; @@ -2112,6 +2577,73 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,  				num_sge = 0;  				break; +			case IB_WR_REG_SIG_MR: +				qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; +				mr = to_mmr(wr->wr.sig_handover.sig_mr); + +				ctrl->imm = cpu_to_be32(mr->ibmr.rkey); +				err = set_sig_umr_wr(wr, qp, &seg, &size); +				if (err) { +					mlx5_ib_warn(dev, "\n"); +					*bad_wr = wr; +					goto out; +				} + +				finish_wqe(qp, ctrl, size, idx, wr->wr_id, +					   nreq, get_fence(fence, wr), +					   next_fence, MLX5_OPCODE_UMR); +				/* +				 * SET_PSV WQEs are not signaled and solicited +				 * on error +				 */ +				wr->send_flags &= ~IB_SEND_SIGNALED; +				wr->send_flags |= IB_SEND_SOLICITED; +				err = begin_wqe(qp, &seg, &ctrl, wr, +						&idx, &size, nreq); +				if (err) { +					mlx5_ib_warn(dev, "\n"); +					err = -ENOMEM; +					*bad_wr = wr; +					goto out; +				} + +				err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem, +						 mr->sig->psv_memory.psv_idx, &seg, +						 &size); +				if (err) { +					mlx5_ib_warn(dev, "\n"); +					*bad_wr = wr; +					goto out; +				} + +				finish_wqe(qp, ctrl, size, idx, wr->wr_id, +					   nreq, get_fence(fence, wr), +					   next_fence, MLX5_OPCODE_SET_PSV); +				err = begin_wqe(qp, &seg, &ctrl, wr, +						&idx, &size, nreq); +				if (err) { +					mlx5_ib_warn(dev, "\n"); +					err = -ENOMEM; +					*bad_wr = wr; +					goto out; +				} + +				next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; +				err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire, +						 mr->sig->psv_wire.psv_idx, &seg, +						 &size); +				if (err) { +					mlx5_ib_warn(dev, "\n"); +					*bad_wr = wr; +					goto out; +				} + +				finish_wqe(qp, ctrl, size, idx, wr->wr_id, +					   nreq, get_fence(fence, wr), +					   next_fence, MLX5_OPCODE_SET_PSV); +				num_sge = 0; +				goto skip_psv; +  			default:  				break;  			} @@ -2192,22 +2724,10 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,  			}  		} -		mlx5_opcode = mlx5_ib_opcode[wr->opcode]; -		ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8)	| -						     mlx5_opcode			| -						     ((u32)opmod << 24)); -		ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); -		ctrl->fm_ce_se |= get_fence(fence, wr); -		qp->fm_cache = next_fence; -		if (unlikely(qp->wq_sig)) 
-			ctrl->signature = wq_sig(ctrl); - -		qp->sq.wrid[idx] = wr->wr_id; -		qp->sq.w_list[idx].opcode = mlx5_opcode; -		qp->sq.wqe_head[idx] = qp->sq.head + nreq; -		qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); -		qp->sq.w_list[idx].next = qp->sq.cur_post; - +		finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, +			   get_fence(fence, wr), next_fence, +			   mlx5_ib_opcode[wr->opcode]); +skip_psv:  		if (0)  			dump_wqe(qp, idx, size);  	} @@ -2223,6 +2743,10 @@ out:  		qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); +		/* Make sure doorbell record is visible to the HCA before +		 * we hit doorbell */ +		wmb(); +  		if (bf->need_lock)  			spin_lock(&bf->lock); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 84d297afd6a..384af6dec5e 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -35,6 +35,7 @@  #include <linux/mlx5/srq.h>  #include <linux/slab.h>  #include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h>  #include "mlx5_ib.h"  #include "user.h" @@ -78,16 +79,27 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,  {  	struct mlx5_ib_dev *dev = to_mdev(pd->device);  	struct mlx5_ib_create_srq ucmd; +	size_t ucmdlen;  	int err;  	int npages;  	int page_shift;  	int ncont;  	u32 offset; -	if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { +	ucmdlen = +		(udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < +		 sizeof(ucmd)) ? (sizeof(ucmd) - +				  sizeof(ucmd.reserved)) : sizeof(ucmd); + +	if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {  		mlx5_ib_dbg(dev, "failed copy udata\n");  		return -EFAULT;  	} + +	if (ucmdlen == sizeof(ucmd) && +	    ucmd.reserved != 0) +		return -EINVAL; +  	srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);  	srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, @@ -123,7 +135,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,  		goto err_in;  	} -	(*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; +	(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;  	(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);  	return 0; @@ -192,7 +204,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,  	}  	srq->wq_sig = !!srq_signature; -	(*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT; +	(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;  	return 0; @@ -295,7 +307,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,  	mlx5_vfree(in);  	if (err) {  		mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); -		goto err_srq; +		goto err_usr_kern_srq;  	}  	mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn); @@ -316,6 +328,8 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,  err_core:  	mlx5_core_destroy_srq(&dev->mdev, &srq->msrq); + +err_usr_kern_srq:  	if (pd->uobject)  		destroy_srq_user(pd, srq);  	else @@ -388,9 +402,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)  		mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);  		ib_umem_release(msrq->umem);  	} else { -		kfree(msrq->wrid); -		mlx5_buf_free(&dev->mdev, &msrq->buf); -		mlx5_db_free(&dev->mdev, &msrq->db); +		destroy_srq_kernel(dev, msrq);  	}  	kfree(srq); diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h index a886de3e593..d0ba264ac1e 100644 --- a/drivers/infiniband/hw/mlx5/user.h +++ b/drivers/infiniband/hw/mlx5/user.h @@ -62,6 +62,13 @@ struct mlx5_ib_alloc_ucontext_req {  	__u32	num_low_latency_uuars;  }; +struct 
mlx5_ib_alloc_ucontext_req_v2 { +	__u32	total_num_uuars; +	__u32	num_low_latency_uuars; +	__u32	flags; +	__u32	reserved; +}; +  struct mlx5_ib_alloc_ucontext_resp {  	__u32	qp_tab_size;  	__u32	bf_reg_size; @@ -84,6 +91,7 @@ struct mlx5_ib_create_cq {  	__u64	buf_addr;  	__u64	db_addr;  	__u32	cqe_size; +	__u32	reserved; /* explicit padding (optional on i386) */  };  struct mlx5_ib_create_cq_resp { @@ -93,12 +101,16 @@ struct mlx5_ib_create_cq_resp {  struct mlx5_ib_resize_cq {  	__u64	buf_addr; +	__u16	cqe_size; +	__u16	reserved0; +	__u32	reserved1;  };  struct mlx5_ib_create_srq {  	__u64	buf_addr;  	__u64	db_addr;  	__u32	flags; +	__u32	reserved; /* explicit padding (optional on i386) */  };  struct mlx5_ib_create_srq_resp {  | 
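
The qp.c hunks above add the send-queue plumbing for signature handover: an IB_WR_REG_SIG_MR work request is expanded into a UMR WQE, a signature mkey segment, a KLM or strided-block data layout plus BSF, and two SET_PSV WQEs for the memory and wire domains. A minimal consumer-side sketch of driving that path is shown below. It is illustrative only: it assumes the QP was created with IB_QP_CREATE_SIGNATURE_EN, that sig_mr is a signature-enabled MR obtained elsewhere, and that the data and protection SGEs were registered beforehand; the helper name post_t10dif_handover() is made up for the example and is not part of this patch.

#include <linux/string.h>
#include <rdma/ib_verbs.h>

/* Sketch: post one T10-DIF TYPE1/CRC signature handover WR. */
static int post_t10dif_handover(struct ib_qp *qp, struct ib_mr *sig_mr,
				struct ib_sge *data, struct ib_sge *prot,
				u16 block_size)
{
	struct ib_sig_attrs sig_attrs;
	struct ib_send_wr wr, *bad_wr;

	memset(&sig_attrs, 0, sizeof(sig_attrs));

	/* Memory (local) domain: T10-DIF type 1 with CRC block guard */
	sig_attrs.mem.sig_type = IB_SIG_TYPE_T10_DIF;
	sig_attrs.mem.sig.dif.type = IB_T10DIF_TYPE1;
	sig_attrs.mem.sig.dif.bg_type = IB_T10DIF_CRC;
	sig_attrs.mem.sig.dif.pi_interval = block_size;	/* e.g. 512 or 4096 */
	sig_attrs.mem.sig.dif.ref_tag = 0;
	sig_attrs.mem.sig.dif.app_tag = 0;

	/* Wire (remote) domain: same block structure in this example */
	sig_attrs.wire = sig_attrs.mem;

	/* Check all protection-information bytes (byte mask, see mlx5_set_bsf) */
	sig_attrs.check_mask = 0xff;

	memset(&wr, 0, sizeof(wr));
	wr.opcode = IB_WR_REG_SIG_MR;
	wr.send_flags = IB_SEND_SIGNALED;
	wr.sg_list = data;		/* set_sig_umr_wr() expects exactly one data SGE */
	wr.num_sge = 1;
	wr.wr.sig_handover.sig_mr = sig_mr;
	wr.wr.sig_handover.sig_attrs = &sig_attrs;
	wr.wr.sig_handover.prot = prot;	/* NULL if PI is interleaved with the data */
	wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
					  IB_ACCESS_REMOTE_READ |
					  IB_ACCESS_REMOTE_WRITE;

	return ib_post_send(qp, &wr, &bad_wr);
}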

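
On the completion side, the check-status hunk just before the qp.c changes latches any signature error into the MR (sig_err_exists, err_item) and reports it when IB_MR_CHECK_SIG_STATUS is requested. A matching caller sketch follows; it assumes the generic verbs entry point ib_check_mr_status() and the struct ib_mr_status layout from the companion core patches, which are not part of this diff, so treat the wrapper name and fields as an assumption rather than something this patch defines.

#include <linux/printk.h>
#include <rdma/ib_verbs.h>

/* Sketch: after the signature-handover transaction completes, ask the
 * provider whether a signature (protection information) error was caught. */
static int check_pi_result(struct ib_mr *sig_mr)
{
	struct ib_mr_status mr_status;
	int ret;

	ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
	if (ret) {
		pr_err("ib_check_mr_status failed, ret %d\n", ret);
		return ret;
	}

	if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
		/* err_type is IB_SIG_BAD_GUARD/APPTAG/REFTAG as set above */
		pr_err("PI error: type %d, offset %llu, key 0x%x\n",
		       mr_status.sig_err.err_type,
		       (unsigned long long)mr_status.sig_err.sig_err_offset,
		       mr_status.sig_err.key);
		return -EIO;
	}

	return 0;	/* no signature error latched for this MR */
}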