Diffstat (limited to 'drivers/infiniband/ulp/srp')
 drivers/infiniband/ulp/srp/ib_srp.c | 1206
 drivers/infiniband/ulp/srp/ib_srp.h |  116
 2 files changed, 1029 insertions, 293 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index f93baf8254c..e3c2c5b4297 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -30,7 +30,7 @@   * SOFTWARE.   */ -#define pr_fmt(fmt) PFX fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt  #include <linux/module.h>  #include <linux/init.h> @@ -46,6 +46,7 @@  #include <scsi/scsi.h>  #include <scsi/scsi_device.h>  #include <scsi/scsi_dbg.h> +#include <scsi/scsi_tcq.h>  #include <scsi/srp.h>  #include <scsi/scsi_transport_srp.h> @@ -65,6 +66,8 @@ static unsigned int srp_sg_tablesize;  static unsigned int cmd_sg_entries;  static unsigned int indirect_sg_entries;  static bool allow_ext_sg; +static bool prefer_fr; +static bool register_always;  static int topspin_workarounds = 1;  module_param(srp_sg_tablesize, uint, 0444); @@ -86,6 +89,40 @@ module_param(topspin_workarounds, int, 0444);  MODULE_PARM_DESC(topspin_workarounds,  		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); +module_param(prefer_fr, bool, 0444); +MODULE_PARM_DESC(prefer_fr, +"Whether to use fast registration if both FMR and fast registration are supported"); + +module_param(register_always, bool, 0444); +MODULE_PARM_DESC(register_always, +		 "Use memory registration even for contiguous memory regions"); + +static struct kernel_param_ops srp_tmo_ops; + +static int srp_reconnect_delay = 10; +module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay, +		S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts"); + +static int srp_fast_io_fail_tmo = 15; +module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo, +		S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(fast_io_fail_tmo, +		 "Number of seconds between the observation of a transport" +		 " layer error and failing all I/O. \"off\" means that this" +		 " functionality is disabled."); + +static int srp_dev_loss_tmo = 600; +module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo, +		S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dev_loss_tmo, +		 "Maximum number of seconds that the SRP transport should" +		 " insulate transport layer errors. After this time has been" +		 " exceeded the SCSI host is removed. Should be" +		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT) +		 " if fast_io_fail_tmo has not been set. 
\"off\" means that" +		 " this functionality is disabled."); +  static void srp_add_one(struct ib_device *device);  static void srp_remove_one(struct ib_device *device);  static void srp_recv_completion(struct ib_cq *cq, void *target_ptr); @@ -102,6 +139,48 @@ static struct ib_client srp_client = {  static struct ib_sa_client srp_sa_client; +static int srp_tmo_get(char *buffer, const struct kernel_param *kp) +{ +	int tmo = *(int *)kp->arg; + +	if (tmo >= 0) +		return sprintf(buffer, "%d", tmo); +	else +		return sprintf(buffer, "off"); +} + +static int srp_tmo_set(const char *val, const struct kernel_param *kp) +{ +	int tmo, res; + +	if (strncmp(val, "off", 3) != 0) { +		res = kstrtoint(val, 0, &tmo); +		if (res) +			goto out; +	} else { +		tmo = -1; +	} +	if (kp->arg == &srp_reconnect_delay) +		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, +				    srp_dev_loss_tmo); +	else if (kp->arg == &srp_fast_io_fail_tmo) +		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo); +	else +		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo, +				    tmo); +	if (res) +		goto out; +	*(int *)kp->arg = tmo; + +out: +	return res; +} + +static struct kernel_param_ops srp_tmo_ops = { +	.get = srp_tmo_get, +	.set = srp_tmo_set, +}; +  static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)  {  	return (struct srp_target_port *) host->hostdata; @@ -219,28 +298,174 @@ static int srp_new_cm_id(struct srp_target_port *target)  	return 0;  } +static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) +{ +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_fmr_pool_param fmr_param; + +	memset(&fmr_param, 0, sizeof(fmr_param)); +	fmr_param.pool_size	    = target->scsi_host->can_queue; +	fmr_param.dirty_watermark   = fmr_param.pool_size / 4; +	fmr_param.cache		    = 1; +	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; +	fmr_param.page_shift	    = ilog2(dev->mr_page_size); +	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE | +				       IB_ACCESS_REMOTE_WRITE | +				       IB_ACCESS_REMOTE_READ); + +	return ib_create_fmr_pool(dev->pd, &fmr_param); +} + +/** + * srp_destroy_fr_pool() - free the resources owned by a pool + * @pool: Fast registration pool to be destroyed. + */ +static void srp_destroy_fr_pool(struct srp_fr_pool *pool) +{ +	int i; +	struct srp_fr_desc *d; + +	if (!pool) +		return; + +	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { +		if (d->frpl) +			ib_free_fast_reg_page_list(d->frpl); +		if (d->mr) +			ib_dereg_mr(d->mr); +	} +	kfree(pool); +} + +/** + * srp_create_fr_pool() - allocate and initialize a pool for fast registration + * @device:            IB device to allocate fast registration descriptors for. + * @pd:                Protection domain associated with the FR descriptors. + * @pool_size:         Number of descriptors to allocate. + * @max_page_list_len: Maximum fast registration work request page list length. 
+ */ +static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, +					      struct ib_pd *pd, int pool_size, +					      int max_page_list_len) +{ +	struct srp_fr_pool *pool; +	struct srp_fr_desc *d; +	struct ib_mr *mr; +	struct ib_fast_reg_page_list *frpl; +	int i, ret = -EINVAL; + +	if (pool_size <= 0) +		goto err; +	ret = -ENOMEM; +	pool = kzalloc(sizeof(struct srp_fr_pool) + +		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL); +	if (!pool) +		goto err; +	pool->size = pool_size; +	pool->max_page_list_len = max_page_list_len; +	spin_lock_init(&pool->lock); +	INIT_LIST_HEAD(&pool->free_list); + +	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { +		mr = ib_alloc_fast_reg_mr(pd, max_page_list_len); +		if (IS_ERR(mr)) { +			ret = PTR_ERR(mr); +			goto destroy_pool; +		} +		d->mr = mr; +		frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len); +		if (IS_ERR(frpl)) { +			ret = PTR_ERR(frpl); +			goto destroy_pool; +		} +		d->frpl = frpl; +		list_add_tail(&d->entry, &pool->free_list); +	} + +out: +	return pool; + +destroy_pool: +	srp_destroy_fr_pool(pool); + +err: +	pool = ERR_PTR(ret); +	goto out; +} + +/** + * srp_fr_pool_get() - obtain a descriptor suitable for fast registration + * @pool: Pool to obtain descriptor from. + */ +static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool) +{ +	struct srp_fr_desc *d = NULL; +	unsigned long flags; + +	spin_lock_irqsave(&pool->lock, flags); +	if (!list_empty(&pool->free_list)) { +		d = list_first_entry(&pool->free_list, typeof(*d), entry); +		list_del(&d->entry); +	} +	spin_unlock_irqrestore(&pool->lock, flags); + +	return d; +} + +/** + * srp_fr_pool_put() - put an FR descriptor back in the free list + * @pool: Pool the descriptor was allocated from. + * @desc: Pointer to an array of fast registration descriptor pointers. + * @n:    Number of descriptors to put back. + * + * Note: The caller must already have queued an invalidation request for + * desc->mr->rkey before calling this function. 
+ */ +static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc, +			    int n) +{ +	unsigned long flags; +	int i; + +	spin_lock_irqsave(&pool->lock, flags); +	for (i = 0; i < n; i++) +		list_add(&desc[i]->entry, &pool->free_list); +	spin_unlock_irqrestore(&pool->lock, flags); +} + +static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) +{ +	struct srp_device *dev = target->srp_host->srp_dev; + +	return srp_create_fr_pool(dev->dev, dev->pd, +				  target->scsi_host->can_queue, +				  dev->max_pages_per_mr); +} +  static int srp_create_target_ib(struct srp_target_port *target)  { +	struct srp_device *dev = target->srp_host->srp_dev;  	struct ib_qp_init_attr *init_attr;  	struct ib_cq *recv_cq, *send_cq;  	struct ib_qp *qp; +	struct ib_fmr_pool *fmr_pool = NULL; +	struct srp_fr_pool *fr_pool = NULL; +	const int m = 1 + dev->use_fast_reg;  	int ret;  	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);  	if (!init_attr)  		return -ENOMEM; -	recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, -			       srp_recv_completion, NULL, target, SRP_RQ_SIZE, -			       target->comp_vector); +	recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, target, +			       target->queue_size, target->comp_vector);  	if (IS_ERR(recv_cq)) {  		ret = PTR_ERR(recv_cq);  		goto err;  	} -	send_cq = ib_create_cq(target->srp_host->srp_dev->dev, -			       srp_send_completion, NULL, target, SRP_SQ_SIZE, -			       target->comp_vector); +	send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target, +			       m * target->queue_size, target->comp_vector);  	if (IS_ERR(send_cq)) {  		ret = PTR_ERR(send_cq);  		goto err_recv_cq; @@ -249,16 +474,16 @@ static int srp_create_target_ib(struct srp_target_port *target)  	ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);  	init_attr->event_handler       = srp_qp_event; -	init_attr->cap.max_send_wr     = SRP_SQ_SIZE; -	init_attr->cap.max_recv_wr     = SRP_RQ_SIZE; +	init_attr->cap.max_send_wr     = m * target->queue_size; +	init_attr->cap.max_recv_wr     = target->queue_size;  	init_attr->cap.max_recv_sge    = 1;  	init_attr->cap.max_send_sge    = 1; -	init_attr->sq_sig_type         = IB_SIGNAL_ALL_WR; +	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;  	init_attr->qp_type             = IB_QPT_RC;  	init_attr->send_cq             = send_cq;  	init_attr->recv_cq             = recv_cq; -	qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); +	qp = ib_create_qp(dev->pd, init_attr);  	if (IS_ERR(qp)) {  		ret = PTR_ERR(qp);  		goto err_send_cq; @@ -268,6 +493,30 @@ static int srp_create_target_ib(struct srp_target_port *target)  	if (ret)  		goto err_qp; +	if (dev->use_fast_reg && dev->has_fr) { +		fr_pool = srp_alloc_fr_pool(target); +		if (IS_ERR(fr_pool)) { +			ret = PTR_ERR(fr_pool); +			shost_printk(KERN_WARNING, target->scsi_host, PFX +				     "FR pool allocation failed (%d)\n", ret); +			goto err_qp; +		} +		if (target->fr_pool) +			srp_destroy_fr_pool(target->fr_pool); +		target->fr_pool = fr_pool; +	} else if (!dev->use_fast_reg && dev->has_fmr) { +		fmr_pool = srp_alloc_fmr_pool(target); +		if (IS_ERR(fmr_pool)) { +			ret = PTR_ERR(fmr_pool); +			shost_printk(KERN_WARNING, target->scsi_host, PFX +				     "FMR pool allocation failed (%d)\n", ret); +			goto err_qp; +		} +		if (target->fmr_pool) +			ib_destroy_fmr_pool(target->fmr_pool); +		target->fmr_pool = fmr_pool; +	} +  	if (target->qp)  		ib_destroy_qp(target->qp);  	if (target->recv_cq) @@ -296,10 +545,22 @@ err:  	return ret;  } +/* + * Note: this 
function may be called without srp_alloc_iu_bufs() having been + * invoked. Hence the target->[rt]x_ring checks. + */  static void srp_free_target_ib(struct srp_target_port *target)  { +	struct srp_device *dev = target->srp_host->srp_dev;  	int i; +	if (dev->use_fast_reg) { +		if (target->fr_pool) +			srp_destroy_fr_pool(target->fr_pool); +	} else { +		if (target->fmr_pool) +			ib_destroy_fmr_pool(target->fmr_pool); +	}  	ib_destroy_qp(target->qp);  	ib_destroy_cq(target->send_cq);  	ib_destroy_cq(target->recv_cq); @@ -307,10 +568,18 @@ static void srp_free_target_ib(struct srp_target_port *target)  	target->qp = NULL;  	target->send_cq = target->recv_cq = NULL; -	for (i = 0; i < SRP_RQ_SIZE; ++i) -		srp_free_iu(target->srp_host, target->rx_ring[i]); -	for (i = 0; i < SRP_SQ_SIZE; ++i) -		srp_free_iu(target->srp_host, target->tx_ring[i]); +	if (target->rx_ring) { +		for (i = 0; i < target->queue_size; ++i) +			srp_free_iu(target->srp_host, target->rx_ring[i]); +		kfree(target->rx_ring); +		target->rx_ring = NULL; +	} +	if (target->tx_ring) { +		for (i = 0; i < target->queue_size; ++i) +			srp_free_iu(target->srp_host, target->tx_ring[i]); +		kfree(target->tx_ring); +		target->tx_ring = NULL; +	}  }  static void srp_path_rec_completion(int status, @@ -330,6 +599,8 @@ static void srp_path_rec_completion(int status,  static int srp_lookup_path(struct srp_target_port *target)  { +	int ret; +  	target->path.numb_path = 1;  	init_completion(&target->done); @@ -350,7 +621,9 @@ static int srp_lookup_path(struct srp_target_port *target)  	if (target->path_query_id < 0)  		return target->path_query_id; -	wait_for_completion(&target->done); +	ret = wait_for_completion_interruptible(&target->done); +	if (ret < 0) +		return ret;  	if (target->status < 0)  		shost_printk(KERN_WARNING, target->scsi_host, @@ -390,7 +663,7 @@ static int srp_send_req(struct srp_target_port *target)  	req->param.responder_resources	      = 4;  	req->param.remote_cm_response_timeout = 20;  	req->param.local_cm_response_timeout  = 20; -	req->param.retry_count 		      = 7; +	req->param.retry_count                = target->tl_retry_count;  	req->param.rnr_retry_count 	      = 7;  	req->param.max_cm_retries 	      = 15; @@ -492,12 +765,20 @@ static void srp_disconnect_target(struct srp_target_port *target)  static void srp_free_req_data(struct srp_target_port *target)  { -	struct ib_device *ibdev = target->srp_host->srp_dev->dev; +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_device *ibdev = dev->dev;  	struct srp_request *req;  	int i; -	for (i = 0, req = target->req_ring; i < SRP_CMD_SQ_SIZE; ++i, ++req) { -		kfree(req->fmr_list); +	if (!target->req_ring) +		return; + +	for (i = 0; i < target->req_ring_size; ++i) { +		req = &target->req_ring[i]; +		if (dev->use_fast_reg) +			kfree(req->fr_list); +		else +			kfree(req->fmr_list);  		kfree(req->map_page);  		if (req->indirect_dma_addr) {  			ib_dma_unmap_single(ibdev, req->indirect_dma_addr, @@ -506,6 +787,59 @@ static void srp_free_req_data(struct srp_target_port *target)  		}  		kfree(req->indirect_desc);  	} + +	kfree(target->req_ring); +	target->req_ring = NULL; +} + +static int srp_alloc_req_data(struct srp_target_port *target) +{ +	struct srp_device *srp_dev = target->srp_host->srp_dev; +	struct ib_device *ibdev = srp_dev->dev; +	struct srp_request *req; +	void *mr_list; +	dma_addr_t dma_addr; +	int i, ret = -ENOMEM; + +	INIT_LIST_HEAD(&target->free_reqs); + +	target->req_ring = kzalloc(target->req_ring_size * +				   sizeof(*target->req_ring), 
GFP_KERNEL); +	if (!target->req_ring) +		goto out; + +	for (i = 0; i < target->req_ring_size; ++i) { +		req = &target->req_ring[i]; +		mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *), +				  GFP_KERNEL); +		if (!mr_list) +			goto out; +		if (srp_dev->use_fast_reg) +			req->fr_list = mr_list; +		else +			req->fmr_list = mr_list; +		req->map_page = kmalloc(srp_dev->max_pages_per_mr * +					sizeof(void *), GFP_KERNEL); +		if (!req->map_page) +			goto out; +		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); +		if (!req->indirect_desc) +			goto out; + +		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, +					     target->indirect_size, +					     DMA_TO_DEVICE); +		if (ib_dma_mapping_error(ibdev, dma_addr)) +			goto out; + +		req->indirect_dma_addr = dma_addr; +		req->index = i; +		list_add_tail(&req->list, &target->free_reqs); +	} +	ret = 0; + +out: +	return ret;  }  /** @@ -528,12 +862,21 @@ static void srp_remove_target(struct srp_target_port *target)  	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);  	srp_del_scsi_host_attr(target->scsi_host); +	srp_rport_get(target->rport);  	srp_remove_host(target->scsi_host);  	scsi_remove_host(target->scsi_host); +	srp_stop_rport_timers(target->rport);  	srp_disconnect_target(target);  	ib_destroy_cm_id(target->cm_id);  	srp_free_target_ib(target); +	cancel_work_sync(&target->tl_err_work); +	srp_rport_put(target->rport);  	srp_free_req_data(target); + +	spin_lock(&target->srp_host->target_lock); +	list_del(&target->list); +	spin_unlock(&target->srp_host->target_lock); +  	scsi_host_put(target->scsi_host);  } @@ -545,10 +888,6 @@ static void srp_remove_work(struct work_struct *work)  	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);  	srp_remove_target(target); - -	spin_lock(&target->srp_host->target_lock); -	list_del(&target->list); -	spin_unlock(&target->srp_host->target_lock);  }  static void srp_rport_delete(struct srp_rport *rport) @@ -576,7 +915,9 @@ static int srp_connect_target(struct srp_target_port *target)  		ret = srp_send_req(target);  		if (ret)  			return ret; -		wait_for_completion(&target->done); +		ret = wait_for_completion_interruptible(&target->done); +		if (ret < 0) +			return ret;  		/*  		 * The CM event handling code will set status to @@ -619,21 +960,56 @@ static int srp_connect_target(struct srp_target_port *target)  	}  } +static int srp_inv_rkey(struct srp_target_port *target, u32 rkey) +{ +	struct ib_send_wr *bad_wr; +	struct ib_send_wr wr = { +		.opcode		    = IB_WR_LOCAL_INV, +		.wr_id		    = LOCAL_INV_WR_ID_MASK, +		.next		    = NULL, +		.num_sge	    = 0, +		.send_flags	    = 0, +		.ex.invalidate_rkey = rkey, +	}; + +	return ib_post_send(target->qp, &wr, &bad_wr); +} +  static void srp_unmap_data(struct scsi_cmnd *scmnd,  			   struct srp_target_port *target,  			   struct srp_request *req)  { -	struct ib_device *ibdev = target->srp_host->srp_dev->dev; -	struct ib_pool_fmr **pfmr; +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_device *ibdev = dev->dev; +	int i, res;  	if (!scsi_sglist(scmnd) ||  	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&  	     scmnd->sc_data_direction != DMA_FROM_DEVICE))  		return; -	pfmr = req->fmr_list; -	while (req->nfmr--) -		ib_fmr_pool_unmap(*pfmr++); +	if (dev->use_fast_reg) { +		struct srp_fr_desc **pfr; + +		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { +			res = srp_inv_rkey(target, (*pfr)->mr->rkey); +			if (res < 0) { +				shost_printk(KERN_ERR, target->scsi_host, PFX +				  "Queueing INV WR for rkey %#x failed 
(%d)\n", +				  (*pfr)->mr->rkey, res); +				queue_work(system_long_wq, +					   &target->tl_err_work); +			} +		} +		if (req->nmdesc) +			srp_fr_pool_put(target->fr_pool, req->fr_list, +					req->nmdesc); +	} else { +		struct ib_pool_fmr **pfmr; + +		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) +			ib_fmr_pool_unmap(*pfmr); +	}  	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),  			scmnd->sc_data_direction); @@ -643,6 +1019,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,   * srp_claim_req - Take ownership of the scmnd associated with a request.   * @target: SRP target port.   * @req: SRP request. + * @sdev: If not NULL, only take ownership for this SCSI device.   * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take   *         ownership of @req->scmnd if it equals @scmnd.   * @@ -651,16 +1028,17 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,   */  static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,  				       struct srp_request *req, +				       struct scsi_device *sdev,  				       struct scsi_cmnd *scmnd)  {  	unsigned long flags;  	spin_lock_irqsave(&target->lock, flags); -	if (!scmnd) { +	if (req->scmnd && +	    (!sdev || req->scmnd->device == sdev) && +	    (!scmnd || req->scmnd == scmnd)) {  		scmnd = req->scmnd;  		req->scmnd = NULL; -	} else if (req->scmnd == scmnd) { -		req->scmnd = NULL;  	} else {  		scmnd = NULL;  	} @@ -671,6 +1049,10 @@ static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,  /**   * srp_free_req() - Unmap data and add request to the free request list. + * @target: SRP target port. + * @req:    Request to be freed. + * @scmnd:  SCSI command associated with @req. + * @req_lim_delta: Amount to be added to @target->req_lim.   */  static void srp_free_req(struct srp_target_port *target,  			 struct srp_request *req, struct scsi_cmnd *scmnd, @@ -686,23 +1068,52 @@ static void srp_free_req(struct srp_target_port *target,  	spin_unlock_irqrestore(&target->lock, flags);  } -static void srp_reset_req(struct srp_target_port *target, struct srp_request *req) +static void srp_finish_req(struct srp_target_port *target, +			   struct srp_request *req, struct scsi_device *sdev, +			   int result)  { -	struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); +	struct scsi_cmnd *scmnd = srp_claim_req(target, req, sdev, NULL);  	if (scmnd) {  		srp_free_req(target, req, scmnd, 0); -		scmnd->result = DID_RESET << 16; +		scmnd->result = result;  		scmnd->scsi_done(scmnd);  	}  } -static int srp_reconnect_target(struct srp_target_port *target) +static void srp_terminate_io(struct srp_rport *rport)  { +	struct srp_target_port *target = rport->lld_data;  	struct Scsi_Host *shost = target->scsi_host; -	int i, ret; +	struct scsi_device *sdev; +	int i; -	scsi_target_block(&shost->shost_gendev); +	/* +	 * Invoking srp_terminate_io() while srp_queuecommand() is running +	 * is not safe. Hence the warning statement below. +	 */ +	shost_for_each_device(sdev, shost) +		WARN_ON_ONCE(sdev->request_queue->request_fn_active); + +	for (i = 0; i < target->req_ring_size; ++i) { +		struct srp_request *req = &target->req_ring[i]; +		srp_finish_req(target, req, NULL, DID_TRANSPORT_FAILFAST << 16); +	} +} + +/* + * It is up to the caller to ensure that srp_rport_reconnect() calls are + * serialized and that no concurrent srp_queuecommand(), srp_abort(), + * srp_reset_device() or srp_reset_host() calls will occur while this function + * is in progress. 
One way to realize that is not to call this function + * directly but to call srp_reconnect_rport() instead since that last function + * serializes calls of this function via rport->mutex and also blocks + * srp_queuecommand() calls before invoking this function. + */ +static int srp_rport_reconnect(struct srp_rport *rport) +{ +	struct srp_target_port *target = rport->lld_data; +	int i, ret;  	srp_disconnect_target(target);  	/* @@ -711,51 +1122,29 @@ static int srp_reconnect_target(struct srp_target_port *target)  	 * callbacks will have finished before a new QP is allocated.  	 */  	ret = srp_new_cm_id(target); -	/* -	 * Whether or not creating a new CM ID succeeded, create a new -	 * QP. This guarantees that all completion callback function -	 * invocations have finished before request resetting starts. -	 */ -	if (ret == 0) -		ret = srp_create_target_ib(target); -	else -		srp_create_target_ib(target); -	for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { +	for (i = 0; i < target->req_ring_size; ++i) {  		struct srp_request *req = &target->req_ring[i]; -		if (req->scmnd) -			srp_reset_req(target, req); +		srp_finish_req(target, req, NULL, DID_RESET << 16);  	} +	/* +	 * Whether or not creating a new CM ID succeeded, create a new +	 * QP. This guarantees that all callback functions for the old QP have +	 * finished before any send requests are posted on the new QP. +	 */ +	ret += srp_create_target_ib(target); +  	INIT_LIST_HEAD(&target->free_tx); -	for (i = 0; i < SRP_SQ_SIZE; ++i) +	for (i = 0; i < target->queue_size; ++i)  		list_add(&target->tx_ring[i]->list, &target->free_tx);  	if (ret == 0)  		ret = srp_connect_target(target); -	scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING : -			    SDEV_TRANSPORT_OFFLINE); -	target->transport_offline = !!ret; - -	if (ret) -		goto err; - -	shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n"); - -	return ret; - -err: -	shost_printk(KERN_ERR, target->scsi_host, -		     PFX "reconnect failed (%d), removing target port.\n", ret); - -	/* -	 * We couldn't reconnect, so kill our target port off. -	 * However, we have to defer the real removal because we -	 * are in the context of the SCSI error handler now, which -	 * will deadlock if we call scsi_remove_host(). 
-	 */ -	srp_queue_remove_work(target); +	if (ret == 0) +		shost_printk(KERN_INFO, target->scsi_host, +			     PFX "reconnect succeeded\n");  	return ret;  } @@ -777,33 +1166,87 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,  static int srp_map_finish_fmr(struct srp_map_state *state,  			      struct srp_target_port *target)  { -	struct srp_device *dev = target->srp_host->srp_dev;  	struct ib_pool_fmr *fmr;  	u64 io_addr = 0; -	if (!state->npages) -		return 0; - -	if (state->npages == 1) { -		srp_map_desc(state, state->base_dma_addr, state->fmr_len, -			     target->rkey); -		state->npages = state->fmr_len = 0; -		return 0; -	} - -	fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages, +	fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages,  				   state->npages, io_addr);  	if (IS_ERR(fmr))  		return PTR_ERR(fmr);  	*state->next_fmr++ = fmr; -	state->nfmr++; +	state->nmdesc++; + +	srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey); -	srp_map_desc(state, 0, state->fmr_len, fmr->fmr->rkey); -	state->npages = state->fmr_len = 0;  	return 0;  } +static int srp_map_finish_fr(struct srp_map_state *state, +			     struct srp_target_port *target) +{ +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_send_wr *bad_wr; +	struct ib_send_wr wr; +	struct srp_fr_desc *desc; +	u32 rkey; + +	desc = srp_fr_pool_get(target->fr_pool); +	if (!desc) +		return -ENOMEM; + +	rkey = ib_inc_rkey(desc->mr->rkey); +	ib_update_fast_reg_key(desc->mr, rkey); + +	memcpy(desc->frpl->page_list, state->pages, +	       sizeof(state->pages[0]) * state->npages); + +	memset(&wr, 0, sizeof(wr)); +	wr.opcode = IB_WR_FAST_REG_MR; +	wr.wr_id = FAST_REG_WR_ID_MASK; +	wr.wr.fast_reg.iova_start = state->base_dma_addr; +	wr.wr.fast_reg.page_list = desc->frpl; +	wr.wr.fast_reg.page_list_len = state->npages; +	wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size); +	wr.wr.fast_reg.length = state->dma_len; +	wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | +				       IB_ACCESS_REMOTE_READ | +				       IB_ACCESS_REMOTE_WRITE); +	wr.wr.fast_reg.rkey = desc->mr->lkey; + +	*state->next_fr++ = desc; +	state->nmdesc++; + +	srp_map_desc(state, state->base_dma_addr, state->dma_len, +		     desc->mr->rkey); + +	return ib_post_send(target->qp, &wr, &bad_wr); +} + +static int srp_finish_mapping(struct srp_map_state *state, +			      struct srp_target_port *target) +{ +	int ret = 0; + +	if (state->npages == 0) +		return 0; + +	if (state->npages == 1 && !register_always) +		srp_map_desc(state, state->base_dma_addr, state->dma_len, +			     target->rkey); +	else +		ret = target->srp_host->srp_dev->use_fast_reg ? 
+			srp_map_finish_fr(state, target) : +			srp_map_finish_fmr(state, target); + +	if (ret == 0) { +		state->npages = 0; +		state->dma_len = 0; +	} + +	return ret; +} +  static void srp_map_update_start(struct srp_map_state *state,  				 struct scatterlist *sg, int sg_index,  				 dma_addr_t dma_addr) @@ -816,7 +1259,7 @@ static void srp_map_update_start(struct srp_map_state *state,  static int srp_map_sg_entry(struct srp_map_state *state,  			    struct srp_target_port *target,  			    struct scatterlist *sg, int sg_index, -			    int use_fmr) +			    bool use_mr)  {  	struct srp_device *dev = target->srp_host->srp_dev;  	struct ib_device *ibdev = dev->dev; @@ -828,23 +1271,25 @@ static int srp_map_sg_entry(struct srp_map_state *state,  	if (!dma_len)  		return 0; -	if (use_fmr == SRP_MAP_NO_FMR) { -		/* Once we're in direct map mode for a request, we don't -		 * go back to FMR mode, so no need to update anything +	if (!use_mr) { +		/* +		 * Once we're in direct map mode for a request, we don't +		 * go back to FMR or FR mode, so no need to update anything  		 * other than the descriptor.  		 */  		srp_map_desc(state, dma_addr, dma_len, target->rkey);  		return 0;  	} -	/* If we start at an offset into the FMR page, don't merge into -	 * the current FMR. Finish it out, and use the kernel's MR for this -	 * sg entry. This is to avoid potential bugs on some SRP targets -	 * that were never quite defined, but went away when the initiator -	 * avoided using FMR on such page fragments. +	/* +	 * Since not all RDMA HW drivers support non-zero page offsets for +	 * FMR, if we start at an offset into a page, don't merge into the +	 * current FMR mapping. Finish it out, and use the kernel's MR for +	 * this sg entry.  	 */ -	if (dma_addr & ~dev->fmr_page_mask || dma_len > dev->fmr_max_size) { -		ret = srp_map_finish_fmr(state, target); +	if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) || +	    dma_len > dev->mr_max_size) { +		ret = srp_finish_mapping(state, target);  		if (ret)  			return ret; @@ -853,52 +1298,106 @@ static int srp_map_sg_entry(struct srp_map_state *state,  		return 0;  	} -	/* If this is the first sg to go into the FMR, save our position. -	 * We need to know the first unmapped entry, its index, and the -	 * first unmapped address within that entry to be able to restart -	 * mapping after an error. +	/* +	 * If this is the first sg that will be mapped via FMR or via FR, save +	 * our position. We need to know the first unmapped entry, its index, +	 * and the first unmapped address within that entry to be able to +	 * restart mapping after an error.  	 
*/  	if (!state->unmapped_sg)  		srp_map_update_start(state, sg, sg_index, dma_addr);  	while (dma_len) { -		if (state->npages == SRP_FMR_SIZE) { -			ret = srp_map_finish_fmr(state, target); +		unsigned offset = dma_addr & ~dev->mr_page_mask; +		if (state->npages == dev->max_pages_per_mr || offset != 0) { +			ret = srp_finish_mapping(state, target);  			if (ret)  				return ret;  			srp_map_update_start(state, sg, sg_index, dma_addr);  		} -		len = min_t(unsigned int, dma_len, dev->fmr_page_size); +		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);  		if (!state->npages)  			state->base_dma_addr = dma_addr; -		state->pages[state->npages++] = dma_addr; -		state->fmr_len += len; +		state->pages[state->npages++] = dma_addr & dev->mr_page_mask; +		state->dma_len += len;  		dma_addr += len;  		dma_len -= len;  	} -	/* If the last entry of the FMR wasn't a full page, then we need to +	/* +	 * If the last entry of the MR wasn't a full page, then we need to  	 * close it out and start a new one -- we can only merge at page  	 * boundries.  	 */  	ret = 0; -	if (len != dev->fmr_page_size) { -		ret = srp_map_finish_fmr(state, target); +	if (len != dev->mr_page_size) { +		ret = srp_finish_mapping(state, target);  		if (!ret)  			srp_map_update_start(state, NULL, 0, 0);  	}  	return ret;  } +static int srp_map_sg(struct srp_map_state *state, +		      struct srp_target_port *target, struct srp_request *req, +		      struct scatterlist *scat, int count) +{ +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_device *ibdev = dev->dev; +	struct scatterlist *sg; +	int i; +	bool use_mr; + +	state->desc	= req->indirect_desc; +	state->pages	= req->map_page; +	if (dev->use_fast_reg) { +		state->next_fr = req->fr_list; +		use_mr = !!target->fr_pool; +	} else { +		state->next_fmr = req->fmr_list; +		use_mr = !!target->fmr_pool; +	} + +	for_each_sg(scat, sg, count, i) { +		if (srp_map_sg_entry(state, target, sg, i, use_mr)) { +			/* +			 * Memory registration failed, so backtrack to the +			 * first unmapped entry and continue on without using +			 * memory registration. 
+			 */ +			dma_addr_t dma_addr; +			unsigned int dma_len; + +backtrack: +			sg = state->unmapped_sg; +			i = state->unmapped_index; + +			dma_addr = ib_sg_dma_address(ibdev, sg); +			dma_len = ib_sg_dma_len(ibdev, sg); +			dma_len -= (state->unmapped_addr - dma_addr); +			dma_addr = state->unmapped_addr; +			use_mr = false; +			srp_map_desc(state, dma_addr, dma_len, target->rkey); +		} +	} + +	if (use_mr && srp_finish_mapping(state, target)) +		goto backtrack; + +	req->nmdesc = state->nmdesc; + +	return 0; +} +  static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,  			struct srp_request *req)  { -	struct scatterlist *scat, *sg; +	struct scatterlist *scat;  	struct srp_cmd *cmd = req->cmd->buf; -	int i, len, nents, count, use_fmr; +	int len, nents, count;  	struct srp_device *dev;  	struct ib_device *ibdev;  	struct srp_map_state state; @@ -930,7 +1429,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,  	fmt = SRP_DATA_DESC_DIRECT;  	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf); -	if (count == 1) { +	if (count == 1 && !register_always) {  		/*  		 * The midlayer only generated a single gather/scatter  		 * entry, or DMA mapping coalesced everything to a @@ -943,13 +1442,13 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,  		buf->key = cpu_to_be32(target->rkey);  		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); -		req->nfmr = 0; +		req->nmdesc = 0;  		goto map_complete;  	} -	/* We have more than one scatter/gather entry, so build our indirect -	 * descriptor table, trying to merge as many entries with FMR as we -	 * can. +	/* +	 * We have more than one scatter/gather entry, so build our indirect +	 * descriptor table, trying to merge as many entries as we can.  	 */  	indirect_hdr = (void *) cmd->add_data; @@ -957,35 +1456,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,  				   target->indirect_size, DMA_TO_DEVICE);  	memset(&state, 0, sizeof(state)); -	state.desc	= req->indirect_desc; -	state.pages	= req->map_page; -	state.next_fmr	= req->fmr_list; - -	use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR; - -	for_each_sg(scat, sg, count, i) { -		if (srp_map_sg_entry(&state, target, sg, i, use_fmr)) { -			/* FMR mapping failed, so backtrack to the first -			 * unmapped entry and continue on without using FMR. -			 */ -			dma_addr_t dma_addr; -			unsigned int dma_len; - -backtrack: -			sg = state.unmapped_sg; -			i = state.unmapped_index; - -			dma_addr = ib_sg_dma_address(ibdev, sg); -			dma_len = ib_sg_dma_len(ibdev, sg); -			dma_len -= (state.unmapped_addr - dma_addr); -			dma_addr = state.unmapped_addr; -			use_fmr = SRP_MAP_NO_FMR; -			srp_map_desc(&state, dma_addr, dma_len, target->rkey); -		} -	} - -	if (use_fmr == SRP_MAP_ALLOW_FMR && srp_map_finish_fmr(&state, target)) -		goto backtrack; +	srp_map_sg(&state, target, req, scat, count);  	/* We've mapped the request, now pull as much of the indirect  	 * descriptor table as we can into the command buffer. If this @@ -993,9 +1464,9 @@ backtrack:  	 * guaranteed to fit into the command, as the SCSI layer won't  	 * give us more S/G entries than we allow.  	 */ -	req->nfmr = state.nfmr;  	if (state.ndesc == 1) { -		/* FMR mapping was able to collapse this to one entry, +		/* +		 * Memory registration collapsed the sg-list into one entry,  		 * so use a direct descriptor.  		 
*/  		struct srp_direct_buf *buf = (void *) cmd->add_data; @@ -1151,7 +1622,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)  		complete(&target->tsk_mgmt_done);  	} else {  		req = &target->req_ring[rsp->tag]; -		scmnd = srp_claim_req(target, req, NULL); +		scmnd = srp_claim_req(target, req, NULL, NULL);  		if (!scmnd) {  			shost_printk(KERN_ERR, target->scsi_host,  				     "Null scmnd for RSP w/tag %016llx\n", @@ -1302,15 +1773,41 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)  			     PFX "Recv failed with error code %d\n", res);  } -static void srp_handle_qp_err(enum ib_wc_status wc_status, -			      enum ib_wc_opcode wc_opcode, -			      struct srp_target_port *target) +/** + * srp_tl_err_work() - handle a transport layer error + * @work: Work structure embedded in an SRP target port. + * + * Note: This function may get invoked before the rport has been created, + * hence the target->rport test. + */ +static void srp_tl_err_work(struct work_struct *work) +{ +	struct srp_target_port *target; + +	target = container_of(work, struct srp_target_port, tl_err_work); +	if (target->rport) +		srp_start_tl_fail_timers(target->rport); +} + +static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, +			      bool send_err, struct srp_target_port *target)  {  	if (target->connected && !target->qp_in_error) { -		shost_printk(KERN_ERR, target->scsi_host, -			     PFX "failed %s status %d\n", -			     wc_opcode & IB_WC_RECV ? "receive" : "send", -			     wc_status); +		if (wr_id & LOCAL_INV_WR_ID_MASK) { +			shost_printk(KERN_ERR, target->scsi_host, PFX +				     "LOCAL_INV failed with status %d\n", +				     wc_status); +		} else if (wr_id & FAST_REG_WR_ID_MASK) { +			shost_printk(KERN_ERR, target->scsi_host, PFX +				     "FAST_REG_MR failed status %d\n", +				     wc_status); +		} else { +			shost_printk(KERN_ERR, target->scsi_host, +				     PFX "failed %s status %d for iu %p\n", +				     send_err ? 
"send" : "receive", +				     wc_status, (void *)(uintptr_t)wr_id); +		} +		queue_work(system_long_wq, &target->tl_err_work);  	}  	target->qp_in_error = true;  } @@ -1325,7 +1822,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)  		if (likely(wc.status == IB_WC_SUCCESS)) {  			srp_handle_recv(target, &wc);  		} else { -			srp_handle_qp_err(wc.status, wc.opcode, target); +			srp_handle_qp_err(wc.wr_id, wc.status, false, target);  		}  	}  } @@ -1341,7 +1838,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)  			iu = (struct srp_iu *) (uintptr_t) wc.wr_id;  			list_add(&iu->list, &target->free_tx);  		} else { -			srp_handle_qp_err(wc.status, wc.opcode, target); +			srp_handle_qp_err(wc.wr_id, wc.status, true, target);  		}  	}  } @@ -1349,18 +1846,27 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)  static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)  {  	struct srp_target_port *target = host_to_target(shost); +	struct srp_rport *rport = target->rport;  	struct srp_request *req;  	struct srp_iu *iu;  	struct srp_cmd *cmd;  	struct ib_device *dev;  	unsigned long flags; -	int len; +	int len, ret; +	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler; -	if (unlikely(target->transport_offline)) { -		scmnd->result = DID_NO_CONNECT << 16; -		scmnd->scsi_done(scmnd); -		return 0; -	} +	/* +	 * The SCSI EH thread is the only context from which srp_queuecommand() +	 * can get invoked for blocked devices (SDEV_BLOCK / +	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by +	 * locking the rport mutex if invoked from inside the SCSI EH. +	 */ +	if (in_scsi_eh) +		mutex_lock(&rport->mutex); + +	scmnd->result = srp_chkready(target->rport); +	if (unlikely(scmnd->result)) +		goto err;  	spin_lock_irqsave(&target->lock, flags);  	iu = __srp_get_tx_iu(target, SRP_IU_CMD); @@ -1375,7 +1881,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)  	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,  				   DMA_TO_DEVICE); -	scmnd->result        = 0;  	scmnd->host_scribble = (void *) req;  	cmd = iu->buf; @@ -1392,7 +1897,15 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)  	len = srp_map_data(scmnd, target, req);  	if (len < 0) {  		shost_printk(KERN_ERR, target->scsi_host, -			     PFX "Failed to map data\n"); +			     PFX "Failed to map data (%d)\n", len); +		/* +		 * If we ran out of memory descriptors (-ENOMEM) because an +		 * application is queuing many requests with more than +		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer +		 * to reduce queue depth temporarily. +		 */ +		scmnd->result = len == -ENOMEM ? +			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;  		goto err_iu;  	} @@ -1404,7 +1917,13 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)  		goto err_unmap;  	} -	return 0; +	ret = 0; + +unlock_rport: +	if (in_scsi_eh) +		mutex_unlock(&rport->mutex); + +	return ret;  err_unmap:  	srp_unmap_data(scmnd, target, req); @@ -1412,20 +1931,47 @@ err_unmap:  err_iu:  	srp_put_tx_iu(target, iu, SRP_IU_CMD); +	/* +	 * Avoid that the loops that iterate over the request ring can +	 * encounter a dangling SCSI command pointer. 
+	 */ +	req->scmnd = NULL; +  	spin_lock_irqsave(&target->lock, flags);  	list_add(&req->list, &target->free_reqs);  err_unlock:  	spin_unlock_irqrestore(&target->lock, flags); -	return SCSI_MLQUEUE_HOST_BUSY; +err: +	if (scmnd->result) { +		scmnd->scsi_done(scmnd); +		ret = 0; +	} else { +		ret = SCSI_MLQUEUE_HOST_BUSY; +	} + +	goto unlock_rport;  } +/* + * Note: the resources allocated in this function are freed in + * srp_free_target_ib(). + */  static int srp_alloc_iu_bufs(struct srp_target_port *target)  {  	int i; -	for (i = 0; i < SRP_RQ_SIZE; ++i) { +	target->rx_ring = kzalloc(target->queue_size * sizeof(*target->rx_ring), +				  GFP_KERNEL); +	if (!target->rx_ring) +		goto err_no_ring; +	target->tx_ring = kzalloc(target->queue_size * sizeof(*target->tx_ring), +				  GFP_KERNEL); +	if (!target->tx_ring) +		goto err_no_ring; + +	for (i = 0; i < target->queue_size; ++i) {  		target->rx_ring[i] = srp_alloc_iu(target->srp_host,  						  target->max_ti_iu_len,  						  GFP_KERNEL, DMA_FROM_DEVICE); @@ -1433,7 +1979,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)  			goto err;  	} -	for (i = 0; i < SRP_SQ_SIZE; ++i) { +	for (i = 0; i < target->queue_size; ++i) {  		target->tx_ring[i] = srp_alloc_iu(target->srp_host,  						  target->max_iu_len,  						  GFP_KERNEL, DMA_TO_DEVICE); @@ -1446,16 +1992,18 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)  	return 0;  err: -	for (i = 0; i < SRP_RQ_SIZE; ++i) { +	for (i = 0; i < target->queue_size; ++i) {  		srp_free_iu(target->srp_host, target->rx_ring[i]); -		target->rx_ring[i] = NULL; -	} - -	for (i = 0; i < SRP_SQ_SIZE; ++i) {  		srp_free_iu(target->srp_host, target->tx_ring[i]); -		target->tx_ring[i] = NULL;  	} + +err_no_ring: +	kfree(target->tx_ring); +	target->tx_ring = NULL; +	kfree(target->rx_ring); +	target->rx_ring = NULL; +  	return -ENOMEM;  } @@ -1506,6 +2054,9 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,  		target->scsi_host->can_queue  			= min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,  			      target->scsi_host->can_queue); +		target->scsi_host->cmd_per_lun +			= min_t(int, target->scsi_host->can_queue, +				target->scsi_host->cmd_per_lun);  	} else {  		shost_printk(KERN_WARNING, target->scsi_host,  			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); @@ -1513,7 +2064,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,  		goto error;  	} -	if (!target->rx_ring[0]) { +	if (!target->rx_ring) {  		ret = srp_alloc_iu_bufs(target);  		if (ret)  			goto error; @@ -1533,7 +2084,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,  	if (ret)  		goto error_free; -	for (i = 0; i < SRP_RQ_SIZE; i++) { +	for (i = 0; i < target->queue_size; i++) {  		struct srp_iu *iu = target->rx_ring[i];  		ret = srp_post_recv(target, iu);  		if (ret) @@ -1619,8 +2170,10 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,  				shost_printk(KERN_WARNING, shost,  					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");  			else -				shost_printk(KERN_WARNING, shost, -					    PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason); +				shost_printk(KERN_WARNING, shost, PFX +					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", +					     target->path.sgid.raw, +					     target->orig_dgid, reason);  		} else  			shost_printk(KERN_WARNING, shost,  				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED," @@ -1672,11 +2225,13 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)  		if (ib_send_cm_drep(cm_id, NULL, 0))  			shost_printk(KERN_ERR, 
target->scsi_host,  				     PFX "Sending CM DREP failed\n"); +		queue_work(system_long_wq, &target->tl_err_work);  		break;  	case IB_CM_TIMEWAIT_EXIT:  		shost_printk(KERN_ERR, target->scsi_host,  			     PFX "connection closed\n"); +		comp = 1;  		target->status = 0;  		break; @@ -1698,9 +2253,61 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)  	return 0;  } +/** + * srp_change_queue_type - changing device queue tag type + * @sdev: scsi device struct + * @tag_type: requested tag type + * + * Returns queue tag type. + */ +static int +srp_change_queue_type(struct scsi_device *sdev, int tag_type) +{ +	if (sdev->tagged_supported) { +		scsi_set_tag_type(sdev, tag_type); +		if (tag_type) +			scsi_activate_tcq(sdev, sdev->queue_depth); +		else +			scsi_deactivate_tcq(sdev, sdev->queue_depth); +	} else +		tag_type = 0; + +	return tag_type; +} + +/** + * srp_change_queue_depth - setting device queue depth + * @sdev: scsi device struct + * @qdepth: requested queue depth + * @reason: SCSI_QDEPTH_DEFAULT/SCSI_QDEPTH_QFULL/SCSI_QDEPTH_RAMP_UP + * (see include/scsi/scsi_host.h for definition) + * + * Returns queue depth. + */ +static int +srp_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) +{ +	struct Scsi_Host *shost = sdev->host; +	int max_depth; +	if (reason == SCSI_QDEPTH_DEFAULT || reason == SCSI_QDEPTH_RAMP_UP) { +		max_depth = shost->can_queue; +		if (!sdev->tagged_supported) +			max_depth = 1; +		if (qdepth > max_depth) +			qdepth = max_depth; +		scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth); +	} else if (reason == SCSI_QDEPTH_QFULL) +		scsi_track_queue_full(sdev, qdepth); +	else +		return -EOPNOTSUPP; + +	return sdev->queue_depth; +} +  static int srp_send_tsk_mgmt(struct srp_target_port *target,  			     u64 req_tag, unsigned int lun, u8 func)  { +	struct srp_rport *rport = target->rport;  	struct ib_device *dev = target->srp_host->srp_dev->dev;  	struct srp_iu *iu;  	struct srp_tsk_mgmt *tsk_mgmt; @@ -1710,12 +2317,20 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,  	init_completion(&target->tsk_mgmt_done); +	/* +	 * Lock the rport mutex to avoid that srp_create_target_ib() is +	 * invoked while a task management function is being sent. 
+	 */ +	mutex_lock(&rport->mutex);  	spin_lock_irq(&target->lock);  	iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);  	spin_unlock_irq(&target->lock); -	if (!iu) +	if (!iu) { +		mutex_unlock(&rport->mutex); +  		return -1; +	}  	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,  				   DMA_TO_DEVICE); @@ -1732,8 +2347,11 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,  				      DMA_TO_DEVICE);  	if (srp_post_send(target, iu, sizeof *tsk_mgmt)) {  		srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT); +		mutex_unlock(&rport->mutex); +  		return -1;  	} +	mutex_unlock(&rport->mutex);  	if (!wait_for_completion_timeout(&target->tsk_mgmt_done,  					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS))) @@ -1750,12 +2368,12 @@ static int srp_abort(struct scsi_cmnd *scmnd)  	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); -	if (!req || !srp_claim_req(target, req, scmnd)) -		return FAILED; +	if (!req || !srp_claim_req(target, req, NULL, scmnd)) +		return SUCCESS;  	if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,  			      SRP_TSK_ABORT_TASK) == 0)  		ret = SUCCESS; -	else if (target->transport_offline) +	else if (target->rport->state == SRP_RPORT_LOST)  		ret = FAST_IO_FAIL;  	else  		ret = FAILED; @@ -1779,10 +2397,9 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)  	if (target->tsk_mgmt_status)  		return FAILED; -	for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { +	for (i = 0; i < target->req_ring_size; ++i) {  		struct srp_request *req = &target->req_ring[i]; -		if (req->scmnd && req->scmnd->device == scmnd->device) -			srp_reset_req(target, req); +		srp_finish_req(target, req, scmnd->device, DID_RESET << 16);  	}  	return SUCCESS; @@ -1791,14 +2408,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)  static int srp_reset_host(struct scsi_cmnd *scmnd)  {  	struct srp_target_port *target = host_to_target(scmnd->device->host); -	int ret = FAILED;  	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); -	if (!srp_reconnect_target(target)) -		ret = SUCCESS; - -	return ret; +	return srp_reconnect_rport(target->rport) == 0 ? 
SUCCESS : FAILED;  }  static int srp_slave_configure(struct scsi_device *sdev) @@ -1851,6 +2464,14 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,  	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));  } +static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, +			 char *buf) +{ +	struct srp_target_port *target = host_to_target(class_to_shost(dev)); + +	return sprintf(buf, "%pI6\n", target->path.sgid.raw); +} +  static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,  			 char *buf)  { @@ -1907,6 +2528,14 @@ static ssize_t show_comp_vector(struct device *dev,  	return sprintf(buf, "%d\n", target->comp_vector);  } +static ssize_t show_tl_retry_count(struct device *dev, +				   struct device_attribute *attr, char *buf) +{ +	struct srp_target_port *target = host_to_target(class_to_shost(dev)); + +	return sprintf(buf, "%d\n", target->tl_retry_count); +} +  static ssize_t show_cmd_sg_entries(struct device *dev,  				   struct device_attribute *attr, char *buf)  { @@ -1927,6 +2556,7 @@ static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);  static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);  static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);  static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL); +static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);  static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);  static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);  static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL); @@ -1934,6 +2564,7 @@ static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);  static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);  static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);  static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL); +static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);  static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);  static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL); @@ -1942,6 +2573,7 @@ static struct device_attribute *srp_host_attrs[] = {  	&dev_attr_ioc_guid,  	&dev_attr_service_id,  	&dev_attr_pkey, +	&dev_attr_sgid,  	&dev_attr_dgid,  	&dev_attr_orig_dgid,  	&dev_attr_req_lim, @@ -1949,6 +2581,7 @@ static struct device_attribute *srp_host_attrs[] = {  	&dev_attr_local_ib_port,  	&dev_attr_local_ib_device,  	&dev_attr_comp_vector, +	&dev_attr_tl_retry_count,  	&dev_attr_cmd_sg_entries,  	&dev_attr_allow_ext_sg,  	NULL @@ -1961,14 +2594,16 @@ static struct scsi_host_template srp_template = {  	.slave_configure		= srp_slave_configure,  	.info				= srp_target_info,  	.queuecommand			= srp_queuecommand, +	.change_queue_depth             = srp_change_queue_depth, +	.change_queue_type              = srp_change_queue_type,  	.eh_abort_handler		= srp_abort,  	.eh_device_reset_handler	= srp_reset_device,  	.eh_host_reset_handler		= srp_reset_host,  	.skip_settle_delay		= true,  	.sg_tablesize			= SRP_DEF_SG_TABLESIZE, -	.can_queue			= SRP_CMD_SQ_SIZE, +	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,  	.this_id			= -1, -	.cmd_per_lun			= SRP_CMD_SQ_SIZE, +	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,  	.use_clustering			= ENABLE_CLUSTERING,  	.shost_attrs			= srp_host_attrs  }; @@ -1994,6 +2629,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)  	}  	rport->lld_data = target; +	target->rport = rport;  	
spin_lock(&host->target_lock);  	list_add_tail(&target->list, &host->target_list); @@ -2022,6 +2658,8 @@ static struct class srp_class = {  /**   * srp_conn_unique() - check whether the connection to a target is unique + * @host:   SRP host. + * @target: SRP target port.   */  static bool srp_conn_unique(struct srp_host *host,  			    struct srp_target_port *target) @@ -2073,6 +2711,8 @@ enum {  	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,  	SRP_OPT_SG_TABLESIZE	= 1 << 11,  	SRP_OPT_COMP_VECTOR	= 1 << 12, +	SRP_OPT_TL_RETRY_COUNT	= 1 << 13, +	SRP_OPT_QUEUE_SIZE	= 1 << 14,  	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|  				   SRP_OPT_IOC_GUID	|  				   SRP_OPT_DGID		| @@ -2094,6 +2734,8 @@ static const match_table_t srp_opt_tokens = {  	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},  	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},  	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	}, +	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	}, +	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},  	{ SRP_OPT_ERR,			NULL 			}  }; @@ -2188,13 +2830,25 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)  			target->scsi_host->max_sectors = token;  			break; +		case SRP_OPT_QUEUE_SIZE: +			if (match_int(args, &token) || token < 1) { +				pr_warn("bad queue_size parameter '%s'\n", p); +				goto out; +			} +			target->scsi_host->can_queue = token; +			target->queue_size = token + SRP_RSP_SQ_SIZE + +					     SRP_TSK_MGMT_SQ_SIZE; +			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) +				target->scsi_host->cmd_per_lun = token; +			break; +  		case SRP_OPT_MAX_CMD_PER_LUN: -			if (match_int(args, &token)) { +			if (match_int(args, &token) || token < 1) {  				pr_warn("bad max cmd_per_lun parameter '%s'\n",  					p);  				goto out;  			} -			target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE); +			target->scsi_host->cmd_per_lun = token;  			break;  		case SRP_OPT_IO_CLASS: @@ -2257,6 +2911,15 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)  			target->comp_vector = token;  			break; +		case SRP_OPT_TL_RETRY_COUNT: +			if (match_int(args, &token) || token < 2 || token > 7) { +				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", +					p); +				goto out; +			} +			target->tl_retry_count = token; +			break; +  		default:  			pr_warn("unknown parameter or missing value '%s' in target creation request\n",  				p); @@ -2273,6 +2936,12 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)  				pr_warn("target creation request is missing parameter '%s'\n",  					srp_opt_tokens[i].pattern); +	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue +	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) +		pr_warn("cmd_per_lun = %d > queue_size = %d\n", +			target->scsi_host->cmd_per_lun, +			target->scsi_host->can_queue); +  out:  	kfree(options);  	return ret; @@ -2286,9 +2955,9 @@ static ssize_t srp_create_target(struct device *dev,  		container_of(dev, struct srp_host, dev);  	struct Scsi_Host *target_host;  	struct srp_target_port *target; -	struct ib_device *ibdev = host->srp_dev->dev; -	dma_addr_t dma_addr; -	int i, ret; +	struct srp_device *srp_dev = host->srp_dev; +	struct ib_device *ibdev = srp_dev->dev; +	int ret;  	target_host = scsi_host_alloc(&srp_template,  				      sizeof (struct srp_target_port)); @@ -2311,11 +2980,17 @@ static ssize_t srp_create_target(struct device *dev,  	target->cmd_sg_cnt	= cmd_sg_entries;  	target->sg_tablesize	= indirect_sg_entries ? 
: cmd_sg_entries;  	target->allow_ext_sg	= allow_ext_sg; +	target->tl_retry_count	= 7; +	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE; + +	mutex_lock(&host->add_target_mutex);  	ret = srp_parse_options(buf, target);  	if (ret)  		goto err; +	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; +  	if (!srp_conn_unique(target->srp_host, target)) {  		shost_printk(KERN_INFO, target->scsi_host,  			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", @@ -2326,9 +3001,9 @@ static ssize_t srp_create_target(struct device *dev,  		goto err;  	} -	if (!host->srp_dev->fmr_pool && !target->allow_ext_sg && -				target->cmd_sg_cnt < target->sg_tablesize) { -		pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); +	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && +	    target->cmd_sg_cnt < target->sg_tablesize) { +		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");  		target->sg_tablesize = target->cmd_sg_cnt;  	} @@ -2339,42 +3014,17 @@ static ssize_t srp_create_target(struct device *dev,  			     sizeof (struct srp_indirect_buf) +  			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf); +	INIT_WORK(&target->tl_err_work, srp_tl_err_work);  	INIT_WORK(&target->remove_work, srp_remove_work);  	spin_lock_init(&target->lock);  	INIT_LIST_HEAD(&target->free_tx); -	INIT_LIST_HEAD(&target->free_reqs); -	for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { -		struct srp_request *req = &target->req_ring[i]; - -		req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof (void *), -					GFP_KERNEL); -		req->map_page = kmalloc(SRP_FMR_SIZE * sizeof (void *), -					GFP_KERNEL); -		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); -		if (!req->fmr_list || !req->map_page || !req->indirect_desc) -			goto err_free_mem; - -		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, -					     target->indirect_size, -					     DMA_TO_DEVICE); -		if (ib_dma_mapping_error(ibdev, dma_addr)) -			goto err_free_mem; - -		req->indirect_dma_addr = dma_addr; -		req->index = i; -		list_add_tail(&req->list, &target->free_reqs); -	} - -	ib_query_gid(ibdev, host->port, 0, &target->path.sgid); +	ret = srp_alloc_req_data(target); +	if (ret) +		goto err_free_mem; -	shost_printk(KERN_DEBUG, target->scsi_host, PFX -		     "new target: id_ext %016llx ioc_guid %016llx pkey %04x " -		     "service_id %016llx dgid %pI6\n", -	       (unsigned long long) be64_to_cpu(target->id_ext), -	       (unsigned long long) be64_to_cpu(target->ioc_guid), -	       be16_to_cpu(target->path.pkey), -	       (unsigned long long) be64_to_cpu(target->service_id), -	       target->path.dgid.raw); +	ret = ib_query_gid(ibdev, host->port, 0, &target->path.sgid); +	if (ret) +		goto err_free_mem;  	ret = srp_create_target_ib(target);  	if (ret) @@ -2395,7 +3045,19 @@ static ssize_t srp_create_target(struct device *dev,  	if (ret)  		goto err_disconnect; -	return count; +	shost_printk(KERN_DEBUG, target->scsi_host, PFX +		     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", +		     be64_to_cpu(target->id_ext), +		     be64_to_cpu(target->ioc_guid), +		     be16_to_cpu(target->path.pkey), +		     be64_to_cpu(target->service_id), +		     target->path.sgid.raw, target->path.dgid.raw); + +	ret = count; + +out: +	mutex_unlock(&host->add_target_mutex); +	return ret;  err_disconnect:  	srp_disconnect_target(target); @@ -2411,8 +3073,7 @@ 
@@ -2411,8 +3073,7 @@ err_free_mem:
 err:
 	scsi_host_put(target_host);
-
-	return ret;
+	goto out;
 }
 
 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
 
@@ -2448,6 +3109,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
 	INIT_LIST_HEAD(&host->target_list);
 	spin_lock_init(&host->target_lock);
 	init_completion(&host->released);
+	mutex_init(&host->add_target_mutex);
 	host->srp_dev = device;
 	host->port = port;
 
@@ -2479,9 +3141,9 @@ static void srp_add_one(struct ib_device *device)
 {
 	struct srp_device *srp_dev;
 	struct ib_device_attr *dev_attr;
-	struct ib_fmr_pool_param fmr_param;
 	struct srp_host *host;
-	int max_pages_per_fmr, fmr_page_shift, s, e, p;
+	int mr_page_shift, s, e, p;
+	u64 max_pages_per_mr;
 
 	dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
 	if (!dev_attr)
@@ -2496,15 +3158,39 @@ static void srp_add_one(struct ib_device *device)
 	if (!srp_dev)
 		goto free_attr;
 
+	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+			    device->map_phys_fmr && device->unmap_fmr);
+	srp_dev->has_fr = (dev_attr->device_cap_flags &
+			   IB_DEVICE_MEM_MGT_EXTENSIONS);
+	if (!srp_dev->has_fmr && !srp_dev->has_fr)
+		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
+
+	srp_dev->use_fast_reg = (srp_dev->has_fr &&
+				 (!srp_dev->has_fmr || prefer_fr));
+
 	/*
 	 * Use the smallest page size supported by the HCA, down to a
 	 * minimum of 4096 bytes. We're unlikely to build large sglists
 	 * out of smaller entries.
 	 */
-	fmr_page_shift		= max(12, ffs(dev_attr->page_size_cap) - 1);
-	srp_dev->fmr_page_size	= 1 << fmr_page_shift;
-	srp_dev->fmr_page_mask	= ~((u64) srp_dev->fmr_page_size - 1);
-	srp_dev->fmr_max_size	= srp_dev->fmr_page_size * SRP_FMR_SIZE;
+	mr_page_shift		= max(12, ffs(dev_attr->page_size_cap) - 1);
+	srp_dev->mr_page_size	= 1 << mr_page_shift;
+	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
+	max_pages_per_mr	= dev_attr->max_mr_size;
+	do_div(max_pages_per_mr, srp_dev->mr_page_size);
+	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
+					  max_pages_per_mr);
+	if (srp_dev->use_fast_reg) {
+		srp_dev->max_pages_per_mr =
+			min_t(u32, srp_dev->max_pages_per_mr,
+			      dev_attr->max_fast_reg_page_list_len);
+	}
+	srp_dev->mr_max_size	= srp_dev->mr_page_size *
+				   srp_dev->max_pages_per_mr;
+	pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
+		 device->name, mr_page_shift, dev_attr->max_mr_size,
+		 dev_attr->max_fast_reg_page_list_len,
+		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
 
 	INIT_LIST_HEAD(&srp_dev->dev_list);
 
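Aside: a compact sketch (not from the patch) of the page-geometry math above. my_mr_geometry() is an invented helper; the hard-coded 512 stands in for SRP_MAX_PAGES_PER_MR and the fast-registration cap is omitted.

#include <linux/kernel.h>
#include <asm/div64.h>

static void my_mr_geometry(u64 page_size_cap, u64 max_mr_size,
			   int *mr_page_shift, u64 *mr_page_mask,
			   u32 *max_pages_per_mr)
{
	u32 page_size;
	u64 max_pages = max_mr_size;

	/* Smallest page size the HCA supports, but never below 4 KiB. */
	*mr_page_shift	= max(12, ffs(page_size_cap) - 1);
	page_size	= 1U << *mr_page_shift;
	*mr_page_mask	= ~((u64)page_size - 1);

	/* Pages a single registration may cover, capped at 512. */
	do_div(max_pages, page_size);
	*max_pages_per_mr = min_t(u64, 512, max_pages);
}

With a 4 KiB minimum page and max_mr_size = 2 MiB this gives mr_page_shift = 12 and max_pages_per_mr = 512, i.e. mr_max_size = 2 MiB per registration.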
@@ -2520,27 +3206,6 @@ static void srp_add_one(struct ib_device *device)
 	if (IS_ERR(srp_dev->mr))
 		goto err_pd;
 
-	for (max_pages_per_fmr = SRP_FMR_SIZE;
-			max_pages_per_fmr >= SRP_FMR_MIN_SIZE;
-			max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) {
-		memset(&fmr_param, 0, sizeof fmr_param);
-		fmr_param.pool_size	    = SRP_FMR_POOL_SIZE;
-		fmr_param.dirty_watermark   = SRP_FMR_DIRTY_SIZE;
-		fmr_param.cache		    = 1;
-		fmr_param.max_pages_per_fmr = max_pages_per_fmr;
-		fmr_param.page_shift	    = fmr_page_shift;
-		fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
-					       IB_ACCESS_REMOTE_WRITE |
-					       IB_ACCESS_REMOTE_READ);
-
-		srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
-		if (!IS_ERR(srp_dev->fmr_pool))
-			break;
-	}
-
-	if (IS_ERR(srp_dev->fmr_pool))
-		srp_dev->fmr_pool = NULL;
-
 	if (device->node_type == RDMA_NODE_IB_SWITCH) {
 		s = 0;
 		e = 0;
@@ -2603,8 +3268,6 @@ static void srp_remove_one(struct ib_device *device)
 		kfree(host);
 	}
 
-	if (srp_dev->fmr_pool)
-		ib_destroy_fmr_pool(srp_dev->fmr_pool);
 	ib_dereg_mr(srp_dev->mr);
 	ib_dealloc_pd(srp_dev->pd);
 
@@ -2612,7 +3275,14 @@
 }
 
 static struct srp_function_template ib_srp_transport_functions = {
+	.has_rport_state	 = true,
+	.reset_timer_if_blocked	 = true,
+	.reconnect_delay	 = &srp_reconnect_delay,
+	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
+	.dev_loss_tmo		 = &srp_dev_loss_tmo,
+	.reconnect		 = srp_rport_reconnect,
 	.rport_delete		 = srp_rport_delete,
+	.terminate_rport_io	 = srp_terminate_io,
 };
 
 static int __init srp_init_module(void)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index e641088c14d..e46ecb15aa0 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -57,25 +57,19 @@ enum {
 	SRP_MAX_LUN		= 512,
 	SRP_DEF_SG_TABLESIZE	= 12,
 
-	SRP_RQ_SHIFT    	= 6,
-	SRP_RQ_SIZE		= 1 << SRP_RQ_SHIFT,
-
-	SRP_SQ_SIZE		= SRP_RQ_SIZE,
+	SRP_DEFAULT_QUEUE_SIZE	= 1 << 6,
 	SRP_RSP_SQ_SIZE		= 1,
-	SRP_REQ_SQ_SIZE		= SRP_SQ_SIZE - SRP_RSP_SQ_SIZE,
 	SRP_TSK_MGMT_SQ_SIZE	= 1,
-	SRP_CMD_SQ_SIZE		= SRP_REQ_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE,
+	SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE -
+				  SRP_TSK_MGMT_SQ_SIZE,
 
 	SRP_TAG_NO_REQ		= ~0U,
 	SRP_TAG_TSK_MGMT	= 1U << 31,
 
-	SRP_FMR_SIZE		= 512,
-	SRP_FMR_MIN_SIZE	= 128,
-	SRP_FMR_POOL_SIZE	= 1024,
-	SRP_FMR_DIRTY_SIZE	= SRP_FMR_POOL_SIZE / 4,
+	SRP_MAX_PAGES_PER_MR	= 512,
 
-	SRP_MAP_ALLOW_FMR	= 0,
-	SRP_MAP_NO_FMR		= 1,
+	LOCAL_INV_WR_ID_MASK	= 1,
+	FAST_REG_WR_ID_MASK	= 2,
 };
 
 enum srp_target_state {
@@ -89,15 +83,24 @@ enum srp_iu_type {
 	SRP_IU_RSP,
 };
 
+/*
+ * @mr_page_mask: HCA memory registration page mask.
+ * @mr_page_size: HCA memory registration page size.
+ * @mr_max_size: Maximum size in bytes of a single FMR / FR registration
+ *   request.
+ */
 struct srp_device {
 	struct list_head	dev_list;
 	struct ib_device       *dev;
 	struct ib_pd	       *pd;
 	struct ib_mr	       *mr;
-	struct ib_fmr_pool     *fmr_pool;
-	u64			fmr_page_mask;
-	int			fmr_page_size;
-	int			fmr_max_size;
+	u64			mr_page_mask;
+	int			mr_page_size;
+	int			mr_max_size;
+	int			max_pages_per_mr;
+	bool			has_fmr;
+	bool			has_fr;
+	bool			use_fast_reg;
 };
 
 struct srp_host {
@@ -108,17 +111,21 @@ struct srp_host {
 	spinlock_t		target_lock;
 	struct completion	released;
 	struct list_head	list;
+	struct mutex		add_target_mutex;
 };
 
 struct srp_request {
 	struct list_head	list;
 	struct scsi_cmnd       *scmnd;
 	struct srp_iu	       *cmd;
-	struct ib_pool_fmr    **fmr_list;
+	union {
+		struct ib_pool_fmr **fmr_list;
+		struct srp_fr_desc **fr_list;
+	};
 	u64		       *map_page;
 	struct srp_direct_buf  *indirect_desc;
 	dma_addr_t		indirect_dma_addr;
-	short			nfmr;
+	short			nmdesc;
 	short			index;
 };
 
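Aside: the anonymous union added to struct srp_request above (and the fmr_pool/fr_pool union added to struct srp_target_port just below) makes the two pointer arrays share storage, so exactly one view is valid per device, selected by srp_dev->use_fast_reg. A sketch of how a consumer might walk whichever view is active; struct my_request and my_req_reset() are invented for the example.

#include <linux/types.h>

struct my_desc;			/* stands in for srp_fr_desc */

struct my_request {
	union {			/* same aliasing trick as srp_request */
		void		**fmr_list;	/* valid when !use_fast_reg */
		struct my_desc	**fr_list;	/* valid when use_fast_reg */
	};
	short			nmdesc;	/* descriptors in use, either kind */
};

static void my_req_reset(struct my_request *req, bool use_fast_reg)
{
	int i;

	/* Both views alias the same array, so the loop bound is shared. */
	for (i = 0; i < req->nmdesc; i++) {
		if (use_fast_reg)
			req->fr_list[i] = NULL;
		else
			req->fmr_list[i] = NULL;
	}
	req->nmdesc = 0;
}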
@@ -133,6 +140,10 @@ struct srp_target_port {
 	struct ib_cq	       *send_cq ____cacheline_aligned_in_smp;
 	struct ib_cq	       *recv_cq;
 	struct ib_qp	       *qp;
+	union {
+		struct ib_fmr_pool     *fmr_pool;
+		struct srp_fr_pool     *fr_pool;
+	};
 	u32			lkey;
 	u32			rkey;
 	enum srp_target_state	state;
@@ -140,7 +151,6 @@ struct srp_target_port {
 	unsigned int		cmd_sg_cnt;
 	unsigned int		indirect_size;
 	bool			allow_ext_sg;
-	bool			transport_offline;
 
 	/* Everything above this point is used in the hot path of
 	 * command processing. Try to keep them packed into cachelines.
@@ -153,10 +163,14 @@ struct srp_target_port {
 	u16			io_class;
 	struct srp_host	       *srp_host;
 	struct Scsi_Host       *scsi_host;
+	struct srp_rport       *rport;
 	char			target_name[32];
 	unsigned int		scsi_id;
 	unsigned int		sg_tablesize;
+	int			queue_size;
+	int			req_ring_size;
 	int			comp_vector;
+	int			tl_retry_count;
 
 	struct ib_sa_path_rec	path;
 	__be16			orig_dgid[8];
@@ -172,10 +186,11 @@ struct srp_target_port {
 
 	int			zero_req_lim;
 
-	struct srp_iu	       *tx_ring[SRP_SQ_SIZE];
-	struct srp_iu	       *rx_ring[SRP_RQ_SIZE];
-	struct srp_request	req_ring[SRP_CMD_SQ_SIZE];
+	struct srp_iu	       **tx_ring;
+	struct srp_iu	       **rx_ring;
+	struct srp_request	*req_ring;
 
+	struct work_struct	tl_err_work;
 	struct work_struct	remove_work;
 
 	struct list_head	list;
@@ -195,15 +210,66 @@ struct srp_iu {
 	enum dma_data_direction	direction;
};
 
+/**
+ * struct srp_fr_desc - fast registration work request arguments
+ * @entry: Entry in srp_fr_pool.free_list.
+ * @mr:    Memory region.
+ * @frpl:  Fast registration page list.
+ */
+struct srp_fr_desc {
+	struct list_head		entry;
+	struct ib_mr			*mr;
+	struct ib_fast_reg_page_list	*frpl;
+};
+
+/**
+ * struct srp_fr_pool - pool of fast registration descriptors
+ *
+ * An entry is available for allocation if and only if it occurs in @free_list.
+ *
+ * @size:      Number of descriptors in this pool.
+ * @max_page_list_len: Maximum fast registration work request page list length.
+ * @lock:      Protects free_list.
+ * @free_list: List of free descriptors.
+ * @desc:      Fast registration descriptor pool.
+ */
+struct srp_fr_pool {
+	int			size;
+	int			max_page_list_len;
+	spinlock_t		lock;
+	struct list_head	free_list;
+	struct srp_fr_desc	desc[0];
+};
+
+/**
+ * struct srp_map_state - per-request DMA memory mapping state
+ * @desc:	    Pointer to the element of the SRP buffer descriptor array
+ *		    that is being filled in.
+ * @pages:	    Array with DMA addresses of pages being considered for
+ *		    memory registration.
+ * @base_dma_addr:  DMA address of the first page that has not yet been mapped.
+ * @dma_len:	    Number of bytes that will be registered with the next
+ *		    FMR or FR memory registration call.
+ * @total_len:	    Total number of bytes in the sg-list being mapped.
+ * @npages:	    Number of page addresses in the pages[] array.
+ * @nmdesc:	    Number of FMR or FR memory descriptors used for mapping.
+ * @ndesc:	    Number of SRP buffer descriptors that have been filled in.
+ * @unmapped_sg:    First element of the sg-list that is mapped via FMR or FR.
+ * @unmapped_index: Index of the first element mapped via FMR or FR.
+ * @unmapped_addr:  DMA address of the first element mapped via FMR or FR.
+ */
 struct srp_map_state {
-	struct ib_pool_fmr    **next_fmr;
+	union {
+		struct ib_pool_fmr **next_fmr;
+		struct srp_fr_desc **next_fr;
+	};
 	struct srp_direct_buf  *desc;
 	u64		       *pages;
 	dma_addr_t		base_dma_addr;
-	u32			fmr_len;
+	u32			dma_len;
 	u32			total_len;
 	unsigned int		npages;
-	unsigned int		nfmr;
+	unsigned int		nmdesc;
 	unsigned int		ndesc;
 	struct scatterlist     *unmapped_sg;
 	int			unmapped_index;
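Aside: given the srp_fr_pool layout above, pool creation reduces to a single kzalloc() covering the header plus the flexible desc[] array, with every descriptor threaded onto free_list. A minimal sketch under those assumptions; my_alloc_fr_pool() is invented, and it deliberately skips the per-descriptor ib_alloc_fast_reg_mr() / page-list setup and error unwinding that the driver's real srp_create_fr_pool() must also perform.

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>

static struct srp_fr_pool *my_alloc_fr_pool(int pool_size,
					    int max_page_list_len)
{
	struct srp_fr_pool *pool;
	int i;

	/* Header plus pool_size trailing descriptors in one allocation. */
	pool = kzalloc(sizeof(*pool) + pool_size * sizeof(pool->desc[0]),
		       GFP_KERNEL);
	if (!pool)
		return NULL;

	pool->size = pool_size;
	pool->max_page_list_len = max_page_list_len;
	spin_lock_init(&pool->lock);
	INIT_LIST_HEAD(&pool->free_list);

	/*
	 * Initially every descriptor is free; the real driver would also
	 * allocate desc[i].mr and desc[i].frpl here.
	 */
	for (i = 0; i < pool_size; i++)
		list_add_tail(&pool->desc[i].entry, &pool->free_list);

	return pool;
}

Allocation then consists of taking the first entry off free_list under pool->lock, and srp_destroy_fr_pool() (shown earlier in this diff) walks desc[] to release whatever each entry owns.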
