Diffstat (limited to 'drivers/infiniband/ulp/srp')
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.c	2376
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.h	 185
2 files changed, 1853 insertions, 708 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index cfc1d65c457..e3c2c5b4297 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -30,6 +30,8 @@   * SOFTWARE.   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/module.h>  #include <linux/init.h>  #include <linux/slab.h> @@ -39,11 +41,12 @@  #include <linux/random.h>  #include <linux/jiffies.h> -#include <asm/atomic.h> +#include <linux/atomic.h>  #include <scsi/scsi.h>  #include <scsi/scsi_device.h>  #include <scsi/scsi_dbg.h> +#include <scsi/scsi_tcq.h>  #include <scsi/srp.h>  #include <scsi/scsi_transport_srp.h> @@ -51,32 +54,74 @@  #define DRV_NAME	"ib_srp"  #define PFX		DRV_NAME ": " -#define DRV_VERSION	"0.2" -#define DRV_RELDATE	"November 1, 2005" +#define DRV_VERSION	"1.0" +#define DRV_RELDATE	"July 1, 2013"  MODULE_AUTHOR("Roland Dreier");  MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "  		   "v" DRV_VERSION " (" DRV_RELDATE ")");  MODULE_LICENSE("Dual BSD/GPL"); -static int srp_sg_tablesize = SRP_DEF_SG_TABLESIZE; -static int srp_max_iu_len; +static unsigned int srp_sg_tablesize; +static unsigned int cmd_sg_entries; +static unsigned int indirect_sg_entries; +static bool allow_ext_sg; +static bool prefer_fr; +static bool register_always; +static int topspin_workarounds = 1; -module_param(srp_sg_tablesize, int, 0444); -MODULE_PARM_DESC(srp_sg_tablesize, -		 "Max number of gather/scatter entries per I/O (default is 12, max 255)"); +module_param(srp_sg_tablesize, uint, 0444); +MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries"); -static int topspin_workarounds = 1; +module_param(cmd_sg_entries, uint, 0444); +MODULE_PARM_DESC(cmd_sg_entries, +		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)"); + +module_param(indirect_sg_entries, uint, 0444); +MODULE_PARM_DESC(indirect_sg_entries, +		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")"); + +module_param(allow_ext_sg, bool, 0444); +MODULE_PARM_DESC(allow_ext_sg, +		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");  module_param(topspin_workarounds, int, 0444);  MODULE_PARM_DESC(topspin_workarounds,  		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); -static int mellanox_workarounds = 1; - -module_param(mellanox_workarounds, int, 0444); -MODULE_PARM_DESC(mellanox_workarounds, -		 "Enable workarounds for Mellanox SRP target bugs if != 0"); +module_param(prefer_fr, bool, 0444); +MODULE_PARM_DESC(prefer_fr, +"Whether to use fast registration if both FMR and fast registration are supported"); + +module_param(register_always, bool, 0444); +MODULE_PARM_DESC(register_always, +		 "Use memory registration even for contiguous memory regions"); + +static struct kernel_param_ops srp_tmo_ops; + +static int srp_reconnect_delay = 10; +module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay, +		S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts"); + +static int srp_fast_io_fail_tmo = 15; +module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo, +		S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(fast_io_fail_tmo, +		 "Number of seconds between the observation of a transport" +		 " layer error and failing all I/O. 
\"off\" means that this" +		 " functionality is disabled."); + +static int srp_dev_loss_tmo = 600; +module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo, +		S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dev_loss_tmo, +		 "Maximum number of seconds that the SRP transport should" +		 " insulate transport layer errors. After this time has been" +		 " exceeded the SCSI host is removed. Should be" +		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT) +		 " if fast_io_fail_tmo has not been set. \"off\" means that" +		 " this functionality is disabled.");  static void srp_add_one(struct ib_device *device);  static void srp_remove_one(struct ib_device *device); @@ -94,6 +139,48 @@ static struct ib_client srp_client = {  static struct ib_sa_client srp_sa_client; +static int srp_tmo_get(char *buffer, const struct kernel_param *kp) +{ +	int tmo = *(int *)kp->arg; + +	if (tmo >= 0) +		return sprintf(buffer, "%d", tmo); +	else +		return sprintf(buffer, "off"); +} + +static int srp_tmo_set(const char *val, const struct kernel_param *kp) +{ +	int tmo, res; + +	if (strncmp(val, "off", 3) != 0) { +		res = kstrtoint(val, 0, &tmo); +		if (res) +			goto out; +	} else { +		tmo = -1; +	} +	if (kp->arg == &srp_reconnect_delay) +		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, +				    srp_dev_loss_tmo); +	else if (kp->arg == &srp_fast_io_fail_tmo) +		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo); +	else +		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo, +				    tmo); +	if (res) +		goto out; +	*(int *)kp->arg = tmo; + +out: +	return res; +} + +static struct kernel_param_ops srp_tmo_ops = { +	.get = srp_tmo_get, +	.set = srp_tmo_set, +}; +  static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)  {  	return (struct srp_target_port *) host->hostdata; @@ -114,14 +201,6 @@ static int srp_target_is_topspin(struct srp_target_port *target)  		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));  } -static int srp_target_is_mellanox(struct srp_target_port *target) -{ -	static const u8 mellanox_oui[3] = { 0x00, 0x02, 0xc9 }; - -	return mellanox_workarounds && -		!memcmp(&target->ioc_guid, mellanox_oui, sizeof mellanox_oui); -} -  static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,  				   gfp_t gfp_mask,  				   enum dma_data_direction direction) @@ -167,7 +246,7 @@ static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)  static void srp_qp_event(struct ib_event *event, void *context)  { -	printk(KERN_ERR PFX "QP event %d\n", event->event); +	pr_debug("QP event %d\n", event->event);  }  static int srp_init_qp(struct srp_target_port *target, @@ -219,80 +298,288 @@ static int srp_new_cm_id(struct srp_target_port *target)  	return 0;  } +static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) +{ +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_fmr_pool_param fmr_param; + +	memset(&fmr_param, 0, sizeof(fmr_param)); +	fmr_param.pool_size	    = target->scsi_host->can_queue; +	fmr_param.dirty_watermark   = fmr_param.pool_size / 4; +	fmr_param.cache		    = 1; +	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; +	fmr_param.page_shift	    = ilog2(dev->mr_page_size); +	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE | +				       IB_ACCESS_REMOTE_WRITE | +				       IB_ACCESS_REMOTE_READ); + +	return ib_create_fmr_pool(dev->pd, &fmr_param); +} + +/** + * srp_destroy_fr_pool() - free the resources owned by a pool + * @pool: Fast registration pool to be destroyed. 
+ */ +static void srp_destroy_fr_pool(struct srp_fr_pool *pool) +{ +	int i; +	struct srp_fr_desc *d; + +	if (!pool) +		return; + +	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { +		if (d->frpl) +			ib_free_fast_reg_page_list(d->frpl); +		if (d->mr) +			ib_dereg_mr(d->mr); +	} +	kfree(pool); +} + +/** + * srp_create_fr_pool() - allocate and initialize a pool for fast registration + * @device:            IB device to allocate fast registration descriptors for. + * @pd:                Protection domain associated with the FR descriptors. + * @pool_size:         Number of descriptors to allocate. + * @max_page_list_len: Maximum fast registration work request page list length. + */ +static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, +					      struct ib_pd *pd, int pool_size, +					      int max_page_list_len) +{ +	struct srp_fr_pool *pool; +	struct srp_fr_desc *d; +	struct ib_mr *mr; +	struct ib_fast_reg_page_list *frpl; +	int i, ret = -EINVAL; + +	if (pool_size <= 0) +		goto err; +	ret = -ENOMEM; +	pool = kzalloc(sizeof(struct srp_fr_pool) + +		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL); +	if (!pool) +		goto err; +	pool->size = pool_size; +	pool->max_page_list_len = max_page_list_len; +	spin_lock_init(&pool->lock); +	INIT_LIST_HEAD(&pool->free_list); + +	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { +		mr = ib_alloc_fast_reg_mr(pd, max_page_list_len); +		if (IS_ERR(mr)) { +			ret = PTR_ERR(mr); +			goto destroy_pool; +		} +		d->mr = mr; +		frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len); +		if (IS_ERR(frpl)) { +			ret = PTR_ERR(frpl); +			goto destroy_pool; +		} +		d->frpl = frpl; +		list_add_tail(&d->entry, &pool->free_list); +	} + +out: +	return pool; + +destroy_pool: +	srp_destroy_fr_pool(pool); + +err: +	pool = ERR_PTR(ret); +	goto out; +} + +/** + * srp_fr_pool_get() - obtain a descriptor suitable for fast registration + * @pool: Pool to obtain descriptor from. + */ +static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool) +{ +	struct srp_fr_desc *d = NULL; +	unsigned long flags; + +	spin_lock_irqsave(&pool->lock, flags); +	if (!list_empty(&pool->free_list)) { +		d = list_first_entry(&pool->free_list, typeof(*d), entry); +		list_del(&d->entry); +	} +	spin_unlock_irqrestore(&pool->lock, flags); + +	return d; +} + +/** + * srp_fr_pool_put() - put an FR descriptor back in the free list + * @pool: Pool the descriptor was allocated from. + * @desc: Pointer to an array of fast registration descriptor pointers. + * @n:    Number of descriptors to put back. + * + * Note: The caller must already have queued an invalidation request for + * desc->mr->rkey before calling this function. 
+ */ +static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc, +			    int n) +{ +	unsigned long flags; +	int i; + +	spin_lock_irqsave(&pool->lock, flags); +	for (i = 0; i < n; i++) +		list_add(&desc[i]->entry, &pool->free_list); +	spin_unlock_irqrestore(&pool->lock, flags); +} + +static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) +{ +	struct srp_device *dev = target->srp_host->srp_dev; + +	return srp_create_fr_pool(dev->dev, dev->pd, +				  target->scsi_host->can_queue, +				  dev->max_pages_per_mr); +} +  static int srp_create_target_ib(struct srp_target_port *target)  { +	struct srp_device *dev = target->srp_host->srp_dev;  	struct ib_qp_init_attr *init_attr; +	struct ib_cq *recv_cq, *send_cq; +	struct ib_qp *qp; +	struct ib_fmr_pool *fmr_pool = NULL; +	struct srp_fr_pool *fr_pool = NULL; +	const int m = 1 + dev->use_fast_reg;  	int ret;  	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);  	if (!init_attr)  		return -ENOMEM; -	target->recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, -				       srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); -	if (IS_ERR(target->recv_cq)) { -		ret = PTR_ERR(target->recv_cq); +	recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, target, +			       target->queue_size, target->comp_vector); +	if (IS_ERR(recv_cq)) { +		ret = PTR_ERR(recv_cq);  		goto err;  	} -	target->send_cq = ib_create_cq(target->srp_host->srp_dev->dev, -				       srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); -	if (IS_ERR(target->send_cq)) { -		ret = PTR_ERR(target->send_cq); +	send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target, +			       m * target->queue_size, target->comp_vector); +	if (IS_ERR(send_cq)) { +		ret = PTR_ERR(send_cq);  		goto err_recv_cq;  	} -	ib_req_notify_cq(target->recv_cq, IB_CQ_NEXT_COMP); +	ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);  	init_attr->event_handler       = srp_qp_event; -	init_attr->cap.max_send_wr     = SRP_SQ_SIZE; -	init_attr->cap.max_recv_wr     = SRP_RQ_SIZE; +	init_attr->cap.max_send_wr     = m * target->queue_size; +	init_attr->cap.max_recv_wr     = target->queue_size;  	init_attr->cap.max_recv_sge    = 1;  	init_attr->cap.max_send_sge    = 1; -	init_attr->sq_sig_type         = IB_SIGNAL_ALL_WR; +	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;  	init_attr->qp_type             = IB_QPT_RC; -	init_attr->send_cq             = target->send_cq; -	init_attr->recv_cq             = target->recv_cq; +	init_attr->send_cq             = send_cq; +	init_attr->recv_cq             = recv_cq; -	target->qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); -	if (IS_ERR(target->qp)) { -		ret = PTR_ERR(target->qp); +	qp = ib_create_qp(dev->pd, init_attr); +	if (IS_ERR(qp)) { +		ret = PTR_ERR(qp);  		goto err_send_cq;  	} -	ret = srp_init_qp(target, target->qp); +	ret = srp_init_qp(target, qp);  	if (ret)  		goto err_qp; +	if (dev->use_fast_reg && dev->has_fr) { +		fr_pool = srp_alloc_fr_pool(target); +		if (IS_ERR(fr_pool)) { +			ret = PTR_ERR(fr_pool); +			shost_printk(KERN_WARNING, target->scsi_host, PFX +				     "FR pool allocation failed (%d)\n", ret); +			goto err_qp; +		} +		if (target->fr_pool) +			srp_destroy_fr_pool(target->fr_pool); +		target->fr_pool = fr_pool; +	} else if (!dev->use_fast_reg && dev->has_fmr) { +		fmr_pool = srp_alloc_fmr_pool(target); +		if (IS_ERR(fmr_pool)) { +			ret = PTR_ERR(fmr_pool); +			shost_printk(KERN_WARNING, target->scsi_host, PFX +				     "FMR pool allocation failed (%d)\n", ret); +			goto err_qp; +		} +		if 
(target->fmr_pool) +			ib_destroy_fmr_pool(target->fmr_pool); +		target->fmr_pool = fmr_pool; +	} + +	if (target->qp) +		ib_destroy_qp(target->qp); +	if (target->recv_cq) +		ib_destroy_cq(target->recv_cq); +	if (target->send_cq) +		ib_destroy_cq(target->send_cq); + +	target->qp = qp; +	target->recv_cq = recv_cq; +	target->send_cq = send_cq; +  	kfree(init_attr);  	return 0;  err_qp: -	ib_destroy_qp(target->qp); +	ib_destroy_qp(qp);  err_send_cq: -	ib_destroy_cq(target->send_cq); +	ib_destroy_cq(send_cq);  err_recv_cq: -	ib_destroy_cq(target->recv_cq); +	ib_destroy_cq(recv_cq);  err:  	kfree(init_attr);  	return ret;  } +/* + * Note: this function may be called without srp_alloc_iu_bufs() having been + * invoked. Hence the target->[rt]x_ring checks. + */  static void srp_free_target_ib(struct srp_target_port *target)  { +	struct srp_device *dev = target->srp_host->srp_dev;  	int i; +	if (dev->use_fast_reg) { +		if (target->fr_pool) +			srp_destroy_fr_pool(target->fr_pool); +	} else { +		if (target->fmr_pool) +			ib_destroy_fmr_pool(target->fmr_pool); +	}  	ib_destroy_qp(target->qp);  	ib_destroy_cq(target->send_cq);  	ib_destroy_cq(target->recv_cq); -	for (i = 0; i < SRP_RQ_SIZE; ++i) -		srp_free_iu(target->srp_host, target->rx_ring[i]); -	for (i = 0; i < SRP_SQ_SIZE; ++i) -		srp_free_iu(target->srp_host, target->tx_ring[i]); +	target->qp = NULL; +	target->send_cq = target->recv_cq = NULL; + +	if (target->rx_ring) { +		for (i = 0; i < target->queue_size; ++i) +			srp_free_iu(target->srp_host, target->rx_ring[i]); +		kfree(target->rx_ring); +		target->rx_ring = NULL; +	} +	if (target->tx_ring) { +		for (i = 0; i < target->queue_size; ++i) +			srp_free_iu(target->srp_host, target->tx_ring[i]); +		kfree(target->tx_ring); +		target->tx_ring = NULL; +	}  }  static void srp_path_rec_completion(int status, @@ -312,6 +599,8 @@ static void srp_path_rec_completion(int status,  static int srp_lookup_path(struct srp_target_port *target)  { +	int ret; +  	target->path.numb_path = 1;  	init_completion(&target->done); @@ -332,7 +621,9 @@ static int srp_lookup_path(struct srp_target_port *target)  	if (target->path_query_id < 0)  		return target->path_query_id; -	wait_for_completion(&target->done); +	ret = wait_for_completion_interruptible(&target->done); +	if (ret < 0) +		return ret;  	if (target->status < 0)  		shost_printk(KERN_WARNING, target->scsi_host, @@ -372,13 +663,13 @@ static int srp_send_req(struct srp_target_port *target)  	req->param.responder_resources	      = 4;  	req->param.remote_cm_response_timeout = 20;  	req->param.local_cm_response_timeout  = 20; -	req->param.retry_count 		      = 7; +	req->param.retry_count                = target->tl_retry_count;  	req->param.rnr_retry_count 	      = 7;  	req->param.max_cm_retries 	      = 15;  	req->priv.opcode     	= SRP_LOGIN_REQ;  	req->priv.tag        	= 0; -	req->priv.req_it_iu_len = cpu_to_be32(srp_max_iu_len); +	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);  	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |  					      SRP_BUF_FORMAT_INDIRECT);  	/* @@ -428,48 +719,193 @@ static int srp_send_req(struct srp_target_port *target)  	return status;  } +static bool srp_queue_remove_work(struct srp_target_port *target) +{ +	bool changed = false; + +	spin_lock_irq(&target->lock); +	if (target->state != SRP_TARGET_REMOVED) { +		target->state = SRP_TARGET_REMOVED; +		changed = true; +	} +	spin_unlock_irq(&target->lock); + +	if (changed) +		queue_work(system_long_wq, &target->remove_work); + +	return changed; +} + +static bool 
srp_change_conn_state(struct srp_target_port *target, +				  bool connected) +{ +	bool changed = false; + +	spin_lock_irq(&target->lock); +	if (target->connected != connected) { +		target->connected = connected; +		changed = true; +	} +	spin_unlock_irq(&target->lock); + +	return changed; +} +  static void srp_disconnect_target(struct srp_target_port *target)  { -	/* XXX should send SRP_I_LOGOUT request */ +	if (srp_change_conn_state(target, false)) { +		/* XXX should send SRP_I_LOGOUT request */ -	init_completion(&target->done); -	if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { -		shost_printk(KERN_DEBUG, target->scsi_host, -			     PFX "Sending CM DREQ failed\n"); -		return; +		if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { +			shost_printk(KERN_DEBUG, target->scsi_host, +				     PFX "Sending CM DREQ failed\n"); +		}  	} -	wait_for_completion(&target->done);  } -static void srp_remove_work(struct work_struct *work) +static void srp_free_req_data(struct srp_target_port *target)  { -	struct srp_target_port *target = -		container_of(work, struct srp_target_port, work); +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_device *ibdev = dev->dev; +	struct srp_request *req; +	int i; -	spin_lock_irq(target->scsi_host->host_lock); -	if (target->state != SRP_TARGET_DEAD) { -		spin_unlock_irq(target->scsi_host->host_lock); +	if (!target->req_ring)  		return; + +	for (i = 0; i < target->req_ring_size; ++i) { +		req = &target->req_ring[i]; +		if (dev->use_fast_reg) +			kfree(req->fr_list); +		else +			kfree(req->fmr_list); +		kfree(req->map_page); +		if (req->indirect_dma_addr) { +			ib_dma_unmap_single(ibdev, req->indirect_dma_addr, +					    target->indirect_size, +					    DMA_TO_DEVICE); +		} +		kfree(req->indirect_desc);  	} -	target->state = SRP_TARGET_REMOVED; -	spin_unlock_irq(target->scsi_host->host_lock); -	spin_lock(&target->srp_host->target_lock); -	list_del(&target->list); -	spin_unlock(&target->srp_host->target_lock); +	kfree(target->req_ring); +	target->req_ring = NULL; +} + +static int srp_alloc_req_data(struct srp_target_port *target) +{ +	struct srp_device *srp_dev = target->srp_host->srp_dev; +	struct ib_device *ibdev = srp_dev->dev; +	struct srp_request *req; +	void *mr_list; +	dma_addr_t dma_addr; +	int i, ret = -ENOMEM; + +	INIT_LIST_HEAD(&target->free_reqs); +	target->req_ring = kzalloc(target->req_ring_size * +				   sizeof(*target->req_ring), GFP_KERNEL); +	if (!target->req_ring) +		goto out; + +	for (i = 0; i < target->req_ring_size; ++i) { +		req = &target->req_ring[i]; +		mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *), +				  GFP_KERNEL); +		if (!mr_list) +			goto out; +		if (srp_dev->use_fast_reg) +			req->fr_list = mr_list; +		else +			req->fmr_list = mr_list; +		req->map_page = kmalloc(srp_dev->max_pages_per_mr * +					sizeof(void *), GFP_KERNEL); +		if (!req->map_page) +			goto out; +		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); +		if (!req->indirect_desc) +			goto out; + +		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, +					     target->indirect_size, +					     DMA_TO_DEVICE); +		if (ib_dma_mapping_error(ibdev, dma_addr)) +			goto out; + +		req->indirect_dma_addr = dma_addr; +		req->index = i; +		list_add_tail(&req->list, &target->free_reqs); +	} +	ret = 0; + +out: +	return ret; +} + +/** + * srp_del_scsi_host_attr() - Remove attributes defined in the host template. + * @shost: SCSI host whose attributes to remove from sysfs. 
+ * + * Note: Any attributes defined in the host template and that did not exist + * before invocation of this function will be ignored. + */ +static void srp_del_scsi_host_attr(struct Scsi_Host *shost) +{ +	struct device_attribute **attr; + +	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr) +		device_remove_file(&shost->shost_dev, *attr); +} + +static void srp_remove_target(struct srp_target_port *target) +{ +	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); + +	srp_del_scsi_host_attr(target->scsi_host); +	srp_rport_get(target->rport);  	srp_remove_host(target->scsi_host);  	scsi_remove_host(target->scsi_host); +	srp_stop_rport_timers(target->rport); +	srp_disconnect_target(target);  	ib_destroy_cm_id(target->cm_id);  	srp_free_target_ib(target); +	cancel_work_sync(&target->tl_err_work); +	srp_rport_put(target->rport); +	srp_free_req_data(target); + +	spin_lock(&target->srp_host->target_lock); +	list_del(&target->list); +	spin_unlock(&target->srp_host->target_lock); +  	scsi_host_put(target->scsi_host);  } +static void srp_remove_work(struct work_struct *work) +{ +	struct srp_target_port *target = +		container_of(work, struct srp_target_port, remove_work); + +	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); + +	srp_remove_target(target); +} + +static void srp_rport_delete(struct srp_rport *rport) +{ +	struct srp_target_port *target = rport->lld_data; + +	srp_queue_remove_work(target); +} +  static int srp_connect_target(struct srp_target_port *target)  {  	int retries = 3;  	int ret; +	WARN_ON_ONCE(target->connected); + +	target->qp_in_error = false; +  	ret = srp_lookup_path(target);  	if (ret)  		return ret; @@ -479,7 +915,9 @@ static int srp_connect_target(struct srp_target_port *target)  		ret = srp_send_req(target);  		if (ret)  			return ret; -		wait_for_completion(&target->done); +		ret = wait_for_completion_interruptible(&target->done); +		if (ret < 0) +			return ret;  		/*  		 * The CM event handling code will set status to @@ -489,6 +927,7 @@ static int srp_connect_target(struct srp_target_port *target)  		 */  		switch (target->status) {  		case 0: +			srp_change_conn_state(target, true);  			return 0;  		case SRP_PORT_REDIRECT: @@ -521,199 +960,436 @@ static int srp_connect_target(struct srp_target_port *target)  	}  } +static int srp_inv_rkey(struct srp_target_port *target, u32 rkey) +{ +	struct ib_send_wr *bad_wr; +	struct ib_send_wr wr = { +		.opcode		    = IB_WR_LOCAL_INV, +		.wr_id		    = LOCAL_INV_WR_ID_MASK, +		.next		    = NULL, +		.num_sge	    = 0, +		.send_flags	    = 0, +		.ex.invalidate_rkey = rkey, +	}; + +	return ib_post_send(target->qp, &wr, &bad_wr); +} +  static void srp_unmap_data(struct scsi_cmnd *scmnd,  			   struct srp_target_port *target,  			   struct srp_request *req)  { +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_device *ibdev = dev->dev; +	int i, res; +  	if (!scsi_sglist(scmnd) ||  	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&  	     scmnd->sc_data_direction != DMA_FROM_DEVICE))  		return; -	if (req->fmr) { -		ib_fmr_pool_unmap(req->fmr); -		req->fmr = NULL; +	if (dev->use_fast_reg) { +		struct srp_fr_desc **pfr; + +		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { +			res = srp_inv_rkey(target, (*pfr)->mr->rkey); +			if (res < 0) { +				shost_printk(KERN_ERR, target->scsi_host, PFX +				  "Queueing INV WR for rkey %#x failed (%d)\n", +				  (*pfr)->mr->rkey, res); +				queue_work(system_long_wq, +					   &target->tl_err_work); +			} +		} +		if (req->nmdesc) +			
srp_fr_pool_put(target->fr_pool, req->fr_list, +					req->nmdesc); +	} else { +		struct ib_pool_fmr **pfmr; + +		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) +			ib_fmr_pool_unmap(*pfmr);  	} -	ib_dma_unmap_sg(target->srp_host->srp_dev->dev, scsi_sglist(scmnd), -			scsi_sg_count(scmnd), scmnd->sc_data_direction); +	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), +			scmnd->sc_data_direction);  } -static void srp_remove_req(struct srp_target_port *target, struct srp_request *req) +/** + * srp_claim_req - Take ownership of the scmnd associated with a request. + * @target: SRP target port. + * @req: SRP request. + * @sdev: If not NULL, only take ownership for this SCSI device. + * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take + *         ownership of @req->scmnd if it equals @scmnd. + * + * Return value: + * Either NULL or a pointer to the SCSI command the caller became owner of. + */ +static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target, +				       struct srp_request *req, +				       struct scsi_device *sdev, +				       struct scsi_cmnd *scmnd)  { -	srp_unmap_data(req->scmnd, target, req); -	list_move_tail(&req->list, &target->free_reqs); +	unsigned long flags; + +	spin_lock_irqsave(&target->lock, flags); +	if (req->scmnd && +	    (!sdev || req->scmnd->device == sdev) && +	    (!scmnd || req->scmnd == scmnd)) { +		scmnd = req->scmnd; +		req->scmnd = NULL; +	} else { +		scmnd = NULL; +	} +	spin_unlock_irqrestore(&target->lock, flags); + +	return scmnd;  } -static void srp_reset_req(struct srp_target_port *target, struct srp_request *req) +/** + * srp_free_req() - Unmap data and add request to the free request list. + * @target: SRP target port. + * @req:    Request to be freed. + * @scmnd:  SCSI command associated with @req. + * @req_lim_delta: Amount to be added to @target->req_lim. + */ +static void srp_free_req(struct srp_target_port *target, +			 struct srp_request *req, struct scsi_cmnd *scmnd, +			 s32 req_lim_delta)  { -	req->scmnd->result = DID_RESET << 16; -	req->scmnd->scsi_done(req->scmnd); -	srp_remove_req(target, req); +	unsigned long flags; + +	srp_unmap_data(scmnd, target, req); + +	spin_lock_irqsave(&target->lock, flags); +	target->req_lim += req_lim_delta; +	list_add_tail(&req->list, &target->free_reqs); +	spin_unlock_irqrestore(&target->lock, flags);  } -static int srp_reconnect_target(struct srp_target_port *target) +static void srp_finish_req(struct srp_target_port *target, +			   struct srp_request *req, struct scsi_device *sdev, +			   int result)  { -	struct ib_qp_attr qp_attr; -	struct srp_request *req, *tmp; -	struct ib_wc wc; -	int ret; +	struct scsi_cmnd *scmnd = srp_claim_req(target, req, sdev, NULL); -	spin_lock_irq(target->scsi_host->host_lock); -	if (target->state != SRP_TARGET_LIVE) { -		spin_unlock_irq(target->scsi_host->host_lock); -		return -EAGAIN; +	if (scmnd) { +		srp_free_req(target, req, scmnd, 0); +		scmnd->result = result; +		scmnd->scsi_done(scmnd);  	} -	target->state = SRP_TARGET_CONNECTING; -	spin_unlock_irq(target->scsi_host->host_lock); +} + +static void srp_terminate_io(struct srp_rport *rport) +{ +	struct srp_target_port *target = rport->lld_data; +	struct Scsi_Host *shost = target->scsi_host; +	struct scsi_device *sdev; +	int i; + +	/* +	 * Invoking srp_terminate_io() while srp_queuecommand() is running +	 * is not safe. Hence the warning statement below. 
+	 */ +	shost_for_each_device(sdev, shost) +		WARN_ON_ONCE(sdev->request_queue->request_fn_active); + +	for (i = 0; i < target->req_ring_size; ++i) { +		struct srp_request *req = &target->req_ring[i]; +		srp_finish_req(target, req, NULL, DID_TRANSPORT_FAILFAST << 16); +	} +} + +/* + * It is up to the caller to ensure that srp_rport_reconnect() calls are + * serialized and that no concurrent srp_queuecommand(), srp_abort(), + * srp_reset_device() or srp_reset_host() calls will occur while this function + * is in progress. One way to realize that is not to call this function + * directly but to call srp_reconnect_rport() instead since that last function + * serializes calls of this function via rport->mutex and also blocks + * srp_queuecommand() calls before invoking this function. + */ +static int srp_rport_reconnect(struct srp_rport *rport) +{ +	struct srp_target_port *target = rport->lld_data; +	int i, ret;  	srp_disconnect_target(target);  	/* -	 * Now get a new local CM ID so that we avoid confusing the -	 * target in case things are really fouled up. +	 * Now get a new local CM ID so that we avoid confusing the target in +	 * case things are really fouled up. Doing so also ensures that all CM +	 * callbacks will have finished before a new QP is allocated.  	 */  	ret = srp_new_cm_id(target); -	if (ret) -		goto err; -	qp_attr.qp_state = IB_QPS_RESET; -	ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE); -	if (ret) -		goto err; +	for (i = 0; i < target->req_ring_size; ++i) { +		struct srp_request *req = &target->req_ring[i]; +		srp_finish_req(target, req, NULL, DID_RESET << 16); +	} -	ret = srp_init_qp(target, target->qp); -	if (ret) -		goto err; +	/* +	 * Whether or not creating a new CM ID succeeded, create a new +	 * QP. This guarantees that all callback functions for the old QP have +	 * finished before any send requests are posted on the new QP. +	 */ +	ret += srp_create_target_ib(target); -	while (ib_poll_cq(target->recv_cq, 1, &wc) > 0) -		; /* nothing */ -	while (ib_poll_cq(target->send_cq, 1, &wc) > 0) -		; /* nothing */ +	INIT_LIST_HEAD(&target->free_tx); +	for (i = 0; i < target->queue_size; ++i) +		list_add(&target->tx_ring[i]->list, &target->free_tx); -	spin_lock_irq(target->scsi_host->host_lock); -	list_for_each_entry_safe(req, tmp, &target->req_queue, list) -		srp_reset_req(target, req); -	spin_unlock_irq(target->scsi_host->host_lock); +	if (ret == 0) +		ret = srp_connect_target(target); -	target->rx_head	 = 0; -	target->tx_head	 = 0; -	target->tx_tail  = 0; +	if (ret == 0) +		shost_printk(KERN_INFO, target->scsi_host, +			     PFX "reconnect succeeded\n"); -	target->qp_in_error = 0; -	ret = srp_connect_target(target); -	if (ret) -		goto err; +	return ret; +} -	spin_lock_irq(target->scsi_host->host_lock); -	if (target->state == SRP_TARGET_CONNECTING) { -		ret = 0; -		target->state = SRP_TARGET_LIVE; -	} else -		ret = -EAGAIN; -	spin_unlock_irq(target->scsi_host->host_lock); +static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, +			 unsigned int dma_len, u32 rkey) +{ +	struct srp_direct_buf *desc = state->desc; -	return ret; +	desc->va = cpu_to_be64(dma_addr); +	desc->key = cpu_to_be32(rkey); +	desc->len = cpu_to_be32(dma_len); -err: -	shost_printk(KERN_ERR, target->scsi_host, -		     PFX "reconnect failed (%d), removing target port.\n", ret); +	state->total_len += dma_len; +	state->desc++; +	state->ndesc++; +} -	/* -	 * We couldn't reconnect, so kill our target port off. 
-	 * However, we have to defer the real removal because we might -	 * be in the context of the SCSI error handler now, which -	 * would deadlock if we call scsi_remove_host(). -	 */ -	spin_lock_irq(target->scsi_host->host_lock); -	if (target->state == SRP_TARGET_CONNECTING) { -		target->state = SRP_TARGET_DEAD; -		INIT_WORK(&target->work, srp_remove_work); -		schedule_work(&target->work); +static int srp_map_finish_fmr(struct srp_map_state *state, +			      struct srp_target_port *target) +{ +	struct ib_pool_fmr *fmr; +	u64 io_addr = 0; + +	fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages, +				   state->npages, io_addr); +	if (IS_ERR(fmr)) +		return PTR_ERR(fmr); + +	*state->next_fmr++ = fmr; +	state->nmdesc++; + +	srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey); + +	return 0; +} + +static int srp_map_finish_fr(struct srp_map_state *state, +			     struct srp_target_port *target) +{ +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_send_wr *bad_wr; +	struct ib_send_wr wr; +	struct srp_fr_desc *desc; +	u32 rkey; + +	desc = srp_fr_pool_get(target->fr_pool); +	if (!desc) +		return -ENOMEM; + +	rkey = ib_inc_rkey(desc->mr->rkey); +	ib_update_fast_reg_key(desc->mr, rkey); + +	memcpy(desc->frpl->page_list, state->pages, +	       sizeof(state->pages[0]) * state->npages); + +	memset(&wr, 0, sizeof(wr)); +	wr.opcode = IB_WR_FAST_REG_MR; +	wr.wr_id = FAST_REG_WR_ID_MASK; +	wr.wr.fast_reg.iova_start = state->base_dma_addr; +	wr.wr.fast_reg.page_list = desc->frpl; +	wr.wr.fast_reg.page_list_len = state->npages; +	wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size); +	wr.wr.fast_reg.length = state->dma_len; +	wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | +				       IB_ACCESS_REMOTE_READ | +				       IB_ACCESS_REMOTE_WRITE); +	wr.wr.fast_reg.rkey = desc->mr->lkey; + +	*state->next_fr++ = desc; +	state->nmdesc++; + +	srp_map_desc(state, state->base_dma_addr, state->dma_len, +		     desc->mr->rkey); + +	return ib_post_send(target->qp, &wr, &bad_wr); +} + +static int srp_finish_mapping(struct srp_map_state *state, +			      struct srp_target_port *target) +{ +	int ret = 0; + +	if (state->npages == 0) +		return 0; + +	if (state->npages == 1 && !register_always) +		srp_map_desc(state, state->base_dma_addr, state->dma_len, +			     target->rkey); +	else +		ret = target->srp_host->srp_dev->use_fast_reg ? 
+			srp_map_finish_fr(state, target) : +			srp_map_finish_fmr(state, target); + +	if (ret == 0) { +		state->npages = 0; +		state->dma_len = 0;  	} -	spin_unlock_irq(target->scsi_host->host_lock);  	return ret;  } -static int srp_map_fmr(struct srp_target_port *target, struct scatterlist *scat, -		       int sg_cnt, struct srp_request *req, -		       struct srp_direct_buf *buf) +static void srp_map_update_start(struct srp_map_state *state, +				 struct scatterlist *sg, int sg_index, +				 dma_addr_t dma_addr) +{ +	state->unmapped_sg = sg; +	state->unmapped_index = sg_index; +	state->unmapped_addr = dma_addr; +} + +static int srp_map_sg_entry(struct srp_map_state *state, +			    struct srp_target_port *target, +			    struct scatterlist *sg, int sg_index, +			    bool use_mr)  { -	u64 io_addr = 0; -	u64 *dma_pages; -	u32 len; -	int page_cnt; -	int i, j; -	int ret;  	struct srp_device *dev = target->srp_host->srp_dev;  	struct ib_device *ibdev = dev->dev; -	struct scatterlist *sg; - -	if (!dev->fmr_pool) -		return -ENODEV; +	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg); +	unsigned int dma_len = ib_sg_dma_len(ibdev, sg); +	unsigned int len; +	int ret; -	if (srp_target_is_mellanox(target) && -	    (ib_sg_dma_address(ibdev, &scat[0]) & ~dev->fmr_page_mask)) -		return -EINVAL; +	if (!dma_len) +		return 0; -	len = page_cnt = 0; -	scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) { -		unsigned int dma_len = ib_sg_dma_len(ibdev, sg); +	if (!use_mr) { +		/* +		 * Once we're in direct map mode for a request, we don't +		 * go back to FMR or FR mode, so no need to update anything +		 * other than the descriptor. +		 */ +		srp_map_desc(state, dma_addr, dma_len, target->rkey); +		return 0; +	} -		if (ib_sg_dma_address(ibdev, sg) & ~dev->fmr_page_mask) { -			if (i > 0) -				return -EINVAL; -			else -				++page_cnt; -		} -		if ((ib_sg_dma_address(ibdev, sg) + dma_len) & -		    ~dev->fmr_page_mask) { -			if (i < sg_cnt - 1) -				return -EINVAL; -			else -				++page_cnt; -		} +	/* +	 * Since not all RDMA HW drivers support non-zero page offsets for +	 * FMR, if we start at an offset into a page, don't merge into the +	 * current FMR mapping. Finish it out, and use the kernel's MR for +	 * this sg entry. +	 */ +	if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) || +	    dma_len > dev->mr_max_size) { +		ret = srp_finish_mapping(state, target); +		if (ret) +			return ret; -		len += dma_len; +		srp_map_desc(state, dma_addr, dma_len, target->rkey); +		srp_map_update_start(state, NULL, 0, 0); +		return 0;  	} -	page_cnt += len >> dev->fmr_page_shift; -	if (page_cnt > SRP_FMR_SIZE) -		return -ENOMEM; +	/* +	 * If this is the first sg that will be mapped via FMR or via FR, save +	 * our position. We need to know the first unmapped entry, its index, +	 * and the first unmapped address within that entry to be able to +	 * restart mapping after an error. 
+	 */ +	if (!state->unmapped_sg) +		srp_map_update_start(state, sg, sg_index, dma_addr); -	dma_pages = kmalloc(sizeof (u64) * page_cnt, GFP_ATOMIC); -	if (!dma_pages) -		return -ENOMEM; +	while (dma_len) { +		unsigned offset = dma_addr & ~dev->mr_page_mask; +		if (state->npages == dev->max_pages_per_mr || offset != 0) { +			ret = srp_finish_mapping(state, target); +			if (ret) +				return ret; + +			srp_map_update_start(state, sg, sg_index, dma_addr); +		} -	page_cnt = 0; -	scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) { -		unsigned int dma_len = ib_sg_dma_len(ibdev, sg); +		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset); -		for (j = 0; j < dma_len; j += dev->fmr_page_size) -			dma_pages[page_cnt++] = -				(ib_sg_dma_address(ibdev, sg) & -				 dev->fmr_page_mask) + j; +		if (!state->npages) +			state->base_dma_addr = dma_addr; +		state->pages[state->npages++] = dma_addr & dev->mr_page_mask; +		state->dma_len += len; +		dma_addr += len; +		dma_len -= len;  	} -	req->fmr = ib_fmr_pool_map_phys(dev->fmr_pool, -					dma_pages, page_cnt, io_addr); -	if (IS_ERR(req->fmr)) { -		ret = PTR_ERR(req->fmr); -		req->fmr = NULL; -		goto out; +	/* +	 * If the last entry of the MR wasn't a full page, then we need to +	 * close it out and start a new one -- we can only merge at page +	 * boundries. +	 */ +	ret = 0; +	if (len != dev->mr_page_size) { +		ret = srp_finish_mapping(state, target); +		if (!ret) +			srp_map_update_start(state, NULL, 0, 0);  	} +	return ret; +} -	buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, &scat[0]) & -			       ~dev->fmr_page_mask); -	buf->key = cpu_to_be32(req->fmr->fmr->rkey); -	buf->len = cpu_to_be32(len); +static int srp_map_sg(struct srp_map_state *state, +		      struct srp_target_port *target, struct srp_request *req, +		      struct scatterlist *scat, int count) +{ +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_device *ibdev = dev->dev; +	struct scatterlist *sg; +	int i; +	bool use_mr; -	ret = 0; +	state->desc	= req->indirect_desc; +	state->pages	= req->map_page; +	if (dev->use_fast_reg) { +		state->next_fr = req->fr_list; +		use_mr = !!target->fr_pool; +	} else { +		state->next_fmr = req->fmr_list; +		use_mr = !!target->fmr_pool; +	} -out: -	kfree(dma_pages); +	for_each_sg(scat, sg, count, i) { +		if (srp_map_sg_entry(state, target, sg, i, use_mr)) { +			/* +			 * Memory registration failed, so backtrack to the +			 * first unmapped entry and continue on without using +			 * memory registration. 
+			 */ +			dma_addr_t dma_addr; +			unsigned int dma_len; + +backtrack: +			sg = state->unmapped_sg; +			i = state->unmapped_index; + +			dma_addr = ib_sg_dma_address(ibdev, sg); +			dma_len = ib_sg_dma_len(ibdev, sg); +			dma_len -= (state->unmapped_addr - dma_addr); +			dma_addr = state->unmapped_addr; +			use_mr = false; +			srp_map_desc(state, dma_addr, dma_len, target->rkey); +		} +	} -	return ret; +	if (use_mr && srp_finish_mapping(state, target)) +		goto backtrack; + +	req->nmdesc = state->nmdesc; + +	return 0;  }  static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, @@ -722,9 +1398,12 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,  	struct scatterlist *scat;  	struct srp_cmd *cmd = req->cmd->buf;  	int len, nents, count; -	u8 fmt = SRP_DATA_DESC_DIRECT;  	struct srp_device *dev;  	struct ib_device *ibdev; +	struct srp_map_state state; +	struct srp_indirect_buf *indirect_hdr; +	u32 table_len; +	u8 fmt;  	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)  		return sizeof (struct srp_cmd); @@ -744,11 +1423,13 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,  	ibdev = dev->dev;  	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction); +	if (unlikely(count == 0)) +		return -EIO;  	fmt = SRP_DATA_DESC_DIRECT;  	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf); -	if (count == 1) { +	if (count == 1 && !register_always) {  		/*  		 * The midlayer only generated a single gather/scatter  		 * entry, or DMA mapping coalesced everything to a @@ -758,51 +1439,73 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,  		struct srp_direct_buf *buf = (void *) cmd->add_data;  		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); -		buf->key = cpu_to_be32(dev->mr->rkey); +		buf->key = cpu_to_be32(target->rkey);  		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); -	} else if (srp_map_fmr(target, scat, count, req, -			       (void *) cmd->add_data)) { + +		req->nmdesc = 0; +		goto map_complete; +	} + +	/* +	 * We have more than one scatter/gather entry, so build our indirect +	 * descriptor table, trying to merge as many entries as we can. +	 */ +	indirect_hdr = (void *) cmd->add_data; + +	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr, +				   target->indirect_size, DMA_TO_DEVICE); + +	memset(&state, 0, sizeof(state)); +	srp_map_sg(&state, target, req, scat, count); + +	/* We've mapped the request, now pull as much of the indirect +	 * descriptor table as we can into the command buffer. If this +	 * target is not using an external indirect table, we are +	 * guaranteed to fit into the command, as the SCSI layer won't +	 * give us more S/G entries than we allow. +	 */ +	if (state.ndesc == 1) {  		/* -		 * FMR mapping failed, and the scatterlist has more -		 * than one entry.  Generate an indirect memory -		 * descriptor. +		 * Memory registration collapsed the sg-list into one entry, +		 * so use a direct descriptor.  		 
*/ -		struct srp_indirect_buf *buf = (void *) cmd->add_data; -		struct scatterlist *sg; -		u32 datalen = 0; -		int i; - -		fmt = SRP_DATA_DESC_INDIRECT; -		len = sizeof (struct srp_cmd) + -			sizeof (struct srp_indirect_buf) + -			count * sizeof (struct srp_direct_buf); - -		scsi_for_each_sg(scmnd, sg, count, i) { -			unsigned int dma_len = ib_sg_dma_len(ibdev, sg); - -			buf->desc_list[i].va  = -				cpu_to_be64(ib_sg_dma_address(ibdev, sg)); -			buf->desc_list[i].key = -				cpu_to_be32(dev->mr->rkey); -			buf->desc_list[i].len = cpu_to_be32(dma_len); -			datalen += dma_len; -		} - -		if (scmnd->sc_data_direction == DMA_TO_DEVICE) -			cmd->data_out_desc_cnt = count; -		else -			cmd->data_in_desc_cnt = count; +		struct srp_direct_buf *buf = (void *) cmd->add_data; -		buf->table_desc.va  = -			cpu_to_be64(req->cmd->dma + sizeof *cmd + sizeof *buf); -		buf->table_desc.key = -			cpu_to_be32(target->srp_host->srp_dev->mr->rkey); -		buf->table_desc.len = -			cpu_to_be32(count * sizeof (struct srp_direct_buf)); +		*buf = req->indirect_desc[0]; +		goto map_complete; +	} -		buf->len = cpu_to_be32(datalen); +	if (unlikely(target->cmd_sg_cnt < state.ndesc && +						!target->allow_ext_sg)) { +		shost_printk(KERN_ERR, target->scsi_host, +			     "Could not fit S/G list into SRP_CMD\n"); +		return -EIO;  	} +	count = min(state.ndesc, target->cmd_sg_cnt); +	table_len = state.ndesc * sizeof (struct srp_direct_buf); + +	fmt = SRP_DATA_DESC_INDIRECT; +	len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf); +	len += count * sizeof (struct srp_direct_buf); + +	memcpy(indirect_hdr->desc_list, req->indirect_desc, +	       count * sizeof (struct srp_direct_buf)); + +	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); +	indirect_hdr->table_desc.key = cpu_to_be32(target->rkey); +	indirect_hdr->table_desc.len = cpu_to_be32(table_len); +	indirect_hdr->len = cpu_to_be32(state.total_len); + +	if (scmnd->sc_data_direction == DMA_TO_DEVICE) +		cmd->data_out_desc_cnt = count; +	else +		cmd->data_in_desc_cnt = count; + +	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len, +				      DMA_TO_DEVICE); + +map_complete:  	if (scmnd->sc_data_direction == DMA_TO_DEVICE)  		cmd->buf_fmt = fmt << 4;  	else @@ -812,9 +1515,23 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,  }  /* - * Must be called with target->scsi_host->host_lock held to protect - * req_lim and tx_head.  Lock cannot be dropped between call here and - * call to __srp_post_send(). + * Return an IU and possible credit to the free pool + */ +static void srp_put_tx_iu(struct srp_target_port *target, struct srp_iu *iu, +			  enum srp_iu_type iu_type) +{ +	unsigned long flags; + +	spin_lock_irqsave(&target->lock, flags); +	list_add(&iu->list, &target->free_tx); +	if (iu_type != SRP_IU_RSP) +		++target->req_lim; +	spin_unlock_irqrestore(&target->lock, flags); +} + +/* + * Must be called with target->lock held to protect req_lim and free_tx. + * If IU is not sent, it must be returned using srp_put_tx_iu().   
*   * Note:   * An upper limit for the number of allocated information units for each @@ -833,83 +1550,59 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target,  	srp_send_completion(target->send_cq, target); -	if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE) +	if (list_empty(&target->free_tx))  		return NULL;  	/* Initiator responses to target requests do not consume credits */ -	if (target->req_lim <= rsv && iu_type != SRP_IU_RSP) { -		++target->zero_req_lim; -		return NULL; +	if (iu_type != SRP_IU_RSP) { +		if (target->req_lim <= rsv) { +			++target->zero_req_lim; +			return NULL; +		} + +		--target->req_lim;  	} -	iu = target->tx_ring[target->tx_head & SRP_SQ_MASK]; -	iu->type = iu_type; +	iu = list_first_entry(&target->free_tx, struct srp_iu, list); +	list_del(&iu->list);  	return iu;  } -/* - * Must be called with target->scsi_host->host_lock held to protect - * req_lim and tx_head. - */ -static int __srp_post_send(struct srp_target_port *target, -			   struct srp_iu *iu, int len) +static int srp_post_send(struct srp_target_port *target, +			 struct srp_iu *iu, int len)  {  	struct ib_sge list;  	struct ib_send_wr wr, *bad_wr; -	int ret = 0;  	list.addr   = iu->dma;  	list.length = len; -	list.lkey   = target->srp_host->srp_dev->mr->lkey; +	list.lkey   = target->lkey;  	wr.next       = NULL; -	wr.wr_id      = target->tx_head & SRP_SQ_MASK; +	wr.wr_id      = (uintptr_t) iu;  	wr.sg_list    = &list;  	wr.num_sge    = 1;  	wr.opcode     = IB_WR_SEND;  	wr.send_flags = IB_SEND_SIGNALED; -	ret = ib_post_send(target->qp, &wr, &bad_wr); - -	if (!ret) { -		++target->tx_head; -		if (iu->type != SRP_IU_RSP) -			--target->req_lim; -	} - -	return ret; +	return ib_post_send(target->qp, &wr, &bad_wr);  } -static int srp_post_recv(struct srp_target_port *target) +static int srp_post_recv(struct srp_target_port *target, struct srp_iu *iu)  { -	unsigned long flags; -	struct srp_iu *iu; -	struct ib_sge list;  	struct ib_recv_wr wr, *bad_wr; -	unsigned int next; -	int ret; - -	spin_lock_irqsave(target->scsi_host->host_lock, flags); - -	next	 = target->rx_head & SRP_RQ_MASK; -	wr.wr_id = next; -	iu	 = target->rx_ring[next]; +	struct ib_sge list;  	list.addr   = iu->dma;  	list.length = iu->size; -	list.lkey   = target->srp_host->srp_dev->mr->lkey; +	list.lkey   = target->lkey;  	wr.next     = NULL; +	wr.wr_id    = (uintptr_t) iu;  	wr.sg_list  = &list;  	wr.num_sge  = 1; -	ret = ib_post_recv(target->qp, &wr, &bad_wr); -	if (!ret) -		++target->rx_head; - -	spin_unlock_irqrestore(target->scsi_host->host_lock, flags); - -	return ret; +	return ib_post_recv(target->qp, &wr, &bad_wr);  }  static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) @@ -917,28 +1610,30 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)  	struct srp_request *req;  	struct scsi_cmnd *scmnd;  	unsigned long flags; -	s32 delta; - -	delta = (s32) be32_to_cpu(rsp->req_lim_delta); - -	spin_lock_irqsave(target->scsi_host->host_lock, flags); - -	target->req_lim += delta; - -	req = &target->req_ring[rsp->tag & ~SRP_TAG_TSK_MGMT];  	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { -		if (be32_to_cpu(rsp->resp_data_len) < 4) -			req->tsk_status = -1; -		else -			req->tsk_status = rsp->data[3]; -		complete(&req->done); +		spin_lock_irqsave(&target->lock, flags); +		target->req_lim += be32_to_cpu(rsp->req_lim_delta); +		spin_unlock_irqrestore(&target->lock, flags); + +		target->tsk_mgmt_status = -1; +		if (be32_to_cpu(rsp->resp_data_len) >= 4) +			
target->tsk_mgmt_status = rsp->data[3]; +		complete(&target->tsk_mgmt_done);  	} else { -		scmnd = req->scmnd; -		if (!scmnd) +		req = &target->req_ring[rsp->tag]; +		scmnd = srp_claim_req(target, req, NULL, NULL); +		if (!scmnd) {  			shost_printk(KERN_ERR, target->scsi_host,  				     "Null scmnd for RSP w/tag %016llx\n",  				     (unsigned long long) rsp->tag); + +			spin_lock_irqsave(&target->lock, flags); +			target->req_lim += be32_to_cpu(rsp->req_lim_delta); +			spin_unlock_irqrestore(&target->lock, flags); + +			return; +		}  		scmnd->result = rsp->status;  		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { @@ -953,49 +1648,44 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)  		else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))  			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); -		if (!req->tsk_mgmt) { -			scmnd->host_scribble = (void *) -1L; -			scmnd->scsi_done(scmnd); +		srp_free_req(target, req, scmnd, +			     be32_to_cpu(rsp->req_lim_delta)); -			srp_remove_req(target, req); -		} else -			req->cmd_done = 1; +		scmnd->host_scribble = NULL; +		scmnd->scsi_done(scmnd);  	} - -	spin_unlock_irqrestore(target->scsi_host->host_lock, flags);  }  static int srp_response_common(struct srp_target_port *target, s32 req_delta,  			       void *rsp, int len)  { -	struct ib_device *dev; +	struct ib_device *dev = target->srp_host->srp_dev->dev;  	unsigned long flags;  	struct srp_iu *iu; -	int err = 1; +	int err; -	dev = target->srp_host->srp_dev->dev; - -	spin_lock_irqsave(target->scsi_host->host_lock, flags); +	spin_lock_irqsave(&target->lock, flags);  	target->req_lim += req_delta; -  	iu = __srp_get_tx_iu(target, SRP_IU_RSP); +	spin_unlock_irqrestore(&target->lock, flags); +  	if (!iu) {  		shost_printk(KERN_ERR, target->scsi_host, PFX  			     "no IU available to send response\n"); -		goto out; +		return 1;  	}  	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);  	memcpy(iu->buf, rsp, len);  	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE); -	err = __srp_post_send(target, iu, len); -	if (err) +	err = srp_post_send(target, iu, len); +	if (err) {  		shost_printk(KERN_ERR, target->scsi_host, PFX  			     "unable to post response: %d\n", err); +		srp_put_tx_iu(target, iu, SRP_IU_RSP); +	} -out: -	spin_unlock_irqrestore(target->scsi_host->host_lock, flags);  	return err;  } @@ -1032,14 +1722,11 @@ static void srp_process_aer_req(struct srp_target_port *target,  static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)  { -	struct ib_device *dev; -	struct srp_iu *iu; +	struct ib_device *dev = target->srp_host->srp_dev->dev; +	struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;  	int res;  	u8 opcode; -	iu = target->rx_ring[wc->wr_id]; - -	dev = target->srp_host->srp_dev->dev;  	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_ti_iu_len,  				   DMA_FROM_DEVICE); @@ -1080,12 +1767,51 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)  	ib_dma_sync_single_for_device(dev, iu->dma, target->max_ti_iu_len,  				      DMA_FROM_DEVICE); -	res = srp_post_recv(target); +	res = srp_post_recv(target, iu);  	if (res != 0)  		shost_printk(KERN_ERR, target->scsi_host,  			     PFX "Recv failed with error code %d\n", res);  } +/** + * srp_tl_err_work() - handle a transport layer error + * @work: Work structure embedded in an SRP target port. + * + * Note: This function may get invoked before the rport has been created, + * hence the target->rport test. 
+ */ +static void srp_tl_err_work(struct work_struct *work) +{ +	struct srp_target_port *target; + +	target = container_of(work, struct srp_target_port, tl_err_work); +	if (target->rport) +		srp_start_tl_fail_timers(target->rport); +} + +static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, +			      bool send_err, struct srp_target_port *target) +{ +	if (target->connected && !target->qp_in_error) { +		if (wr_id & LOCAL_INV_WR_ID_MASK) { +			shost_printk(KERN_ERR, target->scsi_host, PFX +				     "LOCAL_INV failed with status %d\n", +				     wc_status); +		} else if (wr_id & FAST_REG_WR_ID_MASK) { +			shost_printk(KERN_ERR, target->scsi_host, PFX +				     "FAST_REG_MR failed status %d\n", +				     wc_status); +		} else { +			shost_printk(KERN_ERR, target->scsi_host, +				     PFX "failed %s status %d for iu %p\n", +				     send_err ? "send" : "receive", +				     wc_status, (void *)(uintptr_t)wr_id); +		} +		queue_work(system_long_wq, &target->tl_err_work); +	} +	target->qp_in_error = true; +} +  static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)  {  	struct srp_target_port *target = target_ptr; @@ -1093,15 +1819,11 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)  	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);  	while (ib_poll_cq(cq, 1, &wc) > 0) { -		if (wc.status) { -			shost_printk(KERN_ERR, target->scsi_host, -				     PFX "failed receive status %d\n", -				     wc.status); -			target->qp_in_error = 1; -			break; +		if (likely(wc.status == IB_WC_SUCCESS)) { +			srp_handle_recv(target, &wc); +		} else { +			srp_handle_qp_err(wc.wr_id, wc.status, false, target);  		} - -		srp_handle_recv(target, &wc);  	}  } @@ -1109,53 +1831,57 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)  {  	struct srp_target_port *target = target_ptr;  	struct ib_wc wc; +	struct srp_iu *iu;  	while (ib_poll_cq(cq, 1, &wc) > 0) { -		if (wc.status) { -			shost_printk(KERN_ERR, target->scsi_host, -				     PFX "failed send status %d\n", -				     wc.status); -			target->qp_in_error = 1; -			break; +		if (likely(wc.status == IB_WC_SUCCESS)) { +			iu = (struct srp_iu *) (uintptr_t) wc.wr_id; +			list_add(&iu->list, &target->free_tx); +		} else { +			srp_handle_qp_err(wc.wr_id, wc.status, true, target);  		} - -		++target->tx_tail;  	}  } -static int srp_queuecommand(struct scsi_cmnd *scmnd, -			    void (*done)(struct scsi_cmnd *)) +static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)  { -	struct srp_target_port *target = host_to_target(scmnd->device->host); +	struct srp_target_port *target = host_to_target(shost); +	struct srp_rport *rport = target->rport;  	struct srp_request *req;  	struct srp_iu *iu;  	struct srp_cmd *cmd;  	struct ib_device *dev; -	int len; +	unsigned long flags; +	int len, ret; +	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler; -	if (target->state == SRP_TARGET_CONNECTING) -		goto err; +	/* +	 * The SCSI EH thread is the only context from which srp_queuecommand() +	 * can get invoked for blocked devices (SDEV_BLOCK / +	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by +	 * locking the rport mutex if invoked from inside the SCSI EH. 
+	 */ +	if (in_scsi_eh) +		mutex_lock(&rport->mutex); -	if (target->state == SRP_TARGET_DEAD || -	    target->state == SRP_TARGET_REMOVED) { -		scmnd->result = DID_BAD_TARGET << 16; -		done(scmnd); -		return 0; -	} +	scmnd->result = srp_chkready(target->rport); +	if (unlikely(scmnd->result)) +		goto err; +	spin_lock_irqsave(&target->lock, flags);  	iu = __srp_get_tx_iu(target, SRP_IU_CMD);  	if (!iu) -		goto err; +		goto err_unlock; + +	req = list_first_entry(&target->free_reqs, struct srp_request, list); +	list_del(&req->list); +	spin_unlock_irqrestore(&target->lock, flags);  	dev = target->srp_host->srp_dev->dev; -	ib_dma_sync_single_for_cpu(dev, iu->dma, srp_max_iu_len, +	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,  				   DMA_TO_DEVICE); -	req = list_first_entry(&target->free_reqs, struct srp_request, list); - -	scmnd->scsi_done     = done; -	scmnd->result        = 0; -	scmnd->host_scribble = (void *) (long) req->index; +	scmnd->host_scribble = (void *) req;  	cmd = iu->buf;  	memset(cmd, 0, sizeof *cmd); @@ -1167,40 +1893,85 @@ static int srp_queuecommand(struct scsi_cmnd *scmnd,  	req->scmnd    = scmnd;  	req->cmd      = iu; -	req->cmd_done = 0; -	req->tsk_mgmt = NULL;  	len = srp_map_data(scmnd, target, req);  	if (len < 0) {  		shost_printk(KERN_ERR, target->scsi_host, -			     PFX "Failed to map data\n"); -		goto err; +			     PFX "Failed to map data (%d)\n", len); +		/* +		 * If we ran out of memory descriptors (-ENOMEM) because an +		 * application is queuing many requests with more than +		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer +		 * to reduce queue depth temporarily. +		 */ +		scmnd->result = len == -ENOMEM ? +			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16; +		goto err_iu;  	} -	ib_dma_sync_single_for_device(dev, iu->dma, srp_max_iu_len, +	ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,  				      DMA_TO_DEVICE); -	if (__srp_post_send(target, iu, len)) { +	if (srp_post_send(target, iu, len)) {  		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");  		goto err_unmap;  	} -	list_move_tail(&req->list, &target->req_queue); +	ret = 0; + +unlock_rport: +	if (in_scsi_eh) +		mutex_unlock(&rport->mutex); -	return 0; +	return ret;  err_unmap:  	srp_unmap_data(scmnd, target, req); +err_iu: +	srp_put_tx_iu(target, iu, SRP_IU_CMD); + +	/* +	 * Avoid that the loops that iterate over the request ring can +	 * encounter a dangling SCSI command pointer. +	 */ +	req->scmnd = NULL; + +	spin_lock_irqsave(&target->lock, flags); +	list_add(&req->list, &target->free_reqs); + +err_unlock: +	spin_unlock_irqrestore(&target->lock, flags); +  err: -	return SCSI_MLQUEUE_HOST_BUSY; +	if (scmnd->result) { +		scmnd->scsi_done(scmnd); +		ret = 0; +	} else { +		ret = SCSI_MLQUEUE_HOST_BUSY; +	} + +	goto unlock_rport;  } +/* + * Note: the resources allocated in this function are freed in + * srp_free_target_ib(). 
+ */  static int srp_alloc_iu_bufs(struct srp_target_port *target)  {  	int i; -	for (i = 0; i < SRP_RQ_SIZE; ++i) { +	target->rx_ring = kzalloc(target->queue_size * sizeof(*target->rx_ring), +				  GFP_KERNEL); +	if (!target->rx_ring) +		goto err_no_ring; +	target->tx_ring = kzalloc(target->queue_size * sizeof(*target->tx_ring), +				  GFP_KERNEL); +	if (!target->tx_ring) +		goto err_no_ring; + +	for (i = 0; i < target->queue_size; ++i) {  		target->rx_ring[i] = srp_alloc_iu(target->srp_host,  						  target->max_ti_iu_len,  						  GFP_KERNEL, DMA_FROM_DEVICE); @@ -1208,30 +1979,138 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)  			goto err;  	} -	for (i = 0; i < SRP_SQ_SIZE; ++i) { +	for (i = 0; i < target->queue_size; ++i) {  		target->tx_ring[i] = srp_alloc_iu(target->srp_host, -						  srp_max_iu_len, +						  target->max_iu_len,  						  GFP_KERNEL, DMA_TO_DEVICE);  		if (!target->tx_ring[i])  			goto err; + +		list_add(&target->tx_ring[i]->list, &target->free_tx);  	}  	return 0;  err: -	for (i = 0; i < SRP_RQ_SIZE; ++i) { +	for (i = 0; i < target->queue_size; ++i) {  		srp_free_iu(target->srp_host, target->rx_ring[i]); -		target->rx_ring[i] = NULL; -	} - -	for (i = 0; i < SRP_SQ_SIZE; ++i) {  		srp_free_iu(target->srp_host, target->tx_ring[i]); -		target->tx_ring[i] = NULL;  	} + +err_no_ring: +	kfree(target->tx_ring); +	target->tx_ring = NULL; +	kfree(target->rx_ring); +	target->rx_ring = NULL; +  	return -ENOMEM;  } +static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) +{ +	uint64_t T_tr_ns, max_compl_time_ms; +	uint32_t rq_tmo_jiffies; + +	/* +	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair, +	 * table 91), both the QP timeout and the retry count have to be set +	 * for RC QP's during the RTR to RTS transition. +	 */ +	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) != +		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)); + +	/* +	 * Set target->rq_tmo_jiffies to one second more than the largest time +	 * it can take before an error completion is generated. See also +	 * C9-140..142 in the IBTA spec for more information about how to +	 * convert the QP Local ACK Timeout value to nanoseconds. +	 */ +	T_tr_ns = 4096 * (1ULL << qp_attr->timeout); +	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns; +	do_div(max_compl_time_ms, NSEC_PER_MSEC); +	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000); + +	return rq_tmo_jiffies; +} + +static void srp_cm_rep_handler(struct ib_cm_id *cm_id, +			       struct srp_login_rsp *lrsp, +			       struct srp_target_port *target) +{ +	struct ib_qp_attr *qp_attr = NULL; +	int attr_mask = 0; +	int ret; +	int i; + +	if (lrsp->opcode == SRP_LOGIN_RSP) { +		target->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); +		target->req_lim       = be32_to_cpu(lrsp->req_lim_delta); + +		/* +		 * Reserve credits for task management so we don't +		 * bounce requests back to the SCSI mid-layer. 
+		 */ +		target->scsi_host->can_queue +			= min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE, +			      target->scsi_host->can_queue); +		target->scsi_host->cmd_per_lun +			= min_t(int, target->scsi_host->can_queue, +				target->scsi_host->cmd_per_lun); +	} else { +		shost_printk(KERN_WARNING, target->scsi_host, +			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); +		ret = -ECONNRESET; +		goto error; +	} + +	if (!target->rx_ring) { +		ret = srp_alloc_iu_bufs(target); +		if (ret) +			goto error; +	} + +	ret = -ENOMEM; +	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); +	if (!qp_attr) +		goto error; + +	qp_attr->qp_state = IB_QPS_RTR; +	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); +	if (ret) +		goto error_free; + +	ret = ib_modify_qp(target->qp, qp_attr, attr_mask); +	if (ret) +		goto error_free; + +	for (i = 0; i < target->queue_size; i++) { +		struct srp_iu *iu = target->rx_ring[i]; +		ret = srp_post_recv(target, iu); +		if (ret) +			goto error_free; +	} + +	qp_attr->qp_state = IB_QPS_RTS; +	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); +	if (ret) +		goto error_free; + +	target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); + +	ret = ib_modify_qp(target->qp, qp_attr, attr_mask); +	if (ret) +		goto error_free; + +	ret = ib_send_cm_rtu(cm_id, NULL, 0); + +error_free: +	kfree(qp_attr); + +error: +	target->status = ret; +} +  static void srp_cm_rej_handler(struct ib_cm_id *cm_id,  			       struct ib_cm_event *event,  			       struct srp_target_port *target) @@ -1291,8 +2170,10 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,  				shost_printk(KERN_WARNING, shost,  					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");  			else -				shost_printk(KERN_WARNING, shost, -					    PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason); +				shost_printk(KERN_WARNING, shost, PFX +					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", +					     target->path.sgid.raw, +					     target->orig_dgid, reason);  		} else  			shost_printk(KERN_WARNING, shost,  				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED," @@ -1315,11 +2196,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,  static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)  {  	struct srp_target_port *target = cm_id->context; -	struct ib_qp_attr *qp_attr = NULL; -	int attr_mask = 0;  	int comp = 0; -	int opcode = 0; -	int i;  	switch (event->event) {  	case IB_CM_REQ_ERROR: @@ -1331,70 +2208,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)  	case IB_CM_REP_RECEIVED:  		comp = 1; -		opcode = *(u8 *) event->private_data; - -		if (opcode == SRP_LOGIN_RSP) { -			struct srp_login_rsp *rsp = event->private_data; - -			target->max_ti_iu_len = be32_to_cpu(rsp->max_ti_iu_len); -			target->req_lim       = be32_to_cpu(rsp->req_lim_delta); - -			/* -			 * Reserve credits for task management so we don't -			 * bounce requests back to the SCSI mid-layer. 
-			 */ -			target->scsi_host->can_queue -				= min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE, -				      target->scsi_host->can_queue); -		} else { -			shost_printk(KERN_WARNING, target->scsi_host, -				    PFX "Unhandled RSP opcode %#x\n", opcode); -			target->status = -ECONNRESET; -			break; -		} - -		if (!target->rx_ring[0]) { -			target->status = srp_alloc_iu_bufs(target); -			if (target->status) -				break; -		} - -		qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); -		if (!qp_attr) { -			target->status = -ENOMEM; -			break; -		} - -		qp_attr->qp_state = IB_QPS_RTR; -		target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); -		if (target->status) -			break; - -		target->status = ib_modify_qp(target->qp, qp_attr, attr_mask); -		if (target->status) -			break; - -		for (i = 0; i < SRP_RQ_SIZE; i++) { -			target->status = srp_post_recv(target); -			if (target->status) -				break; -		} -		if (target->status) -			break; - -		qp_attr->qp_state = IB_QPS_RTS; -		target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); -		if (target->status) -			break; - -		target->status = ib_modify_qp(target->qp, qp_attr, attr_mask); -		if (target->status) -			break; - -		target->status = ib_send_cm_rtu(cm_id, NULL, 0); -		if (target->status) -			break; - +		srp_cm_rep_handler(cm_id, event->private_data, target);  		break;  	case IB_CM_REJ_RECEIVED: @@ -1407,16 +2221,18 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)  	case IB_CM_DREQ_RECEIVED:  		shost_printk(KERN_WARNING, target->scsi_host,  			     PFX "DREQ received - connection closed\n"); +		srp_change_conn_state(target, false);  		if (ib_send_cm_drep(cm_id, NULL, 0))  			shost_printk(KERN_ERR, target->scsi_host,  				     PFX "Sending CM DREP failed\n"); +		queue_work(system_long_wq, &target->tl_err_work);  		break;  	case IB_CM_TIMEWAIT_EXIT:  		shost_printk(KERN_ERR, target->scsi_host,  			     PFX "connection closed\n"); -  		comp = 1; +  		target->status = 0;  		break; @@ -1434,31 +2250,87 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)  	if (comp)  		complete(&target->done); -	kfree(qp_attr); -  	return 0;  } +/** + * srp_change_queue_type - changing device queue tag type + * @sdev: scsi device struct + * @tag_type: requested tag type + * + * Returns queue tag type. + */ +static int +srp_change_queue_type(struct scsi_device *sdev, int tag_type) +{ +	if (sdev->tagged_supported) { +		scsi_set_tag_type(sdev, tag_type); +		if (tag_type) +			scsi_activate_tcq(sdev, sdev->queue_depth); +		else +			scsi_deactivate_tcq(sdev, sdev->queue_depth); +	} else +		tag_type = 0; + +	return tag_type; +} + +/** + * srp_change_queue_depth - setting device queue depth + * @sdev: scsi device struct + * @qdepth: requested queue depth + * @reason: SCSI_QDEPTH_DEFAULT/SCSI_QDEPTH_QFULL/SCSI_QDEPTH_RAMP_UP + * (see include/scsi/scsi_host.h for definition) + * + * Returns queue depth. 
+ */ +static int +srp_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) +{ +	struct Scsi_Host *shost = sdev->host; +	int max_depth; +	if (reason == SCSI_QDEPTH_DEFAULT || reason == SCSI_QDEPTH_RAMP_UP) { +		max_depth = shost->can_queue; +		if (!sdev->tagged_supported) +			max_depth = 1; +		if (qdepth > max_depth) +			qdepth = max_depth; +		scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth); +	} else if (reason == SCSI_QDEPTH_QFULL) +		scsi_track_queue_full(sdev, qdepth); +	else +		return -EOPNOTSUPP; + +	return sdev->queue_depth; +} +  static int srp_send_tsk_mgmt(struct srp_target_port *target, -			     struct srp_request *req, u8 func) +			     u64 req_tag, unsigned int lun, u8 func)  { +	struct srp_rport *rport = target->rport;  	struct ib_device *dev = target->srp_host->srp_dev->dev;  	struct srp_iu *iu;  	struct srp_tsk_mgmt *tsk_mgmt; -	spin_lock_irq(target->scsi_host->host_lock); - -	if (target->state == SRP_TARGET_DEAD || -	    target->state == SRP_TARGET_REMOVED) { -		req->scmnd->result = DID_BAD_TARGET << 16; -		goto out; -	} +	if (!target->connected || target->qp_in_error) +		return -1; -	init_completion(&req->done); +	init_completion(&target->tsk_mgmt_done); +	/* +	 * Lock the rport mutex to avoid that srp_create_target_ib() is +	 * invoked while a task management function is being sent. +	 */ +	mutex_lock(&rport->mutex); +	spin_lock_irq(&target->lock);  	iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT); -	if (!iu) -		goto out; +	spin_unlock_irq(&target->lock); + +	if (!iu) { +		mutex_unlock(&rport->mutex); + +		return -1; +	}  	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,  				   DMA_TO_DEVICE); @@ -1466,70 +2338,48 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,  	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);  	tsk_mgmt->opcode 	= SRP_TSK_MGMT; -	tsk_mgmt->lun 		= cpu_to_be64((u64) req->scmnd->device->lun << 48); -	tsk_mgmt->tag 		= req->index | SRP_TAG_TSK_MGMT; +	tsk_mgmt->lun		= cpu_to_be64((u64) lun << 48); +	tsk_mgmt->tag		= req_tag | SRP_TAG_TSK_MGMT;  	tsk_mgmt->tsk_mgmt_func = func; -	tsk_mgmt->task_tag 	= req->index; +	tsk_mgmt->task_tag	= req_tag;  	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,  				      DMA_TO_DEVICE); -	if (__srp_post_send(target, iu, sizeof *tsk_mgmt)) -		goto out; - -	req->tsk_mgmt = iu; - -	spin_unlock_irq(target->scsi_host->host_lock); +	if (srp_post_send(target, iu, sizeof *tsk_mgmt)) { +		srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT); +		mutex_unlock(&rport->mutex); -	if (!wait_for_completion_timeout(&req->done, -					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))  		return -1; +	} +	mutex_unlock(&rport->mutex); -	return 0; - -out: -	spin_unlock_irq(target->scsi_host->host_lock); -	return -1; -} - -static int srp_find_req(struct srp_target_port *target, -			struct scsi_cmnd *scmnd, -			struct srp_request **req) -{ -	if (scmnd->host_scribble == (void *) -1L) +	if (!wait_for_completion_timeout(&target->tsk_mgmt_done, +					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))  		return -1; -	*req = &target->req_ring[(long) scmnd->host_scribble]; -  	return 0;  }  static int srp_abort(struct scsi_cmnd *scmnd)  {  	struct srp_target_port *target = host_to_target(scmnd->device->host); -	struct srp_request *req; -	int ret = SUCCESS; +	struct srp_request *req = (struct srp_request *) scmnd->host_scribble; +	int ret;  	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); -	if (target->qp_in_error) -		return FAILED; -	if (srp_find_req(target, scmnd, &req)) -		return FAILED; -	if 
(srp_send_tsk_mgmt(target, req, SRP_TSK_ABORT_TASK)) -		return FAILED; - -	spin_lock_irq(target->scsi_host->host_lock); - -	if (req->cmd_done) { -		srp_remove_req(target, req); -		scmnd->scsi_done(scmnd); -	} else if (!req->tsk_status) { -		srp_remove_req(target, req); -		scmnd->result = DID_ABORT << 16; -	} else +	if (!req || !srp_claim_req(target, req, NULL, scmnd)) +		return SUCCESS; +	if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, +			      SRP_TSK_ABORT_TASK) == 0) +		ret = SUCCESS; +	else if (target->rport->state == SRP_RPORT_LOST) +		ret = FAST_IO_FAIL; +	else  		ret = FAILED; - -	spin_unlock_irq(target->scsi_host->host_lock); +	srp_free_req(target, req, scmnd, 0); +	scmnd->result = DID_ABORT << 16; +	scmnd->scsi_done(scmnd);  	return ret;  } @@ -1537,26 +2387,20 @@ static int srp_abort(struct scsi_cmnd *scmnd)  static int srp_reset_device(struct scsi_cmnd *scmnd)  {  	struct srp_target_port *target = host_to_target(scmnd->device->host); -	struct srp_request *req, *tmp; +	int i;  	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); -	if (target->qp_in_error) +	if (srp_send_tsk_mgmt(target, SRP_TAG_NO_REQ, scmnd->device->lun, +			      SRP_TSK_LUN_RESET))  		return FAILED; -	if (srp_find_req(target, scmnd, &req)) +	if (target->tsk_mgmt_status)  		return FAILED; -	if (srp_send_tsk_mgmt(target, req, SRP_TSK_LUN_RESET)) -		return FAILED; -	if (req->tsk_status) -		return FAILED; - -	spin_lock_irq(target->scsi_host->host_lock); -	list_for_each_entry_safe(req, tmp, &target->req_queue, list) -		if (req->scmnd->device == scmnd->device) -			srp_reset_req(target, req); - -	spin_unlock_irq(target->scsi_host->host_lock); +	for (i = 0; i < target->req_ring_size; ++i) { +		struct srp_request *req = &target->req_ring[i]; +		srp_finish_req(target, req, scmnd->device, DID_RESET << 16); +	}  	return SUCCESS;  } @@ -1564,14 +2408,25 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)  static int srp_reset_host(struct scsi_cmnd *scmnd)  {  	struct srp_target_port *target = host_to_target(scmnd->device->host); -	int ret = FAILED;  	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); -	if (!srp_reconnect_target(target)) -		ret = SUCCESS; +	return srp_reconnect_rport(target->rport) == 0 ? 
SUCCESS : FAILED; +} -	return ret; +static int srp_slave_configure(struct scsi_device *sdev) +{ +	struct Scsi_Host *shost = sdev->host; +	struct srp_target_port *target = host_to_target(shost); +	struct request_queue *q = sdev->request_queue; +	unsigned long timeout; + +	if (sdev->type == TYPE_DISK) { +		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies); +		blk_queue_rq_timeout(q, timeout); +	} + +	return 0;  }  static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, @@ -1579,10 +2434,6 @@ static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,  {  	struct srp_target_port *target = host_to_target(class_to_shost(dev)); -	if (target->state == SRP_TARGET_DEAD || -	    target->state == SRP_TARGET_REMOVED) -		return -ENODEV; -  	return sprintf(buf, "0x%016llx\n",  		       (unsigned long long) be64_to_cpu(target->id_ext));  } @@ -1592,10 +2443,6 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,  {  	struct srp_target_port *target = host_to_target(class_to_shost(dev)); -	if (target->state == SRP_TARGET_DEAD || -	    target->state == SRP_TARGET_REMOVED) -		return -ENODEV; -  	return sprintf(buf, "0x%016llx\n",  		       (unsigned long long) be64_to_cpu(target->ioc_guid));  } @@ -1605,10 +2452,6 @@ static ssize_t show_service_id(struct device *dev,  {  	struct srp_target_port *target = host_to_target(class_to_shost(dev)); -	if (target->state == SRP_TARGET_DEAD || -	    target->state == SRP_TARGET_REMOVED) -		return -ENODEV; -  	return sprintf(buf, "0x%016llx\n",  		       (unsigned long long) be64_to_cpu(target->service_id));  } @@ -1618,21 +2461,21 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,  {  	struct srp_target_port *target = host_to_target(class_to_shost(dev)); -	if (target->state == SRP_TARGET_DEAD || -	    target->state == SRP_TARGET_REMOVED) -		return -ENODEV; -  	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));  } -static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, +static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,  			 char *buf)  {  	struct srp_target_port *target = host_to_target(class_to_shost(dev)); -	if (target->state == SRP_TARGET_DEAD || -	    target->state == SRP_TARGET_REMOVED) -		return -ENODEV; +	return sprintf(buf, "%pI6\n", target->path.sgid.raw); +} + +static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, +			 char *buf) +{ +	struct srp_target_port *target = host_to_target(class_to_shost(dev));  	return sprintf(buf, "%pI6\n", target->path.dgid.raw);  } @@ -1642,10 +2485,6 @@ static ssize_t show_orig_dgid(struct device *dev,  {  	struct srp_target_port *target = host_to_target(class_to_shost(dev)); -	if (target->state == SRP_TARGET_DEAD || -	    target->state == SRP_TARGET_REMOVED) -		return -ENODEV; -  	return sprintf(buf, "%pI6\n", target->orig_dgid);  } @@ -1654,10 +2493,6 @@ static ssize_t show_req_lim(struct device *dev,  {  	struct srp_target_port *target = host_to_target(class_to_shost(dev)); -	if (target->state == SRP_TARGET_DEAD || -	    target->state == SRP_TARGET_REMOVED) -		return -ENODEV; -  	return sprintf(buf, "%d\n", target->req_lim);  } @@ -1666,10 +2501,6 @@ static ssize_t show_zero_req_lim(struct device *dev,  {  	struct srp_target_port *target = host_to_target(class_to_shost(dev)); -	if (target->state == SRP_TARGET_DEAD || -	    target->state == SRP_TARGET_REMOVED) -		return -ENODEV; -  	return sprintf(buf, "%d\n", target->zero_req_lim);  } @@ 
-1689,28 +2520,70 @@ static ssize_t show_local_ib_device(struct device *dev,  	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);  } +static ssize_t show_comp_vector(struct device *dev, +				struct device_attribute *attr, char *buf) +{ +	struct srp_target_port *target = host_to_target(class_to_shost(dev)); + +	return sprintf(buf, "%d\n", target->comp_vector); +} + +static ssize_t show_tl_retry_count(struct device *dev, +				   struct device_attribute *attr, char *buf) +{ +	struct srp_target_port *target = host_to_target(class_to_shost(dev)); + +	return sprintf(buf, "%d\n", target->tl_retry_count); +} + +static ssize_t show_cmd_sg_entries(struct device *dev, +				   struct device_attribute *attr, char *buf) +{ +	struct srp_target_port *target = host_to_target(class_to_shost(dev)); + +	return sprintf(buf, "%u\n", target->cmd_sg_cnt); +} + +static ssize_t show_allow_ext_sg(struct device *dev, +				 struct device_attribute *attr, char *buf) +{ +	struct srp_target_port *target = host_to_target(class_to_shost(dev)); + +	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false"); +} +  static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);  static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);  static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);  static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL); +static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);  static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);  static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);  static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);  static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);  static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);  static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); +static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL); +static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL); +static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL); +static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);  static struct device_attribute *srp_host_attrs[] = {  	&dev_attr_id_ext,  	&dev_attr_ioc_guid,  	&dev_attr_service_id,  	&dev_attr_pkey, +	&dev_attr_sgid,  	&dev_attr_dgid,  	&dev_attr_orig_dgid,  	&dev_attr_req_lim,  	&dev_attr_zero_req_lim,  	&dev_attr_local_ib_port,  	&dev_attr_local_ib_device, +	&dev_attr_comp_vector, +	&dev_attr_tl_retry_count, +	&dev_attr_cmd_sg_entries, +	&dev_attr_allow_ext_sg,  	NULL  }; @@ -1718,14 +2591,19 @@ static struct scsi_host_template srp_template = {  	.module				= THIS_MODULE,  	.name				= "InfiniBand SRP initiator",  	.proc_name			= DRV_NAME, +	.slave_configure		= srp_slave_configure,  	.info				= srp_target_info,  	.queuecommand			= srp_queuecommand, +	.change_queue_depth             = srp_change_queue_depth, +	.change_queue_type              = srp_change_queue_type,  	.eh_abort_handler		= srp_abort,  	.eh_device_reset_handler	= srp_reset_device,  	.eh_host_reset_handler		= srp_reset_host, -	.can_queue			= SRP_CMD_SQ_SIZE, +	.skip_settle_delay		= true, +	.sg_tablesize			= SRP_DEF_SG_TABLESIZE, +	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,  	.this_id			= -1, -	.cmd_per_lun			= SRP_CMD_SQ_SIZE, +	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,  	.use_clustering			= ENABLE_CLUSTERING,  	.shost_attrs			= srp_host_attrs  }; @@ -1750,6 +2628,9 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)  		
return PTR_ERR(rport);  	} +	rport->lld_data = target; +	target->rport = rport; +  	spin_lock(&host->target_lock);  	list_add_tail(&target->list, &host->target_list);  	spin_unlock(&host->target_lock); @@ -1775,6 +2656,38 @@ static struct class srp_class = {  	.dev_release = srp_release_dev  }; +/** + * srp_conn_unique() - check whether the connection to a target is unique + * @host:   SRP host. + * @target: SRP target port. + */ +static bool srp_conn_unique(struct srp_host *host, +			    struct srp_target_port *target) +{ +	struct srp_target_port *t; +	bool ret = false; + +	if (target->state == SRP_TARGET_REMOVED) +		goto out; + +	ret = true; + +	spin_lock(&host->target_lock); +	list_for_each_entry(t, &host->target_list, list) { +		if (t != target && +		    target->id_ext == t->id_ext && +		    target->ioc_guid == t->ioc_guid && +		    target->initiator_ext == t->initiator_ext) { +			ret = false; +			break; +		} +	} +	spin_unlock(&host->target_lock); + +out: +	return ret; +} +  /*   * Target ports are added by writing   * @@ -1794,6 +2707,12 @@ enum {  	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,  	SRP_OPT_IO_CLASS	= 1 << 7,  	SRP_OPT_INITIATOR_EXT	= 1 << 8, +	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9, +	SRP_OPT_ALLOW_EXT_SG	= 1 << 10, +	SRP_OPT_SG_TABLESIZE	= 1 << 11, +	SRP_OPT_COMP_VECTOR	= 1 << 12, +	SRP_OPT_TL_RETRY_COUNT	= 1 << 13, +	SRP_OPT_QUEUE_SIZE	= 1 << 14,  	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|  				   SRP_OPT_IOC_GUID	|  				   SRP_OPT_DGID		| @@ -1811,6 +2730,12 @@ static const match_table_t srp_opt_tokens = {  	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},  	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},  	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	}, +	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	}, +	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	}, +	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	}, +	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	}, +	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	}, +	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},  	{ SRP_OPT_ERR,			NULL 			}  }; @@ -1865,7 +2790,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)  				goto out;  			}  			if (strlen(p) != 32) { -				printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p); +				pr_warn("bad dest GID parameter '%s'\n", p);  				kfree(p);  				goto out;  			} @@ -1880,7 +2805,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)  		case SRP_OPT_PKEY:  			if (match_hex(args, &token)) { -				printk(KERN_WARNING PFX "bad P_Key parameter '%s'\n", p); +				pr_warn("bad P_Key parameter '%s'\n", p);  				goto out;  			}  			target->path.pkey = cpu_to_be16(token); @@ -1899,30 +2824,43 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)  		case SRP_OPT_MAX_SECT:  			if (match_int(args, &token)) { -				printk(KERN_WARNING PFX "bad max sect parameter '%s'\n", p); +				pr_warn("bad max sect parameter '%s'\n", p);  				goto out;  			}  			target->scsi_host->max_sectors = token;  			break; +		case SRP_OPT_QUEUE_SIZE: +			if (match_int(args, &token) || token < 1) { +				pr_warn("bad queue_size parameter '%s'\n", p); +				goto out; +			} +			target->scsi_host->can_queue = token; +			target->queue_size = token + SRP_RSP_SQ_SIZE + +					     SRP_TSK_MGMT_SQ_SIZE; +			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) +				target->scsi_host->cmd_per_lun = token; +			break; +  		case SRP_OPT_MAX_CMD_PER_LUN: -			if (match_int(args, &token)) { -				printk(KERN_WARNING PFX "bad max cmd_per_lun parameter '%s'\n", p); +			if (match_int(args, &token) || token < 1) { +				
pr_warn("bad max cmd_per_lun parameter '%s'\n", +					p);  				goto out;  			} -			target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE); +			target->scsi_host->cmd_per_lun = token;  			break;  		case SRP_OPT_IO_CLASS:  			if (match_hex(args, &token)) { -				printk(KERN_WARNING PFX "bad  IO class parameter '%s' \n", p); +				pr_warn("bad IO class parameter '%s'\n", p);  				goto out;  			}  			if (token != SRP_REV10_IB_IO_CLASS &&  			    token != SRP_REV16A_IB_IO_CLASS) { -				printk(KERN_WARNING PFX "unknown IO class parameter value" -				       " %x specified (use %x or %x).\n", -				       token, SRP_REV10_IB_IO_CLASS, SRP_REV16A_IB_IO_CLASS); +				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", +					token, SRP_REV10_IB_IO_CLASS, +					SRP_REV16A_IB_IO_CLASS);  				goto out;  			}  			target->io_class = token; @@ -1938,9 +2876,53 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)  			kfree(p);  			break; +		case SRP_OPT_CMD_SG_ENTRIES: +			if (match_int(args, &token) || token < 1 || token > 255) { +				pr_warn("bad max cmd_sg_entries parameter '%s'\n", +					p); +				goto out; +			} +			target->cmd_sg_cnt = token; +			break; + +		case SRP_OPT_ALLOW_EXT_SG: +			if (match_int(args, &token)) { +				pr_warn("bad allow_ext_sg parameter '%s'\n", p); +				goto out; +			} +			target->allow_ext_sg = !!token; +			break; + +		case SRP_OPT_SG_TABLESIZE: +			if (match_int(args, &token) || token < 1 || +					token > SCSI_MAX_SG_CHAIN_SEGMENTS) { +				pr_warn("bad max sg_tablesize parameter '%s'\n", +					p); +				goto out; +			} +			target->sg_tablesize = token; +			break; + +		case SRP_OPT_COMP_VECTOR: +			if (match_int(args, &token) || token < 0) { +				pr_warn("bad comp_vector parameter '%s'\n", p); +				goto out; +			} +			target->comp_vector = token; +			break; + +		case SRP_OPT_TL_RETRY_COUNT: +			if (match_int(args, &token) || token < 2 || token > 7) { +				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", +					p); +				goto out; +			} +			target->tl_retry_count = token; +			break; +  		default: -			printk(KERN_WARNING PFX "unknown parameter or missing value " -			       "'%s' in target creation request\n", p); +			pr_warn("unknown parameter or missing value '%s' in target creation request\n", +				p);  			goto out;  		}  	} @@ -1951,9 +2933,14 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)  		for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)  			if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&  			    !(srp_opt_tokens[i].token & opt_mask)) -				printk(KERN_WARNING PFX "target creation request is " -				       "missing parameter '%s'\n", -				       srp_opt_tokens[i].pattern); +				pr_warn("target creation request is missing parameter '%s'\n", +					srp_opt_tokens[i].pattern); + +	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue +	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) +		pr_warn("cmd_per_lun = %d > queue_size = %d\n", +			target->scsi_host->cmd_per_lun, +			target->scsi_host->can_queue);  out:  	kfree(options); @@ -1968,55 +2955,85 @@ static ssize_t srp_create_target(struct device *dev,  		container_of(dev, struct srp_host, dev);  	struct Scsi_Host *target_host;  	struct srp_target_port *target; +	struct srp_device *srp_dev = host->srp_dev; +	struct ib_device *ibdev = srp_dev->dev;  	int ret; -	int i;  	target_host = scsi_host_alloc(&srp_template,  				      sizeof (struct srp_target_port));  	if (!target_host)  		return -ENOMEM; -	
target_host->transportt = ib_srp_transport_template; +	target_host->transportt  = ib_srp_transport_template; +	target_host->max_channel = 0; +	target_host->max_id      = 1;  	target_host->max_lun     = SRP_MAX_LUN;  	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;  	target = host_to_target(target_host); -	target->io_class   = SRP_REV16A_IB_IO_CLASS; -	target->scsi_host  = target_host; -	target->srp_host   = host; +	target->io_class	= SRP_REV16A_IB_IO_CLASS; +	target->scsi_host	= target_host; +	target->srp_host	= host; +	target->lkey		= host->srp_dev->mr->lkey; +	target->rkey		= host->srp_dev->mr->rkey; +	target->cmd_sg_cnt	= cmd_sg_entries; +	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries; +	target->allow_ext_sg	= allow_ext_sg; +	target->tl_retry_count	= 7; +	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE; -	INIT_LIST_HEAD(&target->free_reqs); -	INIT_LIST_HEAD(&target->req_queue); -	for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { -		target->req_ring[i].index = i; -		list_add_tail(&target->req_ring[i].list, &target->free_reqs); -	} +	mutex_lock(&host->add_target_mutex);  	ret = srp_parse_options(buf, target);  	if (ret)  		goto err; -	ib_query_gid(host->srp_dev->dev, host->port, 0, &target->path.sgid); +	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; -	shost_printk(KERN_DEBUG, target->scsi_host, PFX -		     "new target: id_ext %016llx ioc_guid %016llx pkey %04x " -		     "service_id %016llx dgid %pI6\n", -	       (unsigned long long) be64_to_cpu(target->id_ext), -	       (unsigned long long) be64_to_cpu(target->ioc_guid), -	       be16_to_cpu(target->path.pkey), -	       (unsigned long long) be64_to_cpu(target->service_id), -	       target->path.dgid.raw); +	if (!srp_conn_unique(target->srp_host, target)) { +		shost_printk(KERN_INFO, target->scsi_host, +			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", +			     be64_to_cpu(target->id_ext), +			     be64_to_cpu(target->ioc_guid), +			     be64_to_cpu(target->initiator_ext)); +		ret = -EEXIST; +		goto err; +	} + +	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && +	    target->cmd_sg_cnt < target->sg_tablesize) { +		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); +		target->sg_tablesize = target->cmd_sg_cnt; +	} + +	target_host->sg_tablesize = target->sg_tablesize; +	target->indirect_size = target->sg_tablesize * +				sizeof (struct srp_direct_buf); +	target->max_iu_len = sizeof (struct srp_cmd) + +			     sizeof (struct srp_indirect_buf) + +			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf); + +	INIT_WORK(&target->tl_err_work, srp_tl_err_work); +	INIT_WORK(&target->remove_work, srp_remove_work); +	spin_lock_init(&target->lock); +	INIT_LIST_HEAD(&target->free_tx); +	ret = srp_alloc_req_data(target); +	if (ret) +		goto err_free_mem; + +	ret = ib_query_gid(ibdev, host->port, 0, &target->path.sgid); +	if (ret) +		goto err_free_mem;  	ret = srp_create_target_ib(target);  	if (ret) -		goto err; +		goto err_free_mem;  	ret = srp_new_cm_id(target);  	if (ret) -		goto err_free; +		goto err_free_ib; -	target->qp_in_error = 0;  	ret = srp_connect_target(target);  	if (ret) {  		shost_printk(KERN_ERR, target->scsi_host, @@ -2028,7 +3045,19 @@ static ssize_t srp_create_target(struct device *dev,  	if (ret)  		goto err_disconnect; -	return count; +	shost_printk(KERN_DEBUG, target->scsi_host, PFX +		     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id 
%016llx sgid %pI6 dgid %pI6\n", +		     be64_to_cpu(target->id_ext), +		     be64_to_cpu(target->ioc_guid), +		     be16_to_cpu(target->path.pkey), +		     be64_to_cpu(target->service_id), +		     target->path.sgid.raw, target->path.dgid.raw); + +	ret = count; + +out: +	mutex_unlock(&host->add_target_mutex); +	return ret;  err_disconnect:  	srp_disconnect_target(target); @@ -2036,13 +3065,15 @@ err_disconnect:  err_cm_id:  	ib_destroy_cm_id(target->cm_id); -err_free: +err_free_ib:  	srp_free_target_ib(target); +err_free_mem: +	srp_free_req_data(target); +  err:  	scsi_host_put(target_host); - -	return ret; +	goto out;  }  static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target); @@ -2078,6 +3109,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)  	INIT_LIST_HEAD(&host->target_list);  	spin_lock_init(&host->target_lock);  	init_completion(&host->released); +	mutex_init(&host->add_target_mutex);  	host->srp_dev = device;  	host->port = port; @@ -2109,17 +3141,16 @@ static void srp_add_one(struct ib_device *device)  {  	struct srp_device *srp_dev;  	struct ib_device_attr *dev_attr; -	struct ib_fmr_pool_param fmr_param;  	struct srp_host *host; -	int s, e, p; +	int mr_page_shift, s, e, p; +	u64 max_pages_per_mr;  	dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);  	if (!dev_attr)  		return;  	if (ib_query_device(device, dev_attr)) { -		printk(KERN_WARNING PFX "Query device failed for %s\n", -		       device->name); +		pr_warn("Query device failed for %s\n", device->name);  		goto free_attr;  	} @@ -2127,14 +3158,39 @@ static void srp_add_one(struct ib_device *device)  	if (!srp_dev)  		goto free_attr; +	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && +			    device->map_phys_fmr && device->unmap_fmr); +	srp_dev->has_fr = (dev_attr->device_cap_flags & +			   IB_DEVICE_MEM_MGT_EXTENSIONS); +	if (!srp_dev->has_fmr && !srp_dev->has_fr) +		dev_warn(&device->dev, "neither FMR nor FR is supported\n"); + +	srp_dev->use_fast_reg = (srp_dev->has_fr && +				 (!srp_dev->has_fmr || prefer_fr)); +  	/*  	 * Use the smallest page size supported by the HCA, down to a -	 * minimum of 512 bytes (which is the smallest sector that a -	 * SCSI command will ever carry). +	 * minimum of 4096 bytes. We're unlikely to build large sglists +	 * out of smaller entries.  	 
*/ -	srp_dev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1); -	srp_dev->fmr_page_size  = 1 << srp_dev->fmr_page_shift; -	srp_dev->fmr_page_mask  = ~((u64) srp_dev->fmr_page_size - 1); +	mr_page_shift		= max(12, ffs(dev_attr->page_size_cap) - 1); +	srp_dev->mr_page_size	= 1 << mr_page_shift; +	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1); +	max_pages_per_mr	= dev_attr->max_mr_size; +	do_div(max_pages_per_mr, srp_dev->mr_page_size); +	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, +					  max_pages_per_mr); +	if (srp_dev->use_fast_reg) { +		srp_dev->max_pages_per_mr = +			min_t(u32, srp_dev->max_pages_per_mr, +			      dev_attr->max_fast_reg_page_list_len); +	} +	srp_dev->mr_max_size	= srp_dev->mr_page_size * +				   srp_dev->max_pages_per_mr; +	pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", +		 device->name, mr_page_shift, dev_attr->max_mr_size, +		 dev_attr->max_fast_reg_page_list_len, +		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);  	INIT_LIST_HEAD(&srp_dev->dev_list); @@ -2150,20 +3206,6 @@ static void srp_add_one(struct ib_device *device)  	if (IS_ERR(srp_dev->mr))  		goto err_pd; -	memset(&fmr_param, 0, sizeof fmr_param); -	fmr_param.pool_size	    = SRP_FMR_POOL_SIZE; -	fmr_param.dirty_watermark   = SRP_FMR_DIRTY_SIZE; -	fmr_param.cache		    = 1; -	fmr_param.max_pages_per_fmr = SRP_FMR_SIZE; -	fmr_param.page_shift	    = srp_dev->fmr_page_shift; -	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE | -				       IB_ACCESS_REMOTE_WRITE | -				       IB_ACCESS_REMOTE_READ); - -	srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param); -	if (IS_ERR(srp_dev->fmr_pool)) -		srp_dev->fmr_pool = NULL; -  	if (device->node_type == RDMA_NODE_IB_SWITCH) {  		s = 0;  		e = 0; @@ -2196,10 +3238,11 @@ static void srp_remove_one(struct ib_device *device)  {  	struct srp_device *srp_dev;  	struct srp_host *host, *tmp_host; -	LIST_HEAD(target_list); -	struct srp_target_port *target, *tmp_target; +	struct srp_target_port *target;  	srp_dev = ib_get_client_data(device, &srp_client); +	if (!srp_dev) +		return;  	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {  		device_unregister(&host->dev); @@ -2210,39 +3253,21 @@ static void srp_remove_one(struct ib_device *device)  		wait_for_completion(&host->released);  		/* -		 * Mark all target ports as removed, so we stop queueing -		 * commands and don't try to reconnect. +		 * Remove all target ports.  		 */  		spin_lock(&host->target_lock); -		list_for_each_entry(target, &host->target_list, list) { -			spin_lock_irq(target->scsi_host->host_lock); -			target->state = SRP_TARGET_REMOVED; -			spin_unlock_irq(target->scsi_host->host_lock); -		} +		list_for_each_entry(target, &host->target_list, list) +			srp_queue_remove_work(target);  		spin_unlock(&host->target_lock);  		/* -		 * Wait for any reconnection tasks that may have -		 * started before we marked our target ports as -		 * removed, and any target port removal tasks. +		 * Wait for target port removal tasks.  		 
*/ -		flush_scheduled_work(); - -		list_for_each_entry_safe(target, tmp_target, -					 &host->target_list, list) { -			srp_remove_host(target->scsi_host); -			scsi_remove_host(target->scsi_host); -			srp_disconnect_target(target); -			ib_destroy_cm_id(target->cm_id); -			srp_free_target_ib(target); -			scsi_host_put(target->scsi_host); -		} +		flush_workqueue(system_long_wq);  		kfree(host);  	} -	if (srp_dev->fmr_pool) -		ib_destroy_fmr_pool(srp_dev->fmr_pool);  	ib_dereg_mr(srp_dev->mr);  	ib_dealloc_pd(srp_dev->pd); @@ -2250,18 +3275,42 @@ static void srp_remove_one(struct ib_device *device)  }  static struct srp_function_template ib_srp_transport_functions = { +	.has_rport_state	 = true, +	.reset_timer_if_blocked	 = true, +	.reconnect_delay	 = &srp_reconnect_delay, +	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo, +	.dev_loss_tmo		 = &srp_dev_loss_tmo, +	.reconnect		 = srp_rport_reconnect, +	.rport_delete		 = srp_rport_delete, +	.terminate_rport_io	 = srp_terminate_io,  };  static int __init srp_init_module(void)  {  	int ret; -	BUILD_BUG_ON_NOT_POWER_OF_2(SRP_SQ_SIZE); -	BUILD_BUG_ON_NOT_POWER_OF_2(SRP_RQ_SIZE); +	BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *)); -	if (srp_sg_tablesize > 255) { -		printk(KERN_WARNING PFX "Clamping srp_sg_tablesize to 255\n"); -		srp_sg_tablesize = 255; +	if (srp_sg_tablesize) { +		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); +		if (!cmd_sg_entries) +			cmd_sg_entries = srp_sg_tablesize; +	} + +	if (!cmd_sg_entries) +		cmd_sg_entries = SRP_DEF_SG_TABLESIZE; + +	if (cmd_sg_entries > 255) { +		pr_warn("Clamping cmd_sg_entries to 255\n"); +		cmd_sg_entries = 255; +	} + +	if (!indirect_sg_entries) +		indirect_sg_entries = cmd_sg_entries; +	else if (indirect_sg_entries < cmd_sg_entries) { +		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", +			cmd_sg_entries); +		indirect_sg_entries = cmd_sg_entries;  	}  	ib_srp_transport_template = @@ -2269,14 +3318,9 @@ static int __init srp_init_module(void)  	if (!ib_srp_transport_template)  		return -ENOMEM; -	srp_template.sg_tablesize = srp_sg_tablesize; -	srp_max_iu_len = (sizeof (struct srp_cmd) + -			  sizeof (struct srp_indirect_buf) + -			  srp_sg_tablesize * 16); -  	ret = class_register(&srp_class);  	if (ret) { -		printk(KERN_ERR PFX "couldn't register class infiniband_srp\n"); +		pr_err("couldn't register class infiniband_srp\n");  		srp_release_transport(ib_srp_transport_template);  		return ret;  	} @@ -2285,7 +3329,7 @@ static int __init srp_init_module(void)  	ret = ib_register_client(&srp_client);  	if (ret) { -		printk(KERN_ERR PFX "couldn't register IB client\n"); +		pr_err("couldn't register IB client\n");  		srp_release_transport(ib_srp_transport_template);  		ib_sa_unregister_client(&srp_sa_client);  		class_unregister(&srp_class); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index ed0dce9e479..e46ecb15aa0 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -57,29 +57,24 @@ enum {  	SRP_MAX_LUN		= 512,  	SRP_DEF_SG_TABLESIZE	= 12, -	SRP_RQ_SHIFT    	= 6, -	SRP_RQ_SIZE		= 1 << SRP_RQ_SHIFT, -	SRP_RQ_MASK		= SRP_RQ_SIZE - 1, - -	SRP_SQ_SIZE		= SRP_RQ_SIZE, -	SRP_SQ_MASK		= SRP_SQ_SIZE - 1, +	SRP_DEFAULT_QUEUE_SIZE	= 1 << 6,  	SRP_RSP_SQ_SIZE		= 1, -	SRP_REQ_SQ_SIZE		= SRP_SQ_SIZE - SRP_RSP_SQ_SIZE,  	SRP_TSK_MGMT_SQ_SIZE	= 1, -	SRP_CMD_SQ_SIZE		= SRP_REQ_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE, +	SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE - 
+				  SRP_TSK_MGMT_SQ_SIZE, + +	SRP_TAG_NO_REQ		= ~0U, +	SRP_TAG_TSK_MGMT	= 1U << 31, -	SRP_TAG_TSK_MGMT	= 1 << (SRP_RQ_SHIFT + 1), +	SRP_MAX_PAGES_PER_MR	= 512, -	SRP_FMR_SIZE		= 256, -	SRP_FMR_POOL_SIZE	= 1024, -	SRP_FMR_DIRTY_SIZE	= SRP_FMR_POOL_SIZE / 4 +	LOCAL_INV_WR_ID_MASK	= 1, +	FAST_REG_WR_ID_MASK	= 2,  };  enum srp_target_state {  	SRP_TARGET_LIVE, -	SRP_TARGET_CONNECTING, -	SRP_TARGET_DEAD, -	SRP_TARGET_REMOVED +	SRP_TARGET_REMOVED,  };  enum srp_iu_type { @@ -88,15 +83,24 @@ enum srp_iu_type {  	SRP_IU_RSP,  }; +/* + * @mr_page_mask: HCA memory registration page mask. + * @mr_page_size: HCA memory registration page size. + * @mr_max_size: Maximum size in bytes of a single FMR / FR registration + *   request. + */  struct srp_device {  	struct list_head	dev_list;  	struct ib_device       *dev;  	struct ib_pd	       *pd;  	struct ib_mr	       *mr; -	struct ib_fmr_pool     *fmr_pool; -	int			fmr_page_shift; -	int			fmr_page_size; -	u64			fmr_page_mask; +	u64			mr_page_mask; +	int			mr_page_size; +	int			mr_max_size; +	int			max_pages_per_mr; +	bool			has_fmr; +	bool			has_fr; +	bool			use_fast_reg;  };  struct srp_host { @@ -107,21 +111,51 @@ struct srp_host {  	spinlock_t		target_lock;  	struct completion	released;  	struct list_head	list; +	struct mutex		add_target_mutex;  };  struct srp_request {  	struct list_head	list;  	struct scsi_cmnd       *scmnd;  	struct srp_iu	       *cmd; -	struct srp_iu	       *tsk_mgmt; -	struct ib_pool_fmr     *fmr; -	struct completion	done; +	union { +		struct ib_pool_fmr **fmr_list; +		struct srp_fr_desc **fr_list; +	}; +	u64		       *map_page; +	struct srp_direct_buf  *indirect_desc; +	dma_addr_t		indirect_dma_addr; +	short			nmdesc;  	short			index; -	u8			cmd_done; -	u8			tsk_status;  };  struct srp_target_port { +	/* These are RW in the hot path, and commonly used together */ +	struct list_head	free_tx; +	struct list_head	free_reqs; +	spinlock_t		lock; +	s32			req_lim; + +	/* These are read-only in the hot path */ +	struct ib_cq	       *send_cq ____cacheline_aligned_in_smp; +	struct ib_cq	       *recv_cq; +	struct ib_qp	       *qp; +	union { +		struct ib_fmr_pool     *fmr_pool; +		struct srp_fr_pool     *fr_pool; +	}; +	u32			lkey; +	u32			rkey; +	enum srp_target_state	state; +	unsigned int		max_iu_len; +	unsigned int		cmd_sg_cnt; +	unsigned int		indirect_size; +	bool			allow_ext_sg; + +	/* Everything above this point is used in the hot path of +	 * command processing. Try to keep them packed into cachelines. 
+	 */ +  	__be64			id_ext;  	__be64			ioc_guid;  	__be64			service_id; @@ -129,50 +163,117 @@ struct srp_target_port {  	u16			io_class;  	struct srp_host	       *srp_host;  	struct Scsi_Host       *scsi_host; +	struct srp_rport       *rport;  	char			target_name[32];  	unsigned int		scsi_id; +	unsigned int		sg_tablesize; +	int			queue_size; +	int			req_ring_size; +	int			comp_vector; +	int			tl_retry_count;  	struct ib_sa_path_rec	path;  	__be16			orig_dgid[8];  	struct ib_sa_query     *path_query;  	int			path_query_id; +	u32			rq_tmo_jiffies; +	bool			connected; +  	struct ib_cm_id	       *cm_id; -	struct ib_cq	       *recv_cq; -	struct ib_cq	       *send_cq; -	struct ib_qp	       *qp;  	int			max_ti_iu_len; -	s32			req_lim;  	int			zero_req_lim; -	unsigned		rx_head; -	struct srp_iu	       *rx_ring[SRP_RQ_SIZE]; +	struct srp_iu	       **tx_ring; +	struct srp_iu	       **rx_ring; +	struct srp_request	*req_ring; -	unsigned		tx_head; -	unsigned		tx_tail; -	struct srp_iu	       *tx_ring[SRP_SQ_SIZE]; - -	struct list_head	free_reqs; -	struct list_head	req_queue; -	struct srp_request	req_ring[SRP_CMD_SQ_SIZE]; - -	struct work_struct	work; +	struct work_struct	tl_err_work; +	struct work_struct	remove_work;  	struct list_head	list;  	struct completion	done;  	int			status; -	enum srp_target_state	state; -	int			qp_in_error; +	bool			qp_in_error; + +	struct completion	tsk_mgmt_done; +	u8			tsk_mgmt_status;  };  struct srp_iu { +	struct list_head	list;  	u64			dma;  	void		       *buf;  	size_t			size;  	enum dma_data_direction	direction; -	enum srp_iu_type	type; +}; + +/** + * struct srp_fr_desc - fast registration work request arguments + * @entry: Entry in srp_fr_pool.free_list. + * @mr:    Memory region. + * @frpl:  Fast registration page list. + */ +struct srp_fr_desc { +	struct list_head		entry; +	struct ib_mr			*mr; +	struct ib_fast_reg_page_list	*frpl; +}; + +/** + * struct srp_fr_pool - pool of fast registration descriptors + * + * An entry is available for allocation if and only if it occurs in @free_list. + * + * @size:      Number of descriptors in this pool. + * @max_page_list_len: Maximum fast registration work request page list length. + * @lock:      Protects free_list. + * @free_list: List of free descriptors. + * @desc:      Fast registration descriptor pool. + */ +struct srp_fr_pool { +	int			size; +	int			max_page_list_len; +	spinlock_t		lock; +	struct list_head	free_list; +	struct srp_fr_desc	desc[0]; +}; + +/** + * struct srp_map_state - per-request DMA memory mapping state + * @desc:	    Pointer to the element of the SRP buffer descriptor array + *		    that is being filled in. + * @pages:	    Array with DMA addresses of pages being considered for + *		    memory registration. + * @base_dma_addr:  DMA address of the first page that has not yet been mapped. + * @dma_len:	    Number of bytes that will be registered with the next + *		    FMR or FR memory registration call. + * @total_len:	    Total number of bytes in the sg-list being mapped. + * @npages:	    Number of page addresses in the pages[] array. + * @nmdesc:	    Number of FMR or FR memory descriptors used for mapping. + * @ndesc:	    Number of SRP buffer descriptors that have been filled in. + * @unmapped_sg:    First element of the sg-list that is mapped via FMR or FR. + * @unmapped_index: Index of the first element mapped via FMR or FR. + * @unmapped_addr:  DMA address of the first element mapped via FMR or FR. 
+ */
+struct srp_map_state {
+	union {
+		struct ib_pool_fmr **next_fmr;
+		struct srp_fr_desc **next_fr;
+	};
+	struct srp_direct_buf  *desc;
+	u64		       *pages;
+	dma_addr_t		base_dma_addr;
+	u32			dma_len;
+	u32			total_len;
+	unsigned int		npages;
+	unsigned int		nmdesc;
+	unsigned int		ndesc;
+	struct scatterlist     *unmapped_sg;
+	int			unmapped_index;
+	dma_addr_t		unmapped_addr;
 };
 
 #endif /* IB_SRP_H */
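A note on the error-handling hunks near the top of this patch: srp_handle_qp_err() tells LOCAL_INV and FAST_REG_MR completions apart from ordinary information-unit completions purely by the low bits of wr_id, while srp_send_completion() treats the wr_id of a successful send as a struct srp_iu pointer (whose low bits are presumably clear because the IUs are kmalloc-aligned). The stand-alone sketch below illustrates that tagging convention; only the two *_WR_ID_MASK values come from the patch, and classify_wr_id() plus the sample pointer value are illustrative, not driver code.

#include <stdint.h>
#include <stdio.h>

/* Values taken from the ib_srp.h hunk above. */
#define LOCAL_INV_WR_ID_MASK	1
#define FAST_REG_WR_ID_MASK	2

/* Mirror the order of checks in srp_handle_qp_err(): LOCAL_INV first,
 * then FAST_REG_MR, otherwise the wr_id is an IU pointer. */
static const char *classify_wr_id(uint64_t wr_id)
{
	if (wr_id & LOCAL_INV_WR_ID_MASK)
		return "LOCAL_INV";
	if (wr_id & FAST_REG_WR_ID_MASK)
		return "FAST_REG_MR";
	return "srp_iu send/recv completion";
}

int main(void)
{
	printf("%s\n", classify_wr_id(LOCAL_INV_WR_ID_MASK));
	printf("%s\n", classify_wr_id(FAST_REG_WR_ID_MASK));
	/* An aligned pointer value: both tag bits are clear. */
	printf("%s\n", classify_wr_id(0x7f0000001000ULL));
	return 0;
}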

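Similarly, the srp_compute_rq_tmo() hunk converts the IBTA Local ACK Timeout encoding (4.096 us * 2^timeout, per C9-140..142) and the retry count into a block-layer request timeout with one second of slack. The user-space sketch below reproduces only the arithmetic, assuming HZ = 250 and plain 64-bit division in place of do_div()/msecs_to_jiffies(); example_rq_tmo_jiffies() and the sample timeout/retry values are illustrative and not part of the patch.

#include <stdio.h>
#include <stdint.h>

#define ASSUMED_HZ	250	/* illustrative; the kernel uses msecs_to_jiffies() */

static uint32_t example_rq_tmo_jiffies(uint8_t local_ack_timeout,
				       uint8_t retry_cnt)
{
	/* C9-140..142: Local ACK Timeout t encodes 4.096 us * 2^t,
	 * i.e. 4096 ns << t. */
	uint64_t T_tr_ns = 4096ULL << local_ack_timeout;
	/* Largest time before an error completion is generated:
	 * retry_cnt * 4 * T_tr, converted to milliseconds. */
	uint64_t max_compl_time_ms = retry_cnt * 4 * T_tr_ns / 1000000;

	/* Add one second of slack and convert to jiffies (rough
	 * equivalent of msecs_to_jiffies()). */
	return (uint32_t)((max_compl_time_ms + 1000) * ASSUMED_HZ / 1000);
}

int main(void)
{
	/* e.g. qp_attr->timeout = 19, qp_attr->retry_cnt = 7 */
	printf("rq_tmo ~ %u jiffies\n",
	       (unsigned)example_rq_tmo_jiffies(19, 7));
	return 0;
}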