Diffstat (limited to 'drivers/infiniband/ulp')
21 files changed, 3068 insertions, 1194 deletions
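The recurring IPoIB change in this series replaces priv->vlan_mutex with priv->vlan_rwsem: paths that only walk the child-interface list (device flush, open, stop) now take the lock as readers, while paths that modify the list (vlan add/delete, netlink child unregister) take it as a writer. A minimal sketch of that rwsem pattern follows; the struct and helpers are simplified placeholders for illustration, not the driver's real ipoib_dev_priv code.

/*
 * Sketch of the vlan_mutex -> vlan_rwsem conversion seen in the IPoIB
 * hunks below: list walkers become readers, list modifiers stay
 * exclusive writers.  "demo_priv" stands in for ipoib_dev_priv.
 */
#include <linux/rwsem.h>
#include <linux/list.h>

struct demo_priv {
	struct rw_semaphore vlan_rwsem;	/* was: struct mutex vlan_mutex */
	struct list_head child_intfs;
};

static void demo_init(struct demo_priv *priv)
{
	init_rwsem(&priv->vlan_rwsem);
	INIT_LIST_HEAD(&priv->child_intfs);
}

/* Reader: flush/open/stop paths may walk the child list concurrently. */
static void demo_walk_children(struct demo_priv *priv)
{
	down_read(&priv->vlan_rwsem);
	/* list_for_each_entry(cpriv, &priv->child_intfs, list) ... */
	up_read(&priv->vlan_rwsem);
}

/* Writer: adding or deleting a child interface needs exclusive access. */
static void demo_modify_children(struct demo_priv *priv)
{
	down_write(&priv->vlan_rwsem);
	/* list_add_tail(...) / list_del(...) */
	up_write(&priv->vlan_rwsem);
}

With this split, readers can run concurrently (for example, a parent flush iterating its children while an interface is brought up), while writers remain serialized against both readers and other writers.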
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile new file mode 100644 index 00000000000..f3c7dcf0309 --- /dev/null +++ b/drivers/infiniband/ulp/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_INFINIBAND_IPOIB)		+= ipoib/ +obj-$(CONFIG_INFINIBAND_SRP)		+= srp/ +obj-$(CONFIG_INFINIBAND_SRPT)		+= srpt/ +obj-$(CONFIG_INFINIBAND_ISER)		+= iser/ +obj-$(CONFIG_INFINIBAND_ISERT)		+= isert/ diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index eb71aaa26a9..c639f90cfda 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -101,6 +101,7 @@ enum {  	IPOIB_MCAST_FLAG_SENDONLY = 1,  	IPOIB_MCAST_FLAG_BUSY	  = 2,	/* joining or already joined */  	IPOIB_MCAST_FLAG_ATTACHED = 3, +	IPOIB_MCAST_JOIN_STARTED  = 4,  	MAX_SEND_CQE		  = 16,  	IPOIB_CM_COPYBREAK	  = 256, @@ -151,6 +152,7 @@ struct ipoib_mcast {  	struct sk_buff_head pkt_queue;  	struct net_device *dev; +	struct completion done;  };  struct ipoib_rx_buf { @@ -299,7 +301,7 @@ struct ipoib_dev_priv {  	unsigned long flags; -	struct mutex vlan_mutex; +	struct rw_semaphore vlan_rwsem;  	struct rb_root  path_tree;  	struct list_head path_list; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 7a3175400b2..933efcea0d0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -140,7 +140,8 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,  static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,  					     struct ipoib_cm_rx_buf *rx_ring,  					     int id, int frags, -					     u64 mapping[IPOIB_CM_RX_SG]) +					     u64 mapping[IPOIB_CM_RX_SG], +					     gfp_t gfp)  {  	struct ipoib_dev_priv *priv = netdev_priv(dev);  	struct sk_buff *skb; @@ -164,7 +165,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,  	}  	for (i = 0; i < frags; i++) { -		struct page *page = alloc_page(GFP_ATOMIC); +		struct page *page = alloc_page(gfp);  		if (!page)  			goto partial_error; @@ -382,7 +383,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i  	for (i = 0; i < ipoib_recvq_size; ++i) {  		if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1, -					   rx->rx_ring[i].mapping)) { +					   rx->rx_ring[i].mapping, +					   GFP_KERNEL)) {  			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);  				ret = -ENOMEM;  				goto err_count; @@ -639,7 +641,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)  	frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,  					      (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; -	newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping); +	newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, +				       mapping, GFP_ATOMIC);  	if (unlikely(!newskb)) {  		/*  		 * If we can't allocate a new RX buffer, dump @@ -1027,10 +1030,20 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_  		.cap.max_send_sge	= 1,  		.sq_sig_type		= IB_SIGNAL_ALL_WR,  		.qp_type		= IB_QPT_RC, -		.qp_context		= tx +		.qp_context		= tx, +		.create_flags		= IB_QP_CREATE_USE_GFP_NOIO  	}; -	return ib_create_qp(priv->pd, &attr); +	struct ib_qp *tx_qp; + +	tx_qp = ib_create_qp(priv->pd, &attr); +	if (PTR_ERR(tx_qp) == -EINVAL) { +		ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", +			   priv->ca->name); +		attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; +		tx_qp = 
ib_create_qp(priv->pd, &attr); +	} +	return tx_qp;  }  static int ipoib_cm_send_req(struct net_device *dev, @@ -1101,12 +1114,14 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,  	struct ipoib_dev_priv *priv = netdev_priv(p->dev);  	int ret; -	p->tx_ring = vzalloc(ipoib_sendq_size * sizeof *p->tx_ring); +	p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring, +			       GFP_NOIO, PAGE_KERNEL);  	if (!p->tx_ring) {  		ipoib_warn(priv, "failed to allocate tx ring\n");  		ret = -ENOMEM;  		goto err_tx;  	} +	memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);  	p->qp = ipoib_cm_create_tx_qp(p->dev, p);  	if (IS_ERR(p->qp)) { @@ -1556,7 +1571,8 @@ int ipoib_cm_dev_init(struct net_device *dev)  		for (i = 0; i < ipoib_recvq_size; ++i) {  			if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,  						   priv->cm.num_frags - 1, -						   priv->cm.srq_ring[i].mapping)) { +						   priv->cm.srq_ring[i].mapping, +						   GFP_KERNEL)) {  				ipoib_warn(priv, "failed to allocate "  					   "receive buffer %d\n", i);  				ipoib_cm_dev_cleanup(dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index c4b3940845e..078cadd6c79 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -105,5 +105,5 @@ static const struct ethtool_ops ipoib_ethtool_ops = {  void ipoib_set_ethtool_ops(struct net_device *dev)  { -	SET_ETHTOOL_OPS(dev, &ipoib_ethtool_ops); +	dev->ethtool_ops = &ipoib_ethtool_ops;  } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 196b1d13cbc..6a7003ddb0b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -685,15 +685,13 @@ int ipoib_ib_dev_open(struct net_device *dev)  	ret = ipoib_ib_post_receives(dev);  	if (ret) {  		ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret); -		ipoib_ib_dev_stop(dev, 1); -		return -1; +		goto dev_stop;  	}  	ret = ipoib_cm_dev_open(dev);  	if (ret) {  		ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret); -		ipoib_ib_dev_stop(dev, 1); -		return -1; +		goto dev_stop;  	}  	clear_bit(IPOIB_STOP_REAPER, &priv->flags); @@ -704,6 +702,11 @@ int ipoib_ib_dev_open(struct net_device *dev)  		napi_enable(&priv->napi);  	return 0; +dev_stop: +	if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) +		napi_enable(&priv->napi); +	ipoib_ib_dev_stop(dev, 1); +	return -1;  }  static void ipoib_pkey_dev_check_presence(struct net_device *dev) @@ -746,10 +749,8 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)  	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {  		mutex_lock(&pkey_mutex);  		set_bit(IPOIB_PKEY_STOP, &priv->flags); -		cancel_delayed_work(&priv->pkey_poll_task); +		cancel_delayed_work_sync(&priv->pkey_poll_task);  		mutex_unlock(&pkey_mutex); -		if (flush) -			flush_workqueue(ipoib_workqueue);  	}  	ipoib_mcast_stop_thread(dev, flush); @@ -974,7 +975,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,  	u16 new_index;  	int result; -	mutex_lock(&priv->vlan_mutex); +	down_read(&priv->vlan_rwsem);  	/*  	 * Flush any child interfaces too -- they might be up even if @@ -983,7 +984,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,  	list_for_each_entry(cpriv, &priv->child_intfs, list)  		__ipoib_ib_dev_flush(cpriv, level); -	mutex_unlock(&priv->vlan_mutex); +	up_read(&priv->vlan_rwsem);  	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {  		/* for non-child 
devices must check/update the pkey value here */ @@ -1081,6 +1082,11 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)  	struct ipoib_dev_priv *priv = netdev_priv(dev);  	ipoib_dbg(priv, "cleaning up ib_dev\n"); +	/* +	 * We must make sure there are no more (path) completions +	 * that may wish to touch priv fields that are no longer valid +	 */ +	ipoib_flush_paths(dev);  	ipoib_mcast_stop_thread(dev, 1);  	ipoib_mcast_dev_flush(dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 82cec1af902..5786a78ff8b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -104,6 +104,8 @@ int ipoib_open(struct net_device *dev)  	ipoib_dbg(priv, "bringing up interface\n"); +	netif_carrier_off(dev); +  	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);  	if (ipoib_pkey_dev_delay_open(dev)) @@ -119,7 +121,7 @@ int ipoib_open(struct net_device *dev)  		struct ipoib_dev_priv *cpriv;  		/* Bring up any child interfaces too */ -		mutex_lock(&priv->vlan_mutex); +		down_read(&priv->vlan_rwsem);  		list_for_each_entry(cpriv, &priv->child_intfs, list) {  			int flags; @@ -129,7 +131,7 @@ int ipoib_open(struct net_device *dev)  			dev_change_flags(cpriv->dev, flags | IFF_UP);  		} -		mutex_unlock(&priv->vlan_mutex); +		up_read(&priv->vlan_rwsem);  	}  	netif_start_queue(dev); @@ -162,7 +164,7 @@ static int ipoib_stop(struct net_device *dev)  		struct ipoib_dev_priv *cpriv;  		/* Bring down any child interfaces too */ -		mutex_lock(&priv->vlan_mutex); +		down_read(&priv->vlan_rwsem);  		list_for_each_entry(cpriv, &priv->child_intfs, list) {  			int flags; @@ -172,7 +174,7 @@ static int ipoib_stop(struct net_device *dev)  			dev_change_flags(cpriv->dev, flags & ~IFF_UP);  		} -		mutex_unlock(&priv->vlan_mutex); +		up_read(&priv->vlan_rwsem);  	}  	return 0; @@ -1350,7 +1352,7 @@ void ipoib_setup(struct net_device *dev)  	ipoib_set_ethtool_ops(dev); -	netif_napi_add(dev, &priv->napi, ipoib_poll, 100); +	netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);  	dev->watchdog_timeo	 = HZ; @@ -1366,13 +1368,11 @@ void ipoib_setup(struct net_device *dev)  	memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); -	netif_carrier_off(dev); -  	priv->dev = dev;  	spin_lock_init(&priv->lock); -	mutex_init(&priv->vlan_mutex); +	init_rwsem(&priv->vlan_rwsem);  	INIT_LIST_HEAD(&priv->path_list);  	INIT_LIST_HEAD(&priv->child_intfs); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index cecb98a4c66..d4e005720d0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -386,8 +386,10 @@ static int ipoib_mcast_join_complete(int status,  			mcast->mcmember.mgid.raw, status);  	/* We trap for port events ourselves. 
*/ -	if (status == -ENETRESET) -		return 0; +	if (status == -ENETRESET) { +		status = 0; +		goto out; +	}  	if (!status)  		status = ipoib_mcast_join_finish(mcast, &multicast->rec); @@ -407,7 +409,8 @@ static int ipoib_mcast_join_complete(int status,  		if (mcast == priv->broadcast)  			queue_work(ipoib_workqueue, &priv->carrier_on_task); -		return 0; +		status = 0; +		goto out;  	}  	if (mcast->logcount++ < 20) { @@ -434,7 +437,8 @@ static int ipoib_mcast_join_complete(int status,  				   mcast->backoff * HZ);  	spin_unlock_irq(&priv->lock);  	mutex_unlock(&mcast_mutex); - +out: +	complete(&mcast->done);  	return status;  } @@ -484,11 +488,15 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,  	}  	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); +	init_completion(&mcast->done); +	set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags); +  	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,  					 &rec, comp_mask, GFP_KERNEL,  					 ipoib_mcast_join_complete, mcast);  	if (IS_ERR(mcast->mc)) {  		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); +		complete(&mcast->done);  		ret = PTR_ERR(mcast->mc);  		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); @@ -510,10 +518,18 @@ void ipoib_mcast_join_task(struct work_struct *work)  	struct ipoib_dev_priv *priv =  		container_of(work, struct ipoib_dev_priv, mcast_task.work);  	struct net_device *dev = priv->dev; +	struct ib_port_attr port_attr;  	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))  		return; +	if (ib_query_port(priv->ca, priv->port, &port_attr) || +	    port_attr.state != IB_PORT_ACTIVE) { +		ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n", +			  port_attr.state); +		return; +	} +  	if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))  		ipoib_warn(priv, "ib_query_gid() failed\n");  	else @@ -751,6 +767,11 @@ void ipoib_mcast_dev_flush(struct net_device *dev)  	spin_unlock_irqrestore(&priv->lock, flags); +	/* seperate between the wait to the leave*/ +	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) +		if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags)) +			wait_for_completion(&mcast->done); +  	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {  		ipoib_mcast_leave(dev, mcast);  		ipoib_mcast_free(mcast); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index f81abe16cf0..cdc7df4fdb8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -31,6 +31,7 @@   */  #include <linux/netdevice.h> +#include <linux/if_arp.h>      /* For ARPHRD_xxx */  #include <linux/module.h>  #include <net/rtnetlink.h>  #include "ipoib.h" @@ -103,7 +104,7 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,  		return -EINVAL;  	pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); -	if (!pdev) +	if (!pdev || pdev->type != ARPHRD_INFINIBAND)  		return -ENODEV;  	ppriv = netdev_priv(pdev); @@ -142,10 +143,10 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head  	priv = netdev_priv(dev);  	ppriv = netdev_priv(priv->parent); -	mutex_lock(&ppriv->vlan_mutex); +	down_write(&ppriv->vlan_rwsem);  	unregister_netdevice_queue(dev, head);  	list_del(&priv->list); -	mutex_unlock(&ppriv->vlan_mutex); +	up_write(&ppriv->vlan_rwsem);  }  static size_t ipoib_get_size(const struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 
b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 049a997caff..c56d5d44c53 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -192,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)  	if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)  		init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; +	if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) +		init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; +  	if (dev->features & NETIF_F_SG)  		init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 8292554bccb..9fad7b5ac8b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -140,7 +140,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)  	if (!rtnl_trylock())  		return restart_syscall(); -	mutex_lock(&ppriv->vlan_mutex); +	down_write(&ppriv->vlan_rwsem);  	/*  	 * First ensure this isn't a duplicate. We check the parent device and @@ -163,7 +163,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)  	result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);  out: -	mutex_unlock(&ppriv->vlan_mutex); +	up_write(&ppriv->vlan_rwsem);  	if (result)  		free_netdev(priv->dev); @@ -185,7 +185,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)  	if (!rtnl_trylock())  		return restart_syscall(); -	mutex_lock(&ppriv->vlan_mutex); + +	down_write(&ppriv->vlan_rwsem);  	list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {  		if (priv->pkey == pkey &&  		    priv->child_type == IPOIB_LEGACY_CHILD) { @@ -195,7 +196,8 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)  			break;  		}  	} -	mutex_unlock(&ppriv->vlan_mutex); +	up_write(&ppriv->vlan_rwsem); +  	rtnl_unlock();  	if (dev) { diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index dd03cfe596d..eb7973957a6 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -5,7 +5,7 @@   * Copyright (C) 2004 Alex Aizman   * Copyright (C) 2005 Mike Christie   * Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.   
* maintained by openib-general@openib.org   *   * This software is available to you under a choice of one of two @@ -82,6 +82,8 @@ static unsigned int iscsi_max_lun = 512;  module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);  int iser_debug_level = 0; +bool iser_pi_enable = false; +int iser_pi_guard = 0;  MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");  MODULE_LICENSE("Dual BSD/GPL"); @@ -91,6 +93,13 @@ MODULE_VERSION(DRV_VER);  module_param_named(debug_level, iser_debug_level, int, 0644);  MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)"); +module_param_named(pi_enable, iser_pi_enable, bool, 0644); +MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)"); + +module_param_named(pi_guard, iser_pi_guard, int, 0644); +MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)"); + +static struct workqueue_struct *release_wq;  struct iser_global ig;  void @@ -138,8 +147,8 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)  int iser_initialize_task_headers(struct iscsi_task *task,  						struct iser_tx_desc *tx_desc)  { -	struct iscsi_iser_conn *iser_conn = task->conn->dd_data; -	struct iser_device     *device    = iser_conn->ib_conn->device; +	struct iser_conn       *ib_conn   = task->conn->dd_data; +	struct iser_device     *device    = ib_conn->device;  	struct iscsi_iser_task *iser_task = task->dd_data;  	u64 dma_addr; @@ -153,7 +162,7 @@ int iser_initialize_task_headers(struct iscsi_task *task,  	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;  	tx_desc->tx_sg[0].lkey   = device->mr->lkey; -	iser_task->iser_conn		= iser_conn; +	iser_task->ib_conn = ib_conn;  	return 0;  }  /** @@ -176,6 +185,8 @@ iscsi_iser_task_init(struct iscsi_task *task)  	iser_task->command_sent = 0;  	iser_task_rdma_init(iser_task); +	iser_task->sc = task->sc; +  	return 0;  } @@ -278,10 +289,9 @@ iscsi_iser_task_xmit(struct iscsi_task *task)  static void iscsi_iser_cleanup_task(struct iscsi_task *task)  {  	struct iscsi_iser_task *iser_task = task->dd_data; -	struct iser_tx_desc	*tx_desc = &iser_task->desc; - -	struct iscsi_iser_conn *iser_conn = task->conn->dd_data; -	struct iser_device     *device    = iser_conn->ib_conn->device; +	struct iser_tx_desc    *tx_desc   = &iser_task->desc; +	struct iser_conn       *ib_conn	  = task->conn->dd_data; +	struct iser_device     *device	  = ib_conn->device;  	ib_dma_unmap_single(device->ib_device,  		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); @@ -296,14 +306,25 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task)  	}  } +static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) +{ +	struct iscsi_iser_task *iser_task = task->dd_data; + +	if (iser_task->dir[ISER_DIR_IN]) +		return iser_check_task_pi_status(iser_task, ISER_DIR_IN, +						 sector); +	else +		return iser_check_task_pi_status(iser_task, ISER_DIR_OUT, +						 sector); +} +  static struct iscsi_cls_conn *  iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)  {  	struct iscsi_conn *conn;  	struct iscsi_cls_conn *cls_conn; -	struct iscsi_iser_conn *iser_conn; -	cls_conn = iscsi_conn_setup(cls_session, sizeof(*iser_conn), conn_idx); +	cls_conn = iscsi_conn_setup(cls_session, 0, conn_idx);  	if (!cls_conn)  		return NULL;  	conn = cls_conn->dd_data; @@ -314,39 +335,15 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)  	 */  	conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN; -	iser_conn = 
conn->dd_data; -	conn->dd_data = iser_conn; -	iser_conn->iscsi_conn = conn; -  	return cls_conn;  } -static void -iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) -{ -	struct iscsi_conn *conn = cls_conn->dd_data; -	struct iscsi_iser_conn *iser_conn = conn->dd_data; -	struct iser_conn *ib_conn = iser_conn->ib_conn; - -	iscsi_conn_teardown(cls_conn); -	/* -	 * Userspace will normally call the stop callback and -	 * already have freed the ib_conn, but if it goofed up then -	 * we free it here. -	 */ -	if (ib_conn) { -		ib_conn->iser_conn = NULL; -		iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */ -	} -} -  static int  iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,  		     struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,  		     int is_leading)  {  	struct iscsi_conn *conn = cls_conn->dd_data; -	struct iscsi_iser_conn *iser_conn;  	struct iscsi_session *session;  	struct iser_conn *ib_conn;  	struct iscsi_endpoint *ep; @@ -373,35 +370,44 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,  	/* binds the iSER connection retrieved from the previously  	 * connected ep_handle to the iSCSI layer connection. exchanges  	 * connection pointers */ -	iser_info("binding iscsi/iser conn %p %p to ib_conn %p\n", -		  conn, conn->dd_data, ib_conn); -	iser_conn = conn->dd_data; -	ib_conn->iser_conn = iser_conn; -	iser_conn->ib_conn  = ib_conn; -	iser_conn_get(ib_conn); /* ref iscsi/ib conn binding */ +	iser_info("binding iscsi conn %p to ib_conn %p\n", conn, ib_conn); + +	conn->dd_data = ib_conn; +	ib_conn->iscsi_conn = conn; +  	return 0;  } +static int +iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) +{ +	struct iscsi_conn *iscsi_conn; +	struct iser_conn *ib_conn; + +	iscsi_conn = cls_conn->dd_data; +	ib_conn = iscsi_conn->dd_data; +	reinit_completion(&ib_conn->stop_completion); + +	return iscsi_conn_start(cls_conn); +} +  static void  iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)  {  	struct iscsi_conn *conn = cls_conn->dd_data; -	struct iscsi_iser_conn *iser_conn = conn->dd_data; -	struct iser_conn *ib_conn = iser_conn->ib_conn; +	struct iser_conn *ib_conn = conn->dd_data; + +	iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn); +	iscsi_conn_stop(cls_conn, flag);  	/*  	 * Userspace may have goofed up and not bound the connection or  	 * might have only partially setup the connection.  	 */  	if (ib_conn) { -		iscsi_conn_stop(cls_conn, flag); -		/* -		 * There is no unbind event so the stop callback -		 * must release the ref from the bind. -		 */ -		iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */ +		conn->dd_data = NULL; +		complete(&ib_conn->stop_completion);  	} -	iser_conn->ib_conn = NULL;  }  static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) @@ -413,6 +419,17 @@ static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)  	iscsi_host_free(shost);  } +static inline unsigned int +iser_dif_prot_caps(int prot_caps) +{ +	return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION | +						      SHOST_DIX_TYPE1_PROTECTION : 0) | +	       ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION | +						      SHOST_DIX_TYPE2_PROTECTION : 0) | +	       ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? 
SHOST_DIF_TYPE3_PROTECTION | +						      SHOST_DIX_TYPE3_PROTECTION : 0); +} +  static struct iscsi_cls_session *  iscsi_iser_session_create(struct iscsi_endpoint *ep,  			  uint16_t cmds_max, uint16_t qdepth, @@ -437,8 +454,18 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,  	 * older userspace tools (before 2.0-870) did not pass us  	 * the leading conn's ep so this will be NULL;  	 */ -	if (ep) +	if (ep) {  		ib_conn = ep->dd_data; +		if (ib_conn->pi_support) { +			u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap; + +			scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); +			if (iser_pi_guard) +				scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP); +			else +				scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC); +		} +	}  	if (iscsi_host_add(shost,  			   ep ? ib_conn->device->ib_device->dma_device : NULL)) @@ -481,28 +508,28 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,  	case ISCSI_PARAM_HDRDGST_EN:  		sscanf(buf, "%d", &value);  		if (value) { -			iser_err("DataDigest wasn't negotiated to None"); +			iser_err("DataDigest wasn't negotiated to None\n");  			return -EPROTO;  		}  		break;  	case ISCSI_PARAM_DATADGST_EN:  		sscanf(buf, "%d", &value);  		if (value) { -			iser_err("DataDigest wasn't negotiated to None"); +			iser_err("DataDigest wasn't negotiated to None\n");  			return -EPROTO;  		}  		break;  	case ISCSI_PARAM_IFMARKER_EN:  		sscanf(buf, "%d", &value);  		if (value) { -			iser_err("IFMarker wasn't negotiated to No"); +			iser_err("IFMarker wasn't negotiated to No\n");  			return -EPROTO;  		}  		break;  	case ISCSI_PARAM_OFMARKER_EN:  		sscanf(buf, "%d", &value);  		if (value) { -			iser_err("OFMarker wasn't negotiated to No"); +			iser_err("OFMarker wasn't negotiated to No\n");  			return -EPROTO;  		}  		break; @@ -618,19 +645,20 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)  	struct iser_conn *ib_conn;  	ib_conn = ep->dd_data; -	if (ib_conn->iser_conn) -		/* -		 * Must suspend xmit path if the ep is bound to the -		 * iscsi_conn, so we know we are not accessing the ib_conn -		 * when we free it. -		 * -		 * This may not be bound if the ep poll failed. -		 */ -		iscsi_suspend_tx(ib_conn->iser_conn->iscsi_conn); - - -	iser_info("ib conn %p state %d\n", ib_conn, ib_conn->state); +	iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);  	iser_conn_terminate(ib_conn); + +	/* +	 * if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop +	 * call and ISER_CONN_DOWN state before freeing the iser resources. +	 * otherwise we are safe to free resources immediately. 
+	 */ +	if (ib_conn->iscsi_conn) { +		INIT_WORK(&ib_conn->release_work, iser_release_work); +		queue_work(release_wq, &ib_conn->release_work); +	} else { +		iser_conn_release(ib_conn); +	}  }  static umode_t iser_attr_is_visible(int param_type, int param) @@ -714,13 +742,13 @@ static struct iscsi_transport iscsi_iser_transport = {  	/* connection management */  	.create_conn            = iscsi_iser_conn_create,  	.bind_conn              = iscsi_iser_conn_bind, -	.destroy_conn           = iscsi_iser_conn_destroy, +	.destroy_conn           = iscsi_conn_teardown,  	.attr_is_visible	= iser_attr_is_visible,  	.set_param              = iscsi_iser_set_param,  	.get_conn_param		= iscsi_conn_get_param,  	.get_ep_param		= iscsi_iser_get_ep_param,  	.get_session_param	= iscsi_session_get_param, -	.start_conn             = iscsi_conn_start, +	.start_conn             = iscsi_iser_conn_start,  	.stop_conn              = iscsi_iser_conn_stop,  	/* iscsi host params */  	.get_host_param		= iscsi_host_get_param, @@ -732,6 +760,7 @@ static struct iscsi_transport iscsi_iser_transport = {  	.xmit_task		= iscsi_iser_task_xmit,  	.cleanup_task		= iscsi_iser_cleanup_task,  	.alloc_pdu		= iscsi_iser_pdu_alloc, +	.check_protection	= iscsi_iser_check_protection,  	/* recovery */  	.session_recovery_timedout = iscsi_session_recovery_timedout, @@ -766,6 +795,12 @@ static int __init iser_init(void)  	mutex_init(&ig.connlist_mutex);  	INIT_LIST_HEAD(&ig.connlist); +	release_wq = alloc_workqueue("release workqueue", 0, 0); +	if (!release_wq) { +		iser_err("failed to allocate release workqueue\n"); +		return -ENOMEM; +	} +  	iscsi_iser_scsi_transport = iscsi_register_transport(  							&iscsi_iser_transport);  	if (!iscsi_iser_scsi_transport) { @@ -784,7 +819,24 @@ register_transport_failure:  static void __exit iser_exit(void)  { +	struct iser_conn *ib_conn, *n; +	int connlist_empty; +  	iser_dbg("Removing iSER datamover...\n"); +	destroy_workqueue(release_wq); + +	mutex_lock(&ig.connlist_mutex); +	connlist_empty = list_empty(&ig.connlist); +	mutex_unlock(&ig.connlist_mutex); + +	if (!connlist_empty) { +		iser_err("Error cleanup stage completed but we still have iser " +			 "connections, destroying them anyway.\n"); +		list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) { +			iser_conn_release(ib_conn); +		} +	} +  	iscsi_unregister_transport(&iscsi_iser_transport);  	kmem_cache_destroy(ig.desc_cache);  } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 67914027c61..97cd385bf7f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -8,7 +8,7 @@   *   * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.   * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.   *   * This software is available to you under a choice of one of two   * licenses.  You may choose to be licensed under the terms of the GNU @@ -46,6 +46,8 @@  #include <linux/printk.h>  #include <scsi/libiscsi.h>  #include <scsi/scsi_transport_iscsi.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_device.h>  #include <linux/interrupt.h>  #include <linux/wait.h> @@ -67,7 +69,7 @@  #define DRV_NAME	"iser"  #define PFX		DRV_NAME ": " -#define DRV_VER		"1.1" +#define DRV_VER		"1.4"  #define iser_dbg(fmt, arg...)				
\  	do {						\ @@ -134,10 +136,21 @@  					ISER_MAX_TX_MISC_PDUS        + \  					ISER_MAX_RX_MISC_PDUS) +/* Max registration work requests per command */ +#define ISER_MAX_REG_WR_PER_CMD		5 + +/* For Signature we don't support DATAOUTs so no need to make room for them */ +#define ISER_QP_SIG_MAX_REQ_DTOS	(ISER_DEF_XMIT_CMDS_MAX	*       \ +					(1 + ISER_MAX_REG_WR_PER_CMD) + \ +					ISER_MAX_TX_MISC_PDUS         + \ +					ISER_MAX_RX_MISC_PDUS) +  #define ISER_VER			0x10  #define ISER_WSV			0x08  #define ISER_RSV			0x04 +#define ISER_FASTREG_LI_WRID		0xffffffffffffffffULL +  struct iser_hdr {  	u8      flags;  	u8      rsvd[3]; @@ -201,7 +214,6 @@ struct iser_data_buf {  /* fwd declarations */  struct iser_device;  struct iser_cq_desc; -struct iscsi_iser_conn;  struct iscsi_iser_task;  struct iscsi_endpoint; @@ -258,6 +270,7 @@ struct iscsi_iser_task;  struct iser_device {  	struct ib_device             *ib_device;  	struct ib_pd	             *pd; +	struct ib_device_attr	     dev_attr;  	struct ib_cq	             *rx_cq[ISER_MAX_CQ];  	struct ib_cq	             *tx_cq[ISER_MAX_CQ];  	struct ib_mr	             *mr; @@ -277,17 +290,35 @@ struct iser_device {  							    enum iser_data_dir cmd_dir);  }; +#define ISER_CHECK_GUARD	0xc0 +#define ISER_CHECK_REFTAG	0x0f +#define ISER_CHECK_APPTAG	0x30 + +enum iser_reg_indicator { +	ISER_DATA_KEY_VALID	= 1 << 0, +	ISER_PROT_KEY_VALID	= 1 << 1, +	ISER_SIG_KEY_VALID	= 1 << 2, +	ISER_FASTREG_PROTECTED	= 1 << 3, +}; + +struct iser_pi_context { +	struct ib_mr                   *prot_mr; +	struct ib_fast_reg_page_list   *prot_frpl; +	struct ib_mr                   *sig_mr; +}; +  struct fast_reg_descriptor {  	struct list_head		  list;  	/* For fast registration - FRWR */  	struct ib_mr			 *data_mr;  	struct ib_fast_reg_page_list     *data_frpl; -	/* Valid for fast registration flag */ -	bool				  valid; +	struct iser_pi_context		 *pi_ctx; +	/* registration indicators container */ +	u8				  reg_indicators;  };  struct iser_conn { -	struct iscsi_iser_conn       *iser_conn; /* iser conn for upcalls  */ +	struct iscsi_conn	     *iscsi_conn;  	struct iscsi_endpoint	     *ep;  	enum iser_ib_conn_state	     state;	    /* rdma connection state   */  	atomic_t		     refcount; @@ -302,6 +333,8 @@ struct iser_conn {  	int                          post_recv_buf_count; /* posted rx count  */  	atomic_t                     post_send_buf_count; /* posted tx count   */  	char 			     name[ISER_OBJECT_NAME_SIZE]; +	struct work_struct	     release_work; +	struct completion	     stop_completion;  	struct list_head	     conn_list;       /* entry in ig conn list */  	char  			     *login_buf; @@ -310,6 +343,9 @@ struct iser_conn {  	unsigned int 		     rx_desc_head;  	struct iser_rx_desc	     *rx_descs;  	struct ib_recv_wr	     rx_wr[ISER_MIN_POSTED_RX]; +	bool			     pi_support; + +	/* Connection memory registration pool */  	union {  		struct {  			struct ib_fmr_pool      *pool;	   /* pool of IB FMRs         */ @@ -319,24 +355,22 @@ struct iser_conn {  		struct {  			struct list_head	pool;  			int			pool_size; -		} frwr; -	} fastreg; -}; - -struct iscsi_iser_conn { -	struct iscsi_conn            *iscsi_conn;/* ptr to iscsi conn */ -	struct iser_conn             *ib_conn;   /* iSER IB conn      */ +		} fastreg; +	};  };  struct iscsi_iser_task {  	struct iser_tx_desc          desc; -	struct iscsi_iser_conn	     *iser_conn; +	struct iser_conn	     *ib_conn;  	enum iser_task_status 	     status; +	struct scsi_cmnd	     *sc;  	int                          command_sent;  /* 
set if command  sent  */  	int                          dir[ISER_DIRS_NUM];      /* set if dir use*/  	struct iser_regd_buf         rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */  	struct iser_data_buf         data[ISER_DIRS_NUM];     /* orig. data des*/  	struct iser_data_buf         data_copy[ISER_DIRS_NUM];/* contig. copy  */ +	struct iser_data_buf         prot[ISER_DIRS_NUM];     /* prot desc     */ +	struct iser_data_buf         prot_copy[ISER_DIRS_NUM];/* prot copy     */  };  struct iser_page_vec { @@ -362,6 +396,8 @@ struct iser_global {  extern struct iser_global ig;  extern int iser_debug_level; +extern bool iser_pi_enable; +extern int iser_pi_guard;  /* allocate connection resources needed for rdma functionality */  int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); @@ -383,12 +419,12 @@ void iscsi_iser_recv(struct iscsi_conn *conn,  void iser_conn_init(struct iser_conn *ib_conn); -void iser_conn_get(struct iser_conn *ib_conn); - -int iser_conn_put(struct iser_conn *ib_conn, int destroy_cma_id_allowed); +void iser_conn_release(struct iser_conn *ib_conn);  void iser_conn_terminate(struct iser_conn *ib_conn); +void iser_release_work(struct work_struct *work); +  void iser_rcv_completion(struct iser_rx_desc *desc,  			 unsigned long    dto_xfer_len,  			struct iser_conn *ib_conn); @@ -401,13 +437,15 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *task);  void iser_free_rx_descriptors(struct iser_conn *ib_conn); -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, -				     enum iser_data_dir         cmd_dir); +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, +				     struct iser_data_buf *mem, +				     struct iser_data_buf *mem_copy, +				     enum iser_data_dir cmd_dir);  int  iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,  			   enum iser_data_dir cmd_dir); -int  iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task, -			    enum iser_data_dir cmd_dir); +int  iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task, +			       enum iser_data_dir cmd_dir);  int  iser_connect(struct iser_conn   *ib_conn,  		  struct sockaddr_in *src_addr, @@ -420,8 +458,8 @@ int  iser_reg_page_vec(struct iser_conn     *ib_conn,  void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,  			enum iser_data_dir cmd_dir); -void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, -			 enum iser_data_dir cmd_dir); +void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, +			    enum iser_data_dir cmd_dir);  int  iser_post_recvl(struct iser_conn *ib_conn);  int  iser_post_recvm(struct iser_conn *ib_conn, int count); @@ -432,12 +470,15 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,  			    enum   iser_data_dir       iser_dir,  			    enum   dma_data_direction  dma_dir); -void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, +			      struct iser_data_buf *data);  int  iser_initialize_task_headers(struct iscsi_task *task,  			struct iser_tx_desc *tx_desc);  int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);  int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);  void iser_free_fmr_pool(struct iser_conn *ib_conn); -int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max); -void iser_free_frwr_pool(struct iser_conn *ib_conn); +int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max); +void iser_free_fastreg_pool(struct iser_conn *ib_conn); +u8 
iser_check_task_pi_status(struct iscsi_iser_task *iser_task, +			     enum iser_data_dir cmd_dir, sector_t *sector);  #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 538822684d5..8d44a406063 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -1,6 +1,6 @@  /*   * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.   *   * This software is available to you under a choice of one of two   * licenses.  You may choose to be licensed under the terms of the GNU @@ -41,15 +41,15 @@  #include "iscsi_iser.h"  /* Register user buffer memory and initialize passive rdma - *  dto descriptor. Total data size is stored in - *  iser_task->data[ISER_DIR_IN].data_len + *  dto descriptor. Data size is stored in + *  task->data[ISER_DIR_IN].data_len, Protection size + *  os stored in task->prot[ISER_DIR_IN].data_len   */ -static int iser_prepare_read_cmd(struct iscsi_task *task, -				 unsigned int edtl) +static int iser_prepare_read_cmd(struct iscsi_task *task)  {  	struct iscsi_iser_task *iser_task = task->dd_data; -	struct iser_device  *device = iser_task->iser_conn->ib_conn->device; +	struct iser_device  *device = iser_task->ib_conn->device;  	struct iser_regd_buf *regd_buf;  	int err;  	struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -62,12 +62,15 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,  	if (err)  		return err; -	if (edtl > iser_task->data[ISER_DIR_IN].data_len) { -		iser_err("Total data length: %ld, less than EDTL: " -			 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", -			 iser_task->data[ISER_DIR_IN].data_len, edtl, -			 task->itt, iser_task->iser_conn); -		return -EINVAL; +	if (scsi_prot_sg_count(iser_task->sc)) { +		struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN]; + +		err = iser_dma_map_task_data(iser_task, +					     pbuf_in, +					     ISER_DIR_IN, +					     DMA_FROM_DEVICE); +		if (err) +			return err;  	}  	err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN); @@ -89,8 +92,9 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,  }  /* Register user buffer memory and initialize passive rdma - *  dto descriptor. Total data size is stored in - *  task->data[ISER_DIR_OUT].data_len + *  dto descriptor. 
Data size is stored in + *  task->data[ISER_DIR_OUT].data_len, Protection size + *  is stored at task->prot[ISER_DIR_OUT].data_len   */  static int  iser_prepare_write_cmd(struct iscsi_task *task, @@ -99,7 +103,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,  		       unsigned int edtl)  {  	struct iscsi_iser_task *iser_task = task->dd_data; -	struct iser_device  *device = iser_task->iser_conn->ib_conn->device; +	struct iser_device  *device = iser_task->ib_conn->device;  	struct iser_regd_buf *regd_buf;  	int err;  	struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -113,12 +117,15 @@ iser_prepare_write_cmd(struct iscsi_task *task,  	if (err)  		return err; -	if (edtl > iser_task->data[ISER_DIR_OUT].data_len) { -		iser_err("Total data length: %ld, less than EDTL: %d, " -			 "in WRITE cmd BHS itt: %d, conn: 0x%p\n", -			 iser_task->data[ISER_DIR_OUT].data_len, -			 edtl, task->itt, task->conn); -		return -EINVAL; +	if (scsi_prot_sg_count(iser_task->sc)) { +		struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT]; + +		err = iser_dma_map_task_data(iser_task, +					     pbuf_out, +					     ISER_DIR_OUT, +					     DMA_TO_DEVICE); +		if (err) +			return err;  	}  	err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT); @@ -327,7 +334,7 @@ free_login_buf:  static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)  { -	struct iscsi_iser_conn *iser_conn = conn->dd_data; +	struct iser_conn *ib_conn = conn->dd_data;  	struct iscsi_session *session = conn->session;  	iser_dbg("req op %x flags %x\n", req->opcode, req->flags); @@ -340,19 +347,18 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)  	 * response) and no posted send buffers left - they must have been  	 * consumed during previous login phases.  	 
*/ -	WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1); -	WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); +	WARN_ON(ib_conn->post_recv_buf_count != 1); +	WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0);  	if (session->discovery_sess) {  		iser_info("Discovery session, re-using login RX buffer\n");  		return 0;  	} else  		iser_info("Normal session, posting batch of RX %d buffers\n", -			  iser_conn->ib_conn->min_posted_rx); +			  ib_conn->min_posted_rx);  	/* Initial post receive buffers */ -	if (iser_post_recvm(iser_conn->ib_conn, -			    iser_conn->ib_conn->min_posted_rx)) +	if (iser_post_recvm(ib_conn, ib_conn->min_posted_rx))  		return -ENOMEM;  	return 0; @@ -364,11 +370,11 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)  int iser_send_command(struct iscsi_conn *conn,  		      struct iscsi_task *task)  { -	struct iscsi_iser_conn *iser_conn = conn->dd_data; +	struct iser_conn *ib_conn = conn->dd_data;  	struct iscsi_iser_task *iser_task = task->dd_data;  	unsigned long edtl;  	int err; -	struct iser_data_buf *data_buf; +	struct iser_data_buf *data_buf, *prot_buf;  	struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;  	struct scsi_cmnd *sc  =  task->sc;  	struct iser_tx_desc *tx_desc = &iser_task->desc; @@ -377,22 +383,31 @@ int iser_send_command(struct iscsi_conn *conn,  	/* build the tx desc regd header and add it to the tx desc dto */  	tx_desc->type = ISCSI_TX_SCSI_COMMAND; -	iser_create_send_desc(iser_conn->ib_conn, tx_desc); +	iser_create_send_desc(ib_conn, tx_desc); -	if (hdr->flags & ISCSI_FLAG_CMD_READ) +	if (hdr->flags & ISCSI_FLAG_CMD_READ) {  		data_buf = &iser_task->data[ISER_DIR_IN]; -	else +		prot_buf = &iser_task->prot[ISER_DIR_IN]; +	} else {  		data_buf = &iser_task->data[ISER_DIR_OUT]; +		prot_buf = &iser_task->prot[ISER_DIR_OUT]; +	}  	if (scsi_sg_count(sc)) { /* using a scatter list */  		data_buf->buf  = scsi_sglist(sc);  		data_buf->size = scsi_sg_count(sc);  	} -  	data_buf->data_len = scsi_bufflen(sc); +	if (scsi_prot_sg_count(sc)) { +		prot_buf->buf  = scsi_prot_sglist(sc); +		prot_buf->size = scsi_prot_sg_count(sc); +		prot_buf->data_len = data_buf->data_len >> +				     ilog2(sc->device->sector_size) * 8; +	} +  	if (hdr->flags & ISCSI_FLAG_CMD_READ) { -		err = iser_prepare_read_cmd(task, edtl); +		err = iser_prepare_read_cmd(task);  		if (err)  			goto send_command_error;  	} @@ -408,7 +423,7 @@ int iser_send_command(struct iscsi_conn *conn,  	iser_task->status = ISER_TASK_STATUS_STARTED; -	err = iser_post_send(iser_conn->ib_conn, tx_desc); +	err = iser_post_send(ib_conn, tx_desc);  	if (!err)  		return 0; @@ -424,7 +439,7 @@ int iser_send_data_out(struct iscsi_conn *conn,  		       struct iscsi_task *task,  		       struct iscsi_data *hdr)  { -	struct iscsi_iser_conn *iser_conn = conn->dd_data; +	struct iser_conn *ib_conn = conn->dd_data;  	struct iscsi_iser_task *iser_task = task->dd_data;  	struct iser_tx_desc *tx_desc = NULL;  	struct iser_regd_buf *regd_buf; @@ -473,7 +488,7 @@ int iser_send_data_out(struct iscsi_conn *conn,  		 itt, buf_offset, data_seg_len); -	err = iser_post_send(iser_conn->ib_conn, tx_desc); +	err = iser_post_send(ib_conn, tx_desc);  	if (!err)  		return 0; @@ -486,19 +501,18 @@ send_data_out_error:  int iser_send_control(struct iscsi_conn *conn,  		      struct iscsi_task *task)  { -	struct iscsi_iser_conn *iser_conn = conn->dd_data; +	struct iser_conn *ib_conn = conn->dd_data;  	struct iscsi_iser_task *iser_task = task->dd_data;  	struct iser_tx_desc 
*mdesc = &iser_task->desc;  	unsigned long data_seg_len;  	int err = 0;  	struct iser_device *device; -	struct iser_conn *ib_conn = iser_conn->ib_conn;  	/* build the tx desc regd header and add it to the tx desc dto */  	mdesc->type = ISCSI_TX_CONTROL; -	iser_create_send_desc(iser_conn->ib_conn, mdesc); +	iser_create_send_desc(ib_conn, mdesc); -	device = iser_conn->ib_conn->device; +	device = ib_conn->device;  	data_seg_len = ntoh24(task->hdr->dlength); @@ -513,14 +527,13 @@ int iser_send_control(struct iscsi_conn *conn,  			ib_conn->login_req_dma, task->data_count,  			DMA_TO_DEVICE); -		memcpy(iser_conn->ib_conn->login_req_buf, task->data, -							task->data_count); +		memcpy(ib_conn->login_req_buf, task->data, task->data_count);  		ib_dma_sync_single_for_device(device->ib_device,  			ib_conn->login_req_dma, task->data_count,  			DMA_TO_DEVICE); -		tx_dsg->addr    = iser_conn->ib_conn->login_req_dma; +		tx_dsg->addr    = ib_conn->login_req_dma;  		tx_dsg->length  = task->data_count;  		tx_dsg->lkey    = device->mr->lkey;  		mdesc->num_sge = 2; @@ -529,7 +542,7 @@ int iser_send_control(struct iscsi_conn *conn,  	if (task == conn->login_task) {  		iser_dbg("op %x dsl %lx, posting login rx buffer\n",  			 task->hdr->opcode, data_seg_len); -		err = iser_post_recvl(iser_conn->ib_conn); +		err = iser_post_recvl(ib_conn);  		if (err)  			goto send_control_error;  		err = iser_post_rx_bufs(conn, task->hdr); @@ -537,7 +550,7 @@ int iser_send_control(struct iscsi_conn *conn,  			goto send_control_error;  	} -	err = iser_post_send(iser_conn->ib_conn, mdesc); +	err = iser_post_send(ib_conn, mdesc);  	if (!err)  		return 0; @@ -553,7 +566,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,  			 unsigned long rx_xfer_len,  			 struct iser_conn *ib_conn)  { -	struct iscsi_iser_conn *conn = ib_conn->iser_conn;  	struct iscsi_hdr *hdr;  	u64 rx_dma;  	int rx_buflen, outstanding, count, err; @@ -575,17 +587,17 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,  	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,  			hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN)); -	iscsi_iser_recv(conn->iscsi_conn, hdr, -		rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN); +	iscsi_iser_recv(ib_conn->iscsi_conn, hdr, rx_desc->data, +			rx_xfer_len - ISER_HEADERS_LEN);  	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, -			rx_buflen, DMA_FROM_DEVICE); +				      rx_buflen, DMA_FROM_DEVICE);  	/* decrementing conn->post_recv_buf_count only --after-- freeing the   *  	 * task eliminates the need to worry on tasks which are completed in   *  	 * parallel to the execution of iser_conn_term. 
So the code that waits *  	 * for the posted rx bufs refcount to become zero handles everything   */ -	conn->ib_conn->post_recv_buf_count--; +	ib_conn->post_recv_buf_count--;  	if (rx_dma == ib_conn->login_resp_dma)  		return; @@ -610,11 +622,12 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc,  		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,  					ISER_HEADERS_LEN, DMA_TO_DEVICE);  		kmem_cache_free(ig.desc_cache, tx_desc); +		tx_desc = NULL;  	}  	atomic_dec(&ib_conn->post_send_buf_count); -	if (tx_desc->type == ISCSI_TX_CONTROL) { +	if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {  		/* this arithmetic is legal by libiscsi dd_data allocation */  		task = (void *) ((long)(void *)tx_desc -  				  sizeof(struct iscsi_task)); @@ -634,6 +647,9 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)  	iser_task->data[ISER_DIR_IN].data_len  = 0;  	iser_task->data[ISER_DIR_OUT].data_len = 0; +	iser_task->prot[ISER_DIR_IN].data_len  = 0; +	iser_task->prot[ISER_DIR_OUT].data_len = 0; +  	memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,  	       sizeof(struct iser_regd_buf));  	memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0, @@ -642,28 +658,63 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)  void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)  { -	struct iser_device *device = iser_task->iser_conn->ib_conn->device; -	int is_rdma_aligned = 1; +	struct iser_device *device = iser_task->ib_conn->device; +	int is_rdma_data_aligned = 1; +	int is_rdma_prot_aligned = 1; +	int prot_count = scsi_prot_sg_count(iser_task->sc);  	/* if we were reading, copy back to unaligned sglist,  	 * anyway dma_unmap and free the copy  	 */  	if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) { -		is_rdma_aligned = 0; -		iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN); +		is_rdma_data_aligned = 0; +		iser_finalize_rdma_unaligned_sg(iser_task, +						&iser_task->data[ISER_DIR_IN], +						&iser_task->data_copy[ISER_DIR_IN], +						ISER_DIR_IN);  	} +  	if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) { -		is_rdma_aligned = 0; -		iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT); +		is_rdma_data_aligned = 0; +		iser_finalize_rdma_unaligned_sg(iser_task, +						&iser_task->data[ISER_DIR_OUT], +						&iser_task->data_copy[ISER_DIR_OUT], +						ISER_DIR_OUT); +	} + +	if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) { +		is_rdma_prot_aligned = 0; +		iser_finalize_rdma_unaligned_sg(iser_task, +						&iser_task->prot[ISER_DIR_IN], +						&iser_task->prot_copy[ISER_DIR_IN], +						ISER_DIR_IN);  	} -	if (iser_task->dir[ISER_DIR_IN]) +	if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) { +		is_rdma_prot_aligned = 0; +		iser_finalize_rdma_unaligned_sg(iser_task, +						&iser_task->prot[ISER_DIR_OUT], +						&iser_task->prot_copy[ISER_DIR_OUT], +						ISER_DIR_OUT); +	} + +	if (iser_task->dir[ISER_DIR_IN]) {  		device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); +		if (is_rdma_data_aligned) +			iser_dma_unmap_task_data(iser_task, +						 &iser_task->data[ISER_DIR_IN]); +		if (prot_count && is_rdma_prot_aligned) +			iser_dma_unmap_task_data(iser_task, +						 &iser_task->prot[ISER_DIR_IN]); +	} -	if (iser_task->dir[ISER_DIR_OUT]) +	if (iser_task->dir[ISER_DIR_OUT]) {  		device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); - -       /* if the data was unaligned, it was already unmapped and then copied */ -       if (is_rdma_aligned) -		iser_dma_unmap_task_data(iser_task); +		if (is_rdma_data_aligned) +			iser_dma_unmap_task_data(iser_task, +				
		 &iser_task->data[ISER_DIR_OUT]); +		if (prot_count && is_rdma_prot_aligned) +			iser_dma_unmap_task_data(iser_task, +						 &iser_task->prot[ISER_DIR_OUT]); +	}  } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 1ce0c97d2cc..47acd3ad3a1 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -1,6 +1,6 @@  /*   * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.   *   * This software is available to you under a choice of one of two   * licenses.  You may choose to be licensed under the terms of the GNU @@ -45,13 +45,19 @@   * iser_start_rdma_unaligned_sg   */  static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, +					struct iser_data_buf *data, +					struct iser_data_buf *data_copy,  					enum iser_data_dir cmd_dir)  { -	int dma_nents; -	struct ib_device *dev; +	struct ib_device *dev = iser_task->ib_conn->device->ib_device; +	struct scatterlist *sgl = (struct scatterlist *)data->buf; +	struct scatterlist *sg;  	char *mem = NULL; -	struct iser_data_buf *data = &iser_task->data[cmd_dir]; -	unsigned long  cmd_data_len = data->data_len; +	unsigned long  cmd_data_len = 0; +	int dma_nents, i; + +	for_each_sg(sgl, sg, data->size, i) +		cmd_data_len += ib_sg_dma_len(dev, sg);  	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)  		mem = (void *)__get_free_pages(GFP_ATOMIC, @@ -61,17 +67,16 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,  	if (mem == NULL) {  		iser_err("Failed to allocate mem size %d %d for copying sglist\n", -			 data->size,(int)cmd_data_len); +			 data->size, (int)cmd_data_len);  		return -ENOMEM;  	}  	if (cmd_dir == ISER_DIR_OUT) {  		/* copy the unaligned sg the buffer which is used for RDMA */ -		struct scatterlist *sgl = (struct scatterlist *)data->buf; -		struct scatterlist *sg;  		int i;  		char *p, *from; +		sgl = (struct scatterlist *)data->buf;  		p = mem;  		for_each_sg(sgl, sg, data->size, i) {  			from = kmap_atomic(sg_page(sg)); @@ -83,39 +88,37 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,  		}  	} -	sg_init_one(&iser_task->data_copy[cmd_dir].sg_single, mem, cmd_data_len); -	iser_task->data_copy[cmd_dir].buf  = -		&iser_task->data_copy[cmd_dir].sg_single; -	iser_task->data_copy[cmd_dir].size = 1; +	sg_init_one(&data_copy->sg_single, mem, cmd_data_len); +	data_copy->buf = &data_copy->sg_single; +	data_copy->size = 1; +	data_copy->copy_buf = mem; -	iser_task->data_copy[cmd_dir].copy_buf  = mem; - -	dev = iser_task->iser_conn->ib_conn->device->ib_device; -	dma_nents = ib_dma_map_sg(dev, -				  &iser_task->data_copy[cmd_dir].sg_single, -				  1, +	dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1,  				  (cmd_dir == ISER_DIR_OUT) ?  				  
DMA_TO_DEVICE : DMA_FROM_DEVICE);  	BUG_ON(dma_nents == 0); -	iser_task->data_copy[cmd_dir].dma_nents = dma_nents; +	data_copy->dma_nents = dma_nents; +	data_copy->data_len = cmd_data_len; +  	return 0;  }  /**   * iser_finalize_rdma_unaligned_sg   */ +  void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, -				     enum iser_data_dir         cmd_dir) +				     struct iser_data_buf *data, +				     struct iser_data_buf *data_copy, +				     enum iser_data_dir cmd_dir)  {  	struct ib_device *dev; -	struct iser_data_buf *mem_copy;  	unsigned long  cmd_data_len; -	dev = iser_task->iser_conn->ib_conn->device->ib_device; -	mem_copy = &iser_task->data_copy[cmd_dir]; +	dev = iser_task->ib_conn->device->ib_device; -	ib_dma_unmap_sg(dev, &mem_copy->sg_single, 1, +	ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,  			(cmd_dir == ISER_DIR_OUT) ?  			DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -127,10 +130,10 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,  		int i;  		/* copy back read RDMA to unaligned sg */ -		mem	= mem_copy->copy_buf; +		mem = data_copy->copy_buf; -		sgl	= (struct scatterlist *)iser_task->data[ISER_DIR_IN].buf; -		sg_size = iser_task->data[ISER_DIR_IN].size; +		sgl = (struct scatterlist *)data->buf; +		sg_size = data->size;  		p = mem;  		for_each_sg(sgl, sg, sg_size, i) { @@ -143,15 +146,15 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,  		}  	} -	cmd_data_len = iser_task->data[cmd_dir].data_len; +	cmd_data_len = data->data_len;  	if (cmd_data_len > ISER_KMALLOC_THRESHOLD) -		free_pages((unsigned long)mem_copy->copy_buf, +		free_pages((unsigned long)data_copy->copy_buf,  			   ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);  	else -		kfree(mem_copy->copy_buf); +		kfree(data_copy->copy_buf); -	mem_copy->copy_buf = NULL; +	data_copy->copy_buf = NULL;  }  #define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0) @@ -319,7 +322,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,  	struct ib_device *dev;  	iser_task->dir[iser_dir] = 1; -	dev = iser_task->iser_conn->ib_conn->device->ib_device; +	dev = iser_task->ib_conn->device->ib_device;  	data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);  	if (data->dma_nents == 0) { @@ -329,31 +332,23 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,  	return 0;  } -void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task) +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, +			      struct iser_data_buf *data)  {  	struct ib_device *dev; -	struct iser_data_buf *data; -	dev = iser_task->iser_conn->ib_conn->device->ib_device; - -	if (iser_task->dir[ISER_DIR_IN]) { -		data = &iser_task->data[ISER_DIR_IN]; -		ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); -	} - -	if (iser_task->dir[ISER_DIR_OUT]) { -		data = &iser_task->data[ISER_DIR_OUT]; -		ib_dma_unmap_sg(dev, data->buf, data->size, DMA_TO_DEVICE); -	} +	dev = iser_task->ib_conn->device->ib_device; +	ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE);  }  static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,  			      struct ib_device *ibdev, +			      struct iser_data_buf *mem, +			      struct iser_data_buf *mem_copy,  			      enum iser_data_dir cmd_dir,  			      int aligned_len)  { -	struct iscsi_conn    *iscsi_conn = iser_task->iser_conn->iscsi_conn; -	struct iser_data_buf *mem = &iser_task->data[cmd_dir]; +	struct iscsi_conn    *iscsi_conn = iser_task->ib_conn->iscsi_conn;  	
iscsi_conn->fmr_unalign_cnt++;  	iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", @@ -363,12 +358,12 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,  		iser_data_buf_dump(mem, ibdev);  	/* unmap the command data before accessing it */ -	iser_dma_unmap_task_data(iser_task); +	iser_dma_unmap_task_data(iser_task, mem);  	/* allocate copy buf, if we are writing, copy the */  	/* unaligned scatterlist, dma map the copy        */ -	if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0) -			return -ENOMEM; +	if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0) +		return -ENOMEM;  	return 0;  } @@ -382,7 +377,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,  int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,  			  enum iser_data_dir cmd_dir)  { -	struct iser_conn     *ib_conn = iser_task->iser_conn->ib_conn; +	struct iser_conn     *ib_conn = iser_task->ib_conn;  	struct iser_device   *device = ib_conn->device;  	struct ib_device     *ibdev = device->ib_device;  	struct iser_data_buf *mem = &iser_task->data[cmd_dir]; @@ -396,7 +391,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,  	aligned_len = iser_data_buf_aligned_len(mem, ibdev);  	if (aligned_len != mem->dma_nents) { -		err = fall_to_bounce_buf(iser_task, ibdev, +		err = fall_to_bounce_buf(iser_task, ibdev, mem, +					 &iser_task->data_copy[cmd_dir],  					 cmd_dir, aligned_len);  		if (err) {  			iser_err("failed to allocate bounce buffer\n"); @@ -422,8 +418,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,  			 (unsigned long)regd_buf->reg.va,  			 (unsigned long)regd_buf->reg.len);  	} else { /* use FMR for multiple dma entries */ -		iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev); -		err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec, +		iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev); +		err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec,  					®d_buf->reg);  		if (err && err != -EAGAIN) {  			iser_data_buf_dump(mem, ibdev); @@ -431,12 +427,12 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,  				 mem->dma_nents,  				 ntoh24(iser_task->desc.iscsi_header.dlength));  			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", -				 ib_conn->fastreg.fmr.page_vec->data_size, -				 ib_conn->fastreg.fmr.page_vec->length, -				 ib_conn->fastreg.fmr.page_vec->offset); -			for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++) +				 ib_conn->fmr.page_vec->data_size, +				 ib_conn->fmr.page_vec->length, +				 ib_conn->fmr.page_vec->offset); +			for (i = 0; i < ib_conn->fmr.page_vec->length; i++)  				iser_err("page_vec[%d] = 0x%llx\n", i, -					 (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]); +					 (unsigned long long) ib_conn->fmr.page_vec->pages[i]);  		}  		if (err)  			return err; @@ -444,94 +440,280 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,  	return 0;  } -static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, -			    struct iser_conn *ib_conn, +static inline enum ib_t10_dif_type +scsi2ib_prot_type(unsigned char prot_type) +{ +	switch (prot_type) { +	case SCSI_PROT_DIF_TYPE0: +		return IB_T10DIF_NONE; +	case SCSI_PROT_DIF_TYPE1: +		return IB_T10DIF_TYPE1; +	case SCSI_PROT_DIF_TYPE2: +		return IB_T10DIF_TYPE2; +	case SCSI_PROT_DIF_TYPE3: +		return IB_T10DIF_TYPE3; +	default: +		return IB_T10DIF_NONE; +	} +} + + +static int +iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) 
+{ +	unsigned char scsi_ptype = scsi_get_prot_type(sc); + +	sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF; +	sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF; +	sig_attrs->mem.sig.dif.pi_interval = sc->device->sector_size; +	sig_attrs->wire.sig.dif.pi_interval = sc->device->sector_size; + +	switch (scsi_get_prot_op(sc)) { +	case SCSI_PROT_WRITE_INSERT: +	case SCSI_PROT_READ_STRIP: +		sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE; +		sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype); +		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; +		sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) & +						  0xffffffff; +		break; +	case SCSI_PROT_READ_INSERT: +	case SCSI_PROT_WRITE_STRIP: +		sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype); +		sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; +		sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) & +						 0xffffffff; +		sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE; +		break; +	case SCSI_PROT_READ_PASS: +	case SCSI_PROT_WRITE_PASS: +		sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype); +		sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; +		sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) & +						 0xffffffff; +		sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype); +		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; +		sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) & +						  0xffffffff; +		break; +	default: +		iser_err("Unsupported PI operation %d\n", +			 scsi_get_prot_op(sc)); +		return -EINVAL; +	} +	return 0; +} + + +static int +iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) +{ +	switch (scsi_get_prot_type(sc)) { +	case SCSI_PROT_DIF_TYPE0: +		*mask = 0x0; +		break; +	case SCSI_PROT_DIF_TYPE1: +	case SCSI_PROT_DIF_TYPE2: +		*mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG; +		break; +	case SCSI_PROT_DIF_TYPE3: +		*mask = ISER_CHECK_GUARD; +		break; +	default: +		iser_err("Unsupported protection type %d\n", +			 scsi_get_prot_type(sc)); +		return -EINVAL; +	} + +	return 0; +} + +static int +iser_reg_sig_mr(struct iscsi_iser_task *iser_task, +		struct fast_reg_descriptor *desc, struct ib_sge *data_sge, +		struct ib_sge *prot_sge, struct ib_sge *sig_sge) +{ +	struct iser_conn *ib_conn = iser_task->ib_conn; +	struct iser_pi_context *pi_ctx = desc->pi_ctx; +	struct ib_send_wr sig_wr, inv_wr; +	struct ib_send_wr *bad_wr, *wr = NULL; +	struct ib_sig_attrs sig_attrs; +	int ret; +	u32 key; + +	memset(&sig_attrs, 0, sizeof(sig_attrs)); +	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs); +	if (ret) +		goto err; + +	ret = iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask); +	if (ret) +		goto err; + +	if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) { +		memset(&inv_wr, 0, sizeof(inv_wr)); +		inv_wr.opcode = IB_WR_LOCAL_INV; +		inv_wr.wr_id = ISER_FASTREG_LI_WRID; +		inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey; +		wr = &inv_wr; +		/* Bump the key */ +		key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF); +		ib_update_fast_reg_key(pi_ctx->sig_mr, ++key); +	} + +	memset(&sig_wr, 0, sizeof(sig_wr)); +	sig_wr.opcode = IB_WR_REG_SIG_MR; +	sig_wr.wr_id = ISER_FASTREG_LI_WRID; +	sig_wr.sg_list = data_sge; +	sig_wr.num_sge = 1; +	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs; +	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr; +	if (scsi_prot_sg_count(iser_task->sc)) +		sig_wr.wr.sig_handover.prot = prot_sge; +	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE | +					      IB_ACCESS_REMOTE_READ | +					      IB_ACCESS_REMOTE_WRITE; + +	if (!wr) +		wr = &sig_wr; +	else +		wr->next = &sig_wr; + +	ret = 
ib_post_send(ib_conn->qp, wr, &bad_wr); +	if (ret) { +		iser_err("reg_sig_mr failed, ret:%d\n", ret); +		goto err; +	} +	desc->reg_indicators &= ~ISER_SIG_KEY_VALID; + +	sig_sge->lkey = pi_ctx->sig_mr->lkey; +	sig_sge->addr = 0; +	sig_sge->length = data_sge->length + prot_sge->length; +	if (scsi_get_prot_op(iser_task->sc) == SCSI_PROT_WRITE_INSERT || +	    scsi_get_prot_op(iser_task->sc) == SCSI_PROT_READ_STRIP) { +		sig_sge->length += (data_sge->length / +				   iser_task->sc->device->sector_size) * 8; +	} + +	iser_dbg("sig_sge: addr: 0x%llx  length: %u lkey: 0x%x\n", +		 sig_sge->addr, sig_sge->length, +		 sig_sge->lkey); +err: +	return ret; +} + +static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,  			    struct iser_regd_buf *regd_buf, -			    u32 offset, unsigned int data_size, -			    unsigned int page_list_len) +			    struct iser_data_buf *mem, +			    enum iser_reg_indicator ind, +			    struct ib_sge *sge)  { +	struct fast_reg_descriptor *desc = regd_buf->reg.mem_h; +	struct iser_conn *ib_conn = iser_task->ib_conn; +	struct iser_device *device = ib_conn->device; +	struct ib_device *ibdev = device->ib_device; +	struct ib_mr *mr; +	struct ib_fast_reg_page_list *frpl;  	struct ib_send_wr fastreg_wr, inv_wr;  	struct ib_send_wr *bad_wr, *wr = NULL;  	u8 key; -	int ret; +	int ret, offset, size, plen; + +	/* if there a single dma entry, dma mr suffices */ +	if (mem->dma_nents == 1) { +		struct scatterlist *sg = (struct scatterlist *)mem->buf; -	if (!desc->valid) { +		sge->lkey = device->mr->lkey; +		sge->addr   = ib_sg_dma_address(ibdev, &sg[0]); +		sge->length  = ib_sg_dma_len(ibdev, &sg[0]); + +		iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n", +			 sge->lkey, sge->addr, sge->length); +		return 0; +	} + +	if (ind == ISER_DATA_KEY_VALID) { +		mr = desc->data_mr; +		frpl = desc->data_frpl; +	} else { +		mr = desc->pi_ctx->prot_mr; +		frpl = desc->pi_ctx->prot_frpl; +	} + +	plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list, +				   &offset, &size); +	if (plen * SIZE_4K < size) { +		iser_err("fast reg page_list too short to hold this SG\n"); +		return -EINVAL; +	} + +	if (!(desc->reg_indicators & ind)) {  		memset(&inv_wr, 0, sizeof(inv_wr)); +		inv_wr.wr_id = ISER_FASTREG_LI_WRID;  		inv_wr.opcode = IB_WR_LOCAL_INV; -		inv_wr.send_flags = IB_SEND_SIGNALED; -		inv_wr.ex.invalidate_rkey = desc->data_mr->rkey; +		inv_wr.ex.invalidate_rkey = mr->rkey;  		wr = &inv_wr;  		/* Bump the key */ -		key = (u8)(desc->data_mr->rkey & 0x000000FF); -		ib_update_fast_reg_key(desc->data_mr, ++key); +		key = (u8)(mr->rkey & 0x000000FF); +		ib_update_fast_reg_key(mr, ++key);  	}  	/* Prepare FASTREG WR */  	memset(&fastreg_wr, 0, sizeof(fastreg_wr)); +	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;  	fastreg_wr.opcode = IB_WR_FAST_REG_MR; -	fastreg_wr.send_flags = IB_SEND_SIGNALED; -	fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset; -	fastreg_wr.wr.fast_reg.page_list = desc->data_frpl; -	fastreg_wr.wr.fast_reg.page_list_len = page_list_len; +	fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset; +	fastreg_wr.wr.fast_reg.page_list = frpl; +	fastreg_wr.wr.fast_reg.page_list_len = plen;  	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K; -	fastreg_wr.wr.fast_reg.length = data_size; -	fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey; +	fastreg_wr.wr.fast_reg.length = size; +	fastreg_wr.wr.fast_reg.rkey = mr->rkey;  	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |  					       IB_ACCESS_REMOTE_WRITE |  					       
IB_ACCESS_REMOTE_READ); -	if (!wr) { +	if (!wr)  		wr = &fastreg_wr; -		atomic_inc(&ib_conn->post_send_buf_count); -	} else { +	else  		wr->next = &fastreg_wr; -		atomic_add(2, &ib_conn->post_send_buf_count); -	}  	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);  	if (ret) { -		if (bad_wr->next) -			atomic_sub(2, &ib_conn->post_send_buf_count); -		else -			atomic_dec(&ib_conn->post_send_buf_count);  		iser_err("fast registration failed, ret:%d\n", ret);  		return ret;  	} -	desc->valid = false; +	desc->reg_indicators &= ~ind; -	regd_buf->reg.mem_h = desc; -	regd_buf->reg.lkey = desc->data_mr->lkey; -	regd_buf->reg.rkey = desc->data_mr->rkey; -	regd_buf->reg.va = desc->data_frpl->page_list[0] + offset; -	regd_buf->reg.len = data_size; -	regd_buf->reg.is_mr = 1; +	sge->lkey = mr->lkey; +	sge->addr = frpl->page_list[0] + offset; +	sge->length = size;  	return ret;  }  /** - * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA, + * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,   * using Fast Registration WR (if possible) obtaining rkey and va   *   * returns 0 on success, errno code on failure   */ -int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task, -			   enum iser_data_dir cmd_dir) +int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, +			      enum iser_data_dir cmd_dir)  { -	struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; +	struct iser_conn *ib_conn = iser_task->ib_conn;  	struct iser_device *device = ib_conn->device;  	struct ib_device *ibdev = device->ib_device;  	struct iser_data_buf *mem = &iser_task->data[cmd_dir];  	struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir]; -	struct fast_reg_descriptor *desc; -	unsigned int data_size, page_list_len; +	struct fast_reg_descriptor *desc = NULL; +	struct ib_sge data_sge;  	int err, aligned_len;  	unsigned long flags; -	u32 offset;  	aligned_len = iser_data_buf_aligned_len(mem, ibdev);  	if (aligned_len != mem->dma_nents) { -		err = fall_to_bounce_buf(iser_task, ibdev, +		err = fall_to_bounce_buf(iser_task, ibdev, mem, +					 &iser_task->data_copy[cmd_dir],  					 cmd_dir, aligned_len);  		if (err) {  			iser_err("failed to allocate bounce buffer\n"); @@ -540,41 +722,79 @@ int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,  		mem = &iser_task->data_copy[cmd_dir];  	} -	/* if there a single dma entry, dma mr suffices */ -	if (mem->dma_nents == 1) { -		struct scatterlist *sg = (struct scatterlist *)mem->buf; - -		regd_buf->reg.lkey = device->mr->lkey; -		regd_buf->reg.rkey = device->mr->rkey; -		regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]); -		regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]); -		regd_buf->reg.is_mr = 0; -	} else { +	if (mem->dma_nents != 1 || +	    scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {  		spin_lock_irqsave(&ib_conn->lock, flags); -		desc = list_first_entry(&ib_conn->fastreg.frwr.pool, +		desc = list_first_entry(&ib_conn->fastreg.pool,  					struct fast_reg_descriptor, list);  		list_del(&desc->list);  		spin_unlock_irqrestore(&ib_conn->lock, flags); -		page_list_len = iser_sg_to_page_vec(mem, device->ib_device, -						    desc->data_frpl->page_list, -						    &offset, &data_size); - -		if (page_list_len * SIZE_4K < data_size) { -			iser_err("fast reg page_list too short to hold this SG\n"); -			err = -EINVAL; -			goto err_reg; +		regd_buf->reg.mem_h = desc; +	} + +	err = iser_fast_reg_mr(iser_task, regd_buf, mem, +			       ISER_DATA_KEY_VALID, &data_sge); +	if (err) +		goto err_reg; + +	if 
(scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { +		struct ib_sge prot_sge, sig_sge; + +		memset(&prot_sge, 0, sizeof(prot_sge)); +		if (scsi_prot_sg_count(iser_task->sc)) { +			mem = &iser_task->prot[cmd_dir]; +			aligned_len = iser_data_buf_aligned_len(mem, ibdev); +			if (aligned_len != mem->dma_nents) { +				err = fall_to_bounce_buf(iser_task, ibdev, mem, +							 &iser_task->prot_copy[cmd_dir], +							 cmd_dir, aligned_len); +				if (err) { +					iser_err("failed to allocate bounce buffer\n"); +					return err; +				} +				mem = &iser_task->prot_copy[cmd_dir]; +			} + +			err = iser_fast_reg_mr(iser_task, regd_buf, mem, +					       ISER_PROT_KEY_VALID, &prot_sge); +			if (err) +				goto err_reg;  		} -		err = iser_fast_reg_mr(desc, ib_conn, regd_buf, -				       offset, data_size, page_list_len); -		if (err) -			goto err_reg; +		err = iser_reg_sig_mr(iser_task, desc, &data_sge, +				      &prot_sge, &sig_sge); +		if (err) { +			iser_err("Failed to register signature mr\n"); +			return err; +		} +		desc->reg_indicators |= ISER_FASTREG_PROTECTED; + +		regd_buf->reg.lkey = sig_sge.lkey; +		regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey; +		regd_buf->reg.va = sig_sge.addr; +		regd_buf->reg.len = sig_sge.length; +		regd_buf->reg.is_mr = 1; +	} else { +		if (desc) { +			regd_buf->reg.rkey = desc->data_mr->rkey; +			regd_buf->reg.is_mr = 1; +		} else { +			regd_buf->reg.rkey = device->mr->rkey; +			regd_buf->reg.is_mr = 0; +		} + +		regd_buf->reg.lkey = data_sge.lkey; +		regd_buf->reg.va = data_sge.addr; +		regd_buf->reg.len = data_sge.length;  	}  	return 0;  err_reg: -	spin_lock_irqsave(&ib_conn->lock, flags); -	list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); -	spin_unlock_irqrestore(&ib_conn->lock, flags); +	if (desc) { +		spin_lock_irqsave(&ib_conn->lock, flags); +		list_add_tail(&desc->list, &ib_conn->fastreg.pool); +		spin_unlock_irqrestore(&ib_conn->lock, flags); +	} +  	return err;  } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index afe95674008..ea01075f9f9 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1,7 +1,7 @@  /*   * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.   * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.   *   * This software is available to you under a choice of one of two   * licenses.  
You may choose to be licensed under the terms of the GNU @@ -71,17 +71,14 @@ static void iser_event_handler(struct ib_event_handler *handler,   */  static int iser_create_device_ib_res(struct iser_device *device)  { -	int i, j;  	struct iser_cq_desc *cq_desc; -	struct ib_device_attr *dev_attr; +	struct ib_device_attr *dev_attr = &device->dev_attr; +	int ret, i, j; -	dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL); -	if (!dev_attr) -		return -ENOMEM; - -	if (ib_query_device(device->ib_device, dev_attr)) { +	ret = ib_query_device(device->ib_device, dev_attr); +	if (ret) {  		pr_warn("Query device failed for %s\n", device->ib_device->name); -		goto dev_attr_err; +		return ret;  	}  	/* Assign function handles  - based on FMR support */ @@ -94,14 +91,14 @@ static int iser_create_device_ib_res(struct iser_device *device)  		device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;  	} else  	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { -		iser_info("FRWR supported, using FRWR for registration\n"); -		device->iser_alloc_rdma_reg_res = iser_create_frwr_pool; -		device->iser_free_rdma_reg_res = iser_free_frwr_pool; -		device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr; -		device->iser_unreg_rdma_mem = iser_unreg_mem_frwr; +		iser_info("FastReg supported, using FastReg for registration\n"); +		device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool; +		device->iser_free_rdma_reg_res = iser_free_fastreg_pool; +		device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg; +		device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg;  	} else { -		iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n"); -		goto dev_attr_err; +		iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n"); +		return -1;  	}  	device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); @@ -158,7 +155,6 @@ static int iser_create_device_ib_res(struct iser_device *device)  	if (ib_register_event_handler(&device->event_handler))  		goto handler_err; -	kfree(dev_attr);  	return 0;  handler_err: @@ -178,8 +174,6 @@ pd_err:  	kfree(device->cq_desc);  cq_desc_err:  	iser_err("failed to allocate an IB resource\n"); -dev_attr_err: -	kfree(dev_attr);  	return -1;  } @@ -221,13 +215,13 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)  	struct ib_fmr_pool_param params;  	int ret = -ENOMEM; -	ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) + -						(sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), -						GFP_KERNEL); -	if (!ib_conn->fastreg.fmr.page_vec) +	ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) + +					(sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), +					GFP_KERNEL); +	if (!ib_conn->fmr.page_vec)  		return ret; -	ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1); +	ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1);  	params.page_shift        = SHIFT_4K;  	/* when the first/last SG element are not start/end * @@ -243,16 +237,16 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)  				    IB_ACCESS_REMOTE_WRITE |  				    IB_ACCESS_REMOTE_READ); -	ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, ¶ms); -	if (!IS_ERR(ib_conn->fastreg.fmr.pool)) +	ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, ¶ms); +	if (!IS_ERR(ib_conn->fmr.pool))  		return 0;  	/* no FMR => no need for page_vec */ -	kfree(ib_conn->fastreg.fmr.page_vec); -	ib_conn->fastreg.fmr.page_vec = NULL; +	kfree(ib_conn->fmr.page_vec); +	ib_conn->fmr.page_vec = NULL; -	ret 
= PTR_ERR(ib_conn->fastreg.fmr.pool); -	ib_conn->fastreg.fmr.pool = NULL; +	ret = PTR_ERR(ib_conn->fmr.pool); +	ib_conn->fmr.pool = NULL;  	if (ret != -ENOSYS) {  		iser_err("FMR allocation failed, err %d\n", ret);  		return ret; @@ -268,93 +262,173 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)  void iser_free_fmr_pool(struct iser_conn *ib_conn)  {  	iser_info("freeing conn %p fmr pool %p\n", -		  ib_conn, ib_conn->fastreg.fmr.pool); +		  ib_conn, ib_conn->fmr.pool); + +	if (ib_conn->fmr.pool != NULL) +		ib_destroy_fmr_pool(ib_conn->fmr.pool); + +	ib_conn->fmr.pool = NULL; + +	kfree(ib_conn->fmr.page_vec); +	ib_conn->fmr.page_vec = NULL; +} + +static int +iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, +			 bool pi_enable, struct fast_reg_descriptor *desc) +{ +	int ret; + +	desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device, +						      ISCSI_ISER_SG_TABLESIZE + 1); +	if (IS_ERR(desc->data_frpl)) { +		ret = PTR_ERR(desc->data_frpl); +		iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", +			 ret); +		return PTR_ERR(desc->data_frpl); +	} -	if (ib_conn->fastreg.fmr.pool != NULL) -		ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool); +	desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1); +	if (IS_ERR(desc->data_mr)) { +		ret = PTR_ERR(desc->data_mr); +		iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); +		goto fast_reg_mr_failure; +	} +	desc->reg_indicators |= ISER_DATA_KEY_VALID; + +	if (pi_enable) { +		struct ib_mr_init_attr mr_init_attr = {0}; +		struct iser_pi_context *pi_ctx = NULL; + +		desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL); +		if (!desc->pi_ctx) { +			iser_err("Failed to allocate pi context\n"); +			ret = -ENOMEM; +			goto pi_ctx_alloc_failure; +		} +		pi_ctx = desc->pi_ctx; + +		pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device, +						    ISCSI_ISER_SG_TABLESIZE); +		if (IS_ERR(pi_ctx->prot_frpl)) { +			ret = PTR_ERR(pi_ctx->prot_frpl); +			iser_err("Failed to allocate prot frpl ret=%d\n", +				 ret); +			goto prot_frpl_failure; +		} -	ib_conn->fastreg.fmr.pool = NULL; +		pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, +						ISCSI_ISER_SG_TABLESIZE + 1); +		if (IS_ERR(pi_ctx->prot_mr)) { +			ret = PTR_ERR(pi_ctx->prot_mr); +			iser_err("Failed to allocate prot frmr ret=%d\n", +				 ret); +			goto prot_mr_failure; +		} +		desc->reg_indicators |= ISER_PROT_KEY_VALID; + +		mr_init_attr.max_reg_descriptors = 2; +		mr_init_attr.flags |= IB_MR_SIGNATURE_EN; +		pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr); +		if (IS_ERR(pi_ctx->sig_mr)) { +			ret = PTR_ERR(pi_ctx->sig_mr); +			iser_err("Failed to allocate signature enabled mr err=%d\n", +				 ret); +			goto sig_mr_failure; +		} +		desc->reg_indicators |= ISER_SIG_KEY_VALID; +	} +	desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; + +	iser_dbg("Create fr_desc %p page_list %p\n", +		 desc, desc->data_frpl->page_list); + +	return 0; +sig_mr_failure: +	ib_dereg_mr(desc->pi_ctx->prot_mr); +prot_mr_failure: +	ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); +prot_frpl_failure: +	kfree(desc->pi_ctx); +pi_ctx_alloc_failure: +	ib_dereg_mr(desc->data_mr); +fast_reg_mr_failure: +	ib_free_fast_reg_page_list(desc->data_frpl); -	kfree(ib_conn->fastreg.fmr.page_vec); -	ib_conn->fastreg.fmr.page_vec = NULL; +	return ret;  }  /** - * iser_create_frwr_pool - Creates pool of fast_reg descriptors + * iser_create_fastreg_pool - Creates pool of fast_reg descriptors   * for fast registration work requests.   
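
The reg_indicators bits set in iser_create_fastreg_desc() above (ISER_DATA_KEY_VALID, ISER_PROT_KEY_VALID, ISER_SIG_KEY_VALID) record whether an MR key may be used directly or must first be invalidated and bumped, which is what the LOCAL_INV plus ib_update_fast_reg_key() sequence in the registration paths does. A user-space model of that bookkeeping, with made-up flag values, compilable on its own:

#include <stdint.h>
#include <stdio.h>

/* Illustrative model of the reg_indicators bookkeeping -- the flag values
 * and helper names are invented; only the idea matches the driver. */
#define KEY_DATA_VALID  (1u << 0)
#define KEY_PROT_VALID  (1u << 1)
#define KEY_SIG_VALID   (1u << 2)

struct fr_desc_model {
	uint32_t reg_indicators;
	uint8_t  key;                /* models the low byte of the rkey */
};

/* Before reusing an MR whose key was consumed, post a LOCAL_INV and bump
 * the key; the registration WR itself then clears the "valid" bit again. */
static void reuse_mr(struct fr_desc_model *d, uint32_t which)
{
	if (!(d->reg_indicators & which)) {
		/* would post IB_WR_LOCAL_INV here */
		d->key++;                         /* ib_update_fast_reg_key() */
		d->reg_indicators |= which;
	}
	/* would post IB_WR_FAST_REG_MR / IB_WR_REG_SIG_MR here */
	d->reg_indicators &= ~which;              /* key consumed again */
}

int main(void)
{
	struct fr_desc_model d = { .reg_indicators = KEY_DATA_VALID, .key = 0 };

	reuse_mr(&d, KEY_DATA_VALID);   /* fresh descriptor: no invalidate   */
	reuse_mr(&d, KEY_DATA_VALID);   /* second use: invalidate + key bump */
	printf("key now %u\n", (unsigned)d.key);
	return 0;
}

The same pattern covers the signature MR: IB_WR_REG_SIG_MR consumes ISER_SIG_KEY_VALID exactly as the fast-reg WR consumes the data and protection bits.
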
* returns 0 on success, or errno code on failure   */ -int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) +int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max)  {  	struct iser_device	*device = ib_conn->device;  	struct fast_reg_descriptor	*desc;  	int i, ret; -	INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool); -	ib_conn->fastreg.frwr.pool_size = 0; +	INIT_LIST_HEAD(&ib_conn->fastreg.pool); +	ib_conn->fastreg.pool_size = 0;  	for (i = 0; i < cmds_max; i++) { -		desc = kmalloc(sizeof(*desc), GFP_KERNEL); +		desc = kzalloc(sizeof(*desc), GFP_KERNEL);  		if (!desc) {  			iser_err("Failed to allocate a new fast_reg descriptor\n");  			ret = -ENOMEM;  			goto err;  		} -		desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device, -							 ISCSI_ISER_SG_TABLESIZE + 1); -		if (IS_ERR(desc->data_frpl)) { -			ret = PTR_ERR(desc->data_frpl); -			iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret); -			goto fast_reg_page_failure; +		ret = iser_create_fastreg_desc(device->ib_device, device->pd, +					       ib_conn->pi_support, desc); +		if (ret) { +			iser_err("Failed to create fastreg descriptor err=%d\n", +				 ret); +			kfree(desc); +			goto err;  		} -		desc->data_mr = ib_alloc_fast_reg_mr(device->pd, -						     ISCSI_ISER_SG_TABLESIZE + 1); -		if (IS_ERR(desc->data_mr)) { -			ret = PTR_ERR(desc->data_mr); -			iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); -			goto fast_reg_mr_failure; -		} -		desc->valid = true; -		list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); -		ib_conn->fastreg.frwr.pool_size++; +		list_add_tail(&desc->list, &ib_conn->fastreg.pool); +		ib_conn->fastreg.pool_size++;  	}  	return 0; -fast_reg_mr_failure: -	ib_free_fast_reg_page_list(desc->data_frpl); -fast_reg_page_failure: -	kfree(desc);  err: -	iser_free_frwr_pool(ib_conn); +	iser_free_fastreg_pool(ib_conn);  	return ret;  }  /** - * iser_free_frwr_pool - releases the pool of fast_reg descriptors + * iser_free_fastreg_pool - releases the pool of fast_reg descriptors   */ -void iser_free_frwr_pool(struct iser_conn *ib_conn) +void iser_free_fastreg_pool(struct iser_conn *ib_conn)  {  	struct fast_reg_descriptor *desc, *tmp;  	int i = 0; -	if (list_empty(&ib_conn->fastreg.frwr.pool)) +	if (list_empty(&ib_conn->fastreg.pool))  		return; -	iser_info("freeing conn %p frwr pool\n", ib_conn); +	iser_info("freeing conn %p fr pool\n", ib_conn); -	list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) { +	list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {  		list_del(&desc->list);  		ib_free_fast_reg_page_list(desc->data_frpl);  		ib_dereg_mr(desc->data_mr); +		if (desc->pi_ctx) { +			ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); +			ib_dereg_mr(desc->pi_ctx->prot_mr); +			ib_destroy_mr(desc->pi_ctx->sig_mr); +			kfree(desc->pi_ctx); +		}  		kfree(desc);  		++i;  	} -	if (i < ib_conn->fastreg.frwr.pool_size) +	if (i < ib_conn->fastreg.pool_size)  		iser_warn("pool still has %d regions registered\n", -			  ib_conn->fastreg.frwr.pool_size - i); +			  ib_conn->fastreg.pool_size - i);  }  /** @@ -389,12 +463,17 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)  	init_attr.qp_context	= (void *)ib_conn;  	init_attr.send_cq	= device->tx_cq[min_index];  	init_attr.recv_cq	= device->rx_cq[min_index]; -	init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;  	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;  	init_attr.cap.max_send_sge = 2;  	init_attr.cap.max_recv_sge = 1;  	init_attr.sq_sig_type	= 
IB_SIGNAL_REQ_WR;  	init_attr.qp_type	= IB_QPT_RC; +	if (ib_conn->pi_support) { +		init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS; +		init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; +	} else { +		init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS; +	}  	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);  	if (ret) @@ -502,14 +581,30 @@ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,  	return ret;  } +void iser_release_work(struct work_struct *work) +{ +	struct iser_conn *ib_conn; + +	ib_conn = container_of(work, struct iser_conn, release_work); + +	/* wait for .conn_stop callback */ +	wait_for_completion(&ib_conn->stop_completion); + +	/* wait for the qp`s post send and post receive buffers to empty */ +	wait_event_interruptible(ib_conn->wait, +				 ib_conn->state == ISER_CONN_DOWN); + +	iser_conn_release(ib_conn); +} +  /**   * Frees all conn objects and deallocs conn descriptor   */ -static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id) +void iser_conn_release(struct iser_conn *ib_conn)  {  	struct iser_device  *device = ib_conn->device; -	BUG_ON(ib_conn->state != ISER_CONN_DOWN); +	BUG_ON(ib_conn->state == ISER_CONN_UP);  	mutex_lock(&ig.connlist_mutex);  	list_del(&ib_conn->conn_list); @@ -521,27 +616,13 @@ static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)  	if (device != NULL)  		iser_device_try_release(device);  	/* if cma handler context, the caller actually destroy the id */ -	if (ib_conn->cma_id != NULL && can_destroy_id) { +	if (ib_conn->cma_id != NULL) {  		rdma_destroy_id(ib_conn->cma_id);  		ib_conn->cma_id = NULL;  	}  	iscsi_destroy_endpoint(ib_conn->ep);  } -void iser_conn_get(struct iser_conn *ib_conn) -{ -	atomic_inc(&ib_conn->refcount); -} - -int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id) -{ -	if (atomic_dec_and_test(&ib_conn->refcount)) { -		iser_conn_release(ib_conn, can_destroy_id); -		return 1; -	} -	return 0; -} -  /**   * triggers start of the disconnect procedures and wait for them to be done   */ @@ -559,24 +640,19 @@ void iser_conn_terminate(struct iser_conn *ib_conn)  	if (err)  		iser_err("Failed to disconnect, conn: 0x%p err %d\n",  			 ib_conn,err); - -	wait_event_interruptible(ib_conn->wait, -				 ib_conn->state == ISER_CONN_DOWN); - -	iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */  } -static int iser_connect_error(struct rdma_cm_id *cma_id) +static void iser_connect_error(struct rdma_cm_id *cma_id)  {  	struct iser_conn *ib_conn; +  	ib_conn = (struct iser_conn *)cma_id->context;  	ib_conn->state = ISER_CONN_DOWN;  	wake_up_interruptible(&ib_conn->wait); -	return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */  } -static int iser_addr_handler(struct rdma_cm_id *cma_id) +static void iser_addr_handler(struct rdma_cm_id *cma_id)  {  	struct iser_device *device;  	struct iser_conn   *ib_conn; @@ -585,22 +661,35 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id)  	device = iser_device_find_by_ib_device(cma_id);  	if (!device) {  		iser_err("device lookup/creation failed\n"); -		return iser_connect_error(cma_id); +		iser_connect_error(cma_id); +		return;  	}  	ib_conn = (struct iser_conn *)cma_id->context;  	ib_conn->device = device; +	/* connection T10-PI support */ +	if (iser_pi_enable) { +		if (!(device->dev_attr.device_cap_flags & +		      IB_DEVICE_SIGNATURE_HANDOVER)) { +			iser_warn("T10-PI requested but not supported on %s, " +				  "continue without T10-PI\n", +				  ib_conn->device->ib_device->name); +			
ib_conn->pi_support = false; +		} else { +			ib_conn->pi_support = true; +		} +	} +  	ret = rdma_resolve_route(cma_id, 1000);  	if (ret) {  		iser_err("resolve route failed: %d\n", ret); -		return iser_connect_error(cma_id); +		iser_connect_error(cma_id); +		return;  	} - -	return 0;  } -static int iser_route_handler(struct rdma_cm_id *cma_id) +static void iser_route_handler(struct rdma_cm_id *cma_id)  {  	struct rdma_conn_param conn_param;  	int    ret; @@ -628,33 +717,40 @@ static int iser_route_handler(struct rdma_cm_id *cma_id)  		goto failure;  	} -	return 0; +	return;  failure: -	return iser_connect_error(cma_id); +	iser_connect_error(cma_id);  }  static void iser_connected_handler(struct rdma_cm_id *cma_id)  {  	struct iser_conn *ib_conn; +	struct ib_qp_attr attr; +	struct ib_qp_init_attr init_attr; + +	(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); +	iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);  	ib_conn = (struct iser_conn *)cma_id->context; -	ib_conn->state = ISER_CONN_UP; -	wake_up_interruptible(&ib_conn->wait); +	if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP)) +		wake_up_interruptible(&ib_conn->wait);  } -static int iser_disconnected_handler(struct rdma_cm_id *cma_id) +static void iser_disconnected_handler(struct rdma_cm_id *cma_id)  {  	struct iser_conn *ib_conn; -	int ret;  	ib_conn = (struct iser_conn *)cma_id->context;  	/* getting here when the state is UP means that the conn is being *  	 * terminated asynchronously from the iSCSI layer's perspective.  */  	if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, -				      ISER_CONN_TERMINATING)) -		iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, -				   ISCSI_ERR_CONN_FAILED); +					ISER_CONN_TERMINATING)){ +		if (ib_conn->iscsi_conn) +			iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); +		else +			iser_err("iscsi_iser connection isn't bound\n"); +	}  	/* Complete the termination process if no posts are pending */  	if (ib_conn->post_recv_buf_count == 0 && @@ -662,24 +758,19 @@ static int iser_disconnected_handler(struct rdma_cm_id *cma_id)  		ib_conn->state = ISER_CONN_DOWN;  		wake_up_interruptible(&ib_conn->wait);  	} - -	ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */ -	return ret;  }  static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)  { -	int ret = 0; -  	iser_info("event %d status %d conn %p id %p\n",  		  event->event, event->status, cma_id->context, cma_id);  	switch (event->event) {  	case RDMA_CM_EVENT_ADDR_RESOLVED: -		ret = iser_addr_handler(cma_id); +		iser_addr_handler(cma_id);  		break;  	case RDMA_CM_EVENT_ROUTE_RESOLVED: -		ret = iser_route_handler(cma_id); +		iser_route_handler(cma_id);  		break;  	case RDMA_CM_EVENT_ESTABLISHED:  		iser_connected_handler(cma_id); @@ -689,18 +780,18 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve  	case RDMA_CM_EVENT_CONNECT_ERROR:  	case RDMA_CM_EVENT_UNREACHABLE:  	case RDMA_CM_EVENT_REJECTED: -		ret = iser_connect_error(cma_id); +		iser_connect_error(cma_id);  		break;  	case RDMA_CM_EVENT_DISCONNECTED:  	case RDMA_CM_EVENT_DEVICE_REMOVAL:  	case RDMA_CM_EVENT_ADDR_CHANGE: -		ret = iser_disconnected_handler(cma_id); +		iser_disconnected_handler(cma_id);  		break;  	default:  		iser_err("Unexpected RDMA CM event (%d)\n", event->event);  		break;  	} -	return ret; +	return 0;  }  void iser_conn_init(struct iser_conn *ib_conn) @@ -709,7 +800,7 @@ void iser_conn_init(struct iser_conn *ib_conn)  	
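
With the refcounting removed, teardown ordering is carried by iser_release_work() shown earlier: it blocks on stop_completion (signalled by the iSCSI .conn_stop callback), then waits for the connection state to reach ISER_CONN_DOWN, and only then calls iser_conn_release(). A minimal pthread model of that ordering, assuming nothing beyond what the patch shows; every name in it is illustrative (build with cc -pthread):

#include <pthread.h>
#include <stdio.h>

enum conn_state { CONN_UP, CONN_DOWN };

struct conn_model {
	pthread_mutex_t lock;
	pthread_cond_t  cond;
	int             stopped;     /* models stop_completion   */
	enum conn_state state;       /* models ib_conn->state    */
};

static void *release_work(void *arg)
{
	struct conn_model *c = arg;

	pthread_mutex_lock(&c->lock);
	while (!c->stopped)                  /* wait_for_completion()      */
		pthread_cond_wait(&c->cond, &c->lock);
	while (c->state != CONN_DOWN)        /* wait_event(state == DOWN)  */
		pthread_cond_wait(&c->cond, &c->lock);
	pthread_mutex_unlock(&c->lock);

	printf("conn released\n");           /* iser_conn_release()        */
	return NULL;
}

int main(void)
{
	struct conn_model c = { .stopped = 0, .state = CONN_UP };
	pthread_t t;

	pthread_mutex_init(&c.lock, NULL);
	pthread_cond_init(&c.cond, NULL);
	pthread_create(&t, NULL, release_work, &c);

	pthread_mutex_lock(&c.lock);
	c.stopped = 1;                       /* .conn_stop fired           */
	c.state = CONN_DOWN;                 /* last flush completed       */
	pthread_cond_broadcast(&c.cond);
	pthread_mutex_unlock(&c.lock);

	pthread_join(t, NULL);
	return 0;
}
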
init_waitqueue_head(&ib_conn->wait);  	ib_conn->post_recv_buf_count = 0;  	atomic_set(&ib_conn->post_send_buf_count, 0); -	atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */ +	init_completion(&ib_conn->stop_completion);  	INIT_LIST_HEAD(&ib_conn->conn_list);  	spin_lock_init(&ib_conn->lock);  } @@ -737,7 +828,6 @@ int iser_connect(struct iser_conn   *ib_conn,  	ib_conn->state = ISER_CONN_PENDING; -	iser_conn_get(ib_conn); /* ref ib conn's cma id */  	ib_conn->cma_id = rdma_create_id(iser_cma_handler,  					     (void *)ib_conn,  					     RDMA_PS_TCP, IB_QPT_RC); @@ -774,9 +864,8 @@ id_failure:  	ib_conn->cma_id = NULL;  addr_failure:  	ib_conn->state = ISER_CONN_DOWN; -	iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */  connect_failure: -	iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ +	iser_conn_release(ib_conn);  	return err;  } @@ -797,7 +886,7 @@ int iser_reg_page_vec(struct iser_conn     *ib_conn,  	page_list = page_vec->pages;  	io_addr	  = page_list[0]; -	mem  = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool, +	mem  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,  				    page_list,  				    page_vec->length,  				    io_addr); @@ -851,11 +940,11 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,  	reg->mem_h = NULL;  } -void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, -			 enum iser_data_dir cmd_dir) +void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, +			    enum iser_data_dir cmd_dir)  {  	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; -	struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; +	struct iser_conn *ib_conn = iser_task->ib_conn;  	struct fast_reg_descriptor *desc = reg->mem_h;  	if (!reg->is_mr) @@ -864,7 +953,7 @@ void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,  	reg->mem_h = NULL;  	reg->is_mr = 0;  	spin_lock_bh(&ib_conn->lock); -	list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); +	list_add_tail(&desc->list, &ib_conn->fastreg.pool);  	spin_unlock_bh(&ib_conn->lock);  } @@ -965,7 +1054,7 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,  		 * perspective.                                             
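
iser_unreg_mem_fastreg() above returns a descriptor to ib_conn->fastreg.pool under the connection lock, mirroring the take-from-head done in iser_reg_rdma_mem_fastreg(). A toy user-space model of that get/put pattern, with an array-backed stack standing in for the list_head and a mutex standing in for the spinlock:

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

#define POOL_SIZE 4

struct desc { int id; };

static struct desc descs[POOL_SIZE];
static struct desc *free_stack[POOL_SIZE];
static int nfree;
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

static struct desc *pool_get(void)
{
	struct desc *d = NULL;

	pthread_mutex_lock(&pool_lock);
	if (nfree > 0)                        /* list_first_entry() + list_del() */
		d = free_stack[--nfree];
	pthread_mutex_unlock(&pool_lock);
	return d;
}

static void pool_put(struct desc *d)
{
	pthread_mutex_lock(&pool_lock);
	free_stack[nfree++] = d;              /* list_add_tail() */
	pthread_mutex_unlock(&pool_lock);
}

int main(void)
{
	for (int i = 0; i < POOL_SIZE; i++) {
		descs[i].id = i;
		free_stack[nfree++] = &descs[i];
	}

	struct desc *d = pool_get();          /* iser_reg_rdma_mem_fastreg() */
	printf("got descriptor %d\n", d ? d->id : -1);
	if (d)
		pool_put(d);                  /* iser_unreg_mem_fastreg()    */
	printf("%d descriptors free\n", nfree);
	return 0;
}
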
*/  		if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,  		    ISER_CONN_TERMINATING)) -			iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, +			iscsi_conn_failure(ib_conn->iscsi_conn,  					   ISCSI_ERR_CONN_FAILED);  		/* no more non completed posts to the QP, complete the @@ -989,18 +1078,16 @@ static int iser_drain_tx_cq(struct iser_device  *device, int cq_index)  		if (wc.status == IB_WC_SUCCESS) {  			if (wc.opcode == IB_WC_SEND)  				iser_snd_completion(tx_desc, ib_conn); -			else if (wc.opcode == IB_WC_LOCAL_INV || -				 wc.opcode == IB_WC_FAST_REG_MR) { -				atomic_dec(&ib_conn->post_send_buf_count); -				continue; -			} else +			else  				iser_err("expected opcode %d got %d\n",  					IB_WC_SEND, wc.opcode);  		} else {  			iser_err("tx id %llx status %d vend_err %x\n", -				wc.wr_id, wc.status, wc.vendor_err); -			atomic_dec(&ib_conn->post_send_buf_count); -			iser_handle_comp_error(tx_desc, ib_conn); +				 wc.wr_id, wc.status, wc.vendor_err); +			if (wc.wr_id != ISER_FASTREG_LI_WRID) { +				atomic_dec(&ib_conn->post_send_buf_count); +				iser_handle_comp_error(tx_desc, ib_conn); +			}  		}  		completed_tx++;  	} @@ -1018,8 +1105,12 @@ static void iser_cq_tasklet_fn(unsigned long data)  	 struct iser_rx_desc *desc;  	 unsigned long	     xfer_len;  	struct iser_conn *ib_conn; -	int completed_tx, completed_rx; -	completed_tx = completed_rx = 0; +	int completed_tx, completed_rx = 0; + +	/* First do tx drain, so in a case where we have rx flushes and a successful +	 * tx completion we will still go through completion error handling. +	 */ +	completed_tx = iser_drain_tx_cq(device, cq_index);  	while (ib_poll_cq(cq, 1, &wc) == 1) {  		desc	 = (struct iser_rx_desc *) (unsigned long) wc.wr_id; @@ -1047,7 +1138,6 @@ static void iser_cq_tasklet_fn(unsigned long data)  	 * " would not cause interrupts to be missed"                       */  	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); -	completed_tx += iser_drain_tx_cq(device, cq_index);  	iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);  } @@ -1059,3 +1149,51 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context)  	tasklet_schedule(&device->cq_tasklet[cq_index]);  } + +u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, +			     enum iser_data_dir cmd_dir, sector_t *sector) +{ +	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; +	struct fast_reg_descriptor *desc = reg->mem_h; +	unsigned long sector_size = iser_task->sc->device->sector_size; +	struct ib_mr_status mr_status; +	int ret; + +	if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) { +		desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; +		ret = ib_check_mr_status(desc->pi_ctx->sig_mr, +					 IB_MR_CHECK_SIG_STATUS, &mr_status); +		if (ret) { +			pr_err("ib_check_mr_status failed, ret %d\n", ret); +			goto err; +		} + +		if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { +			sector_t sector_off = mr_status.sig_err.sig_err_offset; + +			do_div(sector_off, sector_size + 8); +			*sector = scsi_get_lba(iser_task->sc) + sector_off; + +			pr_err("PI error found type %d at sector %llx " +			       "expected %x vs actual %x\n", +			       mr_status.sig_err.err_type, +			       (unsigned long long)*sector, +			       mr_status.sig_err.expected, +			       mr_status.sig_err.actual); + +			switch (mr_status.sig_err.err_type) { +			case IB_SIG_BAD_GUARD: +				return 0x1; +			case IB_SIG_BAD_REFTAG: +				return 0x3; +			case IB_SIG_BAD_APPTAG: +				return 0x2; +			} +		} +	} + +	return 0; +err: +	/* Not alot we can do here, 
return ambiguous guard error */ +	return 0x1; +} diff --git a/drivers/infiniband/ulp/isert/Kconfig b/drivers/infiniband/ulp/isert/Kconfig index ce3fd32167d..02f9759ebb1 100644 --- a/drivers/infiniband/ulp/isert/Kconfig +++ b/drivers/infiniband/ulp/isert/Kconfig @@ -1,5 +1,5 @@  config INFINIBAND_ISERT -	tristate "iSCSI Extentions for RDMA (iSER) target support" +	tristate "iSCSI Extensions for RDMA (iSER) target support"  	depends on INET && INFINIBAND_ADDR_TRANS && TARGET_CORE && ISCSI_TARGET  	---help--- -	Support for iSCSI Extentions for RDMA (iSER) Target on Infiniband fabrics. +	Support for iSCSI Extensions for RDMA (iSER) Target on Infiniband fabrics. diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3591855cc5b..d4c7928a0f3 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -22,11 +22,13 @@  #include <linux/socket.h>  #include <linux/in.h>  #include <linux/in6.h> +#include <linux/llist.h>  #include <rdma/ib_verbs.h>  #include <rdma/rdma_cm.h>  #include <target/target_core_base.h>  #include <target/target_core_fabric.h>  #include <target/iscsi/iscsi_transport.h> +#include <linux/semaphore.h>  #include "isert_proto.h"  #include "ib_isert.h" @@ -46,10 +48,12 @@ static int  isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,  	       struct isert_rdma_wr *wr);  static void -isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); +isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);  static int -isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, -		    struct isert_rdma_wr *wr); +isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, +	       struct isert_rdma_wr *wr); +static int +isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd);  static void  isert_qp_event_callback(struct ib_event *e, void *context) @@ -86,7 +90,8 @@ isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)  }  static int -isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) +isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id, +		    u8 protection)  {  	struct isert_device *device = isert_conn->conn_device;  	struct ib_qp_init_attr attr; @@ -118,6 +123,8 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)  	attr.cap.max_recv_sge = 1;  	attr.sq_sig_type = IB_SIGNAL_REQ_WR;  	attr.qp_type = IB_QPT_RC; +	if (protection) +		attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;  	pr_debug("isert_conn_setup_qp cma_id->device: %p\n",  		 cma_id->device); @@ -206,7 +213,9 @@ isert_free_rx_descriptors(struct isert_conn *isert_conn)  	isert_conn->conn_rx_descs = NULL;  } +static void isert_cq_tx_work(struct work_struct *);  static void isert_cq_tx_callback(struct ib_cq *, void *); +static void isert_cq_rx_work(struct work_struct *);  static void isert_cq_rx_callback(struct ib_cq *, void *);  static int @@ -223,22 +232,29 @@ isert_create_device_ib_res(struct isert_device *device)  		return ret;  	/* asign function handlers */ -	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { -		device->use_frwr = 1; -		device->reg_rdma_mem = isert_reg_rdma_frwr; -		device->unreg_rdma_mem = isert_unreg_rdma_frwr; +	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS && +	    dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) { +		device->use_fastreg = 1; +		device->reg_rdma_mem = isert_reg_rdma; +		
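
iser_check_task_pi_status() above converts the byte offset reported in the MR status into a sector: with interleaved protection every sector occupies sector_size + 8 bytes, so the offset is divided by that stride before being added to the command's starting LBA, and the guard/app-tag/ref-tag cases are folded into the 0x1/0x2/0x3 codes seen in its switch. A small compilable sketch of just that arithmetic; the helper names and example numbers are illustrative:

#include <stdint.h>
#include <stdio.h>

/* Byte offset of a PI error -> sector index, given 8 bytes of DIF per sector. */
static uint64_t pi_err_sector(uint64_t lba, uint32_t sector_size,
			      uint64_t err_offset)
{
	return lba + err_offset / (sector_size + 8);
}

enum sig_err { ERR_GUARD, ERR_APPTAG, ERR_REFTAG };

/* Same folding as the driver: guard -> 0x1, app tag -> 0x2, ref tag -> 0x3. */
static uint8_t sig_err_to_code(enum sig_err e)
{
	switch (e) {
	case ERR_GUARD:  return 0x1;
	case ERR_APPTAG: return 0x2;
	case ERR_REFTAG: return 0x3;
	}
	return 0x1;                           /* ambiguous -> guard error */
}

int main(void)
{
	/* error 2600 bytes into a 512+8 interleaved stream of a command
	 * starting at LBA 100 -> sector 105 */
	printf("sector %llu, code 0x%x\n",
	       (unsigned long long)pi_err_sector(100, 512, 2600),
	       sig_err_to_code(ERR_REFTAG));
	return 0;
}
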
device->unreg_rdma_mem = isert_unreg_rdma;  	} else { -		device->use_frwr = 0; +		device->use_fastreg = 0;  		device->reg_rdma_mem = isert_map_rdma;  		device->unreg_rdma_mem = isert_unmap_cmd;  	} +	/* Check signature cap */ +	device->pi_capable = dev_attr->device_cap_flags & +			     IB_DEVICE_SIGNATURE_HANDOVER ? true : false; +  	device->cqs_used = min_t(int, num_online_cpus(),  				 device->ib_device->num_comp_vectors);  	device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used); -	pr_debug("Using %d CQs, device %s supports %d vectors support FRWR %d\n", +	pr_debug("Using %d CQs, device %s supports %d vectors support " +		 "Fast registration %d pi_capable %d\n",  		 device->cqs_used, device->ib_device->name, -		 device->ib_device->num_comp_vectors, device->use_frwr); +		 device->ib_device->num_comp_vectors, device->use_fastreg, +		 device->pi_capable);  	device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) *  				device->cqs_used, GFP_KERNEL);  	if (!device->cq_desc) { @@ -247,47 +263,43 @@ isert_create_device_ib_res(struct isert_device *device)  	}  	cq_desc = device->cq_desc; -	device->dev_pd = ib_alloc_pd(ib_dev); -	if (IS_ERR(device->dev_pd)) { -		ret = PTR_ERR(device->dev_pd); -		pr_err("ib_alloc_pd failed for dev_pd: %d\n", ret); -		goto out_cq_desc; -	} -  	for (i = 0; i < device->cqs_used; i++) {  		cq_desc[i].device = device;  		cq_desc[i].cq_index = i; +		INIT_WORK(&cq_desc[i].cq_rx_work, isert_cq_rx_work);  		device->dev_rx_cq[i] = ib_create_cq(device->ib_device,  						isert_cq_rx_callback,  						isert_cq_event_callback,  						(void *)&cq_desc[i],  						ISER_MAX_RX_CQ_LEN, i); -		if (IS_ERR(device->dev_rx_cq[i])) +		if (IS_ERR(device->dev_rx_cq[i])) { +			ret = PTR_ERR(device->dev_rx_cq[i]); +			device->dev_rx_cq[i] = NULL;  			goto out_cq; +		} +		INIT_WORK(&cq_desc[i].cq_tx_work, isert_cq_tx_work);  		device->dev_tx_cq[i] = ib_create_cq(device->ib_device,  						isert_cq_tx_callback,  						isert_cq_event_callback,  						(void *)&cq_desc[i],  						ISER_MAX_TX_CQ_LEN, i); -		if (IS_ERR(device->dev_tx_cq[i])) +		if (IS_ERR(device->dev_tx_cq[i])) { +			ret = PTR_ERR(device->dev_tx_cq[i]); +			device->dev_tx_cq[i] = NULL;  			goto out_cq; +		} -		if (ib_req_notify_cq(device->dev_rx_cq[i], IB_CQ_NEXT_COMP)) +		ret = ib_req_notify_cq(device->dev_rx_cq[i], IB_CQ_NEXT_COMP); +		if (ret)  			goto out_cq; -		if (ib_req_notify_cq(device->dev_tx_cq[i], IB_CQ_NEXT_COMP)) +		ret = ib_req_notify_cq(device->dev_tx_cq[i], IB_CQ_NEXT_COMP); +		if (ret)  			goto out_cq;  	} -	device->dev_mr = ib_get_dma_mr(device->dev_pd, IB_ACCESS_LOCAL_WRITE); -	if (IS_ERR(device->dev_mr)) { -		ret = PTR_ERR(device->dev_mr); -		pr_err("ib_get_dma_mr failed for dev_mr: %d\n", ret); -		goto out_cq; -	} -  	return 0;  out_cq: @@ -303,9 +315,6 @@ out_cq:  			ib_destroy_cq(device->dev_tx_cq[j]);  		}  	} -	ib_dealloc_pd(device->dev_pd); - -out_cq_desc:  	kfree(device->cq_desc);  	return ret; @@ -328,8 +337,6 @@ isert_free_device_ib_res(struct isert_device *device)  		device->dev_tx_cq[i] = NULL;  	} -	ib_dereg_mr(device->dev_mr); -	ib_dealloc_pd(device->dev_pd);  	kfree(device->cq_desc);  } @@ -385,40 +392,136 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id)  }  static void -isert_conn_free_frwr_pool(struct isert_conn *isert_conn) +isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)  {  	struct fast_reg_descriptor *fr_desc, *tmp;  	int i = 0; -	if (list_empty(&isert_conn->conn_frwr_pool)) +	if (list_empty(&isert_conn->conn_fr_pool))  		return; -	pr_debug("Freeing conn %p frwr pool", 
isert_conn); +	pr_debug("Freeing conn %p fastreg pool", isert_conn);  	list_for_each_entry_safe(fr_desc, tmp, -				 &isert_conn->conn_frwr_pool, list) { +				 &isert_conn->conn_fr_pool, list) {  		list_del(&fr_desc->list);  		ib_free_fast_reg_page_list(fr_desc->data_frpl);  		ib_dereg_mr(fr_desc->data_mr); +		if (fr_desc->pi_ctx) { +			ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl); +			ib_dereg_mr(fr_desc->pi_ctx->prot_mr); +			ib_destroy_mr(fr_desc->pi_ctx->sig_mr); +			kfree(fr_desc->pi_ctx); +		}  		kfree(fr_desc);  		++i;  	} -	if (i < isert_conn->conn_frwr_pool_size) +	if (i < isert_conn->conn_fr_pool_size)  		pr_warn("Pool still has %d regions registered\n", -			isert_conn->conn_frwr_pool_size - i); +			isert_conn->conn_fr_pool_size - i); +} + +static int +isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd, +		     struct fast_reg_descriptor *fr_desc, u8 protection) +{ +	int ret; + +	fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device, +							 ISCSI_ISER_SG_TABLESIZE); +	if (IS_ERR(fr_desc->data_frpl)) { +		pr_err("Failed to allocate data frpl err=%ld\n", +		       PTR_ERR(fr_desc->data_frpl)); +		return PTR_ERR(fr_desc->data_frpl); +	} + +	fr_desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE); +	if (IS_ERR(fr_desc->data_mr)) { +		pr_err("Failed to allocate data frmr err=%ld\n", +		       PTR_ERR(fr_desc->data_mr)); +		ret = PTR_ERR(fr_desc->data_mr); +		goto err_data_frpl; +	} +	pr_debug("Create fr_desc %p page_list %p\n", +		 fr_desc, fr_desc->data_frpl->page_list); +	fr_desc->ind |= ISERT_DATA_KEY_VALID; + +	if (protection) { +		struct ib_mr_init_attr mr_init_attr = {0}; +		struct pi_context *pi_ctx; + +		fr_desc->pi_ctx = kzalloc(sizeof(*fr_desc->pi_ctx), GFP_KERNEL); +		if (!fr_desc->pi_ctx) { +			pr_err("Failed to allocate pi context\n"); +			ret = -ENOMEM; +			goto err_data_mr; +		} +		pi_ctx = fr_desc->pi_ctx; + +		pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device, +						    ISCSI_ISER_SG_TABLESIZE); +		if (IS_ERR(pi_ctx->prot_frpl)) { +			pr_err("Failed to allocate prot frpl err=%ld\n", +			       PTR_ERR(pi_ctx->prot_frpl)); +			ret = PTR_ERR(pi_ctx->prot_frpl); +			goto err_pi_ctx; +		} + +		pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE); +		if (IS_ERR(pi_ctx->prot_mr)) { +			pr_err("Failed to allocate prot frmr err=%ld\n", +			       PTR_ERR(pi_ctx->prot_mr)); +			ret = PTR_ERR(pi_ctx->prot_mr); +			goto err_prot_frpl; +		} +		fr_desc->ind |= ISERT_PROT_KEY_VALID; + +		mr_init_attr.max_reg_descriptors = 2; +		mr_init_attr.flags |= IB_MR_SIGNATURE_EN; +		pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr); +		if (IS_ERR(pi_ctx->sig_mr)) { +			pr_err("Failed to allocate signature enabled mr err=%ld\n", +			       PTR_ERR(pi_ctx->sig_mr)); +			ret = PTR_ERR(pi_ctx->sig_mr); +			goto err_prot_mr; +		} +		fr_desc->ind |= ISERT_SIG_KEY_VALID; +	} +	fr_desc->ind &= ~ISERT_PROTECTED; + +	return 0; +err_prot_mr: +	ib_dereg_mr(fr_desc->pi_ctx->prot_mr); +err_prot_frpl: +	ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl); +err_pi_ctx: +	kfree(fr_desc->pi_ctx); +err_data_mr: +	ib_dereg_mr(fr_desc->data_mr); +err_data_frpl: +	ib_free_fast_reg_page_list(fr_desc->data_frpl); + +	return ret;  }  static int -isert_conn_create_frwr_pool(struct isert_conn *isert_conn) +isert_conn_create_fastreg_pool(struct isert_conn *isert_conn, u8 pi_support)  {  	struct fast_reg_descriptor *fr_desc;  	struct isert_device *device = isert_conn->conn_device; -	int i, ret; +	struct se_session *se_sess = 
isert_conn->conn->sess->se_sess; +	struct se_node_acl *se_nacl = se_sess->se_node_acl; +	int i, ret, tag_num; +	/* +	 * Setup the number of FRMRs based upon the number of tags +	 * available to session in iscsi_target_locate_portal(). +	 */ +	tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth); +	tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS; -	INIT_LIST_HEAD(&isert_conn->conn_frwr_pool); -	isert_conn->conn_frwr_pool_size = 0; -	for (i = 0; i < ISCSI_DEF_XMIT_CMDS_MAX; i++) { +	isert_conn->conn_fr_pool_size = 0; +	for (i = 0; i < tag_num; i++) {  		fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);  		if (!fr_desc) {  			pr_err("Failed to allocate fast_reg descriptor\n"); @@ -426,40 +529,27 @@ isert_conn_create_frwr_pool(struct isert_conn *isert_conn)  			goto err;  		} -		fr_desc->data_frpl = -			ib_alloc_fast_reg_page_list(device->ib_device, -						    ISCSI_ISER_SG_TABLESIZE); -		if (IS_ERR(fr_desc->data_frpl)) { -			pr_err("Failed to allocate fr_pg_list err=%ld\n", -			       PTR_ERR(fr_desc->data_frpl)); -			ret = PTR_ERR(fr_desc->data_frpl); -			goto err; -		} - -		fr_desc->data_mr = ib_alloc_fast_reg_mr(device->dev_pd, -					ISCSI_ISER_SG_TABLESIZE); -		if (IS_ERR(fr_desc->data_mr)) { -			pr_err("Failed to allocate frmr err=%ld\n", -			       PTR_ERR(fr_desc->data_mr)); -			ret = PTR_ERR(fr_desc->data_mr); -			ib_free_fast_reg_page_list(fr_desc->data_frpl); +		ret = isert_create_fr_desc(device->ib_device, +					   isert_conn->conn_pd, fr_desc, +					   pi_support); +		if (ret) { +			pr_err("Failed to create fastreg descriptor err=%d\n", +			       ret); +			kfree(fr_desc);  			goto err;  		} -		pr_debug("Create fr_desc %p page_list %p\n", -			 fr_desc, fr_desc->data_frpl->page_list); -		fr_desc->valid = true; -		list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); -		isert_conn->conn_frwr_pool_size++; +		list_add_tail(&fr_desc->list, &isert_conn->conn_fr_pool); +		isert_conn->conn_fr_pool_size++;  	} -	pr_debug("Creating conn %p frwr pool size=%d", -		 isert_conn, isert_conn->conn_frwr_pool_size); +	pr_debug("Creating conn %p fastreg pool size=%d", +		 isert_conn, isert_conn->conn_fr_pool_size);  	return 0;  err: -	isert_conn_free_frwr_pool(isert_conn); +	isert_conn_free_fastreg_pool(isert_conn);  	return ret;  } @@ -472,6 +562,15 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)  	struct isert_device *device;  	struct ib_device *ib_dev = cma_id->device;  	int ret = 0; +	u8 pi_support; + +	spin_lock_bh(&np->np_thread_lock); +	if (!np->enabled) { +		spin_unlock_bh(&np->np_thread_lock); +		pr_debug("iscsi_np is not enabled, reject connect request\n"); +		return rdma_reject(cma_id, NULL, 0); +	} +	spin_unlock_bh(&np->np_thread_lock);  	pr_debug("Entering isert_connect_request cma_id: %p, context: %p\n",  		 cma_id, cma_id->context); @@ -484,12 +583,13 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)  	isert_conn->state = ISER_CONN_INIT;  	INIT_LIST_HEAD(&isert_conn->conn_accept_node);  	init_completion(&isert_conn->conn_login_comp); -	init_waitqueue_head(&isert_conn->conn_wait); -	init_waitqueue_head(&isert_conn->conn_wait_comp_err); +	init_completion(&isert_conn->conn_wait); +	init_completion(&isert_conn->conn_wait_comp_err);  	kref_init(&isert_conn->conn_kref);  	kref_get(&isert_conn->conn_kref);  	mutex_init(&isert_conn->conn_mutex);  	spin_lock_init(&isert_conn->conn_lock); +	INIT_LIST_HEAD(&isert_conn->conn_fr_pool);  	cma_id->context = isert_conn;  	isert_conn->conn_cm_id = cma_id; @@ -544,33 +644,48 @@ 
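
The tag_num computation above sizes the per-connection fastreg pool from the session's queue depth rather than the old fixed ISCSI_DEF_XMIT_CMDS_MAX. A one-function sketch of that sizing rule; MIN_TAGS and EXTRA_TAGS are stand-ins for ISCSIT_MIN_TAGS and ISCSIT_EXTRA_TAGS, whose actual values are not shown in this hunk:

#include <stdio.h>

#define MIN_TAGS   16        /* stand-in for ISCSIT_MIN_TAGS   */
#define EXTRA_TAGS 8         /* stand-in for ISCSIT_EXTRA_TAGS */

static unsigned int fr_pool_size(unsigned int queue_depth)
{
	unsigned int tag_num = queue_depth > MIN_TAGS ? queue_depth : MIN_TAGS;

	return tag_num * 2 + EXTRA_TAGS;      /* (tag_num * 2) + ISCSIT_EXTRA_TAGS */
}

int main(void)
{
	printf("queue_depth 32 -> %u descriptors\n", fr_pool_size(32));
	return 0;
}
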
isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)  	}  	isert_conn->conn_device = device; -	isert_conn->conn_pd = device->dev_pd; -	isert_conn->conn_mr = device->dev_mr; +	isert_conn->conn_pd = ib_alloc_pd(isert_conn->conn_device->ib_device); +	if (IS_ERR(isert_conn->conn_pd)) { +		ret = PTR_ERR(isert_conn->conn_pd); +		pr_err("ib_alloc_pd failed for conn %p: ret=%d\n", +		       isert_conn, ret); +		goto out_pd; +	} -	if (device->use_frwr) { -		ret = isert_conn_create_frwr_pool(isert_conn); -		if (ret) { -			pr_err("Conn: %p failed to create frwr_pool\n", isert_conn); -			goto out_frwr; -		} +	isert_conn->conn_mr = ib_get_dma_mr(isert_conn->conn_pd, +					   IB_ACCESS_LOCAL_WRITE); +	if (IS_ERR(isert_conn->conn_mr)) { +		ret = PTR_ERR(isert_conn->conn_mr); +		pr_err("ib_get_dma_mr failed for conn %p: ret=%d\n", +		       isert_conn, ret); +		goto out_mr; +	} + +	pi_support = np->tpg_np->tpg->tpg_attrib.t10_pi; +	if (pi_support && !device->pi_capable) { +		pr_err("Protection information requested but not supported, " +		       "rejecting connect request\n"); +		ret = rdma_reject(cma_id, NULL, 0); +		goto out_mr;  	} -	ret = isert_conn_setup_qp(isert_conn, cma_id); +	ret = isert_conn_setup_qp(isert_conn, cma_id, pi_support);  	if (ret)  		goto out_conn_dev;  	mutex_lock(&isert_np->np_accept_mutex); -	list_add_tail(&isert_np->np_accept_list, &isert_conn->conn_accept_node); +	list_add_tail(&isert_conn->conn_accept_node, &isert_np->np_accept_list);  	mutex_unlock(&isert_np->np_accept_mutex); -	pr_debug("isert_connect_request() waking up np_accept_wq: %p\n", np); -	wake_up(&isert_np->np_accept_wq); +	pr_debug("isert_connect_request() up np_sem np: %p\n", np); +	up(&isert_np->np_sem);  	return 0;  out_conn_dev: -	if (device->use_frwr) -		isert_conn_free_frwr_pool(isert_conn); -out_frwr: +	ib_dereg_mr(isert_conn->conn_mr); +out_mr: +	ib_dealloc_pd(isert_conn->conn_pd); +out_pd:  	isert_device_try_release(device);  out_rsp_dma_map:  	ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma, @@ -594,8 +709,8 @@ isert_connect_release(struct isert_conn *isert_conn)  	pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); -	if (device->use_frwr) -		isert_conn_free_frwr_pool(isert_conn); +	if (device && device->use_fastreg) +		isert_conn_free_fastreg_pool(isert_conn);  	if (isert_conn->conn_qp) {  		cq_index = ((struct isert_cq_desc *) @@ -609,6 +724,9 @@ isert_connect_release(struct isert_conn *isert_conn)  	isert_free_rx_descriptors(isert_conn);  	rdma_destroy_id(isert_conn->conn_cm_id); +	ib_dereg_mr(isert_conn->conn_mr); +	ib_dealloc_pd(isert_conn->conn_pd); +  	if (isert_conn->login_buf) {  		ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,  				    ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE); @@ -657,11 +775,11 @@ isert_disconnect_work(struct work_struct *work)  	pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");  	mutex_lock(&isert_conn->conn_mutex); -	isert_conn->state = ISER_CONN_DOWN; +	if (isert_conn->state == ISER_CONN_UP) +		isert_conn->state = ISER_CONN_TERMINATING;  	if (isert_conn->post_recv_buf_count == 0 &&  	    atomic_read(&isert_conn->post_send_buf_count) == 0) { -		pr_debug("Calling wake_up(&isert_conn->conn_wait);\n");  		mutex_unlock(&isert_conn->conn_mutex);  		goto wake_up;  	} @@ -670,26 +788,25 @@ isert_disconnect_work(struct work_struct *work)  		isert_put_conn(isert_conn);  		return;  	} -	if (!isert_conn->logout_posted) { -		pr_debug("Calling rdma_disconnect for !logout_posted from" -			 " 
isert_disconnect_work\n"); + +	if (isert_conn->disconnect) { +		/* Send DREQ/DREP towards our initiator */  		rdma_disconnect(isert_conn->conn_cm_id); -		mutex_unlock(&isert_conn->conn_mutex); -		iscsit_cause_connection_reinstatement(isert_conn->conn, 0); -		goto wake_up;  	} +  	mutex_unlock(&isert_conn->conn_mutex);  wake_up: -	wake_up(&isert_conn->conn_wait); +	complete(&isert_conn->conn_wait);  	isert_put_conn(isert_conn);  }  static void -isert_disconnected_handler(struct rdma_cm_id *cma_id) +isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect)  {  	struct isert_conn *isert_conn = (struct isert_conn *)cma_id->context; +	isert_conn->disconnect = disconnect;  	INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work);  	schedule_work(&isert_conn->conn_logout_work);  } @@ -698,29 +815,28 @@ static int  isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)  {  	int ret = 0; +	bool disconnect = false;  	pr_debug("isert_cma_handler: event %d status %d conn %p id %p\n",  		 event->event, event->status, cma_id->context, cma_id);  	switch (event->event) {  	case RDMA_CM_EVENT_CONNECT_REQUEST: -		pr_debug("RDMA_CM_EVENT_CONNECT_REQUEST: >>>>>>>>>>>>>>>\n");  		ret = isert_connect_request(cma_id, event);  		break;  	case RDMA_CM_EVENT_ESTABLISHED: -		pr_debug("RDMA_CM_EVENT_ESTABLISHED >>>>>>>>>>>>>>\n");  		isert_connected_handler(cma_id);  		break; -	case RDMA_CM_EVENT_DISCONNECTED: -		pr_debug("RDMA_CM_EVENT_DISCONNECTED: >>>>>>>>>>>>>>\n"); -		isert_disconnected_handler(cma_id); -		break; -	case RDMA_CM_EVENT_DEVICE_REMOVAL: -	case RDMA_CM_EVENT_ADDR_CHANGE: +	case RDMA_CM_EVENT_ADDR_CHANGE:    /* FALLTHRU */ +	case RDMA_CM_EVENT_DISCONNECTED:   /* FALLTHRU */ +	case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ +		disconnect = true; +	case RDMA_CM_EVENT_TIMEWAIT_EXIT:  /* FALLTHRU */ +		isert_disconnected_handler(cma_id, disconnect);  		break;  	case RDMA_CM_EVENT_CONNECT_ERROR:  	default: -		pr_err("Unknown RDMA CMA event: %d\n", event->event); +		pr_err("Unhandled RDMA CMA event: %d\n", event->event);  		break;  	} @@ -843,14 +959,33 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn,  }  static void -isert_init_send_wr(struct isert_cmd *isert_cmd, struct ib_send_wr *send_wr) +isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, +		   struct ib_send_wr *send_wr, bool coalesce)  { +	struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc; +  	isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;  	send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;  	send_wr->opcode = IB_WR_SEND; -	send_wr->send_flags = IB_SEND_SIGNALED; -	send_wr->sg_list = &isert_cmd->tx_desc.tx_sg[0]; +	send_wr->sg_list = &tx_desc->tx_sg[0];  	send_wr->num_sge = isert_cmd->tx_desc.num_sge; +	/* +	 * Coalesce send completion interrupts by only setting IB_SEND_SIGNALED +	 * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls. 
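
The coalescing described in this comment sets IB_SEND_SIGNALED only on every ISERT_COMP_BATCH_COUNT-th post; unsignaled descriptors are parked on conn_comp_llist and reaped when the signaled completion arrives. A user-space model of just the signal/no-signal decision (COMP_BATCH and the counters are stand-ins):

#include <stdio.h>

#define COMP_BATCH 4

struct coalesce_state {
	int batch;           /* sends since the last signaled one     */
	int pending;         /* unsignaled descriptors on the llist   */
};

/* Returns 1 if this send must carry IB_SEND_SIGNALED. */
static int next_send_signaled(struct coalesce_state *s)
{
	if (++s->batch < COMP_BATCH) {
		s->pending++;                 /* llist_add(&tx_desc->comp_llnode) */
		return 0;
	}
	s->batch = 0;                         /* llist_del_all() batch is handed  */
	s->pending = 0;                       /* to the signaled descriptor       */
	return 1;
}

int main(void)
{
	struct coalesce_state s = { 0, 0 };

	for (int i = 1; i <= 8; i++)
		printf("send %d signaled=%d\n", i, next_send_signaled(&s));
	return 0;
}
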
+	 */ +	mutex_lock(&isert_conn->conn_mutex); +	if (coalesce && isert_conn->state == ISER_CONN_UP && +	    ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) { +		tx_desc->llnode_active = true; +		llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist); +		mutex_unlock(&isert_conn->conn_mutex); +		return; +	} +	isert_conn->conn_comp_batch = 0; +	tx_desc->comp_llnode_batch = llist_del_all(&isert_conn->conn_comp_llist); +	mutex_unlock(&isert_conn->conn_mutex); + +	send_wr->send_flags = IB_SEND_SIGNALED;  }  static int @@ -918,6 +1053,20 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,  	}  	if (!login->login_failed) {  		if (login->login_complete) { +			if (!conn->sess->sess_ops->SessionType && +			    isert_conn->conn_device->use_fastreg) { +				/* Normal Session and fastreg is used */ +				u8 pi_support = login->np->tpg_np->tpg->tpg_attrib.t10_pi; + +				ret = isert_conn_create_fastreg_pool(isert_conn, +								     pi_support); +				if (ret) { +					pr_err("Conn: %p failed to create" +					       " fastreg pool\n", isert_conn); +					return ret; +				} +			} +  			ret = isert_alloc_rx_descriptors(isert_conn);  			if (ret)  				return ret; @@ -992,13 +1141,13 @@ isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen,  }  static struct iscsi_cmd -*isert_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp) +*isert_allocate_cmd(struct iscsi_conn *conn)  {  	struct isert_conn *isert_conn = (struct isert_conn *)conn->context;  	struct isert_cmd *isert_cmd;  	struct iscsi_cmd *cmd; -	cmd = iscsit_allocate_cmd(conn, gfp); +	cmd = iscsit_allocate_cmd(conn, TASK_INTERRUPTIBLE);  	if (!cmd) {  		pr_err("Unable to allocate iscsi_cmd + isert_cmd\n");  		return NULL; @@ -1062,6 +1211,8 @@ sequence_cmd:  	if (!rc && dump_payload == false && unsol_data)  		iscsit_set_unsoliticed_dataout(cmd); +	else if (dump_payload && imm_data) +		target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd);  	return 0;  } @@ -1187,7 +1338,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,  	switch (opcode) {  	case ISCSI_OP_SCSI_CMD: -		cmd = isert_allocate_cmd(conn, GFP_KERNEL); +		cmd = isert_allocate_cmd(conn);  		if (!cmd)  			break; @@ -1201,7 +1352,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,  					rx_desc, (unsigned char *)hdr);  		break;  	case ISCSI_OP_NOOP_OUT: -		cmd = isert_allocate_cmd(conn, GFP_KERNEL); +		cmd = isert_allocate_cmd(conn);  		if (!cmd)  			break; @@ -1214,7 +1365,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,  						(unsigned char *)hdr);  		break;  	case ISCSI_OP_SCSI_TMFUNC: -		cmd = isert_allocate_cmd(conn, GFP_KERNEL); +		cmd = isert_allocate_cmd(conn);  		if (!cmd)  			break; @@ -1222,7 +1373,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,  						(unsigned char *)hdr);  		break;  	case ISCSI_OP_LOGOUT: -		cmd = isert_allocate_cmd(conn, GFP_KERNEL); +		cmd = isert_allocate_cmd(conn);  		if (!cmd)  			break; @@ -1233,7 +1384,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,  						    HZ);  		break;  	case ISCSI_OP_TEXT: -		cmd = isert_allocate_cmd(conn, GFP_KERNEL); +		cmd = isert_allocate_cmd(conn);  		if (!cmd)  			break; @@ -1343,19 +1494,60 @@ isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,  	}  } +static int +isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, +		   struct scatterlist *sg, u32 nents, u32 length, u32 offset, +		
   enum iser_ib_op_code op, struct isert_data_buf *data) +{ +	struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + +	data->dma_dir = op == ISER_IB_RDMA_WRITE ? +			      DMA_TO_DEVICE : DMA_FROM_DEVICE; + +	data->len = length - offset; +	data->offset = offset; +	data->sg_off = data->offset / PAGE_SIZE; + +	data->sg = &sg[data->sg_off]; +	data->nents = min_t(unsigned int, nents - data->sg_off, +					  ISCSI_ISER_SG_TABLESIZE); +	data->len = min_t(unsigned int, data->len, ISCSI_ISER_SG_TABLESIZE * +					PAGE_SIZE); + +	data->dma_nents = ib_dma_map_sg(ib_dev, data->sg, data->nents, +					data->dma_dir); +	if (unlikely(!data->dma_nents)) { +		pr_err("Cmd: unable to dma map SGs %p\n", sg); +		return -EINVAL; +	} + +	pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", +		 isert_cmd, data->dma_nents, data->sg, data->nents, data->len); + +	return 0; +} + +static void +isert_unmap_data_buf(struct isert_conn *isert_conn, struct isert_data_buf *data) +{ +	struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + +	ib_dma_unmap_sg(ib_dev, data->sg, data->nents, data->dma_dir); +	memset(data, 0, sizeof(*data)); +} + + +  static void  isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)  {  	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; -	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;  	pr_debug("isert_unmap_cmd: %p\n", isert_cmd); -	if (wr->sge) { + +	if (wr->data.sg) {  		pr_debug("isert_unmap_cmd: %p unmap_sg op\n", isert_cmd); -		ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, -				(wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? -				DMA_TO_DEVICE : DMA_FROM_DEVICE); -		wr->sge = NULL; +		isert_unmap_data_buf(isert_conn, &wr->data);  	}  	if (wr->send_wr) { @@ -1372,29 +1564,29 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)  }  static void -isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) +isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)  {  	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; -	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;  	LIST_HEAD(unmap_list); -	pr_debug("unreg_frwr_cmd: %p\n", isert_cmd); +	pr_debug("unreg_fastreg_cmd: %p\n", isert_cmd);  	if (wr->fr_desc) { -		pr_debug("unreg_frwr_cmd: %p free fr_desc %p\n", +		pr_debug("unreg_fastreg_cmd: %p free fr_desc %p\n",  			 isert_cmd, wr->fr_desc); +		if (wr->fr_desc->ind & ISERT_PROTECTED) { +			isert_unmap_data_buf(isert_conn, &wr->prot); +			wr->fr_desc->ind &= ~ISERT_PROTECTED; +		}  		spin_lock_bh(&isert_conn->conn_lock); -		list_add_tail(&wr->fr_desc->list, &isert_conn->conn_frwr_pool); +		list_add_tail(&wr->fr_desc->list, &isert_conn->conn_fr_pool);  		spin_unlock_bh(&isert_conn->conn_lock);  		wr->fr_desc = NULL;  	} -	if (wr->sge) { -		pr_debug("unreg_frwr_cmd: %p unmap_sg op\n", isert_cmd); -		ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, -				(wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
-				DMA_TO_DEVICE : DMA_FROM_DEVICE); -		wr->sge = NULL; +	if (wr->data.sg) { +		pr_debug("unreg_fastreg_cmd: %p unmap_sg op\n", isert_cmd); +		isert_unmap_data_buf(isert_conn, &wr->data);  	}  	wr->ib_sge = NULL; @@ -1402,7 +1594,7 @@ isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn  }  static void -isert_put_cmd(struct isert_cmd *isert_cmd) +isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)  {  	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;  	struct isert_conn *isert_conn = isert_cmd->conn; @@ -1415,11 +1607,24 @@ isert_put_cmd(struct isert_cmd *isert_cmd)  	case ISCSI_OP_SCSI_CMD:  		spin_lock_bh(&conn->cmd_lock);  		if (!list_empty(&cmd->i_conn_node)) -			list_del(&cmd->i_conn_node); +			list_del_init(&cmd->i_conn_node);  		spin_unlock_bh(&conn->cmd_lock); -		if (cmd->data_direction == DMA_TO_DEVICE) +		if (cmd->data_direction == DMA_TO_DEVICE) {  			iscsit_stop_dataout_timer(cmd); +			/* +			 * Check for special case during comp_err where +			 * WRITE_PENDING has been handed off from core, +			 * but requires an extra target_put_sess_cmd() +			 * before transport_generic_free_cmd() below. +			 */ +			if (comp_err && +			    cmd->se_cmd.t_state == TRANSPORT_WRITE_PENDING) { +				struct se_cmd *se_cmd = &cmd->se_cmd; + +				target_put_sess_cmd(se_cmd->se_sess, se_cmd); +			} +		}  		device->unreg_rdma_mem(isert_cmd, isert_conn);  		transport_generic_free_cmd(&cmd->se_cmd, 0); @@ -1427,7 +1632,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd)  	case ISCSI_OP_SCSI_TMFUNC:  		spin_lock_bh(&conn->cmd_lock);  		if (!list_empty(&cmd->i_conn_node)) -			list_del(&cmd->i_conn_node); +			list_del_init(&cmd->i_conn_node);  		spin_unlock_bh(&conn->cmd_lock);  		transport_generic_free_cmd(&cmd->se_cmd, 0); @@ -1437,7 +1642,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd)  	case ISCSI_OP_TEXT:  		spin_lock_bh(&conn->cmd_lock);  		if (!list_empty(&cmd->i_conn_node)) -			list_del(&cmd->i_conn_node); +			list_del_init(&cmd->i_conn_node);  		spin_unlock_bh(&conn->cmd_lock);  		/* @@ -1474,7 +1679,7 @@ isert_unmap_tx_desc(struct iser_tx_desc *tx_desc, struct ib_device *ib_dev)  static void  isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd, -		     struct ib_device *ib_dev) +		     struct ib_device *ib_dev, bool comp_err)  {  	if (isert_cmd->pdu_buf_dma != 0) {  		pr_debug("Calling ib_dma_unmap_single for isert_cmd->pdu_buf_dma\n"); @@ -1484,7 +1689,77 @@ isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd,  	}  	isert_unmap_tx_desc(tx_desc, ib_dev); -	isert_put_cmd(isert_cmd); +	isert_put_cmd(isert_cmd, comp_err); +} + +static int +isert_check_pi_status(struct se_cmd *se_cmd, struct ib_mr *sig_mr) +{ +	struct ib_mr_status mr_status; +	int ret; + +	ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); +	if (ret) { +		pr_err("ib_check_mr_status failed, ret %d\n", ret); +		goto fail_mr_status; +	} + +	if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { +		u64 sec_offset_err; +		u32 block_size = se_cmd->se_dev->dev_attrib.block_size + 8; + +		switch (mr_status.sig_err.err_type) { +		case IB_SIG_BAD_GUARD: +			se_cmd->pi_err = TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED; +			break; +		case IB_SIG_BAD_REFTAG: +			se_cmd->pi_err = TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED; +			break; +		case IB_SIG_BAD_APPTAG: +			se_cmd->pi_err = TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED; +			break; +		} +		sec_offset_err = mr_status.sig_err.sig_err_offset; +		do_div(sec_offset_err, block_size); +		se_cmd->bad_sector = 
sec_offset_err + se_cmd->t_task_lba; + +		pr_err("isert: PI error found type %d at sector 0x%llx " +		       "expected 0x%x vs actual 0x%x\n", +		       mr_status.sig_err.err_type, +		       (unsigned long long)se_cmd->bad_sector, +		       mr_status.sig_err.expected, +		       mr_status.sig_err.actual); +		ret = 1; +	} + +fail_mr_status: +	return ret; +} + +static void +isert_completion_rdma_write(struct iser_tx_desc *tx_desc, +			    struct isert_cmd *isert_cmd) +{ +	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; +	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; +	struct se_cmd *se_cmd = &cmd->se_cmd; +	struct isert_conn *isert_conn = isert_cmd->conn; +	struct isert_device *device = isert_conn->conn_device; +	int ret = 0; + +	if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) { +		ret = isert_check_pi_status(se_cmd, +					    wr->fr_desc->pi_ctx->sig_mr); +		wr->fr_desc->ind &= ~ISERT_PROTECTED; +	} + +	device->unreg_rdma_mem(isert_cmd, isert_conn); +	wr->send_wr_num = 0; +	if (ret) +		transport_send_check_condition_and_sense(se_cmd, +							 se_cmd->pi_err, 0); +	else +		isert_put_response(isert_conn->conn, cmd);  }  static void @@ -1496,10 +1771,18 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,  	struct se_cmd *se_cmd = &cmd->se_cmd;  	struct isert_conn *isert_conn = isert_cmd->conn;  	struct isert_device *device = isert_conn->conn_device; +	int ret = 0; + +	if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) { +		ret = isert_check_pi_status(se_cmd, +					    wr->fr_desc->pi_ctx->sig_mr); +		wr->fr_desc->ind &= ~ISERT_PROTECTED; +	}  	iscsit_stop_dataout_timer(cmd);  	device->unreg_rdma_mem(isert_cmd, isert_conn); -	cmd->write_data_done = wr->cur_rdma_length; +	cmd->write_data_done = wr->data.len; +	wr->send_wr_num = 0;  	pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);  	spin_lock_bh(&cmd->istate_lock); @@ -1507,7 +1790,11 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,  	cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;  	spin_unlock_bh(&cmd->istate_lock); -	target_execute_cmd(se_cmd); +	if (ret) +		transport_send_check_condition_and_sense(se_cmd, +							 se_cmd->pi_err, 0); +	else +		target_execute_cmd(se_cmd);  }  static void @@ -1527,28 +1814,25 @@ isert_do_control_comp(struct work_struct *work)  		iscsit_tmr_post_handler(cmd, cmd->conn);  		cmd->i_state = ISTATE_SENT_STATUS; -		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev); +		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);  		break;  	case ISTATE_SEND_REJECT:  		pr_debug("Got isert_do_control_comp ISTATE_SEND_REJECT: >>>\n");  		atomic_dec(&isert_conn->post_send_buf_count);  		cmd->i_state = ISTATE_SENT_STATUS; -		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev); +		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);  		break;  	case ISTATE_SEND_LOGOUTRSP:  		pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n"); -		/* -		 * Call atomic_dec(&isert_conn->post_send_buf_count) -		 * from isert_free_conn() -		 */ -		isert_conn->logout_posted = true; + +		atomic_dec(&isert_conn->post_send_buf_count);  		iscsit_logout_post_handler(cmd, cmd->conn);  		break;  	case ISTATE_SEND_TEXTRSP:  		atomic_dec(&isert_conn->post_send_buf_count);  		cmd->i_state = ISTATE_SENT_STATUS; -		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev); +		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);  		break;  	default:  		pr_err("Unknown do_control_comp i_state %d\n", cmd->i_state); @@ -1564,6 +1848,7 @@ 
isert_response_completion(struct iser_tx_desc *tx_desc,  			  struct ib_device *ib_dev)  {  	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; +	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;  	if (cmd->i_state == ISTATE_SEND_TASKMGTRSP ||  	    cmd->i_state == ISTATE_SEND_LOGOUTRSP || @@ -1575,15 +1860,26 @@ isert_response_completion(struct iser_tx_desc *tx_desc,  		queue_work(isert_comp_wq, &isert_cmd->comp_work);  		return;  	} -	atomic_dec(&isert_conn->post_send_buf_count); + +	/** +	 * If send_wr_num is 0 this means that we got +	 * RDMA completion and we cleared it and we should +	 * simply decrement the response post. else the +	 * response is incorporated in send_wr_num, just +	 * sub it. +	 **/ +	if (wr->send_wr_num) +		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); +	else +		atomic_dec(&isert_conn->post_send_buf_count);  	cmd->i_state = ISTATE_SENT_STATUS; -	isert_completion_put(tx_desc, isert_cmd, ib_dev); +	isert_completion_put(tx_desc, isert_cmd, ib_dev, false);  }  static void -isert_send_completion(struct iser_tx_desc *tx_desc, -		      struct isert_conn *isert_conn) +__isert_send_completion(struct iser_tx_desc *tx_desc, +		        struct isert_conn *isert_conn)  {  	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;  	struct isert_cmd *isert_cmd = tx_desc->isert_cmd; @@ -1607,13 +1903,14 @@ isert_send_completion(struct iser_tx_desc *tx_desc,  					  isert_conn, ib_dev);  		break;  	case ISER_IB_RDMA_WRITE: -		pr_err("isert_send_completion: Got ISER_IB_RDMA_WRITE\n"); -		dump_stack(); +		pr_debug("isert_send_completion: Got ISER_IB_RDMA_WRITE\n"); +		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); +		isert_completion_rdma_write(tx_desc, isert_cmd);  		break;  	case ISER_IB_RDMA_READ:  		pr_debug("isert_send_completion: Got ISER_IB_RDMA_READ:\n"); -		atomic_dec(&isert_conn->post_send_buf_count); +		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);  		isert_completion_rdma_read(tx_desc, isert_cmd);  		break;  	default: @@ -1624,31 +1921,120 @@ isert_send_completion(struct iser_tx_desc *tx_desc,  }  static void -isert_cq_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) +isert_send_completion(struct iser_tx_desc *tx_desc, +		      struct isert_conn *isert_conn) +{ +	struct llist_node *llnode = tx_desc->comp_llnode_batch; +	struct iser_tx_desc *t; +	/* +	 * Drain coalesced completion llist starting from comp_llnode_batch +	 * setup in isert_init_send_wr(), and then complete trailing tx_desc. +	 */ +	while (llnode) { +		t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); +		llnode = llist_next(llnode); +		__isert_send_completion(t, isert_conn); +	} +	__isert_send_completion(tx_desc, isert_conn); +} + +static void +isert_cq_drain_comp_llist(struct isert_conn *isert_conn, struct ib_device *ib_dev)  { -	struct ib_device *ib_dev = isert_conn->conn_cm_id->device; +	struct llist_node *llnode; +	struct isert_rdma_wr *wr; +	struct iser_tx_desc *t; + +	mutex_lock(&isert_conn->conn_mutex); +	llnode = llist_del_all(&isert_conn->conn_comp_llist); +	isert_conn->conn_comp_batch = 0; +	mutex_unlock(&isert_conn->conn_mutex); + +	while (llnode) { +		t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); +		llnode = llist_next(llnode); +		wr = &t->isert_cmd->rdma_wr; + +		/** +		 * If send_wr_num is 0 this means that we got +		 * RDMA completion and we cleared it and we should +		 * simply decrement the response post. else the +		 * response is incorporated in send_wr_num, just +		 * sub it. 
+		 **/ +		if (wr->send_wr_num) +			atomic_sub(wr->send_wr_num, +				   &isert_conn->post_send_buf_count); +		else +			atomic_dec(&isert_conn->post_send_buf_count); -	if (tx_desc) { -		struct isert_cmd *isert_cmd = tx_desc->isert_cmd; +		isert_completion_put(t, t->isert_cmd, ib_dev, true); +	} +} -		if (!isert_cmd) -			isert_unmap_tx_desc(tx_desc, ib_dev); +static void +isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) +{ +	struct ib_device *ib_dev = isert_conn->conn_cm_id->device; +	struct isert_cmd *isert_cmd = tx_desc->isert_cmd; +	struct llist_node *llnode = tx_desc->comp_llnode_batch; +	struct isert_rdma_wr *wr; +	struct iser_tx_desc *t; + +	while (llnode) { +		t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); +		llnode = llist_next(llnode); +		wr = &t->isert_cmd->rdma_wr; + +		/** +		 * If send_wr_num is 0 this means that we got +		 * RDMA completion and we cleared it and we should +		 * simply decrement the response post. else the +		 * response is incorporated in send_wr_num, just +		 * sub it. +		 **/ +		if (wr->send_wr_num) +			atomic_sub(wr->send_wr_num, +				   &isert_conn->post_send_buf_count);  		else -			isert_completion_put(tx_desc, isert_cmd, ib_dev); +			atomic_dec(&isert_conn->post_send_buf_count); + +		isert_completion_put(t, t->isert_cmd, ib_dev, true);  	} +	tx_desc->comp_llnode_batch = NULL; -	if (isert_conn->post_recv_buf_count == 0 && -	    atomic_read(&isert_conn->post_send_buf_count) == 0) { -		pr_debug("isert_cq_comp_err >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); -		pr_debug("Calling wake_up from isert_cq_comp_err\n"); +	if (!isert_cmd) +		isert_unmap_tx_desc(tx_desc, ib_dev); +	else +		isert_completion_put(tx_desc, isert_cmd, ib_dev, true); +} -		mutex_lock(&isert_conn->conn_mutex); -		if (isert_conn->state != ISER_CONN_DOWN) -			isert_conn->state = ISER_CONN_TERMINATING; -		mutex_unlock(&isert_conn->conn_mutex); +static void +isert_cq_rx_comp_err(struct isert_conn *isert_conn) +{ +	struct ib_device *ib_dev = isert_conn->conn_cm_id->device; +	struct iscsi_conn *conn = isert_conn->conn; + +	if (isert_conn->post_recv_buf_count) +		return; + +	isert_cq_drain_comp_llist(isert_conn, ib_dev); -		wake_up(&isert_conn->conn_wait_comp_err); +	if (conn->sess) { +		target_sess_cmd_list_set_waiting(conn->sess->se_sess); +		target_wait_for_sess_cmds(conn->sess->se_sess);  	} + +	while (atomic_read(&isert_conn->post_send_buf_count)) +		msleep(3000); + +	mutex_lock(&isert_conn->conn_mutex); +	isert_conn->state = ISER_CONN_DOWN; +	mutex_unlock(&isert_conn->conn_mutex); + +	iscsit_cause_connection_reinstatement(isert_conn->conn, 0); + +	complete(&isert_conn->conn_wait_comp_err);  }  static void @@ -1673,8 +2059,14 @@ isert_cq_tx_work(struct work_struct *work)  			pr_debug("TX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n");  			pr_debug("TX wc.status: 0x%08x\n", wc.status);  			pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err); -			atomic_dec(&isert_conn->post_send_buf_count); -			isert_cq_comp_err(tx_desc, isert_conn); + +			if (wc.wr_id != ISER_FASTREG_LI_WRID) { +				if (tx_desc->llnode_active) +					continue; + +				atomic_dec(&isert_conn->post_send_buf_count); +				isert_cq_tx_comp_err(tx_desc, isert_conn); +			}  		}  	} @@ -1686,7 +2078,6 @@ isert_cq_tx_callback(struct ib_cq *cq, void *context)  {  	struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context; -	INIT_WORK(&cq_desc->cq_tx_work, isert_cq_tx_work);  	queue_work(isert_comp_wq, &cq_desc->cq_tx_work);  } @@ -1718,7 +2109,7 @@ isert_cq_rx_work(struct work_struct *work)  				
	 wc.vendor_err);  			}  			isert_conn->post_recv_buf_count--; -			isert_cq_comp_err(NULL, isert_conn); +			isert_cq_rx_comp_err(isert_conn);  		}  	} @@ -1730,7 +2121,6 @@ isert_cq_rx_callback(struct ib_cq *cq, void *context)  {  	struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context; -	INIT_WORK(&cq_desc->cq_rx_work, isert_cq_rx_work);  	queue_work(isert_rx_wq, &cq_desc->cq_rx_work);  } @@ -1793,13 +2183,43 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)  		isert_cmd->tx_desc.num_sge = 2;  	} -	isert_init_send_wr(isert_cmd, send_wr); +	isert_init_send_wr(isert_conn, isert_cmd, send_wr, true);  	pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");  	return isert_post_response(isert_conn, isert_cmd);  } +static void +isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +{ +	struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); +	struct isert_conn *isert_conn = (struct isert_conn *)conn->context; +	struct isert_device *device = isert_conn->conn_device; + +	spin_lock_bh(&conn->cmd_lock); +	if (!list_empty(&cmd->i_conn_node)) +		list_del_init(&cmd->i_conn_node); +	spin_unlock_bh(&conn->cmd_lock); + +	if (cmd->data_direction == DMA_TO_DEVICE) +		iscsit_stop_dataout_timer(cmd); + +	device->unreg_rdma_mem(isert_cmd, isert_conn); +} + +static enum target_prot_op +isert_get_sup_prot_ops(struct iscsi_conn *conn) +{ +	struct isert_conn *isert_conn = (struct isert_conn *)conn->context; +	struct isert_device *device = isert_conn->conn_device; + +	if (device->pi_capable) +		return TARGET_PROT_ALL; + +	return TARGET_PROT_NORMAL; +} +  static int  isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,  		bool nopout_response) @@ -1813,7 +2233,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,  			       &isert_cmd->tx_desc.iscsi_header,  			       nopout_response);  	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); -	isert_init_send_wr(isert_cmd, send_wr); +	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);  	pr_debug("Posting NOPIN Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1831,7 +2251,7 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)  	iscsit_build_logout_rsp(cmd, conn, (struct iscsi_logout_rsp *)  				&isert_cmd->tx_desc.iscsi_header);  	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); -	isert_init_send_wr(isert_cmd, send_wr); +	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);  	pr_debug("Posting Logout Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1849,7 +2269,7 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)  	iscsit_build_task_mgt_rsp(cmd, conn, (struct iscsi_tm_rsp *)  				  &isert_cmd->tx_desc.iscsi_header);  	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); -	isert_init_send_wr(isert_cmd, send_wr); +	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);  	pr_debug("Posting Task Management Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1881,7 +2301,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)  	tx_dsg->lkey	= isert_conn->conn_mr->lkey;  	isert_cmd->tx_desc.num_sge = 2; -	isert_init_send_wr(isert_cmd, send_wr); +	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);  	pr_debug("Posting Reject IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1900,7 +2320,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)  	int rc;  	isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); -	rc = iscsit_build_text_rsp(cmd, conn, hdr); +	rc = iscsit_build_text_rsp(cmd, conn, 
hdr, ISCSI_INFINIBAND);  	if (rc < 0)  		return rc; @@ -1921,7 +2341,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)  		tx_dsg->lkey	= isert_conn->conn_mr->lkey;  		isert_cmd->tx_desc.num_sge = 2;  	} -	isert_init_send_wr(isert_cmd, send_wr); +	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);  	pr_debug("Posting Text Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n"); @@ -1981,56 +2401,39 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,  	struct se_cmd *se_cmd = &cmd->se_cmd;  	struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);  	struct isert_conn *isert_conn = (struct isert_conn *)conn->context; -	struct ib_device *ib_dev = isert_conn->conn_cm_id->device; +	struct isert_data_buf *data = &wr->data;  	struct ib_send_wr *send_wr;  	struct ib_sge *ib_sge; -	struct scatterlist *sg_start; -	u32 sg_off = 0, sg_nents; -	u32 offset = 0, data_len, data_left, rdma_write_max, va_offset = 0; -	int ret = 0, count, i, ib_sge_cnt; +	u32 offset, data_len, data_left, rdma_write_max, va_offset = 0; +	int ret = 0, i, ib_sge_cnt; -	if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { -		data_left = se_cmd->data_length; -		iscsit_increment_maxcmdsn(cmd, conn->sess); -		cmd->stat_sn = conn->stat_sn++; -	} else { -		sg_off = cmd->write_data_done / PAGE_SIZE; -		data_left = se_cmd->data_length - cmd->write_data_done; -		offset = cmd->write_data_done; -		isert_cmd->tx_desc.isert_cmd = isert_cmd; -	} +	isert_cmd->tx_desc.isert_cmd = isert_cmd; -	sg_start = &cmd->se_cmd.t_data_sg[sg_off]; -	sg_nents = se_cmd->t_data_nents - sg_off; +	offset = wr->iser_ib_op == ISER_IB_RDMA_READ ? cmd->write_data_done : 0; +	ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg, +				 se_cmd->t_data_nents, se_cmd->data_length, +				 offset, wr->iser_ib_op, &wr->data); +	if (ret) +		return ret; -	count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, -			      (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? -			      DMA_TO_DEVICE : DMA_FROM_DEVICE); -	if (unlikely(!count)) { -		pr_err("Cmd: %p unrable to map SGs\n", isert_cmd); -		return -EINVAL; -	} -	wr->sge = sg_start; -	wr->num_sge = sg_nents; -	wr->cur_rdma_length = data_left; -	pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", -		 isert_cmd, count, sg_start, sg_nents, data_left); +	data_left = data->len; +	offset = data->offset; -	ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL); +	ib_sge = kzalloc(sizeof(struct ib_sge) * data->nents, GFP_KERNEL);  	if (!ib_sge) {  		pr_warn("Unable to allocate ib_sge\n");  		ret = -ENOMEM; -		goto unmap_sg; +		goto unmap_cmd;  	}  	wr->ib_sge = ib_sge; -	wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge); +	wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);  	wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,  				GFP_KERNEL);  	if (!wr->send_wr) {  		pr_debug("Unable to allocate wr->send_wr\n");  		ret = -ENOMEM; -		goto unmap_sg; +		goto unmap_cmd;  	}  	wr->isert_cmd = isert_cmd; @@ -2069,10 +2472,9 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,  	}  	return 0; -unmap_sg: -	ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, -			(wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
-			DMA_TO_DEVICE : DMA_FROM_DEVICE); +unmap_cmd: +	isert_unmap_data_buf(isert_conn, data); +  	return ret;  } @@ -2116,51 +2518,70 @@ isert_map_fr_pagelist(struct ib_device *ib_dev,  }  static int -isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, -		  struct isert_cmd *isert_cmd, struct isert_conn *isert_conn, -		  struct ib_sge *ib_sge, u32 offset, unsigned int data_len) +isert_fast_reg_mr(struct isert_conn *isert_conn, +		  struct fast_reg_descriptor *fr_desc, +		  struct isert_data_buf *mem, +		  enum isert_indicator ind, +		  struct ib_sge *sge)  { -	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;  	struct ib_device *ib_dev = isert_conn->conn_cm_id->device; -	struct scatterlist *sg_start; -	u32 sg_off, page_off; +	struct ib_mr *mr; +	struct ib_fast_reg_page_list *frpl;  	struct ib_send_wr fr_wr, inv_wr;  	struct ib_send_wr *bad_wr, *wr = NULL; +	int ret, pagelist_len; +	u32 page_off;  	u8 key; -	int ret, sg_nents, pagelist_len; -	sg_off = offset / PAGE_SIZE; -	sg_start = &cmd->se_cmd.t_data_sg[sg_off]; -	sg_nents = min_t(unsigned int, cmd->se_cmd.t_data_nents - sg_off, -			 ISCSI_ISER_SG_TABLESIZE); -	page_off = offset % PAGE_SIZE; +	if (mem->dma_nents == 1) { +		sge->lkey = isert_conn->conn_mr->lkey; +		sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]); +		sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]); +		pr_debug("%s:%d sge: addr: 0x%llx  length: %u lkey: %x\n", +			 __func__, __LINE__, sge->addr, sge->length, +			 sge->lkey); +		return 0; +	} + +	if (ind == ISERT_DATA_KEY_VALID) { +		/* Registering data buffer */ +		mr = fr_desc->data_mr; +		frpl = fr_desc->data_frpl; +	} else { +		/* Registering protection buffer */ +		mr = fr_desc->pi_ctx->prot_mr; +		frpl = fr_desc->pi_ctx->prot_frpl; +	} -	pr_debug("Cmd: %p use fr_desc %p sg_nents %d sg_off %d offset %u\n", -		 isert_cmd, fr_desc, sg_nents, sg_off, offset); +	page_off = mem->offset % PAGE_SIZE; -	pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents, -					     &fr_desc->data_frpl->page_list[0]); +	pr_debug("Use fr_desc %p sg_nents %d offset %u\n", +		 fr_desc, mem->nents, mem->offset); -	if (!fr_desc->valid) { +	pagelist_len = isert_map_fr_pagelist(ib_dev, mem->sg, mem->nents, +					     &frpl->page_list[0]); + +	if (!(fr_desc->ind & ISERT_DATA_KEY_VALID)) {  		memset(&inv_wr, 0, sizeof(inv_wr)); +		inv_wr.wr_id = ISER_FASTREG_LI_WRID;  		inv_wr.opcode = IB_WR_LOCAL_INV; -		inv_wr.ex.invalidate_rkey = fr_desc->data_mr->rkey; +		inv_wr.ex.invalidate_rkey = mr->rkey;  		wr = &inv_wr;  		/* Bump the key */ -		key = (u8)(fr_desc->data_mr->rkey & 0x000000FF); -		ib_update_fast_reg_key(fr_desc->data_mr, ++key); +		key = (u8)(mr->rkey & 0x000000FF); +		ib_update_fast_reg_key(mr, ++key);  	}  	/* Prepare FASTREG WR */  	memset(&fr_wr, 0, sizeof(fr_wr)); +	fr_wr.wr_id = ISER_FASTREG_LI_WRID;  	fr_wr.opcode = IB_WR_FAST_REG_MR; -	fr_wr.wr.fast_reg.iova_start = -		fr_desc->data_frpl->page_list[0] + page_off; -	fr_wr.wr.fast_reg.page_list = fr_desc->data_frpl; +	fr_wr.wr.fast_reg.iova_start = frpl->page_list[0] + page_off; +	fr_wr.wr.fast_reg.page_list = frpl;  	fr_wr.wr.fast_reg.page_list_len = pagelist_len;  	fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; -	fr_wr.wr.fast_reg.length = data_len; -	fr_wr.wr.fast_reg.rkey = fr_desc->data_mr->rkey; +	fr_wr.wr.fast_reg.length = mem->len; +	fr_wr.wr.fast_reg.rkey = mr->rkey;  	fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE;  	if (!wr) @@ -2173,82 +2594,242 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc,  		pr_err("fast registration failed, ret:%d\n", ret);  		
return ret;  	} -	fr_desc->valid = false; +	fr_desc->ind &= ~ind; + +	sge->lkey = mr->lkey; +	sge->addr = frpl->page_list[0] + page_off; +	sge->length = mem->len; + +	pr_debug("%s:%d sge: addr: 0x%llx  length: %u lkey: %x\n", +		 __func__, __LINE__, sge->addr, sge->length, +		 sge->lkey); + +	return ret; +} + +static inline enum ib_t10_dif_type +se2ib_prot_type(enum target_prot_type prot_type) +{ +	switch (prot_type) { +	case TARGET_DIF_TYPE0_PROT: +		return IB_T10DIF_NONE; +	case TARGET_DIF_TYPE1_PROT: +		return IB_T10DIF_TYPE1; +	case TARGET_DIF_TYPE2_PROT: +		return IB_T10DIF_TYPE2; +	case TARGET_DIF_TYPE3_PROT: +		return IB_T10DIF_TYPE3; +	default: +		return IB_T10DIF_NONE; +	} +} + +static int +isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs) +{ +	enum ib_t10_dif_type ib_prot_type = se2ib_prot_type(se_cmd->prot_type); + +	sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF; +	sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF; +	sig_attrs->mem.sig.dif.pi_interval = +				se_cmd->se_dev->dev_attrib.block_size; +	sig_attrs->wire.sig.dif.pi_interval = +				se_cmd->se_dev->dev_attrib.block_size; + +	switch (se_cmd->prot_op) { +	case TARGET_PROT_DIN_INSERT: +	case TARGET_PROT_DOUT_STRIP: +		sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE; +		sig_attrs->wire.sig.dif.type = ib_prot_type; +		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; +		sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed; +		break; +	case TARGET_PROT_DOUT_INSERT: +	case TARGET_PROT_DIN_STRIP: +		sig_attrs->mem.sig.dif.type = ib_prot_type; +		sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; +		sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed; +		sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE; +		break; +	case TARGET_PROT_DIN_PASS: +	case TARGET_PROT_DOUT_PASS: +		sig_attrs->mem.sig.dif.type = ib_prot_type; +		sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; +		sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed; +		sig_attrs->wire.sig.dif.type = ib_prot_type; +		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; +		sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed; +		break; +	default: +		pr_err("Unsupported PI operation %d\n", se_cmd->prot_op); +		return -EINVAL; +	} + +	return 0; +} + +static inline u8 +isert_set_prot_checks(u8 prot_checks) +{ +	return (prot_checks & TARGET_DIF_CHECK_GUARD  ? 0xc0 : 0) | +	       (prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) | +	       (prot_checks & TARGET_DIF_CHECK_REFTAG ? 
0x0f : 0); +} + +static int +isert_reg_sig_mr(struct isert_conn *isert_conn, struct se_cmd *se_cmd, +		 struct fast_reg_descriptor *fr_desc, +		 struct ib_sge *data_sge, struct ib_sge *prot_sge, +		 struct ib_sge *sig_sge) +{ +	struct ib_send_wr sig_wr, inv_wr; +	struct ib_send_wr *bad_wr, *wr = NULL; +	struct pi_context *pi_ctx = fr_desc->pi_ctx; +	struct ib_sig_attrs sig_attrs; +	int ret; +	u32 key; + +	memset(&sig_attrs, 0, sizeof(sig_attrs)); +	ret = isert_set_sig_attrs(se_cmd, &sig_attrs); +	if (ret) +		goto err; + +	sig_attrs.check_mask = isert_set_prot_checks(se_cmd->prot_checks); + +	if (!(fr_desc->ind & ISERT_SIG_KEY_VALID)) { +		memset(&inv_wr, 0, sizeof(inv_wr)); +		inv_wr.opcode = IB_WR_LOCAL_INV; +		inv_wr.wr_id = ISER_FASTREG_LI_WRID; +		inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey; +		wr = &inv_wr; +		/* Bump the key */ +		key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF); +		ib_update_fast_reg_key(pi_ctx->sig_mr, ++key); +	} + +	memset(&sig_wr, 0, sizeof(sig_wr)); +	sig_wr.opcode = IB_WR_REG_SIG_MR; +	sig_wr.wr_id = ISER_FASTREG_LI_WRID; +	sig_wr.sg_list = data_sge; +	sig_wr.num_sge = 1; +	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE; +	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs; +	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr; +	if (se_cmd->t_prot_sg) +		sig_wr.wr.sig_handover.prot = prot_sge; + +	if (!wr) +		wr = &sig_wr; +	else +		wr->next = &sig_wr; -	ib_sge->lkey = fr_desc->data_mr->lkey; -	ib_sge->addr = fr_desc->data_frpl->page_list[0] + page_off; -	ib_sge->length = data_len; +	ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr); +	if (ret) { +		pr_err("fast registration failed, ret:%d\n", ret); +		goto err; +	} +	fr_desc->ind &= ~ISERT_SIG_KEY_VALID; -	pr_debug("RDMA ib_sge: addr: 0x%16llx  length: %u lkey: %08x\n", -		 ib_sge->addr, ib_sge->length, ib_sge->lkey); +	sig_sge->lkey = pi_ctx->sig_mr->lkey; +	sig_sge->addr = 0; +	sig_sge->length = se_cmd->data_length; +	if (se_cmd->prot_op != TARGET_PROT_DIN_STRIP && +	    se_cmd->prot_op != TARGET_PROT_DOUT_INSERT) +		/* +		 * We have protection guards on the wire +		 * so we need to set a larget transfer +		 */ +		sig_sge->length += se_cmd->prot_length; +	pr_debug("sig_sge: addr: 0x%llx  length: %u lkey: %x\n", +		 sig_sge->addr, sig_sge->length, +		 sig_sge->lkey); +err:  	return ret;  }  static int -isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, -		    struct isert_rdma_wr *wr) +isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, +	       struct isert_rdma_wr *wr)  {  	struct se_cmd *se_cmd = &cmd->se_cmd;  	struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); -	struct isert_conn *isert_conn = (struct isert_conn *)conn->context; -	struct ib_device *ib_dev = isert_conn->conn_cm_id->device; +	struct isert_conn *isert_conn = conn->context; +	struct ib_sge data_sge;  	struct ib_send_wr *send_wr; -	struct ib_sge *ib_sge; -	struct scatterlist *sg_start; -	struct fast_reg_descriptor *fr_desc; -	u32 sg_off = 0, sg_nents; -	u32 offset = 0, data_len, data_left, rdma_write_max; -	int ret = 0, count; +	struct fast_reg_descriptor *fr_desc = NULL; +	u32 offset; +	int ret = 0;  	unsigned long flags; -	if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { -		data_left = se_cmd->data_length; -		iscsit_increment_maxcmdsn(cmd, conn->sess); -		cmd->stat_sn = conn->stat_sn++; -	} else { -		sg_off = cmd->write_data_done / PAGE_SIZE; -		data_left = se_cmd->data_length - cmd->write_data_done; -		offset = cmd->write_data_done; -		isert_cmd->tx_desc.isert_cmd = isert_cmd; -	} +	
isert_cmd->tx_desc.isert_cmd = isert_cmd; -	sg_start = &cmd->se_cmd.t_data_sg[sg_off]; -	sg_nents = se_cmd->t_data_nents - sg_off; +	offset = wr->iser_ib_op == ISER_IB_RDMA_READ ? cmd->write_data_done : 0; +	ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg, +				 se_cmd->t_data_nents, se_cmd->data_length, +				 offset, wr->iser_ib_op, &wr->data); +	if (ret) +		return ret; -	count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, -			      (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? -			      DMA_TO_DEVICE : DMA_FROM_DEVICE); -	if (unlikely(!count)) { -		pr_err("Cmd: %p unrable to map SGs\n", isert_cmd); -		return -EINVAL; +	if (wr->data.dma_nents != 1 || +	    se_cmd->prot_op != TARGET_PROT_NORMAL) { +		spin_lock_irqsave(&isert_conn->conn_lock, flags); +		fr_desc = list_first_entry(&isert_conn->conn_fr_pool, +					   struct fast_reg_descriptor, list); +		list_del(&fr_desc->list); +		spin_unlock_irqrestore(&isert_conn->conn_lock, flags); +		wr->fr_desc = fr_desc;  	} -	wr->sge = sg_start; -	wr->num_sge = sg_nents; -	pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", -		 isert_cmd, count, sg_start, sg_nents, data_left); -	memset(&wr->s_ib_sge, 0, sizeof(*ib_sge)); -	ib_sge = &wr->s_ib_sge; -	wr->ib_sge = ib_sge; +	ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->data, +				ISERT_DATA_KEY_VALID, &data_sge); +	if (ret) +		goto unmap_cmd; + +	if (se_cmd->prot_op != TARGET_PROT_NORMAL) { +		struct ib_sge prot_sge, sig_sge; + +		if (se_cmd->t_prot_sg) { +			ret = isert_map_data_buf(isert_conn, isert_cmd, +						 se_cmd->t_prot_sg, +						 se_cmd->t_prot_nents, +						 se_cmd->prot_length, +						 0, wr->iser_ib_op, &wr->prot); +			if (ret) +				goto unmap_cmd; + +			ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->prot, +						ISERT_PROT_KEY_VALID, &prot_sge); +			if (ret) +				goto unmap_prot_cmd; +		} + +		ret = isert_reg_sig_mr(isert_conn, se_cmd, fr_desc, +				       &data_sge, &prot_sge, &sig_sge); +		if (ret) +			goto unmap_prot_cmd; + +		fr_desc->ind |= ISERT_PROTECTED; +		memcpy(&wr->s_ib_sge, &sig_sge, sizeof(sig_sge)); +	} else +		memcpy(&wr->s_ib_sge, &data_sge, sizeof(data_sge)); +	wr->ib_sge = &wr->s_ib_sge;  	wr->send_wr_num = 1;  	memset(&wr->s_send_wr, 0, sizeof(*send_wr));  	wr->send_wr = &wr->s_send_wr; -  	wr->isert_cmd = isert_cmd; -	rdma_write_max = ISCSI_ISER_SG_TABLESIZE * PAGE_SIZE;  	send_wr = &isert_cmd->rdma_wr.s_send_wr; -	send_wr->sg_list = ib_sge; +	send_wr->sg_list = &wr->s_ib_sge;  	send_wr->num_sge = 1;  	send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;  	if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {  		send_wr->opcode = IB_WR_RDMA_WRITE;  		send_wr->wr.rdma.remote_addr = isert_cmd->read_va;  		send_wr->wr.rdma.rkey = isert_cmd->read_stag; -		send_wr->send_flags = 0; -		send_wr->next = &isert_cmd->tx_desc.send_wr; +		send_wr->send_flags = se_cmd->prot_op == TARGET_PROT_NORMAL ? 
+				      0 : IB_SEND_SIGNALED;  	} else {  		send_wr->opcode = IB_WR_RDMA_READ;  		send_wr->wr.rdma.remote_addr = isert_cmd->write_va; @@ -2256,29 +2837,18 @@ isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,  		send_wr->send_flags = IB_SEND_SIGNALED;  	} -	data_len = min(data_left, rdma_write_max); -	wr->cur_rdma_length = data_len; - -	spin_lock_irqsave(&isert_conn->conn_lock, flags); -	fr_desc = list_first_entry(&isert_conn->conn_frwr_pool, -				   struct fast_reg_descriptor, list); -	list_del(&fr_desc->list); -	spin_unlock_irqrestore(&isert_conn->conn_lock, flags); -	wr->fr_desc = fr_desc; - -	ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn, -			  ib_sge, offset, data_len); -	if (ret) { -		list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); -		goto unmap_sg; -	} -  	return 0; +unmap_prot_cmd: +	if (se_cmd->t_prot_sg) +		isert_unmap_data_buf(isert_conn, &wr->prot); +unmap_cmd: +	if (fr_desc) { +		spin_lock_irqsave(&isert_conn->conn_lock, flags); +		list_add_tail(&fr_desc->list, &isert_conn->conn_fr_pool); +		spin_unlock_irqrestore(&isert_conn->conn_lock, flags); +	} +	isert_unmap_data_buf(isert_conn, &wr->data); -unmap_sg: -	ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, -			(wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? -			DMA_TO_DEVICE : DMA_FROM_DEVICE);  	return ret;  } @@ -2302,24 +2872,35 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)  		return rc;  	} -	/* -	 * Build isert_conn->tx_desc for iSCSI response PDU and attach -	 */ -	isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); -	iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *) -			     &isert_cmd->tx_desc.iscsi_header); -	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); -	isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr); +	if (se_cmd->prot_op == TARGET_PROT_NORMAL) { +		/* +		 * Build isert_conn->tx_desc for iSCSI response PDU and attach +		 */ +		isert_create_send_desc(isert_conn, isert_cmd, +				       &isert_cmd->tx_desc); +		iscsit_build_rsp_pdu(cmd, conn, true, (struct iscsi_scsi_rsp *) +				     &isert_cmd->tx_desc.iscsi_header); +		isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); +		isert_init_send_wr(isert_conn, isert_cmd, +				   &isert_cmd->tx_desc.send_wr, true); +		isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr; +		wr->send_wr_num += 1; +	} -	atomic_inc(&isert_conn->post_send_buf_count); +	atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count);  	rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);  	if (rc) {  		pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); -		atomic_dec(&isert_conn->post_send_buf_count); +		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);  	} -	pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data READ\n", -		 isert_cmd); + +	if (se_cmd->prot_op == TARGET_PROT_NORMAL) +		pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data " +			 "READ\n", isert_cmd); +	else +		pr_debug("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", +			 isert_cmd);  	return 1;  } @@ -2344,12 +2925,12 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)  		return rc;  	} -	atomic_inc(&isert_conn->post_send_buf_count); +	atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count);  	rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);  	if (rc) {  		pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); -		atomic_dec(&isert_conn->post_send_buf_count); +		atomic_sub(wr->send_wr_num, 
&isert_conn->post_send_buf_count);  	}  	pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",  		 isert_cmd); @@ -2430,7 +3011,7 @@ isert_setup_np(struct iscsi_np *np,  		pr_err("Unable to allocate struct isert_np\n");  		return -ENOMEM;  	} -	init_waitqueue_head(&isert_np->np_accept_wq); +	sema_init(&isert_np->np_sem, 0);  	mutex_init(&isert_np->np_accept_mutex);  	INIT_LIST_HEAD(&isert_np->np_accept_list);  	init_completion(&isert_np->np_login_comp); @@ -2479,18 +3060,6 @@ out:  }  static int -isert_check_accept_queue(struct isert_np *isert_np) -{ -	int empty; - -	mutex_lock(&isert_np->np_accept_mutex); -	empty = list_empty(&isert_np->np_accept_list); -	mutex_unlock(&isert_np->np_accept_mutex); - -	return empty; -} - -static int  isert_rdma_accept(struct isert_conn *isert_conn)  {  	struct rdma_cm_id *cm_id = isert_conn->conn_cm_id; @@ -2582,16 +3151,19 @@ isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)  	int max_accept = 0, ret;  accept_wait: -	ret = wait_event_interruptible(isert_np->np_accept_wq, -			!isert_check_accept_queue(isert_np) || -			np->np_thread_state == ISCSI_NP_THREAD_RESET); +	ret = down_interruptible(&isert_np->np_sem);  	if (max_accept > 5)  		return -ENODEV;  	spin_lock_bh(&np->np_thread_lock); -	if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { +	if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) {  		spin_unlock_bh(&np->np_thread_lock); -		pr_err("ISCSI_NP_THREAD_RESET for isert_accept_np\n"); +		pr_debug("np_thread_state %d for isert_accept_np\n", +			 np->np_thread_state); +		/** +		 * No point in stalling here when np_thread +		 * is in state RESET/SHUTDOWN/EXIT - bail +		 **/  		return -ENODEV;  	}  	spin_unlock_bh(&np->np_thread_lock); @@ -2636,63 +3208,37 @@ isert_free_np(struct iscsi_np *np)  	kfree(isert_np);  } -static int isert_check_state(struct isert_conn *isert_conn, int state) -{ -	int ret; - -	mutex_lock(&isert_conn->conn_mutex); -	ret = (isert_conn->state == state); -	mutex_unlock(&isert_conn->conn_mutex); - -	return ret; -} - -static void isert_free_conn(struct iscsi_conn *conn) +static void isert_wait_conn(struct iscsi_conn *conn)  {  	struct isert_conn *isert_conn = conn->context; -	pr_debug("isert_free_conn: Starting \n"); -	/* -	 * Decrement post_send_buf_count for special case when called -	 * from isert_do_control_comp() -> iscsit_logout_post_handler() -	 */ -	mutex_lock(&isert_conn->conn_mutex); -	if (isert_conn->logout_posted) -		atomic_dec(&isert_conn->post_send_buf_count); +	pr_debug("isert_wait_conn: Starting \n"); -	if (isert_conn->conn_cm_id && isert_conn->state != ISER_CONN_DOWN) { -		pr_debug("Calling rdma_disconnect from isert_free_conn\n"); +	mutex_lock(&isert_conn->conn_mutex); +	if (isert_conn->conn_cm_id) { +		pr_debug("Calling rdma_disconnect from isert_wait_conn\n");  		rdma_disconnect(isert_conn->conn_cm_id);  	}  	/*  	 * Only wait for conn_wait_comp_err if the isert_conn made it  	 * into full feature phase..  	 
*/ -	if (isert_conn->state == ISER_CONN_UP) { -		pr_debug("isert_free_conn: Before wait_event comp_err %d\n", -			 isert_conn->state); -		mutex_unlock(&isert_conn->conn_mutex); - -		wait_event(isert_conn->conn_wait_comp_err, -			  (isert_check_state(isert_conn, ISER_CONN_TERMINATING))); - -		wait_event(isert_conn->conn_wait, -			  (isert_check_state(isert_conn, ISER_CONN_DOWN))); - -		isert_put_conn(isert_conn); -		return; -	}  	if (isert_conn->state == ISER_CONN_INIT) {  		mutex_unlock(&isert_conn->conn_mutex); -		isert_put_conn(isert_conn);  		return;  	} -	pr_debug("isert_free_conn: wait_event conn_wait %d\n", -		 isert_conn->state); +	if (isert_conn->state == ISER_CONN_UP) +		isert_conn->state = ISER_CONN_TERMINATING;  	mutex_unlock(&isert_conn->conn_mutex); -	wait_event(isert_conn->conn_wait, -		  (isert_check_state(isert_conn, ISER_CONN_DOWN))); +	wait_for_completion(&isert_conn->conn_wait_comp_err); + +	wait_for_completion(&isert_conn->conn_wait); +} + +static void isert_free_conn(struct iscsi_conn *conn) +{ +	struct isert_conn *isert_conn = conn->context;  	isert_put_conn(isert_conn);  } @@ -2705,6 +3251,7 @@ static struct iscsit_transport iser_target_transport = {  	.iscsit_setup_np	= isert_setup_np,  	.iscsit_accept_np	= isert_accept_np,  	.iscsit_free_np		= isert_free_np, +	.iscsit_wait_conn	= isert_wait_conn,  	.iscsit_free_conn	= isert_free_conn,  	.iscsit_get_login_rx	= isert_get_login_rx,  	.iscsit_put_login_tx	= isert_put_login_tx, @@ -2713,6 +3260,8 @@ static struct iscsit_transport iser_target_transport = {  	.iscsit_get_dataout	= isert_get_dataout,  	.iscsit_queue_data_in	= isert_put_datain,  	.iscsit_queue_status	= isert_put_response, +	.iscsit_aborted_task	= isert_aborted_task, +	.iscsit_get_sup_prot_ops = isert_get_sup_prot_ops,  };  static int __init isert_init(void) @@ -2743,6 +3292,7 @@ destroy_rx_wq:  static void __exit isert_exit(void)  { +	flush_scheduled_work();  	destroy_workqueue(isert_comp_wq);  	destroy_workqueue(isert_rx_wq);  	iscsit_unregister_transport(&iser_target_transport); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 631f2090f0b..04f51f7bf61 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -6,6 +6,7 @@  #define ISERT_RDMA_LISTEN_BACKLOG	10  #define ISCSI_ISER_SG_TABLESIZE		256 +#define ISER_FASTREG_LI_WRID		0xffffffffffffffffULL  enum isert_desc_type {  	ISCSI_TX_CONTROL, @@ -43,14 +44,41 @@ struct iser_tx_desc {  	struct ib_sge	tx_sg[2];  	int		num_sge;  	struct isert_cmd *isert_cmd; +	struct llist_node *comp_llnode_batch; +	struct llist_node comp_llnode; +	bool		llnode_active;  	struct ib_send_wr send_wr;  } __packed; +enum isert_indicator { +	ISERT_PROTECTED		= 1 << 0, +	ISERT_DATA_KEY_VALID	= 1 << 1, +	ISERT_PROT_KEY_VALID	= 1 << 2, +	ISERT_SIG_KEY_VALID	= 1 << 3, +}; + +struct pi_context { +	struct ib_mr		       *prot_mr; +	struct ib_fast_reg_page_list   *prot_frpl; +	struct ib_mr		       *sig_mr; +}; +  struct fast_reg_descriptor { -	struct list_head	list; -	struct ib_mr		*data_mr; -	struct ib_fast_reg_page_list	*data_frpl; -	bool			valid; +	struct list_head		list; +	struct ib_mr		       *data_mr; +	struct ib_fast_reg_page_list   *data_frpl; +	u8				ind; +	struct pi_context	       *pi_ctx; +}; + +struct isert_data_buf { +	struct scatterlist     *sg; +	int			nents; +	u32			sg_off; +	u32			len; /* cur_rdma_length */ +	u32			offset; +	unsigned int		dma_nents; +	enum dma_data_direction dma_dir;  };  struct isert_rdma_wr { @@ -59,12 +87,11 
@@ struct isert_rdma_wr {  	enum iser_ib_op_code	iser_ib_op;  	struct ib_sge		*ib_sge;  	struct ib_sge		s_ib_sge; -	int			num_sge; -	struct scatterlist	*sge;  	int			send_wr_num;  	struct ib_send_wr	*send_wr;  	struct ib_send_wr	s_send_wr; -	u32			cur_rdma_length; +	struct isert_data_buf	data; +	struct isert_data_buf	prot;  	struct fast_reg_descriptor *fr_desc;  }; @@ -89,7 +116,6 @@ struct isert_device;  struct isert_conn {  	enum iser_conn_state	state; -	bool			logout_posted;  	int			post_recv_buf_count;  	atomic_t		post_send_buf_count;  	u32			responder_resources; @@ -114,13 +140,17 @@ struct isert_conn {  	struct isert_device	*conn_device;  	struct work_struct	conn_logout_work;  	struct mutex		conn_mutex; -	wait_queue_head_t	conn_wait; -	wait_queue_head_t	conn_wait_comp_err; +	struct completion	conn_wait; +	struct completion	conn_wait_comp_err;  	struct kref		conn_kref; -	struct list_head	conn_frwr_pool; -	int			conn_frwr_pool_size; -	/* lock to protect frwr_pool */ +	struct list_head	conn_fr_pool; +	int			conn_fr_pool_size; +	/* lock to protect fastreg pool */  	spinlock_t		conn_lock; +#define ISERT_COMP_BATCH_COUNT	8 +	int			conn_comp_batch; +	struct llist_head	conn_comp_llist; +	bool                    disconnect;  };  #define ISERT_MAX_CQ 64 @@ -133,13 +163,12 @@ struct isert_cq_desc {  };  struct isert_device { -	int			use_frwr; +	int			use_fastreg; +	bool			pi_capable;  	int			cqs_used;  	int			refcount;  	int			cq_active_qps[ISERT_MAX_CQ];  	struct ib_device	*ib_device; -	struct ib_pd		*dev_pd; -	struct ib_mr		*dev_mr;  	struct ib_cq		*dev_rx_cq[ISERT_MAX_CQ];  	struct ib_cq		*dev_tx_cq[ISERT_MAX_CQ];  	struct isert_cq_desc	*cq_desc; @@ -153,7 +182,7 @@ struct isert_device {  };  struct isert_np { -	wait_queue_head_t	np_accept_wq; +	struct semaphore	np_sem;  	struct rdma_cm_id	*np_cm_id;  	struct mutex		np_accept_mutex;  	struct list_head	np_accept_list; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index f93baf8254c..e3c2c5b4297 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -30,7 +30,7 @@   * SOFTWARE.   
*/ -#define pr_fmt(fmt) PFX fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt  #include <linux/module.h>  #include <linux/init.h> @@ -46,6 +46,7 @@  #include <scsi/scsi.h>  #include <scsi/scsi_device.h>  #include <scsi/scsi_dbg.h> +#include <scsi/scsi_tcq.h>  #include <scsi/srp.h>  #include <scsi/scsi_transport_srp.h> @@ -65,6 +66,8 @@ static unsigned int srp_sg_tablesize;  static unsigned int cmd_sg_entries;  static unsigned int indirect_sg_entries;  static bool allow_ext_sg; +static bool prefer_fr; +static bool register_always;  static int topspin_workarounds = 1;  module_param(srp_sg_tablesize, uint, 0444); @@ -86,6 +89,40 @@ module_param(topspin_workarounds, int, 0444);  MODULE_PARM_DESC(topspin_workarounds,  		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); +module_param(prefer_fr, bool, 0444); +MODULE_PARM_DESC(prefer_fr, +"Whether to use fast registration if both FMR and fast registration are supported"); + +module_param(register_always, bool, 0444); +MODULE_PARM_DESC(register_always, +		 "Use memory registration even for contiguous memory regions"); + +static struct kernel_param_ops srp_tmo_ops; + +static int srp_reconnect_delay = 10; +module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay, +		S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts"); + +static int srp_fast_io_fail_tmo = 15; +module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo, +		S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(fast_io_fail_tmo, +		 "Number of seconds between the observation of a transport" +		 " layer error and failing all I/O. \"off\" means that this" +		 " functionality is disabled."); + +static int srp_dev_loss_tmo = 600; +module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo, +		S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dev_loss_tmo, +		 "Maximum number of seconds that the SRP transport should" +		 " insulate transport layer errors. After this time has been" +		 " exceeded the SCSI host is removed. Should be" +		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT) +		 " if fast_io_fail_tmo has not been set. 
\"off\" means that" +		 " this functionality is disabled."); +  static void srp_add_one(struct ib_device *device);  static void srp_remove_one(struct ib_device *device);  static void srp_recv_completion(struct ib_cq *cq, void *target_ptr); @@ -102,6 +139,48 @@ static struct ib_client srp_client = {  static struct ib_sa_client srp_sa_client; +static int srp_tmo_get(char *buffer, const struct kernel_param *kp) +{ +	int tmo = *(int *)kp->arg; + +	if (tmo >= 0) +		return sprintf(buffer, "%d", tmo); +	else +		return sprintf(buffer, "off"); +} + +static int srp_tmo_set(const char *val, const struct kernel_param *kp) +{ +	int tmo, res; + +	if (strncmp(val, "off", 3) != 0) { +		res = kstrtoint(val, 0, &tmo); +		if (res) +			goto out; +	} else { +		tmo = -1; +	} +	if (kp->arg == &srp_reconnect_delay) +		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, +				    srp_dev_loss_tmo); +	else if (kp->arg == &srp_fast_io_fail_tmo) +		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo); +	else +		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo, +				    tmo); +	if (res) +		goto out; +	*(int *)kp->arg = tmo; + +out: +	return res; +} + +static struct kernel_param_ops srp_tmo_ops = { +	.get = srp_tmo_get, +	.set = srp_tmo_set, +}; +  static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)  {  	return (struct srp_target_port *) host->hostdata; @@ -219,28 +298,174 @@ static int srp_new_cm_id(struct srp_target_port *target)  	return 0;  } +static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) +{ +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_fmr_pool_param fmr_param; + +	memset(&fmr_param, 0, sizeof(fmr_param)); +	fmr_param.pool_size	    = target->scsi_host->can_queue; +	fmr_param.dirty_watermark   = fmr_param.pool_size / 4; +	fmr_param.cache		    = 1; +	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; +	fmr_param.page_shift	    = ilog2(dev->mr_page_size); +	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE | +				       IB_ACCESS_REMOTE_WRITE | +				       IB_ACCESS_REMOTE_READ); + +	return ib_create_fmr_pool(dev->pd, &fmr_param); +} + +/** + * srp_destroy_fr_pool() - free the resources owned by a pool + * @pool: Fast registration pool to be destroyed. + */ +static void srp_destroy_fr_pool(struct srp_fr_pool *pool) +{ +	int i; +	struct srp_fr_desc *d; + +	if (!pool) +		return; + +	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { +		if (d->frpl) +			ib_free_fast_reg_page_list(d->frpl); +		if (d->mr) +			ib_dereg_mr(d->mr); +	} +	kfree(pool); +} + +/** + * srp_create_fr_pool() - allocate and initialize a pool for fast registration + * @device:            IB device to allocate fast registration descriptors for. + * @pd:                Protection domain associated with the FR descriptors. + * @pool_size:         Number of descriptors to allocate. + * @max_page_list_len: Maximum fast registration work request page list length. 
+ */ +static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, +					      struct ib_pd *pd, int pool_size, +					      int max_page_list_len) +{ +	struct srp_fr_pool *pool; +	struct srp_fr_desc *d; +	struct ib_mr *mr; +	struct ib_fast_reg_page_list *frpl; +	int i, ret = -EINVAL; + +	if (pool_size <= 0) +		goto err; +	ret = -ENOMEM; +	pool = kzalloc(sizeof(struct srp_fr_pool) + +		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL); +	if (!pool) +		goto err; +	pool->size = pool_size; +	pool->max_page_list_len = max_page_list_len; +	spin_lock_init(&pool->lock); +	INIT_LIST_HEAD(&pool->free_list); + +	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { +		mr = ib_alloc_fast_reg_mr(pd, max_page_list_len); +		if (IS_ERR(mr)) { +			ret = PTR_ERR(mr); +			goto destroy_pool; +		} +		d->mr = mr; +		frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len); +		if (IS_ERR(frpl)) { +			ret = PTR_ERR(frpl); +			goto destroy_pool; +		} +		d->frpl = frpl; +		list_add_tail(&d->entry, &pool->free_list); +	} + +out: +	return pool; + +destroy_pool: +	srp_destroy_fr_pool(pool); + +err: +	pool = ERR_PTR(ret); +	goto out; +} + +/** + * srp_fr_pool_get() - obtain a descriptor suitable for fast registration + * @pool: Pool to obtain descriptor from. + */ +static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool) +{ +	struct srp_fr_desc *d = NULL; +	unsigned long flags; + +	spin_lock_irqsave(&pool->lock, flags); +	if (!list_empty(&pool->free_list)) { +		d = list_first_entry(&pool->free_list, typeof(*d), entry); +		list_del(&d->entry); +	} +	spin_unlock_irqrestore(&pool->lock, flags); + +	return d; +} + +/** + * srp_fr_pool_put() - put an FR descriptor back in the free list + * @pool: Pool the descriptor was allocated from. + * @desc: Pointer to an array of fast registration descriptor pointers. + * @n:    Number of descriptors to put back. + * + * Note: The caller must already have queued an invalidation request for + * desc->mr->rkey before calling this function. 
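srp_fr_pool_get() and srp_fr_pool_put() above are a lock-protected free list: get unlinks the first free descriptor, put relinks descriptors once their rkey has been invalidated. The following self-contained userspace sketch shows the same discipline with a pthread mutex standing in for the spinlock and a pointer stack standing in for the list_head; struct fr_desc, struct fr_pool and the pool size here are made up for the illustration.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct fr_desc {
	int id;			/* stands in for the MR and page list */
};

struct fr_pool {
	pthread_mutex_t lock;	/* protects free_list[] and nfree */
	int size;
	int nfree;
	struct fr_desc **free_list;	/* stack of free descriptors */
	struct fr_desc *desc;		/* backing array */
};

static struct fr_desc *pool_get(struct fr_pool *p)
{
	struct fr_desc *d = NULL;

	pthread_mutex_lock(&p->lock);
	if (p->nfree > 0)
		d = p->free_list[--p->nfree];
	pthread_mutex_unlock(&p->lock);
	return d;
}

static void pool_put(struct fr_pool *p, struct fr_desc *d)
{
	pthread_mutex_lock(&p->lock);
	p->free_list[p->nfree++] = d;
	pthread_mutex_unlock(&p->lock);
}

int main(void)
{
	struct fr_pool p = { .lock = PTHREAD_MUTEX_INITIALIZER, .size = 4 };
	int i;

	p.desc = calloc(p.size, sizeof(*p.desc));
	p.free_list = calloc(p.size, sizeof(*p.free_list));
	for (i = 0; i < p.size; i++) {
		p.desc[i].id = i;
		pool_put(&p, &p.desc[i]);
	}

	struct fr_desc *d = pool_get(&p);
	printf("got descriptor %d\n", d ? d->id : -1);
	pool_put(&p, d);
	free(p.free_list);
	free(p.desc);
	return 0;
}
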
+ */ +static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc, +			    int n) +{ +	unsigned long flags; +	int i; + +	spin_lock_irqsave(&pool->lock, flags); +	for (i = 0; i < n; i++) +		list_add(&desc[i]->entry, &pool->free_list); +	spin_unlock_irqrestore(&pool->lock, flags); +} + +static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) +{ +	struct srp_device *dev = target->srp_host->srp_dev; + +	return srp_create_fr_pool(dev->dev, dev->pd, +				  target->scsi_host->can_queue, +				  dev->max_pages_per_mr); +} +  static int srp_create_target_ib(struct srp_target_port *target)  { +	struct srp_device *dev = target->srp_host->srp_dev;  	struct ib_qp_init_attr *init_attr;  	struct ib_cq *recv_cq, *send_cq;  	struct ib_qp *qp; +	struct ib_fmr_pool *fmr_pool = NULL; +	struct srp_fr_pool *fr_pool = NULL; +	const int m = 1 + dev->use_fast_reg;  	int ret;  	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);  	if (!init_attr)  		return -ENOMEM; -	recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, -			       srp_recv_completion, NULL, target, SRP_RQ_SIZE, -			       target->comp_vector); +	recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, target, +			       target->queue_size, target->comp_vector);  	if (IS_ERR(recv_cq)) {  		ret = PTR_ERR(recv_cq);  		goto err;  	} -	send_cq = ib_create_cq(target->srp_host->srp_dev->dev, -			       srp_send_completion, NULL, target, SRP_SQ_SIZE, -			       target->comp_vector); +	send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target, +			       m * target->queue_size, target->comp_vector);  	if (IS_ERR(send_cq)) {  		ret = PTR_ERR(send_cq);  		goto err_recv_cq; @@ -249,16 +474,16 @@ static int srp_create_target_ib(struct srp_target_port *target)  	ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);  	init_attr->event_handler       = srp_qp_event; -	init_attr->cap.max_send_wr     = SRP_SQ_SIZE; -	init_attr->cap.max_recv_wr     = SRP_RQ_SIZE; +	init_attr->cap.max_send_wr     = m * target->queue_size; +	init_attr->cap.max_recv_wr     = target->queue_size;  	init_attr->cap.max_recv_sge    = 1;  	init_attr->cap.max_send_sge    = 1; -	init_attr->sq_sig_type         = IB_SIGNAL_ALL_WR; +	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;  	init_attr->qp_type             = IB_QPT_RC;  	init_attr->send_cq             = send_cq;  	init_attr->recv_cq             = recv_cq; -	qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); +	qp = ib_create_qp(dev->pd, init_attr);  	if (IS_ERR(qp)) {  		ret = PTR_ERR(qp);  		goto err_send_cq; @@ -268,6 +493,30 @@ static int srp_create_target_ib(struct srp_target_port *target)  	if (ret)  		goto err_qp; +	if (dev->use_fast_reg && dev->has_fr) { +		fr_pool = srp_alloc_fr_pool(target); +		if (IS_ERR(fr_pool)) { +			ret = PTR_ERR(fr_pool); +			shost_printk(KERN_WARNING, target->scsi_host, PFX +				     "FR pool allocation failed (%d)\n", ret); +			goto err_qp; +		} +		if (target->fr_pool) +			srp_destroy_fr_pool(target->fr_pool); +		target->fr_pool = fr_pool; +	} else if (!dev->use_fast_reg && dev->has_fmr) { +		fmr_pool = srp_alloc_fmr_pool(target); +		if (IS_ERR(fmr_pool)) { +			ret = PTR_ERR(fmr_pool); +			shost_printk(KERN_WARNING, target->scsi_host, PFX +				     "FMR pool allocation failed (%d)\n", ret); +			goto err_qp; +		} +		if (target->fmr_pool) +			ib_destroy_fmr_pool(target->fmr_pool); +		target->fmr_pool = fmr_pool; +	} +  	if (target->qp)  		ib_destroy_qp(target->qp);  	if (target->recv_cq) @@ -296,10 +545,22 @@ err:  	return ret;  } +/* + * Note: this 
function may be called without srp_alloc_iu_bufs() having been + * invoked. Hence the target->[rt]x_ring checks. + */  static void srp_free_target_ib(struct srp_target_port *target)  { +	struct srp_device *dev = target->srp_host->srp_dev;  	int i; +	if (dev->use_fast_reg) { +		if (target->fr_pool) +			srp_destroy_fr_pool(target->fr_pool); +	} else { +		if (target->fmr_pool) +			ib_destroy_fmr_pool(target->fmr_pool); +	}  	ib_destroy_qp(target->qp);  	ib_destroy_cq(target->send_cq);  	ib_destroy_cq(target->recv_cq); @@ -307,10 +568,18 @@ static void srp_free_target_ib(struct srp_target_port *target)  	target->qp = NULL;  	target->send_cq = target->recv_cq = NULL; -	for (i = 0; i < SRP_RQ_SIZE; ++i) -		srp_free_iu(target->srp_host, target->rx_ring[i]); -	for (i = 0; i < SRP_SQ_SIZE; ++i) -		srp_free_iu(target->srp_host, target->tx_ring[i]); +	if (target->rx_ring) { +		for (i = 0; i < target->queue_size; ++i) +			srp_free_iu(target->srp_host, target->rx_ring[i]); +		kfree(target->rx_ring); +		target->rx_ring = NULL; +	} +	if (target->tx_ring) { +		for (i = 0; i < target->queue_size; ++i) +			srp_free_iu(target->srp_host, target->tx_ring[i]); +		kfree(target->tx_ring); +		target->tx_ring = NULL; +	}  }  static void srp_path_rec_completion(int status, @@ -330,6 +599,8 @@ static void srp_path_rec_completion(int status,  static int srp_lookup_path(struct srp_target_port *target)  { +	int ret; +  	target->path.numb_path = 1;  	init_completion(&target->done); @@ -350,7 +621,9 @@ static int srp_lookup_path(struct srp_target_port *target)  	if (target->path_query_id < 0)  		return target->path_query_id; -	wait_for_completion(&target->done); +	ret = wait_for_completion_interruptible(&target->done); +	if (ret < 0) +		return ret;  	if (target->status < 0)  		shost_printk(KERN_WARNING, target->scsi_host, @@ -390,7 +663,7 @@ static int srp_send_req(struct srp_target_port *target)  	req->param.responder_resources	      = 4;  	req->param.remote_cm_response_timeout = 20;  	req->param.local_cm_response_timeout  = 20; -	req->param.retry_count 		      = 7; +	req->param.retry_count                = target->tl_retry_count;  	req->param.rnr_retry_count 	      = 7;  	req->param.max_cm_retries 	      = 15; @@ -492,12 +765,20 @@ static void srp_disconnect_target(struct srp_target_port *target)  static void srp_free_req_data(struct srp_target_port *target)  { -	struct ib_device *ibdev = target->srp_host->srp_dev->dev; +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_device *ibdev = dev->dev;  	struct srp_request *req;  	int i; -	for (i = 0, req = target->req_ring; i < SRP_CMD_SQ_SIZE; ++i, ++req) { -		kfree(req->fmr_list); +	if (!target->req_ring) +		return; + +	for (i = 0; i < target->req_ring_size; ++i) { +		req = &target->req_ring[i]; +		if (dev->use_fast_reg) +			kfree(req->fr_list); +		else +			kfree(req->fmr_list);  		kfree(req->map_page);  		if (req->indirect_dma_addr) {  			ib_dma_unmap_single(ibdev, req->indirect_dma_addr, @@ -506,6 +787,59 @@ static void srp_free_req_data(struct srp_target_port *target)  		}  		kfree(req->indirect_desc);  	} + +	kfree(target->req_ring); +	target->req_ring = NULL; +} + +static int srp_alloc_req_data(struct srp_target_port *target) +{ +	struct srp_device *srp_dev = target->srp_host->srp_dev; +	struct ib_device *ibdev = srp_dev->dev; +	struct srp_request *req; +	void *mr_list; +	dma_addr_t dma_addr; +	int i, ret = -ENOMEM; + +	INIT_LIST_HEAD(&target->free_reqs); + +	target->req_ring = kzalloc(target->req_ring_size * +				   sizeof(*target->req_ring), 
GFP_KERNEL); +	if (!target->req_ring) +		goto out; + +	for (i = 0; i < target->req_ring_size; ++i) { +		req = &target->req_ring[i]; +		mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *), +				  GFP_KERNEL); +		if (!mr_list) +			goto out; +		if (srp_dev->use_fast_reg) +			req->fr_list = mr_list; +		else +			req->fmr_list = mr_list; +		req->map_page = kmalloc(srp_dev->max_pages_per_mr * +					sizeof(void *), GFP_KERNEL); +		if (!req->map_page) +			goto out; +		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); +		if (!req->indirect_desc) +			goto out; + +		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, +					     target->indirect_size, +					     DMA_TO_DEVICE); +		if (ib_dma_mapping_error(ibdev, dma_addr)) +			goto out; + +		req->indirect_dma_addr = dma_addr; +		req->index = i; +		list_add_tail(&req->list, &target->free_reqs); +	} +	ret = 0; + +out: +	return ret;  }  /** @@ -528,12 +862,21 @@ static void srp_remove_target(struct srp_target_port *target)  	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);  	srp_del_scsi_host_attr(target->scsi_host); +	srp_rport_get(target->rport);  	srp_remove_host(target->scsi_host);  	scsi_remove_host(target->scsi_host); +	srp_stop_rport_timers(target->rport);  	srp_disconnect_target(target);  	ib_destroy_cm_id(target->cm_id);  	srp_free_target_ib(target); +	cancel_work_sync(&target->tl_err_work); +	srp_rport_put(target->rport);  	srp_free_req_data(target); + +	spin_lock(&target->srp_host->target_lock); +	list_del(&target->list); +	spin_unlock(&target->srp_host->target_lock); +  	scsi_host_put(target->scsi_host);  } @@ -545,10 +888,6 @@ static void srp_remove_work(struct work_struct *work)  	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);  	srp_remove_target(target); - -	spin_lock(&target->srp_host->target_lock); -	list_del(&target->list); -	spin_unlock(&target->srp_host->target_lock);  }  static void srp_rport_delete(struct srp_rport *rport) @@ -576,7 +915,9 @@ static int srp_connect_target(struct srp_target_port *target)  		ret = srp_send_req(target);  		if (ret)  			return ret; -		wait_for_completion(&target->done); +		ret = wait_for_completion_interruptible(&target->done); +		if (ret < 0) +			return ret;  		/*  		 * The CM event handling code will set status to @@ -619,21 +960,56 @@ static int srp_connect_target(struct srp_target_port *target)  	}  } +static int srp_inv_rkey(struct srp_target_port *target, u32 rkey) +{ +	struct ib_send_wr *bad_wr; +	struct ib_send_wr wr = { +		.opcode		    = IB_WR_LOCAL_INV, +		.wr_id		    = LOCAL_INV_WR_ID_MASK, +		.next		    = NULL, +		.num_sge	    = 0, +		.send_flags	    = 0, +		.ex.invalidate_rkey = rkey, +	}; + +	return ib_post_send(target->qp, &wr, &bad_wr); +} +  static void srp_unmap_data(struct scsi_cmnd *scmnd,  			   struct srp_target_port *target,  			   struct srp_request *req)  { -	struct ib_device *ibdev = target->srp_host->srp_dev->dev; -	struct ib_pool_fmr **pfmr; +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_device *ibdev = dev->dev; +	int i, res;  	if (!scsi_sglist(scmnd) ||  	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&  	     scmnd->sc_data_direction != DMA_FROM_DEVICE))  		return; -	pfmr = req->fmr_list; -	while (req->nfmr--) -		ib_fmr_pool_unmap(*pfmr++); +	if (dev->use_fast_reg) { +		struct srp_fr_desc **pfr; + +		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { +			res = srp_inv_rkey(target, (*pfr)->mr->rkey); +			if (res < 0) { +				shost_printk(KERN_ERR, target->scsi_host, PFX +				  "Queueing INV WR for rkey %#x failed 
(%d)\n", +				  (*pfr)->mr->rkey, res); +				queue_work(system_long_wq, +					   &target->tl_err_work); +			} +		} +		if (req->nmdesc) +			srp_fr_pool_put(target->fr_pool, req->fr_list, +					req->nmdesc); +	} else { +		struct ib_pool_fmr **pfmr; + +		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) +			ib_fmr_pool_unmap(*pfmr); +	}  	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),  			scmnd->sc_data_direction); @@ -643,6 +1019,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,   * srp_claim_req - Take ownership of the scmnd associated with a request.   * @target: SRP target port.   * @req: SRP request. + * @sdev: If not NULL, only take ownership for this SCSI device.   * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take   *         ownership of @req->scmnd if it equals @scmnd.   * @@ -651,16 +1028,17 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,   */  static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,  				       struct srp_request *req, +				       struct scsi_device *sdev,  				       struct scsi_cmnd *scmnd)  {  	unsigned long flags;  	spin_lock_irqsave(&target->lock, flags); -	if (!scmnd) { +	if (req->scmnd && +	    (!sdev || req->scmnd->device == sdev) && +	    (!scmnd || req->scmnd == scmnd)) {  		scmnd = req->scmnd;  		req->scmnd = NULL; -	} else if (req->scmnd == scmnd) { -		req->scmnd = NULL;  	} else {  		scmnd = NULL;  	} @@ -671,6 +1049,10 @@ static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,  /**   * srp_free_req() - Unmap data and add request to the free request list. + * @target: SRP target port. + * @req:    Request to be freed. + * @scmnd:  SCSI command associated with @req. + * @req_lim_delta: Amount to be added to @target->req_lim.   */  static void srp_free_req(struct srp_target_port *target,  			 struct srp_request *req, struct scsi_cmnd *scmnd, @@ -686,23 +1068,52 @@ static void srp_free_req(struct srp_target_port *target,  	spin_unlock_irqrestore(&target->lock, flags);  } -static void srp_reset_req(struct srp_target_port *target, struct srp_request *req) +static void srp_finish_req(struct srp_target_port *target, +			   struct srp_request *req, struct scsi_device *sdev, +			   int result)  { -	struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); +	struct scsi_cmnd *scmnd = srp_claim_req(target, req, sdev, NULL);  	if (scmnd) {  		srp_free_req(target, req, scmnd, 0); -		scmnd->result = DID_RESET << 16; +		scmnd->result = result;  		scmnd->scsi_done(scmnd);  	}  } -static int srp_reconnect_target(struct srp_target_port *target) +static void srp_terminate_io(struct srp_rport *rport)  { +	struct srp_target_port *target = rport->lld_data;  	struct Scsi_Host *shost = target->scsi_host; -	int i, ret; +	struct scsi_device *sdev; +	int i; -	scsi_target_block(&shost->shost_gendev); +	/* +	 * Invoking srp_terminate_io() while srp_queuecommand() is running +	 * is not safe. Hence the warning statement below. +	 */ +	shost_for_each_device(sdev, shost) +		WARN_ON_ONCE(sdev->request_queue->request_fn_active); + +	for (i = 0; i < target->req_ring_size; ++i) { +		struct srp_request *req = &target->req_ring[i]; +		srp_finish_req(target, req, NULL, DID_TRANSPORT_FAILFAST << 16); +	} +} + +/* + * It is up to the caller to ensure that srp_rport_reconnect() calls are + * serialized and that no concurrent srp_queuecommand(), srp_abort(), + * srp_reset_device() or srp_reset_host() calls will occur while this function + * is in progress. 
One way to realize that is not to call this function + * directly but to call srp_reconnect_rport() instead since that last function + * serializes calls of this function via rport->mutex and also blocks + * srp_queuecommand() calls before invoking this function. + */ +static int srp_rport_reconnect(struct srp_rport *rport) +{ +	struct srp_target_port *target = rport->lld_data; +	int i, ret;  	srp_disconnect_target(target);  	/* @@ -711,51 +1122,29 @@ static int srp_reconnect_target(struct srp_target_port *target)  	 * callbacks will have finished before a new QP is allocated.  	 */  	ret = srp_new_cm_id(target); -	/* -	 * Whether or not creating a new CM ID succeeded, create a new -	 * QP. This guarantees that all completion callback function -	 * invocations have finished before request resetting starts. -	 */ -	if (ret == 0) -		ret = srp_create_target_ib(target); -	else -		srp_create_target_ib(target); -	for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { +	for (i = 0; i < target->req_ring_size; ++i) {  		struct srp_request *req = &target->req_ring[i]; -		if (req->scmnd) -			srp_reset_req(target, req); +		srp_finish_req(target, req, NULL, DID_RESET << 16);  	} +	/* +	 * Whether or not creating a new CM ID succeeded, create a new +	 * QP. This guarantees that all callback functions for the old QP have +	 * finished before any send requests are posted on the new QP. +	 */ +	ret += srp_create_target_ib(target); +  	INIT_LIST_HEAD(&target->free_tx); -	for (i = 0; i < SRP_SQ_SIZE; ++i) +	for (i = 0; i < target->queue_size; ++i)  		list_add(&target->tx_ring[i]->list, &target->free_tx);  	if (ret == 0)  		ret = srp_connect_target(target); -	scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING : -			    SDEV_TRANSPORT_OFFLINE); -	target->transport_offline = !!ret; - -	if (ret) -		goto err; - -	shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n"); - -	return ret; - -err: -	shost_printk(KERN_ERR, target->scsi_host, -		     PFX "reconnect failed (%d), removing target port.\n", ret); - -	/* -	 * We couldn't reconnect, so kill our target port off. -	 * However, we have to defer the real removal because we -	 * are in the context of the SCSI error handler now, which -	 * will deadlock if we call scsi_remove_host(). 
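The comment above states the locking contract: srp_rport_reconnect() runs with rport->mutex held and with srp_queuecommand() blocked, so submission never observes a half-rebuilt connection. A tiny pthread sketch of that contract follows; conn, submit() and reconnect() are invented names, and the flag check is only an analogy for what srp_chkready() and the SCSI blocking machinery do.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct conn {
	pthread_mutex_t mutex;	/* plays the role of rport->mutex */
	bool live;		/* false while the transport is down */
};

/* Submission path: only touches the connection while it is live. */
static int submit(struct conn *c, int tag)
{
	int ret = -1;

	pthread_mutex_lock(&c->mutex);
	if (c->live) {
		printf("submitted request %d\n", tag);
		ret = 0;
	}
	pthread_mutex_unlock(&c->mutex);
	return ret;
}

/* Reconnect path: holds the mutex for the whole teardown plus rebuild,
 * so no submit() can observe a half-rebuilt connection. */
static void reconnect(struct conn *c)
{
	pthread_mutex_lock(&c->mutex);
	c->live = false;
	/* ... tear down and recreate resources here ... */
	c->live = true;
	pthread_mutex_unlock(&c->mutex);
}

int main(void)
{
	struct conn c = { .mutex = PTHREAD_MUTEX_INITIALIZER, .live = true };

	submit(&c, 1);
	reconnect(&c);
	submit(&c, 2);
	return 0;
}
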
-	 */ -	srp_queue_remove_work(target); +	if (ret == 0) +		shost_printk(KERN_INFO, target->scsi_host, +			     PFX "reconnect succeeded\n");  	return ret;  } @@ -777,33 +1166,87 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,  static int srp_map_finish_fmr(struct srp_map_state *state,  			      struct srp_target_port *target)  { -	struct srp_device *dev = target->srp_host->srp_dev;  	struct ib_pool_fmr *fmr;  	u64 io_addr = 0; -	if (!state->npages) -		return 0; - -	if (state->npages == 1) { -		srp_map_desc(state, state->base_dma_addr, state->fmr_len, -			     target->rkey); -		state->npages = state->fmr_len = 0; -		return 0; -	} - -	fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages, +	fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages,  				   state->npages, io_addr);  	if (IS_ERR(fmr))  		return PTR_ERR(fmr);  	*state->next_fmr++ = fmr; -	state->nfmr++; +	state->nmdesc++; + +	srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey); -	srp_map_desc(state, 0, state->fmr_len, fmr->fmr->rkey); -	state->npages = state->fmr_len = 0;  	return 0;  } +static int srp_map_finish_fr(struct srp_map_state *state, +			     struct srp_target_port *target) +{ +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_send_wr *bad_wr; +	struct ib_send_wr wr; +	struct srp_fr_desc *desc; +	u32 rkey; + +	desc = srp_fr_pool_get(target->fr_pool); +	if (!desc) +		return -ENOMEM; + +	rkey = ib_inc_rkey(desc->mr->rkey); +	ib_update_fast_reg_key(desc->mr, rkey); + +	memcpy(desc->frpl->page_list, state->pages, +	       sizeof(state->pages[0]) * state->npages); + +	memset(&wr, 0, sizeof(wr)); +	wr.opcode = IB_WR_FAST_REG_MR; +	wr.wr_id = FAST_REG_WR_ID_MASK; +	wr.wr.fast_reg.iova_start = state->base_dma_addr; +	wr.wr.fast_reg.page_list = desc->frpl; +	wr.wr.fast_reg.page_list_len = state->npages; +	wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size); +	wr.wr.fast_reg.length = state->dma_len; +	wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | +				       IB_ACCESS_REMOTE_READ | +				       IB_ACCESS_REMOTE_WRITE); +	wr.wr.fast_reg.rkey = desc->mr->lkey; + +	*state->next_fr++ = desc; +	state->nmdesc++; + +	srp_map_desc(state, state->base_dma_addr, state->dma_len, +		     desc->mr->rkey); + +	return ib_post_send(target->qp, &wr, &bad_wr); +} + +static int srp_finish_mapping(struct srp_map_state *state, +			      struct srp_target_port *target) +{ +	int ret = 0; + +	if (state->npages == 0) +		return 0; + +	if (state->npages == 1 && !register_always) +		srp_map_desc(state, state->base_dma_addr, state->dma_len, +			     target->rkey); +	else +		ret = target->srp_host->srp_dev->use_fast_reg ? 
+			srp_map_finish_fr(state, target) : +			srp_map_finish_fmr(state, target); + +	if (ret == 0) { +		state->npages = 0; +		state->dma_len = 0; +	} + +	return ret; +} +  static void srp_map_update_start(struct srp_map_state *state,  				 struct scatterlist *sg, int sg_index,  				 dma_addr_t dma_addr) @@ -816,7 +1259,7 @@ static void srp_map_update_start(struct srp_map_state *state,  static int srp_map_sg_entry(struct srp_map_state *state,  			    struct srp_target_port *target,  			    struct scatterlist *sg, int sg_index, -			    int use_fmr) +			    bool use_mr)  {  	struct srp_device *dev = target->srp_host->srp_dev;  	struct ib_device *ibdev = dev->dev; @@ -828,23 +1271,25 @@ static int srp_map_sg_entry(struct srp_map_state *state,  	if (!dma_len)  		return 0; -	if (use_fmr == SRP_MAP_NO_FMR) { -		/* Once we're in direct map mode for a request, we don't -		 * go back to FMR mode, so no need to update anything +	if (!use_mr) { +		/* +		 * Once we're in direct map mode for a request, we don't +		 * go back to FMR or FR mode, so no need to update anything  		 * other than the descriptor.  		 */  		srp_map_desc(state, dma_addr, dma_len, target->rkey);  		return 0;  	} -	/* If we start at an offset into the FMR page, don't merge into -	 * the current FMR. Finish it out, and use the kernel's MR for this -	 * sg entry. This is to avoid potential bugs on some SRP targets -	 * that were never quite defined, but went away when the initiator -	 * avoided using FMR on such page fragments. +	/* +	 * Since not all RDMA HW drivers support non-zero page offsets for +	 * FMR, if we start at an offset into a page, don't merge into the +	 * current FMR mapping. Finish it out, and use the kernel's MR for +	 * this sg entry.  	 */ -	if (dma_addr & ~dev->fmr_page_mask || dma_len > dev->fmr_max_size) { -		ret = srp_map_finish_fmr(state, target); +	if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) || +	    dma_len > dev->mr_max_size) { +		ret = srp_finish_mapping(state, target);  		if (ret)  			return ret; @@ -853,52 +1298,106 @@ static int srp_map_sg_entry(struct srp_map_state *state,  		return 0;  	} -	/* If this is the first sg to go into the FMR, save our position. -	 * We need to know the first unmapped entry, its index, and the -	 * first unmapped address within that entry to be able to restart -	 * mapping after an error. +	/* +	 * If this is the first sg that will be mapped via FMR or via FR, save +	 * our position. We need to know the first unmapped entry, its index, +	 * and the first unmapped address within that entry to be able to +	 * restart mapping after an error.  	 
*/  	if (!state->unmapped_sg)  		srp_map_update_start(state, sg, sg_index, dma_addr);  	while (dma_len) { -		if (state->npages == SRP_FMR_SIZE) { -			ret = srp_map_finish_fmr(state, target); +		unsigned offset = dma_addr & ~dev->mr_page_mask; +		if (state->npages == dev->max_pages_per_mr || offset != 0) { +			ret = srp_finish_mapping(state, target);  			if (ret)  				return ret;  			srp_map_update_start(state, sg, sg_index, dma_addr);  		} -		len = min_t(unsigned int, dma_len, dev->fmr_page_size); +		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);  		if (!state->npages)  			state->base_dma_addr = dma_addr; -		state->pages[state->npages++] = dma_addr; -		state->fmr_len += len; +		state->pages[state->npages++] = dma_addr & dev->mr_page_mask; +		state->dma_len += len;  		dma_addr += len;  		dma_len -= len;  	} -	/* If the last entry of the FMR wasn't a full page, then we need to +	/* +	 * If the last entry of the MR wasn't a full page, then we need to  	 * close it out and start a new one -- we can only merge at page  	 * boundries.  	 */  	ret = 0; -	if (len != dev->fmr_page_size) { -		ret = srp_map_finish_fmr(state, target); +	if (len != dev->mr_page_size) { +		ret = srp_finish_mapping(state, target);  		if (!ret)  			srp_map_update_start(state, NULL, 0, 0);  	}  	return ret;  } +static int srp_map_sg(struct srp_map_state *state, +		      struct srp_target_port *target, struct srp_request *req, +		      struct scatterlist *scat, int count) +{ +	struct srp_device *dev = target->srp_host->srp_dev; +	struct ib_device *ibdev = dev->dev; +	struct scatterlist *sg; +	int i; +	bool use_mr; + +	state->desc	= req->indirect_desc; +	state->pages	= req->map_page; +	if (dev->use_fast_reg) { +		state->next_fr = req->fr_list; +		use_mr = !!target->fr_pool; +	} else { +		state->next_fmr = req->fmr_list; +		use_mr = !!target->fmr_pool; +	} + +	for_each_sg(scat, sg, count, i) { +		if (srp_map_sg_entry(state, target, sg, i, use_mr)) { +			/* +			 * Memory registration failed, so backtrack to the +			 * first unmapped entry and continue on without using +			 * memory registration. 
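The while (dma_len) loop above splits each DMA-mapped sg entry into mr_page_size sized pieces and records the page-aligned addresses, closing out the current registration whenever the page array fills up or the start is not page aligned. The chunking arithmetic on its own looks like the sketch below; the page size, the example address and emit() are arbitrary stand-ins for the state->pages[] bookkeeping.

#include <stdint.h>
#include <stdio.h>

#define MR_PAGE_SIZE	4096u
#define MR_PAGE_MASK	(~((uint64_t)MR_PAGE_SIZE - 1))

static void emit(uint64_t page_addr, unsigned int len)
{
	printf("page %#llx, %u bytes\n",
	       (unsigned long long)page_addr, len);
}

/* Split [dma_addr, dma_addr + dma_len) into page-sized pieces. */
static void chunk(uint64_t dma_addr, unsigned int dma_len)
{
	while (dma_len) {
		unsigned int offset = dma_addr & ~MR_PAGE_MASK;
		unsigned int len = MR_PAGE_SIZE - offset;

		if (len > dma_len)
			len = dma_len;
		emit(dma_addr & MR_PAGE_MASK, len);
		dma_addr += len;
		dma_len -= len;
	}
}

int main(void)
{
	chunk(0x10000f00, 10000);	/* starts at a 0xf00 page offset */
	return 0;
}
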
+			 */ +			dma_addr_t dma_addr; +			unsigned int dma_len; + +backtrack: +			sg = state->unmapped_sg; +			i = state->unmapped_index; + +			dma_addr = ib_sg_dma_address(ibdev, sg); +			dma_len = ib_sg_dma_len(ibdev, sg); +			dma_len -= (state->unmapped_addr - dma_addr); +			dma_addr = state->unmapped_addr; +			use_mr = false; +			srp_map_desc(state, dma_addr, dma_len, target->rkey); +		} +	} + +	if (use_mr && srp_finish_mapping(state, target)) +		goto backtrack; + +	req->nmdesc = state->nmdesc; + +	return 0; +} +  static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,  			struct srp_request *req)  { -	struct scatterlist *scat, *sg; +	struct scatterlist *scat;  	struct srp_cmd *cmd = req->cmd->buf; -	int i, len, nents, count, use_fmr; +	int len, nents, count;  	struct srp_device *dev;  	struct ib_device *ibdev;  	struct srp_map_state state; @@ -930,7 +1429,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,  	fmt = SRP_DATA_DESC_DIRECT;  	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf); -	if (count == 1) { +	if (count == 1 && !register_always) {  		/*  		 * The midlayer only generated a single gather/scatter  		 * entry, or DMA mapping coalesced everything to a @@ -943,13 +1442,13 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,  		buf->key = cpu_to_be32(target->rkey);  		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); -		req->nfmr = 0; +		req->nmdesc = 0;  		goto map_complete;  	} -	/* We have more than one scatter/gather entry, so build our indirect -	 * descriptor table, trying to merge as many entries with FMR as we -	 * can. +	/* +	 * We have more than one scatter/gather entry, so build our indirect +	 * descriptor table, trying to merge as many entries as we can.  	 */  	indirect_hdr = (void *) cmd->add_data; @@ -957,35 +1456,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,  				   target->indirect_size, DMA_TO_DEVICE);  	memset(&state, 0, sizeof(state)); -	state.desc	= req->indirect_desc; -	state.pages	= req->map_page; -	state.next_fmr	= req->fmr_list; - -	use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR; - -	for_each_sg(scat, sg, count, i) { -		if (srp_map_sg_entry(&state, target, sg, i, use_fmr)) { -			/* FMR mapping failed, so backtrack to the first -			 * unmapped entry and continue on without using FMR. -			 */ -			dma_addr_t dma_addr; -			unsigned int dma_len; - -backtrack: -			sg = state.unmapped_sg; -			i = state.unmapped_index; - -			dma_addr = ib_sg_dma_address(ibdev, sg); -			dma_len = ib_sg_dma_len(ibdev, sg); -			dma_len -= (state.unmapped_addr - dma_addr); -			dma_addr = state.unmapped_addr; -			use_fmr = SRP_MAP_NO_FMR; -			srp_map_desc(&state, dma_addr, dma_len, target->rkey); -		} -	} - -	if (use_fmr == SRP_MAP_ALLOW_FMR && srp_map_finish_fmr(&state, target)) -		goto backtrack; +	srp_map_sg(&state, target, req, scat, count);  	/* We've mapped the request, now pull as much of the indirect  	 * descriptor table as we can into the command buffer. If this @@ -993,9 +1464,9 @@ backtrack:  	 * guaranteed to fit into the command, as the SCSI layer won't  	 * give us more S/G entries than we allow.  	 */ -	req->nfmr = state.nfmr;  	if (state.ndesc == 1) { -		/* FMR mapping was able to collapse this to one entry, +		/* +		 * Memory registration collapsed the sg-list into one entry,  		 * so use a direct descriptor.  		 
*/  		struct srp_direct_buf *buf = (void *) cmd->add_data; @@ -1151,7 +1622,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)  		complete(&target->tsk_mgmt_done);  	} else {  		req = &target->req_ring[rsp->tag]; -		scmnd = srp_claim_req(target, req, NULL); +		scmnd = srp_claim_req(target, req, NULL, NULL);  		if (!scmnd) {  			shost_printk(KERN_ERR, target->scsi_host,  				     "Null scmnd for RSP w/tag %016llx\n", @@ -1302,15 +1773,41 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)  			     PFX "Recv failed with error code %d\n", res);  } -static void srp_handle_qp_err(enum ib_wc_status wc_status, -			      enum ib_wc_opcode wc_opcode, -			      struct srp_target_port *target) +/** + * srp_tl_err_work() - handle a transport layer error + * @work: Work structure embedded in an SRP target port. + * + * Note: This function may get invoked before the rport has been created, + * hence the target->rport test. + */ +static void srp_tl_err_work(struct work_struct *work) +{ +	struct srp_target_port *target; + +	target = container_of(work, struct srp_target_port, tl_err_work); +	if (target->rport) +		srp_start_tl_fail_timers(target->rport); +} + +static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, +			      bool send_err, struct srp_target_port *target)  {  	if (target->connected && !target->qp_in_error) { -		shost_printk(KERN_ERR, target->scsi_host, -			     PFX "failed %s status %d\n", -			     wc_opcode & IB_WC_RECV ? "receive" : "send", -			     wc_status); +		if (wr_id & LOCAL_INV_WR_ID_MASK) { +			shost_printk(KERN_ERR, target->scsi_host, PFX +				     "LOCAL_INV failed with status %d\n", +				     wc_status); +		} else if (wr_id & FAST_REG_WR_ID_MASK) { +			shost_printk(KERN_ERR, target->scsi_host, PFX +				     "FAST_REG_MR failed status %d\n", +				     wc_status); +		} else { +			shost_printk(KERN_ERR, target->scsi_host, +				     PFX "failed %s status %d for iu %p\n", +				     send_err ? 
"send" : "receive", +				     wc_status, (void *)(uintptr_t)wr_id); +		} +		queue_work(system_long_wq, &target->tl_err_work);  	}  	target->qp_in_error = true;  } @@ -1325,7 +1822,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)  		if (likely(wc.status == IB_WC_SUCCESS)) {  			srp_handle_recv(target, &wc);  		} else { -			srp_handle_qp_err(wc.status, wc.opcode, target); +			srp_handle_qp_err(wc.wr_id, wc.status, false, target);  		}  	}  } @@ -1341,7 +1838,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)  			iu = (struct srp_iu *) (uintptr_t) wc.wr_id;  			list_add(&iu->list, &target->free_tx);  		} else { -			srp_handle_qp_err(wc.status, wc.opcode, target); +			srp_handle_qp_err(wc.wr_id, wc.status, true, target);  		}  	}  } @@ -1349,18 +1846,27 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)  static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)  {  	struct srp_target_port *target = host_to_target(shost); +	struct srp_rport *rport = target->rport;  	struct srp_request *req;  	struct srp_iu *iu;  	struct srp_cmd *cmd;  	struct ib_device *dev;  	unsigned long flags; -	int len; +	int len, ret; +	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler; -	if (unlikely(target->transport_offline)) { -		scmnd->result = DID_NO_CONNECT << 16; -		scmnd->scsi_done(scmnd); -		return 0; -	} +	/* +	 * The SCSI EH thread is the only context from which srp_queuecommand() +	 * can get invoked for blocked devices (SDEV_BLOCK / +	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by +	 * locking the rport mutex if invoked from inside the SCSI EH. +	 */ +	if (in_scsi_eh) +		mutex_lock(&rport->mutex); + +	scmnd->result = srp_chkready(target->rport); +	if (unlikely(scmnd->result)) +		goto err;  	spin_lock_irqsave(&target->lock, flags);  	iu = __srp_get_tx_iu(target, SRP_IU_CMD); @@ -1375,7 +1881,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)  	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,  				   DMA_TO_DEVICE); -	scmnd->result        = 0;  	scmnd->host_scribble = (void *) req;  	cmd = iu->buf; @@ -1392,7 +1897,15 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)  	len = srp_map_data(scmnd, target, req);  	if (len < 0) {  		shost_printk(KERN_ERR, target->scsi_host, -			     PFX "Failed to map data\n"); +			     PFX "Failed to map data (%d)\n", len); +		/* +		 * If we ran out of memory descriptors (-ENOMEM) because an +		 * application is queuing many requests with more than +		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer +		 * to reduce queue depth temporarily. +		 */ +		scmnd->result = len == -ENOMEM ? +			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;  		goto err_iu;  	} @@ -1404,7 +1917,13 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)  		goto err_unmap;  	} -	return 0; +	ret = 0; + +unlock_rport: +	if (in_scsi_eh) +		mutex_unlock(&rport->mutex); + +	return ret;  err_unmap:  	srp_unmap_data(scmnd, target, req); @@ -1412,20 +1931,47 @@ err_unmap:  err_iu:  	srp_put_tx_iu(target, iu, SRP_IU_CMD); +	/* +	 * Avoid that the loops that iterate over the request ring can +	 * encounter a dangling SCSI command pointer. 
+	 */ +	req->scmnd = NULL; +  	spin_lock_irqsave(&target->lock, flags);  	list_add(&req->list, &target->free_reqs);  err_unlock:  	spin_unlock_irqrestore(&target->lock, flags); -	return SCSI_MLQUEUE_HOST_BUSY; +err: +	if (scmnd->result) { +		scmnd->scsi_done(scmnd); +		ret = 0; +	} else { +		ret = SCSI_MLQUEUE_HOST_BUSY; +	} + +	goto unlock_rport;  } +/* + * Note: the resources allocated in this function are freed in + * srp_free_target_ib(). + */  static int srp_alloc_iu_bufs(struct srp_target_port *target)  {  	int i; -	for (i = 0; i < SRP_RQ_SIZE; ++i) { +	target->rx_ring = kzalloc(target->queue_size * sizeof(*target->rx_ring), +				  GFP_KERNEL); +	if (!target->rx_ring) +		goto err_no_ring; +	target->tx_ring = kzalloc(target->queue_size * sizeof(*target->tx_ring), +				  GFP_KERNEL); +	if (!target->tx_ring) +		goto err_no_ring; + +	for (i = 0; i < target->queue_size; ++i) {  		target->rx_ring[i] = srp_alloc_iu(target->srp_host,  						  target->max_ti_iu_len,  						  GFP_KERNEL, DMA_FROM_DEVICE); @@ -1433,7 +1979,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)  			goto err;  	} -	for (i = 0; i < SRP_SQ_SIZE; ++i) { +	for (i = 0; i < target->queue_size; ++i) {  		target->tx_ring[i] = srp_alloc_iu(target->srp_host,  						  target->max_iu_len,  						  GFP_KERNEL, DMA_TO_DEVICE); @@ -1446,16 +1992,18 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)  	return 0;  err: -	for (i = 0; i < SRP_RQ_SIZE; ++i) { +	for (i = 0; i < target->queue_size; ++i) {  		srp_free_iu(target->srp_host, target->rx_ring[i]); -		target->rx_ring[i] = NULL; -	} - -	for (i = 0; i < SRP_SQ_SIZE; ++i) {  		srp_free_iu(target->srp_host, target->tx_ring[i]); -		target->tx_ring[i] = NULL;  	} + +err_no_ring: +	kfree(target->tx_ring); +	target->tx_ring = NULL; +	kfree(target->rx_ring); +	target->rx_ring = NULL; +  	return -ENOMEM;  } @@ -1506,6 +2054,9 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,  		target->scsi_host->can_queue  			= min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,  			      target->scsi_host->can_queue); +		target->scsi_host->cmd_per_lun +			= min_t(int, target->scsi_host->can_queue, +				target->scsi_host->cmd_per_lun);  	} else {  		shost_printk(KERN_WARNING, target->scsi_host,  			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); @@ -1513,7 +2064,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,  		goto error;  	} -	if (!target->rx_ring[0]) { +	if (!target->rx_ring) {  		ret = srp_alloc_iu_bufs(target);  		if (ret)  			goto error; @@ -1533,7 +2084,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,  	if (ret)  		goto error_free; -	for (i = 0; i < SRP_RQ_SIZE; i++) { +	for (i = 0; i < target->queue_size; i++) {  		struct srp_iu *iu = target->rx_ring[i];  		ret = srp_post_recv(target, iu);  		if (ret) @@ -1619,8 +2170,10 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id,  				shost_printk(KERN_WARNING, shost,  					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");  			else -				shost_printk(KERN_WARNING, shost, -					    PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason); +				shost_printk(KERN_WARNING, shost, PFX +					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", +					     target->path.sgid.raw, +					     target->orig_dgid, reason);  		} else  			shost_printk(KERN_WARNING, shost,  				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED," @@ -1672,11 +2225,13 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)  		if (ib_send_cm_drep(cm_id, NULL, 0))  			shost_printk(KERN_ERR, 
target->scsi_host,  				     PFX "Sending CM DREP failed\n"); +		queue_work(system_long_wq, &target->tl_err_work);  		break;  	case IB_CM_TIMEWAIT_EXIT:  		shost_printk(KERN_ERR, target->scsi_host,  			     PFX "connection closed\n"); +		comp = 1;  		target->status = 0;  		break; @@ -1698,9 +2253,61 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)  	return 0;  } +/** + * srp_change_queue_type - changing device queue tag type + * @sdev: scsi device struct + * @tag_type: requested tag type + * + * Returns queue tag type. + */ +static int +srp_change_queue_type(struct scsi_device *sdev, int tag_type) +{ +	if (sdev->tagged_supported) { +		scsi_set_tag_type(sdev, tag_type); +		if (tag_type) +			scsi_activate_tcq(sdev, sdev->queue_depth); +		else +			scsi_deactivate_tcq(sdev, sdev->queue_depth); +	} else +		tag_type = 0; + +	return tag_type; +} + +/** + * srp_change_queue_depth - setting device queue depth + * @sdev: scsi device struct + * @qdepth: requested queue depth + * @reason: SCSI_QDEPTH_DEFAULT/SCSI_QDEPTH_QFULL/SCSI_QDEPTH_RAMP_UP + * (see include/scsi/scsi_host.h for definition) + * + * Returns queue depth. + */ +static int +srp_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) +{ +	struct Scsi_Host *shost = sdev->host; +	int max_depth; +	if (reason == SCSI_QDEPTH_DEFAULT || reason == SCSI_QDEPTH_RAMP_UP) { +		max_depth = shost->can_queue; +		if (!sdev->tagged_supported) +			max_depth = 1; +		if (qdepth > max_depth) +			qdepth = max_depth; +		scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth); +	} else if (reason == SCSI_QDEPTH_QFULL) +		scsi_track_queue_full(sdev, qdepth); +	else +		return -EOPNOTSUPP; + +	return sdev->queue_depth; +} +  static int srp_send_tsk_mgmt(struct srp_target_port *target,  			     u64 req_tag, unsigned int lun, u8 func)  { +	struct srp_rport *rport = target->rport;  	struct ib_device *dev = target->srp_host->srp_dev->dev;  	struct srp_iu *iu;  	struct srp_tsk_mgmt *tsk_mgmt; @@ -1710,12 +2317,20 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,  	init_completion(&target->tsk_mgmt_done); +	/* +	 * Lock the rport mutex to avoid that srp_create_target_ib() is +	 * invoked while a task management function is being sent. 
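srp_change_queue_depth() above clamps a requested depth to the host's can_queue, or to 1 when the device does not support tagged queuing, before handing it to scsi_adjust_queue_depth(). The clamp itself is only the following arithmetic; the shost and sdev fields are replaced by plain parameters in this sketch.

#include <stdbool.h>
#include <stdio.h>

/* Clamp a requested queue depth the same way the handler above does. */
static int clamp_qdepth(int requested, int can_queue, bool tagged_supported)
{
	int max_depth = tagged_supported ? can_queue : 1;

	return requested > max_depth ? max_depth : requested;
}

int main(void)
{
	printf("%d\n", clamp_qdepth(128, 62, true));	/* -> 62 */
	printf("%d\n", clamp_qdepth(128, 62, false));	/* -> 1  */
	return 0;
}
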
+	 */ +	mutex_lock(&rport->mutex);  	spin_lock_irq(&target->lock);  	iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);  	spin_unlock_irq(&target->lock); -	if (!iu) +	if (!iu) { +		mutex_unlock(&rport->mutex); +  		return -1; +	}  	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,  				   DMA_TO_DEVICE); @@ -1732,8 +2347,11 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,  				      DMA_TO_DEVICE);  	if (srp_post_send(target, iu, sizeof *tsk_mgmt)) {  		srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT); +		mutex_unlock(&rport->mutex); +  		return -1;  	} +	mutex_unlock(&rport->mutex);  	if (!wait_for_completion_timeout(&target->tsk_mgmt_done,  					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS))) @@ -1750,12 +2368,12 @@ static int srp_abort(struct scsi_cmnd *scmnd)  	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); -	if (!req || !srp_claim_req(target, req, scmnd)) -		return FAILED; +	if (!req || !srp_claim_req(target, req, NULL, scmnd)) +		return SUCCESS;  	if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,  			      SRP_TSK_ABORT_TASK) == 0)  		ret = SUCCESS; -	else if (target->transport_offline) +	else if (target->rport->state == SRP_RPORT_LOST)  		ret = FAST_IO_FAIL;  	else  		ret = FAILED; @@ -1779,10 +2397,9 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)  	if (target->tsk_mgmt_status)  		return FAILED; -	for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { +	for (i = 0; i < target->req_ring_size; ++i) {  		struct srp_request *req = &target->req_ring[i]; -		if (req->scmnd && req->scmnd->device == scmnd->device) -			srp_reset_req(target, req); +		srp_finish_req(target, req, scmnd->device, DID_RESET << 16);  	}  	return SUCCESS; @@ -1791,14 +2408,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)  static int srp_reset_host(struct scsi_cmnd *scmnd)  {  	struct srp_target_port *target = host_to_target(scmnd->device->host); -	int ret = FAILED;  	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); -	if (!srp_reconnect_target(target)) -		ret = SUCCESS; - -	return ret; +	return srp_reconnect_rport(target->rport) == 0 ? 
SUCCESS : FAILED;  }  static int srp_slave_configure(struct scsi_device *sdev) @@ -1851,6 +2464,14 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,  	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));  } +static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, +			 char *buf) +{ +	struct srp_target_port *target = host_to_target(class_to_shost(dev)); + +	return sprintf(buf, "%pI6\n", target->path.sgid.raw); +} +  static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,  			 char *buf)  { @@ -1907,6 +2528,14 @@ static ssize_t show_comp_vector(struct device *dev,  	return sprintf(buf, "%d\n", target->comp_vector);  } +static ssize_t show_tl_retry_count(struct device *dev, +				   struct device_attribute *attr, char *buf) +{ +	struct srp_target_port *target = host_to_target(class_to_shost(dev)); + +	return sprintf(buf, "%d\n", target->tl_retry_count); +} +  static ssize_t show_cmd_sg_entries(struct device *dev,  				   struct device_attribute *attr, char *buf)  { @@ -1927,6 +2556,7 @@ static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);  static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);  static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);  static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL); +static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);  static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);  static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);  static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL); @@ -1934,6 +2564,7 @@ static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);  static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);  static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);  static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL); +static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);  static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);  static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL); @@ -1942,6 +2573,7 @@ static struct device_attribute *srp_host_attrs[] = {  	&dev_attr_ioc_guid,  	&dev_attr_service_id,  	&dev_attr_pkey, +	&dev_attr_sgid,  	&dev_attr_dgid,  	&dev_attr_orig_dgid,  	&dev_attr_req_lim, @@ -1949,6 +2581,7 @@ static struct device_attribute *srp_host_attrs[] = {  	&dev_attr_local_ib_port,  	&dev_attr_local_ib_device,  	&dev_attr_comp_vector, +	&dev_attr_tl_retry_count,  	&dev_attr_cmd_sg_entries,  	&dev_attr_allow_ext_sg,  	NULL @@ -1961,14 +2594,16 @@ static struct scsi_host_template srp_template = {  	.slave_configure		= srp_slave_configure,  	.info				= srp_target_info,  	.queuecommand			= srp_queuecommand, +	.change_queue_depth             = srp_change_queue_depth, +	.change_queue_type              = srp_change_queue_type,  	.eh_abort_handler		= srp_abort,  	.eh_device_reset_handler	= srp_reset_device,  	.eh_host_reset_handler		= srp_reset_host,  	.skip_settle_delay		= true,  	.sg_tablesize			= SRP_DEF_SG_TABLESIZE, -	.can_queue			= SRP_CMD_SQ_SIZE, +	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,  	.this_id			= -1, -	.cmd_per_lun			= SRP_CMD_SQ_SIZE, +	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,  	.use_clustering			= ENABLE_CLUSTERING,  	.shost_attrs			= srp_host_attrs  }; @@ -1994,6 +2629,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)  	}  	rport->lld_data = target; +	target->rport = rport;  	
spin_lock(&host->target_lock);  	list_add_tail(&target->list, &host->target_list); @@ -2022,6 +2658,8 @@ static struct class srp_class = {  /**   * srp_conn_unique() - check whether the connection to a target is unique + * @host:   SRP host. + * @target: SRP target port.   */  static bool srp_conn_unique(struct srp_host *host,  			    struct srp_target_port *target) @@ -2073,6 +2711,8 @@ enum {  	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,  	SRP_OPT_SG_TABLESIZE	= 1 << 11,  	SRP_OPT_COMP_VECTOR	= 1 << 12, +	SRP_OPT_TL_RETRY_COUNT	= 1 << 13, +	SRP_OPT_QUEUE_SIZE	= 1 << 14,  	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|  				   SRP_OPT_IOC_GUID	|  				   SRP_OPT_DGID		| @@ -2094,6 +2734,8 @@ static const match_table_t srp_opt_tokens = {  	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},  	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},  	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	}, +	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	}, +	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},  	{ SRP_OPT_ERR,			NULL 			}  }; @@ -2188,13 +2830,25 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)  			target->scsi_host->max_sectors = token;  			break; +		case SRP_OPT_QUEUE_SIZE: +			if (match_int(args, &token) || token < 1) { +				pr_warn("bad queue_size parameter '%s'\n", p); +				goto out; +			} +			target->scsi_host->can_queue = token; +			target->queue_size = token + SRP_RSP_SQ_SIZE + +					     SRP_TSK_MGMT_SQ_SIZE; +			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) +				target->scsi_host->cmd_per_lun = token; +			break; +  		case SRP_OPT_MAX_CMD_PER_LUN: -			if (match_int(args, &token)) { +			if (match_int(args, &token) || token < 1) {  				pr_warn("bad max cmd_per_lun parameter '%s'\n",  					p);  				goto out;  			} -			target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE); +			target->scsi_host->cmd_per_lun = token;  			break;  		case SRP_OPT_IO_CLASS: @@ -2257,6 +2911,15 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)  			target->comp_vector = token;  			break; +		case SRP_OPT_TL_RETRY_COUNT: +			if (match_int(args, &token) || token < 2 || token > 7) { +				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", +					p); +				goto out; +			} +			target->tl_retry_count = token; +			break; +  		default:  			pr_warn("unknown parameter or missing value '%s' in target creation request\n",  				p); @@ -2273,6 +2936,12 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)  				pr_warn("target creation request is missing parameter '%s'\n",  					srp_opt_tokens[i].pattern); +	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue +	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) +		pr_warn("cmd_per_lun = %d > queue_size = %d\n", +			target->scsi_host->cmd_per_lun, +			target->scsi_host->can_queue); +  out:  	kfree(options);  	return ret; @@ -2286,9 +2955,9 @@ static ssize_t srp_create_target(struct device *dev,  		container_of(dev, struct srp_host, dev);  	struct Scsi_Host *target_host;  	struct srp_target_port *target; -	struct ib_device *ibdev = host->srp_dev->dev; -	dma_addr_t dma_addr; -	int i, ret; +	struct srp_device *srp_dev = host->srp_dev; +	struct ib_device *ibdev = srp_dev->dev; +	int ret;  	target_host = scsi_host_alloc(&srp_template,  				      sizeof (struct srp_target_port)); @@ -2311,11 +2980,17 @@ static ssize_t srp_create_target(struct device *dev,  	target->cmd_sg_cnt	= cmd_sg_entries;  	target->sg_tablesize	= indirect_sg_entries ? 
: cmd_sg_entries;  	target->allow_ext_sg	= allow_ext_sg; +	target->tl_retry_count	= 7; +	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE; + +	mutex_lock(&host->add_target_mutex);  	ret = srp_parse_options(buf, target);  	if (ret)  		goto err; +	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; +  	if (!srp_conn_unique(target->srp_host, target)) {  		shost_printk(KERN_INFO, target->scsi_host,  			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", @@ -2326,9 +3001,9 @@ static ssize_t srp_create_target(struct device *dev,  		goto err;  	} -	if (!host->srp_dev->fmr_pool && !target->allow_ext_sg && -				target->cmd_sg_cnt < target->sg_tablesize) { -		pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); +	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && +	    target->cmd_sg_cnt < target->sg_tablesize) { +		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");  		target->sg_tablesize = target->cmd_sg_cnt;  	} @@ -2339,42 +3014,17 @@ static ssize_t srp_create_target(struct device *dev,  			     sizeof (struct srp_indirect_buf) +  			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf); +	INIT_WORK(&target->tl_err_work, srp_tl_err_work);  	INIT_WORK(&target->remove_work, srp_remove_work);  	spin_lock_init(&target->lock);  	INIT_LIST_HEAD(&target->free_tx); -	INIT_LIST_HEAD(&target->free_reqs); -	for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { -		struct srp_request *req = &target->req_ring[i]; - -		req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof (void *), -					GFP_KERNEL); -		req->map_page = kmalloc(SRP_FMR_SIZE * sizeof (void *), -					GFP_KERNEL); -		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); -		if (!req->fmr_list || !req->map_page || !req->indirect_desc) -			goto err_free_mem; - -		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, -					     target->indirect_size, -					     DMA_TO_DEVICE); -		if (ib_dma_mapping_error(ibdev, dma_addr)) -			goto err_free_mem; - -		req->indirect_dma_addr = dma_addr; -		req->index = i; -		list_add_tail(&req->list, &target->free_reqs); -	} - -	ib_query_gid(ibdev, host->port, 0, &target->path.sgid); +	ret = srp_alloc_req_data(target); +	if (ret) +		goto err_free_mem; -	shost_printk(KERN_DEBUG, target->scsi_host, PFX -		     "new target: id_ext %016llx ioc_guid %016llx pkey %04x " -		     "service_id %016llx dgid %pI6\n", -	       (unsigned long long) be64_to_cpu(target->id_ext), -	       (unsigned long long) be64_to_cpu(target->ioc_guid), -	       be16_to_cpu(target->path.pkey), -	       (unsigned long long) be64_to_cpu(target->service_id), -	       target->path.dgid.raw); +	ret = ib_query_gid(ibdev, host->port, 0, &target->path.sgid); +	if (ret) +		goto err_free_mem;  	ret = srp_create_target_ib(target);  	if (ret) @@ -2395,7 +3045,19 @@ static ssize_t srp_create_target(struct device *dev,  	if (ret)  		goto err_disconnect; -	return count; +	shost_printk(KERN_DEBUG, target->scsi_host, PFX +		     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", +		     be64_to_cpu(target->id_ext), +		     be64_to_cpu(target->ioc_guid), +		     be16_to_cpu(target->path.pkey), +		     be64_to_cpu(target->service_id), +		     target->path.sgid.raw, target->path.dgid.raw); + +	ret = count; + +out: +	mutex_unlock(&host->add_target_mutex); +	return ret;  err_disconnect:  	srp_disconnect_target(target); @@ -2411,8 +3073,7 @@ 
err_free_mem:  err:  	scsi_host_put(target_host); - -	return ret; +	goto out;  }  static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target); @@ -2448,6 +3109,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)  	INIT_LIST_HEAD(&host->target_list);  	spin_lock_init(&host->target_lock);  	init_completion(&host->released); +	mutex_init(&host->add_target_mutex);  	host->srp_dev = device;  	host->port = port; @@ -2479,9 +3141,9 @@ static void srp_add_one(struct ib_device *device)  {  	struct srp_device *srp_dev;  	struct ib_device_attr *dev_attr; -	struct ib_fmr_pool_param fmr_param;  	struct srp_host *host; -	int max_pages_per_fmr, fmr_page_shift, s, e, p; +	int mr_page_shift, s, e, p; +	u64 max_pages_per_mr;  	dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);  	if (!dev_attr) @@ -2496,15 +3158,39 @@ static void srp_add_one(struct ib_device *device)  	if (!srp_dev)  		goto free_attr; +	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && +			    device->map_phys_fmr && device->unmap_fmr); +	srp_dev->has_fr = (dev_attr->device_cap_flags & +			   IB_DEVICE_MEM_MGT_EXTENSIONS); +	if (!srp_dev->has_fmr && !srp_dev->has_fr) +		dev_warn(&device->dev, "neither FMR nor FR is supported\n"); + +	srp_dev->use_fast_reg = (srp_dev->has_fr && +				 (!srp_dev->has_fmr || prefer_fr)); +  	/*  	 * Use the smallest page size supported by the HCA, down to a  	 * minimum of 4096 bytes. We're unlikely to build large sglists  	 * out of smaller entries.  	 */ -	fmr_page_shift		= max(12, ffs(dev_attr->page_size_cap) - 1); -	srp_dev->fmr_page_size	= 1 << fmr_page_shift; -	srp_dev->fmr_page_mask	= ~((u64) srp_dev->fmr_page_size - 1); -	srp_dev->fmr_max_size	= srp_dev->fmr_page_size * SRP_FMR_SIZE; +	mr_page_shift		= max(12, ffs(dev_attr->page_size_cap) - 1); +	srp_dev->mr_page_size	= 1 << mr_page_shift; +	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1); +	max_pages_per_mr	= dev_attr->max_mr_size; +	do_div(max_pages_per_mr, srp_dev->mr_page_size); +	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, +					  max_pages_per_mr); +	if (srp_dev->use_fast_reg) { +		srp_dev->max_pages_per_mr = +			min_t(u32, srp_dev->max_pages_per_mr, +			      dev_attr->max_fast_reg_page_list_len); +	} +	srp_dev->mr_max_size	= srp_dev->mr_page_size * +				   srp_dev->max_pages_per_mr; +	pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", +		 device->name, mr_page_shift, dev_attr->max_mr_size, +		 dev_attr->max_fast_reg_page_list_len, +		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);  	INIT_LIST_HEAD(&srp_dev->dev_list); @@ -2520,27 +3206,6 @@ static void srp_add_one(struct ib_device *device)  	if (IS_ERR(srp_dev->mr))  		goto err_pd; -	for (max_pages_per_fmr = SRP_FMR_SIZE; -			max_pages_per_fmr >= SRP_FMR_MIN_SIZE; -			max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) { -		memset(&fmr_param, 0, sizeof fmr_param); -		fmr_param.pool_size	    = SRP_FMR_POOL_SIZE; -		fmr_param.dirty_watermark   = SRP_FMR_DIRTY_SIZE; -		fmr_param.cache		    = 1; -		fmr_param.max_pages_per_fmr = max_pages_per_fmr; -		fmr_param.page_shift	    = fmr_page_shift; -		fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE | -					       IB_ACCESS_REMOTE_WRITE | -					       IB_ACCESS_REMOTE_READ); - -		srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param); -		if (!IS_ERR(srp_dev->fmr_pool)) -			break; -	} - -	if (IS_ERR(srp_dev->fmr_pool)) -		srp_dev->fmr_pool = NULL; -  	if (device->node_type == 
RDMA_NODE_IB_SWITCH) {  		s = 0;  		e = 0; @@ -2603,8 +3268,6 @@ static void srp_remove_one(struct ib_device *device)  		kfree(host);  	} -	if (srp_dev->fmr_pool) -		ib_destroy_fmr_pool(srp_dev->fmr_pool);  	ib_dereg_mr(srp_dev->mr);  	ib_dealloc_pd(srp_dev->pd); @@ -2612,7 +3275,14 @@ static void srp_remove_one(struct ib_device *device)  }  static struct srp_function_template ib_srp_transport_functions = { +	.has_rport_state	 = true, +	.reset_timer_if_blocked	 = true, +	.reconnect_delay	 = &srp_reconnect_delay, +	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo, +	.dev_loss_tmo		 = &srp_dev_loss_tmo, +	.reconnect		 = srp_rport_reconnect,  	.rport_delete		 = srp_rport_delete, +	.terminate_rport_io	 = srp_terminate_io,  };  static int __init srp_init_module(void) diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index e641088c14d..e46ecb15aa0 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -57,25 +57,19 @@ enum {  	SRP_MAX_LUN		= 512,  	SRP_DEF_SG_TABLESIZE	= 12, -	SRP_RQ_SHIFT    	= 6, -	SRP_RQ_SIZE		= 1 << SRP_RQ_SHIFT, - -	SRP_SQ_SIZE		= SRP_RQ_SIZE, +	SRP_DEFAULT_QUEUE_SIZE	= 1 << 6,  	SRP_RSP_SQ_SIZE		= 1, -	SRP_REQ_SQ_SIZE		= SRP_SQ_SIZE - SRP_RSP_SQ_SIZE,  	SRP_TSK_MGMT_SQ_SIZE	= 1, -	SRP_CMD_SQ_SIZE		= SRP_REQ_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE, +	SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE - +				  SRP_TSK_MGMT_SQ_SIZE,  	SRP_TAG_NO_REQ		= ~0U,  	SRP_TAG_TSK_MGMT	= 1U << 31, -	SRP_FMR_SIZE		= 512, -	SRP_FMR_MIN_SIZE	= 128, -	SRP_FMR_POOL_SIZE	= 1024, -	SRP_FMR_DIRTY_SIZE	= SRP_FMR_POOL_SIZE / 4, +	SRP_MAX_PAGES_PER_MR	= 512, -	SRP_MAP_ALLOW_FMR	= 0, -	SRP_MAP_NO_FMR		= 1, +	LOCAL_INV_WR_ID_MASK	= 1, +	FAST_REG_WR_ID_MASK	= 2,  };  enum srp_target_state { @@ -89,15 +83,24 @@ enum srp_iu_type {  	SRP_IU_RSP,  }; +/* + * @mr_page_mask: HCA memory registration page mask. + * @mr_page_size: HCA memory registration page size. + * @mr_max_size: Maximum size in bytes of a single FMR / FR registration + *   request. + */  struct srp_device {  	struct list_head	dev_list;  	struct ib_device       *dev;  	struct ib_pd	       *pd;  	struct ib_mr	       *mr; -	struct ib_fmr_pool     *fmr_pool; -	u64			fmr_page_mask; -	int			fmr_page_size; -	int			fmr_max_size; +	u64			mr_page_mask; +	int			mr_page_size; +	int			mr_max_size; +	int			max_pages_per_mr; +	bool			has_fmr; +	bool			has_fr; +	bool			use_fast_reg;  };  struct srp_host { @@ -108,17 +111,21 @@ struct srp_host {  	spinlock_t		target_lock;  	struct completion	released;  	struct list_head	list; +	struct mutex		add_target_mutex;  };  struct srp_request {  	struct list_head	list;  	struct scsi_cmnd       *scmnd;  	struct srp_iu	       *cmd; -	struct ib_pool_fmr    **fmr_list; +	union { +		struct ib_pool_fmr **fmr_list; +		struct srp_fr_desc **fr_list; +	};  	u64		       *map_page;  	struct srp_direct_buf  *indirect_desc;  	dma_addr_t		indirect_dma_addr; -	short			nfmr; +	short			nmdesc;  	short			index;  }; @@ -133,6 +140,10 @@ struct srp_target_port {  	struct ib_cq	       *send_cq ____cacheline_aligned_in_smp;  	struct ib_cq	       *recv_cq;  	struct ib_qp	       *qp; +	union { +		struct ib_fmr_pool     *fmr_pool; +		struct srp_fr_pool     *fr_pool; +	};  	u32			lkey;  	u32			rkey;  	enum srp_target_state	state; @@ -140,7 +151,6 @@ struct srp_target_port {  	unsigned int		cmd_sg_cnt;  	unsigned int		indirect_size;  	bool			allow_ext_sg; -	bool			transport_offline;  	/* Everything above this point is used in the hot path of  	 * command processing. 
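srp_add_one() above derives the registration page size from the HCA's page_size_cap, never going below 4 KiB, and then bounds max_pages_per_mr by max_mr_size, by SRP_MAX_PAGES_PER_MR and, when fast registration is used, by max_fast_reg_page_list_len. The same arithmetic in standalone form is shown below; the capability values in main() are made-up examples, not real device attributes.

#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	uint64_t page_size_cap = 0x000ff000;	/* example HCA capability mask */
	uint64_t max_mr_size = 1ULL << 32;	/* example: 4 GiB per MR */
	unsigned int max_frpl_len = 256;	/* example FR page list limit */
	unsigned int hard_cap = 512;		/* like SRP_MAX_PAGES_PER_MR */

	int mr_page_shift = ffs((int)page_size_cap) - 1;
	if (mr_page_shift < 12)
		mr_page_shift = 12;

	uint64_t mr_page_size = 1ULL << mr_page_shift;
	uint64_t mr_page_mask = ~(mr_page_size - 1);
	uint64_t pages = max_mr_size / mr_page_size;

	if (pages > hard_cap)
		pages = hard_cap;
	if (pages > max_frpl_len)	/* only applies when using fast reg */
		pages = max_frpl_len;

	printf("page size %llu, mask %#llx, max pages per MR %llu\n",
	       (unsigned long long)mr_page_size,
	       (unsigned long long)mr_page_mask,
	       (unsigned long long)pages);
	return 0;
}
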
@@ -140,7 +151,6 @@ struct srp_target_port {
 	unsigned int		cmd_sg_cnt;
 	unsigned int		indirect_size;
 	bool			allow_ext_sg;
-	bool			transport_offline;
 
 	/* Everything above this point is used in the hot path of
 	 * command processing. Try to keep them packed into cachelines.
@@ -153,10 +163,14 @@ struct srp_target_port {
 	u16			io_class;
 	struct srp_host	       *srp_host;
 	struct Scsi_Host       *scsi_host;
+	struct srp_rport       *rport;
 	char			target_name[32];
 	unsigned int		scsi_id;
 	unsigned int		sg_tablesize;
+	int			queue_size;
+	int			req_ring_size;
 	int			comp_vector;
+	int			tl_retry_count;
 
 	struct ib_sa_path_rec	path;
 	__be16			orig_dgid[8];
@@ -172,10 +186,11 @@
 
 	int			zero_req_lim;
 
-	struct srp_iu	       *tx_ring[SRP_SQ_SIZE];
-	struct srp_iu	       *rx_ring[SRP_RQ_SIZE];
-	struct srp_request	req_ring[SRP_CMD_SQ_SIZE];
+	struct srp_iu	       **tx_ring;
+	struct srp_iu	       **rx_ring;
+	struct srp_request	*req_ring;
 
+	struct work_struct	tl_err_work;
 	struct work_struct	remove_work;
 
 	struct list_head	list;
@@ -195,15 +210,66 @@ struct srp_iu {
 	enum dma_data_direction	direction;
 };
 
+/**
+ * struct srp_fr_desc - fast registration work request arguments
+ * @entry: Entry in srp_fr_pool.free_list.
+ * @mr:    Memory region.
+ * @frpl:  Fast registration page list.
+ */
+struct srp_fr_desc {
+	struct list_head		entry;
+	struct ib_mr			*mr;
+	struct ib_fast_reg_page_list	*frpl;
+};
+
+/**
+ * struct srp_fr_pool - pool of fast registration descriptors
+ *
+ * An entry is available for allocation if and only if it occurs in @free_list.
+ *
+ * @size:      Number of descriptors in this pool.
+ * @max_page_list_len: Maximum fast registration work request page list length.
+ * @lock:      Protects free_list.
+ * @free_list: List of free descriptors.
+ * @desc:      Fast registration descriptor pool.
+ */
+struct srp_fr_pool {
+	int			size;
+	int			max_page_list_len;
+	spinlock_t		lock;
+	struct list_head	free_list;
+	struct srp_fr_desc	desc[0];
+};
+
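The new srp_fr_pool is a spinlock-protected free list of srp_fr_desc entries. A minimal sketch of taking and returning a descriptor, under the invariant stated in the kernel-doc above; srp_fr_pool_get()/srp_fr_pool_put() are illustrative names, not necessarily the helpers the patch adds.

#include <linux/list.h>
#include <linux/spinlock.h>

/* Illustrative pool accessors: an entry is available iff it is on
 * free_list. */
static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
{
	struct srp_fr_desc *d = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	if (!list_empty(&pool->free_list)) {
		d = list_first_entry(&pool->free_list, struct srp_fr_desc,
				     entry);
		list_del(&d->entry);
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	return d;
}

static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc *d)
{
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	list_add(&d->entry, &pool->free_list);
	spin_unlock_irqrestore(&pool->lock, flags);
}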
+/**
+ * struct srp_map_state - per-request DMA memory mapping state
+ * @desc:	    Pointer to the element of the SRP buffer descriptor array
+ *		    that is being filled in.
+ * @pages:	    Array with DMA addresses of pages being considered for
+ *		    memory registration.
+ * @base_dma_addr:  DMA address of the first page that has not yet been mapped.
+ * @dma_len:	    Number of bytes that will be registered with the next
+ *		    FMR or FR memory registration call.
+ * @total_len:	    Total number of bytes in the sg-list being mapped.
+ * @npages:	    Number of page addresses in the pages[] array.
+ * @nmdesc:	    Number of FMR or FR memory descriptors used for mapping.
+ * @ndesc:	    Number of SRP buffer descriptors that have been filled in.
+ * @unmapped_sg:    First element of the sg-list that is mapped via FMR or FR.
+ * @unmapped_index: Index of the first element mapped via FMR or FR.
+ * @unmapped_addr:  DMA address of the first element mapped via FMR or FR.
+ */
 struct srp_map_state {
-	struct ib_pool_fmr    **next_fmr;
+	union {
+		struct ib_pool_fmr **next_fmr;
+		struct srp_fr_desc **next_fr;
+	};
 	struct srp_direct_buf  *desc;
 	u64		       *pages;
 	dma_addr_t		base_dma_addr;
-	u32			fmr_len;
+	u32			dma_len;
 	u32			total_len;
 	unsigned int		npages;
-	unsigned int		nfmr;
+	unsigned int		nmdesc;
 	unsigned int		ndesc;
 	struct scatterlist     *unmapped_sg;
 	int			unmapped_index;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 653ac6bfc57..fe09f2788b1 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1078,6 +1078,7 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 				 struct srpt_send_ioctx *ioctx)
 {
+	struct ib_device *dev = ch->sport->sdev->device;
 	struct se_cmd *cmd;
 	struct scatterlist *sg, *sg_orig;
 	int sg_cnt;
@@ -1124,7 +1125,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 
 	db = ioctx->rbufs;
 	tsize = cmd->data_length;
-	dma_len = sg_dma_len(&sg[0]);
+	dma_len = ib_sg_dma_len(dev, &sg[0]);
 	riu = ioctx->rdma_ius;
 
 	/*
@@ -1155,7 +1156,8 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 					++j;
 					if (j < count) {
 						sg = sg_next(sg);
-						dma_len = sg_dma_len(sg);
+						dma_len = ib_sg_dma_len(
+								dev, sg);
 					}
 				}
 			} else {
@@ -1192,8 +1194,8 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 	tsize = cmd->data_length;
 	riu = ioctx->rdma_ius;
 	sg = sg_orig;
-	dma_len = sg_dma_len(&sg[0]);
-	dma_addr = sg_dma_address(&sg[0]);
+	dma_len = ib_sg_dma_len(dev, &sg[0]);
+	dma_addr = ib_sg_dma_address(dev, &sg[0]);
 
 	/* this second loop is really mapped sg_addres to rdma_iu->ib_sge */
 	for (i = 0, j = 0;
@@ -1216,8 +1218,10 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
 					++j;
 					if (j < count) {
 						sg = sg_next(sg);
-						dma_len = sg_dma_len(sg);
-						dma_addr = sg_dma_address(sg);
+						dma_len = ib_sg_dma_len(
+								dev, sg);
+						dma_addr = ib_sg_dma_address(
+								dev, sg);
 					}
 				}
 			} else {
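The srpt hunks above switch srpt_map_sg_to_ib_sge() from sg_dma_len()/sg_dma_address() to the ib_sg_dma_*() wrappers, so that an ib_device that implements its own DMA mapping operations reports the correct bus addresses and lengths. An illustrative walk over an already-mapped S/G list using the same accessors; total_dma_len() is a made-up helper, not part of the patch.

#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

/* Sum the DMA lengths of a mapped S/G list with the device-aware
 * accessors, mirroring the pattern in the hunks above. */
static u64 total_dma_len(struct ib_device *dev, struct scatterlist *sglist,
			 int sg_cnt)
{
	struct scatterlist *sg;
	u64 len = 0;
	int i;

	for (i = 0, sg = sglist; i < sg_cnt; i++, sg = sg_next(sg))
		len += ib_sg_dma_len(dev, sg);

	return len;
}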
@@ -1352,11 +1356,8 @@ static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
 
 		/* XXX(hch): this is a horrible layering violation.. */
 		spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
-		ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
 		ioctx->cmd.transport_state &= ~CMD_T_ACTIVE;
 		spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
-
-		complete(&ioctx->cmd.transport_lun_stop_comp);
 		break;
 	case SRPT_STATE_CMD_RSP_SENT:
 		/*
 		 * not been received in time.
 		 */
 		srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
-		spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
-		ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
-		spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
 		target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd);
 		break;
 	case SRPT_STATE_MGMT_RSP_SENT:
@@ -1476,7 +1474,6 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
 {
 	struct se_cmd *cmd;
 	enum srpt_command_state state;
-	unsigned long flags;
 
 	cmd = &ioctx->cmd;
 	state = srpt_get_cmd_state(ioctx);
@@ -1496,9 +1493,6 @@
 			       __func__, __LINE__, state);
 		break;
 	case SRPT_RDMA_WRITE_LAST:
-		spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
-		ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
-		spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
 		break;
 	default:
 		printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__,
@@ -1588,7 +1582,7 @@ static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
 	int resp_data_len;
 	int resp_len;
 
-	resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4;
+	resp_data_len = 4;
 	resp_len = sizeof(*srp_rsp) + resp_data_len;
 
 	srp_rsp = ioctx->ioctx.buf;
@@ -1600,11 +1594,9 @@
 				    + atomic_xchg(&ch->req_lim_delta, 0));
 	srp_rsp->tag = tag;
 
-	if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
-		srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
-		srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
-		srp_rsp->data[3] = rsp_code;
-	}
+	srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
+	srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
+	srp_rsp->data[3] = rsp_code;
 
 	return resp_len;
 }
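With the srpt_build_tskmgmt_rsp() change above, a task-management response always carries the RSPVALID flag and four bytes of response data, with the status code in the last byte, instead of omitting the response data for SRP_TSK_MGMT_SUCCESS. A reduced, hypothetical sketch of that layout follows; example_fill_tm_rsp() is a made-up name and it assumes the buffer behind srp_rsp has room for the trailing response data.

#include <linux/string.h>
#include <scsi/srp.h>

/* Illustrative only: fill the task-management portion of an SRP_RSP.
 * RSPVALID plus four bytes of response data are now sent for successful
 * responses as well. */
static int example_fill_tm_rsp(struct srp_rsp *srp_rsp, u64 tag, u8 rsp_code)
{
	int resp_data_len = 4;

	memset(srp_rsp, 0, sizeof(*srp_rsp));
	srp_rsp->opcode = SRP_RSP;
	srp_rsp->tag = tag;
	srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
	srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
	srp_rsp->data[3] = rsp_code;

	return sizeof(*srp_rsp) + resp_data_len;
}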
@@ -2358,6 +2350,8 @@ static void srpt_release_channel_work(struct work_struct *w)
 	transport_deregister_session(se_sess);
 	ch->sess = NULL;
 
+	ib_destroy_cm_id(ch->cm_id);
+
 	srpt_destroy_ch_ib(ch);
 
 	srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
@@ -2368,8 +2362,6 @@ static void srpt_release_channel_work(struct work_struct *w)
 	list_del(&ch->list);
 	spin_unlock_irq(&sdev->spinlock);
 
-	ib_destroy_cm_id(ch->cm_id);
-
 	if (ch->release_done)
 		complete(ch->release_done);
 
@@ -2592,7 +2584,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
 		goto destroy_ib;
 	}
 
-	ch->sess = transport_init_session();
+	ch->sess = transport_init_session(TARGET_PROT_NORMAL);
 	if (IS_ERR(ch->sess)) {
 		rej->reason = __constant_cpu_to_be32(
 				SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
@@ -3093,6 +3085,14 @@ static void srpt_queue_tm_rsp(struct se_cmd *cmd)
 	srpt_queue_response(cmd);
 }
 
+static void srpt_aborted_task(struct se_cmd *cmd)
+{
+	struct srpt_send_ioctx *ioctx = container_of(cmd,
+				struct srpt_send_ioctx, cmd);
+
+	srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
+}
+
 static int srpt_queue_status(struct se_cmd *cmd)
 {
 	struct srpt_send_ioctx *ioctx;
@@ -3678,9 +3678,9 @@ static ssize_t srpt_tpg_attrib_store_srp_max_rdma_size(
 	unsigned long val;
 	int ret;
 
-	ret = strict_strtoul(page, 0, &val);
+	ret = kstrtoul(page, 0, &val);
 	if (ret < 0) {
-		pr_err("strict_strtoul() failed with ret: %d\n", ret);
+		pr_err("kstrtoul() failed with ret: %d\n", ret);
 		return -EINVAL;
 	}
 	if (val > MAX_SRPT_RDMA_SIZE) {
@@ -3718,9 +3718,9 @@ static ssize_t srpt_tpg_attrib_store_srp_max_rsp_size(
 	unsigned long val;
 	int ret;
 
-	ret = strict_strtoul(page, 0, &val);
+	ret = kstrtoul(page, 0, &val);
 	if (ret < 0) {
-		pr_err("strict_strtoul() failed with ret: %d\n", ret);
+		pr_err("kstrtoul() failed with ret: %d\n", ret);
 		return -EINVAL;
 	}
 	if (val > MAX_SRPT_RSP_SIZE) {
@@ -3758,9 +3758,9 @@ static ssize_t srpt_tpg_attrib_store_srp_sq_size(
 	unsigned long val;
 	int ret;
 
-	ret = strict_strtoul(page, 0, &val);
+	ret = kstrtoul(page, 0, &val);
 	if (ret < 0) {
-		pr_err("strict_strtoul() failed with ret: %d\n", ret);
+		pr_err("kstrtoul() failed with ret: %d\n", ret);
 		return -EINVAL;
 	}
 	if (val > MAX_SRPT_SRQ_SIZE) {
@@ -3805,7 +3805,7 @@ static ssize_t srpt_tpg_store_enable(
 	unsigned long tmp;
         int ret;
 
-	ret = strict_strtoul(page, 0, &tmp);
+	ret = kstrtoul(page, 0, &tmp);
 	if (ret < 0) {
 		printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n");
 		return -EINVAL;
@@ -3940,6 +3940,7 @@ static struct target_core_fabric_ops srpt_template = {
 	.queue_data_in			= srpt_queue_data_in,
 	.queue_status			= srpt_queue_status,
 	.queue_tm_rsp			= srpt_queue_tm_rsp,
+	.aborted_task			= srpt_aborted_task,
 	/*
 	 * Setup function pointers for generic logic in
 	 * target_core_fabric_configfs.c
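The strict_strtoul() to kstrtoul() conversions above all follow the same parse-and-range-check pattern; strict_strtoul() has been removed from the kernel and kstrtoul() is its replacement. A generic sketch of the pattern; the function name and parameters are placeholders, not code from the patch.

#include <linux/kernel.h>

/* Placeholder store helper showing the kstrtoul() pattern from the hunks
 * above: parse the user-supplied page (base 0 accepts decimal, hex and
 * octal), reject out-of-range values, then report the bytes consumed. */
static ssize_t example_store_ulong(const char *page, size_t count,
				   unsigned long max, unsigned long *out)
{
	unsigned long val;
	int ret;

	ret = kstrtoul(page, 0, &val);
	if (ret < 0) {
		pr_err("kstrtoul() failed with ret: %d\n", ret);
		return -EINVAL;
	}
	if (val > max)
		return -EINVAL;

	*out = val;
	return count;
}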
