Diffstat (limited to 'drivers/infiniband/ulp/iser')
 drivers/infiniband/ulp/iser/iscsi_iser.c     | 188
 drivers/infiniband/ulp/iser/iscsi_iser.h     |  93
 drivers/infiniband/ulp/iser/iser_initiator.c | 179
 drivers/infiniband/ulp/iser/iser_memory.c    | 470
 drivers/infiniband/ulp/iser/iser_verbs.c     | 418
 5 files changed, 925 insertions(+), 423 deletions(-)
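
The changes below wire T10 Protection Information (T10-PI, the DIF/DIX data-integrity format) offload into the iSER initiator: new pi_enable/pi_guard module parameters, per-task protection buffers, and signature-enabled fast memory registration. For orientation, a minimal sketch of the 8-byte protection tuple that accompanies each data block in T10-PI (illustrative layout, not part of this patch; assumes <linux/types.h>):

	/* Illustrative T10-PI tuple, one per data block. */
	struct dif_tuple {
		__be16 guard_tag;	/* CRC16 of the block (or IP checksum for DIX GUARD_IP) */
		__be16 app_tag;		/* application-defined tag */
		__be32 ref_tag;		/* low 32 bits of the target LBA */
	};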
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index dd03cfe596d..eb7973957a6 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -5,7 +5,7 @@
  * Copyright (C) 2004 Alex Aizman
  * Copyright (C) 2005 Mike Christie
  * Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
  * maintained by openib-general@openib.org
  *
  * This software is available to you under a choice of one of two
@@ -82,6 +82,8 @@ static unsigned int iscsi_max_lun = 512;
 module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
 
 int iser_debug_level = 0;
+bool iser_pi_enable = false;
+int iser_pi_guard = 0;
 
 MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -91,6 +93,13 @@ MODULE_VERSION(DRV_VER);
 module_param_named(debug_level, iser_debug_level, int, 0644);
 MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)");
 
+module_param_named(pi_enable, iser_pi_enable, bool, 0644);
+MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
+
+module_param_named(pi_guard, iser_pi_guard, int, 0644);
+MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)");
+
+static struct workqueue_struct *release_wq;
 struct iser_global ig;
 
 void
@@ -138,8 +147,8 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
 int iser_initialize_task_headers(struct iscsi_task *task,
 						struct iser_tx_desc *tx_desc)
 {
-	struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
-	struct iser_device     *device    = iser_conn->ib_conn->device;
+	struct iser_conn       *ib_conn   = task->conn->dd_data;
+	struct iser_device     *device    = ib_conn->device;
 	struct iscsi_iser_task *iser_task = task->dd_data;
 	u64 dma_addr;
 
@@ -153,7 +162,7 @@ int iser_initialize_task_headers(struct iscsi_task *task,
 	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
 	tx_desc->tx_sg[0].lkey   = device->mr->lkey;
 
-	iser_task->iser_conn		= iser_conn;
+	iser_task->ib_conn = ib_conn;
 
 	return 0;
 }
 /**
@@ -176,6 +185,8 @@ iscsi_iser_task_init(struct iscsi_task *task)
 	iser_task->command_sent = 0;
 	iser_task_rdma_init(iser_task);
+	iser_task->sc = task->sc;
+
 	return 0;
 }
@@ -278,10 +289,9 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
 static void iscsi_iser_cleanup_task(struct iscsi_task *task)
 {
 	struct iscsi_iser_task *iser_task = task->dd_data;
-	struct iser_tx_desc	*tx_desc = &iser_task->desc;
-
-	struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
-	struct iser_device     *device    = iser_conn->ib_conn->device;
+	struct iser_tx_desc    *tx_desc   = &iser_task->desc;
+	struct iser_conn       *ib_conn	  = task->conn->dd_data;
+	struct iser_device     *device	  = ib_conn->device;
 
 	ib_dma_unmap_single(device->ib_device,
 		tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
@@ -296,14 +306,25 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task)
 	}
 }
 
+static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
+{
+	struct iscsi_iser_task *iser_task = task->dd_data;
+
+	if (iser_task->dir[ISER_DIR_IN])
+		return iser_check_task_pi_status(iser_task, ISER_DIR_IN,
+						 sector);
+	else
+		return iser_check_task_pi_status(iser_task, ISER_DIR_OUT,
+						 sector);
+}
+
 static struct iscsi_cls_conn *
 iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
 {
 	struct iscsi_conn *conn;
 	struct iscsi_cls_conn *cls_conn;
-	struct iscsi_iser_conn *iser_conn;
 
-	cls_conn = iscsi_conn_setup(cls_session, sizeof(*iser_conn), conn_idx);
+	cls_conn = iscsi_conn_setup(cls_session, 0, conn_idx);
 	if (!cls_conn)
 		return NULL;
 	conn = cls_conn->dd_data;
@@ -314,39 +335,15 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
 	 */
 	conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN;
 
-	iser_conn = conn->dd_data;
-	conn->dd_data = iser_conn;
-	iser_conn->iscsi_conn = conn;
-
 	return cls_conn;
 }
 
-static void
-iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn)
-{
-	struct iscsi_conn *conn = cls_conn->dd_data;
-	struct iscsi_iser_conn *iser_conn = conn->dd_data;
-	struct iser_conn *ib_conn = iser_conn->ib_conn;
-
-	iscsi_conn_teardown(cls_conn);
-	/*
-	 * Userspace will normally call the stop callback and
-	 * already have freed the ib_conn, but if it goofed up then
-	 * we free it here.
-	 */
-	if (ib_conn) {
-		ib_conn->iser_conn = NULL;
-		iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
-	}
-}
-
 static int
 iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
 		     struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
 		     int is_leading)
 {
 	struct iscsi_conn *conn = cls_conn->dd_data;
-	struct iscsi_iser_conn *iser_conn;
 	struct iscsi_session *session;
 	struct iser_conn *ib_conn;
 	struct iscsi_endpoint *ep;
@@ -373,35 +370,44 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
 	/* binds the iSER connection retrieved from the previously
 	 * connected ep_handle to the iSCSI layer connection. exchanges
 	 * connection pointers */
-	iser_info("binding iscsi/iser conn %p %p to ib_conn %p\n",
-		  conn, conn->dd_data, ib_conn);
-	iser_conn = conn->dd_data;
-	ib_conn->iser_conn = iser_conn;
-	iser_conn->ib_conn  = ib_conn;
-	iser_conn_get(ib_conn); /* ref iscsi/ib conn binding */
+	iser_info("binding iscsi conn %p to ib_conn %p\n", conn, ib_conn);
+
+	conn->dd_data = ib_conn;
+	ib_conn->iscsi_conn = conn;
+
 	return 0;
 }
 
+static int
+iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
+{
+	struct iscsi_conn *iscsi_conn;
+	struct iser_conn *ib_conn;
+
+	iscsi_conn = cls_conn->dd_data;
+	ib_conn = iscsi_conn->dd_data;
+	reinit_completion(&ib_conn->stop_completion);
+
+	return iscsi_conn_start(cls_conn);
+}
+
 static void
 iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
 {
 	struct iscsi_conn *conn = cls_conn->dd_data;
-	struct iscsi_iser_conn *iser_conn = conn->dd_data;
-	struct iser_conn *ib_conn = iser_conn->ib_conn;
+	struct iser_conn *ib_conn = conn->dd_data;
+
+	iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn);
+	iscsi_conn_stop(cls_conn, flag);
 
 	/*
 	 * Userspace may have goofed up and not bound the connection or
 	 * might have only partially setup the connection.
 	 */
 	if (ib_conn) {
-		iscsi_conn_stop(cls_conn, flag);
-		/*
-		 * There is no unbind event so the stop callback
-		 * must release the ref from the bind.
-		 */
-		iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
+		conn->dd_data = NULL;
+		complete(&ib_conn->stop_completion);
 	}
-	iser_conn->ib_conn = NULL;
 }
 
 static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
@@ -413,6 +419,17 @@ static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
 	iscsi_host_free(shost);
 }
 
+static inline unsigned int
+iser_dif_prot_caps(int prot_caps)
+{
+	return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION |
+						      SHOST_DIX_TYPE1_PROTECTION : 0) |
+	       ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION |
+						      SHOST_DIX_TYPE2_PROTECTION : 0) |
+	       ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION |
+						      SHOST_DIX_TYPE3_PROTECTION : 0);
+}
+
 static struct iscsi_cls_session *
 iscsi_iser_session_create(struct iscsi_endpoint *ep,
 			  uint16_t cmds_max, uint16_t qdepth,
@@ -437,8 +454,18 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 	 * older userspace tools (before 2.0-870) did not pass us
 	 * the leading conn's ep so this will be NULL;
 	 */
-	if (ep)
+	if (ep) {
 		ib_conn = ep->dd_data;
+		if (ib_conn->pi_support) {
+			u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
+
+			scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
+			if (iser_pi_guard)
+				scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP);
+			else
+				scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC);
+		}
+	}
 
 	if (iscsi_host_add(shost,
 			   ep ? ib_conn->device->ib_device->dma_device : NULL))
@@ -481,28 +508,28 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
 	case ISCSI_PARAM_HDRDGST_EN:
 		sscanf(buf, "%d", &value);
 		if (value) {
-			iser_err("DataDigest wasn't negotiated to None");
+			iser_err("DataDigest wasn't negotiated to None\n");
 			return -EPROTO;
 		}
 		break;
 	case ISCSI_PARAM_DATADGST_EN:
 		sscanf(buf, "%d", &value);
 		if (value) {
-			iser_err("DataDigest wasn't negotiated to None");
+			iser_err("DataDigest wasn't negotiated to None\n");
 			return -EPROTO;
 		}
 		break;
 	case ISCSI_PARAM_IFMARKER_EN:
 		sscanf(buf, "%d", &value);
 		if (value) {
-			iser_err("IFMarker wasn't negotiated to No");
+			iser_err("IFMarker wasn't negotiated to No\n");
 			return -EPROTO;
 		}
 		break;
 	case ISCSI_PARAM_OFMARKER_EN:
 		sscanf(buf, "%d", &value);
 		if (value) {
-			iser_err("OFMarker wasn't negotiated to No");
+			iser_err("OFMarker wasn't negotiated to No\n");
 			return -EPROTO;
 		}
 		break;
@@ -618,19 +645,20 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
 	struct iser_conn *ib_conn;
 
 	ib_conn = ep->dd_data;
-	if (ib_conn->iser_conn)
-		/*
-		 * Must suspend xmit path if the ep is bound to the
-		 * iscsi_conn, so we know we are not accessing the ib_conn
-		 * when we free it.
-		 *
-		 * This may not be bound if the ep poll failed.
-		 */
-		iscsi_suspend_tx(ib_conn->iser_conn->iscsi_conn);
-
-
-	iser_info("ib conn %p state %d\n", ib_conn, ib_conn->state);
+	iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
 	iser_conn_terminate(ib_conn);
+
+	/*
+	 * if iser_conn and iscsi_conn are bound, we must wait for the
+	 * iscsi_conn_stop call and the ISER_CONN_DOWN state before freeing
+	 * the iser resources. Otherwise we are safe to free resources
+	 * immediately.
+	 */
+	if (ib_conn->iscsi_conn) {
+		INIT_WORK(&ib_conn->release_work, iser_release_work);
+		queue_work(release_wq, &ib_conn->release_work);
+	} else {
+		iser_conn_release(ib_conn);
+	}
 }
 
 static umode_t iser_attr_is_visible(int param_type, int param)
@@ -714,13 +742,13 @@ static struct iscsi_transport iscsi_iser_transport = {
 	/* connection management */
 	.create_conn            = iscsi_iser_conn_create,
 	.bind_conn              = iscsi_iser_conn_bind,
-	.destroy_conn           = iscsi_iser_conn_destroy,
+	.destroy_conn           = iscsi_conn_teardown,
 	.attr_is_visible	= iser_attr_is_visible,
 	.set_param              = iscsi_iser_set_param,
 	.get_conn_param		= iscsi_conn_get_param,
 	.get_ep_param		= iscsi_iser_get_ep_param,
 	.get_session_param	= iscsi_session_get_param,
-	.start_conn             = iscsi_conn_start,
+	.start_conn             = iscsi_iser_conn_start,
 	.stop_conn              = iscsi_iser_conn_stop,
 	/* iscsi host params */
 	.get_host_param		= iscsi_host_get_param,
@@ -732,6 +760,7 @@ static struct iscsi_transport iscsi_iser_transport = {
 	.xmit_task		= iscsi_iser_task_xmit,
 	.cleanup_task		= iscsi_iser_cleanup_task,
 	.alloc_pdu		= iscsi_iser_pdu_alloc,
+	.check_protection	= iscsi_iser_check_protection,
 	/* recovery */
 	.session_recovery_timedout = iscsi_session_recovery_timedout,
@@ -766,6 +795,12 @@ static int __init iser_init(void)
 	mutex_init(&ig.connlist_mutex);
 	INIT_LIST_HEAD(&ig.connlist);
 
+	release_wq = alloc_workqueue("release workqueue", 0, 0);
+	if (!release_wq) {
+		iser_err("failed to allocate release workqueue\n");
+		return -ENOMEM;
+	}
+
 	iscsi_iser_scsi_transport = iscsi_register_transport(
 							&iscsi_iser_transport);
 	if (!iscsi_iser_scsi_transport) {
@@ -784,7 +819,24 @@ register_transport_failure:
 
 static void __exit iser_exit(void)
 {
+	struct iser_conn *ib_conn, *n;
+	int connlist_empty;
+
 	iser_dbg("Removing iSER datamover...\n");
+	destroy_workqueue(release_wq);
+
+	mutex_lock(&ig.connlist_mutex);
+	connlist_empty = list_empty(&ig.connlist);
+	mutex_unlock(&ig.connlist_mutex);
+
+	if (!connlist_empty) {
+		iser_err("Error cleanup stage completed but we still have iser "
+			 "connections, destroying them anyway.\n");
+		list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) {
+			iser_conn_release(ib_conn);
+		}
+	}
+
 	iscsi_unregister_transport(&iscsi_iser_transport);
 	kmem_cache_destroy(ig.desc_cache);
 }
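
With pi_support negotiated, session creation now advertises the device's signature capabilities to the SCSI midlayer. A hedged sketch of how the iser_dif_prot_caps() helper above is meant to be used; the sig_caps value here is invented for illustration:

	/* Hypothetical capability mask: device supports DIF types 1 and 3. */
	u32 sig_caps = IB_PROT_T10DIF_TYPE_1 | IB_PROT_T10DIF_TYPE_3;

	scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
	/* Guard type follows the pi_guard module parameter (0:CRC, 1:IP_CSUM). */
	scsi_host_set_guard(shost, iser_pi_guard ? SHOST_DIX_GUARD_IP :
						   SHOST_DIX_GUARD_CRC);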
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 67914027c61..97cd385bf7f 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -8,7 +8,7 @@
  *
  * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -46,6 +46,8 @@
 #include <linux/printk.h>
 #include <scsi/libiscsi.h>
 #include <scsi/scsi_transport_iscsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
 
 #include <linux/interrupt.h>
 #include <linux/wait.h>
@@ -67,7 +69,7 @@
 #define DRV_NAME	"iser"
 #define PFX		DRV_NAME ": "
-#define DRV_VER		"1.1"
+#define DRV_VER		"1.4"
 
 #define iser_dbg(fmt, arg...)				\
 	do {						\
@@ -134,10 +136,21 @@
 					ISER_MAX_TX_MISC_PDUS        + \
 					ISER_MAX_RX_MISC_PDUS)
 
+/* Max registration work requests per command */
+#define ISER_MAX_REG_WR_PER_CMD		5
+
+/* For Signature we don't support DATAOUTs so no need to make room for them */
+#define ISER_QP_SIG_MAX_REQ_DTOS	(ISER_DEF_XMIT_CMDS_MAX	*       \
+					(1 + ISER_MAX_REG_WR_PER_CMD) + \
+					ISER_MAX_TX_MISC_PDUS         + \
+					ISER_MAX_RX_MISC_PDUS)
+
 #define ISER_VER			0x10
 #define ISER_WSV			0x08
 #define ISER_RSV			0x04
 
+#define ISER_FASTREG_LI_WRID		0xffffffffffffffffULL
+
 struct iser_hdr {
 	u8      flags;
 	u8      rsvd[3];
@@ -201,7 +214,6 @@ struct iser_data_buf {
 /* fwd declarations */
 struct iser_device;
 struct iser_cq_desc;
-struct iscsi_iser_conn;
 struct iscsi_iser_task;
 struct iscsi_endpoint;
@@ -258,6 +270,7 @@ struct iscsi_iser_task;
 struct iser_device {
 	struct ib_device             *ib_device;
 	struct ib_pd	             *pd;
+	struct ib_device_attr	     dev_attr;
 	struct ib_cq	             *rx_cq[ISER_MAX_CQ];
 	struct ib_cq	             *tx_cq[ISER_MAX_CQ];
 	struct ib_mr	             *mr;
@@ -277,17 +290,35 @@ struct iser_device {
 							    enum iser_data_dir cmd_dir);
 };
 
+#define ISER_CHECK_GUARD	0xc0
+#define ISER_CHECK_REFTAG	0x0f
+#define ISER_CHECK_APPTAG	0x30
+
+enum iser_reg_indicator {
+	ISER_DATA_KEY_VALID	= 1 << 0,
+	ISER_PROT_KEY_VALID	= 1 << 1,
+	ISER_SIG_KEY_VALID	= 1 << 2,
+	ISER_FASTREG_PROTECTED	= 1 << 3,
+};
+
+struct iser_pi_context {
+	struct ib_mr                   *prot_mr;
+	struct ib_fast_reg_page_list   *prot_frpl;
+	struct ib_mr                   *sig_mr;
+};
+
 struct fast_reg_descriptor {
 	struct list_head		  list;
 	/* For fast registration - FRWR */
 	struct ib_mr			 *data_mr;
 	struct ib_fast_reg_page_list     *data_frpl;
-	/* Valid for fast registration flag */
-	bool				  valid;
+	struct iser_pi_context		 *pi_ctx;
+	/* registration indicators container */
+	u8				  reg_indicators;
 };
 
 struct iser_conn {
-	struct iscsi_iser_conn       *iser_conn; /* iser conn for upcalls  */
+	struct iscsi_conn	     *iscsi_conn;
 	struct iscsi_endpoint	     *ep;
 	enum iser_ib_conn_state	     state;	    /* rdma connection state   */
 	atomic_t		     refcount;
@@ -302,6 +333,8 @@ struct iser_conn {
 	int                          post_recv_buf_count; /* posted rx count  */
 	atomic_t                     post_send_buf_count; /* posted tx count   */
 	char 			     name[ISER_OBJECT_NAME_SIZE];
+	struct work_struct	     release_work;
+	struct completion	     stop_completion;
 	struct list_head	     conn_list;       /* entry in ig conn list */
 
 	char  			     *login_buf;
@@ -310,6 +343,9 @@ struct iser_conn {
 	unsigned int 		     rx_desc_head;
 	struct iser_rx_desc	     *rx_descs;
 	struct ib_recv_wr	     rx_wr[ISER_MIN_POSTED_RX];
+	bool			     pi_support;
+
+	/* Connection memory registration pool */
 	union {
 		struct {
 			struct ib_fmr_pool      *pool;	   /* pool of IB FMRs         */
@@ -319,24 +355,22 @@ struct iser_conn {
 		struct {
 			struct list_head	pool;
 			int			pool_size;
-		} frwr;
-	} fastreg;
-};
-
-struct iscsi_iser_conn {
-	struct iscsi_conn            *iscsi_conn;/* ptr to iscsi conn */
-	struct iser_conn             *ib_conn;   /* iSER IB conn      */
+		} fastreg;
+	};
 };
 
 struct iscsi_iser_task {
 	struct iser_tx_desc          desc;
-	struct iscsi_iser_conn	     *iser_conn;
+	struct iser_conn	     *ib_conn;
 	enum iser_task_status 	     status;
+	struct scsi_cmnd	     *sc;
 	int                          command_sent;  /* set if command  sent  */
 	int                          dir[ISER_DIRS_NUM];      /* set if dir use*/
 	struct iser_regd_buf         rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
 	struct iser_data_buf         data[ISER_DIRS_NUM];     /* orig. data des*/
 	struct iser_data_buf         data_copy[ISER_DIRS_NUM];/* contig. copy  */
+	struct iser_data_buf         prot[ISER_DIRS_NUM];     /* prot desc     */
+	struct iser_data_buf         prot_copy[ISER_DIRS_NUM];/* prot copy     */
 };
 
 struct iser_page_vec {
@@ -362,6 +396,8 @@ struct iser_global {
 extern struct iser_global ig;
 extern int iser_debug_level;
+extern bool iser_pi_enable;
+extern int iser_pi_guard;
 
 /* allocate connection resources needed for rdma functionality */
 int iser_conn_set_full_featured_mode(struct iscsi_conn *conn);
@@ -383,12 +419,12 @@ void iscsi_iser_recv(struct iscsi_conn *conn,
 
 void iser_conn_init(struct iser_conn *ib_conn);
 
-void iser_conn_get(struct iser_conn *ib_conn);
-
-int iser_conn_put(struct iser_conn *ib_conn, int destroy_cma_id_allowed);
+void iser_conn_release(struct iser_conn *ib_conn);
 
 void iser_conn_terminate(struct iser_conn *ib_conn);
 
+void iser_release_work(struct work_struct *work);
+
 void iser_rcv_completion(struct iser_rx_desc *desc,
 			 unsigned long    dto_xfer_len,
 			struct iser_conn *ib_conn);
@@ -401,13 +437,15 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *task);
 
 void iser_free_rx_descriptors(struct iser_conn *ib_conn);
 
-void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
-				     enum iser_data_dir         cmd_dir);
+void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
+				     struct iser_data_buf *mem,
+				     struct iser_data_buf *mem_copy,
+				     enum iser_data_dir cmd_dir);
 
 int  iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
 			   enum iser_data_dir cmd_dir);
-int  iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task,
-			    enum iser_data_dir cmd_dir);
+int  iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task,
+			       enum iser_data_dir cmd_dir);
 
 int  iser_connect(struct iser_conn   *ib_conn,
 		  struct sockaddr_in *src_addr,
@@ -420,8 +458,8 @@ int  iser_reg_page_vec(struct iser_conn     *ib_conn,
 
 void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
 			enum iser_data_dir cmd_dir);
-void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
-			 enum iser_data_dir cmd_dir);
+void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
+			    enum iser_data_dir cmd_dir);
 
 int  iser_post_recvl(struct iser_conn *ib_conn);
 int  iser_post_recvm(struct iser_conn *ib_conn, int count);
@@ -432,12 +470,15 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
 			    enum   iser_data_dir       iser_dir,
 			    enum   dma_data_direction  dma_dir);
 
-void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
+void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
+			      struct iser_data_buf *data);
 
 int  iser_initialize_task_headers(struct iscsi_task *task,
 			struct iser_tx_desc *tx_desc);
 int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
 int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
 void iser_free_fmr_pool(struct iser_conn *ib_conn);
-int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
-void iser_free_frwr_pool(struct iser_conn *ib_conn);
+int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_fastreg_pool(struct iser_conn *ib_conn);
+u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
+			     enum iser_data_dir cmd_dir, sector_t *sector);
 #endif
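
The single bool valid flag of a fast_reg_descriptor becomes the reg_indicators bitmask above, so the data, protection, and signature keys can each be tracked independently. A short sketch of the intended usage pattern (mirroring iser_memory.c below, condensed for illustration):

	/* After allocation all keys are usable; no LOCAL_INV needed yet. */
	desc->reg_indicators = ISER_DATA_KEY_VALID | ISER_PROT_KEY_VALID |
			       ISER_SIG_KEY_VALID;

	/* Reusing a consumed key requires invalidation first... */
	if (!(desc->reg_indicators & ISER_DATA_KEY_VALID))
		; /* post IB_WR_LOCAL_INV with wr_id = ISER_FASTREG_LI_WRID */

	/* ...and a successful fast registration consumes the indicator. */
	desc->reg_indicators &= ~ISER_DATA_KEY_VALID;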
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 538822684d5..8d44a406063 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -41,15 +41,15 @@
 #include "iscsi_iser.h"
 
 /* Register user buffer memory and initialize passive rdma
- *  dto descriptor. Total data size is stored in
- *  iser_task->data[ISER_DIR_IN].data_len
+ *  dto descriptor. Data size is stored in
+ *  task->data[ISER_DIR_IN].data_len; protection size
+ *  is stored in task->prot[ISER_DIR_IN].data_len
 */
-static int iser_prepare_read_cmd(struct iscsi_task *task,
-				 unsigned int edtl)
+static int iser_prepare_read_cmd(struct iscsi_task *task)
 
 {
 	struct iscsi_iser_task *iser_task = task->dd_data;
-	struct iser_device  *device = iser_task->iser_conn->ib_conn->device;
+	struct iser_device  *device = iser_task->ib_conn->device;
 	struct iser_regd_buf *regd_buf;
 	int err;
 	struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -62,12 +62,15 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
 	if (err)
 		return err;
 
-	if (edtl > iser_task->data[ISER_DIR_IN].data_len) {
-		iser_err("Total data length: %ld, less than EDTL: "
-			 "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
-			 iser_task->data[ISER_DIR_IN].data_len, edtl,
-			 task->itt, iser_task->iser_conn);
-		return -EINVAL;
+	if (scsi_prot_sg_count(iser_task->sc)) {
+		struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN];
+
+		err = iser_dma_map_task_data(iser_task,
+					     pbuf_in,
+					     ISER_DIR_IN,
+					     DMA_FROM_DEVICE);
+		if (err)
+			return err;
 	}
 
 	err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
@@ -89,8 +92,9 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
 }
 
 /* Register user buffer memory and initialize passive rdma
- *  dto descriptor. Total data size is stored in
- *  task->data[ISER_DIR_OUT].data_len
+ *  dto descriptor. Data size is stored in
+ *  task->data[ISER_DIR_OUT].data_len; protection size
+ *  is stored at task->prot[ISER_DIR_OUT].data_len
 */
 static int
 iser_prepare_write_cmd(struct iscsi_task *task,
@@ -99,7 +103,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
 		       unsigned int edtl)
 {
 	struct iscsi_iser_task *iser_task = task->dd_data;
-	struct iser_device  *device = iser_task->iser_conn->ib_conn->device;
+	struct iser_device  *device = iser_task->ib_conn->device;
 	struct iser_regd_buf *regd_buf;
 	int err;
 	struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -113,12 +117,15 @@ iser_prepare_write_cmd(struct iscsi_task *task,
 	if (err)
 		return err;
 
-	if (edtl > iser_task->data[ISER_DIR_OUT].data_len) {
-		iser_err("Total data length: %ld, less than EDTL: %d, "
-			 "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
-			 iser_task->data[ISER_DIR_OUT].data_len,
-			 edtl, task->itt, task->conn);
-		return -EINVAL;
+	if (scsi_prot_sg_count(iser_task->sc)) {
+		struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT];
+
+		err = iser_dma_map_task_data(iser_task,
+					     pbuf_out,
+					     ISER_DIR_OUT,
+					     DMA_TO_DEVICE);
+		if (err)
+			return err;
 	}
 
 	err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
@@ -327,7 +334,7 @@ free_login_buf:
 
 static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
 {
-	struct iscsi_iser_conn *iser_conn = conn->dd_data;
+	struct iser_conn *ib_conn = conn->dd_data;
 	struct iscsi_session *session = conn->session;
 
 	iser_dbg("req op %x flags %x\n", req->opcode, req->flags);
@@ -340,19 +347,18 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
 	 * response) and no posted send buffers left - they must have been
 	 * consumed during previous login phases.
 	 */
-	WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1);
-	WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
+	WARN_ON(ib_conn->post_recv_buf_count != 1);
+	WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0);
 
 	if (session->discovery_sess) {
 		iser_info("Discovery session, re-using login RX buffer\n");
 		return 0;
 	} else
 		iser_info("Normal session, posting batch of RX %d buffers\n",
-			  iser_conn->ib_conn->min_posted_rx);
+			  ib_conn->min_posted_rx);
 
 	/* Initial post receive buffers */
-	if (iser_post_recvm(iser_conn->ib_conn,
-			    iser_conn->ib_conn->min_posted_rx))
+	if (iser_post_recvm(ib_conn, ib_conn->min_posted_rx))
 		return -ENOMEM;
 
 	return 0;
@@ -364,11 +370,11 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
 int iser_send_command(struct iscsi_conn *conn,
 		      struct iscsi_task *task)
 {
-	struct iscsi_iser_conn *iser_conn = conn->dd_data;
+	struct iser_conn *ib_conn = conn->dd_data;
 	struct iscsi_iser_task *iser_task = task->dd_data;
 	unsigned long edtl;
 	int err;
-	struct iser_data_buf *data_buf;
+	struct iser_data_buf *data_buf, *prot_buf;
 	struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
 	struct scsi_cmnd *sc  =  task->sc;
 	struct iser_tx_desc *tx_desc = &iser_task->desc;
@@ -377,22 +383,31 @@ int iser_send_command(struct iscsi_conn *conn,
 	/* build the tx desc regd header and add it to the tx desc dto */
 	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
-	iser_create_send_desc(iser_conn->ib_conn, tx_desc);
+	iser_create_send_desc(ib_conn, tx_desc);
 
-	if (hdr->flags & ISCSI_FLAG_CMD_READ)
+	if (hdr->flags & ISCSI_FLAG_CMD_READ) {
 		data_buf = &iser_task->data[ISER_DIR_IN];
-	else
+		prot_buf = &iser_task->prot[ISER_DIR_IN];
+	} else {
 		data_buf = &iser_task->data[ISER_DIR_OUT];
+		prot_buf = &iser_task->prot[ISER_DIR_OUT];
+	}
 
 	if (scsi_sg_count(sc)) { /* using a scatter list */
 		data_buf->buf  = scsi_sglist(sc);
 		data_buf->size = scsi_sg_count(sc);
 	}
-
 	data_buf->data_len = scsi_bufflen(sc);
 
+	if (scsi_prot_sg_count(sc)) {
+		prot_buf->buf  = scsi_prot_sglist(sc);
+		prot_buf->size = scsi_prot_sg_count(sc);
+		prot_buf->data_len = data_buf->data_len >>
+				     ilog2(sc->device->sector_size) * 8;
+	}
+
 	if (hdr->flags & ISCSI_FLAG_CMD_READ) {
-		err = iser_prepare_read_cmd(task, edtl);
+		err = iser_prepare_read_cmd(task);
 		if (err)
 			goto send_command_error;
 	}
@@ -408,7 +423,7 @@ int iser_send_command(struct iscsi_conn *conn,
 
 	iser_task->status = ISER_TASK_STATUS_STARTED;
 
-	err = iser_post_send(iser_conn->ib_conn, tx_desc);
+	err = iser_post_send(ib_conn, tx_desc);
 	if (!err)
 		return 0;
@@ -424,7 +439,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
 		       struct iscsi_task *task,
 		       struct iscsi_data *hdr)
 {
-	struct iscsi_iser_conn *iser_conn = conn->dd_data;
+	struct iser_conn *ib_conn = conn->dd_data;
 	struct iscsi_iser_task *iser_task = task->dd_data;
 	struct iser_tx_desc *tx_desc = NULL;
 	struct iser_regd_buf *regd_buf;
@@ -473,7 +488,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
 		 itt, buf_offset, data_seg_len);
 
-	err = iser_post_send(iser_conn->ib_conn, tx_desc);
+	err = iser_post_send(ib_conn, tx_desc);
 	if (!err)
 		return 0;
@@ -486,19 +501,18 @@ send_data_out_error:
 int iser_send_control(struct iscsi_conn *conn,
 		      struct iscsi_task *task)
 {
-	struct iscsi_iser_conn *iser_conn = conn->dd_data;
+	struct iser_conn *ib_conn = conn->dd_data;
 	struct iscsi_iser_task *iser_task = task->dd_data;
 	struct iser_tx_desc *mdesc = &iser_task->desc;
 	unsigned long data_seg_len;
 	int err = 0;
 	struct iser_device *device;
-	struct iser_conn *ib_conn = iser_conn->ib_conn;
 
 	/* build the tx desc regd header and add it to the tx desc dto */
 	mdesc->type = ISCSI_TX_CONTROL;
-	iser_create_send_desc(iser_conn->ib_conn, mdesc);
+	iser_create_send_desc(ib_conn, mdesc);
 
-	device = iser_conn->ib_conn->device;
+	device = ib_conn->device;
 
 	data_seg_len = ntoh24(task->hdr->dlength);
@@ -513,14 +527,13 @@ int iser_send_control(struct iscsi_conn *conn,
 			ib_conn->login_req_dma, task->data_count,
 			DMA_TO_DEVICE);
 
-		memcpy(iser_conn->ib_conn->login_req_buf, task->data,
-							task->data_count);
+		memcpy(ib_conn->login_req_buf, task->data, task->data_count);
 
 		ib_dma_sync_single_for_device(device->ib_device,
 			ib_conn->login_req_dma, task->data_count,
 			DMA_TO_DEVICE);
 
-		tx_dsg->addr    = iser_conn->ib_conn->login_req_dma;
+		tx_dsg->addr    = ib_conn->login_req_dma;
 		tx_dsg->length  = task->data_count;
 		tx_dsg->lkey    = device->mr->lkey;
 		mdesc->num_sge = 2;
@@ -529,7 +542,7 @@ int iser_send_control(struct iscsi_conn *conn,
 	if (task == conn->login_task) {
 		iser_dbg("op %x dsl %lx, posting login rx buffer\n",
 			 task->hdr->opcode, data_seg_len);
-		err = iser_post_recvl(iser_conn->ib_conn);
+		err = iser_post_recvl(ib_conn);
 		if (err)
 			goto send_control_error;
 		err = iser_post_rx_bufs(conn, task->hdr);
@@ -537,7 +550,7 @@ int iser_send_control(struct iscsi_conn *conn,
 		if (err)
 			goto send_control_error;
 	}
 
-	err = iser_post_send(iser_conn->ib_conn, mdesc);
+	err = iser_post_send(ib_conn, mdesc);
 	if (!err)
 		return 0;
@@ -553,7 +566,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
 			 unsigned long rx_xfer_len,
 			 struct iser_conn *ib_conn)
 {
-	struct iscsi_iser_conn *conn = ib_conn->iser_conn;
 	struct iscsi_hdr *hdr;
 	u64 rx_dma;
 	int rx_buflen, outstanding, count, err;
@@ -575,17 +587,17 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
 	iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
 			hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
 
-	iscsi_iser_recv(conn->iscsi_conn, hdr,
-		rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);
+	iscsi_iser_recv(ib_conn->iscsi_conn, hdr, rx_desc->data,
+			rx_xfer_len - ISER_HEADERS_LEN);
 
 	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
-			rx_buflen, DMA_FROM_DEVICE);
+				      rx_buflen, DMA_FROM_DEVICE);
 
 	/* decrementing conn->post_recv_buf_count only --after-- freeing the   *
	 * task eliminates the need to worry on tasks which are completed in   *
	 * parallel to the execution of iser_conn_term. So the code that waits *
	 * for the posted rx bufs refcount to become zero handles everything   */
-	conn->ib_conn->post_recv_buf_count--;
+	ib_conn->post_recv_buf_count--;
 
 	if (rx_dma == ib_conn->login_resp_dma)
 		return;
@@ -610,11 +622,12 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc,
 		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
 					ISER_HEADERS_LEN, DMA_TO_DEVICE);
 		kmem_cache_free(ig.desc_cache, tx_desc);
+		tx_desc = NULL;
 	}
 
 	atomic_dec(&ib_conn->post_send_buf_count);
 
-	if (tx_desc->type == ISCSI_TX_CONTROL) {
+	if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
 		/* this arithmetic is legal by libiscsi dd_data allocation */
 		task = (void *) ((long)(void *)tx_desc -
 				  sizeof(struct iscsi_task));
@@ -634,6 +647,9 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
 	iser_task->data[ISER_DIR_IN].data_len  = 0;
 	iser_task->data[ISER_DIR_OUT].data_len = 0;
 
+	iser_task->prot[ISER_DIR_IN].data_len  = 0;
+	iser_task->prot[ISER_DIR_OUT].data_len = 0;
+
 	memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,
 	       sizeof(struct iser_regd_buf));
 	memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0,
@@ -642,28 +658,63 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
 
 void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
 {
-	struct iser_device *device = iser_task->iser_conn->ib_conn->device;
-	int is_rdma_aligned = 1;
+	struct iser_device *device = iser_task->ib_conn->device;
+	int is_rdma_data_aligned = 1;
+	int is_rdma_prot_aligned = 1;
+	int prot_count = scsi_prot_sg_count(iser_task->sc);
 
 	/* if we were reading, copy back to unaligned sglist,
	 * anyway dma_unmap and free the copy
	 */
 	if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) {
-		is_rdma_aligned = 0;
-		iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN);
+		is_rdma_data_aligned = 0;
+		iser_finalize_rdma_unaligned_sg(iser_task,
+						&iser_task->data[ISER_DIR_IN],
+						&iser_task->data_copy[ISER_DIR_IN],
+						ISER_DIR_IN);
 	}
+
 	if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
-		is_rdma_aligned = 0;
-		iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT);
+		is_rdma_data_aligned = 0;
+		iser_finalize_rdma_unaligned_sg(iser_task,
+						&iser_task->data[ISER_DIR_OUT],
+						&iser_task->data_copy[ISER_DIR_OUT],
+						ISER_DIR_OUT);
+	}
+
+	if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) {
+		is_rdma_prot_aligned = 0;
+		iser_finalize_rdma_unaligned_sg(iser_task,
+						&iser_task->prot[ISER_DIR_IN],
+						&iser_task->prot_copy[ISER_DIR_IN],
+						ISER_DIR_IN);
 	}
 
-	if (iser_task->dir[ISER_DIR_IN])
+	if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) {
+		is_rdma_prot_aligned = 0;
+		iser_finalize_rdma_unaligned_sg(iser_task,
+						&iser_task->prot[ISER_DIR_OUT],
+						&iser_task->prot_copy[ISER_DIR_OUT],
+						ISER_DIR_OUT);
+	}
+
+	if (iser_task->dir[ISER_DIR_IN]) {
 		device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
+		if (is_rdma_data_aligned)
+			iser_dma_unmap_task_data(iser_task,
+						 &iser_task->data[ISER_DIR_IN]);
+		if (prot_count && is_rdma_prot_aligned)
+			iser_dma_unmap_task_data(iser_task,
+						 &iser_task->prot[ISER_DIR_IN]);
+	}
 
-	if (iser_task->dir[ISER_DIR_OUT])
+	if (iser_task->dir[ISER_DIR_OUT]) {
 		device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
-
-       /* if the data was unaligned, it was already unmapped and then copied */
-       if (is_rdma_aligned)
-		iser_dma_unmap_task_data(iser_task);
+		if (is_rdma_data_aligned)
+			iser_dma_unmap_task_data(iser_task,
+						 &iser_task->data[ISER_DIR_OUT]);
+		if (prot_count && is_rdma_prot_aligned)
+			iser_dma_unmap_task_data(iser_task,
+						 &iser_task->prot[ISER_DIR_OUT]);
+	}
 }
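
iser_send_command() now derives the protection buffer length from the data length: one 8-byte PI tuple per sector. A worked example with explicit parentheses; note that in C the multiplication binds tighter than the shift, so the unparenthesized expression in the hunk above evaluates as data_len >> (ilog2(sector_size) * 8) rather than the sectors-times-eight value sketched here:

	unsigned int sector_size = 512;		/* assumed for the example */
	unsigned long data_len = 4096;		/* 8 sectors of data */
	unsigned long prot_len = (data_len >> ilog2(sector_size)) * 8;	/* = 64 bytes */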
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 1ce0c97d2cc..47acd3ad3a1 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -45,13 +45,19 @@
  * iser_start_rdma_unaligned_sg
  */
 static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
+					struct iser_data_buf *data,
+					struct iser_data_buf *data_copy,
 					enum iser_data_dir cmd_dir)
 {
-	int dma_nents;
-	struct ib_device *dev;
+	struct ib_device *dev = iser_task->ib_conn->device->ib_device;
+	struct scatterlist *sgl = (struct scatterlist *)data->buf;
+	struct scatterlist *sg;
 	char *mem = NULL;
-	struct iser_data_buf *data = &iser_task->data[cmd_dir];
-	unsigned long  cmd_data_len = data->data_len;
+	unsigned long  cmd_data_len = 0;
+	int dma_nents, i;
+
+	for_each_sg(sgl, sg, data->size, i)
+		cmd_data_len += ib_sg_dma_len(dev, sg);
 
 	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
 		mem = (void *)__get_free_pages(GFP_ATOMIC,
@@ -61,17 +67,16 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
 
 	if (mem == NULL) {
 		iser_err("Failed to allocate mem size %d %d for copying sglist\n",
-			 data->size,(int)cmd_data_len);
+			 data->size, (int)cmd_data_len);
 		return -ENOMEM;
 	}
 
 	if (cmd_dir == ISER_DIR_OUT) {
 		/* copy the unaligned sg the buffer which is used for RDMA */
-		struct scatterlist *sgl = (struct scatterlist *)data->buf;
-		struct scatterlist *sg;
 		int i;
 		char *p, *from;
 
+		sgl = (struct scatterlist *)data->buf;
 		p = mem;
 		for_each_sg(sgl, sg, data->size, i) {
 			from = kmap_atomic(sg_page(sg));
@@ -83,39 +88,37 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
 		}
 	}
 
-	sg_init_one(&iser_task->data_copy[cmd_dir].sg_single, mem, cmd_data_len);
-	iser_task->data_copy[cmd_dir].buf  =
-		&iser_task->data_copy[cmd_dir].sg_single;
-	iser_task->data_copy[cmd_dir].size = 1;
+	sg_init_one(&data_copy->sg_single, mem, cmd_data_len);
+	data_copy->buf = &data_copy->sg_single;
+	data_copy->size = 1;
+	data_copy->copy_buf = mem;
 
-	iser_task->data_copy[cmd_dir].copy_buf  = mem;
-
-	dev = iser_task->iser_conn->ib_conn->device->ib_device;
-	dma_nents = ib_dma_map_sg(dev,
-				  &iser_task->data_copy[cmd_dir].sg_single,
-				  1,
+	dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1,
 				  (cmd_dir == ISER_DIR_OUT) ?
 				  DMA_TO_DEVICE : DMA_FROM_DEVICE);
 	BUG_ON(dma_nents == 0);
 
-	iser_task->data_copy[cmd_dir].dma_nents = dma_nents;
+	data_copy->dma_nents = dma_nents;
+	data_copy->data_len = cmd_data_len;
+
 	return 0;
 }
 
 /**
  * iser_finalize_rdma_unaligned_sg
  */
+
 void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
-				     enum iser_data_dir         cmd_dir)
+				     struct iser_data_buf *data,
+				     struct iser_data_buf *data_copy,
+				     enum iser_data_dir cmd_dir)
 {
 	struct ib_device *dev;
-	struct iser_data_buf *mem_copy;
 	unsigned long  cmd_data_len;
 
-	dev = iser_task->iser_conn->ib_conn->device->ib_device;
-	mem_copy = &iser_task->data_copy[cmd_dir];
+	dev = iser_task->ib_conn->device->ib_device;
 
-	ib_dma_unmap_sg(dev, &mem_copy->sg_single, 1,
+	ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,
 			(cmd_dir == ISER_DIR_OUT) ?
 			DMA_TO_DEVICE : DMA_FROM_DEVICE);
@@ -127,10 +130,10 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
 		int i;
 
 		/* copy back read RDMA to unaligned sg */
-		mem	= mem_copy->copy_buf;
+		mem = data_copy->copy_buf;
 
-		sgl	= (struct scatterlist *)iser_task->data[ISER_DIR_IN].buf;
-		sg_size = iser_task->data[ISER_DIR_IN].size;
+		sgl = (struct scatterlist *)data->buf;
+		sg_size = data->size;
 
 		p = mem;
 		for_each_sg(sgl, sg, sg_size, i) {
@@ -143,15 +146,15 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
 		}
 	}
 
-	cmd_data_len = iser_task->data[cmd_dir].data_len;
+	cmd_data_len = data->data_len;
 
 	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
-		free_pages((unsigned long)mem_copy->copy_buf,
+		free_pages((unsigned long)data_copy->copy_buf,
 			   ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
 	else
-		kfree(mem_copy->copy_buf);
+		kfree(data_copy->copy_buf);
 
-	mem_copy->copy_buf = NULL;
+	data_copy->copy_buf = NULL;
 }
 
 #define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)
@@ -319,7 +322,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
 	struct ib_device *dev;
 
 	iser_task->dir[iser_dir] = 1;
-	dev = iser_task->iser_conn->ib_conn->device->ib_device;
+	dev = iser_task->ib_conn->device->ib_device;
 
 	data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);
 	if (data->dma_nents == 0) {
@@ -329,31 +332,23 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
 	return 0;
 }
 
-void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task)
+void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
+			      struct iser_data_buf *data)
 {
 	struct ib_device *dev;
-	struct iser_data_buf *data;
 
-	dev = iser_task->iser_conn->ib_conn->device->ib_device;
-
-	if (iser_task->dir[ISER_DIR_IN]) {
-		data = &iser_task->data[ISER_DIR_IN];
-		ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE);
-	}
-
-	if (iser_task->dir[ISER_DIR_OUT]) {
-		data = &iser_task->data[ISER_DIR_OUT];
-		ib_dma_unmap_sg(dev, data->buf, data->size, DMA_TO_DEVICE);
-	}
+	dev = iser_task->ib_conn->device->ib_device;
+	ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE);
 }
 
 static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
 			      struct ib_device *ibdev,
+			      struct iser_data_buf *mem,
+			      struct iser_data_buf *mem_copy,
 			      enum iser_data_dir cmd_dir,
 			      int aligned_len)
 {
-	struct iscsi_conn    *iscsi_conn = iser_task->iser_conn->iscsi_conn;
-	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+	struct iscsi_conn    *iscsi_conn = iser_task->ib_conn->iscsi_conn;
 
 	iscsi_conn->fmr_unalign_cnt++;
 	iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
@@ -363,12 +358,12 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
 		iser_data_buf_dump(mem, ibdev);
 
 	/* unmap the command data before accessing it */
-	iser_dma_unmap_task_data(iser_task);
+	iser_dma_unmap_task_data(iser_task, mem);
 
 	/* allocate copy buf, if we are writing, copy the */
 	/* unaligned scatterlist, dma map the copy        */
-	if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
-			return -ENOMEM;
+	if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0)
+		return -ENOMEM;
 
 	return 0;
 }
@@ -382,7 +377,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
 int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
 			  enum iser_data_dir cmd_dir)
 {
-	struct iser_conn     *ib_conn = iser_task->iser_conn->ib_conn;
+	struct iser_conn     *ib_conn = iser_task->ib_conn;
 	struct iser_device   *device = ib_conn->device;
 	struct ib_device     *ibdev = device->ib_device;
 	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
@@ -396,7 +391,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
 	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
 	if (aligned_len != mem->dma_nents) {
-		err = fall_to_bounce_buf(iser_task, ibdev,
+		err = fall_to_bounce_buf(iser_task, ibdev, mem,
+					 &iser_task->data_copy[cmd_dir],
 					 cmd_dir, aligned_len);
 		if (err) {
 			iser_err("failed to allocate bounce buffer\n");
@@ -422,8 +418,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
 			 (unsigned long)regd_buf->reg.va,
 			 (unsigned long)regd_buf->reg.len);
 	} else { /* use FMR for multiple dma entries */
-		iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev);
-		err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec,
+		iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev);
+		err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec,
 					&regd_buf->reg);
 		if (err && err != -EAGAIN) {
 			iser_data_buf_dump(mem, ibdev);
 				 mem->dma_nents,
 				 ntoh24(iser_task->desc.iscsi_header.dlength));
 			iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
-				 ib_conn->fastreg.fmr.page_vec->data_size,
-				 ib_conn->fastreg.fmr.page_vec->length,
-				 ib_conn->fastreg.fmr.page_vec->offset);
-			for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++)
+				 ib_conn->fmr.page_vec->data_size,
+				 ib_conn->fmr.page_vec->length,
+				 ib_conn->fmr.page_vec->offset);
+			for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
 				iser_err("page_vec[%d] = 0x%llx\n", i,
-					 (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]);
+					 (unsigned long long) ib_conn->fmr.page_vec->pages[i]);
 		}
 		if (err)
 			return err;
@@ -444,94 +440,280 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
 	return 0;
 }
 
-static int iser_fast_reg_mr(struct fast_reg_descriptor *desc,
-			    struct iser_conn *ib_conn,
+static inline enum ib_t10_dif_type
+scsi2ib_prot_type(unsigned char prot_type)
+{
+	switch (prot_type) {
+	case SCSI_PROT_DIF_TYPE0:
+		return IB_T10DIF_NONE;
+	case SCSI_PROT_DIF_TYPE1:
+		return IB_T10DIF_TYPE1;
+	case SCSI_PROT_DIF_TYPE2:
+		return IB_T10DIF_TYPE2;
+	case SCSI_PROT_DIF_TYPE3:
+		return IB_T10DIF_TYPE3;
+	default:
+		return IB_T10DIF_NONE;
+	}
+}
+
+static int
+iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
+{
+	unsigned char scsi_ptype = scsi_get_prot_type(sc);
+
+	sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF;
+	sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;
+	sig_attrs->mem.sig.dif.pi_interval = sc->device->sector_size;
+	sig_attrs->wire.sig.dif.pi_interval = sc->device->sector_size;
+
+	switch (scsi_get_prot_op(sc)) {
+	case SCSI_PROT_WRITE_INSERT:
+	case SCSI_PROT_READ_STRIP:
+		sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE;
+		sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
+		sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) &
+						  0xffffffff;
+		break;
+	case SCSI_PROT_READ_INSERT:
+	case SCSI_PROT_WRITE_STRIP:
+		sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+		sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
+		sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) &
+						 0xffffffff;
+		sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE;
+		break;
+	case SCSI_PROT_READ_PASS:
+	case SCSI_PROT_WRITE_PASS:
+		sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+		sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC;
+		sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) &
+						 0xffffffff;
+		sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype);
+		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
+		sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) &
+						  0xffffffff;
+		break;
+	default:
+		iser_err("Unsupported PI operation %d\n",
+			 scsi_get_prot_op(sc));
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int
+iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
+{
+	switch (scsi_get_prot_type(sc)) {
+	case SCSI_PROT_DIF_TYPE0:
+		*mask = 0x0;
+		break;
+	case SCSI_PROT_DIF_TYPE1:
+	case SCSI_PROT_DIF_TYPE2:
+		*mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG;
+		break;
+	case SCSI_PROT_DIF_TYPE3:
+		*mask = ISER_CHECK_GUARD;
+		break;
+	default:
+		iser_err("Unsupported protection type %d\n",
+			 scsi_get_prot_type(sc));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
+		struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
+		struct ib_sge *prot_sge, struct ib_sge *sig_sge)
+{
+	struct iser_conn *ib_conn = iser_task->ib_conn;
+	struct iser_pi_context *pi_ctx = desc->pi_ctx;
+	struct ib_send_wr sig_wr, inv_wr;
+	struct ib_send_wr *bad_wr, *wr = NULL;
+	struct ib_sig_attrs sig_attrs;
+	int ret;
+	u32 key;
+
+	memset(&sig_attrs, 0, sizeof(sig_attrs));
+	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
+	if (ret)
+		goto err;
+
+	ret = iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
+	if (ret)
+		goto err;
+
+	if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
+		memset(&inv_wr, 0, sizeof(inv_wr));
+		inv_wr.opcode = IB_WR_LOCAL_INV;
+		inv_wr.wr_id = ISER_FASTREG_LI_WRID;
+		inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey;
+		wr = &inv_wr;
+		/* Bump the key */
+		key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF);
+		ib_update_fast_reg_key(pi_ctx->sig_mr, ++key);
+	}
+
+	memset(&sig_wr, 0, sizeof(sig_wr));
+	sig_wr.opcode = IB_WR_REG_SIG_MR;
+	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
+	sig_wr.sg_list = data_sge;
+	sig_wr.num_sge = 1;
+	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
+	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
+	if (scsi_prot_sg_count(iser_task->sc))
+		sig_wr.wr.sig_handover.prot = prot_sge;
+	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
+					      IB_ACCESS_REMOTE_READ |
+					      IB_ACCESS_REMOTE_WRITE;
+
+	if (!wr)
+		wr = &sig_wr;
+	else
+		wr->next = &sig_wr;
+
+	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
+	if (ret) {
+		iser_err("reg_sig_mr failed, ret:%d\n", ret);
+		goto err;
+	}
+	desc->reg_indicators &= ~ISER_SIG_KEY_VALID;
+
+	sig_sge->lkey = pi_ctx->sig_mr->lkey;
+	sig_sge->addr = 0;
+	sig_sge->length = data_sge->length + prot_sge->length;
+	if (scsi_get_prot_op(iser_task->sc) == SCSI_PROT_WRITE_INSERT ||
+	    scsi_get_prot_op(iser_task->sc) == SCSI_PROT_READ_STRIP) {
+		sig_sge->length += (data_sge->length /
+				   iser_task->sc->device->sector_size) * 8;
+	}
+
+	iser_dbg("sig_sge: addr: 0x%llx  length: %u lkey: 0x%x\n",
+		 sig_sge->addr, sig_sge->length,
+		 sig_sge->lkey);
+err:
+	return ret;
+}
+
+static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
 			    struct iser_regd_buf *regd_buf,
-			    u32 offset, unsigned int data_size,
-			    unsigned int page_list_len)
+			    struct iser_data_buf *mem,
+			    enum iser_reg_indicator ind,
+			    struct ib_sge *sge)
 {
+	struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
+	struct iser_conn *ib_conn = iser_task->ib_conn;
+	struct iser_device *device = ib_conn->device;
+	struct ib_device *ibdev = device->ib_device;
+	struct ib_mr *mr;
+	struct ib_fast_reg_page_list *frpl;
 	struct ib_send_wr fastreg_wr, inv_wr;
 	struct ib_send_wr *bad_wr, *wr = NULL;
 	u8 key;
-	int ret;
+	int ret, offset, size, plen;
+
+	/* if there is a single dma entry, dma mr suffices */
+	if (mem->dma_nents == 1) {
+		struct scatterlist *sg = (struct scatterlist *)mem->buf;
 
-	if (!desc->valid) {
+		sge->lkey = device->mr->lkey;
+		sge->addr   = ib_sg_dma_address(ibdev, &sg[0]);
+		sge->length  = ib_sg_dma_len(ibdev, &sg[0]);
+
+		iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
+			 sge->lkey, sge->addr, sge->length);
+		return 0;
+	}
+
+	if (ind == ISER_DATA_KEY_VALID) {
+		mr = desc->data_mr;
+		frpl = desc->data_frpl;
+	} else {
+		mr = desc->pi_ctx->prot_mr;
+		frpl = desc->pi_ctx->prot_frpl;
+	}
+
+	plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
+				   &offset, &size);
+	if (plen * SIZE_4K < size) {
+		iser_err("fast reg page_list too short to hold this SG\n");
+		return -EINVAL;
+	}
+
+	if (!(desc->reg_indicators & ind)) {
 		memset(&inv_wr, 0, sizeof(inv_wr));
+		inv_wr.wr_id = ISER_FASTREG_LI_WRID;
 		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.send_flags = IB_SEND_SIGNALED;
-		inv_wr.ex.invalidate_rkey = desc->data_mr->rkey;
+		inv_wr.ex.invalidate_rkey = mr->rkey;
 		wr = &inv_wr;
 		/* Bump the key */
-		key = (u8)(desc->data_mr->rkey & 0x000000FF);
-		ib_update_fast_reg_key(desc->data_mr, ++key);
+		key = (u8)(mr->rkey & 0x000000FF);
+		ib_update_fast_reg_key(mr, ++key);
 	}
 
 	/* Prepare FASTREG WR */
 	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
 	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
-	fastreg_wr.send_flags = IB_SEND_SIGNALED;
-	fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset;
-	fastreg_wr.wr.fast_reg.page_list = desc->data_frpl;
-	fastreg_wr.wr.fast_reg.page_list_len = page_list_len;
+	fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
+	fastreg_wr.wr.fast_reg.page_list = frpl;
+	fastreg_wr.wr.fast_reg.page_list_len = plen;
 	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
-	fastreg_wr.wr.fast_reg.length = data_size;
-	fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey;
+	fastreg_wr.wr.fast_reg.length = size;
+	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
 	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
 					       IB_ACCESS_REMOTE_WRITE |
 					       IB_ACCESS_REMOTE_READ);
 
-	if (!wr) {
+	if (!wr)
 		wr = &fastreg_wr;
-		atomic_inc(&ib_conn->post_send_buf_count);
-	} else {
+	else
 		wr->next = &fastreg_wr;
-		atomic_add(2, &ib_conn->post_send_buf_count);
-	}
 
 	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
 	if (ret) {
-		if (bad_wr->next)
-			atomic_sub(2, &ib_conn->post_send_buf_count);
-		else
-			atomic_dec(&ib_conn->post_send_buf_count);
 		iser_err("fast registration failed, ret:%d\n", ret);
 		return ret;
 	}
-	desc->valid = false;
+	desc->reg_indicators &= ~ind;
 
-	regd_buf->reg.mem_h = desc;
-	regd_buf->reg.lkey = desc->data_mr->lkey;
-	regd_buf->reg.rkey = desc->data_mr->rkey;
-	regd_buf->reg.va = desc->data_frpl->page_list[0] + offset;
-	regd_buf->reg.len = data_size;
-	regd_buf->reg.is_mr = 1;
+	sge->lkey = mr->lkey;
+	sge->addr = frpl->page_list[0] + offset;
+	sge->length = size;
 
 	return ret;
 }
 
 /**
- * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA,
+ * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,
  * using Fast Registration WR (if possible) obtaining rkey and va
  *
  * returns 0 on success, errno code on failure
 */
-int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,
-			   enum iser_data_dir cmd_dir)
+int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
+			      enum iser_data_dir cmd_dir)
 {
-	struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+	struct iser_conn *ib_conn = iser_task->ib_conn;
 	struct iser_device *device = ib_conn->device;
 	struct ib_device *ibdev = device->ib_device;
 	struct iser_data_buf *mem = &iser_task->data[cmd_dir];
 	struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
-	struct fast_reg_descriptor *desc;
-	unsigned int data_size, page_list_len;
+	struct fast_reg_descriptor *desc = NULL;
+	struct ib_sge data_sge;
 	int err, aligned_len;
 	unsigned long flags;
-	u32 offset;
 
 	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
 	if (aligned_len != mem->dma_nents) {
-		err = fall_to_bounce_buf(iser_task, ibdev,
+		err = fall_to_bounce_buf(iser_task, ibdev, mem,
+					 &iser_task->data_copy[cmd_dir],
 					 cmd_dir, aligned_len);
 		if (err) {
 			iser_err("failed to allocate bounce buffer\n");
@@ -540,41 +722,79 @@ int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,
 		mem = &iser_task->data_copy[cmd_dir];
 	}
 
-	/* if there a single dma entry, dma mr suffices */
-	if (mem->dma_nents == 1) {
-		struct scatterlist *sg = (struct scatterlist *)mem->buf;
-
-		regd_buf->reg.lkey = device->mr->lkey;
-		regd_buf->reg.rkey = device->mr->rkey;
-		regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
-		regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
-		regd_buf->reg.is_mr = 0;
-	} else {
+	if (mem->dma_nents != 1 ||
+	    scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
 		spin_lock_irqsave(&ib_conn->lock, flags);
-		desc = list_first_entry(&ib_conn->fastreg.frwr.pool,
+		desc = list_first_entry(&ib_conn->fastreg.pool,
 					struct fast_reg_descriptor, list);
 		list_del(&desc->list);
 		spin_unlock_irqrestore(&ib_conn->lock, flags);
-		page_list_len = iser_sg_to_page_vec(mem, device->ib_device,
-						    desc->data_frpl->page_list,
-						    &offset, &data_size);
-
-		if (page_list_len * SIZE_4K < data_size) {
-			iser_err("fast reg page_list too short to hold this SG\n");
-			err = -EINVAL;
-			goto err_reg;
+		regd_buf->reg.mem_h = desc;
+	}
+
+	err = iser_fast_reg_mr(iser_task, regd_buf, mem,
+			       ISER_DATA_KEY_VALID, &data_sge);
+	if (err)
+		goto err_reg;
+
+	if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
+		struct ib_sge prot_sge, sig_sge;
+
+		memset(&prot_sge, 0, sizeof(prot_sge));
+		if (scsi_prot_sg_count(iser_task->sc)) {
+			mem = &iser_task->prot[cmd_dir];
+			aligned_len = iser_data_buf_aligned_len(mem, ibdev);
+			if (aligned_len != mem->dma_nents) {
+				err = fall_to_bounce_buf(iser_task, ibdev, mem,
+							 &iser_task->prot_copy[cmd_dir],
+							 cmd_dir, aligned_len);
+				if (err) {
+					iser_err("failed to allocate bounce buffer\n");
+					return err;
+				}
+				mem = &iser_task->prot_copy[cmd_dir];
+			}
+
+			err = iser_fast_reg_mr(iser_task, regd_buf, mem,
+					       ISER_PROT_KEY_VALID, &prot_sge);
+			if (err)
+				goto err_reg;
 		}
 
-		err = iser_fast_reg_mr(desc, ib_conn, regd_buf,
-				       offset, data_size, page_list_len);
-		if (err)
-			goto err_reg;
+		err = iser_reg_sig_mr(iser_task, desc, &data_sge,
+				      &prot_sge, &sig_sge);
+		if (err) {
+			iser_err("Failed to register signature mr\n");
+			return err;
+		}
+		desc->reg_indicators |= ISER_FASTREG_PROTECTED;
+
+		regd_buf->reg.lkey = sig_sge.lkey;
+		regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
+		regd_buf->reg.va = sig_sge.addr;
+		regd_buf->reg.len = sig_sge.length;
+		regd_buf->reg.is_mr = 1;
+	} else {
+		if (desc) {
+			regd_buf->reg.rkey = desc->data_mr->rkey;
+			regd_buf->reg.is_mr = 1;
+		} else {
+			regd_buf->reg.rkey = device->mr->rkey;
+			regd_buf->reg.is_mr = 0;
+		}
+
+		regd_buf->reg.lkey = data_sge.lkey;
+		regd_buf->reg.va = data_sge.addr;
+		regd_buf->reg.len = data_sge.length;
 	}
 
 	return 0;
 err_reg:
-	spin_lock_irqsave(&ib_conn->lock, flags);
-	list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
-	spin_unlock_irqrestore(&ib_conn->lock, flags);
+	if (desc) {
+		spin_lock_irqsave(&ib_conn->lock, flags);
+		list_add_tail(&desc->list, &ib_conn->fastreg.pool);
+		spin_unlock_irqrestore(&ib_conn->lock, flags);
+	}
+
 	return err;
 }
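
iser_reg_sig_mr() sizes the signature SGE as data plus protection, and for the INSERT/STRIP operations, where the wire carries PI that host memory does not, it adds 8 bytes per sector. A small arithmetic sketch under assumed values (the variables here are illustrative, not from the patch):

	/* Hypothetical WRITE_INSERT: 4 KB of data, no memory-side PI. */
	u32 data_len = 4096, prot_len = 0, sector_size = 512;
	u32 wire_len = data_len + prot_len;

	wire_len += (data_len / sector_size) * 8;	/* + 64 bytes of inserted PI */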
You may choose to be licensed under the terms of the GNU @@ -71,17 +71,14 @@ static void iser_event_handler(struct ib_event_handler *handler,   */  static int iser_create_device_ib_res(struct iser_device *device)  { -	int i, j;  	struct iser_cq_desc *cq_desc; -	struct ib_device_attr *dev_attr; +	struct ib_device_attr *dev_attr = &device->dev_attr; +	int ret, i, j; -	dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL); -	if (!dev_attr) -		return -ENOMEM; - -	if (ib_query_device(device->ib_device, dev_attr)) { +	ret = ib_query_device(device->ib_device, dev_attr); +	if (ret) {  		pr_warn("Query device failed for %s\n", device->ib_device->name); -		goto dev_attr_err; +		return ret;  	}  	/* Assign function handles  - based on FMR support */ @@ -94,14 +91,14 @@ static int iser_create_device_ib_res(struct iser_device *device)  		device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;  	} else  	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { -		iser_info("FRWR supported, using FRWR for registration\n"); -		device->iser_alloc_rdma_reg_res = iser_create_frwr_pool; -		device->iser_free_rdma_reg_res = iser_free_frwr_pool; -		device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr; -		device->iser_unreg_rdma_mem = iser_unreg_mem_frwr; +		iser_info("FastReg supported, using FastReg for registration\n"); +		device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool; +		device->iser_free_rdma_reg_res = iser_free_fastreg_pool; +		device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg; +		device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg;  	} else { -		iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n"); -		goto dev_attr_err; +		iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n"); +		return -1;  	}  	device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); @@ -158,7 +155,6 @@ static int iser_create_device_ib_res(struct iser_device *device)  	if (ib_register_event_handler(&device->event_handler))  		goto handler_err; -	kfree(dev_attr);  	return 0;  handler_err: @@ -178,8 +174,6 @@ pd_err:  	kfree(device->cq_desc);  cq_desc_err:  	iser_err("failed to allocate an IB resource\n"); -dev_attr_err: -	kfree(dev_attr);  	return -1;  } @@ -221,13 +215,13 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)  	struct ib_fmr_pool_param params;  	int ret = -ENOMEM; -	ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) + -						(sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), -						GFP_KERNEL); -	if (!ib_conn->fastreg.fmr.page_vec) +	ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) + +					(sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), +					GFP_KERNEL); +	if (!ib_conn->fmr.page_vec)  		return ret; -	ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1); +	ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1);  	params.page_shift        = SHIFT_4K;  	/* when the first/last SG element are not start/end * @@ -243,16 +237,16 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)  				    IB_ACCESS_REMOTE_WRITE |  				    IB_ACCESS_REMOTE_READ); -	ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, &params); -	if (!IS_ERR(ib_conn->fastreg.fmr.pool)) +	ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, &params); +	if (!IS_ERR(ib_conn->fmr.pool))  		return 0;  	/* no FMR => no need for page_vec */ -	kfree(ib_conn->fastreg.fmr.page_vec); -	ib_conn->fastreg.fmr.page_vec = NULL; +	kfree(ib_conn->fmr.page_vec); +	ib_conn->fmr.page_vec = NULL; -	ret 
= PTR_ERR(ib_conn->fastreg.fmr.pool); -	ib_conn->fastreg.fmr.pool = NULL; +	ret = PTR_ERR(ib_conn->fmr.pool); +	ib_conn->fmr.pool = NULL;  	if (ret != -ENOSYS) {  		iser_err("FMR allocation failed, err %d\n", ret);  		return ret; @@ -268,93 +262,173 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)  void iser_free_fmr_pool(struct iser_conn *ib_conn)  {  	iser_info("freeing conn %p fmr pool %p\n", -		  ib_conn, ib_conn->fastreg.fmr.pool); +		  ib_conn, ib_conn->fmr.pool); + +	if (ib_conn->fmr.pool != NULL) +		ib_destroy_fmr_pool(ib_conn->fmr.pool); + +	ib_conn->fmr.pool = NULL; + +	kfree(ib_conn->fmr.page_vec); +	ib_conn->fmr.page_vec = NULL; +} + +static int +iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, +			 bool pi_enable, struct fast_reg_descriptor *desc) +{ +	int ret; + +	desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device, +						      ISCSI_ISER_SG_TABLESIZE + 1); +	if (IS_ERR(desc->data_frpl)) { +		ret = PTR_ERR(desc->data_frpl); +		iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", +			 ret); +		return PTR_ERR(desc->data_frpl); +	} -	if (ib_conn->fastreg.fmr.pool != NULL) -		ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool); +	desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1); +	if (IS_ERR(desc->data_mr)) { +		ret = PTR_ERR(desc->data_mr); +		iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); +		goto fast_reg_mr_failure; +	} +	desc->reg_indicators |= ISER_DATA_KEY_VALID; + +	if (pi_enable) { +		struct ib_mr_init_attr mr_init_attr = {0}; +		struct iser_pi_context *pi_ctx = NULL; + +		desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL); +		if (!desc->pi_ctx) { +			iser_err("Failed to allocate pi context\n"); +			ret = -ENOMEM; +			goto pi_ctx_alloc_failure; +		} +		pi_ctx = desc->pi_ctx; + +		pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device, +						    ISCSI_ISER_SG_TABLESIZE); +		if (IS_ERR(pi_ctx->prot_frpl)) { +			ret = PTR_ERR(pi_ctx->prot_frpl); +			iser_err("Failed to allocate prot frpl ret=%d\n", +				 ret); +			goto prot_frpl_failure; +		} -	ib_conn->fastreg.fmr.pool = NULL; +		pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, +						ISCSI_ISER_SG_TABLESIZE + 1); +		if (IS_ERR(pi_ctx->prot_mr)) { +			ret = PTR_ERR(pi_ctx->prot_mr); +			iser_err("Failed to allocate prot frmr ret=%d\n", +				 ret); +			goto prot_mr_failure; +		} +		desc->reg_indicators |= ISER_PROT_KEY_VALID; + +		mr_init_attr.max_reg_descriptors = 2; +		mr_init_attr.flags |= IB_MR_SIGNATURE_EN; +		pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr); +		if (IS_ERR(pi_ctx->sig_mr)) { +			ret = PTR_ERR(pi_ctx->sig_mr); +			iser_err("Failed to allocate signature enabled mr err=%d\n", +				 ret); +			goto sig_mr_failure; +		} +		desc->reg_indicators |= ISER_SIG_KEY_VALID; +	} +	desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; + +	iser_dbg("Create fr_desc %p page_list %p\n", +		 desc, desc->data_frpl->page_list); + +	return 0; +sig_mr_failure: +	ib_dereg_mr(desc->pi_ctx->prot_mr); +prot_mr_failure: +	ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); +prot_frpl_failure: +	kfree(desc->pi_ctx); +pi_ctx_alloc_failure: +	ib_dereg_mr(desc->data_mr); +fast_reg_mr_failure: +	ib_free_fast_reg_page_list(desc->data_frpl); -	kfree(ib_conn->fastreg.fmr.page_vec); -	ib_conn->fastreg.fmr.page_vec = NULL; +	return ret;  }  /** - * iser_create_frwr_pool - Creates pool of fast_reg descriptors + * iser_create_fastreg_pool - Creates pool of fast_reg descriptors   * for fast registration work requests.   
* returns 0 on success, or errno code on failure   */ -int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) +int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max)  {  	struct iser_device	*device = ib_conn->device;  	struct fast_reg_descriptor	*desc;  	int i, ret; -	INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool); -	ib_conn->fastreg.frwr.pool_size = 0; +	INIT_LIST_HEAD(&ib_conn->fastreg.pool); +	ib_conn->fastreg.pool_size = 0;  	for (i = 0; i < cmds_max; i++) { -		desc = kmalloc(sizeof(*desc), GFP_KERNEL); +		desc = kzalloc(sizeof(*desc), GFP_KERNEL);  		if (!desc) {  			iser_err("Failed to allocate a new fast_reg descriptor\n");  			ret = -ENOMEM;  			goto err;  		} -		desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device, -							 ISCSI_ISER_SG_TABLESIZE + 1); -		if (IS_ERR(desc->data_frpl)) { -			ret = PTR_ERR(desc->data_frpl); -			iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret); -			goto fast_reg_page_failure; +		ret = iser_create_fastreg_desc(device->ib_device, device->pd, +					       ib_conn->pi_support, desc); +		if (ret) { +			iser_err("Failed to create fastreg descriptor err=%d\n", +				 ret); +			kfree(desc); +			goto err;  		} -		desc->data_mr = ib_alloc_fast_reg_mr(device->pd, -						     ISCSI_ISER_SG_TABLESIZE + 1); -		if (IS_ERR(desc->data_mr)) { -			ret = PTR_ERR(desc->data_mr); -			iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); -			goto fast_reg_mr_failure; -		} -		desc->valid = true; -		list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); -		ib_conn->fastreg.frwr.pool_size++; +		list_add_tail(&desc->list, &ib_conn->fastreg.pool); +		ib_conn->fastreg.pool_size++;  	}  	return 0; -fast_reg_mr_failure: -	ib_free_fast_reg_page_list(desc->data_frpl); -fast_reg_page_failure: -	kfree(desc);  err: -	iser_free_frwr_pool(ib_conn); +	iser_free_fastreg_pool(ib_conn);  	return ret;  }  /** - * iser_free_frwr_pool - releases the pool of fast_reg descriptors + * iser_free_fastreg_pool - releases the pool of fast_reg descriptors   */ -void iser_free_frwr_pool(struct iser_conn *ib_conn) +void iser_free_fastreg_pool(struct iser_conn *ib_conn)  {  	struct fast_reg_descriptor *desc, *tmp;  	int i = 0; -	if (list_empty(&ib_conn->fastreg.frwr.pool)) +	if (list_empty(&ib_conn->fastreg.pool))  		return; -	iser_info("freeing conn %p frwr pool\n", ib_conn); +	iser_info("freeing conn %p fr pool\n", ib_conn); -	list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) { +	list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {  		list_del(&desc->list);  		ib_free_fast_reg_page_list(desc->data_frpl);  		ib_dereg_mr(desc->data_mr); +		if (desc->pi_ctx) { +			ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); +			ib_dereg_mr(desc->pi_ctx->prot_mr); +			ib_destroy_mr(desc->pi_ctx->sig_mr); +			kfree(desc->pi_ctx); +		}  		kfree(desc);  		++i;  	} -	if (i < ib_conn->fastreg.frwr.pool_size) +	if (i < ib_conn->fastreg.pool_size)  		iser_warn("pool still has %d regions registered\n", -			  ib_conn->fastreg.frwr.pool_size - i); +			  ib_conn->fastreg.pool_size - i);  }  /** @@ -389,12 +463,17 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)  	init_attr.qp_context	= (void *)ib_conn;  	init_attr.send_cq	= device->tx_cq[min_index];  	init_attr.recv_cq	= device->rx_cq[min_index]; -	init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS;  	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;  	init_attr.cap.max_send_sge = 2;  	init_attr.cap.max_recv_sge = 1;  	init_attr.sq_sig_type	= 
IB_SIGNAL_REQ_WR;  init_attr.qp_type	= IB_QPT_RC; +	if (ib_conn->pi_support) { +		init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS; +		init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; +	} else { +		init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS; +	}  	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);  	if (ret) @@ -502,14 +581,30 @@ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,  	return ret;  } +void iser_release_work(struct work_struct *work) +{ +	struct iser_conn *ib_conn; + +	ib_conn = container_of(work, struct iser_conn, release_work); + +	/* wait for .conn_stop callback */ +	wait_for_completion(&ib_conn->stop_completion); + +	/* wait for the qp's post send and post receive buffers to empty */ +	wait_event_interruptible(ib_conn->wait, +				 ib_conn->state == ISER_CONN_DOWN); + +	iser_conn_release(ib_conn); +} +  /**   * Frees all conn objects and deallocs conn descriptor   */ -static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id) +void iser_conn_release(struct iser_conn *ib_conn)  {  	struct iser_device  *device = ib_conn->device; -	BUG_ON(ib_conn->state != ISER_CONN_DOWN); +	BUG_ON(ib_conn->state == ISER_CONN_UP);  	mutex_lock(&ig.connlist_mutex);  	list_del(&ib_conn->conn_list); @@ -521,27 +616,13 @@ static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)  	if (device != NULL)  		iser_device_try_release(device);  	/* if cma handler context, the caller actually destroys the id */ -	if (ib_conn->cma_id != NULL && can_destroy_id) { +	if (ib_conn->cma_id != NULL) {  		rdma_destroy_id(ib_conn->cma_id);  		ib_conn->cma_id = NULL;  	}  	iscsi_destroy_endpoint(ib_conn->ep);  } -void iser_conn_get(struct iser_conn *ib_conn) -{ -	atomic_inc(&ib_conn->refcount); -} - -int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id) -{ -	if (atomic_dec_and_test(&ib_conn->refcount)) { -		iser_conn_release(ib_conn, can_destroy_id); -		return 1; -	} -	return 0; -} -  /**   * triggers start of the disconnect procedures and waits for them to be done   */ @@ -559,24 +640,19 @@ void iser_conn_terminate(struct iser_conn *ib_conn)  	if (err)  		iser_err("Failed to disconnect, conn: 0x%p err %d\n",  			 ib_conn,err); - -	wait_event_interruptible(ib_conn->wait, -				 ib_conn->state == ISER_CONN_DOWN); - -	iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */  } -static int iser_connect_error(struct rdma_cm_id *cma_id) +static void iser_connect_error(struct rdma_cm_id *cma_id)  {  	struct iser_conn *ib_conn; +  	ib_conn = (struct iser_conn *)cma_id->context;  	ib_conn->state = ISER_CONN_DOWN;  	wake_up_interruptible(&ib_conn->wait); -	return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */  } -static int iser_addr_handler(struct rdma_cm_id *cma_id) +static void iser_addr_handler(struct rdma_cm_id *cma_id)  {  	struct iser_device *device;  	struct iser_conn   *ib_conn; @@ -585,22 +661,35 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id)  	device = iser_device_find_by_ib_device(cma_id);  	if (!device) {  		iser_err("device lookup/creation failed\n"); -		return iser_connect_error(cma_id); +		iser_connect_error(cma_id); +		return;  	}  	ib_conn = (struct iser_conn *)cma_id->context;  	ib_conn->device = device; +	/* connection T10-PI support */ +	if (iser_pi_enable) { +		if (!(device->dev_attr.device_cap_flags & +		      IB_DEVICE_SIGNATURE_HANDOVER)) { +			iser_warn("T10-PI requested but not supported on %s, " +				  "continue without T10-PI\n", +				  ib_conn->device->ib_device->name); +			
ib_conn->pi_support = false; +		} else { +			ib_conn->pi_support = true; +		} +	} +  	ret = rdma_resolve_route(cma_id, 1000);  	if (ret) {  		iser_err("resolve route failed: %d\n", ret); -		return iser_connect_error(cma_id); +		iser_connect_error(cma_id); +		return;  	} - -	return 0;  } -static int iser_route_handler(struct rdma_cm_id *cma_id) +static void iser_route_handler(struct rdma_cm_id *cma_id)  {  	struct rdma_conn_param conn_param;  	int    ret; @@ -628,33 +717,40 @@ static int iser_route_handler(struct rdma_cm_id *cma_id)  		goto failure;  	} -	return 0; +	return;  failure: -	return iser_connect_error(cma_id); +	iser_connect_error(cma_id);  }  static void iser_connected_handler(struct rdma_cm_id *cma_id)  {  	struct iser_conn *ib_conn; +	struct ib_qp_attr attr; +	struct ib_qp_init_attr init_attr; + +	(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); +	iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);  	ib_conn = (struct iser_conn *)cma_id->context; -	ib_conn->state = ISER_CONN_UP; -	wake_up_interruptible(&ib_conn->wait); +	if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP)) +		wake_up_interruptible(&ib_conn->wait);  } -static int iser_disconnected_handler(struct rdma_cm_id *cma_id) +static void iser_disconnected_handler(struct rdma_cm_id *cma_id)  {  	struct iser_conn *ib_conn; -	int ret;  	ib_conn = (struct iser_conn *)cma_id->context;  	/* getting here when the state is UP means that the conn is being *  	 * terminated asynchronously from the iSCSI layer's perspective.  */  	if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, -				      ISER_CONN_TERMINATING)) -		iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, -				   ISCSI_ERR_CONN_FAILED); +					ISER_CONN_TERMINATING)){ +		if (ib_conn->iscsi_conn) +			iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); +		else +			iser_err("iscsi_iser connection isn't bound\n"); +	}  	/* Complete the termination process if no posts are pending */  	if (ib_conn->post_recv_buf_count == 0 && @@ -662,24 +758,19 @@ static int iser_disconnected_handler(struct rdma_cm_id *cma_id)  		ib_conn->state = ISER_CONN_DOWN;  		wake_up_interruptible(&ib_conn->wait);  	} - -	ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */ -	return ret;  }  static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)  { -	int ret = 0; -  	iser_info("event %d status %d conn %p id %p\n",  		  event->event, event->status, cma_id->context, cma_id);  	switch (event->event) {  	case RDMA_CM_EVENT_ADDR_RESOLVED: -		ret = iser_addr_handler(cma_id); +		iser_addr_handler(cma_id);  		break;  	case RDMA_CM_EVENT_ROUTE_RESOLVED: -		ret = iser_route_handler(cma_id); +		iser_route_handler(cma_id);  		break;  	case RDMA_CM_EVENT_ESTABLISHED:  		iser_connected_handler(cma_id); @@ -689,18 +780,18 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve  	case RDMA_CM_EVENT_CONNECT_ERROR:  	case RDMA_CM_EVENT_UNREACHABLE:  	case RDMA_CM_EVENT_REJECTED: -		ret = iser_connect_error(cma_id); +		iser_connect_error(cma_id);  		break;  	case RDMA_CM_EVENT_DISCONNECTED:  	case RDMA_CM_EVENT_DEVICE_REMOVAL:  	case RDMA_CM_EVENT_ADDR_CHANGE: -		ret = iser_disconnected_handler(cma_id); +		iser_disconnected_handler(cma_id);  		break;  	default:  		iser_err("Unexpected RDMA CM event (%d)\n", event->event);  		break;  	} -	return ret; +	return 0;  }  void iser_conn_init(struct iser_conn *ib_conn) @@ -709,7 +800,7 @@ void iser_conn_init(struct iser_conn *ib_conn)  	
init_waitqueue_head(&ib_conn->wait);  	ib_conn->post_recv_buf_count = 0;  	atomic_set(&ib_conn->post_send_buf_count, 0); -	atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */ +	init_completion(&ib_conn->stop_completion);  	INIT_LIST_HEAD(&ib_conn->conn_list);  	spin_lock_init(&ib_conn->lock);  } @@ -737,7 +828,6 @@ int iser_connect(struct iser_conn   *ib_conn,  	ib_conn->state = ISER_CONN_PENDING; -	iser_conn_get(ib_conn); /* ref ib conn's cma id */  	ib_conn->cma_id = rdma_create_id(iser_cma_handler,  					     (void *)ib_conn,  					     RDMA_PS_TCP, IB_QPT_RC); @@ -774,9 +864,8 @@ id_failure:  	ib_conn->cma_id = NULL;  addr_failure:  	ib_conn->state = ISER_CONN_DOWN; -	iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */  connect_failure: -	iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ +	iser_conn_release(ib_conn);  	return err;  } @@ -797,7 +886,7 @@ int iser_reg_page_vec(struct iser_conn     *ib_conn,  	page_list = page_vec->pages;  	io_addr	  = page_list[0]; -	mem  = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool, +	mem  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,  				    page_list,  				    page_vec->length,  				    io_addr); @@ -851,11 +940,11 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,  	reg->mem_h = NULL;  } -void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, -			 enum iser_data_dir cmd_dir) +void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, +			    enum iser_data_dir cmd_dir)  {  	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; -	struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; +	struct iser_conn *ib_conn = iser_task->ib_conn;  	struct fast_reg_descriptor *desc = reg->mem_h;  	if (!reg->is_mr) @@ -864,7 +953,7 @@ void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,  	reg->mem_h = NULL;  	reg->is_mr = 0;  	spin_lock_bh(&ib_conn->lock); -	list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); +	list_add_tail(&desc->list, &ib_conn->fastreg.pool);  	spin_unlock_bh(&ib_conn->lock);  } @@ -965,7 +1054,7 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,  		 * perspective.                                             
*/  		if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,  		    ISER_CONN_TERMINATING)) -			iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, +			iscsi_conn_failure(ib_conn->iscsi_conn,  					   ISCSI_ERR_CONN_FAILED);  		/* no more non completed posts to the QP, complete the @@ -989,18 +1078,16 @@ static int iser_drain_tx_cq(struct iser_device  *device, int cq_index)  		if (wc.status == IB_WC_SUCCESS) {  			if (wc.opcode == IB_WC_SEND)  				iser_snd_completion(tx_desc, ib_conn); -			else if (wc.opcode == IB_WC_LOCAL_INV || -				 wc.opcode == IB_WC_FAST_REG_MR) { -				atomic_dec(&ib_conn->post_send_buf_count); -				continue; -			} else +			else  				iser_err("expected opcode %d got %d\n",  					IB_WC_SEND, wc.opcode);  		} else {  			iser_err("tx id %llx status %d vend_err %x\n", -				wc.wr_id, wc.status, wc.vendor_err); -			atomic_dec(&ib_conn->post_send_buf_count); -			iser_handle_comp_error(tx_desc, ib_conn); +				 wc.wr_id, wc.status, wc.vendor_err); +			if (wc.wr_id != ISER_FASTREG_LI_WRID) { +				atomic_dec(&ib_conn->post_send_buf_count); +				iser_handle_comp_error(tx_desc, ib_conn); +			}  		}  		completed_tx++;  	} @@ -1018,8 +1105,12 @@ static void iser_cq_tasklet_fn(unsigned long data)  	 struct iser_rx_desc *desc;  	 unsigned long	     xfer_len;  	struct iser_conn *ib_conn; -	int completed_tx, completed_rx; -	completed_tx = completed_rx = 0; +	int completed_tx, completed_rx = 0; + +	/* First do the tx drain, so that in a case where we have rx flushes and a successful +	 * tx completion we still go through completion error handling. +	 */ +	completed_tx = iser_drain_tx_cq(device, cq_index);  	while (ib_poll_cq(cq, 1, &wc) == 1) {  		desc	 = (struct iser_rx_desc *) (unsigned long) wc.wr_id; @@ -1047,7 +1138,6 @@ static void iser_cq_tasklet_fn(unsigned long data)  	 * " would not cause interrupts to be missed"                       */  	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); -	completed_tx += iser_drain_tx_cq(device, cq_index);  	iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);  } @@ -1059,3 +1149,51 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context)  	tasklet_schedule(&device->cq_tasklet[cq_index]);  } + +u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, +			     enum iser_data_dir cmd_dir, sector_t *sector) +{ +	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; +	struct fast_reg_descriptor *desc = reg->mem_h; +	unsigned long sector_size = iser_task->sc->device->sector_size; +	struct ib_mr_status mr_status; +	int ret; + +	if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) { +		desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; +		ret = ib_check_mr_status(desc->pi_ctx->sig_mr, +					 IB_MR_CHECK_SIG_STATUS, &mr_status); +		if (ret) { +			pr_err("ib_check_mr_status failed, ret %d\n", ret); +			goto err; +		} + +		if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { +			sector_t sector_off = mr_status.sig_err.sig_err_offset; + +			do_div(sector_off, sector_size + 8); +			*sector = scsi_get_lba(iser_task->sc) + sector_off; + +			pr_err("PI error found type %d at sector %llx " +			       "expected %x vs actual %x\n", +			       mr_status.sig_err.err_type, +			       (unsigned long long)*sector, +			       mr_status.sig_err.expected, +			       mr_status.sig_err.actual); + +			switch (mr_status.sig_err.err_type) { +			case IB_SIG_BAD_GUARD: +				return 0x1; +			case IB_SIG_BAD_REFTAG: +				return 0x3; +			case IB_SIG_BAD_APPTAG: +				return 0x2; +			} +		} +	} + +	return 0; +err: +	/* Not a lot we can do here, return an ambiguous guard error */ +	return 0x1; +}
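The registration path in this patch follows a simple pool discipline: iser_create_fastreg_pool() pre-allocates cmds_max descriptors on a free list, iser_reg_rdma_mem_fastreg() pops one under ib_conn->lock for the lifetime of a single I/O, and iser_unreg_mem_fastreg() pushes it back. A minimal userspace sketch of that discipline, assuming a pthread mutex in place of the kernel spinlock and illustrative field names rather than the driver's real layout:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct fr_desc {
	struct fr_desc *next;
	unsigned int reg_indicators;	/* models the DATA/PROT/SIG key-valid bits */
};

struct fr_pool {
	pthread_mutex_t lock;		/* stands in for ib_conn->lock */
	struct fr_desc *free_list;
	int size;
};

static int fr_pool_init(struct fr_pool *p, int cmds_max)
{
	pthread_mutex_init(&p->lock, NULL);
	p->free_list = NULL;
	p->size = 0;
	for (int i = 0; i < cmds_max; i++) {
		struct fr_desc *d = calloc(1, sizeof(*d));
		if (!d)
			return -1;	/* caller tears down what was built so far */
		d->next = p->free_list;
		p->free_list = d;
		p->size++;
	}
	return 0;
}

/* Pop a descriptor for the lifetime of one registration. */
static struct fr_desc *fr_pool_get(struct fr_pool *p)
{
	pthread_mutex_lock(&p->lock);
	struct fr_desc *d = p->free_list;
	if (d)
		p->free_list = d->next;
	pthread_mutex_unlock(&p->lock);
	return d;
}

/* Return it on unregistration; nothing is freed on the data path. */
static void fr_pool_put(struct fr_pool *p, struct fr_desc *d)
{
	pthread_mutex_lock(&p->lock);
	d->next = p->free_list;
	p->free_list = d;
	pthread_mutex_unlock(&p->lock);
}

int main(void)
{
	struct fr_pool pool;

	if (fr_pool_init(&pool, 4))
		return 1;
	struct fr_desc *d = fr_pool_get(&pool);	/* register */
	fr_pool_put(&pool, d);			/* unregister */
	printf("pool of %d descriptors exercised\n", pool.size);
	return 0;
}

Pre-allocating one descriptor per outstanding command means the data path never allocates memory or MRs under I/O; it only moves list entries inside a short critical section.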
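The teardown rework replaces the old iser_conn_get()/iser_conn_put() reference counting with an ordered wait in a workqueue: iser_release_work() first waits for the .conn_stop completion, then for the QP flush to drive the state to ISER_CONN_DOWN, and only then calls iser_conn_release(). A rough userspace model of that ordering, assuming pthread condition variables in place of the kernel completion and waitqueue; names are illustrative:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

enum conn_state { CONN_UP, CONN_TERMINATING, CONN_DOWN };

struct conn {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	bool stopped;			/* set by the .conn_stop callback */
	enum conn_state state;
};

static void *release_work(void *arg)
{
	struct conn *c = arg;

	pthread_mutex_lock(&c->lock);
	while (!c->stopped)		/* models wait_for_completion() */
		pthread_cond_wait(&c->cond, &c->lock);
	while (c->state != CONN_DOWN)	/* models wait_event_interruptible() */
		pthread_cond_wait(&c->cond, &c->lock);
	pthread_mutex_unlock(&c->lock);

	printf("connection released\n");	/* iser_conn_release() runs here */
	return NULL;
}

int main(void)
{
	struct conn c = { PTHREAD_MUTEX_INITIALIZER,
			  PTHREAD_COND_INITIALIZER, false, CONN_UP };
	pthread_t t;

	pthread_create(&t, NULL, release_work, &c);

	pthread_mutex_lock(&c.lock);
	c.stopped = true;		/* .conn_stop fired */
	c.state = CONN_DOWN;		/* QP flush completed */
	pthread_cond_broadcast(&c.cond);
	pthread_mutex_unlock(&c.lock);

	pthread_join(t, NULL);
	return 0;
}

The point of moving this onto a workqueue is that the release path can sleep for both events instead of encoding them in refcount drops scattered across the CM callbacks.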
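iser_check_task_pi_status() is what turns a hardware-detected T10-PI violation into something the SCSI midlayer can act on: it queries the signature MR, maps the error type to ascq-style codes (0x1 guard, 0x2 application tag, 0x3 reference tag), and locates the failing LBA by dividing the reported byte offset by the protected block size, i.e. the sector plus its 8-byte DIF tuple. A compact userspace sketch of that mapping, with hypothetical stand-ins for ib_mr_status and plain division in place of do_div():

#include <stdint.h>
#include <stdio.h>

enum pi_err_type { PI_BAD_GUARD, PI_BAD_APPTAG, PI_BAD_REFTAG };

struct pi_err {
	enum pi_err_type type;
	uint64_t byte_offset;		/* offset of the bad tuple, as reported */
};

/* Map the error to an ascq-style code and compute the failing sector. */
static uint8_t pi_status_to_scsi(const struct pi_err *err,
				 unsigned long sector_size,
				 uint64_t scsi_lba, uint64_t *sector)
{
	/* each protected block carries sector_size data + 8 bytes of PI */
	*sector = scsi_lba + err->byte_offset / (sector_size + 8);

	switch (err->type) {
	case PI_BAD_GUARD:  return 0x1;	/* guard check failed */
	case PI_BAD_APPTAG: return 0x2;	/* application tag check failed */
	case PI_BAD_REFTAG: return 0x3;	/* reference tag check failed */
	}
	return 0x1;			/* ambiguous: report a guard error */
}

int main(void)
{
	struct pi_err err = { .type = PI_BAD_REFTAG, .byte_offset = 3 * 520 };
	uint64_t sector;
	uint8_t ascq = pi_status_to_scsi(&err, 512, 100, &sector);

	/* 520 bytes per protected 512-byte block -> third sector past LBA 100 */
	printf("ascq 0x%x at sector %llu\n", ascq,
	       (unsigned long long)sector);
	return 0;
}

The + 8 in the divisor is the detail to notice: a protected 512-byte sector occupies 520 bytes once the tuple is interleaved, so dividing the raw offset by the bare sector size would blame the wrong block.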
