diff options
Diffstat (limited to 'drivers/infiniband/ulp')
| -rw-r--r-- | drivers/infiniband/ulp/Makefile | 5 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_cm.c | 18 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 2 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 3 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.c | 188 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.h | 93 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/iser/iser_initiator.c | 179 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/iser/iser_memory.c | 470 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/iser/iser_verbs.c | 418 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/isert/ib_isert.c | 1257 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/isert/ib_isert.h | 59 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.c | 756 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.h | 95 | ||||
| -rw-r--r-- | drivers/infiniband/ulp/srpt/ib_srpt.c | 41 |
15 files changed, 2544 insertions, 1044 deletions
diff --git a/drivers/infiniband/ulp/Makefile b/drivers/infiniband/ulp/Makefile new file mode 100644 index 00000000000..f3c7dcf0309 --- /dev/null +++ b/drivers/infiniband/ulp/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_INFINIBAND_IPOIB) += ipoib/ +obj-$(CONFIG_INFINIBAND_SRP) += srp/ +obj-$(CONFIG_INFINIBAND_SRPT) += srpt/ +obj-$(CONFIG_INFINIBAND_ISER) += iser/ +obj-$(CONFIG_INFINIBAND_ISERT) += isert/ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 1377f85911c..933efcea0d0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1030,10 +1030,20 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ .cap.max_send_sge = 1, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_RC, - .qp_context = tx + .qp_context = tx, + .create_flags = IB_QP_CREATE_USE_GFP_NOIO }; - return ib_create_qp(priv->pd, &attr); + struct ib_qp *tx_qp; + + tx_qp = ib_create_qp(priv->pd, &attr); + if (PTR_ERR(tx_qp) == -EINVAL) { + ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", + priv->ca->name); + attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; + tx_qp = ib_create_qp(priv->pd, &attr); + } + return tx_qp; } static int ipoib_cm_send_req(struct net_device *dev, @@ -1104,12 +1114,14 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, struct ipoib_dev_priv *priv = netdev_priv(p->dev); int ret; - p->tx_ring = vzalloc(ipoib_sendq_size * sizeof *p->tx_ring); + p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring, + GFP_NOIO, PAGE_KERNEL); if (!p->tx_ring) { ipoib_warn(priv, "failed to allocate tx ring\n"); ret = -ENOMEM; goto err_tx; } + memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); p->qp = ipoib_cm_create_tx_qp(p->dev, p); if (IS_ERR(p->qp)) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index c4b3940845e..078cadd6c79 100644 --- 
a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -105,5 +105,5 @@ static const struct ethtool_ops ipoib_ethtool_ops = { void ipoib_set_ethtool_ops(struct net_device *dev) { - SET_ETHTOOL_OPS(dev, &ipoib_ethtool_ops); + dev->ethtool_ops = &ipoib_ethtool_ops; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index d64ed05fb08..5786a78ff8b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -104,6 +104,8 @@ int ipoib_open(struct net_device *dev) ipoib_dbg(priv, "bringing up interface\n"); + netif_carrier_off(dev); + set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); if (ipoib_pkey_dev_delay_open(dev)) @@ -1366,8 +1368,6 @@ void ipoib_setup(struct net_device *dev) memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); - netif_carrier_off(dev); - priv->dev = dev; spin_lock_init(&priv->lock); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 049a997caff..c56d5d44c53 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -192,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK) init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; + if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING) + init_attr.create_flags |= IB_QP_CREATE_NETIF_QP; + if (dev->features & NETIF_F_SG) init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index dd03cfe596d..eb7973957a6 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -5,7 +5,7 @@ * Copyright (C) 2004 Alex Aizman * Copyright (C) 2005 Mike Christie * Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved. 
- * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved. * maintained by openib-general@openib.org * * This software is available to you under a choice of one of two @@ -82,6 +82,8 @@ static unsigned int iscsi_max_lun = 512; module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); int iser_debug_level = 0; +bool iser_pi_enable = false; +int iser_pi_guard = 0; MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover"); MODULE_LICENSE("Dual BSD/GPL"); @@ -91,6 +93,13 @@ MODULE_VERSION(DRV_VER); module_param_named(debug_level, iser_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)"); +module_param_named(pi_enable, iser_pi_enable, bool, 0644); +MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)"); + +module_param_named(pi_guard, iser_pi_guard, int, 0644); +MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)"); + +static struct workqueue_struct *release_wq; struct iser_global ig; void @@ -138,8 +147,8 @@ static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode) int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc) { - struct iscsi_iser_conn *iser_conn = task->conn->dd_data; - struct iser_device *device = iser_conn->ib_conn->device; + struct iser_conn *ib_conn = task->conn->dd_data; + struct iser_device *device = ib_conn->device; struct iscsi_iser_task *iser_task = task->dd_data; u64 dma_addr; @@ -153,7 +162,7 @@ int iser_initialize_task_headers(struct iscsi_task *task, tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; tx_desc->tx_sg[0].lkey = device->mr->lkey; - iser_task->iser_conn = iser_conn; + iser_task->ib_conn = ib_conn; return 0; } /** @@ -176,6 +185,8 @@ iscsi_iser_task_init(struct iscsi_task *task) iser_task->command_sent = 0; iser_task_rdma_init(iser_task); + iser_task->sc = task->sc; + return 0; } @@ -278,10 +289,9 @@ 
iscsi_iser_task_xmit(struct iscsi_task *task) static void iscsi_iser_cleanup_task(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_tx_desc *tx_desc = &iser_task->desc; - - struct iscsi_iser_conn *iser_conn = task->conn->dd_data; - struct iser_device *device = iser_conn->ib_conn->device; + struct iser_tx_desc *tx_desc = &iser_task->desc; + struct iser_conn *ib_conn = task->conn->dd_data; + struct iser_device *device = ib_conn->device; ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); @@ -296,14 +306,25 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) } } +static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) +{ + struct iscsi_iser_task *iser_task = task->dd_data; + + if (iser_task->dir[ISER_DIR_IN]) + return iser_check_task_pi_status(iser_task, ISER_DIR_IN, + sector); + else + return iser_check_task_pi_status(iser_task, ISER_DIR_OUT, + sector); +} + static struct iscsi_cls_conn * iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) { struct iscsi_conn *conn; struct iscsi_cls_conn *cls_conn; - struct iscsi_iser_conn *iser_conn; - cls_conn = iscsi_conn_setup(cls_session, sizeof(*iser_conn), conn_idx); + cls_conn = iscsi_conn_setup(cls_session, 0, conn_idx); if (!cls_conn) return NULL; conn = cls_conn->dd_data; @@ -314,39 +335,15 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) */ conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN; - iser_conn = conn->dd_data; - conn->dd_data = iser_conn; - iser_conn->iscsi_conn = conn; - return cls_conn; } -static void -iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) -{ - struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iser_conn *ib_conn = iser_conn->ib_conn; - - iscsi_conn_teardown(cls_conn); - /* - * Userspace will normally call the stop callback and - * already have freed the 
ib_conn, but if it goofed up then - * we free it here. - */ - if (ib_conn) { - ib_conn->iser_conn = NULL; - iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */ - } -} - static int iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, struct iscsi_cls_conn *cls_conn, uint64_t transport_eph, int is_leading) { struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_iser_conn *iser_conn; struct iscsi_session *session; struct iser_conn *ib_conn; struct iscsi_endpoint *ep; @@ -373,35 +370,44 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, /* binds the iSER connection retrieved from the previously * connected ep_handle to the iSCSI layer connection. exchanges * connection pointers */ - iser_info("binding iscsi/iser conn %p %p to ib_conn %p\n", - conn, conn->dd_data, ib_conn); - iser_conn = conn->dd_data; - ib_conn->iser_conn = iser_conn; - iser_conn->ib_conn = ib_conn; - iser_conn_get(ib_conn); /* ref iscsi/ib conn binding */ + iser_info("binding iscsi conn %p to ib_conn %p\n", conn, ib_conn); + + conn->dd_data = ib_conn; + ib_conn->iscsi_conn = conn; + return 0; } +static int +iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) +{ + struct iscsi_conn *iscsi_conn; + struct iser_conn *ib_conn; + + iscsi_conn = cls_conn->dd_data; + ib_conn = iscsi_conn->dd_data; + reinit_completion(&ib_conn->stop_completion); + + return iscsi_conn_start(cls_conn); +} + static void iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) { struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_iser_conn *iser_conn = conn->dd_data; - struct iser_conn *ib_conn = iser_conn->ib_conn; + struct iser_conn *ib_conn = conn->dd_data; + + iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn); + iscsi_conn_stop(cls_conn, flag); /* * Userspace may have goofed up and not bound the connection or * might have only partially setup the connection. 
*/ if (ib_conn) { - iscsi_conn_stop(cls_conn, flag); - /* - * There is no unbind event so the stop callback - * must release the ref from the bind. - */ - iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */ + conn->dd_data = NULL; + complete(&ib_conn->stop_completion); } - iser_conn->ib_conn = NULL; } static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) @@ -413,6 +419,17 @@ static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) iscsi_host_free(shost); } +static inline unsigned int +iser_dif_prot_caps(int prot_caps) +{ + return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION | + SHOST_DIX_TYPE1_PROTECTION : 0) | + ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION | + SHOST_DIX_TYPE2_PROTECTION : 0) | + ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION | + SHOST_DIX_TYPE3_PROTECTION : 0); +} + static struct iscsi_cls_session * iscsi_iser_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, uint16_t qdepth, @@ -437,8 +454,18 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, * older userspace tools (before 2.0-870) did not pass us * the leading conn's ep so this will be NULL; */ - if (ep) + if (ep) { ib_conn = ep->dd_data; + if (ib_conn->pi_support) { + u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap; + + scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); + if (iser_pi_guard) + scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP); + else + scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC); + } + } if (iscsi_host_add(shost, ep ? 
ib_conn->device->ib_device->dma_device : NULL)) @@ -481,28 +508,28 @@ iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn, case ISCSI_PARAM_HDRDGST_EN: sscanf(buf, "%d", &value); if (value) { - iser_err("DataDigest wasn't negotiated to None"); + iser_err("DataDigest wasn't negotiated to None\n"); return -EPROTO; } break; case ISCSI_PARAM_DATADGST_EN: sscanf(buf, "%d", &value); if (value) { - iser_err("DataDigest wasn't negotiated to None"); + iser_err("DataDigest wasn't negotiated to None\n"); return -EPROTO; } break; case ISCSI_PARAM_IFMARKER_EN: sscanf(buf, "%d", &value); if (value) { - iser_err("IFMarker wasn't negotiated to No"); + iser_err("IFMarker wasn't negotiated to No\n"); return -EPROTO; } break; case ISCSI_PARAM_OFMARKER_EN: sscanf(buf, "%d", &value); if (value) { - iser_err("OFMarker wasn't negotiated to No"); + iser_err("OFMarker wasn't negotiated to No\n"); return -EPROTO; } break; @@ -618,19 +645,20 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) struct iser_conn *ib_conn; ib_conn = ep->dd_data; - if (ib_conn->iser_conn) - /* - * Must suspend xmit path if the ep is bound to the - * iscsi_conn, so we know we are not accessing the ib_conn - * when we free it. - * - * This may not be bound if the ep poll failed. - */ - iscsi_suspend_tx(ib_conn->iser_conn->iscsi_conn); - - - iser_info("ib conn %p state %d\n", ib_conn, ib_conn->state); + iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state); iser_conn_terminate(ib_conn); + + /* + * if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop + * call and ISER_CONN_DOWN state before freeing the iser resources. + * otherwise we are safe to free resources immediately. 
+ */ + if (ib_conn->iscsi_conn) { + INIT_WORK(&ib_conn->release_work, iser_release_work); + queue_work(release_wq, &ib_conn->release_work); + } else { + iser_conn_release(ib_conn); + } } static umode_t iser_attr_is_visible(int param_type, int param) @@ -714,13 +742,13 @@ static struct iscsi_transport iscsi_iser_transport = { /* connection management */ .create_conn = iscsi_iser_conn_create, .bind_conn = iscsi_iser_conn_bind, - .destroy_conn = iscsi_iser_conn_destroy, + .destroy_conn = iscsi_conn_teardown, .attr_is_visible = iser_attr_is_visible, .set_param = iscsi_iser_set_param, .get_conn_param = iscsi_conn_get_param, .get_ep_param = iscsi_iser_get_ep_param, .get_session_param = iscsi_session_get_param, - .start_conn = iscsi_conn_start, + .start_conn = iscsi_iser_conn_start, .stop_conn = iscsi_iser_conn_stop, /* iscsi host params */ .get_host_param = iscsi_host_get_param, @@ -732,6 +760,7 @@ static struct iscsi_transport iscsi_iser_transport = { .xmit_task = iscsi_iser_task_xmit, .cleanup_task = iscsi_iser_cleanup_task, .alloc_pdu = iscsi_iser_pdu_alloc, + .check_protection = iscsi_iser_check_protection, /* recovery */ .session_recovery_timedout = iscsi_session_recovery_timedout, @@ -766,6 +795,12 @@ static int __init iser_init(void) mutex_init(&ig.connlist_mutex); INIT_LIST_HEAD(&ig.connlist); + release_wq = alloc_workqueue("release workqueue", 0, 0); + if (!release_wq) { + iser_err("failed to allocate release workqueue\n"); + return -ENOMEM; + } + iscsi_iser_scsi_transport = iscsi_register_transport( &iscsi_iser_transport); if (!iscsi_iser_scsi_transport) { @@ -784,7 +819,24 @@ register_transport_failure: static void __exit iser_exit(void) { + struct iser_conn *ib_conn, *n; + int connlist_empty; + iser_dbg("Removing iSER datamover...\n"); + destroy_workqueue(release_wq); + + mutex_lock(&ig.connlist_mutex); + connlist_empty = list_empty(&ig.connlist); + mutex_unlock(&ig.connlist_mutex); + + if (!connlist_empty) { + iser_err("Error cleanup stage completed but we 
still have iser " + "connections, destroying them anyway.\n"); + list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) { + iser_conn_release(ib_conn); + } + } + iscsi_unregister_transport(&iscsi_iser_transport); kmem_cache_destroy(ig.desc_cache); } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 67914027c61..97cd385bf7f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -8,7 +8,7 @@ * * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -46,6 +46,8 @@ #include <linux/printk.h> #include <scsi/libiscsi.h> #include <scsi/scsi_transport_iscsi.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_device.h> #include <linux/interrupt.h> #include <linux/wait.h> @@ -67,7 +69,7 @@ #define DRV_NAME "iser" #define PFX DRV_NAME ": " -#define DRV_VER "1.1" +#define DRV_VER "1.4" #define iser_dbg(fmt, arg...) 
\ do { \ @@ -134,10 +136,21 @@ ISER_MAX_TX_MISC_PDUS + \ ISER_MAX_RX_MISC_PDUS) +/* Max registration work requests per command */ +#define ISER_MAX_REG_WR_PER_CMD 5 + +/* For Signature we don't support DATAOUTs so no need to make room for them */ +#define ISER_QP_SIG_MAX_REQ_DTOS (ISER_DEF_XMIT_CMDS_MAX * \ + (1 + ISER_MAX_REG_WR_PER_CMD) + \ + ISER_MAX_TX_MISC_PDUS + \ + ISER_MAX_RX_MISC_PDUS) + #define ISER_VER 0x10 #define ISER_WSV 0x08 #define ISER_RSV 0x04 +#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL + struct iser_hdr { u8 flags; u8 rsvd[3]; @@ -201,7 +214,6 @@ struct iser_data_buf { /* fwd declarations */ struct iser_device; struct iser_cq_desc; -struct iscsi_iser_conn; struct iscsi_iser_task; struct iscsi_endpoint; @@ -258,6 +270,7 @@ struct iscsi_iser_task; struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; + struct ib_device_attr dev_attr; struct ib_cq *rx_cq[ISER_MAX_CQ]; struct ib_cq *tx_cq[ISER_MAX_CQ]; struct ib_mr *mr; @@ -277,17 +290,35 @@ struct iser_device { enum iser_data_dir cmd_dir); }; +#define ISER_CHECK_GUARD 0xc0 +#define ISER_CHECK_REFTAG 0x0f +#define ISER_CHECK_APPTAG 0x30 + +enum iser_reg_indicator { + ISER_DATA_KEY_VALID = 1 << 0, + ISER_PROT_KEY_VALID = 1 << 1, + ISER_SIG_KEY_VALID = 1 << 2, + ISER_FASTREG_PROTECTED = 1 << 3, +}; + +struct iser_pi_context { + struct ib_mr *prot_mr; + struct ib_fast_reg_page_list *prot_frpl; + struct ib_mr *sig_mr; +}; + struct fast_reg_descriptor { struct list_head list; /* For fast registration - FRWR */ struct ib_mr *data_mr; struct ib_fast_reg_page_list *data_frpl; - /* Valid for fast registration flag */ - bool valid; + struct iser_pi_context *pi_ctx; + /* registration indicators container */ + u8 reg_indicators; }; struct iser_conn { - struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */ + struct iscsi_conn *iscsi_conn; struct iscsi_endpoint *ep; enum iser_ib_conn_state state; /* rdma connection state */ atomic_t refcount; @@ -302,6 +333,8 @@ struct iser_conn { 
int post_recv_buf_count; /* posted rx count */ atomic_t post_send_buf_count; /* posted tx count */ char name[ISER_OBJECT_NAME_SIZE]; + struct work_struct release_work; + struct completion stop_completion; struct list_head conn_list; /* entry in ig conn list */ char *login_buf; @@ -310,6 +343,9 @@ struct iser_conn { unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; + bool pi_support; + + /* Connection memory registration pool */ union { struct { struct ib_fmr_pool *pool; /* pool of IB FMRs */ @@ -319,24 +355,22 @@ struct iser_conn { struct { struct list_head pool; int pool_size; - } frwr; - } fastreg; -}; - -struct iscsi_iser_conn { - struct iscsi_conn *iscsi_conn;/* ptr to iscsi conn */ - struct iser_conn *ib_conn; /* iSER IB conn */ + } fastreg; + }; }; struct iscsi_iser_task { struct iser_tx_desc desc; - struct iscsi_iser_conn *iser_conn; + struct iser_conn *ib_conn; enum iser_task_status status; + struct scsi_cmnd *sc; int command_sent; /* set if command sent */ int dir[ISER_DIRS_NUM]; /* set if dir use*/ struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. 
copy */ + struct iser_data_buf prot[ISER_DIRS_NUM]; /* prot desc */ + struct iser_data_buf prot_copy[ISER_DIRS_NUM];/* prot copy */ }; struct iser_page_vec { @@ -362,6 +396,8 @@ struct iser_global { extern struct iser_global ig; extern int iser_debug_level; +extern bool iser_pi_enable; +extern int iser_pi_guard; /* allocate connection resources needed for rdma functionality */ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); @@ -383,12 +419,12 @@ void iscsi_iser_recv(struct iscsi_conn *conn, void iser_conn_init(struct iser_conn *ib_conn); -void iser_conn_get(struct iser_conn *ib_conn); - -int iser_conn_put(struct iser_conn *ib_conn, int destroy_cma_id_allowed); +void iser_conn_release(struct iser_conn *ib_conn); void iser_conn_terminate(struct iser_conn *ib_conn); +void iser_release_work(struct work_struct *work); + void iser_rcv_completion(struct iser_rx_desc *desc, unsigned long dto_xfer_len, struct iser_conn *ib_conn); @@ -401,13 +437,15 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *task); void iser_free_rx_descriptors(struct iser_conn *ib_conn); -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, - enum iser_data_dir cmd_dir); +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, + struct iser_data_buf *mem, + struct iser_data_buf *mem_copy, + enum iser_data_dir cmd_dir); int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task, - enum iser_data_dir cmd_dir); +int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task, + enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, struct sockaddr_in *src_addr, @@ -420,8 +458,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); -void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir); +void iser_unreg_mem_fastreg(struct iscsi_iser_task 
*iser_task, + enum iser_data_dir cmd_dir); int iser_post_recvl(struct iser_conn *ib_conn); int iser_post_recvm(struct iser_conn *ib_conn, int count); @@ -432,12 +470,15 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, enum iser_data_dir iser_dir, enum dma_data_direction dma_dir); -void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, + struct iser_data_buf *data); int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fmr_pool(struct iser_conn *ib_conn); -int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max); -void iser_free_frwr_pool(struct iser_conn *ib_conn); +int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max); +void iser_free_fastreg_pool(struct iser_conn *ib_conn); +u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir, sector_t *sector); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 538822684d5..8d44a406063 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -41,15 +41,15 @@ #include "iscsi_iser.h" /* Register user buffer memory and initialize passive rdma - * dto descriptor. Total data size is stored in - * iser_task->data[ISER_DIR_IN].data_len + * dto descriptor. 
Data size is stored in + * task->data[ISER_DIR_IN].data_len, Protection size + * os stored in task->prot[ISER_DIR_IN].data_len */ -static int iser_prepare_read_cmd(struct iscsi_task *task, - unsigned int edtl) +static int iser_prepare_read_cmd(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_device *device = iser_task->iser_conn->ib_conn->device; + struct iser_device *device = iser_task->ib_conn->device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -62,12 +62,15 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, if (err) return err; - if (edtl > iser_task->data[ISER_DIR_IN].data_len) { - iser_err("Total data length: %ld, less than EDTL: " - "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", - iser_task->data[ISER_DIR_IN].data_len, edtl, - task->itt, iser_task->iser_conn); - return -EINVAL; + if (scsi_prot_sg_count(iser_task->sc)) { + struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN]; + + err = iser_dma_map_task_data(iser_task, + pbuf_in, + ISER_DIR_IN, + DMA_FROM_DEVICE); + if (err) + return err; } err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN); @@ -89,8 +92,9 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, } /* Register user buffer memory and initialize passive rdma - * dto descriptor. Total data size is stored in - * task->data[ISER_DIR_OUT].data_len + * dto descriptor. 
Data size is stored in + * task->data[ISER_DIR_OUT].data_len, Protection size + * is stored at task->prot[ISER_DIR_OUT].data_len */ static int iser_prepare_write_cmd(struct iscsi_task *task, @@ -99,7 +103,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, unsigned int edtl) { struct iscsi_iser_task *iser_task = task->dd_data; - struct iser_device *device = iser_task->iser_conn->ib_conn->device; + struct iser_device *device = iser_task->ib_conn->device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -113,12 +117,15 @@ iser_prepare_write_cmd(struct iscsi_task *task, if (err) return err; - if (edtl > iser_task->data[ISER_DIR_OUT].data_len) { - iser_err("Total data length: %ld, less than EDTL: %d, " - "in WRITE cmd BHS itt: %d, conn: 0x%p\n", - iser_task->data[ISER_DIR_OUT].data_len, - edtl, task->itt, task->conn); - return -EINVAL; + if (scsi_prot_sg_count(iser_task->sc)) { + struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT]; + + err = iser_dma_map_task_data(iser_task, + pbuf_out, + ISER_DIR_OUT, + DMA_TO_DEVICE); + if (err) + return err; } err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT); @@ -327,7 +334,7 @@ free_login_buf: static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = conn->dd_data; struct iscsi_session *session = conn->session; iser_dbg("req op %x flags %x\n", req->opcode, req->flags); @@ -340,19 +347,18 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) * response) and no posted send buffers left - they must have been * consumed during previous login phases. 
*/ - WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1); - WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); + WARN_ON(ib_conn->post_recv_buf_count != 1); + WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0); if (session->discovery_sess) { iser_info("Discovery session, re-using login RX buffer\n"); return 0; } else iser_info("Normal session, posting batch of RX %d buffers\n", - iser_conn->ib_conn->min_posted_rx); + ib_conn->min_posted_rx); /* Initial post receive buffers */ - if (iser_post_recvm(iser_conn->ib_conn, - iser_conn->ib_conn->min_posted_rx)) + if (iser_post_recvm(ib_conn, ib_conn->min_posted_rx)) return -ENOMEM; return 0; @@ -364,11 +370,11 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) int iser_send_command(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; unsigned long edtl; int err; - struct iser_data_buf *data_buf; + struct iser_data_buf *data_buf, *prot_buf; struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr; struct scsi_cmnd *sc = task->sc; struct iser_tx_desc *tx_desc = &iser_task->desc; @@ -377,22 +383,31 @@ int iser_send_command(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ tx_desc->type = ISCSI_TX_SCSI_COMMAND; - iser_create_send_desc(iser_conn->ib_conn, tx_desc); + iser_create_send_desc(ib_conn, tx_desc); - if (hdr->flags & ISCSI_FLAG_CMD_READ) + if (hdr->flags & ISCSI_FLAG_CMD_READ) { data_buf = &iser_task->data[ISER_DIR_IN]; - else + prot_buf = &iser_task->prot[ISER_DIR_IN]; + } else { data_buf = &iser_task->data[ISER_DIR_OUT]; + prot_buf = &iser_task->prot[ISER_DIR_OUT]; + } if (scsi_sg_count(sc)) { /* using a scatter list */ data_buf->buf = scsi_sglist(sc); data_buf->size = scsi_sg_count(sc); } - data_buf->data_len = scsi_bufflen(sc); + if (scsi_prot_sg_count(sc)) { + 
prot_buf->buf = scsi_prot_sglist(sc); + prot_buf->size = scsi_prot_sg_count(sc); + prot_buf->data_len = data_buf->data_len >> + ilog2(sc->device->sector_size) * 8; + } + if (hdr->flags & ISCSI_FLAG_CMD_READ) { - err = iser_prepare_read_cmd(task, edtl); + err = iser_prepare_read_cmd(task); if (err) goto send_command_error; } @@ -408,7 +423,7 @@ int iser_send_command(struct iscsi_conn *conn, iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(iser_conn->ib_conn, tx_desc); + err = iser_post_send(ib_conn, tx_desc); if (!err) return 0; @@ -424,7 +439,7 @@ int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_task *task, struct iscsi_data *hdr) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *tx_desc = NULL; struct iser_regd_buf *regd_buf; @@ -473,7 +488,7 @@ int iser_send_data_out(struct iscsi_conn *conn, itt, buf_offset, data_seg_len); - err = iser_post_send(iser_conn->ib_conn, tx_desc); + err = iser_post_send(ib_conn, tx_desc); if (!err) return 0; @@ -486,19 +501,18 @@ send_data_out_error: int iser_send_control(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *mdesc = &iser_task->desc; unsigned long data_seg_len; int err = 0; struct iser_device *device; - struct iser_conn *ib_conn = iser_conn->ib_conn; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; - iser_create_send_desc(iser_conn->ib_conn, mdesc); + iser_create_send_desc(ib_conn, mdesc); - device = iser_conn->ib_conn->device; + device = ib_conn->device; data_seg_len = ntoh24(task->hdr->dlength); @@ -513,14 +527,13 @@ int iser_send_control(struct iscsi_conn *conn, ib_conn->login_req_dma, task->data_count, DMA_TO_DEVICE); - 
memcpy(iser_conn->ib_conn->login_req_buf, task->data, - task->data_count); + memcpy(ib_conn->login_req_buf, task->data, task->data_count); ib_dma_sync_single_for_device(device->ib_device, ib_conn->login_req_dma, task->data_count, DMA_TO_DEVICE); - tx_dsg->addr = iser_conn->ib_conn->login_req_dma; + tx_dsg->addr = ib_conn->login_req_dma; tx_dsg->length = task->data_count; tx_dsg->lkey = device->mr->lkey; mdesc->num_sge = 2; @@ -529,7 +542,7 @@ int iser_send_control(struct iscsi_conn *conn, if (task == conn->login_task) { iser_dbg("op %x dsl %lx, posting login rx buffer\n", task->hdr->opcode, data_seg_len); - err = iser_post_recvl(iser_conn->ib_conn); + err = iser_post_recvl(ib_conn); if (err) goto send_control_error; err = iser_post_rx_bufs(conn, task->hdr); @@ -537,7 +550,7 @@ int iser_send_control(struct iscsi_conn *conn, goto send_control_error; } - err = iser_post_send(iser_conn->ib_conn, mdesc); + err = iser_post_send(ib_conn, mdesc); if (!err) return 0; @@ -553,7 +566,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, unsigned long rx_xfer_len, struct iser_conn *ib_conn) { - struct iscsi_iser_conn *conn = ib_conn->iser_conn; struct iscsi_hdr *hdr; u64 rx_dma; int rx_buflen, outstanding, count, err; @@ -575,17 +587,17 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN)); - iscsi_iser_recv(conn->iscsi_conn, hdr, - rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN); + iscsi_iser_recv(ib_conn->iscsi_conn, hdr, rx_desc->data, + rx_xfer_len - ISER_HEADERS_LEN); ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, - rx_buflen, DMA_FROM_DEVICE); + rx_buflen, DMA_FROM_DEVICE); /* decrementing conn->post_recv_buf_count only --after-- freeing the * * task eliminates the need to worry on tasks which are completed in * * parallel to the execution of iser_conn_term. 
So the code that waits * * for the posted rx bufs refcount to become zero handles everything */ - conn->ib_conn->post_recv_buf_count--; + ib_conn->post_recv_buf_count--; if (rx_dma == ib_conn->login_resp_dma) return; @@ -610,11 +622,12 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc, ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); kmem_cache_free(ig.desc_cache, tx_desc); + tx_desc = NULL; } atomic_dec(&ib_conn->post_send_buf_count); - if (tx_desc->type == ISCSI_TX_CONTROL) { + if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ task = (void *) ((long)(void *)tx_desc - sizeof(struct iscsi_task)); @@ -634,6 +647,9 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) iser_task->data[ISER_DIR_IN].data_len = 0; iser_task->data[ISER_DIR_OUT].data_len = 0; + iser_task->prot[ISER_DIR_IN].data_len = 0; + iser_task->prot[ISER_DIR_OUT].data_len = 0; + memset(&iser_task->rdma_regd[ISER_DIR_IN], 0, sizeof(struct iser_regd_buf)); memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0, @@ -642,28 +658,63 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { - struct iser_device *device = iser_task->iser_conn->ib_conn->device; - int is_rdma_aligned = 1; + struct iser_device *device = iser_task->ib_conn->device; + int is_rdma_data_aligned = 1; + int is_rdma_prot_aligned = 1; + int prot_count = scsi_prot_sg_count(iser_task->sc); /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy */ if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) { - is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN); + is_rdma_data_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + &iser_task->data[ISER_DIR_IN], + &iser_task->data_copy[ISER_DIR_IN], + ISER_DIR_IN); } + if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) { - is_rdma_aligned 
= 0; - iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT); + is_rdma_data_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + &iser_task->data[ISER_DIR_OUT], + &iser_task->data_copy[ISER_DIR_OUT], + ISER_DIR_OUT); + } + + if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) { + is_rdma_prot_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + &iser_task->prot[ISER_DIR_IN], + &iser_task->prot_copy[ISER_DIR_IN], + ISER_DIR_IN); } - if (iser_task->dir[ISER_DIR_IN]) + if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) { + is_rdma_prot_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + &iser_task->prot[ISER_DIR_OUT], + &iser_task->prot_copy[ISER_DIR_OUT], + ISER_DIR_OUT); + } + + if (iser_task->dir[ISER_DIR_IN]) { device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); + if (is_rdma_data_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->data[ISER_DIR_IN]); + if (prot_count && is_rdma_prot_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->prot[ISER_DIR_IN]); + } - if (iser_task->dir[ISER_DIR_OUT]) + if (iser_task->dir[ISER_DIR_OUT]) { device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); - - /* if the data was unaligned, it was already unmapped and then copied */ - if (is_rdma_aligned) - iser_dma_unmap_task_data(iser_task); + if (is_rdma_data_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->data[ISER_DIR_OUT]); + if (prot_count && is_rdma_prot_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->prot[ISER_DIR_OUT]); + } } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 1ce0c97d2cc..47acd3ad3a1 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -45,13 +45,19 @@ * iser_start_rdma_unaligned_sg */ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, + struct iser_data_buf *data, + struct iser_data_buf *data_copy, enum iser_data_dir cmd_dir) { - int dma_nents; - struct ib_device *dev; + struct ib_device *dev = iser_task->ib_conn->device->ib_device; + struct scatterlist *sgl = (struct scatterlist *)data->buf; + struct scatterlist *sg; char *mem = NULL; - struct iser_data_buf *data = &iser_task->data[cmd_dir]; - unsigned long cmd_data_len = data->data_len; + unsigned long cmd_data_len = 0; + int dma_nents, i; + + for_each_sg(sgl, sg, data->size, i) + cmd_data_len += ib_sg_dma_len(dev, sg); if (cmd_data_len > ISER_KMALLOC_THRESHOLD) mem = (void *)__get_free_pages(GFP_ATOMIC, @@ -61,17 +67,16 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, if (mem == NULL) { iser_err("Failed to allocate mem size %d %d for copying sglist\n", - data->size,(int)cmd_data_len); + data->size, (int)cmd_data_len); return -ENOMEM; } if (cmd_dir == ISER_DIR_OUT) { /* copy the unaligned sg the buffer which is used for RDMA */ - struct scatterlist *sgl = (struct scatterlist *)data->buf; - struct scatterlist *sg; int i; char *p, *from; + sgl = (struct scatterlist *)data->buf; p = mem; for_each_sg(sgl, sg, data->size, i) { from = kmap_atomic(sg_page(sg)); @@ -83,39 +88,37 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, } } - sg_init_one(&iser_task->data_copy[cmd_dir].sg_single, mem, cmd_data_len); - iser_task->data_copy[cmd_dir].buf = - &iser_task->data_copy[cmd_dir].sg_single; - iser_task->data_copy[cmd_dir].size = 1; + sg_init_one(&data_copy->sg_single, mem, cmd_data_len); + data_copy->buf = &data_copy->sg_single; + data_copy->size = 1; + data_copy->copy_buf = mem; - iser_task->data_copy[cmd_dir].copy_buf = mem; - - dev = 
iser_task->iser_conn->ib_conn->device->ib_device; - dma_nents = ib_dma_map_sg(dev, - &iser_task->data_copy[cmd_dir].sg_single, - 1, + dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); BUG_ON(dma_nents == 0); - iser_task->data_copy[cmd_dir].dma_nents = dma_nents; + data_copy->dma_nents = dma_nents; + data_copy->data_len = cmd_data_len; + return 0; } /** * iser_finalize_rdma_unaligned_sg */ + void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir) + struct iser_data_buf *data, + struct iser_data_buf *data_copy, + enum iser_data_dir cmd_dir) { struct ib_device *dev; - struct iser_data_buf *mem_copy; unsigned long cmd_data_len; - dev = iser_task->iser_conn->ib_conn->device->ib_device; - mem_copy = &iser_task->data_copy[cmd_dir]; + dev = iser_task->ib_conn->device->ib_device; - ib_dma_unmap_sg(dev, &mem_copy->sg_single, 1, + ib_dma_unmap_sg(dev, &data_copy->sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -127,10 +130,10 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, int i; /* copy back read RDMA to unaligned sg */ - mem = mem_copy->copy_buf; + mem = data_copy->copy_buf; - sgl = (struct scatterlist *)iser_task->data[ISER_DIR_IN].buf; - sg_size = iser_task->data[ISER_DIR_IN].size; + sgl = (struct scatterlist *)data->buf; + sg_size = data->size; p = mem; for_each_sg(sgl, sg, sg_size, i) { @@ -143,15 +146,15 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, } } - cmd_data_len = iser_task->data[cmd_dir].data_len; + cmd_data_len = data->data_len; if (cmd_data_len > ISER_KMALLOC_THRESHOLD) - free_pages((unsigned long)mem_copy->copy_buf, + free_pages((unsigned long)data_copy->copy_buf, ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); else - kfree(mem_copy->copy_buf); + kfree(data_copy->copy_buf); - mem_copy->copy_buf = NULL; + data_copy->copy_buf = NULL; } #define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0) @@ -319,7 +322,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, struct ib_device *dev; iser_task->dir[iser_dir] = 1; - dev = iser_task->iser_conn->ib_conn->device->ib_device; + dev = iser_task->ib_conn->device->ib_device; data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir); if (data->dma_nents == 0) { @@ -329,31 +332,23 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, return 0; } -void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task) +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, + struct iser_data_buf *data) { struct ib_device *dev; - struct iser_data_buf *data; - dev = iser_task->iser_conn->ib_conn->device->ib_device; - - if (iser_task->dir[ISER_DIR_IN]) { - data = &iser_task->data[ISER_DIR_IN]; - ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); - } - - if (iser_task->dir[ISER_DIR_OUT]) { - data = &iser_task->data[ISER_DIR_OUT]; - ib_dma_unmap_sg(dev, data->buf, 
data->size, DMA_TO_DEVICE); - } + dev = iser_task->ib_conn->device->ib_device; + ib_dma_unmap_sg(dev, data->buf, data->size, DMA_FROM_DEVICE); } static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, struct ib_device *ibdev, + struct iser_data_buf *mem, + struct iser_data_buf *mem_copy, enum iser_data_dir cmd_dir, int aligned_len) { - struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn; - struct iser_data_buf *mem = &iser_task->data[cmd_dir]; + struct iscsi_conn *iscsi_conn = iser_task->ib_conn->iscsi_conn; iscsi_conn->fmr_unalign_cnt++; iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", @@ -363,12 +358,12 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, iser_data_buf_dump(mem, ibdev); /* unmap the command data before accessing it */ - iser_dma_unmap_task_data(iser_task); + iser_dma_unmap_task_data(iser_task, mem); /* allocate copy buf, if we are writing, copy the */ /* unaligned scatterlist, dma map the copy */ - if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0) - return -ENOMEM; + if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0) + return -ENOMEM; return 0; } @@ -382,7 +377,7 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir) { - struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; + struct iser_conn *ib_conn = iser_task->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_task->data[cmd_dir]; @@ -396,7 +391,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { - err = fall_to_bounce_buf(iser_task, ibdev, + err = fall_to_bounce_buf(iser_task, ibdev, mem, + &iser_task->data_copy[cmd_dir], cmd_dir, aligned_len); if (err) { iser_err("failed to allocate bounce buffer\n"); @@ 
-422,8 +418,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, (unsigned long)regd_buf->reg.va, (unsigned long)regd_buf->reg.len); } else { /* use FMR for multiple dma entries */ - iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev); - err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec, + iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev); + err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec, &regd_buf->reg); if (err && err != -EAGAIN) { iser_data_buf_dump(mem, ibdev); @@ -431,12 +427,12 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, mem->dma_nents, ntoh24(iser_task->desc.iscsi_header.dlength)); iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n", - ib_conn->fastreg.fmr.page_vec->data_size, - ib_conn->fastreg.fmr.page_vec->length, - ib_conn->fastreg.fmr.page_vec->offset); - for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++) + ib_conn->fmr.page_vec->data_size, + ib_conn->fmr.page_vec->length, + ib_conn->fmr.page_vec->offset); + for (i = 0; i < ib_conn->fmr.page_vec->length; i++) iser_err("page_vec[%d] = 0x%llx\n", i, - (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]); + (unsigned long long) ib_conn->fmr.page_vec->pages[i]); } if (err) return err; @@ -444,94 +440,280 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, return 0; } -static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, - struct iser_conn *ib_conn, +static inline enum ib_t10_dif_type +scsi2ib_prot_type(unsigned char prot_type) +{ + switch (prot_type) { + case SCSI_PROT_DIF_TYPE0: + return IB_T10DIF_NONE; + case SCSI_PROT_DIF_TYPE1: + return IB_T10DIF_TYPE1; + case SCSI_PROT_DIF_TYPE2: + return IB_T10DIF_TYPE2; + case SCSI_PROT_DIF_TYPE3: + return IB_T10DIF_TYPE3; + default: + return IB_T10DIF_NONE; + } +} + + +static int +iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs) +{ + unsigned char scsi_ptype = scsi_get_prot_type(sc); + + sig_attrs->mem.sig_type = 
IB_SIG_TYPE_T10_DIF; + sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF; + sig_attrs->mem.sig.dif.pi_interval = sc->device->sector_size; + sig_attrs->wire.sig.dif.pi_interval = sc->device->sector_size; + + switch (scsi_get_prot_op(sc)) { + case SCSI_PROT_WRITE_INSERT: + case SCSI_PROT_READ_STRIP: + sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE; + sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype); + sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; + sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) & + 0xffffffff; + break; + case SCSI_PROT_READ_INSERT: + case SCSI_PROT_WRITE_STRIP: + sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype); + sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; + sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) & + 0xffffffff; + sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE; + break; + case SCSI_PROT_READ_PASS: + case SCSI_PROT_WRITE_PASS: + sig_attrs->mem.sig.dif.type = scsi2ib_prot_type(scsi_ptype); + sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; + sig_attrs->mem.sig.dif.ref_tag = scsi_get_lba(sc) & + 0xffffffff; + sig_attrs->wire.sig.dif.type = scsi2ib_prot_type(scsi_ptype); + sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; + sig_attrs->wire.sig.dif.ref_tag = scsi_get_lba(sc) & + 0xffffffff; + break; + default: + iser_err("Unsupported PI operation %d\n", + scsi_get_prot_op(sc)); + return -EINVAL; + } + return 0; +} + + +static int +iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) +{ + switch (scsi_get_prot_type(sc)) { + case SCSI_PROT_DIF_TYPE0: + *mask = 0x0; + break; + case SCSI_PROT_DIF_TYPE1: + case SCSI_PROT_DIF_TYPE2: + *mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG; + break; + case SCSI_PROT_DIF_TYPE3: + *mask = ISER_CHECK_GUARD; + break; + default: + iser_err("Unsupported protection type %d\n", + scsi_get_prot_type(sc)); + return -EINVAL; + } + + return 0; +} + +static int +iser_reg_sig_mr(struct iscsi_iser_task *iser_task, + struct fast_reg_descriptor *desc, struct ib_sge *data_sge, + struct ib_sge 
*prot_sge, struct ib_sge *sig_sge) +{ + struct iser_conn *ib_conn = iser_task->ib_conn; + struct iser_pi_context *pi_ctx = desc->pi_ctx; + struct ib_send_wr sig_wr, inv_wr; + struct ib_send_wr *bad_wr, *wr = NULL; + struct ib_sig_attrs sig_attrs; + int ret; + u32 key; + + memset(&sig_attrs, 0, sizeof(sig_attrs)); + ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs); + if (ret) + goto err; + + ret = iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask); + if (ret) + goto err; + + if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) { + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.wr_id = ISER_FASTREG_LI_WRID; + inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey; + wr = &inv_wr; + /* Bump the key */ + key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF); + ib_update_fast_reg_key(pi_ctx->sig_mr, ++key); + } + + memset(&sig_wr, 0, sizeof(sig_wr)); + sig_wr.opcode = IB_WR_REG_SIG_MR; + sig_wr.wr_id = ISER_FASTREG_LI_WRID; + sig_wr.sg_list = data_sge; + sig_wr.num_sge = 1; + sig_wr.wr.sig_handover.sig_attrs = &sig_attrs; + sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr; + if (scsi_prot_sg_count(iser_task->sc)) + sig_wr.wr.sig_handover.prot = prot_sge; + sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; + + if (!wr) + wr = &sig_wr; + else + wr->next = &sig_wr; + + ret = ib_post_send(ib_conn->qp, wr, &bad_wr); + if (ret) { + iser_err("reg_sig_mr failed, ret:%d\n", ret); + goto err; + } + desc->reg_indicators &= ~ISER_SIG_KEY_VALID; + + sig_sge->lkey = pi_ctx->sig_mr->lkey; + sig_sge->addr = 0; + sig_sge->length = data_sge->length + prot_sge->length; + if (scsi_get_prot_op(iser_task->sc) == SCSI_PROT_WRITE_INSERT || + scsi_get_prot_op(iser_task->sc) == SCSI_PROT_READ_STRIP) { + sig_sge->length += (data_sge->length / + iser_task->sc->device->sector_size) * 8; + } + + iser_dbg("sig_sge: addr: 0x%llx length: %u lkey: 0x%x\n", + sig_sge->addr, sig_sge->length, + sig_sge->lkey); +err: + 
return ret; +} + +static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_regd_buf *regd_buf, - u32 offset, unsigned int data_size, - unsigned int page_list_len) + struct iser_data_buf *mem, + enum iser_reg_indicator ind, + struct ib_sge *sge) { + struct fast_reg_descriptor *desc = regd_buf->reg.mem_h; + struct iser_conn *ib_conn = iser_task->ib_conn; + struct iser_device *device = ib_conn->device; + struct ib_device *ibdev = device->ib_device; + struct ib_mr *mr; + struct ib_fast_reg_page_list *frpl; struct ib_send_wr fastreg_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; u8 key; - int ret; + int ret, offset, size, plen; + + /* if there a single dma entry, dma mr suffices */ + if (mem->dma_nents == 1) { + struct scatterlist *sg = (struct scatterlist *)mem->buf; - if (!desc->valid) { + sge->lkey = device->mr->lkey; + sge->addr = ib_sg_dma_address(ibdev, &sg[0]); + sge->length = ib_sg_dma_len(ibdev, &sg[0]); + + iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n", + sge->lkey, sge->addr, sge->length); + return 0; + } + + if (ind == ISER_DATA_KEY_VALID) { + mr = desc->data_mr; + frpl = desc->data_frpl; + } else { + mr = desc->pi_ctx->prot_mr; + frpl = desc->pi_ctx->prot_frpl; + } + + plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list, + &offset, &size); + if (plen * SIZE_4K < size) { + iser_err("fast reg page_list too short to hold this SG\n"); + return -EINVAL; + } + + if (!(desc->reg_indicators & ind)) { memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.wr_id = ISER_FASTREG_LI_WRID; inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.send_flags = IB_SEND_SIGNALED; - inv_wr.ex.invalidate_rkey = desc->data_mr->rkey; + inv_wr.ex.invalidate_rkey = mr->rkey; wr = &inv_wr; /* Bump the key */ - key = (u8)(desc->data_mr->rkey & 0x000000FF); - ib_update_fast_reg_key(desc->data_mr, ++key); + key = (u8)(mr->rkey & 0x000000FF); + ib_update_fast_reg_key(mr, ++key); } /* Prepare FASTREG WR */ memset(&fastreg_wr, 0, sizeof(fastreg_wr)); + 
fastreg_wr.wr_id = ISER_FASTREG_LI_WRID; fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.send_flags = IB_SEND_SIGNALED; - fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset; - fastreg_wr.wr.fast_reg.page_list = desc->data_frpl; - fastreg_wr.wr.fast_reg.page_list_len = page_list_len; + fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset; + fastreg_wr.wr.fast_reg.page_list = frpl; + fastreg_wr.wr.fast_reg.page_list_len = plen; fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K; - fastreg_wr.wr.fast_reg.length = data_size; - fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey; + fastreg_wr.wr.fast_reg.length = size; + fastreg_wr.wr.fast_reg.rkey = mr->rkey; fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ); - if (!wr) { + if (!wr) wr = &fastreg_wr; - atomic_inc(&ib_conn->post_send_buf_count); - } else { + else wr->next = &fastreg_wr; - atomic_add(2, &ib_conn->post_send_buf_count); - } ret = ib_post_send(ib_conn->qp, wr, &bad_wr); if (ret) { - if (bad_wr->next) - atomic_sub(2, &ib_conn->post_send_buf_count); - else - atomic_dec(&ib_conn->post_send_buf_count); iser_err("fast registration failed, ret:%d\n", ret); return ret; } - desc->valid = false; + desc->reg_indicators &= ~ind; - regd_buf->reg.mem_h = desc; - regd_buf->reg.lkey = desc->data_mr->lkey; - regd_buf->reg.rkey = desc->data_mr->rkey; - regd_buf->reg.va = desc->data_frpl->page_list[0] + offset; - regd_buf->reg.len = data_size; - regd_buf->reg.is_mr = 1; + sge->lkey = mr->lkey; + sge->addr = frpl->page_list[0] + offset; + sge->length = size; return ret; } /** - * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA, + * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA, * using Fast Registration WR (if possible) obtaining rkey and va * * returns 0 on success, errno code on failure */ -int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir) +int 
iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) { - struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; + struct iser_conn *ib_conn = iser_task->ib_conn; struct iser_device *device = ib_conn->device; struct ib_device *ibdev = device->ib_device; struct iser_data_buf *mem = &iser_task->data[cmd_dir]; struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir]; - struct fast_reg_descriptor *desc; - unsigned int data_size, page_list_len; + struct fast_reg_descriptor *desc = NULL; + struct ib_sge data_sge; int err, aligned_len; unsigned long flags; - u32 offset; aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem->dma_nents) { - err = fall_to_bounce_buf(iser_task, ibdev, + err = fall_to_bounce_buf(iser_task, ibdev, mem, + &iser_task->data_copy[cmd_dir], cmd_dir, aligned_len); if (err) { iser_err("failed to allocate bounce buffer\n"); @@ -540,41 +722,79 @@ int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task, mem = &iser_task->data_copy[cmd_dir]; } - /* if there a single dma entry, dma mr suffices */ - if (mem->dma_nents == 1) { - struct scatterlist *sg = (struct scatterlist *)mem->buf; - - regd_buf->reg.lkey = device->mr->lkey; - regd_buf->reg.rkey = device->mr->rkey; - regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]); - regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]); - regd_buf->reg.is_mr = 0; - } else { + if (mem->dma_nents != 1 || + scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { spin_lock_irqsave(&ib_conn->lock, flags); - desc = list_first_entry(&ib_conn->fastreg.frwr.pool, + desc = list_first_entry(&ib_conn->fastreg.pool, struct fast_reg_descriptor, list); list_del(&desc->list); spin_unlock_irqrestore(&ib_conn->lock, flags); - page_list_len = iser_sg_to_page_vec(mem, device->ib_device, - desc->data_frpl->page_list, - &offset, &data_size); - - if (page_list_len * SIZE_4K < data_size) { - iser_err("fast reg page_list too short to hold this SG\n"); - err = -EINVAL; - goto 
err_reg; + regd_buf->reg.mem_h = desc; + } + + err = iser_fast_reg_mr(iser_task, regd_buf, mem, + ISER_DATA_KEY_VALID, &data_sge); + if (err) + goto err_reg; + + if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) { + struct ib_sge prot_sge, sig_sge; + + memset(&prot_sge, 0, sizeof(prot_sge)); + if (scsi_prot_sg_count(iser_task->sc)) { + mem = &iser_task->prot[cmd_dir]; + aligned_len = iser_data_buf_aligned_len(mem, ibdev); + if (aligned_len != mem->dma_nents) { + err = fall_to_bounce_buf(iser_task, ibdev, mem, + &iser_task->prot_copy[cmd_dir], + cmd_dir, aligned_len); + if (err) { + iser_err("failed to allocate bounce buffer\n"); + return err; + } + mem = &iser_task->prot_copy[cmd_dir]; + } + + err = iser_fast_reg_mr(iser_task, regd_buf, mem, + ISER_PROT_KEY_VALID, &prot_sge); + if (err) + goto err_reg; } - err = iser_fast_reg_mr(desc, ib_conn, regd_buf, - offset, data_size, page_list_len); - if (err) - goto err_reg; + err = iser_reg_sig_mr(iser_task, desc, &data_sge, + &prot_sge, &sig_sge); + if (err) { + iser_err("Failed to register signature mr\n"); + return err; + } + desc->reg_indicators |= ISER_FASTREG_PROTECTED; + + regd_buf->reg.lkey = sig_sge.lkey; + regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey; + regd_buf->reg.va = sig_sge.addr; + regd_buf->reg.len = sig_sge.length; + regd_buf->reg.is_mr = 1; + } else { + if (desc) { + regd_buf->reg.rkey = desc->data_mr->rkey; + regd_buf->reg.is_mr = 1; + } else { + regd_buf->reg.rkey = device->mr->rkey; + regd_buf->reg.is_mr = 0; + } + + regd_buf->reg.lkey = data_sge.lkey; + regd_buf->reg.va = data_sge.addr; + regd_buf->reg.len = data_sge.length; } return 0; err_reg: - spin_lock_irqsave(&ib_conn->lock, flags); - list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); - spin_unlock_irqrestore(&ib_conn->lock, flags); + if (desc) { + spin_lock_irqsave(&ib_conn->lock, flags); + list_add_tail(&desc->list, &ib_conn->fastreg.pool); + spin_unlock_irqrestore(&ib_conn->lock, flags); + } + return err; } diff --git 
a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index afe95674008..ea01075f9f9 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. - * Copyright (c) 2013 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -71,17 +71,14 @@ static void iser_event_handler(struct ib_event_handler *handler, */ static int iser_create_device_ib_res(struct iser_device *device) { - int i, j; struct iser_cq_desc *cq_desc; - struct ib_device_attr *dev_attr; + struct ib_device_attr *dev_attr = &device->dev_attr; + int ret, i, j; - dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL); - if (!dev_attr) - return -ENOMEM; - - if (ib_query_device(device->ib_device, dev_attr)) { + ret = ib_query_device(device->ib_device, dev_attr); + if (ret) { pr_warn("Query device failed for %s\n", device->ib_device->name); - goto dev_attr_err; + return ret; } /* Assign function handles - based on FMR support */ @@ -94,14 +91,14 @@ static int iser_create_device_ib_res(struct iser_device *device) device->iser_unreg_rdma_mem = iser_unreg_mem_fmr; } else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { - iser_info("FRWR supported, using FRWR for registration\n"); - device->iser_alloc_rdma_reg_res = iser_create_frwr_pool; - device->iser_free_rdma_reg_res = iser_free_frwr_pool; - device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr; - device->iser_unreg_rdma_mem = iser_unreg_mem_frwr; + iser_info("FastReg supported, using FastReg for registration\n"); + device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool; + device->iser_free_rdma_reg_res = iser_free_fastreg_pool; + 
device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg; + device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg; } else { - iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n"); - goto dev_attr_err; + iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n"); + return -1; } device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); @@ -158,7 +155,6 @@ static int iser_create_device_ib_res(struct iser_device *device) if (ib_register_event_handler(&device->event_handler)) goto handler_err; - kfree(dev_attr); return 0; handler_err: @@ -178,8 +174,6 @@ pd_err: kfree(device->cq_desc); cq_desc_err: iser_err("failed to allocate an IB resource\n"); -dev_attr_err: - kfree(dev_attr); return -1; } @@ -221,13 +215,13 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) struct ib_fmr_pool_param params; int ret = -ENOMEM; - ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) + - (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), - GFP_KERNEL); - if (!ib_conn->fastreg.fmr.page_vec) + ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) + + (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), + GFP_KERNEL); + if (!ib_conn->fmr.page_vec) return ret; - ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1); + ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1); params.page_shift = SHIFT_4K; /* when the first/last SG element are not start/end * @@ -243,16 +237,16 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ); - ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, &params); - if (!IS_ERR(ib_conn->fastreg.fmr.pool)) + ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, &params); + if (!IS_ERR(ib_conn->fmr.pool)) return 0; /* no FMR => no need for page_vec */ - kfree(ib_conn->fastreg.fmr.page_vec); - ib_conn->fastreg.fmr.page_vec = NULL; + kfree(ib_conn->fmr.page_vec); + 
ib_conn->fmr.page_vec = NULL; - ret = PTR_ERR(ib_conn->fastreg.fmr.pool); - ib_conn->fastreg.fmr.pool = NULL; + ret = PTR_ERR(ib_conn->fmr.pool); + ib_conn->fmr.pool = NULL; if (ret != -ENOSYS) { iser_err("FMR allocation failed, err %d\n", ret); return ret; @@ -268,93 +262,173 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) void iser_free_fmr_pool(struct iser_conn *ib_conn) { iser_info("freeing conn %p fmr pool %p\n", - ib_conn, ib_conn->fastreg.fmr.pool); + ib_conn, ib_conn->fmr.pool); + + if (ib_conn->fmr.pool != NULL) + ib_destroy_fmr_pool(ib_conn->fmr.pool); + + ib_conn->fmr.pool = NULL; + + kfree(ib_conn->fmr.page_vec); + ib_conn->fmr.page_vec = NULL; +} + +static int +iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, + bool pi_enable, struct fast_reg_descriptor *desc) +{ + int ret; + + desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc->data_frpl)) { + ret = PTR_ERR(desc->data_frpl); + iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", + ret); + return PTR_ERR(desc->data_frpl); + } - if (ib_conn->fastreg.fmr.pool != NULL) - ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool); + desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc->data_mr)) { + ret = PTR_ERR(desc->data_mr); + iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); + goto fast_reg_mr_failure; + } + desc->reg_indicators |= ISER_DATA_KEY_VALID; + + if (pi_enable) { + struct ib_mr_init_attr mr_init_attr = {0}; + struct iser_pi_context *pi_ctx = NULL; + + desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL); + if (!desc->pi_ctx) { + iser_err("Failed to allocate pi context\n"); + ret = -ENOMEM; + goto pi_ctx_alloc_failure; + } + pi_ctx = desc->pi_ctx; + + pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(pi_ctx->prot_frpl)) { + ret = PTR_ERR(pi_ctx->prot_frpl); + iser_err("Failed to allocate prot 
frpl ret=%d\n", + ret); + goto prot_frpl_failure; + } - ib_conn->fastreg.fmr.pool = NULL; + pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(pi_ctx->prot_mr)) { + ret = PTR_ERR(pi_ctx->prot_mr); + iser_err("Failed to allocate prot frmr ret=%d\n", + ret); + goto prot_mr_failure; + } + desc->reg_indicators |= ISER_PROT_KEY_VALID; + + mr_init_attr.max_reg_descriptors = 2; + mr_init_attr.flags |= IB_MR_SIGNATURE_EN; + pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr); + if (IS_ERR(pi_ctx->sig_mr)) { + ret = PTR_ERR(pi_ctx->sig_mr); + iser_err("Failed to allocate signature enabled mr err=%d\n", + ret); + goto sig_mr_failure; + } + desc->reg_indicators |= ISER_SIG_KEY_VALID; + } + desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; + + iser_dbg("Create fr_desc %p page_list %p\n", + desc, desc->data_frpl->page_list); + + return 0; +sig_mr_failure: + ib_dereg_mr(desc->pi_ctx->prot_mr); +prot_mr_failure: + ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); +prot_frpl_failure: + kfree(desc->pi_ctx); +pi_ctx_alloc_failure: + ib_dereg_mr(desc->data_mr); +fast_reg_mr_failure: + ib_free_fast_reg_page_list(desc->data_frpl); - kfree(ib_conn->fastreg.fmr.page_vec); - ib_conn->fastreg.fmr.page_vec = NULL; + return ret; } /** - * iser_create_frwr_pool - Creates pool of fast_reg descriptors + * iser_create_fastreg_pool - Creates pool of fast_reg descriptors * for fast registration work requests. 
* returns 0 on success, or errno code on failure */ -int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) +int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max) { struct iser_device *device = ib_conn->device; struct fast_reg_descriptor *desc; int i, ret; - INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool); - ib_conn->fastreg.frwr.pool_size = 0; + INIT_LIST_HEAD(&ib_conn->fastreg.pool); + ib_conn->fastreg.pool_size = 0; for (i = 0; i < cmds_max; i++) { - desc = kmalloc(sizeof(*desc), GFP_KERNEL); + desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) { iser_err("Failed to allocate a new fast_reg descriptor\n"); ret = -ENOMEM; goto err; } - desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device, - ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(desc->data_frpl)) { - ret = PTR_ERR(desc->data_frpl); - iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret); - goto fast_reg_page_failure; + ret = iser_create_fastreg_desc(device->ib_device, device->pd, + ib_conn->pi_support, desc); + if (ret) { + iser_err("Failed to create fastreg descriptor err=%d\n", + ret); + kfree(desc); + goto err; } - desc->data_mr = ib_alloc_fast_reg_mr(device->pd, - ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(desc->data_mr)) { - ret = PTR_ERR(desc->data_mr); - iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); - goto fast_reg_mr_failure; - } - desc->valid = true; - list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); - ib_conn->fastreg.frwr.pool_size++; + list_add_tail(&desc->list, &ib_conn->fastreg.pool); + ib_conn->fastreg.pool_size++; } return 0; -fast_reg_mr_failure: - ib_free_fast_reg_page_list(desc->data_frpl); -fast_reg_page_failure: - kfree(desc); err: - iser_free_frwr_pool(ib_conn); + iser_free_fastreg_pool(ib_conn); return ret; } /** - * iser_free_frwr_pool - releases the pool of fast_reg descriptors + * iser_free_fastreg_pool - releases the pool of fast_reg descriptors */ -void iser_free_frwr_pool(struct iser_conn *ib_conn) 
+void iser_free_fastreg_pool(struct iser_conn *ib_conn) { struct fast_reg_descriptor *desc, *tmp; int i = 0; - if (list_empty(&ib_conn->fastreg.frwr.pool)) + if (list_empty(&ib_conn->fastreg.pool)) return; - iser_info("freeing conn %p frwr pool\n", ib_conn); + iser_info("freeing conn %p fr pool\n", ib_conn); - list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) { + list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) { list_del(&desc->list); ib_free_fast_reg_page_list(desc->data_frpl); ib_dereg_mr(desc->data_mr); + if (desc->pi_ctx) { + ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); + ib_dereg_mr(desc->pi_ctx->prot_mr); + ib_destroy_mr(desc->pi_ctx->sig_mr); + kfree(desc->pi_ctx); + } kfree(desc); ++i; } - if (i < ib_conn->fastreg.frwr.pool_size) + if (i < ib_conn->fastreg.pool_size) iser_warn("pool still has %d regions registered\n", - ib_conn->fastreg.frwr.pool_size - i); + ib_conn->fastreg.pool_size - i); } /** @@ -389,12 +463,17 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) init_attr.qp_context = (void *)ib_conn; init_attr.send_cq = device->tx_cq[min_index]; init_attr.recv_cq = device->rx_cq[min_index]; - init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; init_attr.cap.max_send_sge = 2; init_attr.cap.max_recv_sge = 1; init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; init_attr.qp_type = IB_QPT_RC; + if (ib_conn->pi_support) { + init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS; + init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; + } else { + init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; + } ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); if (ret) @@ -502,14 +581,30 @@ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, return ret; } +void iser_release_work(struct work_struct *work) +{ + struct iser_conn *ib_conn; + + ib_conn = container_of(work, struct iser_conn, release_work); + + /* wait for .conn_stop callback */ + 
wait_for_completion(&ib_conn->stop_completion); + + /* wait for the qp`s post send and post receive buffers to empty */ + wait_event_interruptible(ib_conn->wait, + ib_conn->state == ISER_CONN_DOWN); + + iser_conn_release(ib_conn); +} + /** * Frees all conn objects and deallocs conn descriptor */ -static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id) +void iser_conn_release(struct iser_conn *ib_conn) { struct iser_device *device = ib_conn->device; - BUG_ON(ib_conn->state != ISER_CONN_DOWN); + BUG_ON(ib_conn->state == ISER_CONN_UP); mutex_lock(&ig.connlist_mutex); list_del(&ib_conn->conn_list); @@ -521,27 +616,13 @@ static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id) if (device != NULL) iser_device_try_release(device); /* if cma handler context, the caller actually destroy the id */ - if (ib_conn->cma_id != NULL && can_destroy_id) { + if (ib_conn->cma_id != NULL) { rdma_destroy_id(ib_conn->cma_id); ib_conn->cma_id = NULL; } iscsi_destroy_endpoint(ib_conn->ep); } -void iser_conn_get(struct iser_conn *ib_conn) -{ - atomic_inc(&ib_conn->refcount); -} - -int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id) -{ - if (atomic_dec_and_test(&ib_conn->refcount)) { - iser_conn_release(ib_conn, can_destroy_id); - return 1; - } - return 0; -} - /** * triggers start of the disconnect procedures and wait for them to be done */ @@ -559,24 +640,19 @@ void iser_conn_terminate(struct iser_conn *ib_conn) if (err) iser_err("Failed to disconnect, conn: 0x%p err %d\n", ib_conn,err); - - wait_event_interruptible(ib_conn->wait, - ib_conn->state == ISER_CONN_DOWN); - - iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ } -static int iser_connect_error(struct rdma_cm_id *cma_id) +static void iser_connect_error(struct rdma_cm_id *cma_id) { struct iser_conn *ib_conn; + ib_conn = (struct iser_conn *)cma_id->context; ib_conn->state = ISER_CONN_DOWN; wake_up_interruptible(&ib_conn->wait); - return iser_conn_put(ib_conn, 0); /* deref 
ib conn's cma id */ } -static int iser_addr_handler(struct rdma_cm_id *cma_id) +static void iser_addr_handler(struct rdma_cm_id *cma_id) { struct iser_device *device; struct iser_conn *ib_conn; @@ -585,22 +661,35 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id) device = iser_device_find_by_ib_device(cma_id); if (!device) { iser_err("device lookup/creation failed\n"); - return iser_connect_error(cma_id); + iser_connect_error(cma_id); + return; } ib_conn = (struct iser_conn *)cma_id->context; ib_conn->device = device; + /* connection T10-PI support */ + if (iser_pi_enable) { + if (!(device->dev_attr.device_cap_flags & + IB_DEVICE_SIGNATURE_HANDOVER)) { + iser_warn("T10-PI requested but not supported on %s, " + "continue without T10-PI\n", + ib_conn->device->ib_device->name); + ib_conn->pi_support = false; + } else { + ib_conn->pi_support = true; + } + } + ret = rdma_resolve_route(cma_id, 1000); if (ret) { iser_err("resolve route failed: %d\n", ret); - return iser_connect_error(cma_id); + iser_connect_error(cma_id); + return; } - - return 0; } -static int iser_route_handler(struct rdma_cm_id *cma_id) +static void iser_route_handler(struct rdma_cm_id *cma_id) { struct rdma_conn_param conn_param; int ret; @@ -628,33 +717,40 @@ static int iser_route_handler(struct rdma_cm_id *cma_id) goto failure; } - return 0; + return; failure: - return iser_connect_error(cma_id); + iser_connect_error(cma_id); } static void iser_connected_handler(struct rdma_cm_id *cma_id) { struct iser_conn *ib_conn; + struct ib_qp_attr attr; + struct ib_qp_init_attr init_attr; + + (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); + iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num); ib_conn = (struct iser_conn *)cma_id->context; - ib_conn->state = ISER_CONN_UP; - wake_up_interruptible(&ib_conn->wait); + if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP)) + wake_up_interruptible(&ib_conn->wait); } -static int iser_disconnected_handler(struct 
rdma_cm_id *cma_id) +static void iser_disconnected_handler(struct rdma_cm_id *cma_id) { struct iser_conn *ib_conn; - int ret; ib_conn = (struct iser_conn *)cma_id->context; /* getting here when the state is UP means that the conn is being * * terminated asynchronously from the iSCSI layer's perspective. */ if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, - ISER_CONN_TERMINATING)) - iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, - ISCSI_ERR_CONN_FAILED); + ISER_CONN_TERMINATING)){ + if (ib_conn->iscsi_conn) + iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); + else + iser_err("iscsi_iser connection isn't bound\n"); + } /* Complete the termination process if no posts are pending */ if (ib_conn->post_recv_buf_count == 0 && @@ -662,24 +758,19 @@ static int iser_disconnected_handler(struct rdma_cm_id *cma_id) ib_conn->state = ISER_CONN_DOWN; wake_up_interruptible(&ib_conn->wait); } - - ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */ - return ret; } static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { - int ret = 0; - iser_info("event %d status %d conn %p id %p\n", event->event, event->status, cma_id->context, cma_id); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: - ret = iser_addr_handler(cma_id); + iser_addr_handler(cma_id); break; case RDMA_CM_EVENT_ROUTE_RESOLVED: - ret = iser_route_handler(cma_id); + iser_route_handler(cma_id); break; case RDMA_CM_EVENT_ESTABLISHED: iser_connected_handler(cma_id); @@ -689,18 +780,18 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: - ret = iser_connect_error(cma_id); + iser_connect_error(cma_id); break; case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_ADDR_CHANGE: - ret = iser_disconnected_handler(cma_id); + iser_disconnected_handler(cma_id); break; default: iser_err("Unexpected RDMA CM 
event (%d)\n", event->event); break; } - return ret; + return 0; } void iser_conn_init(struct iser_conn *ib_conn) @@ -709,7 +800,7 @@ void iser_conn_init(struct iser_conn *ib_conn) init_waitqueue_head(&ib_conn->wait); ib_conn->post_recv_buf_count = 0; atomic_set(&ib_conn->post_send_buf_count, 0); - atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */ + init_completion(&ib_conn->stop_completion); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); } @@ -737,7 +828,6 @@ int iser_connect(struct iser_conn *ib_conn, ib_conn->state = ISER_CONN_PENDING; - iser_conn_get(ib_conn); /* ref ib conn's cma id */ ib_conn->cma_id = rdma_create_id(iser_cma_handler, (void *)ib_conn, RDMA_PS_TCP, IB_QPT_RC); @@ -774,9 +864,8 @@ id_failure: ib_conn->cma_id = NULL; addr_failure: ib_conn->state = ISER_CONN_DOWN; - iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */ connect_failure: - iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ + iser_conn_release(ib_conn); return err; } @@ -797,7 +886,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, page_list = page_vec->pages; io_addr = page_list[0]; - mem = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool, + mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool, page_list, page_vec->length, io_addr); @@ -851,11 +940,11 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, reg->mem_h = NULL; } -void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, - enum iser_data_dir cmd_dir) +void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir) { struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; - struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; + struct iser_conn *ib_conn = iser_task->ib_conn; struct fast_reg_descriptor *desc = reg->mem_h; if (!reg->is_mr) @@ -864,7 +953,7 @@ void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, reg->mem_h = NULL; reg->is_mr = 0; spin_lock_bh(&ib_conn->lock); - list_add_tail(&desc->list, 
&ib_conn->fastreg.frwr.pool); + list_add_tail(&desc->list, &ib_conn->fastreg.pool); spin_unlock_bh(&ib_conn->lock); } @@ -965,7 +1054,7 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc, * perspective. */ if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING)) - iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, + iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); /* no more non completed posts to the QP, complete the @@ -989,18 +1078,16 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index) if (wc.status == IB_WC_SUCCESS) { if (wc.opcode == IB_WC_SEND) iser_snd_completion(tx_desc, ib_conn); - else if (wc.opcode == IB_WC_LOCAL_INV || - wc.opcode == IB_WC_FAST_REG_MR) { - atomic_dec(&ib_conn->post_send_buf_count); - continue; - } else + else iser_err("expected opcode %d got %d\n", IB_WC_SEND, wc.opcode); } else { iser_err("tx id %llx status %d vend_err %x\n", - wc.wr_id, wc.status, wc.vendor_err); - atomic_dec(&ib_conn->post_send_buf_count); - iser_handle_comp_error(tx_desc, ib_conn); + wc.wr_id, wc.status, wc.vendor_err); + if (wc.wr_id != ISER_FASTREG_LI_WRID) { + atomic_dec(&ib_conn->post_send_buf_count); + iser_handle_comp_error(tx_desc, ib_conn); + } } completed_tx++; } @@ -1018,8 +1105,12 @@ static void iser_cq_tasklet_fn(unsigned long data) struct iser_rx_desc *desc; unsigned long xfer_len; struct iser_conn *ib_conn; - int completed_tx, completed_rx; - completed_tx = completed_rx = 0; + int completed_tx, completed_rx = 0; + + /* First do tx drain, so in a case where we have rx flushes and a successful + * tx completion we will still go through completion error handling. 
+ */ + completed_tx = iser_drain_tx_cq(device, cq_index); while (ib_poll_cq(cq, 1, &wc) == 1) { desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; @@ -1047,7 +1138,6 @@ static void iser_cq_tasklet_fn(unsigned long data) * " would not cause interrupts to be missed" */ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - completed_tx += iser_drain_tx_cq(device, cq_index); iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); } @@ -1059,3 +1149,51 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context) tasklet_schedule(&device->cq_tasklet[cq_index]); } + +u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir, sector_t *sector) +{ + struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; + struct fast_reg_descriptor *desc = reg->mem_h; + unsigned long sector_size = iser_task->sc->device->sector_size; + struct ib_mr_status mr_status; + int ret; + + if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) { + desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; + ret = ib_check_mr_status(desc->pi_ctx->sig_mr, + IB_MR_CHECK_SIG_STATUS, &mr_status); + if (ret) { + pr_err("ib_check_mr_status failed, ret %d\n", ret); + goto err; + } + + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { + sector_t sector_off = mr_status.sig_err.sig_err_offset; + + do_div(sector_off, sector_size + 8); + *sector = scsi_get_lba(iser_task->sc) + sector_off; + + pr_err("PI error found type %d at sector %llx " + "expected %x vs actual %x\n", + mr_status.sig_err.err_type, + (unsigned long long)*sector, + mr_status.sig_err.expected, + mr_status.sig_err.actual); + + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + return 0x1; + case IB_SIG_BAD_REFTAG: + return 0x3; + case IB_SIG_BAD_APPTAG: + return 0x2; + } + } + } + + return 0; +err: + /* Not alot we can do here, return ambiguous guard error */ + return 0x1; +} diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 
9804fca6bf0..d4c7928a0f3 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -28,6 +28,7 @@ #include <target/target_core_base.h> #include <target/target_core_fabric.h> #include <target/iscsi/iscsi_transport.h> +#include <linux/semaphore.h> #include "isert_proto.h" #include "ib_isert.h" @@ -47,10 +48,12 @@ static int isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_rdma_wr *wr); static void -isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); +isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); static int -isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, - struct isert_rdma_wr *wr); +isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr); +static int +isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd); static void isert_qp_event_callback(struct ib_event *e, void *context) @@ -87,7 +90,8 @@ isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr) } static int -isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) +isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id, + u8 protection) { struct isert_device *device = isert_conn->conn_device; struct ib_qp_init_attr attr; @@ -119,6 +123,8 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; + if (protection) + attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; pr_debug("isert_conn_setup_qp cma_id->device: %p\n", cma_id->device); @@ -226,22 +232,29 @@ isert_create_device_ib_res(struct isert_device *device) return ret; /* asign function handlers */ - if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { - device->use_frwr = 1; - device->reg_rdma_mem = isert_reg_rdma_frwr; - device->unreg_rdma_mem = isert_unreg_rdma_frwr; + if 
(dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS && + dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) { + device->use_fastreg = 1; + device->reg_rdma_mem = isert_reg_rdma; + device->unreg_rdma_mem = isert_unreg_rdma; } else { - device->use_frwr = 0; + device->use_fastreg = 0; device->reg_rdma_mem = isert_map_rdma; device->unreg_rdma_mem = isert_unmap_cmd; } + /* Check signature cap */ + device->pi_capable = dev_attr->device_cap_flags & + IB_DEVICE_SIGNATURE_HANDOVER ? true : false; + device->cqs_used = min_t(int, num_online_cpus(), device->ib_device->num_comp_vectors); device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used); - pr_debug("Using %d CQs, device %s supports %d vectors support FRWR %d\n", + pr_debug("Using %d CQs, device %s supports %d vectors support " + "Fast registration %d pi_capable %d\n", device->cqs_used, device->ib_device->name, - device->ib_device->num_comp_vectors, device->use_frwr); + device->ib_device->num_comp_vectors, device->use_fastreg, + device->pi_capable); device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) * device->cqs_used, GFP_KERNEL); if (!device->cq_desc) { @@ -250,13 +263,6 @@ isert_create_device_ib_res(struct isert_device *device) } cq_desc = device->cq_desc; - device->dev_pd = ib_alloc_pd(ib_dev); - if (IS_ERR(device->dev_pd)) { - ret = PTR_ERR(device->dev_pd); - pr_err("ib_alloc_pd failed for dev_pd: %d\n", ret); - goto out_cq_desc; - } - for (i = 0; i < device->cqs_used; i++) { cq_desc[i].device = device; cq_desc[i].cq_index = i; @@ -294,13 +300,6 @@ isert_create_device_ib_res(struct isert_device *device) goto out_cq; } - device->dev_mr = ib_get_dma_mr(device->dev_pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(device->dev_mr)) { - ret = PTR_ERR(device->dev_mr); - pr_err("ib_get_dma_mr failed for dev_mr: %d\n", ret); - goto out_cq; - } - return 0; out_cq: @@ -316,9 +315,6 @@ out_cq: ib_destroy_cq(device->dev_tx_cq[j]); } } - ib_dealloc_pd(device->dev_pd); - -out_cq_desc: kfree(device->cq_desc); return ret; @@ 
-341,8 +337,6 @@ isert_free_device_ib_res(struct isert_device *device) device->dev_tx_cq[i] = NULL; } - ib_dereg_mr(device->dev_mr); - ib_dealloc_pd(device->dev_pd); kfree(device->cq_desc); } @@ -398,40 +392,136 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id) } static void -isert_conn_free_frwr_pool(struct isert_conn *isert_conn) +isert_conn_free_fastreg_pool(struct isert_conn *isert_conn) { struct fast_reg_descriptor *fr_desc, *tmp; int i = 0; - if (list_empty(&isert_conn->conn_frwr_pool)) + if (list_empty(&isert_conn->conn_fr_pool)) return; - pr_debug("Freeing conn %p frwr pool", isert_conn); + pr_debug("Freeing conn %p fastreg pool", isert_conn); list_for_each_entry_safe(fr_desc, tmp, - &isert_conn->conn_frwr_pool, list) { + &isert_conn->conn_fr_pool, list) { list_del(&fr_desc->list); ib_free_fast_reg_page_list(fr_desc->data_frpl); ib_dereg_mr(fr_desc->data_mr); + if (fr_desc->pi_ctx) { + ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl); + ib_dereg_mr(fr_desc->pi_ctx->prot_mr); + ib_destroy_mr(fr_desc->pi_ctx->sig_mr); + kfree(fr_desc->pi_ctx); + } kfree(fr_desc); ++i; } - if (i < isert_conn->conn_frwr_pool_size) + if (i < isert_conn->conn_fr_pool_size) pr_warn("Pool still has %d regions registered\n", - isert_conn->conn_frwr_pool_size - i); + isert_conn->conn_fr_pool_size - i); +} + +static int +isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd, + struct fast_reg_descriptor *fr_desc, u8 protection) +{ + int ret; + + fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(fr_desc->data_frpl)) { + pr_err("Failed to allocate data frpl err=%ld\n", + PTR_ERR(fr_desc->data_frpl)); + return PTR_ERR(fr_desc->data_frpl); + } + + fr_desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(fr_desc->data_mr)) { + pr_err("Failed to allocate data frmr err=%ld\n", + PTR_ERR(fr_desc->data_mr)); + ret = PTR_ERR(fr_desc->data_mr); + goto err_data_frpl; + } + pr_debug("Create 
fr_desc %p page_list %p\n", + fr_desc, fr_desc->data_frpl->page_list); + fr_desc->ind |= ISERT_DATA_KEY_VALID; + + if (protection) { + struct ib_mr_init_attr mr_init_attr = {0}; + struct pi_context *pi_ctx; + + fr_desc->pi_ctx = kzalloc(sizeof(*fr_desc->pi_ctx), GFP_KERNEL); + if (!fr_desc->pi_ctx) { + pr_err("Failed to allocate pi context\n"); + ret = -ENOMEM; + goto err_data_mr; + } + pi_ctx = fr_desc->pi_ctx; + + pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(pi_ctx->prot_frpl)) { + pr_err("Failed to allocate prot frpl err=%ld\n", + PTR_ERR(pi_ctx->prot_frpl)); + ret = PTR_ERR(pi_ctx->prot_frpl); + goto err_pi_ctx; + } + + pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(pi_ctx->prot_mr)) { + pr_err("Failed to allocate prot frmr err=%ld\n", + PTR_ERR(pi_ctx->prot_mr)); + ret = PTR_ERR(pi_ctx->prot_mr); + goto err_prot_frpl; + } + fr_desc->ind |= ISERT_PROT_KEY_VALID; + + mr_init_attr.max_reg_descriptors = 2; + mr_init_attr.flags |= IB_MR_SIGNATURE_EN; + pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr); + if (IS_ERR(pi_ctx->sig_mr)) { + pr_err("Failed to allocate signature enabled mr err=%ld\n", + PTR_ERR(pi_ctx->sig_mr)); + ret = PTR_ERR(pi_ctx->sig_mr); + goto err_prot_mr; + } + fr_desc->ind |= ISERT_SIG_KEY_VALID; + } + fr_desc->ind &= ~ISERT_PROTECTED; + + return 0; +err_prot_mr: + ib_dereg_mr(fr_desc->pi_ctx->prot_mr); +err_prot_frpl: + ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl); +err_pi_ctx: + kfree(fr_desc->pi_ctx); +err_data_mr: + ib_dereg_mr(fr_desc->data_mr); +err_data_frpl: + ib_free_fast_reg_page_list(fr_desc->data_frpl); + + return ret; } static int -isert_conn_create_frwr_pool(struct isert_conn *isert_conn) +isert_conn_create_fastreg_pool(struct isert_conn *isert_conn, u8 pi_support) { struct fast_reg_descriptor *fr_desc; struct isert_device *device = isert_conn->conn_device; - int i, ret; + struct se_session *se_sess = isert_conn->conn->sess->se_sess; 
+ struct se_node_acl *se_nacl = se_sess->se_node_acl; + int i, ret, tag_num; + /* + * Setup the number of FRMRs based upon the number of tags + * available to session in iscsi_target_locate_portal(). + */ + tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth); + tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS; - INIT_LIST_HEAD(&isert_conn->conn_frwr_pool); - isert_conn->conn_frwr_pool_size = 0; - for (i = 0; i < ISCSI_DEF_XMIT_CMDS_MAX; i++) { + isert_conn->conn_fr_pool_size = 0; + for (i = 0; i < tag_num; i++) { fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL); if (!fr_desc) { pr_err("Failed to allocate fast_reg descriptor\n"); @@ -439,40 +529,27 @@ isert_conn_create_frwr_pool(struct isert_conn *isert_conn) goto err; } - fr_desc->data_frpl = - ib_alloc_fast_reg_page_list(device->ib_device, - ISCSI_ISER_SG_TABLESIZE); - if (IS_ERR(fr_desc->data_frpl)) { - pr_err("Failed to allocate fr_pg_list err=%ld\n", - PTR_ERR(fr_desc->data_frpl)); - ret = PTR_ERR(fr_desc->data_frpl); - goto err; - } - - fr_desc->data_mr = ib_alloc_fast_reg_mr(device->dev_pd, - ISCSI_ISER_SG_TABLESIZE); - if (IS_ERR(fr_desc->data_mr)) { - pr_err("Failed to allocate frmr err=%ld\n", - PTR_ERR(fr_desc->data_mr)); - ret = PTR_ERR(fr_desc->data_mr); - ib_free_fast_reg_page_list(fr_desc->data_frpl); + ret = isert_create_fr_desc(device->ib_device, + isert_conn->conn_pd, fr_desc, + pi_support); + if (ret) { + pr_err("Failed to create fastreg descriptor err=%d\n", + ret); + kfree(fr_desc); goto err; } - pr_debug("Create fr_desc %p page_list %p\n", - fr_desc, fr_desc->data_frpl->page_list); - fr_desc->valid = true; - list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); - isert_conn->conn_frwr_pool_size++; + list_add_tail(&fr_desc->list, &isert_conn->conn_fr_pool); + isert_conn->conn_fr_pool_size++; } - pr_debug("Creating conn %p frwr pool size=%d", - isert_conn, isert_conn->conn_frwr_pool_size); + pr_debug("Creating conn %p fastreg pool size=%d", + isert_conn, isert_conn->conn_fr_pool_size); 
return 0; err: - isert_conn_free_frwr_pool(isert_conn); + isert_conn_free_fastreg_pool(isert_conn); return ret; } @@ -485,6 +562,15 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) struct isert_device *device; struct ib_device *ib_dev = cma_id->device; int ret = 0; + u8 pi_support; + + spin_lock_bh(&np->np_thread_lock); + if (!np->enabled) { + spin_unlock_bh(&np->np_thread_lock); + pr_debug("iscsi_np is not enabled, reject connect request\n"); + return rdma_reject(cma_id, NULL, 0); + } + spin_unlock_bh(&np->np_thread_lock); pr_debug("Entering isert_connect_request cma_id: %p, context: %p\n", cma_id, cma_id->context); @@ -497,13 +583,13 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_conn->state = ISER_CONN_INIT; INIT_LIST_HEAD(&isert_conn->conn_accept_node); init_completion(&isert_conn->conn_login_comp); - init_waitqueue_head(&isert_conn->conn_wait); - init_waitqueue_head(&isert_conn->conn_wait_comp_err); + init_completion(&isert_conn->conn_wait); + init_completion(&isert_conn->conn_wait_comp_err); kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); - mutex_init(&isert_conn->conn_comp_mutex); spin_lock_init(&isert_conn->conn_lock); + INIT_LIST_HEAD(&isert_conn->conn_fr_pool); cma_id->context = isert_conn; isert_conn->conn_cm_id = cma_id; @@ -558,33 +644,48 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) } isert_conn->conn_device = device; - isert_conn->conn_pd = device->dev_pd; - isert_conn->conn_mr = device->dev_mr; + isert_conn->conn_pd = ib_alloc_pd(isert_conn->conn_device->ib_device); + if (IS_ERR(isert_conn->conn_pd)) { + ret = PTR_ERR(isert_conn->conn_pd); + pr_err("ib_alloc_pd failed for conn %p: ret=%d\n", + isert_conn, ret); + goto out_pd; + } - if (device->use_frwr) { - ret = isert_conn_create_frwr_pool(isert_conn); - if (ret) { - pr_err("Conn: %p failed to create frwr_pool\n", isert_conn); - goto 
out_frwr; - } + isert_conn->conn_mr = ib_get_dma_mr(isert_conn->conn_pd, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(isert_conn->conn_mr)) { + ret = PTR_ERR(isert_conn->conn_mr); + pr_err("ib_get_dma_mr failed for conn %p: ret=%d\n", + isert_conn, ret); + goto out_mr; + } + + pi_support = np->tpg_np->tpg->tpg_attrib.t10_pi; + if (pi_support && !device->pi_capable) { + pr_err("Protection information requested but not supported, " + "rejecting connect request\n"); + ret = rdma_reject(cma_id, NULL, 0); + goto out_mr; } - ret = isert_conn_setup_qp(isert_conn, cma_id); + ret = isert_conn_setup_qp(isert_conn, cma_id, pi_support); if (ret) goto out_conn_dev; mutex_lock(&isert_np->np_accept_mutex); - list_add_tail(&isert_np->np_accept_list, &isert_conn->conn_accept_node); + list_add_tail(&isert_conn->conn_accept_node, &isert_np->np_accept_list); mutex_unlock(&isert_np->np_accept_mutex); - pr_debug("isert_connect_request() waking up np_accept_wq: %p\n", np); - wake_up(&isert_np->np_accept_wq); + pr_debug("isert_connect_request() up np_sem np: %p\n", np); + up(&isert_np->np_sem); return 0; out_conn_dev: - if (device->use_frwr) - isert_conn_free_frwr_pool(isert_conn); -out_frwr: + ib_dereg_mr(isert_conn->conn_mr); +out_mr: + ib_dealloc_pd(isert_conn->conn_pd); +out_pd: isert_device_try_release(device); out_rsp_dma_map: ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma, @@ -608,8 +709,8 @@ isert_connect_release(struct isert_conn *isert_conn) pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); - if (device && device->use_frwr) - isert_conn_free_frwr_pool(isert_conn); + if (device && device->use_fastreg) + isert_conn_free_fastreg_pool(isert_conn); if (isert_conn->conn_qp) { cq_index = ((struct isert_cq_desc *) @@ -623,6 +724,9 @@ isert_connect_release(struct isert_conn *isert_conn) isert_free_rx_descriptors(isert_conn); rdma_destroy_id(isert_conn->conn_cm_id); + ib_dereg_mr(isert_conn->conn_mr); + ib_dealloc_pd(isert_conn->conn_pd); + if 
(isert_conn->login_buf) { ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma, ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE); @@ -671,11 +775,11 @@ isert_disconnect_work(struct work_struct *work) pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); mutex_lock(&isert_conn->conn_mutex); - isert_conn->state = ISER_CONN_DOWN; + if (isert_conn->state == ISER_CONN_UP) + isert_conn->state = ISER_CONN_TERMINATING; if (isert_conn->post_recv_buf_count == 0 && atomic_read(&isert_conn->post_send_buf_count) == 0) { - pr_debug("Calling wake_up(&isert_conn->conn_wait);\n"); mutex_unlock(&isert_conn->conn_mutex); goto wake_up; } @@ -684,26 +788,25 @@ isert_disconnect_work(struct work_struct *work) isert_put_conn(isert_conn); return; } - if (!isert_conn->logout_posted) { - pr_debug("Calling rdma_disconnect for !logout_posted from" - " isert_disconnect_work\n"); + + if (isert_conn->disconnect) { + /* Send DREQ/DREP towards our initiator */ rdma_disconnect(isert_conn->conn_cm_id); - mutex_unlock(&isert_conn->conn_mutex); - iscsit_cause_connection_reinstatement(isert_conn->conn, 0); - goto wake_up; } + mutex_unlock(&isert_conn->conn_mutex); wake_up: - wake_up(&isert_conn->conn_wait); + complete(&isert_conn->conn_wait); isert_put_conn(isert_conn); } static void -isert_disconnected_handler(struct rdma_cm_id *cma_id) +isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect) { struct isert_conn *isert_conn = (struct isert_conn *)cma_id->context; + isert_conn->disconnect = disconnect; INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work); schedule_work(&isert_conn->conn_logout_work); } @@ -712,29 +815,28 @@ static int isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { int ret = 0; + bool disconnect = false; pr_debug("isert_cma_handler: event %d status %d conn %p id %p\n", event->event, event->status, cma_id->context, cma_id); switch (event->event) { case RDMA_CM_EVENT_CONNECT_REQUEST: - 
pr_debug("RDMA_CM_EVENT_CONNECT_REQUEST: >>>>>>>>>>>>>>>\n"); ret = isert_connect_request(cma_id, event); break; case RDMA_CM_EVENT_ESTABLISHED: - pr_debug("RDMA_CM_EVENT_ESTABLISHED >>>>>>>>>>>>>>\n"); isert_connected_handler(cma_id); break; - case RDMA_CM_EVENT_DISCONNECTED: - pr_debug("RDMA_CM_EVENT_DISCONNECTED: >>>>>>>>>>>>>>\n"); - isert_disconnected_handler(cma_id); - break; - case RDMA_CM_EVENT_DEVICE_REMOVAL: - case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_ADDR_CHANGE: /* FALLTHRU */ + case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */ + case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ + disconnect = true; + case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ + isert_disconnected_handler(cma_id, disconnect); break; case RDMA_CM_EVENT_CONNECT_ERROR: default: - pr_err("Unknown RDMA CMA event: %d\n", event->event); + pr_err("Unhandled RDMA CMA event: %d\n", event->event); break; } @@ -871,16 +973,17 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, * Coalesce send completion interrupts by only setting IB_SEND_SIGNALED * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls. 
*/ - mutex_lock(&isert_conn->conn_comp_mutex); - if (coalesce && + mutex_lock(&isert_conn->conn_mutex); + if (coalesce && isert_conn->state == ISER_CONN_UP && ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) { + tx_desc->llnode_active = true; llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist); - mutex_unlock(&isert_conn->conn_comp_mutex); + mutex_unlock(&isert_conn->conn_mutex); return; } isert_conn->conn_comp_batch = 0; tx_desc->comp_llnode_batch = llist_del_all(&isert_conn->conn_comp_llist); - mutex_unlock(&isert_conn->conn_comp_mutex); + mutex_unlock(&isert_conn->conn_mutex); send_wr->send_flags = IB_SEND_SIGNALED; } @@ -950,6 +1053,20 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, } if (!login->login_failed) { if (login->login_complete) { + if (!conn->sess->sess_ops->SessionType && + isert_conn->conn_device->use_fastreg) { + /* Normal Session and fastreg is used */ + u8 pi_support = login->np->tpg_np->tpg->tpg_attrib.t10_pi; + + ret = isert_conn_create_fastreg_pool(isert_conn, + pi_support); + if (ret) { + pr_err("Conn: %p failed to create" + " fastreg pool\n", isert_conn); + return ret; + } + } + ret = isert_alloc_rx_descriptors(isert_conn); if (ret) return ret; @@ -1024,13 +1141,13 @@ isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen, } static struct iscsi_cmd -*isert_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp) +*isert_allocate_cmd(struct iscsi_conn *conn) { struct isert_conn *isert_conn = (struct isert_conn *)conn->context; struct isert_cmd *isert_cmd; struct iscsi_cmd *cmd; - cmd = iscsit_allocate_cmd(conn, gfp); + cmd = iscsit_allocate_cmd(conn, TASK_INTERRUPTIBLE); if (!cmd) { pr_err("Unable to allocate iscsi_cmd + isert_cmd\n"); return NULL; @@ -1094,6 +1211,8 @@ sequence_cmd: if (!rc && dump_payload == false && unsol_data) iscsit_set_unsoliticed_dataout(cmd); + else if (dump_payload && imm_data) + target_put_sess_cmd(conn->sess->se_sess, &cmd->se_cmd); return 0; } @@ -1219,7 +1338,7 @@ 
isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, switch (opcode) { case ISCSI_OP_SCSI_CMD: - cmd = isert_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn); if (!cmd) break; @@ -1233,7 +1352,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_NOOP_OUT: - cmd = isert_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn); if (!cmd) break; @@ -1246,7 +1365,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_SCSI_TMFUNC: - cmd = isert_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn); if (!cmd) break; @@ -1254,7 +1373,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, (unsigned char *)hdr); break; case ISCSI_OP_LOGOUT: - cmd = isert_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn); if (!cmd) break; @@ -1265,7 +1384,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc, HZ); break; case ISCSI_OP_TEXT: - cmd = isert_allocate_cmd(conn, GFP_KERNEL); + cmd = isert_allocate_cmd(conn); if (!cmd) break; @@ -1375,19 +1494,60 @@ isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn, } } +static int +isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, + struct scatterlist *sg, u32 nents, u32 length, u32 offset, + enum iser_ib_op_code op, struct isert_data_buf *data) +{ + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + + data->dma_dir = op == ISER_IB_RDMA_WRITE ? 
+ DMA_TO_DEVICE : DMA_FROM_DEVICE; + + data->len = length - offset; + data->offset = offset; + data->sg_off = data->offset / PAGE_SIZE; + + data->sg = &sg[data->sg_off]; + data->nents = min_t(unsigned int, nents - data->sg_off, + ISCSI_ISER_SG_TABLESIZE); + data->len = min_t(unsigned int, data->len, ISCSI_ISER_SG_TABLESIZE * + PAGE_SIZE); + + data->dma_nents = ib_dma_map_sg(ib_dev, data->sg, data->nents, + data->dma_dir); + if (unlikely(!data->dma_nents)) { + pr_err("Cmd: unable to dma map SGs %p\n", sg); + return -EINVAL; + } + + pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", + isert_cmd, data->dma_nents, data->sg, data->nents, data->len); + + return 0; +} + +static void +isert_unmap_data_buf(struct isert_conn *isert_conn, struct isert_data_buf *data) +{ + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + + ib_dma_unmap_sg(ib_dev, data->sg, data->nents, data->dma_dir); + memset(data, 0, sizeof(*data)); +} + + + static void isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) { struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; - struct ib_device *ib_dev = isert_conn->conn_cm_id->device; pr_debug("isert_unmap_cmd: %p\n", isert_cmd); - if (wr->sge) { + + if (wr->data.sg) { pr_debug("isert_unmap_cmd: %p unmap_sg op\n", isert_cmd); - ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, - (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
- DMA_TO_DEVICE : DMA_FROM_DEVICE); - wr->sge = NULL; + isert_unmap_data_buf(isert_conn, &wr->data); } if (wr->send_wr) { @@ -1404,29 +1564,29 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) } static void -isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) +isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) { struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; - struct ib_device *ib_dev = isert_conn->conn_cm_id->device; LIST_HEAD(unmap_list); - pr_debug("unreg_frwr_cmd: %p\n", isert_cmd); + pr_debug("unreg_fastreg_cmd: %p\n", isert_cmd); if (wr->fr_desc) { - pr_debug("unreg_frwr_cmd: %p free fr_desc %p\n", + pr_debug("unreg_fastreg_cmd: %p free fr_desc %p\n", isert_cmd, wr->fr_desc); + if (wr->fr_desc->ind & ISERT_PROTECTED) { + isert_unmap_data_buf(isert_conn, &wr->prot); + wr->fr_desc->ind &= ~ISERT_PROTECTED; + } spin_lock_bh(&isert_conn->conn_lock); - list_add_tail(&wr->fr_desc->list, &isert_conn->conn_frwr_pool); + list_add_tail(&wr->fr_desc->list, &isert_conn->conn_fr_pool); spin_unlock_bh(&isert_conn->conn_lock); wr->fr_desc = NULL; } - if (wr->sge) { - pr_debug("unreg_frwr_cmd: %p unmap_sg op\n", isert_cmd); - ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, - (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
- DMA_TO_DEVICE : DMA_FROM_DEVICE); - wr->sge = NULL; + if (wr->data.sg) { + pr_debug("unreg_fastreg_cmd: %p unmap_sg op\n", isert_cmd); + isert_unmap_data_buf(isert_conn, &wr->data); } wr->ib_sge = NULL; @@ -1434,7 +1594,7 @@ isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn } static void -isert_put_cmd(struct isert_cmd *isert_cmd) +isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err) { struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct isert_conn *isert_conn = isert_cmd->conn; @@ -1447,11 +1607,24 @@ isert_put_cmd(struct isert_cmd *isert_cmd) case ISCSI_OP_SCSI_CMD: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); - if (cmd->data_direction == DMA_TO_DEVICE) + if (cmd->data_direction == DMA_TO_DEVICE) { iscsit_stop_dataout_timer(cmd); + /* + * Check for special case during comp_err where + * WRITE_PENDING has been handed off from core, + * but requires an extra target_put_sess_cmd() + * before transport_generic_free_cmd() below. 
+ */ + if (comp_err && + cmd->se_cmd.t_state == TRANSPORT_WRITE_PENDING) { + struct se_cmd *se_cmd = &cmd->se_cmd; + + target_put_sess_cmd(se_cmd->se_sess, se_cmd); + } + } device->unreg_rdma_mem(isert_cmd, isert_conn); transport_generic_free_cmd(&cmd->se_cmd, 0); @@ -1459,7 +1632,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) case ISCSI_OP_SCSI_TMFUNC: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); transport_generic_free_cmd(&cmd->se_cmd, 0); @@ -1469,7 +1642,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) case ISCSI_OP_TEXT: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); /* @@ -1506,7 +1679,7 @@ isert_unmap_tx_desc(struct iser_tx_desc *tx_desc, struct ib_device *ib_dev) static void isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd, - struct ib_device *ib_dev) + struct ib_device *ib_dev, bool comp_err) { if (isert_cmd->pdu_buf_dma != 0) { pr_debug("Calling ib_dma_unmap_single for isert_cmd->pdu_buf_dma\n"); @@ -1516,7 +1689,77 @@ isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd, } isert_unmap_tx_desc(tx_desc, ib_dev); - isert_put_cmd(isert_cmd); + isert_put_cmd(isert_cmd, comp_err); +} + +static int +isert_check_pi_status(struct se_cmd *se_cmd, struct ib_mr *sig_mr) +{ + struct ib_mr_status mr_status; + int ret; + + ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); + if (ret) { + pr_err("ib_check_mr_status failed, ret %d\n", ret); + goto fail_mr_status; + } + + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { + u64 sec_offset_err; + u32 block_size = se_cmd->se_dev->dev_attrib.block_size + 8; + + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + se_cmd->pi_err = TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED; + break; + case 
IB_SIG_BAD_REFTAG: + se_cmd->pi_err = TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED; + break; + case IB_SIG_BAD_APPTAG: + se_cmd->pi_err = TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED; + break; + } + sec_offset_err = mr_status.sig_err.sig_err_offset; + do_div(sec_offset_err, block_size); + se_cmd->bad_sector = sec_offset_err + se_cmd->t_task_lba; + + pr_err("isert: PI error found type %d at sector 0x%llx " + "expected 0x%x vs actual 0x%x\n", + mr_status.sig_err.err_type, + (unsigned long long)se_cmd->bad_sector, + mr_status.sig_err.expected, + mr_status.sig_err.actual); + ret = 1; + } + +fail_mr_status: + return ret; +} + +static void +isert_completion_rdma_write(struct iser_tx_desc *tx_desc, + struct isert_cmd *isert_cmd) +{ + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; + struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; + struct se_cmd *se_cmd = &cmd->se_cmd; + struct isert_conn *isert_conn = isert_cmd->conn; + struct isert_device *device = isert_conn->conn_device; + int ret = 0; + + if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) { + ret = isert_check_pi_status(se_cmd, + wr->fr_desc->pi_ctx->sig_mr); + wr->fr_desc->ind &= ~ISERT_PROTECTED; + } + + device->unreg_rdma_mem(isert_cmd, isert_conn); + wr->send_wr_num = 0; + if (ret) + transport_send_check_condition_and_sense(se_cmd, + se_cmd->pi_err, 0); + else + isert_put_response(isert_conn->conn, cmd); } static void @@ -1528,10 +1771,18 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, struct se_cmd *se_cmd = &cmd->se_cmd; struct isert_conn *isert_conn = isert_cmd->conn; struct isert_device *device = isert_conn->conn_device; + int ret = 0; + + if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) { + ret = isert_check_pi_status(se_cmd, + wr->fr_desc->pi_ctx->sig_mr); + wr->fr_desc->ind &= ~ISERT_PROTECTED; + } iscsit_stop_dataout_timer(cmd); device->unreg_rdma_mem(isert_cmd, isert_conn); - cmd->write_data_done = wr->cur_rdma_length; + cmd->write_data_done = wr->data.len; + wr->send_wr_num = 0; pr_debug("Cmd: 
%p RDMA_READ comp calling execute_cmd\n", isert_cmd); spin_lock_bh(&cmd->istate_lock); @@ -1539,7 +1790,11 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; spin_unlock_bh(&cmd->istate_lock); - target_execute_cmd(se_cmd); + if (ret) + transport_send_check_condition_and_sense(se_cmd, + se_cmd->pi_err, 0); + else + target_execute_cmd(se_cmd); } static void @@ -1559,28 +1814,25 @@ isert_do_control_comp(struct work_struct *work) iscsit_tmr_post_handler(cmd, cmd->conn); cmd->i_state = ISTATE_SENT_STATUS; - isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev); + isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false); break; case ISTATE_SEND_REJECT: pr_debug("Got isert_do_control_comp ISTATE_SEND_REJECT: >>>\n"); atomic_dec(&isert_conn->post_send_buf_count); cmd->i_state = ISTATE_SENT_STATUS; - isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev); + isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false); break; case ISTATE_SEND_LOGOUTRSP: pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n"); - /* - * Call atomic_dec(&isert_conn->post_send_buf_count) - * from isert_free_conn() - */ - isert_conn->logout_posted = true; + + atomic_dec(&isert_conn->post_send_buf_count); iscsit_logout_post_handler(cmd, cmd->conn); break; case ISTATE_SEND_TEXTRSP: atomic_dec(&isert_conn->post_send_buf_count); cmd->i_state = ISTATE_SENT_STATUS; - isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev); + isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false); break; default: pr_err("Unknown do_control_comp i_state %d\n", cmd->i_state); @@ -1596,6 +1848,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc, struct ib_device *ib_dev) { struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; if (cmd->i_state == ISTATE_SEND_TASKMGTRSP || cmd->i_state == ISTATE_SEND_LOGOUTRSP || @@ -1607,10 +1860,21 @@ 
isert_response_completion(struct iser_tx_desc *tx_desc, queue_work(isert_comp_wq, &isert_cmd->comp_work); return; } - atomic_dec(&isert_conn->post_send_buf_count); + + /** + * If send_wr_num is 0 this means that we got + * RDMA completion and we cleared it and we should + * simply decrement the response post. else the + * response is incorporated in send_wr_num, just + * sub it. + **/ + if (wr->send_wr_num) + atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); + else + atomic_dec(&isert_conn->post_send_buf_count); cmd->i_state = ISTATE_SENT_STATUS; - isert_completion_put(tx_desc, isert_cmd, ib_dev); + isert_completion_put(tx_desc, isert_cmd, ib_dev, false); } static void @@ -1639,13 +1903,14 @@ __isert_send_completion(struct iser_tx_desc *tx_desc, isert_conn, ib_dev); break; case ISER_IB_RDMA_WRITE: - pr_err("isert_send_completion: Got ISER_IB_RDMA_WRITE\n"); - dump_stack(); + pr_debug("isert_send_completion: Got ISER_IB_RDMA_WRITE\n"); + atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); + isert_completion_rdma_write(tx_desc, isert_cmd); break; case ISER_IB_RDMA_READ: pr_debug("isert_send_completion: Got ISER_IB_RDMA_READ:\n"); - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); isert_completion_rdma_read(tx_desc, isert_cmd); break; default: @@ -1674,31 +1939,102 @@ isert_send_completion(struct iser_tx_desc *tx_desc, } static void -isert_cq_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) +isert_cq_drain_comp_llist(struct isert_conn *isert_conn, struct ib_device *ib_dev) { - struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct llist_node *llnode; + struct isert_rdma_wr *wr; + struct iser_tx_desc *t; - if (tx_desc) { - struct isert_cmd *isert_cmd = tx_desc->isert_cmd; + mutex_lock(&isert_conn->conn_mutex); + llnode = llist_del_all(&isert_conn->conn_comp_llist); + isert_conn->conn_comp_batch = 0; + mutex_unlock(&isert_conn->conn_mutex); - if 
(!isert_cmd) - isert_unmap_tx_desc(tx_desc, ib_dev); + while (llnode) { + t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); + llnode = llist_next(llnode); + wr = &t->isert_cmd->rdma_wr; + + /** + * If send_wr_num is 0 this means that we got + * RDMA completion and we cleared it and we should + * simply decrement the response post. else the + * response is incorporated in send_wr_num, just + * sub it. + **/ + if (wr->send_wr_num) + atomic_sub(wr->send_wr_num, + &isert_conn->post_send_buf_count); else - isert_completion_put(tx_desc, isert_cmd, ib_dev); + atomic_dec(&isert_conn->post_send_buf_count); + + isert_completion_put(t, t->isert_cmd, ib_dev, true); } +} - if (isert_conn->post_recv_buf_count == 0 && - atomic_read(&isert_conn->post_send_buf_count) == 0) { - pr_debug("isert_cq_comp_err >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); - pr_debug("Calling wake_up from isert_cq_comp_err\n"); +static void +isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) +{ + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct isert_cmd *isert_cmd = tx_desc->isert_cmd; + struct llist_node *llnode = tx_desc->comp_llnode_batch; + struct isert_rdma_wr *wr; + struct iser_tx_desc *t; - mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->state != ISER_CONN_DOWN) - isert_conn->state = ISER_CONN_TERMINATING; - mutex_unlock(&isert_conn->conn_mutex); + while (llnode) { + t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); + llnode = llist_next(llnode); + wr = &t->isert_cmd->rdma_wr; + + /** + * If send_wr_num is 0 this means that we got + * RDMA completion and we cleared it and we should + * simply decrement the response post. else the + * response is incorporated in send_wr_num, just + * sub it. 
+ **/ + if (wr->send_wr_num) + atomic_sub(wr->send_wr_num, + &isert_conn->post_send_buf_count); + else + atomic_dec(&isert_conn->post_send_buf_count); - wake_up(&isert_conn->conn_wait_comp_err); + isert_completion_put(t, t->isert_cmd, ib_dev, true); } + tx_desc->comp_llnode_batch = NULL; + + if (!isert_cmd) + isert_unmap_tx_desc(tx_desc, ib_dev); + else + isert_completion_put(tx_desc, isert_cmd, ib_dev, true); +} + +static void +isert_cq_rx_comp_err(struct isert_conn *isert_conn) +{ + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct iscsi_conn *conn = isert_conn->conn; + + if (isert_conn->post_recv_buf_count) + return; + + isert_cq_drain_comp_llist(isert_conn, ib_dev); + + if (conn->sess) { + target_sess_cmd_list_set_waiting(conn->sess->se_sess); + target_wait_for_sess_cmds(conn->sess->se_sess); + } + + while (atomic_read(&isert_conn->post_send_buf_count)) + msleep(3000); + + mutex_lock(&isert_conn->conn_mutex); + isert_conn->state = ISER_CONN_DOWN; + mutex_unlock(&isert_conn->conn_mutex); + + iscsit_cause_connection_reinstatement(isert_conn->conn, 0); + + complete(&isert_conn->conn_wait_comp_err); } static void @@ -1723,8 +2059,14 @@ isert_cq_tx_work(struct work_struct *work) pr_debug("TX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n"); pr_debug("TX wc.status: 0x%08x\n", wc.status); pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err); - atomic_dec(&isert_conn->post_send_buf_count); - isert_cq_comp_err(tx_desc, isert_conn); + + if (wc.wr_id != ISER_FASTREG_LI_WRID) { + if (tx_desc->llnode_active) + continue; + + atomic_dec(&isert_conn->post_send_buf_count); + isert_cq_tx_comp_err(tx_desc, isert_conn); + } } } @@ -1767,7 +2109,7 @@ isert_cq_rx_work(struct work_struct *work) wc.vendor_err); } isert_conn->post_recv_buf_count--; - isert_cq_comp_err(NULL, isert_conn); + isert_cq_rx_comp_err(isert_conn); } } @@ -1848,6 +2190,36 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd) return isert_post_response(isert_conn, isert_cmd); } 
+static void +isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd) +{ + struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); + struct isert_conn *isert_conn = (struct isert_conn *)conn->context; + struct isert_device *device = isert_conn->conn_device; + + spin_lock_bh(&conn->cmd_lock); + if (!list_empty(&cmd->i_conn_node)) + list_del_init(&cmd->i_conn_node); + spin_unlock_bh(&conn->cmd_lock); + + if (cmd->data_direction == DMA_TO_DEVICE) + iscsit_stop_dataout_timer(cmd); + + device->unreg_rdma_mem(isert_cmd, isert_conn); +} + +static enum target_prot_op +isert_get_sup_prot_ops(struct iscsi_conn *conn) +{ + struct isert_conn *isert_conn = (struct isert_conn *)conn->context; + struct isert_device *device = isert_conn->conn_device; + + if (device->pi_capable) + return TARGET_PROT_ALL; + + return TARGET_PROT_NORMAL; +} + static int isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn, bool nopout_response) @@ -1948,7 +2320,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) int rc; isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); - rc = iscsit_build_text_rsp(cmd, conn, hdr); + rc = iscsit_build_text_rsp(cmd, conn, hdr, ISCSI_INFINIBAND); if (rc < 0) return rc; @@ -2029,54 +2401,39 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct se_cmd *se_cmd = &cmd->se_cmd; struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); struct isert_conn *isert_conn = (struct isert_conn *)conn->context; - struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct isert_data_buf *data = &wr->data; struct ib_send_wr *send_wr; struct ib_sge *ib_sge; - struct scatterlist *sg_start; - u32 sg_off = 0, sg_nents; - u32 offset = 0, data_len, data_left, rdma_write_max, va_offset = 0; - int ret = 0, count, i, ib_sge_cnt; + u32 offset, data_len, data_left, rdma_write_max, va_offset = 0; + int ret = 0, i, ib_sge_cnt; - if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { - data_left = se_cmd->data_length; - } else { - 
sg_off = cmd->write_data_done / PAGE_SIZE; - data_left = se_cmd->data_length - cmd->write_data_done; - offset = cmd->write_data_done; - isert_cmd->tx_desc.isert_cmd = isert_cmd; - } + isert_cmd->tx_desc.isert_cmd = isert_cmd; - sg_start = &cmd->se_cmd.t_data_sg[sg_off]; - sg_nents = se_cmd->t_data_nents - sg_off; + offset = wr->iser_ib_op == ISER_IB_RDMA_READ ? cmd->write_data_done : 0; + ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg, + se_cmd->t_data_nents, se_cmd->data_length, + offset, wr->iser_ib_op, &wr->data); + if (ret) + return ret; - count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, - (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? - DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (unlikely(!count)) { - pr_err("Cmd: %p unrable to map SGs\n", isert_cmd); - return -EINVAL; - } - wr->sge = sg_start; - wr->num_sge = sg_nents; - wr->cur_rdma_length = data_left; - pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", - isert_cmd, count, sg_start, sg_nents, data_left); + data_left = data->len; + offset = data->offset; - ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL); + ib_sge = kzalloc(sizeof(struct ib_sge) * data->nents, GFP_KERNEL); if (!ib_sge) { pr_warn("Unable to allocate ib_sge\n"); ret = -ENOMEM; - goto unmap_sg; + goto unmap_cmd; } wr->ib_sge = ib_sge; - wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge); + wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge); wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num, GFP_KERNEL); if (!wr->send_wr) { pr_debug("Unable to allocate wr->send_wr\n"); ret = -ENOMEM; - goto unmap_sg; + goto unmap_cmd; } wr->isert_cmd = isert_cmd; @@ -2115,10 +2472,9 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, } return 0; -unmap_sg: - ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, - (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
- DMA_TO_DEVICE : DMA_FROM_DEVICE); +unmap_cmd: + isert_unmap_data_buf(isert_conn, data); + return ret; } @@ -2162,51 +2518,70 @@ isert_map_fr_pagelist(struct ib_device *ib_dev, } static int -isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, - struct isert_cmd *isert_cmd, struct isert_conn *isert_conn, - struct ib_sge *ib_sge, u32 offset, unsigned int data_len) +isert_fast_reg_mr(struct isert_conn *isert_conn, + struct fast_reg_descriptor *fr_desc, + struct isert_data_buf *mem, + enum isert_indicator ind, + struct ib_sge *sge) { - struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; struct ib_device *ib_dev = isert_conn->conn_cm_id->device; - struct scatterlist *sg_start; - u32 sg_off, page_off; + struct ib_mr *mr; + struct ib_fast_reg_page_list *frpl; struct ib_send_wr fr_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; + int ret, pagelist_len; + u32 page_off; u8 key; - int ret, sg_nents, pagelist_len; - sg_off = offset / PAGE_SIZE; - sg_start = &cmd->se_cmd.t_data_sg[sg_off]; - sg_nents = min_t(unsigned int, cmd->se_cmd.t_data_nents - sg_off, - ISCSI_ISER_SG_TABLESIZE); - page_off = offset % PAGE_SIZE; + if (mem->dma_nents == 1) { + sge->lkey = isert_conn->conn_mr->lkey; + sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]); + sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]); + pr_debug("%s:%d sge: addr: 0x%llx length: %u lkey: %x\n", + __func__, __LINE__, sge->addr, sge->length, + sge->lkey); + return 0; + } + + if (ind == ISERT_DATA_KEY_VALID) { + /* Registering data buffer */ + mr = fr_desc->data_mr; + frpl = fr_desc->data_frpl; + } else { + /* Registering protection buffer */ + mr = fr_desc->pi_ctx->prot_mr; + frpl = fr_desc->pi_ctx->prot_frpl; + } - pr_debug("Cmd: %p use fr_desc %p sg_nents %d sg_off %d offset %u\n", - isert_cmd, fr_desc, sg_nents, sg_off, offset); + page_off = mem->offset % PAGE_SIZE; - pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents, - &fr_desc->data_frpl->page_list[0]); + pr_debug("Use fr_desc %p sg_nents %d offset %u\n", + 
fr_desc, mem->nents, mem->offset); - if (!fr_desc->valid) { + pagelist_len = isert_map_fr_pagelist(ib_dev, mem->sg, mem->nents, + &frpl->page_list[0]); + + if (!(fr_desc->ind & ISERT_DATA_KEY_VALID)) { memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.wr_id = ISER_FASTREG_LI_WRID; inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.ex.invalidate_rkey = fr_desc->data_mr->rkey; + inv_wr.ex.invalidate_rkey = mr->rkey; wr = &inv_wr; /* Bump the key */ - key = (u8)(fr_desc->data_mr->rkey & 0x000000FF); - ib_update_fast_reg_key(fr_desc->data_mr, ++key); + key = (u8)(mr->rkey & 0x000000FF); + ib_update_fast_reg_key(mr, ++key); } /* Prepare FASTREG WR */ memset(&fr_wr, 0, sizeof(fr_wr)); + fr_wr.wr_id = ISER_FASTREG_LI_WRID; fr_wr.opcode = IB_WR_FAST_REG_MR; - fr_wr.wr.fast_reg.iova_start = - fr_desc->data_frpl->page_list[0] + page_off; - fr_wr.wr.fast_reg.page_list = fr_desc->data_frpl; + fr_wr.wr.fast_reg.iova_start = frpl->page_list[0] + page_off; + fr_wr.wr.fast_reg.page_list = frpl; fr_wr.wr.fast_reg.page_list_len = pagelist_len; fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fr_wr.wr.fast_reg.length = data_len; - fr_wr.wr.fast_reg.rkey = fr_desc->data_mr->rkey; + fr_wr.wr.fast_reg.length = mem->len; + fr_wr.wr.fast_reg.rkey = mr->rkey; fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; if (!wr) @@ -2219,80 +2594,242 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, pr_err("fast registration failed, ret:%d\n", ret); return ret; } - fr_desc->valid = false; + fr_desc->ind &= ~ind; + + sge->lkey = mr->lkey; + sge->addr = frpl->page_list[0] + page_off; + sge->length = mem->len; + + pr_debug("%s:%d sge: addr: 0x%llx length: %u lkey: %x\n", + __func__, __LINE__, sge->addr, sge->length, + sge->lkey); + + return ret; +} + +static inline enum ib_t10_dif_type +se2ib_prot_type(enum target_prot_type prot_type) +{ + switch (prot_type) { + case TARGET_DIF_TYPE0_PROT: + return IB_T10DIF_NONE; + case TARGET_DIF_TYPE1_PROT: + return IB_T10DIF_TYPE1; + case TARGET_DIF_TYPE2_PROT: + return 
IB_T10DIF_TYPE2; + case TARGET_DIF_TYPE3_PROT: + return IB_T10DIF_TYPE3; + default: + return IB_T10DIF_NONE; + } +} + +static int +isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs) +{ + enum ib_t10_dif_type ib_prot_type = se2ib_prot_type(se_cmd->prot_type); + + sig_attrs->mem.sig_type = IB_SIG_TYPE_T10_DIF; + sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF; + sig_attrs->mem.sig.dif.pi_interval = + se_cmd->se_dev->dev_attrib.block_size; + sig_attrs->wire.sig.dif.pi_interval = + se_cmd->se_dev->dev_attrib.block_size; + + switch (se_cmd->prot_op) { + case TARGET_PROT_DIN_INSERT: + case TARGET_PROT_DOUT_STRIP: + sig_attrs->mem.sig.dif.type = IB_T10DIF_NONE; + sig_attrs->wire.sig.dif.type = ib_prot_type; + sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; + sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed; + break; + case TARGET_PROT_DOUT_INSERT: + case TARGET_PROT_DIN_STRIP: + sig_attrs->mem.sig.dif.type = ib_prot_type; + sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; + sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed; + sig_attrs->wire.sig.dif.type = IB_T10DIF_NONE; + break; + case TARGET_PROT_DIN_PASS: + case TARGET_PROT_DOUT_PASS: + sig_attrs->mem.sig.dif.type = ib_prot_type; + sig_attrs->mem.sig.dif.bg_type = IB_T10DIF_CRC; + sig_attrs->mem.sig.dif.ref_tag = se_cmd->reftag_seed; + sig_attrs->wire.sig.dif.type = ib_prot_type; + sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC; + sig_attrs->wire.sig.dif.ref_tag = se_cmd->reftag_seed; + break; + default: + pr_err("Unsupported PI operation %d\n", se_cmd->prot_op); + return -EINVAL; + } + + return 0; +} + +static inline u8 +isert_set_prot_checks(u8 prot_checks) +{ + return (prot_checks & TARGET_DIF_CHECK_GUARD ? 0xc0 : 0) | + (prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) | + (prot_checks & TARGET_DIF_CHECK_REFTAG ? 
0x0f : 0); +} + +static int +isert_reg_sig_mr(struct isert_conn *isert_conn, struct se_cmd *se_cmd, + struct fast_reg_descriptor *fr_desc, + struct ib_sge *data_sge, struct ib_sge *prot_sge, + struct ib_sge *sig_sge) +{ + struct ib_send_wr sig_wr, inv_wr; + struct ib_send_wr *bad_wr, *wr = NULL; + struct pi_context *pi_ctx = fr_desc->pi_ctx; + struct ib_sig_attrs sig_attrs; + int ret; + u32 key; + + memset(&sig_attrs, 0, sizeof(sig_attrs)); + ret = isert_set_sig_attrs(se_cmd, &sig_attrs); + if (ret) + goto err; + + sig_attrs.check_mask = isert_set_prot_checks(se_cmd->prot_checks); + + if (!(fr_desc->ind & ISERT_SIG_KEY_VALID)) { + memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.wr_id = ISER_FASTREG_LI_WRID; + inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey; + wr = &inv_wr; + /* Bump the key */ + key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF); + ib_update_fast_reg_key(pi_ctx->sig_mr, ++key); + } + + memset(&sig_wr, 0, sizeof(sig_wr)); + sig_wr.opcode = IB_WR_REG_SIG_MR; + sig_wr.wr_id = ISER_FASTREG_LI_WRID; + sig_wr.sg_list = data_sge; + sig_wr.num_sge = 1; + sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE; + sig_wr.wr.sig_handover.sig_attrs = &sig_attrs; + sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr; + if (se_cmd->t_prot_sg) + sig_wr.wr.sig_handover.prot = prot_sge; + + if (!wr) + wr = &sig_wr; + else + wr->next = &sig_wr; - ib_sge->lkey = fr_desc->data_mr->lkey; - ib_sge->addr = fr_desc->data_frpl->page_list[0] + page_off; - ib_sge->length = data_len; + ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr); + if (ret) { + pr_err("fast registration failed, ret:%d\n", ret); + goto err; + } + fr_desc->ind &= ~ISERT_SIG_KEY_VALID; - pr_debug("RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n", - ib_sge->addr, ib_sge->length, ib_sge->lkey); + sig_sge->lkey = pi_ctx->sig_mr->lkey; + sig_sge->addr = 0; + sig_sge->length = se_cmd->data_length; + if (se_cmd->prot_op != TARGET_PROT_DIN_STRIP && + se_cmd->prot_op != 
TARGET_PROT_DOUT_INSERT) + /* + * We have protection guards on the wire + * so we need to set a larget transfer + */ + sig_sge->length += se_cmd->prot_length; + pr_debug("sig_sge: addr: 0x%llx length: %u lkey: %x\n", + sig_sge->addr, sig_sge->length, + sig_sge->lkey); +err: return ret; } static int -isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, - struct isert_rdma_wr *wr) +isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr) { struct se_cmd *se_cmd = &cmd->se_cmd; struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd); - struct isert_conn *isert_conn = (struct isert_conn *)conn->context; - struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct isert_conn *isert_conn = conn->context; + struct ib_sge data_sge; struct ib_send_wr *send_wr; - struct ib_sge *ib_sge; - struct scatterlist *sg_start; - struct fast_reg_descriptor *fr_desc; - u32 sg_off = 0, sg_nents; - u32 offset = 0, data_len, data_left, rdma_write_max; - int ret = 0, count; + struct fast_reg_descriptor *fr_desc = NULL; + u32 offset; + int ret = 0; unsigned long flags; - if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { - data_left = se_cmd->data_length; - } else { - sg_off = cmd->write_data_done / PAGE_SIZE; - data_left = se_cmd->data_length - cmd->write_data_done; - offset = cmd->write_data_done; - isert_cmd->tx_desc.isert_cmd = isert_cmd; - } + isert_cmd->tx_desc.isert_cmd = isert_cmd; - sg_start = &cmd->se_cmd.t_data_sg[sg_off]; - sg_nents = se_cmd->t_data_nents - sg_off; + offset = wr->iser_ib_op == ISER_IB_RDMA_READ ? cmd->write_data_done : 0; + ret = isert_map_data_buf(isert_conn, isert_cmd, se_cmd->t_data_sg, + se_cmd->t_data_nents, se_cmd->data_length, + offset, wr->iser_ib_op, &wr->data); + if (ret) + return ret; - count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, - (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
- DMA_TO_DEVICE : DMA_FROM_DEVICE); - if (unlikely(!count)) { - pr_err("Cmd: %p unrable to map SGs\n", isert_cmd); - return -EINVAL; + if (wr->data.dma_nents != 1 || + se_cmd->prot_op != TARGET_PROT_NORMAL) { + spin_lock_irqsave(&isert_conn->conn_lock, flags); + fr_desc = list_first_entry(&isert_conn->conn_fr_pool, + struct fast_reg_descriptor, list); + list_del(&fr_desc->list); + spin_unlock_irqrestore(&isert_conn->conn_lock, flags); + wr->fr_desc = fr_desc; } - wr->sge = sg_start; - wr->num_sge = sg_nents; - pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n", - isert_cmd, count, sg_start, sg_nents, data_left); - memset(&wr->s_ib_sge, 0, sizeof(*ib_sge)); - ib_sge = &wr->s_ib_sge; - wr->ib_sge = ib_sge; + ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->data, + ISERT_DATA_KEY_VALID, &data_sge); + if (ret) + goto unmap_cmd; + + if (se_cmd->prot_op != TARGET_PROT_NORMAL) { + struct ib_sge prot_sge, sig_sge; + + if (se_cmd->t_prot_sg) { + ret = isert_map_data_buf(isert_conn, isert_cmd, + se_cmd->t_prot_sg, + se_cmd->t_prot_nents, + se_cmd->prot_length, + 0, wr->iser_ib_op, &wr->prot); + if (ret) + goto unmap_cmd; + + ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->prot, + ISERT_PROT_KEY_VALID, &prot_sge); + if (ret) + goto unmap_prot_cmd; + } + + ret = isert_reg_sig_mr(isert_conn, se_cmd, fr_desc, + &data_sge, &prot_sge, &sig_sge); + if (ret) + goto unmap_prot_cmd; + fr_desc->ind |= ISERT_PROTECTED; + memcpy(&wr->s_ib_sge, &sig_sge, sizeof(sig_sge)); + } else + memcpy(&wr->s_ib_sge, &data_sge, sizeof(data_sge)); + + wr->ib_sge = &wr->s_ib_sge; wr->send_wr_num = 1; memset(&wr->s_send_wr, 0, sizeof(*send_wr)); wr->send_wr = &wr->s_send_wr; - wr->isert_cmd = isert_cmd; - rdma_write_max = ISCSI_ISER_SG_TABLESIZE * PAGE_SIZE; send_wr = &isert_cmd->rdma_wr.s_send_wr; - send_wr->sg_list = ib_sge; + send_wr->sg_list = &wr->s_ib_sge; send_wr->num_sge = 1; send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc; if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) { 
send_wr->opcode = IB_WR_RDMA_WRITE; send_wr->wr.rdma.remote_addr = isert_cmd->read_va; send_wr->wr.rdma.rkey = isert_cmd->read_stag; - send_wr->send_flags = 0; - send_wr->next = &isert_cmd->tx_desc.send_wr; + send_wr->send_flags = se_cmd->prot_op == TARGET_PROT_NORMAL ? + 0 : IB_SEND_SIGNALED; } else { send_wr->opcode = IB_WR_RDMA_READ; send_wr->wr.rdma.remote_addr = isert_cmd->write_va; @@ -2300,37 +2837,18 @@ isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, send_wr->send_flags = IB_SEND_SIGNALED; } - data_len = min(data_left, rdma_write_max); - wr->cur_rdma_length = data_len; - - /* if there is a single dma entry, dma mr is sufficient */ - if (count == 1) { - ib_sge->addr = ib_sg_dma_address(ib_dev, &sg_start[0]); - ib_sge->length = ib_sg_dma_len(ib_dev, &sg_start[0]); - ib_sge->lkey = isert_conn->conn_mr->lkey; - wr->fr_desc = NULL; - } else { + return 0; +unmap_prot_cmd: + if (se_cmd->t_prot_sg) + isert_unmap_data_buf(isert_conn, &wr->prot); +unmap_cmd: + if (fr_desc) { spin_lock_irqsave(&isert_conn->conn_lock, flags); - fr_desc = list_first_entry(&isert_conn->conn_frwr_pool, - struct fast_reg_descriptor, list); - list_del(&fr_desc->list); + list_add_tail(&fr_desc->list, &isert_conn->conn_fr_pool); spin_unlock_irqrestore(&isert_conn->conn_lock, flags); - wr->fr_desc = fr_desc; - - ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn, - ib_sge, offset, data_len); - if (ret) { - list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool); - goto unmap_sg; - } } + isert_unmap_data_buf(isert_conn, &wr->data); - return 0; - -unmap_sg: - ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, - (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ? 
- DMA_TO_DEVICE : DMA_FROM_DEVICE); return ret; } @@ -2354,25 +2872,35 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) return rc; } - /* - * Build isert_conn->tx_desc for iSCSI response PDU and attach - */ - isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc); - iscsit_build_rsp_pdu(cmd, conn, true, (struct iscsi_scsi_rsp *) - &isert_cmd->tx_desc.iscsi_header); - isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); - isert_init_send_wr(isert_conn, isert_cmd, - &isert_cmd->tx_desc.send_wr, true); + if (se_cmd->prot_op == TARGET_PROT_NORMAL) { + /* + * Build isert_conn->tx_desc for iSCSI response PDU and attach + */ + isert_create_send_desc(isert_conn, isert_cmd, + &isert_cmd->tx_desc); + iscsit_build_rsp_pdu(cmd, conn, true, (struct iscsi_scsi_rsp *) + &isert_cmd->tx_desc.iscsi_header); + isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc); + isert_init_send_wr(isert_conn, isert_cmd, + &isert_cmd->tx_desc.send_wr, true); + isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr; + wr->send_wr_num += 1; + } - atomic_inc(&isert_conn->post_send_buf_count); + atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count); rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); if (rc) { pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); } - pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data READ\n", - isert_cmd); + + if (se_cmd->prot_op == TARGET_PROT_NORMAL) + pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data " + "READ\n", isert_cmd); + else + pr_debug("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", + isert_cmd); return 1; } @@ -2397,12 +2925,12 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) return rc; } - atomic_inc(&isert_conn->post_send_buf_count); + atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count); rc = 
ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); if (rc) { pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); } pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n", isert_cmd); @@ -2483,7 +3011,7 @@ isert_setup_np(struct iscsi_np *np, pr_err("Unable to allocate struct isert_np\n"); return -ENOMEM; } - init_waitqueue_head(&isert_np->np_accept_wq); + sema_init(&isert_np->np_sem, 0); mutex_init(&isert_np->np_accept_mutex); INIT_LIST_HEAD(&isert_np->np_accept_list); init_completion(&isert_np->np_login_comp); @@ -2532,18 +3060,6 @@ out: } static int -isert_check_accept_queue(struct isert_np *isert_np) -{ - int empty; - - mutex_lock(&isert_np->np_accept_mutex); - empty = list_empty(&isert_np->np_accept_list); - mutex_unlock(&isert_np->np_accept_mutex); - - return empty; -} - -static int isert_rdma_accept(struct isert_conn *isert_conn) { struct rdma_cm_id *cm_id = isert_conn->conn_cm_id; @@ -2635,16 +3151,19 @@ isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn) int max_accept = 0, ret; accept_wait: - ret = wait_event_interruptible(isert_np->np_accept_wq, - !isert_check_accept_queue(isert_np) || - np->np_thread_state == ISCSI_NP_THREAD_RESET); + ret = down_interruptible(&isert_np->np_sem); if (max_accept > 5) return -ENODEV; spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) { spin_unlock_bh(&np->np_thread_lock); - pr_err("ISCSI_NP_THREAD_RESET for isert_accept_np\n"); + pr_debug("np_thread_state %d for isert_accept_np\n", + np->np_thread_state); + /** + * No point in stalling here when np_thread + * is in state RESET/SHUTDOWN/EXIT - bail + **/ return -ENODEV; } spin_unlock_bh(&np->np_thread_lock); @@ -2689,63 +3208,37 @@ isert_free_np(struct iscsi_np *np) kfree(isert_np); } -static int isert_check_state(struct isert_conn *isert_conn, 
int state) -{ - int ret; - - mutex_lock(&isert_conn->conn_mutex); - ret = (isert_conn->state == state); - mutex_unlock(&isert_conn->conn_mutex); - - return ret; -} - -static void isert_free_conn(struct iscsi_conn *conn) +static void isert_wait_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; - pr_debug("isert_free_conn: Starting \n"); - /* - * Decrement post_send_buf_count for special case when called - * from isert_do_control_comp() -> iscsit_logout_post_handler() - */ - mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->logout_posted) - atomic_dec(&isert_conn->post_send_buf_count); + pr_debug("isert_wait_conn: Starting \n"); - if (isert_conn->conn_cm_id && isert_conn->state != ISER_CONN_DOWN) { - pr_debug("Calling rdma_disconnect from isert_free_conn\n"); + mutex_lock(&isert_conn->conn_mutex); + if (isert_conn->conn_cm_id) { + pr_debug("Calling rdma_disconnect from isert_wait_conn\n"); rdma_disconnect(isert_conn->conn_cm_id); } /* * Only wait for conn_wait_comp_err if the isert_conn made it * into full feature phase.. 
*/ - if (isert_conn->state == ISER_CONN_UP) { - pr_debug("isert_free_conn: Before wait_event comp_err %d\n", - isert_conn->state); - mutex_unlock(&isert_conn->conn_mutex); - - wait_event(isert_conn->conn_wait_comp_err, - (isert_check_state(isert_conn, ISER_CONN_TERMINATING))); - - wait_event(isert_conn->conn_wait, - (isert_check_state(isert_conn, ISER_CONN_DOWN))); - - isert_put_conn(isert_conn); - return; - } if (isert_conn->state == ISER_CONN_INIT) { mutex_unlock(&isert_conn->conn_mutex); - isert_put_conn(isert_conn); return; } - pr_debug("isert_free_conn: wait_event conn_wait %d\n", - isert_conn->state); + if (isert_conn->state == ISER_CONN_UP) + isert_conn->state = ISER_CONN_TERMINATING; mutex_unlock(&isert_conn->conn_mutex); - wait_event(isert_conn->conn_wait, - (isert_check_state(isert_conn, ISER_CONN_DOWN))); + wait_for_completion(&isert_conn->conn_wait_comp_err); + + wait_for_completion(&isert_conn->conn_wait); +} + +static void isert_free_conn(struct iscsi_conn *conn) +{ + struct isert_conn *isert_conn = conn->context; isert_put_conn(isert_conn); } @@ -2758,6 +3251,7 @@ static struct iscsit_transport iser_target_transport = { .iscsit_setup_np = isert_setup_np, .iscsit_accept_np = isert_accept_np, .iscsit_free_np = isert_free_np, + .iscsit_wait_conn = isert_wait_conn, .iscsit_free_conn = isert_free_conn, .iscsit_get_login_rx = isert_get_login_rx, .iscsit_put_login_tx = isert_put_login_tx, @@ -2766,6 +3260,8 @@ static struct iscsit_transport iser_target_transport = { .iscsit_get_dataout = isert_get_dataout, .iscsit_queue_data_in = isert_put_datain, .iscsit_queue_status = isert_put_response, + .iscsit_aborted_task = isert_aborted_task, + .iscsit_get_sup_prot_ops = isert_get_sup_prot_ops, }; static int __init isert_init(void) @@ -2796,6 +3292,7 @@ destroy_rx_wq: static void __exit isert_exit(void) { + flush_scheduled_work(); destroy_workqueue(isert_comp_wq); destroy_workqueue(isert_rx_wq); iscsit_unregister_transport(&iser_target_transport); diff --git 
a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 691f90ff2d8..04f51f7bf61 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -6,6 +6,7 @@ #define ISERT_RDMA_LISTEN_BACKLOG 10 #define ISCSI_ISER_SG_TABLESIZE 256 +#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL enum isert_desc_type { ISCSI_TX_CONTROL, @@ -45,14 +46,39 @@ struct iser_tx_desc { struct isert_cmd *isert_cmd; struct llist_node *comp_llnode_batch; struct llist_node comp_llnode; + bool llnode_active; struct ib_send_wr send_wr; } __packed; +enum isert_indicator { + ISERT_PROTECTED = 1 << 0, + ISERT_DATA_KEY_VALID = 1 << 1, + ISERT_PROT_KEY_VALID = 1 << 2, + ISERT_SIG_KEY_VALID = 1 << 3, +}; + +struct pi_context { + struct ib_mr *prot_mr; + struct ib_fast_reg_page_list *prot_frpl; + struct ib_mr *sig_mr; +}; + struct fast_reg_descriptor { - struct list_head list; - struct ib_mr *data_mr; - struct ib_fast_reg_page_list *data_frpl; - bool valid; + struct list_head list; + struct ib_mr *data_mr; + struct ib_fast_reg_page_list *data_frpl; + u8 ind; + struct pi_context *pi_ctx; +}; + +struct isert_data_buf { + struct scatterlist *sg; + int nents; + u32 sg_off; + u32 len; /* cur_rdma_length */ + u32 offset; + unsigned int dma_nents; + enum dma_data_direction dma_dir; }; struct isert_rdma_wr { @@ -61,12 +87,11 @@ struct isert_rdma_wr { enum iser_ib_op_code iser_ib_op; struct ib_sge *ib_sge; struct ib_sge s_ib_sge; - int num_sge; - struct scatterlist *sge; int send_wr_num; struct ib_send_wr *send_wr; struct ib_send_wr s_send_wr; - u32 cur_rdma_length; + struct isert_data_buf data; + struct isert_data_buf prot; struct fast_reg_descriptor *fr_desc; }; @@ -91,7 +116,6 @@ struct isert_device; struct isert_conn { enum iser_conn_state state; - bool logout_posted; int post_recv_buf_count; atomic_t post_send_buf_count; u32 responder_resources; @@ -116,17 +140,17 @@ struct isert_conn { struct isert_device *conn_device; struct 
work_struct conn_logout_work; struct mutex conn_mutex; - wait_queue_head_t conn_wait; - wait_queue_head_t conn_wait_comp_err; + struct completion conn_wait; + struct completion conn_wait_comp_err; struct kref conn_kref; - struct list_head conn_frwr_pool; - int conn_frwr_pool_size; - /* lock to protect frwr_pool */ + struct list_head conn_fr_pool; + int conn_fr_pool_size; + /* lock to protect fastreg pool */ spinlock_t conn_lock; #define ISERT_COMP_BATCH_COUNT 8 int conn_comp_batch; struct llist_head conn_comp_llist; - struct mutex conn_comp_mutex; + bool disconnect; }; #define ISERT_MAX_CQ 64 @@ -139,13 +163,12 @@ struct isert_cq_desc { }; struct isert_device { - int use_frwr; + int use_fastreg; + bool pi_capable; int cqs_used; int refcount; int cq_active_qps[ISERT_MAX_CQ]; struct ib_device *ib_device; - struct ib_pd *dev_pd; - struct ib_mr *dev_mr; struct ib_cq *dev_rx_cq[ISERT_MAX_CQ]; struct ib_cq *dev_tx_cq[ISERT_MAX_CQ]; struct isert_cq_desc *cq_desc; @@ -159,7 +182,7 @@ struct isert_device { }; struct isert_np { - wait_queue_head_t np_accept_wq; + struct semaphore np_sem; struct rdma_cm_id *np_cm_id; struct mutex np_accept_mutex; struct list_head np_accept_list; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index a88631918e8..e3c2c5b4297 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -30,7 +30,7 @@ * SOFTWARE. 
*/ -#define pr_fmt(fmt) PFX fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/init.h> @@ -66,6 +66,8 @@ static unsigned int srp_sg_tablesize; static unsigned int cmd_sg_entries; static unsigned int indirect_sg_entries; static bool allow_ext_sg; +static bool prefer_fr; +static bool register_always; static int topspin_workarounds = 1; module_param(srp_sg_tablesize, uint, 0444); @@ -87,6 +89,14 @@ module_param(topspin_workarounds, int, 0444); MODULE_PARM_DESC(topspin_workarounds, "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); +module_param(prefer_fr, bool, 0444); +MODULE_PARM_DESC(prefer_fr, +"Whether to use fast registration if both FMR and fast registration are supported"); + +module_param(register_always, bool, 0444); +MODULE_PARM_DESC(register_always, + "Use memory registration even for contiguous memory regions"); + static struct kernel_param_ops srp_tmo_ops; static int srp_reconnect_delay = 10; @@ -288,28 +298,174 @@ static int srp_new_cm_id(struct srp_target_port *target) return 0; } +static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_fmr_pool_param fmr_param; + + memset(&fmr_param, 0, sizeof(fmr_param)); + fmr_param.pool_size = target->scsi_host->can_queue; + fmr_param.dirty_watermark = fmr_param.pool_size / 4; + fmr_param.cache = 1; + fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; + fmr_param.page_shift = ilog2(dev->mr_page_size); + fmr_param.access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); + + return ib_create_fmr_pool(dev->pd, &fmr_param); +} + +/** + * srp_destroy_fr_pool() - free the resources owned by a pool + * @pool: Fast registration pool to be destroyed. 
+ */ +static void srp_destroy_fr_pool(struct srp_fr_pool *pool) +{ + int i; + struct srp_fr_desc *d; + + if (!pool) + return; + + for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { + if (d->frpl) + ib_free_fast_reg_page_list(d->frpl); + if (d->mr) + ib_dereg_mr(d->mr); + } + kfree(pool); +} + +/** + * srp_create_fr_pool() - allocate and initialize a pool for fast registration + * @device: IB device to allocate fast registration descriptors for. + * @pd: Protection domain associated with the FR descriptors. + * @pool_size: Number of descriptors to allocate. + * @max_page_list_len: Maximum fast registration work request page list length. + */ +static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, + struct ib_pd *pd, int pool_size, + int max_page_list_len) +{ + struct srp_fr_pool *pool; + struct srp_fr_desc *d; + struct ib_mr *mr; + struct ib_fast_reg_page_list *frpl; + int i, ret = -EINVAL; + + if (pool_size <= 0) + goto err; + ret = -ENOMEM; + pool = kzalloc(sizeof(struct srp_fr_pool) + + pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL); + if (!pool) + goto err; + pool->size = pool_size; + pool->max_page_list_len = max_page_list_len; + spin_lock_init(&pool->lock); + INIT_LIST_HEAD(&pool->free_list); + + for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { + mr = ib_alloc_fast_reg_mr(pd, max_page_list_len); + if (IS_ERR(mr)) { + ret = PTR_ERR(mr); + goto destroy_pool; + } + d->mr = mr; + frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len); + if (IS_ERR(frpl)) { + ret = PTR_ERR(frpl); + goto destroy_pool; + } + d->frpl = frpl; + list_add_tail(&d->entry, &pool->free_list); + } + +out: + return pool; + +destroy_pool: + srp_destroy_fr_pool(pool); + +err: + pool = ERR_PTR(ret); + goto out; +} + +/** + * srp_fr_pool_get() - obtain a descriptor suitable for fast registration + * @pool: Pool to obtain descriptor from. 
+ */ +static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool) +{ + struct srp_fr_desc *d = NULL; + unsigned long flags; + + spin_lock_irqsave(&pool->lock, flags); + if (!list_empty(&pool->free_list)) { + d = list_first_entry(&pool->free_list, typeof(*d), entry); + list_del(&d->entry); + } + spin_unlock_irqrestore(&pool->lock, flags); + + return d; +} + +/** + * srp_fr_pool_put() - put an FR descriptor back in the free list + * @pool: Pool the descriptor was allocated from. + * @desc: Pointer to an array of fast registration descriptor pointers. + * @n: Number of descriptors to put back. + * + * Note: The caller must already have queued an invalidation request for + * desc->mr->rkey before calling this function. + */ +static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc, + int n) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&pool->lock, flags); + for (i = 0; i < n; i++) + list_add(&desc[i]->entry, &pool->free_list); + spin_unlock_irqrestore(&pool->lock, flags); +} + +static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) +{ + struct srp_device *dev = target->srp_host->srp_dev; + + return srp_create_fr_pool(dev->dev, dev->pd, + target->scsi_host->can_queue, + dev->max_pages_per_mr); +} + static int srp_create_target_ib(struct srp_target_port *target) { + struct srp_device *dev = target->srp_host->srp_dev; struct ib_qp_init_attr *init_attr; struct ib_cq *recv_cq, *send_cq; struct ib_qp *qp; + struct ib_fmr_pool *fmr_pool = NULL; + struct srp_fr_pool *fr_pool = NULL; + const int m = 1 + dev->use_fast_reg; int ret; init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); if (!init_attr) return -ENOMEM; - recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_recv_completion, NULL, target, + recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, target, target->queue_size, target->comp_vector); if (IS_ERR(recv_cq)) { ret = PTR_ERR(recv_cq); goto err; } - send_cq = 
ib_create_cq(target->srp_host->srp_dev->dev, - srp_send_completion, NULL, target, - target->queue_size, target->comp_vector); + send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target, + m * target->queue_size, target->comp_vector); if (IS_ERR(send_cq)) { ret = PTR_ERR(send_cq); goto err_recv_cq; @@ -318,16 +474,16 @@ static int srp_create_target_ib(struct srp_target_port *target) ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP); init_attr->event_handler = srp_qp_event; - init_attr->cap.max_send_wr = target->queue_size; + init_attr->cap.max_send_wr = m * target->queue_size; init_attr->cap.max_recv_wr = target->queue_size; init_attr->cap.max_recv_sge = 1; init_attr->cap.max_send_sge = 1; - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; init_attr->qp_type = IB_QPT_RC; init_attr->send_cq = send_cq; init_attr->recv_cq = recv_cq; - qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); + qp = ib_create_qp(dev->pd, init_attr); if (IS_ERR(qp)) { ret = PTR_ERR(qp); goto err_send_cq; @@ -337,6 +493,30 @@ static int srp_create_target_ib(struct srp_target_port *target) if (ret) goto err_qp; + if (dev->use_fast_reg && dev->has_fr) { + fr_pool = srp_alloc_fr_pool(target); + if (IS_ERR(fr_pool)) { + ret = PTR_ERR(fr_pool); + shost_printk(KERN_WARNING, target->scsi_host, PFX + "FR pool allocation failed (%d)\n", ret); + goto err_qp; + } + if (target->fr_pool) + srp_destroy_fr_pool(target->fr_pool); + target->fr_pool = fr_pool; + } else if (!dev->use_fast_reg && dev->has_fmr) { + fmr_pool = srp_alloc_fmr_pool(target); + if (IS_ERR(fmr_pool)) { + ret = PTR_ERR(fmr_pool); + shost_printk(KERN_WARNING, target->scsi_host, PFX + "FMR pool allocation failed (%d)\n", ret); + goto err_qp; + } + if (target->fmr_pool) + ib_destroy_fmr_pool(target->fmr_pool); + target->fmr_pool = fmr_pool; + } + if (target->qp) ib_destroy_qp(target->qp); if (target->recv_cq) @@ -371,8 +551,16 @@ err: */ static void srp_free_target_ib(struct srp_target_port 
*target) { + struct srp_device *dev = target->srp_host->srp_dev; int i; + if (dev->use_fast_reg) { + if (target->fr_pool) + srp_destroy_fr_pool(target->fr_pool); + } else { + if (target->fmr_pool) + ib_destroy_fmr_pool(target->fmr_pool); + } ib_destroy_qp(target->qp); ib_destroy_cq(target->send_cq); ib_destroy_cq(target->recv_cq); @@ -411,6 +599,8 @@ static void srp_path_rec_completion(int status, static int srp_lookup_path(struct srp_target_port *target) { + int ret; + target->path.numb_path = 1; init_completion(&target->done); @@ -431,7 +621,9 @@ static int srp_lookup_path(struct srp_target_port *target) if (target->path_query_id < 0) return target->path_query_id; - wait_for_completion(&target->done); + ret = wait_for_completion_interruptible(&target->done); + if (ret < 0) + return ret; if (target->status < 0) shost_printk(KERN_WARNING, target->scsi_host, @@ -573,7 +765,8 @@ static void srp_disconnect_target(struct srp_target_port *target) static void srp_free_req_data(struct srp_target_port *target) { - struct ib_device *ibdev = target->srp_host->srp_dev->dev; + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev = dev->dev; struct srp_request *req; int i; @@ -582,7 +775,10 @@ static void srp_free_req_data(struct srp_target_port *target) for (i = 0; i < target->req_ring_size; ++i) { req = &target->req_ring[i]; - kfree(req->fmr_list); + if (dev->use_fast_reg) + kfree(req->fr_list); + else + kfree(req->fmr_list); kfree(req->map_page); if (req->indirect_dma_addr) { ib_dma_unmap_single(ibdev, req->indirect_dma_addr, @@ -601,6 +797,7 @@ static int srp_alloc_req_data(struct srp_target_port *target) struct srp_device *srp_dev = target->srp_host->srp_dev; struct ib_device *ibdev = srp_dev->dev; struct srp_request *req; + void *mr_list; dma_addr_t dma_addr; int i, ret = -ENOMEM; @@ -613,12 +810,20 @@ static int srp_alloc_req_data(struct srp_target_port *target) for (i = 0; i < target->req_ring_size; ++i) { req = &target->req_ring[i]; - 
req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *), - GFP_KERNEL); - req->map_page = kmalloc(SRP_FMR_SIZE * sizeof(void *), - GFP_KERNEL); + mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *), + GFP_KERNEL); + if (!mr_list) + goto out; + if (srp_dev->use_fast_reg) + req->fr_list = mr_list; + else + req->fmr_list = mr_list; + req->map_page = kmalloc(srp_dev->max_pages_per_mr * + sizeof(void *), GFP_KERNEL); + if (!req->map_page) + goto out; req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); - if (!req->fmr_list || !req->map_page || !req->indirect_desc) + if (!req->indirect_desc) goto out; dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, @@ -660,6 +865,7 @@ static void srp_remove_target(struct srp_target_port *target) srp_rport_get(target->rport); srp_remove_host(target->scsi_host); scsi_remove_host(target->scsi_host); + srp_stop_rport_timers(target->rport); srp_disconnect_target(target); ib_destroy_cm_id(target->cm_id); srp_free_target_ib(target); @@ -709,7 +915,9 @@ static int srp_connect_target(struct srp_target_port *target) ret = srp_send_req(target); if (ret) return ret; - wait_for_completion(&target->done); + ret = wait_for_completion_interruptible(&target->done); + if (ret < 0) + return ret; /* * The CM event handling code will set status to @@ -752,21 +960,56 @@ static int srp_connect_target(struct srp_target_port *target) } } +static int srp_inv_rkey(struct srp_target_port *target, u32 rkey) +{ + struct ib_send_wr *bad_wr; + struct ib_send_wr wr = { + .opcode = IB_WR_LOCAL_INV, + .wr_id = LOCAL_INV_WR_ID_MASK, + .next = NULL, + .num_sge = 0, + .send_flags = 0, + .ex.invalidate_rkey = rkey, + }; + + return ib_post_send(target->qp, &wr, &bad_wr); +} + static void srp_unmap_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, struct srp_request *req) { - struct ib_device *ibdev = target->srp_host->srp_dev->dev; - struct ib_pool_fmr **pfmr; + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev 
= dev->dev; + int i, res; if (!scsi_sglist(scmnd) || (scmnd->sc_data_direction != DMA_TO_DEVICE && scmnd->sc_data_direction != DMA_FROM_DEVICE)) return; - pfmr = req->fmr_list; - while (req->nfmr--) - ib_fmr_pool_unmap(*pfmr++); + if (dev->use_fast_reg) { + struct srp_fr_desc **pfr; + + for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { + res = srp_inv_rkey(target, (*pfr)->mr->rkey); + if (res < 0) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "Queueing INV WR for rkey %#x failed (%d)\n", + (*pfr)->mr->rkey, res); + queue_work(system_long_wq, + &target->tl_err_work); + } + } + if (req->nmdesc) + srp_fr_pool_put(target->fr_pool, req->fr_list, + req->nmdesc); + } else { + struct ib_pool_fmr **pfmr; + + for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) + ib_fmr_pool_unmap(*pfmr); + } ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), scmnd->sc_data_direction); @@ -776,6 +1019,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd, * srp_claim_req - Take ownership of the scmnd associated with a request. * @target: SRP target port. * @req: SRP request. + * @sdev: If not NULL, only take ownership for this SCSI device. * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take * ownership of @req->scmnd if it equals @scmnd. * @@ -784,16 +1028,17 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd, */ static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target, struct srp_request *req, + struct scsi_device *sdev, struct scsi_cmnd *scmnd) { unsigned long flags; spin_lock_irqsave(&target->lock, flags); - if (!scmnd) { + if (req->scmnd && + (!sdev || req->scmnd->device == sdev) && + (!scmnd || req->scmnd == scmnd)) { scmnd = req->scmnd; req->scmnd = NULL; - } else if (req->scmnd == scmnd) { - req->scmnd = NULL; } else { scmnd = NULL; } @@ -804,6 +1049,10 @@ static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target, /** * srp_free_req() - Unmap data and add request to the free request list. 
+ * @target: SRP target port. + * @req: Request to be freed. + * @scmnd: SCSI command associated with @req. + * @req_lim_delta: Amount to be added to @target->req_lim. */ static void srp_free_req(struct srp_target_port *target, struct srp_request *req, struct scsi_cmnd *scmnd, @@ -820,9 +1069,10 @@ static void srp_free_req(struct srp_target_port *target, } static void srp_finish_req(struct srp_target_port *target, - struct srp_request *req, int result) + struct srp_request *req, struct scsi_device *sdev, + int result) { - struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); + struct scsi_cmnd *scmnd = srp_claim_req(target, req, sdev, NULL); if (scmnd) { srp_free_req(target, req, scmnd, 0); @@ -834,11 +1084,20 @@ static void srp_finish_req(struct srp_target_port *target, static void srp_terminate_io(struct srp_rport *rport) { struct srp_target_port *target = rport->lld_data; + struct Scsi_Host *shost = target->scsi_host; + struct scsi_device *sdev; int i; + /* + * Invoking srp_terminate_io() while srp_queuecommand() is running + * is not safe. Hence the warning statement below. + */ + shost_for_each_device(sdev, shost) + WARN_ON_ONCE(sdev->request_queue->request_fn_active); + for (i = 0; i < target->req_ring_size; ++i) { struct srp_request *req = &target->req_ring[i]; - srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16); + srp_finish_req(target, req, NULL, DID_TRANSPORT_FAILFAST << 16); } } @@ -863,21 +1122,19 @@ static int srp_rport_reconnect(struct srp_rport *rport) * callbacks will have finished before a new QP is allocated. */ ret = srp_new_cm_id(target); - /* - * Whether or not creating a new CM ID succeeded, create a new - * QP. This guarantees that all completion callback function - * invocations have finished before request resetting starts. 
- */ - if (ret == 0) - ret = srp_create_target_ib(target); - else - srp_create_target_ib(target); for (i = 0; i < target->req_ring_size; ++i) { struct srp_request *req = &target->req_ring[i]; - srp_finish_req(target, req, DID_RESET << 16); + srp_finish_req(target, req, NULL, DID_RESET << 16); } + /* + * Whether or not creating a new CM ID succeeded, create a new + * QP. This guarantees that all callback functions for the old QP have + * finished before any send requests are posted on the new QP. + */ + ret += srp_create_target_ib(target); + INIT_LIST_HEAD(&target->free_tx); for (i = 0; i < target->queue_size; ++i) list_add(&target->tx_ring[i]->list, &target->free_tx); @@ -909,33 +1166,87 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, static int srp_map_finish_fmr(struct srp_map_state *state, struct srp_target_port *target) { - struct srp_device *dev = target->srp_host->srp_dev; struct ib_pool_fmr *fmr; u64 io_addr = 0; - if (!state->npages) - return 0; - - if (state->npages == 1) { - srp_map_desc(state, state->base_dma_addr, state->fmr_len, - target->rkey); - state->npages = state->fmr_len = 0; - return 0; - } - - fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages, + fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages, state->npages, io_addr); if (IS_ERR(fmr)) return PTR_ERR(fmr); *state->next_fmr++ = fmr; - state->nfmr++; + state->nmdesc++; + + srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey); - srp_map_desc(state, 0, state->fmr_len, fmr->fmr->rkey); - state->npages = state->fmr_len = 0; return 0; } +static int srp_map_finish_fr(struct srp_map_state *state, + struct srp_target_port *target) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_send_wr *bad_wr; + struct ib_send_wr wr; + struct srp_fr_desc *desc; + u32 rkey; + + desc = srp_fr_pool_get(target->fr_pool); + if (!desc) + return -ENOMEM; + + rkey = ib_inc_rkey(desc->mr->rkey); + ib_update_fast_reg_key(desc->mr, rkey); + + 
memcpy(desc->frpl->page_list, state->pages, + sizeof(state->pages[0]) * state->npages); + + memset(&wr, 0, sizeof(wr)); + wr.opcode = IB_WR_FAST_REG_MR; + wr.wr_id = FAST_REG_WR_ID_MASK; + wr.wr.fast_reg.iova_start = state->base_dma_addr; + wr.wr.fast_reg.page_list = desc->frpl; + wr.wr.fast_reg.page_list_len = state->npages; + wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size); + wr.wr.fast_reg.length = state->dma_len; + wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE); + wr.wr.fast_reg.rkey = desc->mr->lkey; + + *state->next_fr++ = desc; + state->nmdesc++; + + srp_map_desc(state, state->base_dma_addr, state->dma_len, + desc->mr->rkey); + + return ib_post_send(target->qp, &wr, &bad_wr); +} + +static int srp_finish_mapping(struct srp_map_state *state, + struct srp_target_port *target) +{ + int ret = 0; + + if (state->npages == 0) + return 0; + + if (state->npages == 1 && !register_always) + srp_map_desc(state, state->base_dma_addr, state->dma_len, + target->rkey); + else + ret = target->srp_host->srp_dev->use_fast_reg ? 
+ srp_map_finish_fr(state, target) : + srp_map_finish_fmr(state, target); + + if (ret == 0) { + state->npages = 0; + state->dma_len = 0; + } + + return ret; +} + static void srp_map_update_start(struct srp_map_state *state, struct scatterlist *sg, int sg_index, dma_addr_t dma_addr) @@ -948,7 +1259,7 @@ static void srp_map_update_start(struct srp_map_state *state, static int srp_map_sg_entry(struct srp_map_state *state, struct srp_target_port *target, struct scatterlist *sg, int sg_index, - int use_fmr) + bool use_mr) { struct srp_device *dev = target->srp_host->srp_dev; struct ib_device *ibdev = dev->dev; @@ -960,23 +1271,25 @@ static int srp_map_sg_entry(struct srp_map_state *state, if (!dma_len) return 0; - if (use_fmr == SRP_MAP_NO_FMR) { - /* Once we're in direct map mode for a request, we don't - * go back to FMR mode, so no need to update anything + if (!use_mr) { + /* + * Once we're in direct map mode for a request, we don't + * go back to FMR or FR mode, so no need to update anything * other than the descriptor. */ srp_map_desc(state, dma_addr, dma_len, target->rkey); return 0; } - /* If we start at an offset into the FMR page, don't merge into - * the current FMR. Finish it out, and use the kernel's MR for this - * sg entry. This is to avoid potential bugs on some SRP targets - * that were never quite defined, but went away when the initiator - * avoided using FMR on such page fragments. + /* + * Since not all RDMA HW drivers support non-zero page offsets for + * FMR, if we start at an offset into a page, don't merge into the + * current FMR mapping. Finish it out, and use the kernel's MR for + * this sg entry. 
*/ - if (dma_addr & ~dev->fmr_page_mask || dma_len > dev->fmr_max_size) { - ret = srp_map_finish_fmr(state, target); + if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) || + dma_len > dev->mr_max_size) { + ret = srp_finish_mapping(state, target); if (ret) return ret; @@ -985,52 +1298,106 @@ static int srp_map_sg_entry(struct srp_map_state *state, return 0; } - /* If this is the first sg to go into the FMR, save our position. - * We need to know the first unmapped entry, its index, and the - * first unmapped address within that entry to be able to restart - * mapping after an error. + /* + * If this is the first sg that will be mapped via FMR or via FR, save + * our position. We need to know the first unmapped entry, its index, + * and the first unmapped address within that entry to be able to + * restart mapping after an error. */ if (!state->unmapped_sg) srp_map_update_start(state, sg, sg_index, dma_addr); while (dma_len) { - if (state->npages == SRP_FMR_SIZE) { - ret = srp_map_finish_fmr(state, target); + unsigned offset = dma_addr & ~dev->mr_page_mask; + if (state->npages == dev->max_pages_per_mr || offset != 0) { + ret = srp_finish_mapping(state, target); if (ret) return ret; srp_map_update_start(state, sg, sg_index, dma_addr); } - len = min_t(unsigned int, dma_len, dev->fmr_page_size); + len = min_t(unsigned int, dma_len, dev->mr_page_size - offset); if (!state->npages) state->base_dma_addr = dma_addr; - state->pages[state->npages++] = dma_addr; - state->fmr_len += len; + state->pages[state->npages++] = dma_addr & dev->mr_page_mask; + state->dma_len += len; dma_addr += len; dma_len -= len; } - /* If the last entry of the FMR wasn't a full page, then we need to + /* + * If the last entry of the MR wasn't a full page, then we need to * close it out and start a new one -- we can only merge at page * boundries. 
*/ ret = 0; - if (len != dev->fmr_page_size) { - ret = srp_map_finish_fmr(state, target); + if (len != dev->mr_page_size) { + ret = srp_finish_mapping(state, target); if (!ret) srp_map_update_start(state, NULL, 0, 0); } return ret; } +static int srp_map_sg(struct srp_map_state *state, + struct srp_target_port *target, struct srp_request *req, + struct scatterlist *scat, int count) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev = dev->dev; + struct scatterlist *sg; + int i; + bool use_mr; + + state->desc = req->indirect_desc; + state->pages = req->map_page; + if (dev->use_fast_reg) { + state->next_fr = req->fr_list; + use_mr = !!target->fr_pool; + } else { + state->next_fmr = req->fmr_list; + use_mr = !!target->fmr_pool; + } + + for_each_sg(scat, sg, count, i) { + if (srp_map_sg_entry(state, target, sg, i, use_mr)) { + /* + * Memory registration failed, so backtrack to the + * first unmapped entry and continue on without using + * memory registration. + */ + dma_addr_t dma_addr; + unsigned int dma_len; + +backtrack: + sg = state->unmapped_sg; + i = state->unmapped_index; + + dma_addr = ib_sg_dma_address(ibdev, sg); + dma_len = ib_sg_dma_len(ibdev, sg); + dma_len -= (state->unmapped_addr - dma_addr); + dma_addr = state->unmapped_addr; + use_mr = false; + srp_map_desc(state, dma_addr, dma_len, target->rkey); + } + } + + if (use_mr && srp_finish_mapping(state, target)) + goto backtrack; + + req->nmdesc = state->nmdesc; + + return 0; +} + static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, struct srp_request *req) { - struct scatterlist *scat, *sg; + struct scatterlist *scat; struct srp_cmd *cmd = req->cmd->buf; - int i, len, nents, count, use_fmr; + int len, nents, count; struct srp_device *dev; struct ib_device *ibdev; struct srp_map_state state; @@ -1062,7 +1429,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, fmt = SRP_DATA_DESC_DIRECT; len = sizeof (struct srp_cmd) 
+ sizeof (struct srp_direct_buf); - if (count == 1) { + if (count == 1 && !register_always) { /* * The midlayer only generated a single gather/scatter * entry, or DMA mapping coalesced everything to a @@ -1075,13 +1442,13 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, buf->key = cpu_to_be32(target->rkey); buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); - req->nfmr = 0; + req->nmdesc = 0; goto map_complete; } - /* We have more than one scatter/gather entry, so build our indirect - * descriptor table, trying to merge as many entries with FMR as we - * can. + /* + * We have more than one scatter/gather entry, so build our indirect + * descriptor table, trying to merge as many entries as we can. */ indirect_hdr = (void *) cmd->add_data; @@ -1089,35 +1456,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, target->indirect_size, DMA_TO_DEVICE); memset(&state, 0, sizeof(state)); - state.desc = req->indirect_desc; - state.pages = req->map_page; - state.next_fmr = req->fmr_list; - - use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR; - - for_each_sg(scat, sg, count, i) { - if (srp_map_sg_entry(&state, target, sg, i, use_fmr)) { - /* FMR mapping failed, so backtrack to the first - * unmapped entry and continue on without using FMR. - */ - dma_addr_t dma_addr; - unsigned int dma_len; - -backtrack: - sg = state.unmapped_sg; - i = state.unmapped_index; - - dma_addr = ib_sg_dma_address(ibdev, sg); - dma_len = ib_sg_dma_len(ibdev, sg); - dma_len -= (state.unmapped_addr - dma_addr); - dma_addr = state.unmapped_addr; - use_fmr = SRP_MAP_NO_FMR; - srp_map_desc(&state, dma_addr, dma_len, target->rkey); - } - } - - if (use_fmr == SRP_MAP_ALLOW_FMR && srp_map_finish_fmr(&state, target)) - goto backtrack; + srp_map_sg(&state, target, req, scat, count); /* We've mapped the request, now pull as much of the indirect * descriptor table as we can into the command buffer. 
If this @@ -1125,9 +1464,9 @@ backtrack: * guaranteed to fit into the command, as the SCSI layer won't * give us more S/G entries than we allow. */ - req->nfmr = state.nfmr; if (state.ndesc == 1) { - /* FMR mapping was able to collapse this to one entry, + /* + * Memory registration collapsed the sg-list into one entry, * so use a direct descriptor. */ struct srp_direct_buf *buf = (void *) cmd->add_data; @@ -1283,7 +1622,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) complete(&target->tsk_mgmt_done); } else { req = &target->req_ring[rsp->tag]; - scmnd = srp_claim_req(target, req, NULL); + scmnd = srp_claim_req(target, req, NULL, NULL); if (!scmnd) { shost_printk(KERN_ERR, target->scsi_host, "Null scmnd for RSP w/tag %016llx\n", @@ -1436,6 +1775,7 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) /** * srp_tl_err_work() - handle a transport layer error + * @work: Work structure embedded in an SRP target port. * * Note: This function may get invoked before the rport has been created, * hence the target->rport test. @@ -1449,14 +1789,24 @@ static void srp_tl_err_work(struct work_struct *work) srp_start_tl_fail_timers(target->rport); } -static void srp_handle_qp_err(enum ib_wc_status wc_status, bool send_err, - struct srp_target_port *target) +static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, + bool send_err, struct srp_target_port *target) { if (target->connected && !target->qp_in_error) { - shost_printk(KERN_ERR, target->scsi_host, - PFX "failed %s status %d\n", - send_err ? 
"send" : "receive", - wc_status); + if (wr_id & LOCAL_INV_WR_ID_MASK) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "LOCAL_INV failed with status %d\n", + wc_status); + } else if (wr_id & FAST_REG_WR_ID_MASK) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "FAST_REG_MR failed status %d\n", + wc_status); + } else { + shost_printk(KERN_ERR, target->scsi_host, + PFX "failed %s status %d for iu %p\n", + send_err ? "send" : "receive", + wc_status, (void *)(uintptr_t)wr_id); + } queue_work(system_long_wq, &target->tl_err_work); } target->qp_in_error = true; @@ -1472,7 +1822,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) if (likely(wc.status == IB_WC_SUCCESS)) { srp_handle_recv(target, &wc); } else { - srp_handle_qp_err(wc.status, false, target); + srp_handle_qp_err(wc.wr_id, wc.status, false, target); } } } @@ -1488,7 +1838,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr) iu = (struct srp_iu *) (uintptr_t) wc.wr_id; list_add(&iu->list, &target->free_tx); } else { - srp_handle_qp_err(wc.status, true, target); + srp_handle_qp_err(wc.wr_id, wc.status, true, target); } } } @@ -1502,7 +1852,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) struct srp_cmd *cmd; struct ib_device *dev; unsigned long flags; - int len, result; + int len, ret; const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler; /* @@ -1514,12 +1864,9 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) if (in_scsi_eh) mutex_lock(&rport->mutex); - result = srp_chkready(target->rport); - if (unlikely(result)) { - scmnd->result = result; - scmnd->scsi_done(scmnd); - goto unlock_rport; - } + scmnd->result = srp_chkready(target->rport); + if (unlikely(scmnd->result)) + goto err; spin_lock_irqsave(&target->lock, flags); iu = __srp_get_tx_iu(target, SRP_IU_CMD); @@ -1534,7 +1881,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 
ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len, DMA_TO_DEVICE); - scmnd->result = 0; scmnd->host_scribble = (void *) req; cmd = iu->buf; @@ -1551,7 +1897,15 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) len = srp_map_data(scmnd, target, req); if (len < 0) { shost_printk(KERN_ERR, target->scsi_host, - PFX "Failed to map data\n"); + PFX "Failed to map data (%d)\n", len); + /* + * If we ran out of memory descriptors (-ENOMEM) because an + * application is queuing many requests with more than + * max_pages_per_mr sg-list elements, tell the SCSI mid-layer + * to reduce queue depth temporarily. + */ + scmnd->result = len == -ENOMEM ? + DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16; goto err_iu; } @@ -1563,11 +1917,13 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) goto err_unmap; } + ret = 0; + unlock_rport: if (in_scsi_eh) mutex_unlock(&rport->mutex); - return 0; + return ret; err_unmap: srp_unmap_data(scmnd, target, req); @@ -1575,16 +1931,27 @@ err_unmap: err_iu: srp_put_tx_iu(target, iu, SRP_IU_CMD); + /* + * Avoid that the loops that iterate over the request ring can + * encounter a dangling SCSI command pointer. 
+ */ + req->scmnd = NULL; + spin_lock_irqsave(&target->lock, flags); list_add(&req->list, &target->free_reqs); err_unlock: spin_unlock_irqrestore(&target->lock, flags); - if (in_scsi_eh) - mutex_unlock(&rport->mutex); +err: + if (scmnd->result) { + scmnd->scsi_done(scmnd); + ret = 0; + } else { + ret = SCSI_MLQUEUE_HOST_BUSY; + } - return SCSI_MLQUEUE_HOST_BUSY; + goto unlock_rport; } /* @@ -1803,8 +2170,10 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, shost_printk(KERN_WARNING, shost, PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); else - shost_printk(KERN_WARNING, shost, - PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason); + shost_printk(KERN_WARNING, shost, PFX + "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", + target->path.sgid.raw, + target->orig_dgid, reason); } else shost_printk(KERN_WARNING, shost, " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," @@ -1862,6 +2231,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) case IB_CM_TIMEWAIT_EXIT: shost_printk(KERN_ERR, target->scsi_host, PFX "connection closed\n"); + comp = 1; target->status = 0; break; @@ -1998,7 +2368,7 @@ static int srp_abort(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); - if (!req || !srp_claim_req(target, req, scmnd)) + if (!req || !srp_claim_req(target, req, NULL, scmnd)) return SUCCESS; if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, SRP_TSK_ABORT_TASK) == 0) @@ -2029,8 +2399,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) for (i = 0; i < target->req_ring_size; ++i) { struct srp_request *req = &target->req_ring[i]; - if (req->scmnd && req->scmnd->device == scmnd->device) - srp_finish_req(target, req, DID_RESET << 16); + srp_finish_req(target, req, scmnd->device, DID_RESET << 16); } return SUCCESS; @@ -2289,6 +2658,8 @@ static struct class srp_class = { /** * srp_conn_unique() - check whether the connection to a target is unique + * @host: SRP host. 
+ * @target: SRP target port. */ static bool srp_conn_unique(struct srp_host *host, struct srp_target_port *target) @@ -2584,7 +2955,8 @@ static ssize_t srp_create_target(struct device *dev, container_of(dev, struct srp_host, dev); struct Scsi_Host *target_host; struct srp_target_port *target; - struct ib_device *ibdev = host->srp_dev->dev; + struct srp_device *srp_dev = host->srp_dev; + struct ib_device *ibdev = srp_dev->dev; int ret; target_host = scsi_host_alloc(&srp_template, @@ -2611,6 +2983,8 @@ static ssize_t srp_create_target(struct device *dev, target->tl_retry_count = 7; target->queue_size = SRP_DEFAULT_QUEUE_SIZE; + mutex_lock(&host->add_target_mutex); + ret = srp_parse_options(buf, target); if (ret) goto err; @@ -2627,9 +3001,9 @@ static ssize_t srp_create_target(struct device *dev, goto err; } - if (!host->srp_dev->fmr_pool && !target->allow_ext_sg && - target->cmd_sg_cnt < target->sg_tablesize) { - pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); + if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && + target->cmd_sg_cnt < target->sg_tablesize) { + pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); target->sg_tablesize = target->cmd_sg_cnt; } @@ -2648,16 +3022,9 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err_free_mem; - ib_query_gid(ibdev, host->port, 0, &target->path.sgid); - - shost_printk(KERN_DEBUG, target->scsi_host, PFX - "new target: id_ext %016llx ioc_guid %016llx pkey %04x " - "service_id %016llx dgid %pI6\n", - (unsigned long long) be64_to_cpu(target->id_ext), - (unsigned long long) be64_to_cpu(target->ioc_guid), - be16_to_cpu(target->path.pkey), - (unsigned long long) be64_to_cpu(target->service_id), - target->path.dgid.raw); + ret = ib_query_gid(ibdev, host->port, 0, &target->path.sgid); + if (ret) + goto err_free_mem; ret = srp_create_target_ib(target); if (ret) @@ -2678,7 +3045,19 @@ static 
ssize_t srp_create_target(struct device *dev, if (ret) goto err_disconnect; - return count; + shost_printk(KERN_DEBUG, target->scsi_host, PFX + "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", + be64_to_cpu(target->id_ext), + be64_to_cpu(target->ioc_guid), + be16_to_cpu(target->path.pkey), + be64_to_cpu(target->service_id), + target->path.sgid.raw, target->path.dgid.raw); + + ret = count; + +out: + mutex_unlock(&host->add_target_mutex); + return ret; err_disconnect: srp_disconnect_target(target); @@ -2694,8 +3073,7 @@ err_free_mem: err: scsi_host_put(target_host); - - return ret; + goto out; } static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target); @@ -2731,6 +3109,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port) INIT_LIST_HEAD(&host->target_list); spin_lock_init(&host->target_lock); init_completion(&host->released); + mutex_init(&host->add_target_mutex); host->srp_dev = device; host->port = port; @@ -2762,9 +3141,9 @@ static void srp_add_one(struct ib_device *device) { struct srp_device *srp_dev; struct ib_device_attr *dev_attr; - struct ib_fmr_pool_param fmr_param; struct srp_host *host; - int max_pages_per_fmr, fmr_page_shift, s, e, p; + int mr_page_shift, s, e, p; + u64 max_pages_per_mr; dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); if (!dev_attr) @@ -2779,15 +3158,39 @@ static void srp_add_one(struct ib_device *device) if (!srp_dev) goto free_attr; + srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && + device->map_phys_fmr && device->unmap_fmr); + srp_dev->has_fr = (dev_attr->device_cap_flags & + IB_DEVICE_MEM_MGT_EXTENSIONS); + if (!srp_dev->has_fmr && !srp_dev->has_fr) + dev_warn(&device->dev, "neither FMR nor FR is supported\n"); + + srp_dev->use_fast_reg = (srp_dev->has_fr && + (!srp_dev->has_fmr || prefer_fr)); + /* * Use the smallest page size supported by the HCA, down to a * minimum of 4096 bytes. 
We're unlikely to build large sglists * out of smaller entries. */ - fmr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1); - srp_dev->fmr_page_size = 1 << fmr_page_shift; - srp_dev->fmr_page_mask = ~((u64) srp_dev->fmr_page_size - 1); - srp_dev->fmr_max_size = srp_dev->fmr_page_size * SRP_FMR_SIZE; + mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1); + srp_dev->mr_page_size = 1 << mr_page_shift; + srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); + max_pages_per_mr = dev_attr->max_mr_size; + do_div(max_pages_per_mr, srp_dev->mr_page_size); + srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, + max_pages_per_mr); + if (srp_dev->use_fast_reg) { + srp_dev->max_pages_per_mr = + min_t(u32, srp_dev->max_pages_per_mr, + dev_attr->max_fast_reg_page_list_len); + } + srp_dev->mr_max_size = srp_dev->mr_page_size * + srp_dev->max_pages_per_mr; + pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", + device->name, mr_page_shift, dev_attr->max_mr_size, + dev_attr->max_fast_reg_page_list_len, + srp_dev->max_pages_per_mr, srp_dev->mr_max_size); INIT_LIST_HEAD(&srp_dev->dev_list); @@ -2803,27 +3206,6 @@ static void srp_add_one(struct ib_device *device) if (IS_ERR(srp_dev->mr)) goto err_pd; - for (max_pages_per_fmr = SRP_FMR_SIZE; - max_pages_per_fmr >= SRP_FMR_MIN_SIZE; - max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) { - memset(&fmr_param, 0, sizeof fmr_param); - fmr_param.pool_size = SRP_FMR_POOL_SIZE; - fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE; - fmr_param.cache = 1; - fmr_param.max_pages_per_fmr = max_pages_per_fmr; - fmr_param.page_shift = fmr_page_shift; - fmr_param.access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); - - srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param); - if (!IS_ERR(srp_dev->fmr_pool)) - break; - } - - if (IS_ERR(srp_dev->fmr_pool)) - srp_dev->fmr_pool = NULL; - if 
(device->node_type == RDMA_NODE_IB_SWITCH) { s = 0; e = 0; @@ -2886,8 +3268,6 @@ static void srp_remove_one(struct ib_device *device) kfree(host); } - if (srp_dev->fmr_pool) - ib_destroy_fmr_pool(srp_dev->fmr_pool); ib_dereg_mr(srp_dev->mr); ib_dealloc_pd(srp_dev->pd); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 575681063f3..e46ecb15aa0 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -66,13 +66,10 @@ enum { SRP_TAG_NO_REQ = ~0U, SRP_TAG_TSK_MGMT = 1U << 31, - SRP_FMR_SIZE = 512, - SRP_FMR_MIN_SIZE = 128, - SRP_FMR_POOL_SIZE = 1024, - SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4, + SRP_MAX_PAGES_PER_MR = 512, - SRP_MAP_ALLOW_FMR = 0, - SRP_MAP_NO_FMR = 1, + LOCAL_INV_WR_ID_MASK = 1, + FAST_REG_WR_ID_MASK = 2, }; enum srp_target_state { @@ -86,15 +83,24 @@ enum srp_iu_type { SRP_IU_RSP, }; +/* + * @mr_page_mask: HCA memory registration page mask. + * @mr_page_size: HCA memory registration page size. + * @mr_max_size: Maximum size in bytes of a single FMR / FR registration + * request. 
+ */ struct srp_device { struct list_head dev_list; struct ib_device *dev; struct ib_pd *pd; struct ib_mr *mr; - struct ib_fmr_pool *fmr_pool; - u64 fmr_page_mask; - int fmr_page_size; - int fmr_max_size; + u64 mr_page_mask; + int mr_page_size; + int mr_max_size; + int max_pages_per_mr; + bool has_fmr; + bool has_fr; + bool use_fast_reg; }; struct srp_host { @@ -105,17 +111,21 @@ struct srp_host { spinlock_t target_lock; struct completion released; struct list_head list; + struct mutex add_target_mutex; }; struct srp_request { struct list_head list; struct scsi_cmnd *scmnd; struct srp_iu *cmd; - struct ib_pool_fmr **fmr_list; + union { + struct ib_pool_fmr **fmr_list; + struct srp_fr_desc **fr_list; + }; u64 *map_page; struct srp_direct_buf *indirect_desc; dma_addr_t indirect_dma_addr; - short nfmr; + short nmdesc; short index; }; @@ -130,6 +140,10 @@ struct srp_target_port { struct ib_cq *send_cq ____cacheline_aligned_in_smp; struct ib_cq *recv_cq; struct ib_qp *qp; + union { + struct ib_fmr_pool *fmr_pool; + struct srp_fr_pool *fr_pool; + }; u32 lkey; u32 rkey; enum srp_target_state state; @@ -196,15 +210,66 @@ struct srp_iu { enum dma_data_direction direction; }; +/** + * struct srp_fr_desc - fast registration work request arguments + * @entry: Entry in srp_fr_pool.free_list. + * @mr: Memory region. + * @frpl: Fast registration page list. + */ +struct srp_fr_desc { + struct list_head entry; + struct ib_mr *mr; + struct ib_fast_reg_page_list *frpl; +}; + +/** + * struct srp_fr_pool - pool of fast registration descriptors + * + * An entry is available for allocation if and only if it occurs in @free_list. + * + * @size: Number of descriptors in this pool. + * @max_page_list_len: Maximum fast registration work request page list length. + * @lock: Protects free_list. + * @free_list: List of free descriptors. + * @desc: Fast registration descriptor pool. 
+ */ +struct srp_fr_pool { + int size; + int max_page_list_len; + spinlock_t lock; + struct list_head free_list; + struct srp_fr_desc desc[0]; +}; + +/** + * struct srp_map_state - per-request DMA memory mapping state + * @desc: Pointer to the element of the SRP buffer descriptor array + * that is being filled in. + * @pages: Array with DMA addresses of pages being considered for + * memory registration. + * @base_dma_addr: DMA address of the first page that has not yet been mapped. + * @dma_len: Number of bytes that will be registered with the next + * FMR or FR memory registration call. + * @total_len: Total number of bytes in the sg-list being mapped. + * @npages: Number of page addresses in the pages[] array. + * @nmdesc: Number of FMR or FR memory descriptors used for mapping. + * @ndesc: Number of SRP buffer descriptors that have been filled in. + * @unmapped_sg: First element of the sg-list that is mapped via FMR or FR. + * @unmapped_index: Index of the first element mapped via FMR or FR. + * @unmapped_addr: DMA address of the first element mapped via FMR or FR. 
+ */ struct srp_map_state { - struct ib_pool_fmr **next_fmr; + union { + struct ib_pool_fmr **next_fmr; + struct srp_fr_desc **next_fr; + }; struct srp_direct_buf *desc; u64 *pages; dma_addr_t base_dma_addr; - u32 fmr_len; + u32 dma_len; u32 total_len; unsigned int npages; - unsigned int nfmr; + unsigned int nmdesc; unsigned int ndesc; struct scatterlist *unmapped_sg; int unmapped_index; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 520a7e5a490..fe09f2788b1 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1078,6 +1078,7 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx) { + struct ib_device *dev = ch->sport->sdev->device; struct se_cmd *cmd; struct scatterlist *sg, *sg_orig; int sg_cnt; @@ -1124,7 +1125,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, db = ioctx->rbufs; tsize = cmd->data_length; - dma_len = sg_dma_len(&sg[0]); + dma_len = ib_sg_dma_len(dev, &sg[0]); riu = ioctx->rdma_ius; /* @@ -1155,7 +1156,8 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, ++j; if (j < count) { sg = sg_next(sg); - dma_len = sg_dma_len(sg); + dma_len = ib_sg_dma_len( + dev, sg); } } } else { @@ -1192,8 +1194,8 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, tsize = cmd->data_length; riu = ioctx->rdma_ius; sg = sg_orig; - dma_len = sg_dma_len(&sg[0]); - dma_addr = sg_dma_address(&sg[0]); + dma_len = ib_sg_dma_len(dev, &sg[0]); + dma_addr = ib_sg_dma_address(dev, &sg[0]); /* this second loop is really mapped sg_addres to rdma_iu->ib_sge */ for (i = 0, j = 0; @@ -1216,8 +1218,10 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, ++j; if (j < count) { sg = sg_next(sg); - dma_len = sg_dma_len(sg); - dma_addr = sg_dma_address(sg); + dma_len = ib_sg_dma_len( + dev, sg); + dma_addr = ib_sg_dma_address( + dev, sg); } } } else { @@ -2580,7 
+2584,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id, goto destroy_ib; } - ch->sess = transport_init_session(); + ch->sess = transport_init_session(TARGET_PROT_NORMAL); if (IS_ERR(ch->sess)) { rej->reason = __constant_cpu_to_be32( SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); @@ -3081,6 +3085,14 @@ static void srpt_queue_tm_rsp(struct se_cmd *cmd) srpt_queue_response(cmd); } +static void srpt_aborted_task(struct se_cmd *cmd) +{ + struct srpt_send_ioctx *ioctx = container_of(cmd, + struct srpt_send_ioctx, cmd); + + srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); +} + static int srpt_queue_status(struct se_cmd *cmd) { struct srpt_send_ioctx *ioctx; @@ -3666,9 +3678,9 @@ static ssize_t srpt_tpg_attrib_store_srp_max_rdma_size( unsigned long val; int ret; - ret = strict_strtoul(page, 0, &val); + ret = kstrtoul(page, 0, &val); if (ret < 0) { - pr_err("strict_strtoul() failed with ret: %d\n", ret); + pr_err("kstrtoul() failed with ret: %d\n", ret); return -EINVAL; } if (val > MAX_SRPT_RDMA_SIZE) { @@ -3706,9 +3718,9 @@ static ssize_t srpt_tpg_attrib_store_srp_max_rsp_size( unsigned long val; int ret; - ret = strict_strtoul(page, 0, &val); + ret = kstrtoul(page, 0, &val); if (ret < 0) { - pr_err("strict_strtoul() failed with ret: %d\n", ret); + pr_err("kstrtoul() failed with ret: %d\n", ret); return -EINVAL; } if (val > MAX_SRPT_RSP_SIZE) { @@ -3746,9 +3758,9 @@ static ssize_t srpt_tpg_attrib_store_srp_sq_size( unsigned long val; int ret; - ret = strict_strtoul(page, 0, &val); + ret = kstrtoul(page, 0, &val); if (ret < 0) { - pr_err("strict_strtoul() failed with ret: %d\n", ret); + pr_err("kstrtoul() failed with ret: %d\n", ret); return -EINVAL; } if (val > MAX_SRPT_SRQ_SIZE) { @@ -3793,7 +3805,7 @@ static ssize_t srpt_tpg_store_enable( unsigned long tmp; int ret; - ret = strict_strtoul(page, 0, &tmp); + ret = kstrtoul(page, 0, &tmp); if (ret < 0) { printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n"); return -EINVAL; @@ -3928,6 +3940,7 @@ static struct 
target_core_fabric_ops srpt_template = { .queue_data_in = srpt_queue_data_in, .queue_status = srpt_queue_status, .queue_tm_rsp = srpt_queue_tm_rsp, + .aborted_task = srpt_aborted_task, /* * Setup function pointers for generic logic in * target_core_fabric_configfs.c |
