diff options
Diffstat (limited to 'drivers/infiniband/ulp/srp/ib_srp.c')
| -rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.c | 3101 |
1 files changed, 2381 insertions, 720 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 31207e66414..e3c2c5b4297 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -28,10 +28,10 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_srp.c 3932 2005-11-01 17:19:29Z roland $ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> @@ -41,46 +41,146 @@ #include <linux/random.h> #include <linux/jiffies.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> #include <scsi/scsi_dbg.h> +#include <scsi/scsi_tcq.h> #include <scsi/srp.h> - -#include <rdma/ib_cache.h> +#include <scsi/scsi_transport_srp.h> #include "ib_srp.h" #define DRV_NAME "ib_srp" #define PFX DRV_NAME ": " -#define DRV_VERSION "0.2" -#define DRV_RELDATE "November 1, 2005" +#define DRV_VERSION "1.0" +#define DRV_RELDATE "July 1, 2013" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator " "v" DRV_VERSION " (" DRV_RELDATE ")"); MODULE_LICENSE("Dual BSD/GPL"); +static unsigned int srp_sg_tablesize; +static unsigned int cmd_sg_entries; +static unsigned int indirect_sg_entries; +static bool allow_ext_sg; +static bool prefer_fr; +static bool register_always; static int topspin_workarounds = 1; +module_param(srp_sg_tablesize, uint, 0444); +MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries"); + +module_param(cmd_sg_entries, uint, 0444); +MODULE_PARM_DESC(cmd_sg_entries, + "Default number of gather/scatter entries in the SRP command (default is 12, max 255)"); + +module_param(indirect_sg_entries, uint, 0444); +MODULE_PARM_DESC(indirect_sg_entries, + "Default max number of gather/scatter entries (default is 12, max is " __stringify(SCSI_MAX_SG_CHAIN_SEGMENTS) ")"); + +module_param(allow_ext_sg, bool, 0444); +MODULE_PARM_DESC(allow_ext_sg, + "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)"); + module_param(topspin_workarounds, int, 0444); MODULE_PARM_DESC(topspin_workarounds, "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); -static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad }; +module_param(prefer_fr, bool, 0444); +MODULE_PARM_DESC(prefer_fr, +"Whether to use fast registration if both FMR and fast registration are supported"); + +module_param(register_always, bool, 0444); +MODULE_PARM_DESC(register_always, + "Use memory registration even for contiguous memory regions"); + +static struct kernel_param_ops srp_tmo_ops; + +static int srp_reconnect_delay = 10; +module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts"); + +static int srp_fast_io_fail_tmo = 15; +module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(fast_io_fail_tmo, + "Number of seconds between the observation of a transport" + " layer error and failing all I/O. \"off\" means that this" + " functionality is disabled."); + +static int srp_dev_loss_tmo = 600; +module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dev_loss_tmo, + "Maximum number of seconds that the SRP transport should" + " insulate transport layer errors. After this time has been" + " exceeded the SCSI host is removed. Should be" + " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT) + " if fast_io_fail_tmo has not been set. \"off\" means that" + " this functionality is disabled."); static void srp_add_one(struct ib_device *device); static void srp_remove_one(struct ib_device *device); -static void srp_completion(struct ib_cq *cq, void *target_ptr); +static void srp_recv_completion(struct ib_cq *cq, void *target_ptr); +static void srp_send_completion(struct ib_cq *cq, void *target_ptr); static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); +static struct scsi_transport_template *ib_srp_transport_template; + static struct ib_client srp_client = { .name = "srp", .add = srp_add_one, .remove = srp_remove_one }; +static struct ib_sa_client srp_sa_client; + +static int srp_tmo_get(char *buffer, const struct kernel_param *kp) +{ + int tmo = *(int *)kp->arg; + + if (tmo >= 0) + return sprintf(buffer, "%d", tmo); + else + return sprintf(buffer, "off"); +} + +static int srp_tmo_set(const char *val, const struct kernel_param *kp) +{ + int tmo, res; + + if (strncmp(val, "off", 3) != 0) { + res = kstrtoint(val, 0, &tmo); + if (res) + goto out; + } else { + tmo = -1; + } + if (kp->arg == &srp_reconnect_delay) + res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, + srp_dev_loss_tmo); + else if (kp->arg == &srp_fast_io_fail_tmo) + res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo); + else + res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo, + tmo); + if (res) + goto out; + *(int *)kp->arg = tmo; + +out: + return res; +} + +static struct kernel_param_ops srp_tmo_ops = { + .get = srp_tmo_get, + .set = srp_tmo_set, +}; + static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) { return (struct srp_target_port *) host->hostdata; @@ -91,6 +191,16 @@ static const char *srp_target_info(struct Scsi_Host *host) return host_to_target(host)->target_name; } +static int srp_target_is_topspin(struct srp_target_port *target) +{ + static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad }; + static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d }; + + return topspin_workarounds && + (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) || + !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui)); +} + static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size, gfp_t gfp_mask, enum dma_data_direction direction) @@ -105,8 +215,9 @@ static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size, if (!iu->buf) goto out_free_iu; - iu->dma = dma_map_single(host->dev->dma_device, iu->buf, size, direction); - if (dma_mapping_error(iu->dma)) + iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size, + direction); + if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma)) goto out_free_buf; iu->size = size; @@ -127,14 +238,15 @@ static void srp_free_iu(struct srp_host *host, struct srp_iu *iu) if (!iu) return; - dma_unmap_single(host->dev->dma_device, iu->dma, iu->size, iu->direction); + ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size, + iu->direction); kfree(iu->buf); kfree(iu); } static void srp_qp_event(struct ib_event *event, void *context) { - printk(KERN_ERR PFX "QP event %d\n", event->event); + pr_debug("QP event %d\n", event->event); } static int srp_init_qp(struct srp_target_port *target, @@ -147,10 +259,10 @@ static int srp_init_qp(struct srp_target_port *target, if (!attr) return -ENOMEM; - ret = ib_find_cached_pkey(target->srp_host->dev, - target->srp_host->port, - be16_to_cpu(target->path.pkey), - &attr->pkey_index); + ret = ib_find_pkey(target->srp_host->srp_dev->dev, + target->srp_host->port, + be16_to_cpu(target->path.pkey), + &attr->pkey_index); if (ret) goto out; @@ -170,64 +282,304 @@ out: return ret; } +static int srp_new_cm_id(struct srp_target_port *target) +{ + struct ib_cm_id *new_cm_id; + + new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev, + srp_cm_handler, target); + if (IS_ERR(new_cm_id)) + return PTR_ERR(new_cm_id); + + if (target->cm_id) + ib_destroy_cm_id(target->cm_id); + target->cm_id = new_cm_id; + + return 0; +} + +static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_fmr_pool_param fmr_param; + + memset(&fmr_param, 0, sizeof(fmr_param)); + fmr_param.pool_size = target->scsi_host->can_queue; + fmr_param.dirty_watermark = fmr_param.pool_size / 4; + fmr_param.cache = 1; + fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; + fmr_param.page_shift = ilog2(dev->mr_page_size); + fmr_param.access = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ); + + return ib_create_fmr_pool(dev->pd, &fmr_param); +} + +/** + * srp_destroy_fr_pool() - free the resources owned by a pool + * @pool: Fast registration pool to be destroyed. + */ +static void srp_destroy_fr_pool(struct srp_fr_pool *pool) +{ + int i; + struct srp_fr_desc *d; + + if (!pool) + return; + + for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { + if (d->frpl) + ib_free_fast_reg_page_list(d->frpl); + if (d->mr) + ib_dereg_mr(d->mr); + } + kfree(pool); +} + +/** + * srp_create_fr_pool() - allocate and initialize a pool for fast registration + * @device: IB device to allocate fast registration descriptors for. + * @pd: Protection domain associated with the FR descriptors. + * @pool_size: Number of descriptors to allocate. + * @max_page_list_len: Maximum fast registration work request page list length. + */ +static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, + struct ib_pd *pd, int pool_size, + int max_page_list_len) +{ + struct srp_fr_pool *pool; + struct srp_fr_desc *d; + struct ib_mr *mr; + struct ib_fast_reg_page_list *frpl; + int i, ret = -EINVAL; + + if (pool_size <= 0) + goto err; + ret = -ENOMEM; + pool = kzalloc(sizeof(struct srp_fr_pool) + + pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL); + if (!pool) + goto err; + pool->size = pool_size; + pool->max_page_list_len = max_page_list_len; + spin_lock_init(&pool->lock); + INIT_LIST_HEAD(&pool->free_list); + + for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { + mr = ib_alloc_fast_reg_mr(pd, max_page_list_len); + if (IS_ERR(mr)) { + ret = PTR_ERR(mr); + goto destroy_pool; + } + d->mr = mr; + frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len); + if (IS_ERR(frpl)) { + ret = PTR_ERR(frpl); + goto destroy_pool; + } + d->frpl = frpl; + list_add_tail(&d->entry, &pool->free_list); + } + +out: + return pool; + +destroy_pool: + srp_destroy_fr_pool(pool); + +err: + pool = ERR_PTR(ret); + goto out; +} + +/** + * srp_fr_pool_get() - obtain a descriptor suitable for fast registration + * @pool: Pool to obtain descriptor from. + */ +static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool) +{ + struct srp_fr_desc *d = NULL; + unsigned long flags; + + spin_lock_irqsave(&pool->lock, flags); + if (!list_empty(&pool->free_list)) { + d = list_first_entry(&pool->free_list, typeof(*d), entry); + list_del(&d->entry); + } + spin_unlock_irqrestore(&pool->lock, flags); + + return d; +} + +/** + * srp_fr_pool_put() - put an FR descriptor back in the free list + * @pool: Pool the descriptor was allocated from. + * @desc: Pointer to an array of fast registration descriptor pointers. + * @n: Number of descriptors to put back. + * + * Note: The caller must already have queued an invalidation request for + * desc->mr->rkey before calling this function. + */ +static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc, + int n) +{ + unsigned long flags; + int i; + + spin_lock_irqsave(&pool->lock, flags); + for (i = 0; i < n; i++) + list_add(&desc[i]->entry, &pool->free_list); + spin_unlock_irqrestore(&pool->lock, flags); +} + +static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) +{ + struct srp_device *dev = target->srp_host->srp_dev; + + return srp_create_fr_pool(dev->dev, dev->pd, + target->scsi_host->can_queue, + dev->max_pages_per_mr); +} + static int srp_create_target_ib(struct srp_target_port *target) { + struct srp_device *dev = target->srp_host->srp_dev; struct ib_qp_init_attr *init_attr; + struct ib_cq *recv_cq, *send_cq; + struct ib_qp *qp; + struct ib_fmr_pool *fmr_pool = NULL; + struct srp_fr_pool *fr_pool = NULL; + const int m = 1 + dev->use_fast_reg; int ret; init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); if (!init_attr) return -ENOMEM; - target->cq = ib_create_cq(target->srp_host->dev, srp_completion, - NULL, target, SRP_CQ_SIZE); - if (IS_ERR(target->cq)) { - ret = PTR_ERR(target->cq); - goto out; + recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, target, + target->queue_size, target->comp_vector); + if (IS_ERR(recv_cq)) { + ret = PTR_ERR(recv_cq); + goto err; } - ib_req_notify_cq(target->cq, IB_CQ_NEXT_COMP); + send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target, + m * target->queue_size, target->comp_vector); + if (IS_ERR(send_cq)) { + ret = PTR_ERR(send_cq); + goto err_recv_cq; + } + + ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP); init_attr->event_handler = srp_qp_event; - init_attr->cap.max_send_wr = SRP_SQ_SIZE; - init_attr->cap.max_recv_wr = SRP_RQ_SIZE; + init_attr->cap.max_send_wr = m * target->queue_size; + init_attr->cap.max_recv_wr = target->queue_size; init_attr->cap.max_recv_sge = 1; init_attr->cap.max_send_sge = 1; - init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; init_attr->qp_type = IB_QPT_RC; - init_attr->send_cq = target->cq; - init_attr->recv_cq = target->cq; + init_attr->send_cq = send_cq; + init_attr->recv_cq = recv_cq; - target->qp = ib_create_qp(target->srp_host->pd, init_attr); - if (IS_ERR(target->qp)) { - ret = PTR_ERR(target->qp); - ib_destroy_cq(target->cq); - goto out; + qp = ib_create_qp(dev->pd, init_attr); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_send_cq; } - ret = srp_init_qp(target, target->qp); - if (ret) { - ib_destroy_qp(target->qp); - ib_destroy_cq(target->cq); - goto out; + ret = srp_init_qp(target, qp); + if (ret) + goto err_qp; + + if (dev->use_fast_reg && dev->has_fr) { + fr_pool = srp_alloc_fr_pool(target); + if (IS_ERR(fr_pool)) { + ret = PTR_ERR(fr_pool); + shost_printk(KERN_WARNING, target->scsi_host, PFX + "FR pool allocation failed (%d)\n", ret); + goto err_qp; + } + if (target->fr_pool) + srp_destroy_fr_pool(target->fr_pool); + target->fr_pool = fr_pool; + } else if (!dev->use_fast_reg && dev->has_fmr) { + fmr_pool = srp_alloc_fmr_pool(target); + if (IS_ERR(fmr_pool)) { + ret = PTR_ERR(fmr_pool); + shost_printk(KERN_WARNING, target->scsi_host, PFX + "FMR pool allocation failed (%d)\n", ret); + goto err_qp; + } + if (target->fmr_pool) + ib_destroy_fmr_pool(target->fmr_pool); + target->fmr_pool = fmr_pool; } -out: + if (target->qp) + ib_destroy_qp(target->qp); + if (target->recv_cq) + ib_destroy_cq(target->recv_cq); + if (target->send_cq) + ib_destroy_cq(target->send_cq); + + target->qp = qp; + target->recv_cq = recv_cq; + target->send_cq = send_cq; + + kfree(init_attr); + return 0; + +err_qp: + ib_destroy_qp(qp); + +err_send_cq: + ib_destroy_cq(send_cq); + +err_recv_cq: + ib_destroy_cq(recv_cq); + +err: kfree(init_attr); return ret; } +/* + * Note: this function may be called without srp_alloc_iu_bufs() having been + * invoked. Hence the target->[rt]x_ring checks. + */ static void srp_free_target_ib(struct srp_target_port *target) { + struct srp_device *dev = target->srp_host->srp_dev; int i; + if (dev->use_fast_reg) { + if (target->fr_pool) + srp_destroy_fr_pool(target->fr_pool); + } else { + if (target->fmr_pool) + ib_destroy_fmr_pool(target->fmr_pool); + } ib_destroy_qp(target->qp); - ib_destroy_cq(target->cq); + ib_destroy_cq(target->send_cq); + ib_destroy_cq(target->recv_cq); - for (i = 0; i < SRP_RQ_SIZE; ++i) - srp_free_iu(target->srp_host, target->rx_ring[i]); - for (i = 0; i < SRP_SQ_SIZE + 1; ++i) - srp_free_iu(target->srp_host, target->tx_ring[i]); + target->qp = NULL; + target->send_cq = target->recv_cq = NULL; + + if (target->rx_ring) { + for (i = 0; i < target->queue_size; ++i) + srp_free_iu(target->srp_host, target->rx_ring[i]); + kfree(target->rx_ring); + target->rx_ring = NULL; + } + if (target->tx_ring) { + for (i = 0; i < target->queue_size; ++i) + srp_free_iu(target->srp_host, target->tx_ring[i]); + kfree(target->tx_ring); + target->tx_ring = NULL; + } } static void srp_path_rec_completion(int status, @@ -238,7 +590,8 @@ static void srp_path_rec_completion(int status, target->status = status; if (status) - printk(KERN_ERR PFX "Got failed path rec status %d\n", status); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Got failed path rec status %d\n", status); else target->path = *pathrec; complete(&target->done); @@ -246,13 +599,17 @@ static void srp_path_rec_completion(int status, static int srp_lookup_path(struct srp_target_port *target) { + int ret; + target->path.numb_path = 1; init_completion(&target->done); - target->path_query_id = ib_sa_path_rec_get(target->srp_host->dev, + target->path_query_id = ib_sa_path_rec_get(&srp_sa_client, + target->srp_host->srp_dev->dev, target->srp_host->port, &target->path, + IB_SA_PATH_REC_SERVICE_ID | IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH | @@ -264,10 +621,13 @@ static int srp_lookup_path(struct srp_target_port *target) if (target->path_query_id < 0) return target->path_query_id; - wait_for_completion(&target->done); + ret = wait_for_completion_interruptible(&target->done); + if (ret < 0) + return ret; if (target->status < 0) - printk(KERN_WARNING PFX "Path record query failed\n"); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Path record query failed\n"); return target->status; } @@ -303,30 +663,54 @@ static int srp_send_req(struct srp_target_port *target) req->param.responder_resources = 4; req->param.remote_cm_response_timeout = 20; req->param.local_cm_response_timeout = 20; - req->param.retry_count = 7; + req->param.retry_count = target->tl_retry_count; req->param.rnr_retry_count = 7; req->param.max_cm_retries = 15; req->priv.opcode = SRP_LOGIN_REQ; req->priv.tag = 0; - req->priv.req_it_iu_len = cpu_to_be32(SRP_MAX_IU_LEN); + req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len); req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT); - memcpy(req->priv.initiator_port_id, target->srp_host->initiator_port_id, 16); + /* + * In the published SRP specification (draft rev. 16a), the + * port identifier format is 8 bytes of ID extension followed + * by 8 bytes of GUID. Older drafts put the two halves in the + * opposite order, so that the GUID comes first. + * + * Targets conforming to these obsolete drafts can be + * recognized by the I/O Class they report. + */ + if (target->io_class == SRP_REV10_IB_IO_CLASS) { + memcpy(req->priv.initiator_port_id, + &target->path.sgid.global.interface_id, 8); + memcpy(req->priv.initiator_port_id + 8, + &target->initiator_ext, 8); + memcpy(req->priv.target_port_id, &target->ioc_guid, 8); + memcpy(req->priv.target_port_id + 8, &target->id_ext, 8); + } else { + memcpy(req->priv.initiator_port_id, + &target->initiator_ext, 8); + memcpy(req->priv.initiator_port_id + 8, + &target->path.sgid.global.interface_id, 8); + memcpy(req->priv.target_port_id, &target->id_ext, 8); + memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8); + } + /* * Topspin/Cisco SRP targets will reject our login unless we - * zero out the first 8 bytes of our initiator port ID. The - * second 8 bytes must be our local node GUID, but we always - * use that anyway. + * zero out the first 8 bytes of our initiator port ID and set + * the second 8 bytes to the local node GUID. */ - if (topspin_workarounds && !memcmp(&target->ioc_guid, topspin_oui, 3)) { - printk(KERN_DEBUG PFX "Topspin/Cisco initiator port ID workaround " - "activated for target GUID %016llx\n", - (unsigned long long) be64_to_cpu(target->ioc_guid)); + if (srp_target_is_topspin(target)) { + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Topspin/Cisco initiator port ID workaround " + "activated for target GUID %016llx\n", + (unsigned long long) be64_to_cpu(target->ioc_guid)); memset(req->priv.initiator_port_id, 0, 8); + memcpy(req->priv.initiator_port_id + 8, + &target->srp_host->srp_dev->dev->node_guid, 8); } - memcpy(req->priv.target_port_id, &target->id_ext, 8); - memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8); status = ib_send_cm_req(target->cm_id, &req->param); @@ -335,44 +719,193 @@ static int srp_send_req(struct srp_target_port *target) return status; } +static bool srp_queue_remove_work(struct srp_target_port *target) +{ + bool changed = false; + + spin_lock_irq(&target->lock); + if (target->state != SRP_TARGET_REMOVED) { + target->state = SRP_TARGET_REMOVED; + changed = true; + } + spin_unlock_irq(&target->lock); + + if (changed) + queue_work(system_long_wq, &target->remove_work); + + return changed; +} + +static bool srp_change_conn_state(struct srp_target_port *target, + bool connected) +{ + bool changed = false; + + spin_lock_irq(&target->lock); + if (target->connected != connected) { + target->connected = connected; + changed = true; + } + spin_unlock_irq(&target->lock); + + return changed; +} + static void srp_disconnect_target(struct srp_target_port *target) { - /* XXX should send SRP_I_LOGOUT request */ + if (srp_change_conn_state(target, false)) { + /* XXX should send SRP_I_LOGOUT request */ - init_completion(&target->done); - ib_send_cm_dreq(target->cm_id, NULL, 0); - wait_for_completion(&target->done); + if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM DREQ failed\n"); + } + } } -static void srp_remove_work(void *target_ptr) +static void srp_free_req_data(struct srp_target_port *target) { - struct srp_target_port *target = target_ptr; + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev = dev->dev; + struct srp_request *req; + int i; - spin_lock_irq(target->scsi_host->host_lock); - if (target->state != SRP_TARGET_DEAD) { - spin_unlock_irq(target->scsi_host->host_lock); - scsi_host_put(target->scsi_host); + if (!target->req_ring) return; + + for (i = 0; i < target->req_ring_size; ++i) { + req = &target->req_ring[i]; + if (dev->use_fast_reg) + kfree(req->fr_list); + else + kfree(req->fmr_list); + kfree(req->map_page); + if (req->indirect_dma_addr) { + ib_dma_unmap_single(ibdev, req->indirect_dma_addr, + target->indirect_size, + DMA_TO_DEVICE); + } + kfree(req->indirect_desc); } - target->state = SRP_TARGET_REMOVED; - spin_unlock_irq(target->scsi_host->host_lock); - down(&target->srp_host->target_mutex); - list_del(&target->list); - up(&target->srp_host->target_mutex); + kfree(target->req_ring); + target->req_ring = NULL; +} +static int srp_alloc_req_data(struct srp_target_port *target) +{ + struct srp_device *srp_dev = target->srp_host->srp_dev; + struct ib_device *ibdev = srp_dev->dev; + struct srp_request *req; + void *mr_list; + dma_addr_t dma_addr; + int i, ret = -ENOMEM; + + INIT_LIST_HEAD(&target->free_reqs); + + target->req_ring = kzalloc(target->req_ring_size * + sizeof(*target->req_ring), GFP_KERNEL); + if (!target->req_ring) + goto out; + + for (i = 0; i < target->req_ring_size; ++i) { + req = &target->req_ring[i]; + mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *), + GFP_KERNEL); + if (!mr_list) + goto out; + if (srp_dev->use_fast_reg) + req->fr_list = mr_list; + else + req->fmr_list = mr_list; + req->map_page = kmalloc(srp_dev->max_pages_per_mr * + sizeof(void *), GFP_KERNEL); + if (!req->map_page) + goto out; + req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); + if (!req->indirect_desc) + goto out; + + dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, + target->indirect_size, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(ibdev, dma_addr)) + goto out; + + req->indirect_dma_addr = dma_addr; + req->index = i; + list_add_tail(&req->list, &target->free_reqs); + } + ret = 0; + +out: + return ret; +} + +/** + * srp_del_scsi_host_attr() - Remove attributes defined in the host template. + * @shost: SCSI host whose attributes to remove from sysfs. + * + * Note: Any attributes defined in the host template and that did not exist + * before invocation of this function will be ignored. + */ +static void srp_del_scsi_host_attr(struct Scsi_Host *shost) +{ + struct device_attribute **attr; + + for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr) + device_remove_file(&shost->shost_dev, *attr); +} + +static void srp_remove_target(struct srp_target_port *target) +{ + WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); + + srp_del_scsi_host_attr(target->scsi_host); + srp_rport_get(target->rport); + srp_remove_host(target->scsi_host); scsi_remove_host(target->scsi_host); + srp_stop_rport_timers(target->rport); + srp_disconnect_target(target); ib_destroy_cm_id(target->cm_id); srp_free_target_ib(target); - scsi_host_put(target->scsi_host); - /* And another put to really free the target port... */ + cancel_work_sync(&target->tl_err_work); + srp_rport_put(target->rport); + srp_free_req_data(target); + + spin_lock(&target->srp_host->target_lock); + list_del(&target->list); + spin_unlock(&target->srp_host->target_lock); + scsi_host_put(target->scsi_host); } +static void srp_remove_work(struct work_struct *work) +{ + struct srp_target_port *target = + container_of(work, struct srp_target_port, remove_work); + + WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); + + srp_remove_target(target); +} + +static void srp_rport_delete(struct srp_rport *rport) +{ + struct srp_target_port *target = rport->lld_data; + + srp_queue_remove_work(target); +} + static int srp_connect_target(struct srp_target_port *target) { + int retries = 3; int ret; + WARN_ON_ONCE(target->connected); + + target->qp_in_error = false; + ret = srp_lookup_path(target); if (ret) return ret; @@ -382,7 +915,9 @@ static int srp_connect_target(struct srp_target_port *target) ret = srp_send_req(target); if (ret) return ret; - wait_for_completion(&target->done); + ret = wait_for_completion_interruptible(&target->done); + if (ret < 0) + return ret; /* * The CM event handling code will set status to @@ -392,6 +927,7 @@ static int srp_connect_target(struct srp_target_port *target) */ switch (target->status) { case 0: + srp_change_conn_state(target, true); return 0; case SRP_PORT_REDIRECT: @@ -403,253 +939,701 @@ static int srp_connect_target(struct srp_target_port *target) case SRP_DLID_REDIRECT: break; + case SRP_STALE_CONN: + /* Our current CM id was stale, and is now in timewait. + * Try to reconnect with a new one. + */ + if (!retries-- || srp_new_cm_id(target)) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "giving up on stale connection\n"); + target->status = -ECONNRESET; + return target->status; + } + + shost_printk(KERN_ERR, target->scsi_host, PFX + "retrying stale connection\n"); + break; + default: return target->status; } } } -static int srp_reconnect_target(struct srp_target_port *target) +static int srp_inv_rkey(struct srp_target_port *target, u32 rkey) { - struct ib_cm_id *new_cm_id; - struct ib_qp_attr qp_attr; - struct srp_request *req; - struct ib_wc wc; - int ret; + struct ib_send_wr *bad_wr; + struct ib_send_wr wr = { + .opcode = IB_WR_LOCAL_INV, + .wr_id = LOCAL_INV_WR_ID_MASK, + .next = NULL, + .num_sge = 0, + .send_flags = 0, + .ex.invalidate_rkey = rkey, + }; + + return ib_post_send(target->qp, &wr, &bad_wr); +} + +static void srp_unmap_data(struct scsi_cmnd *scmnd, + struct srp_target_port *target, + struct srp_request *req) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev = dev->dev; + int i, res; + + if (!scsi_sglist(scmnd) || + (scmnd->sc_data_direction != DMA_TO_DEVICE && + scmnd->sc_data_direction != DMA_FROM_DEVICE)) + return; + + if (dev->use_fast_reg) { + struct srp_fr_desc **pfr; + + for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { + res = srp_inv_rkey(target, (*pfr)->mr->rkey); + if (res < 0) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "Queueing INV WR for rkey %#x failed (%d)\n", + (*pfr)->mr->rkey, res); + queue_work(system_long_wq, + &target->tl_err_work); + } + } + if (req->nmdesc) + srp_fr_pool_put(target->fr_pool, req->fr_list, + req->nmdesc); + } else { + struct ib_pool_fmr **pfmr; + + for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) + ib_fmr_pool_unmap(*pfmr); + } + + ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), + scmnd->sc_data_direction); +} + +/** + * srp_claim_req - Take ownership of the scmnd associated with a request. + * @target: SRP target port. + * @req: SRP request. + * @sdev: If not NULL, only take ownership for this SCSI device. + * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take + * ownership of @req->scmnd if it equals @scmnd. + * + * Return value: + * Either NULL or a pointer to the SCSI command the caller became owner of. + */ +static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target, + struct srp_request *req, + struct scsi_device *sdev, + struct scsi_cmnd *scmnd) +{ + unsigned long flags; + + spin_lock_irqsave(&target->lock, flags); + if (req->scmnd && + (!sdev || req->scmnd->device == sdev) && + (!scmnd || req->scmnd == scmnd)) { + scmnd = req->scmnd; + req->scmnd = NULL; + } else { + scmnd = NULL; + } + spin_unlock_irqrestore(&target->lock, flags); + + return scmnd; +} + +/** + * srp_free_req() - Unmap data and add request to the free request list. + * @target: SRP target port. + * @req: Request to be freed. + * @scmnd: SCSI command associated with @req. + * @req_lim_delta: Amount to be added to @target->req_lim. + */ +static void srp_free_req(struct srp_target_port *target, + struct srp_request *req, struct scsi_cmnd *scmnd, + s32 req_lim_delta) +{ + unsigned long flags; + + srp_unmap_data(scmnd, target, req); + + spin_lock_irqsave(&target->lock, flags); + target->req_lim += req_lim_delta; + list_add_tail(&req->list, &target->free_reqs); + spin_unlock_irqrestore(&target->lock, flags); +} + +static void srp_finish_req(struct srp_target_port *target, + struct srp_request *req, struct scsi_device *sdev, + int result) +{ + struct scsi_cmnd *scmnd = srp_claim_req(target, req, sdev, NULL); + + if (scmnd) { + srp_free_req(target, req, scmnd, 0); + scmnd->result = result; + scmnd->scsi_done(scmnd); + } +} + +static void srp_terminate_io(struct srp_rport *rport) +{ + struct srp_target_port *target = rport->lld_data; + struct Scsi_Host *shost = target->scsi_host; + struct scsi_device *sdev; int i; - spin_lock_irq(target->scsi_host->host_lock); - if (target->state != SRP_TARGET_LIVE) { - spin_unlock_irq(target->scsi_host->host_lock); - return -EAGAIN; + /* + * Invoking srp_terminate_io() while srp_queuecommand() is running + * is not safe. Hence the warning statement below. + */ + shost_for_each_device(sdev, shost) + WARN_ON_ONCE(sdev->request_queue->request_fn_active); + + for (i = 0; i < target->req_ring_size; ++i) { + struct srp_request *req = &target->req_ring[i]; + srp_finish_req(target, req, NULL, DID_TRANSPORT_FAILFAST << 16); } - target->state = SRP_TARGET_CONNECTING; - spin_unlock_irq(target->scsi_host->host_lock); +} + +/* + * It is up to the caller to ensure that srp_rport_reconnect() calls are + * serialized and that no concurrent srp_queuecommand(), srp_abort(), + * srp_reset_device() or srp_reset_host() calls will occur while this function + * is in progress. One way to realize that is not to call this function + * directly but to call srp_reconnect_rport() instead since that last function + * serializes calls of this function via rport->mutex and also blocks + * srp_queuecommand() calls before invoking this function. + */ +static int srp_rport_reconnect(struct srp_rport *rport) +{ + struct srp_target_port *target = rport->lld_data; + int i, ret; srp_disconnect_target(target); /* - * Now get a new local CM ID so that we avoid confusing the - * target in case things are really fouled up. + * Now get a new local CM ID so that we avoid confusing the target in + * case things are really fouled up. Doing so also ensures that all CM + * callbacks will have finished before a new QP is allocated. */ - new_cm_id = ib_create_cm_id(target->srp_host->dev, - srp_cm_handler, target); - if (IS_ERR(new_cm_id)) { - ret = PTR_ERR(new_cm_id); - goto err; + ret = srp_new_cm_id(target); + + for (i = 0; i < target->req_ring_size; ++i) { + struct srp_request *req = &target->req_ring[i]; + srp_finish_req(target, req, NULL, DID_RESET << 16); } - ib_destroy_cm_id(target->cm_id); - target->cm_id = new_cm_id; - qp_attr.qp_state = IB_QPS_RESET; - ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE); - if (ret) - goto err; + /* + * Whether or not creating a new CM ID succeeded, create a new + * QP. This guarantees that all callback functions for the old QP have + * finished before any send requests are posted on the new QP. + */ + ret += srp_create_target_ib(target); - ret = srp_init_qp(target, target->qp); - if (ret) - goto err; + INIT_LIST_HEAD(&target->free_tx); + for (i = 0; i < target->queue_size; ++i) + list_add(&target->tx_ring[i]->list, &target->free_tx); - while (ib_poll_cq(target->cq, 1, &wc) > 0) - ; /* nothing */ + if (ret == 0) + ret = srp_connect_target(target); - list_for_each_entry(req, &target->req_queue, list) { - req->scmnd->result = DID_RESET << 16; - req->scmnd->scsi_done(req->scmnd); - } + if (ret == 0) + shost_printk(KERN_INFO, target->scsi_host, + PFX "reconnect succeeded\n"); - target->rx_head = 0; - target->tx_head = 0; - target->tx_tail = 0; - target->req_head = 0; - for (i = 0; i < SRP_SQ_SIZE - 1; ++i) - target->req_ring[i].next = i + 1; - target->req_ring[SRP_SQ_SIZE - 1].next = -1; - INIT_LIST_HEAD(&target->req_queue); + return ret; +} - ret = srp_connect_target(target); - if (ret) - goto err; +static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, + unsigned int dma_len, u32 rkey) +{ + struct srp_direct_buf *desc = state->desc; - spin_lock_irq(target->scsi_host->host_lock); - if (target->state == SRP_TARGET_CONNECTING) { - ret = 0; - target->state = SRP_TARGET_LIVE; - } else - ret = -EAGAIN; - spin_unlock_irq(target->scsi_host->host_lock); + desc->va = cpu_to_be64(dma_addr); + desc->key = cpu_to_be32(rkey); + desc->len = cpu_to_be32(dma_len); + + state->total_len += dma_len; + state->desc++; + state->ndesc++; +} + +static int srp_map_finish_fmr(struct srp_map_state *state, + struct srp_target_port *target) +{ + struct ib_pool_fmr *fmr; + u64 io_addr = 0; + + fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages, + state->npages, io_addr); + if (IS_ERR(fmr)) + return PTR_ERR(fmr); + + *state->next_fmr++ = fmr; + state->nmdesc++; + + srp_map_desc(state, 0, state->dma_len, fmr->fmr->rkey); + + return 0; +} + +static int srp_map_finish_fr(struct srp_map_state *state, + struct srp_target_port *target) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_send_wr *bad_wr; + struct ib_send_wr wr; + struct srp_fr_desc *desc; + u32 rkey; + + desc = srp_fr_pool_get(target->fr_pool); + if (!desc) + return -ENOMEM; + + rkey = ib_inc_rkey(desc->mr->rkey); + ib_update_fast_reg_key(desc->mr, rkey); + + memcpy(desc->frpl->page_list, state->pages, + sizeof(state->pages[0]) * state->npages); + + memset(&wr, 0, sizeof(wr)); + wr.opcode = IB_WR_FAST_REG_MR; + wr.wr_id = FAST_REG_WR_ID_MASK; + wr.wr.fast_reg.iova_start = state->base_dma_addr; + wr.wr.fast_reg.page_list = desc->frpl; + wr.wr.fast_reg.page_list_len = state->npages; + wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size); + wr.wr.fast_reg.length = state->dma_len; + wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE); + wr.wr.fast_reg.rkey = desc->mr->lkey; + + *state->next_fr++ = desc; + state->nmdesc++; + + srp_map_desc(state, state->base_dma_addr, state->dma_len, + desc->mr->rkey); + + return ib_post_send(target->qp, &wr, &bad_wr); +} + +static int srp_finish_mapping(struct srp_map_state *state, + struct srp_target_port *target) +{ + int ret = 0; + + if (state->npages == 0) + return 0; + + if (state->npages == 1 && !register_always) + srp_map_desc(state, state->base_dma_addr, state->dma_len, + target->rkey); + else + ret = target->srp_host->srp_dev->use_fast_reg ? + srp_map_finish_fr(state, target) : + srp_map_finish_fmr(state, target); + + if (ret == 0) { + state->npages = 0; + state->dma_len = 0; + } return ret; +} -err: - printk(KERN_ERR PFX "reconnect failed (%d), removing target port.\n", ret); +static void srp_map_update_start(struct srp_map_state *state, + struct scatterlist *sg, int sg_index, + dma_addr_t dma_addr) +{ + state->unmapped_sg = sg; + state->unmapped_index = sg_index; + state->unmapped_addr = dma_addr; +} + +static int srp_map_sg_entry(struct srp_map_state *state, + struct srp_target_port *target, + struct scatterlist *sg, int sg_index, + bool use_mr) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev = dev->dev; + dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg); + unsigned int dma_len = ib_sg_dma_len(ibdev, sg); + unsigned int len; + int ret; + + if (!dma_len) + return 0; + + if (!use_mr) { + /* + * Once we're in direct map mode for a request, we don't + * go back to FMR or FR mode, so no need to update anything + * other than the descriptor. + */ + srp_map_desc(state, dma_addr, dma_len, target->rkey); + return 0; + } + + /* + * Since not all RDMA HW drivers support non-zero page offsets for + * FMR, if we start at an offset into a page, don't merge into the + * current FMR mapping. Finish it out, and use the kernel's MR for + * this sg entry. + */ + if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) || + dma_len > dev->mr_max_size) { + ret = srp_finish_mapping(state, target); + if (ret) + return ret; + + srp_map_desc(state, dma_addr, dma_len, target->rkey); + srp_map_update_start(state, NULL, 0, 0); + return 0; + } /* - * We couldn't reconnect, so kill our target port off. - * However, we have to defer the real removal because we might - * be in the context of the SCSI error handler now, which - * would deadlock if we call scsi_remove_host(). + * If this is the first sg that will be mapped via FMR or via FR, save + * our position. We need to know the first unmapped entry, its index, + * and the first unmapped address within that entry to be able to + * restart mapping after an error. */ - spin_lock_irq(target->scsi_host->host_lock); - if (target->state == SRP_TARGET_CONNECTING) { - target->state = SRP_TARGET_DEAD; - INIT_WORK(&target->work, srp_remove_work, target); - schedule_work(&target->work); + if (!state->unmapped_sg) + srp_map_update_start(state, sg, sg_index, dma_addr); + + while (dma_len) { + unsigned offset = dma_addr & ~dev->mr_page_mask; + if (state->npages == dev->max_pages_per_mr || offset != 0) { + ret = srp_finish_mapping(state, target); + if (ret) + return ret; + + srp_map_update_start(state, sg, sg_index, dma_addr); + } + + len = min_t(unsigned int, dma_len, dev->mr_page_size - offset); + + if (!state->npages) + state->base_dma_addr = dma_addr; + state->pages[state->npages++] = dma_addr & dev->mr_page_mask; + state->dma_len += len; + dma_addr += len; + dma_len -= len; } - spin_unlock_irq(target->scsi_host->host_lock); + /* + * If the last entry of the MR wasn't a full page, then we need to + * close it out and start a new one -- we can only merge at page + * boundries. + */ + ret = 0; + if (len != dev->mr_page_size) { + ret = srp_finish_mapping(state, target); + if (!ret) + srp_map_update_start(state, NULL, 0, 0); + } return ret; } +static int srp_map_sg(struct srp_map_state *state, + struct srp_target_port *target, struct srp_request *req, + struct scatterlist *scat, int count) +{ + struct srp_device *dev = target->srp_host->srp_dev; + struct ib_device *ibdev = dev->dev; + struct scatterlist *sg; + int i; + bool use_mr; + + state->desc = req->indirect_desc; + state->pages = req->map_page; + if (dev->use_fast_reg) { + state->next_fr = req->fr_list; + use_mr = !!target->fr_pool; + } else { + state->next_fmr = req->fmr_list; + use_mr = !!target->fmr_pool; + } + + for_each_sg(scat, sg, count, i) { + if (srp_map_sg_entry(state, target, sg, i, use_mr)) { + /* + * Memory registration failed, so backtrack to the + * first unmapped entry and continue on without using + * memory registration. + */ + dma_addr_t dma_addr; + unsigned int dma_len; + +backtrack: + sg = state->unmapped_sg; + i = state->unmapped_index; + + dma_addr = ib_sg_dma_address(ibdev, sg); + dma_len = ib_sg_dma_len(ibdev, sg); + dma_len -= (state->unmapped_addr - dma_addr); + dma_addr = state->unmapped_addr; + use_mr = false; + srp_map_desc(state, dma_addr, dma_len, target->rkey); + } + } + + if (use_mr && srp_finish_mapping(state, target)) + goto backtrack; + + req->nmdesc = state->nmdesc; + + return 0; +} + static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, struct srp_request *req) { + struct scatterlist *scat; struct srp_cmd *cmd = req->cmd->buf; - int len; + int len, nents, count; + struct srp_device *dev; + struct ib_device *ibdev; + struct srp_map_state state; + struct srp_indirect_buf *indirect_hdr; + u32 table_len; u8 fmt; - if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE) + if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) return sizeof (struct srp_cmd); if (scmnd->sc_data_direction != DMA_FROM_DEVICE && scmnd->sc_data_direction != DMA_TO_DEVICE) { - printk(KERN_WARNING PFX "Unhandled data direction %d\n", - scmnd->sc_data_direction); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled data direction %d\n", + scmnd->sc_data_direction); return -EINVAL; } - if (scmnd->use_sg) { - struct scatterlist *scat = scmnd->request_buffer; - int n; - int i; + nents = scsi_sg_count(scmnd); + scat = scsi_sglist(scmnd); - n = dma_map_sg(target->srp_host->dev->dma_device, - scat, scmnd->use_sg, scmnd->sc_data_direction); + dev = target->srp_host->srp_dev; + ibdev = dev->dev; - if (n == 1) { - struct srp_direct_buf *buf = (void *) cmd->add_data; + count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction); + if (unlikely(count == 0)) + return -EIO; - fmt = SRP_DATA_DESC_DIRECT; + fmt = SRP_DATA_DESC_DIRECT; + len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); - buf->va = cpu_to_be64(sg_dma_address(scat)); - buf->key = cpu_to_be32(target->srp_host->mr->rkey); - buf->len = cpu_to_be32(sg_dma_len(scat)); + if (count == 1 && !register_always) { + /* + * The midlayer only generated a single gather/scatter + * entry, or DMA mapping coalesced everything to a + * single entry. So a direct descriptor along with + * the DMA MR suffices. + */ + struct srp_direct_buf *buf = (void *) cmd->add_data; - len = sizeof (struct srp_cmd) + - sizeof (struct srp_direct_buf); - } else { - struct srp_indirect_buf *buf = (void *) cmd->add_data; - u32 datalen = 0; + buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); + buf->key = cpu_to_be32(target->rkey); + buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); - fmt = SRP_DATA_DESC_INDIRECT; + req->nmdesc = 0; + goto map_complete; + } - if (scmnd->sc_data_direction == DMA_TO_DEVICE) - cmd->data_out_desc_cnt = n; - else - cmd->data_in_desc_cnt = n; - - buf->table_desc.va = cpu_to_be64(req->cmd->dma + - sizeof *cmd + - sizeof *buf); - buf->table_desc.key = - cpu_to_be32(target->srp_host->mr->rkey); - buf->table_desc.len = - cpu_to_be32(n * sizeof (struct srp_direct_buf)); - - for (i = 0; i < n; ++i) { - buf->desc_list[i].va = cpu_to_be64(sg_dma_address(&scat[i])); - buf->desc_list[i].key = - cpu_to_be32(target->srp_host->mr->rkey); - buf->desc_list[i].len = cpu_to_be32(sg_dma_len(&scat[i])); - - datalen += sg_dma_len(&scat[i]); - } + /* + * We have more than one scatter/gather entry, so build our indirect + * descriptor table, trying to merge as many entries as we can. + */ + indirect_hdr = (void *) cmd->add_data; - buf->len = cpu_to_be32(datalen); + ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr, + target->indirect_size, DMA_TO_DEVICE); - len = sizeof (struct srp_cmd) + - sizeof (struct srp_indirect_buf) + - n * sizeof (struct srp_direct_buf); - } - } else { + memset(&state, 0, sizeof(state)); + srp_map_sg(&state, target, req, scat, count); + + /* We've mapped the request, now pull as much of the indirect + * descriptor table as we can into the command buffer. If this + * target is not using an external indirect table, we are + * guaranteed to fit into the command, as the SCSI layer won't + * give us more S/G entries than we allow. + */ + if (state.ndesc == 1) { + /* + * Memory registration collapsed the sg-list into one entry, + * so use a direct descriptor. + */ struct srp_direct_buf *buf = (void *) cmd->add_data; - dma_addr_t dma; - - dma = dma_map_single(target->srp_host->dev->dma_device, - scmnd->request_buffer, scmnd->request_bufflen, - scmnd->sc_data_direction); - if (dma_mapping_error(dma)) { - printk(KERN_WARNING PFX "unable to map %p/%d (dir %d)\n", - scmnd->request_buffer, (int) scmnd->request_bufflen, - scmnd->sc_data_direction); - return -EINVAL; - } - pci_unmap_addr_set(req, direct_mapping, dma); + *buf = req->indirect_desc[0]; + goto map_complete; + } - buf->va = cpu_to_be64(dma); - buf->key = cpu_to_be32(target->srp_host->mr->rkey); - buf->len = cpu_to_be32(scmnd->request_bufflen); + if (unlikely(target->cmd_sg_cnt < state.ndesc && + !target->allow_ext_sg)) { + shost_printk(KERN_ERR, target->scsi_host, + "Could not fit S/G list into SRP_CMD\n"); + return -EIO; + } - fmt = SRP_DATA_DESC_DIRECT; + count = min(state.ndesc, target->cmd_sg_cnt); + table_len = state.ndesc * sizeof (struct srp_direct_buf); - len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); - } + fmt = SRP_DATA_DESC_INDIRECT; + len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf); + len += count * sizeof (struct srp_direct_buf); + memcpy(indirect_hdr->desc_list, req->indirect_desc, + count * sizeof (struct srp_direct_buf)); + + indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); + indirect_hdr->table_desc.key = cpu_to_be32(target->rkey); + indirect_hdr->table_desc.len = cpu_to_be32(table_len); + indirect_hdr->len = cpu_to_be32(state.total_len); + + if (scmnd->sc_data_direction == DMA_TO_DEVICE) + cmd->data_out_desc_cnt = count; + else + cmd->data_in_desc_cnt = count; + + ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len, + DMA_TO_DEVICE); + +map_complete: if (scmnd->sc_data_direction == DMA_TO_DEVICE) cmd->buf_fmt = fmt << 4; else cmd->buf_fmt = fmt; - return len; } -static void srp_unmap_data(struct scsi_cmnd *scmnd, - struct srp_target_port *target, - struct srp_request *req) +/* + * Return an IU and possible credit to the free pool + */ +static void srp_put_tx_iu(struct srp_target_port *target, struct srp_iu *iu, + enum srp_iu_type iu_type) { - if (!scmnd->request_buffer || - (scmnd->sc_data_direction != DMA_TO_DEVICE && - scmnd->sc_data_direction != DMA_FROM_DEVICE)) - return; + unsigned long flags; - if (scmnd->use_sg) - dma_unmap_sg(target->srp_host->dev->dma_device, - (struct scatterlist *) scmnd->request_buffer, - scmnd->use_sg, scmnd->sc_data_direction); - else - dma_unmap_single(target->srp_host->dev->dma_device, - pci_unmap_addr(req, direct_mapping), - scmnd->request_bufflen, - scmnd->sc_data_direction); + spin_lock_irqsave(&target->lock, flags); + list_add(&iu->list, &target->free_tx); + if (iu_type != SRP_IU_RSP) + ++target->req_lim; + spin_unlock_irqrestore(&target->lock, flags); } -static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) +/* + * Must be called with target->lock held to protect req_lim and free_tx. + * If IU is not sent, it must be returned using srp_put_tx_iu(). + * + * Note: + * An upper limit for the number of allocated information units for each + * request type is: + * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues + * more than Scsi_Host.can_queue requests. + * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE. + * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than + * one unanswered SRP request to an initiator. + */ +static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target, + enum srp_iu_type iu_type) { - struct srp_request *req; - struct scsi_cmnd *scmnd; - unsigned long flags; - s32 delta; + s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE; + struct srp_iu *iu; - delta = (s32) be32_to_cpu(rsp->req_lim_delta); + srp_send_completion(target->send_cq, target); - spin_lock_irqsave(target->scsi_host->host_lock, flags); + if (list_empty(&target->free_tx)) + return NULL; - target->req_lim += delta; + /* Initiator responses to target requests do not consume credits */ + if (iu_type != SRP_IU_RSP) { + if (target->req_lim <= rsv) { + ++target->zero_req_lim; + return NULL; + } - req = &target->req_ring[rsp->tag & ~SRP_TAG_TSK_MGMT]; + --target->req_lim; + } + + iu = list_first_entry(&target->free_tx, struct srp_iu, list); + list_del(&iu->list); + return iu; +} + +static int srp_post_send(struct srp_target_port *target, + struct srp_iu *iu, int len) +{ + struct ib_sge list; + struct ib_send_wr wr, *bad_wr; + + list.addr = iu->dma; + list.length = len; + list.lkey = target->lkey; + + wr.next = NULL; + wr.wr_id = (uintptr_t) iu; + wr.sg_list = &list; + wr.num_sge = 1; + wr.opcode = IB_WR_SEND; + wr.send_flags = IB_SEND_SIGNALED; + + return ib_post_send(target->qp, &wr, &bad_wr); +} + +static int srp_post_recv(struct srp_target_port *target, struct srp_iu *iu) +{ + struct ib_recv_wr wr, *bad_wr; + struct ib_sge list; + + list.addr = iu->dma; + list.length = iu->size; + list.lkey = target->lkey; + + wr.next = NULL; + wr.wr_id = (uintptr_t) iu; + wr.sg_list = &list; + wr.num_sge = 1; + + return ib_post_recv(target->qp, &wr, &bad_wr); +} + +static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) +{ + struct srp_request *req; + struct scsi_cmnd *scmnd; + unsigned long flags; if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { - if (be32_to_cpu(rsp->resp_data_len) < 4) - req->tsk_status = -1; - else - req->tsk_status = rsp->data[3]; - complete(&req->done); + spin_lock_irqsave(&target->lock, flags); + target->req_lim += be32_to_cpu(rsp->req_lim_delta); + spin_unlock_irqrestore(&target->lock, flags); + + target->tsk_mgmt_status = -1; + if (be32_to_cpu(rsp->resp_data_len) >= 4) + target->tsk_mgmt_status = rsp->data[3]; + complete(&target->tsk_mgmt_done); } else { - scmnd = req->scmnd; - if (!scmnd) - printk(KERN_ERR "Null scmnd for RSP w/tag %016llx\n", - (unsigned long long) rsp->tag); + req = &target->req_ring[rsp->tag]; + scmnd = srp_claim_req(target, req, NULL, NULL); + if (!scmnd) { + shost_printk(KERN_ERR, target->scsi_host, + "Null scmnd for RSP w/tag %016llx\n", + (unsigned long long) rsp->tag); + + spin_lock_irqsave(&target->lock, flags); + target->req_lim += be32_to_cpu(rsp->req_lim_delta); + spin_unlock_irqrestore(&target->lock, flags); + + return; + } scmnd->result = rsp->status; if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { @@ -660,61 +1644,99 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) } if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER)) - scmnd->resid = be32_to_cpu(rsp->data_out_res_cnt); + scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER)) - scmnd->resid = be32_to_cpu(rsp->data_in_res_cnt); + scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); - srp_unmap_data(scmnd, target, req); + srp_free_req(target, req, scmnd, + be32_to_cpu(rsp->req_lim_delta)); - if (!req->tsk_mgmt) { - req->scmnd = NULL; - scmnd->host_scribble = (void *) -1L; - scmnd->scsi_done(scmnd); + scmnd->host_scribble = NULL; + scmnd->scsi_done(scmnd); + } +} - list_del(&req->list); - req->next = target->req_head; - target->req_head = rsp->tag & ~SRP_TAG_TSK_MGMT; - } else - req->cmd_done = 1; +static int srp_response_common(struct srp_target_port *target, s32 req_delta, + void *rsp, int len) +{ + struct ib_device *dev = target->srp_host->srp_dev->dev; + unsigned long flags; + struct srp_iu *iu; + int err; + + spin_lock_irqsave(&target->lock, flags); + target->req_lim += req_delta; + iu = __srp_get_tx_iu(target, SRP_IU_RSP); + spin_unlock_irqrestore(&target->lock, flags); + + if (!iu) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "no IU available to send response\n"); + return 1; + } + + ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE); + memcpy(iu->buf, rsp, len); + ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE); + + err = srp_post_send(target, iu, len); + if (err) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "unable to post response: %d\n", err); + srp_put_tx_iu(target, iu, SRP_IU_RSP); } - spin_unlock_irqrestore(target->scsi_host->host_lock, flags); + return err; } -static void srp_reconnect_work(void *target_ptr) +static void srp_process_cred_req(struct srp_target_port *target, + struct srp_cred_req *req) { - struct srp_target_port *target = target_ptr; + struct srp_cred_rsp rsp = { + .opcode = SRP_CRED_RSP, + .tag = req->tag, + }; + s32 delta = be32_to_cpu(req->req_lim_delta); + + if (srp_response_common(target, delta, &rsp, sizeof rsp)) + shost_printk(KERN_ERR, target->scsi_host, PFX + "problems processing SRP_CRED_REQ\n"); +} - srp_reconnect_target(target); +static void srp_process_aer_req(struct srp_target_port *target, + struct srp_aer_req *req) +{ + struct srp_aer_rsp rsp = { + .opcode = SRP_AER_RSP, + .tag = req->tag, + }; + s32 delta = be32_to_cpu(req->req_lim_delta); + + shost_printk(KERN_ERR, target->scsi_host, PFX + "ignoring AER for LUN %llu\n", be64_to_cpu(req->lun)); + + if (srp_response_common(target, delta, &rsp, sizeof rsp)) + shost_printk(KERN_ERR, target->scsi_host, PFX + "problems processing SRP_AER_REQ\n"); } static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) { - struct srp_iu *iu; + struct ib_device *dev = target->srp_host->srp_dev->dev; + struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id; + int res; u8 opcode; - iu = target->rx_ring[wc->wr_id & ~SRP_OP_RECV]; - - dma_sync_single_for_cpu(target->srp_host->dev->dma_device, iu->dma, - target->max_ti_iu_len, DMA_FROM_DEVICE); + ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_ti_iu_len, + DMA_FROM_DEVICE); opcode = *(u8 *) iu->buf; if (0) { - int i; - - printk(KERN_ERR PFX "recv completion, opcode 0x%02x\n", opcode); - - for (i = 0; i < wc->byte_len; ++i) { - if (i % 8 == 0) - printk(KERN_ERR " [%02x] ", i); - printk(" %02x", ((u8 *) iu->buf)[i]); - if ((i + 1) % 8 == 0) - printk("\n"); - } - - if (wc->byte_len % 8) - printk("\n"); + shost_printk(KERN_ERR, target->scsi_host, + PFX "recv completion, opcode 0x%02x\n", opcode); + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1, + iu->buf, wc->byte_len, true); } switch (opcode) { @@ -722,221 +1744,234 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) srp_process_rsp(target, iu->buf); break; + case SRP_CRED_REQ: + srp_process_cred_req(target, iu->buf); + break; + + case SRP_AER_REQ: + srp_process_aer_req(target, iu->buf); + break; + case SRP_T_LOGOUT: /* XXX Handle target logout */ - printk(KERN_WARNING PFX "Got target logout request\n"); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Got target logout request\n"); break; default: - printk(KERN_WARNING PFX "Unhandled SRP opcode 0x%02x\n", opcode); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled SRP opcode 0x%02x\n", opcode); break; } - dma_sync_single_for_device(target->srp_host->dev->dma_device, iu->dma, - target->max_ti_iu_len, DMA_FROM_DEVICE); -} + ib_dma_sync_single_for_device(dev, iu->dma, target->max_ti_iu_len, + DMA_FROM_DEVICE); -static void srp_completion(struct ib_cq *cq, void *target_ptr) -{ - struct srp_target_port *target = target_ptr; - struct ib_wc wc; - unsigned long flags; - - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - while (ib_poll_cq(cq, 1, &wc) > 0) { - if (wc.status) { - printk(KERN_ERR PFX "failed %s status %d\n", - wc.wr_id & SRP_OP_RECV ? "receive" : "send", - wc.status); - spin_lock_irqsave(target->scsi_host->host_lock, flags); - if (target->state == SRP_TARGET_LIVE) - schedule_work(&target->work); - spin_unlock_irqrestore(target->scsi_host->host_lock, flags); - break; - } - - if (wc.wr_id & SRP_OP_RECV) - srp_handle_recv(target, &wc); - else - ++target->tx_tail; - } + res = srp_post_recv(target, iu); + if (res != 0) + shost_printk(KERN_ERR, target->scsi_host, + PFX "Recv failed with error code %d\n", res); } -static int __srp_post_recv(struct srp_target_port *target) +/** + * srp_tl_err_work() - handle a transport layer error + * @work: Work structure embedded in an SRP target port. + * + * Note: This function may get invoked before the rport has been created, + * hence the target->rport test. + */ +static void srp_tl_err_work(struct work_struct *work) { - struct srp_iu *iu; - struct ib_sge list; - struct ib_recv_wr wr, *bad_wr; - unsigned int next; - int ret; - - next = target->rx_head & (SRP_RQ_SIZE - 1); - wr.wr_id = next | SRP_OP_RECV; - iu = target->rx_ring[next]; - - list.addr = iu->dma; - list.length = iu->size; - list.lkey = target->srp_host->mr->lkey; - - wr.next = NULL; - wr.sg_list = &list; - wr.num_sge = 1; - - ret = ib_post_recv(target->qp, &wr, &bad_wr); - if (!ret) - ++target->rx_head; + struct srp_target_port *target; - return ret; + target = container_of(work, struct srp_target_port, tl_err_work); + if (target->rport) + srp_start_tl_fail_timers(target->rport); } -static int srp_post_recv(struct srp_target_port *target) +static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, + bool send_err, struct srp_target_port *target) { - unsigned long flags; - int ret; - - spin_lock_irqsave(target->scsi_host->host_lock, flags); - ret = __srp_post_recv(target); - spin_unlock_irqrestore(target->scsi_host->host_lock, flags); - - return ret; + if (target->connected && !target->qp_in_error) { + if (wr_id & LOCAL_INV_WR_ID_MASK) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "LOCAL_INV failed with status %d\n", + wc_status); + } else if (wr_id & FAST_REG_WR_ID_MASK) { + shost_printk(KERN_ERR, target->scsi_host, PFX + "FAST_REG_MR failed status %d\n", + wc_status); + } else { + shost_printk(KERN_ERR, target->scsi_host, + PFX "failed %s status %d for iu %p\n", + send_err ? "send" : "receive", + wc_status, (void *)(uintptr_t)wr_id); + } + queue_work(system_long_wq, &target->tl_err_work); + } + target->qp_in_error = true; } -/* - * Must be called with target->scsi_host->host_lock held to protect - * req_lim and tx_head. Lock cannot be dropped between call here and - * call to __srp_post_send(). - */ -static struct srp_iu *__srp_get_tx_iu(struct srp_target_port *target) +static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) { - if (target->tx_head - target->tx_tail >= SRP_SQ_SIZE) - return NULL; + struct srp_target_port *target = target_ptr; + struct ib_wc wc; - if (unlikely(target->req_lim < 1)) { - if (printk_ratelimit()) - printk(KERN_DEBUG PFX "Target has req_lim %d\n", - target->req_lim); - return NULL; + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + while (ib_poll_cq(cq, 1, &wc) > 0) { + if (likely(wc.status == IB_WC_SUCCESS)) { + srp_handle_recv(target, &wc); + } else { + srp_handle_qp_err(wc.wr_id, wc.status, false, target); + } } - - return target->tx_ring[target->tx_head & SRP_SQ_SIZE]; } -/* - * Must be called with target->scsi_host->host_lock held to protect - * req_lim and tx_head. - */ -static int __srp_post_send(struct srp_target_port *target, - struct srp_iu *iu, int len) +static void srp_send_completion(struct ib_cq *cq, void *target_ptr) { - struct ib_sge list; - struct ib_send_wr wr, *bad_wr; - int ret = 0; - - list.addr = iu->dma; - list.length = len; - list.lkey = target->srp_host->mr->lkey; - - wr.next = NULL; - wr.wr_id = target->tx_head & SRP_SQ_SIZE; - wr.sg_list = &list; - wr.num_sge = 1; - wr.opcode = IB_WR_SEND; - wr.send_flags = IB_SEND_SIGNALED; - - ret = ib_post_send(target->qp, &wr, &bad_wr); + struct srp_target_port *target = target_ptr; + struct ib_wc wc; + struct srp_iu *iu; - if (!ret) { - ++target->tx_head; - --target->req_lim; + while (ib_poll_cq(cq, 1, &wc) > 0) { + if (likely(wc.status == IB_WC_SUCCESS)) { + iu = (struct srp_iu *) (uintptr_t) wc.wr_id; + list_add(&iu->list, &target->free_tx); + } else { + srp_handle_qp_err(wc.wr_id, wc.status, true, target); + } } - - return ret; } -static int srp_queuecommand(struct scsi_cmnd *scmnd, - void (*done)(struct scsi_cmnd *)) +static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) { - struct srp_target_port *target = host_to_target(scmnd->device->host); + struct srp_target_port *target = host_to_target(shost); + struct srp_rport *rport = target->rport; struct srp_request *req; struct srp_iu *iu; struct srp_cmd *cmd; - long req_index; - int len; + struct ib_device *dev; + unsigned long flags; + int len, ret; + const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler; - if (target->state == SRP_TARGET_CONNECTING) - goto err; + /* + * The SCSI EH thread is the only context from which srp_queuecommand() + * can get invoked for blocked devices (SDEV_BLOCK / + * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by + * locking the rport mutex if invoked from inside the SCSI EH. + */ + if (in_scsi_eh) + mutex_lock(&rport->mutex); - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) { - scmnd->result = DID_BAD_TARGET << 16; - done(scmnd); - return 0; - } + scmnd->result = srp_chkready(target->rport); + if (unlikely(scmnd->result)) + goto err; - iu = __srp_get_tx_iu(target); + spin_lock_irqsave(&target->lock, flags); + iu = __srp_get_tx_iu(target, SRP_IU_CMD); if (!iu) - goto err; + goto err_unlock; - dma_sync_single_for_cpu(target->srp_host->dev->dma_device, iu->dma, - SRP_MAX_IU_LEN, DMA_TO_DEVICE); + req = list_first_entry(&target->free_reqs, struct srp_request, list); + list_del(&req->list); + spin_unlock_irqrestore(&target->lock, flags); - req_index = target->req_head; + dev = target->srp_host->srp_dev->dev; + ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len, + DMA_TO_DEVICE); - scmnd->scsi_done = done; - scmnd->result = 0; - scmnd->host_scribble = (void *) req_index; + scmnd->host_scribble = (void *) req; cmd = iu->buf; memset(cmd, 0, sizeof *cmd); cmd->opcode = SRP_CMD; cmd->lun = cpu_to_be64((u64) scmnd->device->lun << 48); - cmd->tag = req_index; + cmd->tag = req->index; memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); - req = &target->req_ring[req_index]; - req->scmnd = scmnd; req->cmd = iu; - req->cmd_done = 0; - req->tsk_mgmt = NULL; len = srp_map_data(scmnd, target, req); if (len < 0) { - printk(KERN_ERR PFX "Failed to map data\n"); - goto err; - } - - if (__srp_post_recv(target)) { - printk(KERN_ERR PFX "Recv failed\n"); - goto err_unmap; + shost_printk(KERN_ERR, target->scsi_host, + PFX "Failed to map data (%d)\n", len); + /* + * If we ran out of memory descriptors (-ENOMEM) because an + * application is queuing many requests with more than + * max_pages_per_mr sg-list elements, tell the SCSI mid-layer + * to reduce queue depth temporarily. + */ + scmnd->result = len == -ENOMEM ? + DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16; + goto err_iu; } - dma_sync_single_for_device(target->srp_host->dev->dma_device, iu->dma, - SRP_MAX_IU_LEN, DMA_TO_DEVICE); + ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len, + DMA_TO_DEVICE); - if (__srp_post_send(target, iu, len)) { - printk(KERN_ERR PFX "Send failed\n"); + if (srp_post_send(target, iu, len)) { + shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); goto err_unmap; } - target->req_head = req->next; - list_add_tail(&req->list, &target->req_queue); + ret = 0; - return 0; +unlock_rport: + if (in_scsi_eh) + mutex_unlock(&rport->mutex); + + return ret; err_unmap: srp_unmap_data(scmnd, target, req); +err_iu: + srp_put_tx_iu(target, iu, SRP_IU_CMD); + + /* + * Avoid that the loops that iterate over the request ring can + * encounter a dangling SCSI command pointer. + */ + req->scmnd = NULL; + + spin_lock_irqsave(&target->lock, flags); + list_add(&req->list, &target->free_reqs); + +err_unlock: + spin_unlock_irqrestore(&target->lock, flags); + err: - return SCSI_MLQUEUE_HOST_BUSY; + if (scmnd->result) { + scmnd->scsi_done(scmnd); + ret = 0; + } else { + ret = SCSI_MLQUEUE_HOST_BUSY; + } + + goto unlock_rport; } +/* + * Note: the resources allocated in this function are freed in + * srp_free_target_ib(). + */ static int srp_alloc_iu_bufs(struct srp_target_port *target) { int i; - for (i = 0; i < SRP_RQ_SIZE; ++i) { + target->rx_ring = kzalloc(target->queue_size * sizeof(*target->rx_ring), + GFP_KERNEL); + if (!target->rx_ring) + goto err_no_ring; + target->tx_ring = kzalloc(target->queue_size * sizeof(*target->tx_ring), + GFP_KERNEL); + if (!target->tx_ring) + goto err_no_ring; + + for (i = 0; i < target->queue_size; ++i) { target->rx_ring[i] = srp_alloc_iu(target->srp_host, target->max_ti_iu_len, GFP_KERNEL, DMA_FROM_DEVICE); @@ -944,34 +1979,143 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target) goto err; } - for (i = 0; i < SRP_SQ_SIZE + 1; ++i) { + for (i = 0; i < target->queue_size; ++i) { target->tx_ring[i] = srp_alloc_iu(target->srp_host, - SRP_MAX_IU_LEN, + target->max_iu_len, GFP_KERNEL, DMA_TO_DEVICE); if (!target->tx_ring[i]) goto err; + + list_add(&target->tx_ring[i]->list, &target->free_tx); } return 0; err: - for (i = 0; i < SRP_RQ_SIZE; ++i) { + for (i = 0; i < target->queue_size; ++i) { srp_free_iu(target->srp_host, target->rx_ring[i]); - target->rx_ring[i] = NULL; - } - - for (i = 0; i < SRP_SQ_SIZE + 1; ++i) { srp_free_iu(target->srp_host, target->tx_ring[i]); - target->tx_ring[i] = NULL; } + +err_no_ring: + kfree(target->tx_ring); + target->tx_ring = NULL; + kfree(target->rx_ring); + target->rx_ring = NULL; + return -ENOMEM; } +static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) +{ + uint64_t T_tr_ns, max_compl_time_ms; + uint32_t rq_tmo_jiffies; + + /* + * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair, + * table 91), both the QP timeout and the retry count have to be set + * for RC QP's during the RTR to RTS transition. + */ + WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) != + (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)); + + /* + * Set target->rq_tmo_jiffies to one second more than the largest time + * it can take before an error completion is generated. See also + * C9-140..142 in the IBTA spec for more information about how to + * convert the QP Local ACK Timeout value to nanoseconds. + */ + T_tr_ns = 4096 * (1ULL << qp_attr->timeout); + max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns; + do_div(max_compl_time_ms, NSEC_PER_MSEC); + rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000); + + return rq_tmo_jiffies; +} + +static void srp_cm_rep_handler(struct ib_cm_id *cm_id, + struct srp_login_rsp *lrsp, + struct srp_target_port *target) +{ + struct ib_qp_attr *qp_attr = NULL; + int attr_mask = 0; + int ret; + int i; + + if (lrsp->opcode == SRP_LOGIN_RSP) { + target->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); + target->req_lim = be32_to_cpu(lrsp->req_lim_delta); + + /* + * Reserve credits for task management so we don't + * bounce requests back to the SCSI mid-layer. + */ + target->scsi_host->can_queue + = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE, + target->scsi_host->can_queue); + target->scsi_host->cmd_per_lun + = min_t(int, target->scsi_host->can_queue, + target->scsi_host->cmd_per_lun); + } else { + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); + ret = -ECONNRESET; + goto error; + } + + if (!target->rx_ring) { + ret = srp_alloc_iu_bufs(target); + if (ret) + goto error; + } + + ret = -ENOMEM; + qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + if (!qp_attr) + goto error; + + qp_attr->qp_state = IB_QPS_RTR; + ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); + if (ret) + goto error_free; + + ret = ib_modify_qp(target->qp, qp_attr, attr_mask); + if (ret) + goto error_free; + + for (i = 0; i < target->queue_size; i++) { + struct srp_iu *iu = target->rx_ring[i]; + ret = srp_post_recv(target, iu); + if (ret) + goto error_free; + } + + qp_attr->qp_state = IB_QPS_RTS; + ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); + if (ret) + goto error_free; + + target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); + + ret = ib_modify_qp(target->qp, qp_attr, attr_mask); + if (ret) + goto error_free; + + ret = ib_send_cm_rtu(cm_id, NULL, 0); + +error_free: + kfree(qp_attr); + +error: + target->status = ret; +} + static void srp_cm_rej_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event, struct srp_target_port *target) { + struct Scsi_Host *shost = target->scsi_host; struct ib_class_port_info *cpi; int opcode; @@ -988,8 +2132,7 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, break; case IB_CM_REJ_PORT_REDIRECT: - if (topspin_workarounds && - !memcmp(&target->ioc_guid, topspin_oui, 3)) { + if (srp_target_is_topspin(target)) { /* * Topspin/Cisco SRP gateways incorrectly send * reject reason code 25 when they mean 24 @@ -998,19 +2141,22 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, memcpy(target->path.dgid.raw, event->param.rej_rcvd.ari, 16); - printk(KERN_DEBUG PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", - (unsigned long long) be64_to_cpu(target->path.dgid.global.subnet_prefix), - (unsigned long long) be64_to_cpu(target->path.dgid.global.interface_id)); + shost_printk(KERN_DEBUG, shost, + PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", + (unsigned long long) be64_to_cpu(target->path.dgid.global.subnet_prefix), + (unsigned long long) be64_to_cpu(target->path.dgid.global.interface_id)); target->status = SRP_PORT_REDIRECT; } else { - printk(KERN_WARNING " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); target->status = -ECONNRESET; } break; case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: - printk(KERN_WARNING " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); target->status = -ECONNRESET; break; @@ -1021,20 +2167,28 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, u32 reason = be32_to_cpu(rej->reason); if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) - printk(KERN_WARNING PFX - "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); + shost_printk(KERN_WARNING, shost, + PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); else - printk(KERN_WARNING PFX - "SRP LOGIN REJECTED, reason 0x%08x\n", reason); + shost_printk(KERN_WARNING, shost, PFX + "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", + target->path.sgid.raw, + target->orig_dgid, reason); } else - printk(KERN_WARNING " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," - " opcode 0x%02x\n", opcode); + shost_printk(KERN_WARNING, shost, + " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," + " opcode 0x%02x\n", opcode); target->status = -ECONNRESET; break; + case IB_CM_REJ_STALE_CONN: + shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n"); + target->status = SRP_STALE_CONN; + break; + default: - printk(KERN_WARNING " REJ reason 0x%x\n", - event->param.rej_rcvd.reason); + shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", + event->param.rej_rcvd.reason); target->status = -ECONNRESET; } } @@ -1042,244 +2196,498 @@ static void srp_cm_rej_handler(struct ib_cm_id *cm_id, static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) { struct srp_target_port *target = cm_id->context; - struct ib_qp_attr *qp_attr = NULL; - int attr_mask = 0; int comp = 0; - int opcode = 0; switch (event->event) { case IB_CM_REQ_ERROR: - printk(KERN_DEBUG PFX "Sending CM REQ failed\n"); + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM REQ failed\n"); comp = 1; target->status = -ECONNRESET; break; case IB_CM_REP_RECEIVED: comp = 1; - opcode = *(u8 *) event->private_data; - - if (opcode == SRP_LOGIN_RSP) { - struct srp_login_rsp *rsp = event->private_data; - - target->max_ti_iu_len = be32_to_cpu(rsp->max_ti_iu_len); - target->req_lim = be32_to_cpu(rsp->req_lim_delta); - - target->scsi_host->can_queue = min(target->req_lim, - target->scsi_host->can_queue); - } else { - printk(KERN_WARNING PFX "Unhandled RSP opcode %#x\n", opcode); - target->status = -ECONNRESET; - break; - } - - target->status = srp_alloc_iu_bufs(target); - if (target->status) - break; - - qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); - if (!qp_attr) { - target->status = -ENOMEM; - break; - } - - qp_attr->qp_state = IB_QPS_RTR; - target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); - if (target->status) - break; - - target->status = ib_modify_qp(target->qp, qp_attr, attr_mask); - if (target->status) - break; - - target->status = srp_post_recv(target); - if (target->status) - break; - - qp_attr->qp_state = IB_QPS_RTS; - target->status = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); - if (target->status) - break; - - target->status = ib_modify_qp(target->qp, qp_attr, attr_mask); - if (target->status) - break; - - target->status = ib_send_cm_rtu(cm_id, NULL, 0); - if (target->status) - break; - + srp_cm_rep_handler(cm_id, event->private_data, target); break; case IB_CM_REJ_RECEIVED: - printk(KERN_DEBUG PFX "REJ received\n"); + shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); comp = 1; srp_cm_rej_handler(cm_id, event, target); break; - case IB_CM_MRA_RECEIVED: - printk(KERN_ERR PFX "MRA received\n"); - break; - - case IB_CM_DREP_RECEIVED: + case IB_CM_DREQ_RECEIVED: + shost_printk(KERN_WARNING, target->scsi_host, + PFX "DREQ received - connection closed\n"); + srp_change_conn_state(target, false); + if (ib_send_cm_drep(cm_id, NULL, 0)) + shost_printk(KERN_ERR, target->scsi_host, + PFX "Sending CM DREP failed\n"); + queue_work(system_long_wq, &target->tl_err_work); break; case IB_CM_TIMEWAIT_EXIT: - printk(KERN_ERR PFX "connection closed\n"); - + shost_printk(KERN_ERR, target->scsi_host, + PFX "connection closed\n"); comp = 1; + target->status = 0; break; + case IB_CM_MRA_RECEIVED: + case IB_CM_DREQ_ERROR: + case IB_CM_DREP_RECEIVED: + break; + default: - printk(KERN_WARNING PFX "Unhandled CM event %d\n", event->event); + shost_printk(KERN_WARNING, target->scsi_host, + PFX "Unhandled CM event %d\n", event->event); break; } if (comp) complete(&target->done); - kfree(qp_attr); - return 0; } -static int srp_send_tsk_mgmt(struct scsi_cmnd *scmnd, u8 func) +/** + * srp_change_queue_type - changing device queue tag type + * @sdev: scsi device struct + * @tag_type: requested tag type + * + * Returns queue tag type. + */ +static int +srp_change_queue_type(struct scsi_device *sdev, int tag_type) { - struct srp_target_port *target = host_to_target(scmnd->device->host); - struct srp_request *req; + if (sdev->tagged_supported) { + scsi_set_tag_type(sdev, tag_type); + if (tag_type) + scsi_activate_tcq(sdev, sdev->queue_depth); + else + scsi_deactivate_tcq(sdev, sdev->queue_depth); + } else + tag_type = 0; + + return tag_type; +} + +/** + * srp_change_queue_depth - setting device queue depth + * @sdev: scsi device struct + * @qdepth: requested queue depth + * @reason: SCSI_QDEPTH_DEFAULT/SCSI_QDEPTH_QFULL/SCSI_QDEPTH_RAMP_UP + * (see include/scsi/scsi_host.h for definition) + * + * Returns queue depth. + */ +static int +srp_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) +{ + struct Scsi_Host *shost = sdev->host; + int max_depth; + if (reason == SCSI_QDEPTH_DEFAULT || reason == SCSI_QDEPTH_RAMP_UP) { + max_depth = shost->can_queue; + if (!sdev->tagged_supported) + max_depth = 1; + if (qdepth > max_depth) + qdepth = max_depth; + scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), qdepth); + } else if (reason == SCSI_QDEPTH_QFULL) + scsi_track_queue_full(sdev, qdepth); + else + return -EOPNOTSUPP; + + return sdev->queue_depth; +} + +static int srp_send_tsk_mgmt(struct srp_target_port *target, + u64 req_tag, unsigned int lun, u8 func) +{ + struct srp_rport *rport = target->rport; + struct ib_device *dev = target->srp_host->srp_dev->dev; struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; - int req_index; - int ret = FAILED; - spin_lock_irq(target->scsi_host->host_lock); + if (!target->connected || target->qp_in_error) + return -1; - if (scmnd->host_scribble == (void *) -1L) - goto out; + init_completion(&target->tsk_mgmt_done); - req_index = (long) scmnd->host_scribble; - printk(KERN_ERR "Abort for req_index %d\n", req_index); + /* + * Lock the rport mutex to avoid that srp_create_target_ib() is + * invoked while a task management function is being sent. + */ + mutex_lock(&rport->mutex); + spin_lock_irq(&target->lock); + iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT); + spin_unlock_irq(&target->lock); - req = &target->req_ring[req_index]; - init_completion(&req->done); + if (!iu) { + mutex_unlock(&rport->mutex); - iu = __srp_get_tx_iu(target); - if (!iu) - goto out; + return -1; + } + ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, + DMA_TO_DEVICE); tsk_mgmt = iu->buf; memset(tsk_mgmt, 0, sizeof *tsk_mgmt); tsk_mgmt->opcode = SRP_TSK_MGMT; - tsk_mgmt->lun = cpu_to_be64((u64) scmnd->device->lun << 48); - tsk_mgmt->tag = req_index | SRP_TAG_TSK_MGMT; + tsk_mgmt->lun = cpu_to_be64((u64) lun << 48); + tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT; tsk_mgmt->tsk_mgmt_func = func; - tsk_mgmt->task_tag = req_index; + tsk_mgmt->task_tag = req_tag; - if (__srp_post_send(target, iu, sizeof *tsk_mgmt)) - goto out; + ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, + DMA_TO_DEVICE); + if (srp_post_send(target, iu, sizeof *tsk_mgmt)) { + srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT); + mutex_unlock(&rport->mutex); - req->tsk_mgmt = iu; + return -1; + } + mutex_unlock(&rport->mutex); - spin_unlock_irq(target->scsi_host->host_lock); - if (!wait_for_completion_timeout(&req->done, + if (!wait_for_completion_timeout(&target->tsk_mgmt_done, msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS))) - return FAILED; - spin_lock_irq(target->scsi_host->host_lock); - - if (req->cmd_done) { - list_del(&req->list); - req->next = target->req_head; - target->req_head = req_index; - - scmnd->scsi_done(scmnd); - } else if (!req->tsk_status) { - scmnd->result = DID_ABORT << 16; - ret = SUCCESS; - } + return -1; -out: - spin_unlock_irq(target->scsi_host->host_lock); - return ret; + return 0; } static int srp_abort(struct scsi_cmnd *scmnd) { - printk(KERN_ERR "SRP abort called\n"); + struct srp_target_port *target = host_to_target(scmnd->device->host); + struct srp_request *req = (struct srp_request *) scmnd->host_scribble; + int ret; + + shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); + + if (!req || !srp_claim_req(target, req, NULL, scmnd)) + return SUCCESS; + if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, + SRP_TSK_ABORT_TASK) == 0) + ret = SUCCESS; + else if (target->rport->state == SRP_RPORT_LOST) + ret = FAST_IO_FAIL; + else + ret = FAILED; + srp_free_req(target, req, scmnd, 0); + scmnd->result = DID_ABORT << 16; + scmnd->scsi_done(scmnd); - return srp_send_tsk_mgmt(scmnd, SRP_TSK_ABORT_TASK); + return ret; } static int srp_reset_device(struct scsi_cmnd *scmnd) { - printk(KERN_ERR "SRP reset_device called\n"); + struct srp_target_port *target = host_to_target(scmnd->device->host); + int i; + + shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); + + if (srp_send_tsk_mgmt(target, SRP_TAG_NO_REQ, scmnd->device->lun, + SRP_TSK_LUN_RESET)) + return FAILED; + if (target->tsk_mgmt_status) + return FAILED; - return srp_send_tsk_mgmt(scmnd, SRP_TSK_LUN_RESET); + for (i = 0; i < target->req_ring_size; ++i) { + struct srp_request *req = &target->req_ring[i]; + srp_finish_req(target, req, scmnd->device, DID_RESET << 16); + } + + return SUCCESS; } static int srp_reset_host(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); - int ret = FAILED; - printk(KERN_ERR PFX "SRP reset_host called\n"); + shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); - if (!srp_reconnect_target(target)) - ret = SUCCESS; + return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED; +} - return ret; +static int srp_slave_configure(struct scsi_device *sdev) +{ + struct Scsi_Host *shost = sdev->host; + struct srp_target_port *target = host_to_target(shost); + struct request_queue *q = sdev->request_queue; + unsigned long timeout; + + if (sdev->type == TYPE_DISK) { + timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies); + blk_queue_rq_timeout(q, timeout); + } + + return 0; +} + +static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "0x%016llx\n", + (unsigned long long) be64_to_cpu(target->id_ext)); +} + +static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "0x%016llx\n", + (unsigned long long) be64_to_cpu(target->ioc_guid)); +} + +static ssize_t show_service_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "0x%016llx\n", + (unsigned long long) be64_to_cpu(target->service_id)); } +static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey)); +} + +static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%pI6\n", target->path.sgid.raw); +} + +static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%pI6\n", target->path.dgid.raw); +} + +static ssize_t show_orig_dgid(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%pI6\n", target->orig_dgid); +} + +static ssize_t show_req_lim(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%d\n", target->req_lim); +} + +static ssize_t show_zero_req_lim(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%d\n", target->zero_req_lim); +} + +static ssize_t show_local_ib_port(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%d\n", target->srp_host->port); +} + +static ssize_t show_local_ib_device(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); +} + +static ssize_t show_comp_vector(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%d\n", target->comp_vector); +} + +static ssize_t show_tl_retry_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%d\n", target->tl_retry_count); +} + +static ssize_t show_cmd_sg_entries(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%u\n", target->cmd_sg_cnt); +} + +static ssize_t show_allow_ext_sg(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false"); +} + +static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); +static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); +static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); +static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); +static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL); +static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); +static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); +static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); +static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); +static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); +static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); +static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); +static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL); +static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); +static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); + +static struct device_attribute *srp_host_attrs[] = { + &dev_attr_id_ext, + &dev_attr_ioc_guid, + &dev_attr_service_id, + &dev_attr_pkey, + &dev_attr_sgid, + &dev_attr_dgid, + &dev_attr_orig_dgid, + &dev_attr_req_lim, + &dev_attr_zero_req_lim, + &dev_attr_local_ib_port, + &dev_attr_local_ib_device, + &dev_attr_comp_vector, + &dev_attr_tl_retry_count, + &dev_attr_cmd_sg_entries, + &dev_attr_allow_ext_sg, + NULL +}; + static struct scsi_host_template srp_template = { .module = THIS_MODULE, - .name = DRV_NAME, + .name = "InfiniBand SRP initiator", + .proc_name = DRV_NAME, + .slave_configure = srp_slave_configure, .info = srp_target_info, .queuecommand = srp_queuecommand, + .change_queue_depth = srp_change_queue_depth, + .change_queue_type = srp_change_queue_type, .eh_abort_handler = srp_abort, .eh_device_reset_handler = srp_reset_device, .eh_host_reset_handler = srp_reset_host, - .can_queue = SRP_SQ_SIZE, + .skip_settle_delay = true, + .sg_tablesize = SRP_DEF_SG_TABLESIZE, + .can_queue = SRP_DEFAULT_CMD_SQ_SIZE, .this_id = -1, - .sg_tablesize = SRP_MAX_INDIRECT, - .cmd_per_lun = SRP_SQ_SIZE, - .use_clustering = ENABLE_CLUSTERING + .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, + .use_clustering = ENABLE_CLUSTERING, + .shost_attrs = srp_host_attrs }; static int srp_add_target(struct srp_host *host, struct srp_target_port *target) { + struct srp_rport_identifiers ids; + struct srp_rport *rport; + sprintf(target->target_name, "SRP.T10:%016llX", (unsigned long long) be64_to_cpu(target->id_ext)); - if (scsi_add_host(target->scsi_host, host->dev->dma_device)) + if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device)) return -ENODEV; - down(&host->target_mutex); + memcpy(ids.port_id, &target->id_ext, 8); + memcpy(ids.port_id + 8, &target->ioc_guid, 8); + ids.roles = SRP_RPORT_ROLE_TARGET; + rport = srp_rport_add(target->scsi_host, &ids); + if (IS_ERR(rport)) { + scsi_remove_host(target->scsi_host); + return PTR_ERR(rport); + } + + rport->lld_data = target; + target->rport = rport; + + spin_lock(&host->target_lock); list_add_tail(&target->list, &host->target_list); - up(&host->target_mutex); + spin_unlock(&host->target_lock); target->state = SRP_TARGET_LIVE; - /* XXX: are we supposed to have a definition of SCAN_WILD_CARD ?? */ scsi_scan_target(&target->scsi_host->shost_gendev, - 0, target->scsi_id, ~0, 0); + 0, target->scsi_id, SCAN_WILD_CARD, 0); return 0; } -static void srp_release_class_dev(struct class_device *class_dev) +static void srp_release_dev(struct device *dev) { struct srp_host *host = - container_of(class_dev, struct srp_host, class_dev); + container_of(dev, struct srp_host, dev); complete(&host->released); } static struct class srp_class = { .name = "infiniband_srp", - .release = srp_release_class_dev + .dev_release = srp_release_dev }; +/** + * srp_conn_unique() - check whether the connection to a target is unique + * @host: SRP host. + * @target: SRP target port. + */ +static bool srp_conn_unique(struct srp_host *host, + struct srp_target_port *target) +{ + struct srp_target_port *t; + bool ret = false; + + if (target->state == SRP_TARGET_REMOVED) + goto out; + + ret = true; + + spin_lock(&host->target_lock); + list_for_each_entry(t, &host->target_list, list) { + if (t != target && + target->id_ext == t->id_ext && + target->ioc_guid == t->ioc_guid && + target->initiator_ext == t->initiator_ext) { + ret = false; + break; + } + } + spin_unlock(&host->target_lock); + +out: + return ret; +} + /* * Target ports are added by writing * @@ -1296,6 +2704,15 @@ enum { SRP_OPT_PKEY = 1 << 3, SRP_OPT_SERVICE_ID = 1 << 4, SRP_OPT_MAX_SECT = 1 << 5, + SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, + SRP_OPT_IO_CLASS = 1 << 7, + SRP_OPT_INITIATOR_EXT = 1 << 8, + SRP_OPT_CMD_SG_ENTRIES = 1 << 9, + SRP_OPT_ALLOW_EXT_SG = 1 << 10, + SRP_OPT_SG_TABLESIZE = 1 << 11, + SRP_OPT_COMP_VECTOR = 1 << 12, + SRP_OPT_TL_RETRY_COUNT = 1 << 13, + SRP_OPT_QUEUE_SIZE = 1 << 14, SRP_OPT_ALL = (SRP_OPT_ID_EXT | SRP_OPT_IOC_GUID | SRP_OPT_DGID | @@ -1303,14 +2720,23 @@ enum { SRP_OPT_SERVICE_ID), }; -static match_table_t srp_opt_tokens = { - { SRP_OPT_ID_EXT, "id_ext=%s" }, - { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, - { SRP_OPT_DGID, "dgid=%s" }, - { SRP_OPT_PKEY, "pkey=%x" }, - { SRP_OPT_SERVICE_ID, "service_id=%s" }, - { SRP_OPT_MAX_SECT, "max_sect=%d" }, - { SRP_OPT_ERR, NULL } +static const match_table_t srp_opt_tokens = { + { SRP_OPT_ID_EXT, "id_ext=%s" }, + { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, + { SRP_OPT_DGID, "dgid=%s" }, + { SRP_OPT_PKEY, "pkey=%x" }, + { SRP_OPT_SERVICE_ID, "service_id=%s" }, + { SRP_OPT_MAX_SECT, "max_sect=%d" }, + { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, + { SRP_OPT_IO_CLASS, "io_class=%x" }, + { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, + { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, + { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, + { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, + { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, + { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" }, + { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, + { SRP_OPT_ERR, NULL } }; static int srp_parse_options(const char *buf, struct srp_target_port *target) @@ -1339,20 +2765,33 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) switch (token) { case SRP_OPT_ID_EXT: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; case SRP_OPT_IOC_GUID: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16)); kfree(p); break; case SRP_OPT_DGID: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } if (strlen(p) != 32) { - printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p); + pr_warn("bad dest GID parameter '%s'\n", p); + kfree(p); goto out; } @@ -1360,11 +2799,13 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) strlcpy(dgid, p + i * 2, 3); target->path.dgid.raw[i] = simple_strtoul(dgid, NULL, 16); } + kfree(p); + memcpy(target->orig_dgid, target->path.dgid.raw, 16); break; case SRP_OPT_PKEY: if (match_hex(args, &token)) { - printk(KERN_WARNING PFX "bad P_Key parameter '%s'\n", p); + pr_warn("bad P_Key parameter '%s'\n", p); goto out; } target->path.pkey = cpu_to_be16(token); @@ -1372,21 +2813,116 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) case SRP_OPT_SERVICE_ID: p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16)); + target->path.service_id = target->service_id; kfree(p); break; case SRP_OPT_MAX_SECT: if (match_int(args, &token)) { - printk(KERN_WARNING PFX "bad max sect parameter '%s'\n", p); + pr_warn("bad max sect parameter '%s'\n", p); goto out; } target->scsi_host->max_sectors = token; break; + case SRP_OPT_QUEUE_SIZE: + if (match_int(args, &token) || token < 1) { + pr_warn("bad queue_size parameter '%s'\n", p); + goto out; + } + target->scsi_host->can_queue = token; + target->queue_size = token + SRP_RSP_SQ_SIZE + + SRP_TSK_MGMT_SQ_SIZE; + if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) + target->scsi_host->cmd_per_lun = token; + break; + + case SRP_OPT_MAX_CMD_PER_LUN: + if (match_int(args, &token) || token < 1) { + pr_warn("bad max cmd_per_lun parameter '%s'\n", + p); + goto out; + } + target->scsi_host->cmd_per_lun = token; + break; + + case SRP_OPT_IO_CLASS: + if (match_hex(args, &token)) { + pr_warn("bad IO class parameter '%s'\n", p); + goto out; + } + if (token != SRP_REV10_IB_IO_CLASS && + token != SRP_REV16A_IB_IO_CLASS) { + pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", + token, SRP_REV10_IB_IO_CLASS, + SRP_REV16A_IB_IO_CLASS); + goto out; + } + target->io_class = token; + break; + + case SRP_OPT_INITIATOR_EXT: + p = match_strdup(args); + if (!p) { + ret = -ENOMEM; + goto out; + } + target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); + kfree(p); + break; + + case SRP_OPT_CMD_SG_ENTRIES: + if (match_int(args, &token) || token < 1 || token > 255) { + pr_warn("bad max cmd_sg_entries parameter '%s'\n", + p); + goto out; + } + target->cmd_sg_cnt = token; + break; + + case SRP_OPT_ALLOW_EXT_SG: + if (match_int(args, &token)) { + pr_warn("bad allow_ext_sg parameter '%s'\n", p); + goto out; + } + target->allow_ext_sg = !!token; + break; + + case SRP_OPT_SG_TABLESIZE: + if (match_int(args, &token) || token < 1 || + token > SCSI_MAX_SG_CHAIN_SEGMENTS) { + pr_warn("bad max sg_tablesize parameter '%s'\n", + p); + goto out; + } + target->sg_tablesize = token; + break; + + case SRP_OPT_COMP_VECTOR: + if (match_int(args, &token) || token < 0) { + pr_warn("bad comp_vector parameter '%s'\n", p); + goto out; + } + target->comp_vector = token; + break; + + case SRP_OPT_TL_RETRY_COUNT: + if (match_int(args, &token) || token < 2 || token > 7) { + pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", + p); + goto out; + } + target->tl_retry_count = token; + break; + default: - printk(KERN_WARNING PFX "unknown parameter or missing value " - "'%s' in target creation request\n", p); + pr_warn("unknown parameter or missing value '%s' in target creation request\n", + p); goto out; } } @@ -1397,79 +2933,111 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i) if ((srp_opt_tokens[i].token & SRP_OPT_ALL) && !(srp_opt_tokens[i].token & opt_mask)) - printk(KERN_WARNING PFX "target creation request is " - "missing parameter '%s'\n", - srp_opt_tokens[i].pattern); + pr_warn("target creation request is missing parameter '%s'\n", + srp_opt_tokens[i].pattern); + + if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue + && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) + pr_warn("cmd_per_lun = %d > queue_size = %d\n", + target->scsi_host->cmd_per_lun, + target->scsi_host->can_queue); out: kfree(options); return ret; } -static ssize_t srp_create_target(struct class_device *class_dev, +static ssize_t srp_create_target(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { struct srp_host *host = - container_of(class_dev, struct srp_host, class_dev); + container_of(dev, struct srp_host, dev); struct Scsi_Host *target_host; struct srp_target_port *target; + struct srp_device *srp_dev = host->srp_dev; + struct ib_device *ibdev = srp_dev->dev; int ret; - int i; target_host = scsi_host_alloc(&srp_template, sizeof (struct srp_target_port)); if (!target_host) return -ENOMEM; - target_host->max_lun = SRP_MAX_LUN; + target_host->transportt = ib_srp_transport_template; + target_host->max_channel = 0; + target_host->max_id = 1; + target_host->max_lun = SRP_MAX_LUN; + target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; target = host_to_target(target_host); - memset(target, 0, sizeof *target); - target->scsi_host = target_host; - target->srp_host = host; + target->io_class = SRP_REV16A_IB_IO_CLASS; + target->scsi_host = target_host; + target->srp_host = host; + target->lkey = host->srp_dev->mr->lkey; + target->rkey = host->srp_dev->mr->rkey; + target->cmd_sg_cnt = cmd_sg_entries; + target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; + target->allow_ext_sg = allow_ext_sg; + target->tl_retry_count = 7; + target->queue_size = SRP_DEFAULT_QUEUE_SIZE; - INIT_WORK(&target->work, srp_reconnect_work, target); - - for (i = 0; i < SRP_SQ_SIZE - 1; ++i) - target->req_ring[i].next = i + 1; - target->req_ring[SRP_SQ_SIZE - 1].next = -1; - INIT_LIST_HEAD(&target->req_queue); + mutex_lock(&host->add_target_mutex); ret = srp_parse_options(buf, target); if (ret) goto err; - ib_get_cached_gid(host->dev, host->port, 0, &target->path.sgid); - - printk(KERN_DEBUG PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " - "service_id %016llx dgid %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - (unsigned long long) be64_to_cpu(target->id_ext), - (unsigned long long) be64_to_cpu(target->ioc_guid), - be16_to_cpu(target->path.pkey), - (unsigned long long) be64_to_cpu(target->service_id), - (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[0]), - (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[2]), - (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[4]), - (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[6]), - (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[8]), - (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[10]), - (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[12]), - (int) be16_to_cpu(*(__be16 *) &target->path.dgid.raw[14])); + target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; - ret = srp_create_target_ib(target); - if (ret) + if (!srp_conn_unique(target->srp_host, target)) { + shost_printk(KERN_INFO, target->scsi_host, + PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", + be64_to_cpu(target->id_ext), + be64_to_cpu(target->ioc_guid), + be64_to_cpu(target->initiator_ext)); + ret = -EEXIST; goto err; + } - target->cm_id = ib_create_cm_id(host->dev, srp_cm_handler, target); - if (IS_ERR(target->cm_id)) { - ret = PTR_ERR(target->cm_id); - goto err_free; + if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && + target->cmd_sg_cnt < target->sg_tablesize) { + pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); + target->sg_tablesize = target->cmd_sg_cnt; } + target_host->sg_tablesize = target->sg_tablesize; + target->indirect_size = target->sg_tablesize * + sizeof (struct srp_direct_buf); + target->max_iu_len = sizeof (struct srp_cmd) + + sizeof (struct srp_indirect_buf) + + target->cmd_sg_cnt * sizeof (struct srp_direct_buf); + + INIT_WORK(&target->tl_err_work, srp_tl_err_work); + INIT_WORK(&target->remove_work, srp_remove_work); + spin_lock_init(&target->lock); + INIT_LIST_HEAD(&target->free_tx); + ret = srp_alloc_req_data(target); + if (ret) + goto err_free_mem; + + ret = ib_query_gid(ibdev, host->port, 0, &target->path.sgid); + if (ret) + goto err_free_mem; + + ret = srp_create_target_ib(target); + if (ret) + goto err_free_mem; + + ret = srp_new_cm_id(target); + if (ret) + goto err_free_ib; + ret = srp_connect_target(target); if (ret) { - printk(KERN_ERR PFX "Connection failed\n"); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Connection failed\n"); goto err_cm_id; } @@ -1477,7 +3045,19 @@ static ssize_t srp_create_target(struct class_device *class_dev, if (ret) goto err_disconnect; - return count; + shost_printk(KERN_DEBUG, target->scsi_host, PFX + "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", + be64_to_cpu(target->id_ext), + be64_to_cpu(target->ioc_guid), + be16_to_cpu(target->path.pkey), + be64_to_cpu(target->service_id), + target->path.sgid.raw, target->path.dgid.raw); + + ret = count; + +out: + mutex_unlock(&host->add_target_mutex); + return ret; err_disconnect: srp_disconnect_target(target); @@ -1485,38 +3065,40 @@ err_disconnect: err_cm_id: ib_destroy_cm_id(target->cm_id); -err_free: +err_free_ib: srp_free_target_ib(target); +err_free_mem: + srp_free_req_data(target); + err: scsi_host_put(target_host); - - return ret; + goto out; } -static CLASS_DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target); +static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target); -static ssize_t show_ibdev(struct class_device *class_dev, char *buf) +static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, + char *buf) { - struct srp_host *host = - container_of(class_dev, struct srp_host, class_dev); + struct srp_host *host = container_of(dev, struct srp_host, dev); - return sprintf(buf, "%s\n", host->dev->name); + return sprintf(buf, "%s\n", host->srp_dev->dev->name); } -static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); +static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); -static ssize_t show_port(struct class_device *class_dev, char *buf) +static ssize_t show_port(struct device *dev, struct device_attribute *attr, + char *buf) { - struct srp_host *host = - container_of(class_dev, struct srp_host, class_dev); + struct srp_host *host = container_of(dev, struct srp_host, dev); return sprintf(buf, "%d\n", host->port); } -static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL); +static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); -static struct srp_host *srp_add_port(struct ib_device *device, u8 port) +static struct srp_host *srp_add_port(struct srp_device *device, u8 port) { struct srp_host *host; @@ -1525,51 +3107,31 @@ static struct srp_host *srp_add_port(struct ib_device *device, u8 port) return NULL; INIT_LIST_HEAD(&host->target_list); - init_MUTEX(&host->target_mutex); + spin_lock_init(&host->target_lock); init_completion(&host->released); - host->dev = device; + mutex_init(&host->add_target_mutex); + host->srp_dev = device; host->port = port; - host->initiator_port_id[7] = port; - memcpy(host->initiator_port_id + 8, &device->node_guid, 8); - - host->pd = ib_alloc_pd(device); - if (IS_ERR(host->pd)) - goto err_free; - - host->mr = ib_get_dma_mr(host->pd, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE); - if (IS_ERR(host->mr)) - goto err_pd; + host->dev.class = &srp_class; + host->dev.parent = device->dev->dma_device; + dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port); - host->class_dev.class = &srp_class; - host->class_dev.dev = device->dma_device; - snprintf(host->class_dev.class_id, BUS_ID_SIZE, "srp-%s-%d", - device->name, port); - - if (class_device_register(&host->class_dev)) - goto err_mr; - if (class_device_create_file(&host->class_dev, &class_device_attr_add_target)) + if (device_register(&host->dev)) + goto free_host; + if (device_create_file(&host->dev, &dev_attr_add_target)) goto err_class; - if (class_device_create_file(&host->class_dev, &class_device_attr_ibdev)) + if (device_create_file(&host->dev, &dev_attr_ibdev)) goto err_class; - if (class_device_create_file(&host->class_dev, &class_device_attr_port)) + if (device_create_file(&host->dev, &dev_attr_port)) goto err_class; return host; err_class: - class_device_unregister(&host->class_dev); - -err_mr: - ib_dereg_mr(host->mr); - -err_pd: - ib_dealloc_pd(host->pd); + device_unregister(&host->dev); -err_free: +free_host: kfree(host); return NULL; @@ -1577,17 +3139,74 @@ err_free: static void srp_add_one(struct ib_device *device) { - struct list_head *dev_list; + struct srp_device *srp_dev; + struct ib_device_attr *dev_attr; struct srp_host *host; - int s, e, p; + int mr_page_shift, s, e, p; + u64 max_pages_per_mr; - dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); - if (!dev_list) + dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); + if (!dev_attr) return; - INIT_LIST_HEAD(dev_list); + if (ib_query_device(device, dev_attr)) { + pr_warn("Query device failed for %s\n", device->name); + goto free_attr; + } + + srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL); + if (!srp_dev) + goto free_attr; + + srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && + device->map_phys_fmr && device->unmap_fmr); + srp_dev->has_fr = (dev_attr->device_cap_flags & + IB_DEVICE_MEM_MGT_EXTENSIONS); + if (!srp_dev->has_fmr && !srp_dev->has_fr) + dev_warn(&device->dev, "neither FMR nor FR is supported\n"); - if (device->node_type == IB_NODE_SWITCH) { + srp_dev->use_fast_reg = (srp_dev->has_fr && + (!srp_dev->has_fmr || prefer_fr)); + + /* + * Use the smallest page size supported by the HCA, down to a + * minimum of 4096 bytes. We're unlikely to build large sglists + * out of smaller entries. + */ + mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1); + srp_dev->mr_page_size = 1 << mr_page_shift; + srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); + max_pages_per_mr = dev_attr->max_mr_size; + do_div(max_pages_per_mr, srp_dev->mr_page_size); + srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, + max_pages_per_mr); + if (srp_dev->use_fast_reg) { + srp_dev->max_pages_per_mr = + min_t(u32, srp_dev->max_pages_per_mr, + dev_attr->max_fast_reg_page_list_len); + } + srp_dev->mr_max_size = srp_dev->mr_page_size * + srp_dev->max_pages_per_mr; + pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", + device->name, mr_page_shift, dev_attr->max_mr_size, + dev_attr->max_fast_reg_page_list_len, + srp_dev->max_pages_per_mr, srp_dev->mr_max_size); + + INIT_LIST_HEAD(&srp_dev->dev_list); + + srp_dev->dev = device; + srp_dev->pd = ib_alloc_pd(device); + if (IS_ERR(srp_dev->pd)) + goto free_dev; + + srp_dev->mr = ib_get_dma_mr(srp_dev->pd, + IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE); + if (IS_ERR(srp_dev->mr)) + goto err_pd; + + if (device->node_type == RDMA_NODE_IB_SWITCH) { s = 0; e = 0; } else { @@ -1596,26 +3215,37 @@ static void srp_add_one(struct ib_device *device) } for (p = s; p <= e; ++p) { - host = srp_add_port(device, p); + host = srp_add_port(srp_dev, p); if (host) - list_add_tail(&host->list, dev_list); + list_add_tail(&host->list, &srp_dev->dev_list); } - ib_set_client_data(device, &srp_client, dev_list); + ib_set_client_data(device, &srp_client, srp_dev); + + goto free_attr; + +err_pd: + ib_dealloc_pd(srp_dev->pd); + +free_dev: + kfree(srp_dev); + +free_attr: + kfree(dev_attr); } static void srp_remove_one(struct ib_device *device) { - struct list_head *dev_list; + struct srp_device *srp_dev; struct srp_host *host, *tmp_host; - LIST_HEAD(target_list); - struct srp_target_port *target, *tmp_target; - unsigned long flags; + struct srp_target_port *target; - dev_list = ib_get_client_data(device, &srp_client); + srp_dev = ib_get_client_data(device, &srp_client); + if (!srp_dev) + return; - list_for_each_entry_safe(host, tmp_host, dev_list, list) { - class_device_unregister(&host->class_dev); + list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { + device_unregister(&host->dev); /* * Wait for the sysfs entry to go away, so that no new * target ports can be created. @@ -1623,56 +3253,85 @@ static void srp_remove_one(struct ib_device *device) wait_for_completion(&host->released); /* - * Mark all target ports as removed, so we stop queueing - * commands and don't try to reconnect. + * Remove all target ports. */ - down(&host->target_mutex); - list_for_each_entry_safe(target, tmp_target, - &host->target_list, list) { - spin_lock_irqsave(target->scsi_host->host_lock, flags); - if (target->state != SRP_TARGET_REMOVED) - target->state = SRP_TARGET_REMOVED; - spin_unlock_irqrestore(target->scsi_host->host_lock, flags); - } - up(&host->target_mutex); + spin_lock(&host->target_lock); + list_for_each_entry(target, &host->target_list, list) + srp_queue_remove_work(target); + spin_unlock(&host->target_lock); /* - * Wait for any reconnection tasks that may have - * started before we marked our target ports as - * removed, and any target port removal tasks. + * Wait for target port removal tasks. */ - flush_scheduled_work(); - - list_for_each_entry_safe(target, tmp_target, - &host->target_list, list) { - scsi_remove_host(target->scsi_host); - srp_disconnect_target(target); - ib_destroy_cm_id(target->cm_id); - srp_free_target_ib(target); - scsi_host_put(target->scsi_host); - } + flush_workqueue(system_long_wq); - ib_dereg_mr(host->mr); - ib_dealloc_pd(host->pd); kfree(host); } - kfree(dev_list); + ib_dereg_mr(srp_dev->mr); + ib_dealloc_pd(srp_dev->pd); + + kfree(srp_dev); } +static struct srp_function_template ib_srp_transport_functions = { + .has_rport_state = true, + .reset_timer_if_blocked = true, + .reconnect_delay = &srp_reconnect_delay, + .fast_io_fail_tmo = &srp_fast_io_fail_tmo, + .dev_loss_tmo = &srp_dev_loss_tmo, + .reconnect = srp_rport_reconnect, + .rport_delete = srp_rport_delete, + .terminate_rport_io = srp_terminate_io, +}; + static int __init srp_init_module(void) { int ret; + BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *)); + + if (srp_sg_tablesize) { + pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); + if (!cmd_sg_entries) + cmd_sg_entries = srp_sg_tablesize; + } + + if (!cmd_sg_entries) + cmd_sg_entries = SRP_DEF_SG_TABLESIZE; + + if (cmd_sg_entries > 255) { + pr_warn("Clamping cmd_sg_entries to 255\n"); + cmd_sg_entries = 255; + } + + if (!indirect_sg_entries) + indirect_sg_entries = cmd_sg_entries; + else if (indirect_sg_entries < cmd_sg_entries) { + pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", + cmd_sg_entries); + indirect_sg_entries = cmd_sg_entries; + } + + ib_srp_transport_template = + srp_attach_transport(&ib_srp_transport_functions); + if (!ib_srp_transport_template) + return -ENOMEM; + ret = class_register(&srp_class); if (ret) { - printk(KERN_ERR PFX "couldn't register class infiniband_srp\n"); + pr_err("couldn't register class infiniband_srp\n"); + srp_release_transport(ib_srp_transport_template); return ret; } + ib_sa_register_client(&srp_sa_client); + ret = ib_register_client(&srp_client); if (ret) { - printk(KERN_ERR PFX "couldn't register IB client\n"); + pr_err("couldn't register IB client\n"); + srp_release_transport(ib_srp_transport_template); + ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); return ret; } @@ -1683,7 +3342,9 @@ static int __init srp_init_module(void) static void __exit srp_cleanup_module(void) { ib_unregister_client(&srp_client); + ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); + srp_release_transport(ib_srp_transport_template); } module_init(srp_init_module); |
