diff options
Diffstat (limited to 'drivers/infiniband/ulp/iser/iser_initiator.c')
| -rw-r--r-- | drivers/infiniband/ulp/iser/iser_initiator.c | 334 |
1 files changed, 238 insertions, 96 deletions
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index a607542fc79..8d44a406063 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. + * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -40,14 +41,15 @@ #include "iscsi_iser.h" /* Register user buffer memory and initialize passive rdma - * dto descriptor. Total data size is stored in - * iser_task->data[ISER_DIR_IN].data_len + * dto descriptor. Data size is stored in + * task->data[ISER_DIR_IN].data_len, Protection size + * os stored in task->prot[ISER_DIR_IN].data_len */ -static int iser_prepare_read_cmd(struct iscsi_task *task, - unsigned int edtl) +static int iser_prepare_read_cmd(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; + struct iser_device *device = iser_task->ib_conn->device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -60,15 +62,18 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, if (err) return err; - if (edtl > iser_task->data[ISER_DIR_IN].data_len) { - iser_err("Total data length: %ld, less than EDTL: " - "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", - iser_task->data[ISER_DIR_IN].data_len, edtl, - task->itt, iser_task->iser_conn); - return -EINVAL; + if (scsi_prot_sg_count(iser_task->sc)) { + struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN]; + + err = iser_dma_map_task_data(iser_task, + pbuf_in, + ISER_DIR_IN, + DMA_FROM_DEVICE); + if (err) + return err; } - err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN); + err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); return err; @@ -87,8 +92,9 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, } /* Register user buffer memory and initialize passive rdma - * dto descriptor. Total data size is stored in - * task->data[ISER_DIR_OUT].data_len + * dto descriptor. Data size is stored in + * task->data[ISER_DIR_OUT].data_len, Protection size + * is stored at task->prot[ISER_DIR_OUT].data_len */ static int iser_prepare_write_cmd(struct iscsi_task *task, @@ -97,6 +103,7 @@ iser_prepare_write_cmd(struct iscsi_task *task, unsigned int edtl) { struct iscsi_iser_task *iser_task = task->dd_data; + struct iser_device *device = iser_task->ib_conn->device; struct iser_regd_buf *regd_buf; int err; struct iser_hdr *hdr = &iser_task->desc.iser_header; @@ -110,15 +117,18 @@ iser_prepare_write_cmd(struct iscsi_task *task, if (err) return err; - if (edtl > iser_task->data[ISER_DIR_OUT].data_len) { - iser_err("Total data length: %ld, less than EDTL: %d, " - "in WRITE cmd BHS itt: %d, conn: 0x%p\n", - iser_task->data[ISER_DIR_OUT].data_len, - edtl, task->itt, task->conn); - return -EINVAL; + if (scsi_prot_sg_count(iser_task->sc)) { + struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT]; + + err = iser_dma_map_task_data(iser_task, + pbuf_out, + ISER_DIR_OUT, + DMA_TO_DEVICE); + if (err) + return err; } - err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT); + err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT); if (err != 0) { iser_err("Failed to register write cmd RDMA mem\n"); return err; @@ -169,8 +179,78 @@ static void iser_create_send_desc(struct iser_conn *ib_conn, } } +static void iser_free_login_buf(struct iser_conn *ib_conn) +{ + if (!ib_conn->login_buf) + return; + + if (ib_conn->login_req_dma) + ib_dma_unmap_single(ib_conn->device->ib_device, + ib_conn->login_req_dma, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + + if (ib_conn->login_resp_dma) + ib_dma_unmap_single(ib_conn->device->ib_device, + ib_conn->login_resp_dma, + ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); -static int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) + kfree(ib_conn->login_buf); + + /* make sure we never redo any unmapping */ + ib_conn->login_req_dma = 0; + ib_conn->login_resp_dma = 0; + ib_conn->login_buf = NULL; +} + +static int iser_alloc_login_buf(struct iser_conn *ib_conn) +{ + struct iser_device *device; + int req_err, resp_err; + + BUG_ON(ib_conn->device == NULL); + + device = ib_conn->device; + + ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + + ISER_RX_LOGIN_SIZE, GFP_KERNEL); + if (!ib_conn->login_buf) + goto out_err; + + ib_conn->login_req_buf = ib_conn->login_buf; + ib_conn->login_resp_buf = ib_conn->login_buf + + ISCSI_DEF_MAX_RECV_SEG_LEN; + + ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device, + (void *)ib_conn->login_req_buf, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + + ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device, + (void *)ib_conn->login_resp_buf, + ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + + req_err = ib_dma_mapping_error(device->ib_device, + ib_conn->login_req_dma); + resp_err = ib_dma_mapping_error(device->ib_device, + ib_conn->login_resp_dma); + + if (req_err || resp_err) { + if (req_err) + ib_conn->login_req_dma = 0; + if (resp_err) + ib_conn->login_resp_dma = 0; + goto free_login_buf; + } + return 0; + +free_login_buf: + iser_free_login_buf(ib_conn); + +out_err: + iser_err("unable to alloc or map login buf\n"); + return -ENOMEM; +} + +int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session) { int i, j; u64 dma_addr; @@ -178,14 +258,24 @@ static int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) struct ib_sge *rx_sg; struct iser_device *device = ib_conn->device; - ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS * + ib_conn->qp_max_recv_dtos = session->cmds_max; + ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */ + ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2; + + if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max)) + goto create_rdma_reg_res_failed; + + if (iser_alloc_login_buf(ib_conn)) + goto alloc_login_buf_fail; + + ib_conn->rx_descs = kmalloc(session->cmds_max * sizeof(struct iser_rx_desc), GFP_KERNEL); if (!ib_conn->rx_descs) goto rx_desc_alloc_fail; rx_desc = ib_conn->rx_descs; - for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) { + for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) { dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc, ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(device->ib_device, dma_addr)) @@ -206,10 +296,14 @@ rx_desc_dma_map_failed: rx_desc = ib_conn->rx_descs; for (j = 0; j < i; j++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); kfree(ib_conn->rx_descs); ib_conn->rx_descs = NULL; rx_desc_alloc_fail: + iser_free_login_buf(ib_conn); +alloc_login_buf_fail: + device->iser_free_rdma_reg_res(ib_conn); +create_rdma_reg_res_failed: iser_err("failed allocating rx descriptors / data buffers\n"); return -ENOMEM; } @@ -220,47 +314,51 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn) struct iser_rx_desc *rx_desc; struct iser_device *device = ib_conn->device; - if (ib_conn->login_buf) { - if (ib_conn->login_req_dma) - ib_dma_unmap_single(device->ib_device, - ib_conn->login_req_dma, - ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); - if (ib_conn->login_resp_dma) - ib_dma_unmap_single(device->ib_device, - ib_conn->login_resp_dma, - ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->login_buf); - } - if (!ib_conn->rx_descs) - return; + goto free_login_buf; + + if (device->iser_free_rdma_reg_res) + device->iser_free_rdma_reg_res(ib_conn); rx_desc = ib_conn->rx_descs; - for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) + for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr, - ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); + ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE); kfree(ib_conn->rx_descs); + /* make sure we never redo any unmapping */ + ib_conn->rx_descs = NULL; + +free_login_buf: + iser_free_login_buf(ib_conn); } -/** - * iser_conn_set_full_featured_mode - (iSER API) - */ -int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) +static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = conn->dd_data; + struct iscsi_session *session = conn->session; - iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX); + iser_dbg("req op %x flags %x\n", req->opcode, req->flags); + /* check if this is the last login - going to full feature phase */ + if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE) + return 0; - /* Check that there is no posted recv or send buffers left - */ - /* they must be consumed during the login phase */ - BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0); - BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); + /* + * Check that there is one posted recv buffer (for the last login + * response) and no posted send buffers left - they must have been + * consumed during previous login phases. + */ + WARN_ON(ib_conn->post_recv_buf_count != 1); + WARN_ON(atomic_read(&ib_conn->post_send_buf_count) != 0); - if (iser_alloc_rx_descriptors(iser_conn->ib_conn)) - return -ENOMEM; + if (session->discovery_sess) { + iser_info("Discovery session, re-using login RX buffer\n"); + return 0; + } else + iser_info("Normal session, posting batch of RX %d buffers\n", + ib_conn->min_posted_rx); /* Initial post receive buffers */ - if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX)) + if (iser_post_recvm(ib_conn, ib_conn->min_posted_rx)) return -ENOMEM; return 0; @@ -272,11 +370,11 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) int iser_send_command(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; unsigned long edtl; int err; - struct iser_data_buf *data_buf; + struct iser_data_buf *data_buf, *prot_buf; struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr; struct scsi_cmnd *sc = task->sc; struct iser_tx_desc *tx_desc = &iser_task->desc; @@ -285,22 +383,31 @@ int iser_send_command(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ tx_desc->type = ISCSI_TX_SCSI_COMMAND; - iser_create_send_desc(iser_conn->ib_conn, tx_desc); + iser_create_send_desc(ib_conn, tx_desc); - if (hdr->flags & ISCSI_FLAG_CMD_READ) + if (hdr->flags & ISCSI_FLAG_CMD_READ) { data_buf = &iser_task->data[ISER_DIR_IN]; - else + prot_buf = &iser_task->prot[ISER_DIR_IN]; + } else { data_buf = &iser_task->data[ISER_DIR_OUT]; + prot_buf = &iser_task->prot[ISER_DIR_OUT]; + } if (scsi_sg_count(sc)) { /* using a scatter list */ data_buf->buf = scsi_sglist(sc); data_buf->size = scsi_sg_count(sc); } - data_buf->data_len = scsi_bufflen(sc); + if (scsi_prot_sg_count(sc)) { + prot_buf->buf = scsi_prot_sglist(sc); + prot_buf->size = scsi_prot_sg_count(sc); + prot_buf->data_len = data_buf->data_len >> + ilog2(sc->device->sector_size) * 8; + } + if (hdr->flags & ISCSI_FLAG_CMD_READ) { - err = iser_prepare_read_cmd(task, edtl); + err = iser_prepare_read_cmd(task); if (err) goto send_command_error; } @@ -316,7 +423,7 @@ int iser_send_command(struct iscsi_conn *conn, iser_task->status = ISER_TASK_STATUS_STARTED; - err = iser_post_send(iser_conn->ib_conn, tx_desc); + err = iser_post_send(ib_conn, tx_desc); if (!err) return 0; @@ -332,7 +439,7 @@ int iser_send_data_out(struct iscsi_conn *conn, struct iscsi_task *task, struct iscsi_data *hdr) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *tx_desc = NULL; struct iser_regd_buf *regd_buf; @@ -381,7 +488,7 @@ int iser_send_data_out(struct iscsi_conn *conn, itt, buf_offset, data_seg_len); - err = iser_post_send(iser_conn->ib_conn, tx_desc); + err = iser_post_send(ib_conn, tx_desc); if (!err) return 0; @@ -394,19 +501,18 @@ send_data_out_error: int iser_send_control(struct iscsi_conn *conn, struct iscsi_task *task) { - struct iscsi_iser_conn *iser_conn = conn->dd_data; + struct iser_conn *ib_conn = conn->dd_data; struct iscsi_iser_task *iser_task = task->dd_data; struct iser_tx_desc *mdesc = &iser_task->desc; unsigned long data_seg_len; int err = 0; struct iser_device *device; - struct iser_conn *ib_conn = iser_conn->ib_conn; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; - iser_create_send_desc(iser_conn->ib_conn, mdesc); + iser_create_send_desc(ib_conn, mdesc); - device = iser_conn->ib_conn->device; + device = ib_conn->device; data_seg_len = ntoh24(task->hdr->dlength); @@ -421,26 +527,30 @@ int iser_send_control(struct iscsi_conn *conn, ib_conn->login_req_dma, task->data_count, DMA_TO_DEVICE); - memcpy(iser_conn->ib_conn->login_req_buf, task->data, - task->data_count); + memcpy(ib_conn->login_req_buf, task->data, task->data_count); ib_dma_sync_single_for_device(device->ib_device, ib_conn->login_req_dma, task->data_count, DMA_TO_DEVICE); - tx_dsg->addr = iser_conn->ib_conn->login_req_dma; + tx_dsg->addr = ib_conn->login_req_dma; tx_dsg->length = task->data_count; tx_dsg->lkey = device->mr->lkey; mdesc->num_sge = 2; } if (task == conn->login_task) { - err = iser_post_recvl(iser_conn->ib_conn); + iser_dbg("op %x dsl %lx, posting login rx buffer\n", + task->hdr->opcode, data_seg_len); + err = iser_post_recvl(ib_conn); + if (err) + goto send_control_error; + err = iser_post_rx_bufs(conn, task->hdr); if (err) goto send_control_error; } - err = iser_post_send(iser_conn->ib_conn, mdesc); + err = iser_post_send(ib_conn, mdesc); if (!err) return 0; @@ -456,7 +566,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, unsigned long rx_xfer_len, struct iser_conn *ib_conn) { - struct iscsi_iser_conn *conn = ib_conn->iser_conn; struct iscsi_hdr *hdr; u64 rx_dma; int rx_buflen, outstanding, count, err; @@ -478,25 +587,25 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN)); - iscsi_iser_recv(conn->iscsi_conn, hdr, - rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN); + iscsi_iser_recv(ib_conn->iscsi_conn, hdr, rx_desc->data, + rx_xfer_len - ISER_HEADERS_LEN); ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, - rx_buflen, DMA_FROM_DEVICE); + rx_buflen, DMA_FROM_DEVICE); /* decrementing conn->post_recv_buf_count only --after-- freeing the * * task eliminates the need to worry on tasks which are completed in * * parallel to the execution of iser_conn_term. So the code that waits * * for the posted rx bufs refcount to become zero handles everything */ - conn->ib_conn->post_recv_buf_count--; + ib_conn->post_recv_buf_count--; if (rx_dma == ib_conn->login_resp_dma) return; outstanding = ib_conn->post_recv_buf_count; - if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) { - count = min(ISER_QP_MAX_RECV_DTOS - outstanding, - ISER_MIN_POSTED_RX); + if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) { + count = min(ib_conn->qp_max_recv_dtos - outstanding, + ib_conn->min_posted_rx); err = iser_post_recvm(ib_conn, count); if (err) iser_err("posting %d rx bufs err %d\n", count, err); @@ -513,11 +622,12 @@ void iser_snd_completion(struct iser_tx_desc *tx_desc, ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); kmem_cache_free(ig.desc_cache, tx_desc); + tx_desc = NULL; } atomic_dec(&ib_conn->post_send_buf_count); - if (tx_desc->type == ISCSI_TX_CONTROL) { + if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) { /* this arithmetic is legal by libiscsi dd_data allocation */ task = (void *) ((long)(void *)tx_desc - sizeof(struct iscsi_task)); @@ -537,6 +647,9 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) iser_task->data[ISER_DIR_IN].data_len = 0; iser_task->data[ISER_DIR_OUT].data_len = 0; + iser_task->prot[ISER_DIR_IN].data_len = 0; + iser_task->prot[ISER_DIR_OUT].data_len = 0; + memset(&iser_task->rdma_regd[ISER_DIR_IN], 0, sizeof(struct iser_regd_buf)); memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0, @@ -545,34 +658,63 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { - int is_rdma_aligned = 1; - struct iser_regd_buf *regd; + struct iser_device *device = iser_task->ib_conn->device; + int is_rdma_data_aligned = 1; + int is_rdma_prot_aligned = 1; + int prot_count = scsi_prot_sg_count(iser_task->sc); /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy */ if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) { - is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN); + is_rdma_data_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + &iser_task->data[ISER_DIR_IN], + &iser_task->data_copy[ISER_DIR_IN], + ISER_DIR_IN); } + if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) { - is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT); + is_rdma_data_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + &iser_task->data[ISER_DIR_OUT], + &iser_task->data_copy[ISER_DIR_OUT], + ISER_DIR_OUT); + } + + if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) { + is_rdma_prot_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + &iser_task->prot[ISER_DIR_IN], + &iser_task->prot_copy[ISER_DIR_IN], + ISER_DIR_IN); + } + + if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) { + is_rdma_prot_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + &iser_task->prot[ISER_DIR_OUT], + &iser_task->prot_copy[ISER_DIR_OUT], + ISER_DIR_OUT); } if (iser_task->dir[ISER_DIR_IN]) { - regd = &iser_task->rdma_regd[ISER_DIR_IN]; - if (regd->reg.is_fmr) - iser_unreg_mem(®d->reg); + device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); + if (is_rdma_data_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->data[ISER_DIR_IN]); + if (prot_count && is_rdma_prot_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->prot[ISER_DIR_IN]); } if (iser_task->dir[ISER_DIR_OUT]) { - regd = &iser_task->rdma_regd[ISER_DIR_OUT]; - if (regd->reg.is_fmr) - iser_unreg_mem(®d->reg); + device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); + if (is_rdma_data_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->data[ISER_DIR_OUT]); + if (prot_count && is_rdma_prot_aligned) + iser_dma_unmap_task_data(iser_task, + &iser_task->prot[ISER_DIR_OUT]); } - - /* if the data was unaligned, it was already unmapped and then copied */ - if (is_rdma_aligned) - iser_dma_unmap_task_data(iser_task); } |
