From 6edf602341cd8f6e79479ff7f5bca72562c1f608 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 27 Sep 2006 14:42:56 -0700 Subject: RDMA/amso1100: Fix compile warnings Make sure all 64-bit quantities are cast to unsigned long long when printed with "%ll" printk formats. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2_ae.c | 2 +- drivers/infiniband/hw/amso1100/c2_alloc.c | 2 +- drivers/infiniband/hw/amso1100/c2_provider.c | 4 +++- drivers/infiniband/hw/amso1100/c2_rnic.c | 4 ++-- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c index 08f46c83a3a..3aae4978e1c 100644 --- a/drivers/infiniband/hw/amso1100/c2_ae.c +++ b/drivers/infiniband/hw/amso1100/c2_ae.c @@ -197,7 +197,7 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index) "resource=%x, qp_state=%s\n", __FUNCTION__, to_event_str(event_id), - be64_to_cpu(wr->ae.ae_generic.user_context), + (unsigned long long) be64_to_cpu(wr->ae.ae_generic.user_context), be32_to_cpu(wr->ae.ae_generic.resource_type), be32_to_cpu(wr->ae.ae_generic.resource), to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state))); diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c index 1d2529992c0..028a60bbfca 100644 --- a/drivers/infiniband/hw/amso1100/c2_alloc.c +++ b/drivers/infiniband/hw/amso1100/c2_alloc.c @@ -115,7 +115,7 @@ u16 *c2_alloc_mqsp(struct c2_dev *c2dev, struct sp_chunk *head, ((unsigned long) &(head->shared_ptr[mqsp]) - (unsigned long) head); pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__, - &(head->shared_ptr[mqsp]), (u64)*dma_addr); + &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr); return &(head->shared_ptr[mqsp]); } return NULL; diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index dd6af551108..622d6f1f920 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -397,7 +397,9 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd, pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " "*iova_start %llx, first pa %llx, last pa %llx\n", __FUNCTION__, page_shift, pbl_depth, total_len, - *iova_start, page_list[0], page_list[pbl_depth-1]); + (unsigned long long) *iova_start, + (unsigned long long) page_list[0], + (unsigned long long) page_list[pbl_depth-1]); err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list, (1 << page_shift), pbl_depth, total_len, 0, iova_start, diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c index f49a32b7a8f..e37c5688c21 100644 --- a/drivers/infiniband/hw/amso1100/c2_rnic.c +++ b/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -527,7 +527,7 @@ int c2_rnic_init(struct c2_dev *c2dev) DMA_FROM_DEVICE); pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma); pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages, - (u64)c2dev->rep_vq.host_dma); + (unsigned long long) c2dev->rep_vq.host_dma); c2_mq_rep_init(&c2dev->rep_vq, 1, qsize, @@ -550,7 +550,7 @@ int c2_rnic_init(struct c2_dev *c2dev) DMA_FROM_DEVICE); pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma); pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q1_pages, - (u64)c2dev->rep_vq.host_dma); + (unsigned long long) c2dev->rep_vq.host_dma); c2_mq_rep_init(&c2dev->aeq, 2, qsize, -- cgit v1.2.3-18-g5258 From 44334bd97e76662c5f40c629357e6acc4dee3e8a Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Thu, 28 Sep 2006 10:38:32 -0700 Subject: RDMA/amso1100: Fix error path in c2_llp_accept() Another NULL dereference spotted by the Coverity checker (cid #1395): In case we can't alloc the vq_req, we goto bail1, where we call vq_req_free(c2dev, vq_req); which then dereferences vq_req. Signed-off-by: Eric Sesterhenn Signed-off-by: Andrew Morton Acked-by: Tom Tucker Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2_cm.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c index 485254efdd1..75b93e9b881 100644 --- a/drivers/infiniband/hw/amso1100/c2_cm.c +++ b/drivers/infiniband/hw/amso1100/c2_cm.c @@ -302,7 +302,7 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) vq_req = vq_req_alloc(c2dev); if (!vq_req) { err = -ENOMEM; - goto bail1; + goto bail0; } vq_req->qp = qp; vq_req->cm_id = cm_id; @@ -311,7 +311,7 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL); if (!wr) { err = -ENOMEM; - goto bail2; + goto bail1; } /* Build the WR */ @@ -331,7 +331,7 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) /* Validate private_data length */ if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) { err = -EINVAL; - goto bail2; + goto bail1; } if (iw_param->private_data) { @@ -348,19 +348,19 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) err = vq_send_wr(c2dev, (union c2wr *) wr); if (err) { vq_req_put(c2dev, vq_req); - goto bail2; + goto bail1; } /* Wait for reply from adapter */ err = vq_wait_for_reply(c2dev, vq_req); if (err) - goto bail2; + goto bail1; /* Check that reply is present */ reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg; if (!reply) { err = -ENOMEM; - goto bail2; + goto bail1; } err = c2_errno(reply); @@ -368,9 +368,8 @@ int c2_llp_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) if (!err) c2_set_qp_state(qp, C2_QP_STATE_RTS); - bail2: - kfree(wr); bail1: + kfree(wr); vq_req_free(c2dev, vq_req); bail0: if (err) { -- cgit v1.2.3-18-g5258 From ee30cb5b0b65392843cc3beaba48160ee4a3764e Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 28 Sep 2006 10:44:07 -0700 Subject: RDMA/amso1100: Fix memory leak in c2_reg_phys_mr() If the allocation of mr fails, then c2_reg_phys_mr() leaks the page_list array it allocated earlier. This was Coverity CID #1413. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2_provider.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 622d6f1f920..da98d9f7142 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -390,8 +390,10 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd, } mr = kmalloc(sizeof(*mr), GFP_KERNEL); - if (!mr) + if (!mr) { + vfree(page_list); return ERR_PTR(-ENOMEM); + } mr->pd = to_c2pd(ib_pd); pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " -- cgit v1.2.3-18-g5258 From 87e8df7a273c7c1acb864c90b5253609c44375a6 Mon Sep 17 00:00:00 2001 From: Erez Zilber Date: Wed, 27 Sep 2006 15:27:10 +0300 Subject: IB/iser: Have iSER data transaction object point to iSER conn iSER uses a data transaction object (struct iser_dto) as part of its IB data descriptors (struct iser_desc) management. It also uses a hierarchy of connection structures pointing to each other. A DTO may exist even after the iscsi_iser connection pointed by it is destroyed (eg one that is bound to a post receive buffer which was flushed by the IB HW). Hence DTOs need point to the lowest connection, which is struct iser_conn. Signed-off-by: Erez Zilber Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 2 ++ drivers/infiniband/ulp/iser/iscsi_iser.h | 2 +- drivers/infiniband/ulp/iser/iser_initiator.c | 11 ++++++----- drivers/infiniband/ulp/iser/iser_verbs.c | 8 +++++--- 4 files changed, 14 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 2a14fe2e322..eb6f98d8228 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -317,6 +317,8 @@ iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) struct iscsi_iser_conn *iser_conn = conn->dd_data; iscsi_conn_teardown(cls_conn); + if (iser_conn->ib_conn) + iser_conn->ib_conn->iser_conn = NULL; kfree(iser_conn); } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 2cf9ae0def1..2826540d2f2 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -192,7 +192,7 @@ struct iser_regd_buf { struct iser_dto { struct iscsi_iser_cmd_task *ctask; - struct iscsi_iser_conn *conn; + struct iser_conn *ib_conn; int notify_enable; /* vector of registered buffers */ diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index ccf56f6f723..14ae61e0759 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -249,7 +249,7 @@ static int iser_post_receive_control(struct iscsi_conn *conn) } recv_dto = &rx_desc->dto; - recv_dto->conn = iser_conn; + recv_dto->ib_conn = iser_conn->ib_conn; recv_dto->regd_vector_len = 0; regd_hdr = &rx_desc->hdr_regd_buf; @@ -296,7 +296,7 @@ static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn, regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */ regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; - send_dto->conn = iser_conn; + send_dto->ib_conn = iser_conn->ib_conn; send_dto->notify_enable = 1; send_dto->regd_vector_len = 0; @@ -588,7 +588,7 @@ void iser_rcv_completion(struct iser_desc *rx_desc, unsigned long dto_xfer_len) { struct iser_dto *dto = &rx_desc->dto; - struct iscsi_iser_conn *conn = dto->conn; + struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn; struct iscsi_session *session = conn->iscsi_conn->session; struct iscsi_cmd_task *ctask; struct iscsi_iser_cmd_task *iser_ctask; @@ -641,7 +641,8 @@ void iser_rcv_completion(struct iser_desc *rx_desc, void iser_snd_completion(struct iser_desc *tx_desc) { struct iser_dto *dto = &tx_desc->dto; - struct iscsi_iser_conn *iser_conn = dto->conn; + struct iser_conn *ib_conn = dto->ib_conn; + struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn; struct iscsi_conn *conn = iser_conn->iscsi_conn; struct iscsi_mgmt_task *mtask; @@ -652,7 +653,7 @@ void iser_snd_completion(struct iser_desc *tx_desc) if (tx_desc->type == ISCSI_TX_DATAOUT) kmem_cache_free(ig.desc_cache, tx_desc); - atomic_dec(&iser_conn->ib_conn->post_send_buf_count); + atomic_dec(&ib_conn->post_send_buf_count); write_lock(conn->recv_lock); if (conn->suspend_tx) { diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ecdca7fc1e4..18a00003499 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -571,6 +571,8 @@ void iser_conn_release(struct iser_conn *ib_conn) /* on EVENT_ADDR_ERROR there's no device yet for this conn */ if (device != NULL) iser_device_try_release(device); + if (ib_conn->iser_conn) + ib_conn->iser_conn->ib_conn = NULL; kfree(ib_conn); } @@ -694,7 +696,7 @@ int iser_post_recv(struct iser_desc *rx_desc) struct iser_dto *recv_dto = &rx_desc->dto; /* Retrieve conn */ - ib_conn = recv_dto->conn->ib_conn; + ib_conn = recv_dto->ib_conn; iser_dto_to_iov(recv_dto, iov, 2); @@ -727,7 +729,7 @@ int iser_post_send(struct iser_desc *tx_desc) struct iser_conn *ib_conn; struct iser_dto *dto = &tx_desc->dto; - ib_conn = dto->conn->ib_conn; + ib_conn = dto->ib_conn; iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN); @@ -774,7 +776,7 @@ static void iser_comp_error_worker(void *data) static void iser_handle_comp_error(struct iser_desc *desc) { struct iser_dto *dto = &desc->dto; - struct iser_conn *ib_conn = dto->conn->ib_conn; + struct iser_conn *ib_conn = dto->ib_conn; iser_dto_buffs_release(dto); -- cgit v1.2.3-18-g5258 From 74a2078061409e027ccb51a28cf6174c31ab8f99 Mon Sep 17 00:00:00 2001 From: Erez Zilber Date: Wed, 27 Sep 2006 16:43:06 +0300 Subject: IB/iser: DMA unmap unaligned for RDMA data before touching it iSER uses the DMA mapping api to map the page holding the SCSI command data to the HCA DMA address space. When the command data is not aligned for RDMA, the data is copied to/from an allocated buffer which in turn is used for executing this command. The pages associated with the command must be unmapped before being touched. Signed-off-by: Erez Zilber Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 7 ++++ drivers/infiniband/ulp/iser/iser_initiator.c | 49 ++++++---------------------- drivers/infiniband/ulp/iser/iser_memory.c | 42 ++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 39 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 2826540d2f2..9c53916f28c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -355,4 +355,11 @@ int iser_post_send(struct iser_desc *tx_desc); int iser_conn_state_comp(struct iser_conn *ib_conn, enum iser_ib_conn_state comp); + +int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, + struct iser_data_buf *data, + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir); + +void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 14ae61e0759..9b3d79c796c 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -66,42 +66,6 @@ static void iser_dto_add_regd_buff(struct iser_dto *dto, dto->regd_vector_len++; } -static int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, - struct iser_data_buf *data, - enum iser_data_dir iser_dir, - enum dma_data_direction dma_dir) -{ - struct device *dma_device; - - iser_ctask->dir[iser_dir] = 1; - dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; - - data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir); - if (data->dma_nents == 0) { - iser_err("dma_map_sg failed!!!\n"); - return -EINVAL; - } - return 0; -} - -static void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) -{ - struct device *dma_device; - struct iser_data_buf *data; - - dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; - - if (iser_ctask->dir[ISER_DIR_IN]) { - data = &iser_ctask->data[ISER_DIR_IN]; - dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE); - } - - if (iser_ctask->dir[ISER_DIR_OUT]) { - data = &iser_ctask->data[ISER_DIR_OUT]; - dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE); - } -} - /* Register user buffer memory and initialize passive rdma * dto descriptor. Total data size is stored in * iser_ctask->data[ISER_DIR_IN].data_len @@ -699,14 +663,19 @@ void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *iser_ctask) void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) { int deferred; + int is_rdma_aligned = 1; /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy */ - if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) + if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) { + is_rdma_aligned = 0; iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN); - if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) + } + if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) { + is_rdma_aligned = 0; iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT); + } if (iser_ctask->dir[ISER_DIR_IN]) { deferred = iser_regd_buff_release @@ -726,7 +695,9 @@ void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) } } - iser_dma_unmap_task_data(iser_ctask); + /* if the data was unaligned, it was already unmapped and then copied */ + if (is_rdma_aligned) + iser_dma_unmap_task_data(iser_ctask); } void iser_dto_buffs_release(struct iser_dto *dto) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d0b03f42658..0606744c3f8 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -369,6 +369,44 @@ static void iser_page_vec_build(struct iser_data_buf *data, } } +int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, + struct iser_data_buf *data, + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir) +{ + struct device *dma_device; + + iser_ctask->dir[iser_dir] = 1; + dma_device = + iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; + + data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir); + if (data->dma_nents == 0) { + iser_err("dma_map_sg failed!!!\n"); + return -EINVAL; + } + return 0; +} + +void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) +{ + struct device *dma_device; + struct iser_data_buf *data; + + dma_device = + iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; + + if (iser_ctask->dir[ISER_DIR_IN]) { + data = &iser_ctask->data[ISER_DIR_IN]; + dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE); + } + + if (iser_ctask->dir[ISER_DIR_OUT]) { + data = &iser_ctask->data[ISER_DIR_OUT]; + dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE); + } +} + /** * iser_reg_rdma_mem - Registers memory intended for RDMA, * obtaining rkey and va @@ -394,6 +432,10 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, iser_err("rdma alignment violation %d/%d aligned\n", aligned_len, mem->size); iser_data_buf_dump(mem); + + /* unmap the command data before accessing it */ + iser_dma_unmap_task_data(iser_ctask); + /* allocate copy buf, if we are writing, copy the */ /* unaligned scatterlist, dma map the copy */ if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0) -- cgit v1.2.3-18-g5258 From fd6a79a786b84510d00ee6aa6449a468e6d75dee Mon Sep 17 00:00:00 2001 From: Erez Zilber Date: Wed, 27 Sep 2006 16:48:57 +0300 Subject: IB/iser: Fix the description of iSER in Kconfig Fix the description of iSER in Kconfig. It is not accurate. Signed-off-by: Erez Zilber Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/Kconfig | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig index 365a1b5f19e..aecbb9083f0 100644 --- a/drivers/infiniband/ulp/iser/Kconfig +++ b/drivers/infiniband/ulp/iser/Kconfig @@ -1,11 +1,12 @@ config INFINIBAND_ISER - tristate "ISCSI RDMA Protocol" + tristate "iSCSI Extensions for RDMA (iSER)" depends on INFINIBAND && SCSI && INET select SCSI_ISCSI_ATTRS ---help--- - Support for the ISCSI RDMA Protocol over InfiniBand. This - allows you to access storage devices that speak ISER/ISCSI - over InfiniBand. + Support for the iSCSI Extensions for RDMA (iSER) Protocol + over InfiniBand. This allows you to access storage devices + that speak iSCSI over iSER over InfiniBand. - The ISER protocol is defined by IETF. - See . + The iSER protocol is defined by IETF. + See + and -- cgit v1.2.3-18-g5258 From 6022943eb4cb3cb9e43f27f1faeaba38e162d966 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Thu, 28 Sep 2006 08:59:57 -0700 Subject: IB/ipath: Limit # of packets sent without an ACK received The sender requests an ACK every 1/2 MB to avoid retransmit timeouts that were causing MVAPICH mod_bw to fail after a predictable number of sends. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_qp.c | 3 +- drivers/infiniband/hw/ipath/ipath_rc.c | 47 +++++++++++++++++++------------ drivers/infiniband/hw/ipath/ipath_verbs.c | 3 +- drivers/infiniband/hw/ipath/ipath_verbs.h | 4 +++ 4 files changed, 37 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 224b0f40767..ecfaca7a571 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -342,6 +342,7 @@ static void ipath_reset_qp(struct ipath_qp *qp) qp->s_last = 0; qp->s_ssn = 1; qp->s_lsn = 0; + qp->s_wait_credit = 0; if (qp->r_rq.wq) { qp->r_rq.wq->head = 0; qp->r_rq.wq->tail = 0; @@ -516,7 +517,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->remote_qpn = attr->dest_qp_num; if (attr_mask & IB_QP_SQ_PSN) { - qp->s_next_psn = attr->sq_psn; + qp->s_psn = qp->s_next_psn = attr->sq_psn; qp->s_last_psn = qp->s_next_psn - 1; } diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index a08654042c0..52caa2edf5a 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -201,6 +201,18 @@ int ipath_make_rc_req(struct ipath_qp *qp, qp->s_rnr_timeout) goto done; + /* Limit the number of packets sent without an ACK. */ + if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { + qp->s_wait_credit = 1; + dev->n_rc_stalls++; + spin_lock(&dev->pending_lock); + if (list_empty(&qp->timerwait)) + list_add_tail(&qp->timerwait, + &dev->pending[dev->pending_index]); + spin_unlock(&dev->pending_lock); + goto done; + } + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; bth0 = 0; @@ -221,7 +233,7 @@ int ipath_make_rc_req(struct ipath_qp *qp, /* Check if send work queue is empty. */ if (qp->s_tail == qp->s_head) goto done; - qp->s_psn = wqe->psn = qp->s_next_psn; + wqe->psn = qp->s_next_psn; newreq = 1; } /* @@ -393,12 +405,6 @@ int ipath_make_rc_req(struct ipath_qp *qp, ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { - /* - * Request an ACK every 1/2 MB to avoid retransmit - * timeouts. - */ - if (((wqe->length - len) % (512 * 1024)) == 0) - bth2 |= 1 << 31; len = pmtu; break; } @@ -435,12 +441,6 @@ int ipath_make_rc_req(struct ipath_qp *qp, ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { - /* - * Request an ACK every 1/2 MB to avoid retransmit - * timeouts. - */ - if (((wqe->length - len) % (512 * 1024)) == 0) - bth2 |= 1 << 31; len = pmtu; break; } @@ -498,6 +498,8 @@ int ipath_make_rc_req(struct ipath_qp *qp, */ goto done; } + if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0) + bth2 |= 1 << 31; /* Request ACK. */ qp->s_len -= len; qp->s_hdrwords = hwords; qp->s_cur_sge = ss; @@ -737,6 +739,15 @@ bail: return; } +static inline void update_last_psn(struct ipath_qp *qp, u32 psn) +{ + if (qp->s_wait_credit) { + qp->s_wait_credit = 0; + tasklet_hi_schedule(&qp->s_task); + } + qp->s_last_psn = psn; +} + /** * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on @@ -805,7 +816,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) * The last valid PSN seen is the previous * request's. */ - qp->s_last_psn = wqe->psn - 1; + update_last_psn(qp, wqe->psn - 1); /* Retry this request. */ ipath_restart_rc(qp, wqe->psn, &wc); /* @@ -864,7 +875,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) ipath_get_credit(qp, aeth); qp->s_rnr_retry = qp->s_rnr_retry_cnt; qp->s_retry = qp->s_retry_cnt; - qp->s_last_psn = psn; + update_last_psn(qp, psn); ret = 1; goto bail; @@ -883,7 +894,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) goto bail; /* The last valid PSN is the previous PSN. */ - qp->s_last_psn = psn - 1; + update_last_psn(qp, psn - 1); dev->n_rc_resends += (int)qp->s_psn - (int)psn; @@ -898,7 +909,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode) case 3: /* NAK */ /* The last valid PSN seen is the previous request's. */ if (qp->s_last != qp->s_tail) - qp->s_last_psn = wqe->psn - 1; + update_last_psn(qp, wqe->psn - 1); switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) & IPATH_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ @@ -1071,7 +1082,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev, * since we don't want s_sge modified. */ qp->s_len -= pmtu; - qp->s_last_psn = psn; + update_last_psn(qp, psn); spin_unlock_irqrestore(&qp->s_lock, flags); ipath_copy_sge(&qp->s_sge, data, pmtu); goto bail; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index b8381c5e72b..a4bf870c5e3 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1683,6 +1683,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf) "RC OTH NAKs %d\n" "RC timeouts %d\n" "RC RDMA dup %d\n" + "RC stalls %d\n" "piobuf wait %d\n" "no piobuf %d\n" "PKT drops %d\n" @@ -1690,7 +1691,7 @@ static ssize_t show_stats(struct class_device *cdev, char *buf) dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, dev->n_other_naks, dev->n_timeouts, - dev->n_rdma_dup_busy, dev->n_piowait, + dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs); for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { const struct ipath_opcode_stats *si = &dev->opstats[i]; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 09bbb3f9a21..3fffaa02e66 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -370,6 +370,7 @@ struct ipath_qp { u8 s_rnr_retry_cnt; u8 s_retry; /* requester retry counter */ u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_wait_credit; /* limit number of unacked packets sent */ u8 s_pkey_index; /* PKEY index to use */ u8 timeout; /* Timeout for this QP */ enum ib_mtu path_mtu; @@ -393,6 +394,8 @@ struct ipath_qp { #define IPATH_S_BUSY 0 #define IPATH_S_SIGNAL_REQ_WR 1 +#define IPATH_PSN_CREDIT 2048 + /* * Since struct ipath_swqe is not a fixed size, we can't simply index into * struct ipath_qp.s_wq. This function does the array index computation. @@ -521,6 +524,7 @@ struct ipath_ibdev { u32 n_rnr_naks; u32 n_other_naks; u32 n_timeouts; + u32 n_rc_stalls; u32 n_pkt_drops; u32 n_vl15_dropped; u32 n_wqe_errs; -- cgit v1.2.3-18-g5258 From 221e31985b490309eb9ae33ac815deae3b5aa021 Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Thu, 28 Sep 2006 08:59:58 -0700 Subject: IB/ipath: Fix memory leak if allocation fails If the second allocation failed, the first structure allocated in this routine was not freed. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_driver.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 2108466c7e3..a01301d0753 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1326,6 +1326,9 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd, "for port %u rcvhdrqtailaddr failed\n", pd->port_port); ret = -ENOMEM; + dma_free_coherent(&dd->pcidev->dev, amt, + pd->port_rcvhdrq, pd->port_rcvhdrq_phys); + pd->port_rcvhdrq = NULL; goto bail; } pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail; -- cgit v1.2.3-18-g5258 From 9929b0fb0f35f54371e9364bab809bcd753f9d3a Mon Sep 17 00:00:00 2001 From: Bryan O'Sullivan Date: Thu, 28 Sep 2006 08:59:59 -0700 Subject: IB/ipath: Driver support for userspace sharing of HW contexts This allows multiple userspace processes to share a single hardware context in a master/slave arrangement. It is backwards binary compatible with existing userspace. Signed-off-by: Bryan O'Sullivan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ipath/ipath_common.h | 45 +- drivers/infiniband/hw/ipath/ipath_driver.c | 7 +- drivers/infiniband/hw/ipath/ipath_file_ops.c | 901 ++++++++++++++++++++------- drivers/infiniband/hw/ipath/ipath_kernel.h | 30 +- drivers/infiniband/hw/ipath/ipath_sysfs.c | 12 + 5 files changed, 754 insertions(+), 241 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h index f577905e3ac..46b7b20d8a9 100644 --- a/drivers/infiniband/hw/ipath/ipath_common.h +++ b/drivers/infiniband/hw/ipath/ipath_common.h @@ -185,6 +185,7 @@ typedef enum _ipath_ureg { #define IPATH_RUNTIME_PCIE 0x2 #define IPATH_RUNTIME_FORCE_WC_ORDER 0x4 #define IPATH_RUNTIME_RCVHDR_COPY 0x8 +#define IPATH_RUNTIME_MASTER 0x10 /* * This structure is returned by ipath_userinit() immediately after @@ -202,7 +203,8 @@ struct ipath_base_info { /* version of software, for feature checking. */ __u32 spi_sw_version; /* InfiniPath port assigned, goes into sent packets */ - __u32 spi_port; + __u16 spi_port; + __u16 spi_subport; /* * IB MTU, packets IB data must be less than this. * The MTU is in bytes, and will be a multiple of 4 bytes. @@ -218,7 +220,7 @@ struct ipath_base_info { __u32 spi_tidcnt; /* size of the TID Eager list in infinipath, in entries */ __u32 spi_tidegrcnt; - /* size of a single receive header queue entry. */ + /* size of a single receive header queue entry in words. */ __u32 spi_rcvhdrent_size; /* * Count of receive header queue entries allocated. @@ -310,6 +312,12 @@ struct ipath_base_info { __u32 spi_filler_for_align; /* address of readonly memory copy of the rcvhdrq tail register. */ __u64 spi_rcvhdr_tailaddr; + + /* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */ + __u64 spi_subport_uregbase; + __u64 spi_subport_rcvegrbuf; + __u64 spi_subport_rcvhdr_base; + } __attribute__ ((aligned(8))); @@ -328,12 +336,12 @@ struct ipath_base_info { /* * Minor version differences are always compatible - * a within a major version, however if if user software is larger + * a within a major version, however if user software is larger * than driver software, some new features and/or structure fields * may not be implemented; the user code must deal with this if it - * cares, or it must abort after initialization reports the difference + * cares, or it must abort after initialization reports the difference. */ -#define IPATH_USER_SWMINOR 2 +#define IPATH_USER_SWMINOR 3 #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR) @@ -379,7 +387,16 @@ struct ipath_user_info { */ __u32 spu_rcvhdrsize; - __u64 spu_unused; /* kept for compatible layout */ + /* + * If two or more processes wish to share a port, each process + * must set the spu_subport_cnt and spu_subport_id to the same + * values. The only restriction on the spu_subport_id is that + * it be unique for a given node. + */ + __u16 spu_subport_cnt; + __u16 spu_subport_id; + + __u32 spu_unused; /* kept for compatible layout */ /* * address of struct base_info to write to @@ -398,13 +415,17 @@ struct ipath_user_info { #define IPATH_CMD_TID_UPDATE 19 /* update expected TID entries */ #define IPATH_CMD_TID_FREE 20 /* free expected TID entries */ #define IPATH_CMD_SET_PART_KEY 21 /* add partition key */ +#define IPATH_CMD_SLAVE_INFO 22 /* return info on slave processes */ -#define IPATH_CMD_MAX 21 +#define IPATH_CMD_MAX 22 struct ipath_port_info { __u32 num_active; /* number of active units */ __u32 unit; /* unit (chip) assigned to caller */ - __u32 port; /* port on unit assigned to caller */ + __u16 port; /* port on unit assigned to caller */ + __u16 subport; /* subport on unit assigned to caller */ + __u16 num_ports; /* number of ports available on unit */ + __u16 num_subports; /* number of subport slaves opened on port */ }; struct ipath_tid_info { @@ -435,6 +456,8 @@ struct ipath_cmd { __u32 recv_ctrl; /* partition key to set */ __u16 part_key; + /* user address of __u32 bitmask of active slaves */ + __u64 slave_mask_addr; } cmd; }; @@ -596,6 +619,10 @@ struct infinipath_counters { /* K_PktFlags bits */ #define INFINIPATH_KPF_INTR 0x1 +#define INFINIPATH_KPF_SUBPORT_MASK 0x3 +#define INFINIPATH_KPF_SUBPORT_SHIFT 1 + +#define INFINIPATH_MAX_SUBPORT 4 /* SendPIO per-buffer control */ #define INFINIPATH_SP_TEST 0x40 @@ -610,7 +637,7 @@ struct ipath_header { /* * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset - * 14 bits before ECO change ~28 Dec 03. After that, Vers 4, - * Port 3, TID 11, offset 14. + * Port 4, TID 11, offset 13. */ __le32 ver_port_tid_offset; __le16 chksum; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index a01301d0753..0fe37c5467a 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -1827,9 +1827,9 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) dma_free_coherent(&dd->pcidev->dev, size, base, pd->port_rcvegrbuf_phys[e]); } - vfree(pd->port_rcvegrbuf); + kfree(pd->port_rcvegrbuf); pd->port_rcvegrbuf = NULL; - vfree(pd->port_rcvegrbuf_phys); + kfree(pd->port_rcvegrbuf_phys); pd->port_rcvegrbuf_phys = NULL; pd->port_rcvegrbuf_chunks = 0; } else if (pd->port_port == 0 && dd->ipath_port0_skbs) { @@ -1845,6 +1845,9 @@ void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) vfree(skbs); } kfree(pd->port_tid_pg_list); + vfree(pd->subport_uregbase); + vfree(pd->subport_rcvegrbuf); + vfree(pd->subport_rcvhdr_base); kfree(pd); } diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 29930e22318..caf8cb891da 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -41,6 +41,12 @@ #include "ipath_kernel.h" #include "ipath_common.h" +/* + * mmap64 doesn't allow all 64 bits for 32-bit applications + * so only use the low 43 bits. + */ +#define MMAP64_MASK 0x7FFFFFFFFFFUL + static int ipath_open(struct inode *, struct file *); static int ipath_close(struct inode *, struct file *); static ssize_t ipath_write(struct file *, const char __user *, size_t, @@ -57,18 +63,35 @@ static struct file_operations ipath_file_ops = { .mmap = ipath_mmap }; -static int ipath_get_base_info(struct ipath_portdata *pd, +static int ipath_get_base_info(struct file *fp, void __user *ubase, size_t ubase_size) { + struct ipath_portdata *pd = port_fp(fp); int ret = 0; struct ipath_base_info *kinfo = NULL; struct ipath_devdata *dd = pd->port_dd; + unsigned subport_cnt; + int shared, master; + size_t sz; + + subport_cnt = pd->port_subport_cnt; + if (!subport_cnt) { + shared = 0; + master = 0; + subport_cnt = 1; + } else { + shared = 1; + master = !subport_fp(fp); + } - if (ubase_size < sizeof(*kinfo)) { + sz = sizeof(*kinfo); + /* If port sharing is not requested, allow the old size structure */ + if (!shared) + sz -= 3 * sizeof(u64); + if (ubase_size < sz) { ipath_cdbg(PROC, - "Base size %lu, need %lu (version mismatch?)\n", - (unsigned long) ubase_size, - (unsigned long) sizeof(*kinfo)); + "Base size %zu, need %zu (version mismatch?)\n", + ubase_size, sz); ret = -EINVAL; goto bail; } @@ -95,7 +118,9 @@ static int ipath_get_base_info(struct ipath_portdata *pd, kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk; kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen / pd->port_rcvegrbuf_chunks; - kinfo->spi_tidcnt = dd->ipath_rcvtidcnt; + kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt; + if (master) + kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt; /* * for this use, may be ipath_cfgports summed over all chips that * are are configured and present @@ -118,31 +143,76 @@ static int ipath_get_base_info(struct ipath_portdata *pd, * page_address() macro worked, but in 2.6.11, even that returns the * full 64 bit address (upper bits all 1's). So far, using the * physical addresses (or chip offsets, for chip mapping) works, but - * no doubt some future kernel release will chang that, and we'll be - * on to yet another method of dealing with this + * no doubt some future kernel release will change that, and we'll be + * on to yet another method of dealing with this. */ kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys; - kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys; + kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys; kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys; kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys; kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + (void *) dd->ipath_statusp - (void *) dd->ipath_pioavailregs_dma; - kinfo->spi_piobufbase = (u64) pd->port_piobufs; - kinfo->__spi_uregbase = - dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + if (!shared) { + kinfo->spi_piocnt = dd->ipath_pbufsport; + kinfo->spi_piobufbase = (u64) pd->port_piobufs; + kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_palign * pd->port_port; + } else if (master) { + kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) + + (dd->ipath_pbufsport % subport_cnt); + /* Master's PIO buffers are after all the slave's */ + kinfo->spi_piobufbase = (u64) pd->port_piobufs + + dd->ipath_palign * + (dd->ipath_pbufsport - kinfo->spi_piocnt); + kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + + dd->ipath_palign * pd->port_port; + } else { + unsigned slave = subport_fp(fp) - 1; - kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1); - kinfo->spi_piocnt = dd->ipath_pbufsport; + kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; + kinfo->spi_piobufbase = (u64) pd->port_piobufs + + dd->ipath_palign * kinfo->spi_piocnt * slave; + kinfo->__spi_uregbase = ((u64) pd->subport_uregbase + + PAGE_SIZE * slave) & MMAP64_MASK; + + kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * slave) & MMAP64_MASK; + kinfo->spi_rcvhdr_tailaddr = + (u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK; + kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf + + dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) & + MMAP64_MASK; + } + + kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / + dd->ipath_palign; kinfo->spi_pioalign = dd->ipath_palign; kinfo->spi_qpair = IPATH_KD_QP; kinfo->spi_piosize = dd->ipath_ibmaxlen; kinfo->spi_mtu = dd->ipath_ibmaxlen; /* maxlen, not ibmtu */ kinfo->spi_port = pd->port_port; + kinfo->spi_subport = subport_fp(fp); kinfo->spi_sw_version = IPATH_KERN_SWVERSION; kinfo->spi_hw_version = dd->ipath_revision; + if (master) { + kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER; + kinfo->spi_subport_uregbase = + (u64) pd->subport_uregbase & MMAP64_MASK; + kinfo->spi_subport_rcvegrbuf = + (u64) pd->subport_rcvegrbuf & MMAP64_MASK; + kinfo->spi_subport_rcvhdr_base = + (u64) pd->subport_rcvhdr_base & MMAP64_MASK; + ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n", + kinfo->spi_port, + kinfo->spi_runtime_flags, + kinfo->spi_subport_uregbase, + kinfo->spi_subport_rcvegrbuf, + kinfo->spi_subport_rcvhdr_base); + } + if (copy_to_user(ubase, kinfo, sizeof(*kinfo))) ret = -EFAULT; @@ -154,6 +224,7 @@ bail: /** * ipath_tid_update - update a port TID * @pd: the port + * @fp: the ipath device file * @ti: the TID information * * The new implementation as of Oct 2004 is that the driver assigns @@ -176,11 +247,11 @@ bail: * virtually contiguous pages, that should change to improve * performance. */ -static int ipath_tid_update(struct ipath_portdata *pd, +static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp, const struct ipath_tid_info *ti) { int ret = 0, ntids; - u32 tid, porttid, cnt, i, tidcnt; + u32 tid, porttid, cnt, i, tidcnt, tidoff; u16 *tidlist; struct ipath_devdata *dd = pd->port_dd; u64 physaddr; @@ -188,6 +259,7 @@ static int ipath_tid_update(struct ipath_portdata *pd, u64 __iomem *tidbase; unsigned long tidmap[8]; struct page **pagep = NULL; + unsigned subport = subport_fp(fp); if (!dd->ipath_pageshadow) { ret = -ENOMEM; @@ -204,20 +276,34 @@ static int ipath_tid_update(struct ipath_portdata *pd, ret = -EFAULT; goto done; } - tidcnt = dd->ipath_rcvtidcnt; - if (cnt >= tidcnt) { + porttid = pd->port_port * dd->ipath_rcvtidcnt; + if (!pd->port_subport_cnt) { + tidcnt = dd->ipath_rcvtidcnt; + tid = pd->port_tidcursor; + tidoff = 0; + } else if (!subport) { + tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + + (dd->ipath_rcvtidcnt % pd->port_subport_cnt); + tidoff = dd->ipath_rcvtidcnt - tidcnt; + porttid += tidoff; + tid = tidcursor_fp(fp); + } else { + tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; + tidoff = tidcnt * (subport - 1); + porttid += tidoff; + tid = tidcursor_fp(fp); + } + if (cnt > tidcnt) { /* make sure it all fits in port_tid_pg_list */ dev_info(&dd->pcidev->dev, "Process tried to allocate %u " "TIDs, only trying max (%u)\n", cnt, tidcnt); cnt = tidcnt; } - pagep = (struct page **)pd->port_tid_pg_list; - tidlist = (u16 *) (&pagep[cnt]); + pagep = &((struct page **) pd->port_tid_pg_list)[tidoff]; + tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff]; memset(tidmap, 0, sizeof(tidmap)); - tid = pd->port_tidcursor; /* before decrement; chip actual # */ - porttid = pd->port_port * tidcnt; ntids = tidcnt; tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) + dd->ipath_rcvtidbase + @@ -274,9 +360,9 @@ static int ipath_tid_update(struct ipath_portdata *pd, ret = -ENOMEM; break; } - tidlist[i] = tid; + tidlist[i] = tid + tidoff; ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, " - "vaddr %lx\n", i, tid, vaddr); + "vaddr %lx\n", i, tid + tidoff, vaddr); /* we "know" system pages and TID pages are same size */ dd->ipath_pageshadow[porttid + tid] = pagep[i]; /* @@ -341,7 +427,10 @@ static int ipath_tid_update(struct ipath_portdata *pd, } if (tid == tidcnt) tid = 0; - pd->port_tidcursor = tid; + if (!pd->port_subport_cnt) + pd->port_tidcursor = tid; + else + tidcursor_fp(fp) = tid; } done: @@ -354,6 +443,7 @@ done: /** * ipath_tid_free - free a port TID * @pd: the port + * @subport: the subport * @ti: the TID info * * right now we are unlocking one page at a time, but since @@ -367,7 +457,7 @@ done: * they pass in to us. */ -static int ipath_tid_free(struct ipath_portdata *pd, +static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport, const struct ipath_tid_info *ti) { int ret = 0; @@ -388,11 +478,20 @@ static int ipath_tid_free(struct ipath_portdata *pd, } porttid = pd->port_port * dd->ipath_rcvtidcnt; + if (!pd->port_subport_cnt) + tidcnt = dd->ipath_rcvtidcnt; + else if (!subport) { + tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) + + (dd->ipath_rcvtidcnt % pd->port_subport_cnt); + porttid += dd->ipath_rcvtidcnt - tidcnt; + } else { + tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt; + porttid += tidcnt * (subport - 1); + } tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) + dd->ipath_rcvtidbase + porttid * sizeof(*tidbase)); - tidcnt = dd->ipath_rcvtidcnt; limit = sizeof(tidmap) * BITS_PER_BYTE; if (limit > tidcnt) /* just in case size changes in future */ @@ -581,20 +680,24 @@ bail: /** * ipath_manage_rcvq - manage a port's receive queue * @pd: the port + * @subport: the subport * @start_stop: action to carry out * * start_stop == 0 disables receive on the port, for use in queue * overflow conditions. start_stop==1 re-enables, to be used to * re-init the software copy of the head register */ -static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop) +static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport, + int start_stop) { struct ipath_devdata *dd = pd->port_dd; u64 tval; - ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n", + ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n", start_stop ? "en" : "dis", dd->ipath_unit, - pd->port_port); + pd->port_port, subport); + if (subport) + goto bail; /* atomically clear receive enable port. */ if (start_stop) { /* @@ -630,6 +733,7 @@ static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop) tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); } /* always; new head should be equal to new tail; see above */ +bail: return 0; } @@ -687,6 +791,36 @@ static void ipath_clean_part_key(struct ipath_portdata *pd, } } +/* + * Initialize the port data with the receive buffer sizes + * so this can be done while the master port is locked. + * Otherwise, there is a race with a slave opening the port + * and seeing these fields uninitialized. + */ +static void init_user_egr_sizes(struct ipath_portdata *pd) +{ + struct ipath_devdata *dd = pd->port_dd; + unsigned egrperchunk, egrcnt, size; + + /* + * to avoid wasting a lot of memory, we allocate 32KB chunks of + * physically contiguous memory, advance through it until used up + * and then allocate more. Of course, we need memory to store those + * extra pointers, now. Started out with 256KB, but under heavy + * memory pressure (creating large files and then copying them over + * NFS while doing lots of MPI jobs), we hit some allocation + * failures, even though we can sleep... (2.6.10) Still get + * failures at 64K. 32K is the lowest we can go without wasting + * additional memory. + */ + size = 0x8000; + egrperchunk = size / dd->ipath_rcvegrbufsize; + egrcnt = dd->ipath_rcvegrcnt; + pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk; + pd->port_rcvegrbufs_perchunk = egrperchunk; + pd->port_rcvegrbuf_size = size; +} + /** * ipath_create_user_egr - allocate eager TID buffers * @pd: the port to allocate TID buffers for @@ -702,7 +836,7 @@ static void ipath_clean_part_key(struct ipath_portdata *pd, static int ipath_create_user_egr(struct ipath_portdata *pd) { struct ipath_devdata *dd = pd->port_dd; - unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff; + unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; size_t size; int ret; gfp_t gfp_flags; @@ -722,31 +856,18 @@ static int ipath_create_user_egr(struct ipath_portdata *pd) ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid " "offset %x, egrsize %u\n", egrcnt, egroff, egrsize); - /* - * to avoid wasting a lot of memory, we allocate 32KB chunks of - * physically contiguous memory, advance through it until used up - * and then allocate more. Of course, we need memory to store those - * extra pointers, now. Started out with 256KB, but under heavy - * memory pressure (creating large files and then copying them over - * NFS while doing lots of MPI jobs), we hit some allocation - * failures, even though we can sleep... (2.6.10) Still get - * failures at 64K. 32K is the lowest we can go without wasting - * additional memory. - */ - size = 0x8000; - alloced = ALIGN(egrsize * egrcnt, size); - egrperchunk = size / egrsize; - chunk = (egrcnt + egrperchunk - 1) / egrperchunk; - pd->port_rcvegrbuf_chunks = chunk; - pd->port_rcvegrbufs_perchunk = egrperchunk; - pd->port_rcvegrbuf_size = size; - pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0])); + chunk = pd->port_rcvegrbuf_chunks; + egrperchunk = pd->port_rcvegrbufs_perchunk; + size = pd->port_rcvegrbuf_size; + pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]), + GFP_KERNEL); if (!pd->port_rcvegrbuf) { ret = -ENOMEM; goto bail; } pd->port_rcvegrbuf_phys = - vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0])); + kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]), + GFP_KERNEL); if (!pd->port_rcvegrbuf_phys) { ret = -ENOMEM; goto bail_rcvegrbuf; @@ -791,97 +912,15 @@ bail_rcvegrbuf_phys: pd->port_rcvegrbuf_phys[e]); } - vfree(pd->port_rcvegrbuf_phys); + kfree(pd->port_rcvegrbuf_phys); pd->port_rcvegrbuf_phys = NULL; bail_rcvegrbuf: - vfree(pd->port_rcvegrbuf); + kfree(pd->port_rcvegrbuf); pd->port_rcvegrbuf = NULL; bail: return ret; } -static int ipath_do_user_init(struct ipath_portdata *pd, - const struct ipath_user_info *uinfo) -{ - int ret = 0; - struct ipath_devdata *dd = pd->port_dd; - u32 head32; - - /* for now, if major version is different, bail */ - if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { - dev_info(&dd->pcidev->dev, - "User major version %d not same as driver " - "major %d\n", uinfo->spu_userversion >> 16, - IPATH_USER_SWMAJOR); - ret = -ENODEV; - goto done; - } - - if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) - ipath_dbg("User minor version %d not same as driver " - "minor %d\n", uinfo->spu_userversion & 0xffff, - IPATH_USER_SWMINOR); - - if (uinfo->spu_rcvhdrsize) { - ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize); - if (ret) - goto done; - } - - /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ - - /* for right now, kernel piobufs are at end, so port 1 is at 0 */ - pd->port_piobufs = dd->ipath_piobufbase + - dd->ipath_pbufsport * (pd->port_port - - 1) * dd->ipath_palign; - ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", - pd->port_port, pd->port_piobufs); - - /* - * Now allocate the rcvhdr Q and eager TIDs; skip the TID - * array for time being. If pd->port_port > chip-supported, - * we need to do extra stuff here to handle by handling overflow - * through port 0, someday - */ - ret = ipath_create_rcvhdrq(dd, pd); - if (!ret) - ret = ipath_create_user_egr(pd); - if (ret) - goto done; - - /* - * set the eager head register for this port to the current values - * of the tail pointers, since we don't know if they were - * updated on last use of the port. - */ - head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); - ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); - dd->ipath_lastegrheads[pd->port_port] = -1; - dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; - ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n", - pd->port_port, head32); - pd->port_tidcursor = 0; /* start at beginning after open */ - /* - * now enable the port; the tail registers will be written to memory - * by the chip as soon as it sees the write to - * dd->ipath_kregs->kr_rcvctrl. The update only happens on - * transition from 0 to 1, so clear it first, then set it as part of - * enabling the port. This will (very briefly) affect any other - * open ports, but it shouldn't be long enough to be an issue. - * We explictly set the in-memory copy to 0 beforehand, so we don't - * have to wait to be sure the DMA update has happened. - */ - *pd->port_rcvhdrtail_kvaddr = 0ULL; - set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port, - &dd->ipath_rcvctrl); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); -done: - return ret; -} - /* common code for the mappings on dma_alloc_coherent mem */ static int ipath_mmap_mem(struct vm_area_struct *vma, @@ -957,7 +996,8 @@ static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd, static int mmap_piobufs(struct vm_area_struct *vma, struct ipath_devdata *dd, - struct ipath_portdata *pd) + struct ipath_portdata *pd, + unsigned piobufs, unsigned piocnt) { unsigned long phys; int ret; @@ -968,16 +1008,15 @@ static int mmap_piobufs(struct vm_area_struct *vma, * process data, and catches users who might try to read the i/o * space due to a bug. */ - if ((vma->vm_end - vma->vm_start) > - (dd->ipath_pbufsport * dd->ipath_palign)) { + if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) { dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: " "reqlen %lx > PAGE\n", vma->vm_end - vma->vm_start); - ret = -EFAULT; + ret = -EINVAL; goto bail; } - phys = dd->ipath_physaddr + pd->port_piobufs; + phys = dd->ipath_physaddr + piobufs; /* * Don't mark this as non-cached, or we don't get the @@ -1021,7 +1060,7 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma, "reqlen %lx > actual %lx\n", vma->vm_end - vma->vm_start, (unsigned long) total_size); - ret = -EFAULT; + ret = -EINVAL; goto bail; } @@ -1049,6 +1088,122 @@ bail: return ret; } +/* + * ipath_file_vma_nopage - handle a VMA page fault. + */ +static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + unsigned long offset = address - vma->vm_start; + struct page *page = NOPAGE_SIGBUS; + void *pageptr; + + /* + * Convert the vmalloc address into a struct page. + */ + pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT)); + page = vmalloc_to_page(pageptr); + if (!page) + goto out; + + /* Increment the reference count. */ + get_page(page); + if (type) + *type = VM_FAULT_MINOR; +out: + return page; +} + +static struct vm_operations_struct ipath_file_vm_ops = { + .nopage = ipath_file_vma_nopage, +}; + +static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, + struct ipath_portdata *pd, unsigned subport) +{ + unsigned long len; + struct ipath_devdata *dd; + void *addr; + size_t size; + int ret; + + /* If the port is not shared, all addresses should be physical */ + if (!pd->port_subport_cnt) { + ret = -EINVAL; + goto bail; + } + + dd = pd->port_dd; + size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size; + + /* + * Master has all the slave uregbase, rcvhdrq, and + * rcvegrbufs mmapped. + */ + if (subport == 0) { + unsigned num_slaves = pd->port_subport_cnt - 1; + + if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) { + addr = pd->subport_uregbase; + size = PAGE_SIZE * num_slaves; + } else if (pgaddr == ((u64) pd->subport_rcvhdr_base & + MMAP64_MASK)) { + addr = pd->subport_rcvhdr_base; + size = pd->port_rcvhdrq_size * num_slaves; + } else if (pgaddr == ((u64) pd->subport_rcvegrbuf & + MMAP64_MASK)) { + addr = pd->subport_rcvegrbuf; + size *= num_slaves; + } else { + ret = -EINVAL; + goto bail; + } + } else if (pgaddr == (((u64) pd->subport_uregbase + + PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) { + addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1); + size = PAGE_SIZE; + } else if (pgaddr == (((u64) pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * (subport - 1)) & + MMAP64_MASK)) { + addr = pd->subport_rcvhdr_base + + pd->port_rcvhdrq_size * (subport - 1); + size = pd->port_rcvhdrq_size; + } else if (pgaddr == (((u64) pd->subport_rcvegrbuf + + size * (subport - 1)) & MMAP64_MASK)) { + addr = pd->subport_rcvegrbuf + size * (subport - 1); + /* rcvegrbufs are read-only on the slave */ + if (vma->vm_flags & VM_WRITE) { + dev_info(&dd->pcidev->dev, + "Can't map eager buffers as " + "writable (flags=%lx)\n", vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* + * Don't allow permission to later change to writeable + * with mprotect. + */ + vma->vm_flags &= ~VM_MAYWRITE; + } else { + ret = -EINVAL; + goto bail; + } + len = vma->vm_end - vma->vm_start; + if (len > size) { + ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size); + ret = -EINVAL; + goto bail; + } + + vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; + vma->vm_ops = &ipath_file_vm_ops; + vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + ret = 0; + +bail: + return ret; +} + /** * ipath_mmap - mmap various structures into user space * @fp: the file pointer @@ -1064,73 +1219,99 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) struct ipath_portdata *pd; struct ipath_devdata *dd; u64 pgaddr, ureg; + unsigned piobufs, piocnt; int ret; pd = port_fp(fp); + if (!pd) { + ret = -EINVAL; + goto bail; + } dd = pd->port_dd; /* * This is the ipath_do_user_init() code, mapping the shared buffers * into the user process. The address referred to by vm_pgoff is the - * virtual, not physical, address; we only do one mmap for each - * space mapped. + * file offset passed via mmap(). For shared ports, this is the + * kernel vmalloc() address of the pages to share with the master. + * For non-shared or master ports, this is a physical address. + * We only do one mmap for each space mapped. */ pgaddr = vma->vm_pgoff << PAGE_SHIFT; /* - * Must fit in 40 bits for our hardware; some checked elsewhere, - * but we'll be paranoid. Check for 0 is mostly in case one of the - * allocations failed, but user called mmap anyway. We want to catch - * that before it can match. + * Check for 0 in case one of the allocations failed, but user + * called mmap anyway. */ - if (!pgaddr || pgaddr >= (1ULL<<40)) { - ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n", - (unsigned long long)pgaddr, vma->vm_start, vma->vm_end); - return -EINVAL; + if (!pgaddr) { + ret = -EINVAL; + goto bail; } - /* just the offset of the port user registers, not physical addr */ - ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; - - ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n", + ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n", (unsigned long long) pgaddr, vma->vm_start, - vma->vm_end - vma->vm_start); + vma->vm_end - vma->vm_start, dd->ipath_unit, + pd->port_port, subport_fp(fp)); - if (vma->vm_start & (PAGE_SIZE-1)) { - ipath_dev_err(dd, - "vm_start not aligned: %lx, end=%lx phys %lx\n", - vma->vm_start, vma->vm_end, (unsigned long)pgaddr); - ret = -EINVAL; + /* + * Physical addresses must fit in 40 bits for our hardware. + * Check for kernel virtual addresses first, anything else must + * match a HW or memory address. + */ + if (pgaddr >= (1ULL<<40)) { + ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp)); + goto bail; + } + + if (!pd->port_subport_cnt) { + /* port is not shared */ + ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + piocnt = dd->ipath_pbufsport; + piobufs = pd->port_piobufs; + } else if (!subport_fp(fp)) { + /* caller is the master */ + ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; + piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + + (dd->ipath_pbufsport % pd->port_subport_cnt); + piobufs = pd->port_piobufs + + dd->ipath_palign * (dd->ipath_pbufsport - piocnt); + } else { + unsigned slave = subport_fp(fp) - 1; + + /* caller is a slave */ + ureg = 0; + piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; + piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; } - else if (pgaddr == ureg) + + if (pgaddr == ureg) ret = mmap_ureg(vma, dd, ureg); - else if (pgaddr == pd->port_piobufs) - ret = mmap_piobufs(vma, dd, pd); - else if (pgaddr == (u64) pd->port_rcvegr_phys) + else if (pgaddr == piobufs) + ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt); + else if (pgaddr == dd->ipath_pioavailregs_phys) + /* in-memory copy of pioavail registers */ + ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, + dd->ipath_pioavailregs_phys, + "pioavail registers"); + else if (subport_fp(fp)) + /* Subports don't mmap the physical receive buffers */ + ret = -EINVAL; + else if (pgaddr == pd->port_rcvegr_phys) ret = mmap_rcvegrbufs(vma, pd); - else if (pgaddr == (u64) pd->port_rcvhdrq_phys) { + else if (pgaddr == (u64) pd->port_rcvhdrq_phys) /* * The rcvhdrq itself; readonly except on HT (so have * to allow writable mapping), multiple pages, contiguous * from an i/o perspective. */ - unsigned total_size = - ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize - * sizeof(u32), PAGE_SIZE); - ret = ipath_mmap_mem(vma, pd, total_size, 1, + ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1, pd->port_rcvhdrq_phys, "rcvhdrq"); - } - else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys) + else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys) /* in-memory copy of rcvhdrq tail register */ ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, pd->port_rcvhdrqtailaddr_phys, "rcvhdrq tail"); - else if (pgaddr == dd->ipath_pioavailregs_phys) - /* in-memory copy of pioavail registers */ - ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0, - dd->ipath_pioavailregs_phys, - "pioavail registers"); else ret = -EINVAL; @@ -1138,9 +1319,10 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma) if (ret < 0) dev_info(&dd->pcidev->dev, - "Failure %d on addr %lx, off %lx\n", - -ret, vma->vm_start, vma->vm_pgoff); - + "Failure %d on off %llx len %lx\n", + -ret, (unsigned long long)pgaddr, + vma->vm_end - vma->vm_start); +bail: return ret; } @@ -1154,6 +1336,8 @@ static unsigned int ipath_poll(struct file *fp, struct ipath_devdata *dd; pd = port_fp(fp); + if (!pd) + goto bail; dd = pd->port_dd; bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT; @@ -1176,7 +1360,7 @@ static unsigned int ipath_poll(struct file *fp, if (tail == head) { set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag); - if(dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ + if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */ (void)ipath_write_ureg(dd, ur_rcvhdrhead, dd->ipath_rhdrhead_intr_off | head, pd->port_port); @@ -1200,18 +1384,80 @@ static unsigned int ipath_poll(struct file *fp, ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, dd->ipath_rcvctrl); +bail: return pollflag; } +static int init_subports(struct ipath_devdata *dd, + struct ipath_portdata *pd, + const struct ipath_user_info *uinfo) +{ + int ret = 0; + unsigned num_slaves; + size_t size; + + /* Old user binaries don't know about subports */ + if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) + goto bail; + /* + * If the user is requesting zero or one port, + * skip the subport allocation. + */ + if (uinfo->spu_subport_cnt <= 1) + goto bail; + if (uinfo->spu_subport_cnt > 4) { + ret = -EINVAL; + goto bail; + } + + num_slaves = uinfo->spu_subport_cnt - 1; + pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves); + if (!pd->subport_uregbase) { + ret = -ENOMEM; + goto bail; + } + /* Note: pd->port_rcvhdrq_size isn't initialized yet. */ + size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * + sizeof(u32), PAGE_SIZE) * num_slaves; + pd->subport_rcvhdr_base = vmalloc(size); + if (!pd->subport_rcvhdr_base) { + ret = -ENOMEM; + goto bail_ureg; + } + + pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * + pd->port_rcvegrbuf_size * + num_slaves); + if (!pd->subport_rcvegrbuf) { + ret = -ENOMEM; + goto bail_rhdr; + } + + pd->port_subport_cnt = uinfo->spu_subport_cnt; + pd->port_subport_id = uinfo->spu_subport_id; + pd->active_slaves = 1; + goto bail; + +bail_rhdr: + vfree(pd->subport_rcvhdr_base); +bail_ureg: + vfree(pd->subport_uregbase); + pd->subport_uregbase = NULL; +bail: + return ret; +} + static