diff options
Diffstat (limited to 'drivers/infiniband/hw/cxgb3')
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/Kconfig | 4 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/Makefile | 7 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/cxio_dbg.c | 25 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/cxio_hal.c | 333 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/cxio_hal.h | 42 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/cxio_resource.c | 127 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/cxio_wr.h | 177 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch.c | 132 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch.h | 34 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_cm.c | 873 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_cm.h | 15 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_cq.c | 19 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_ev.c | 40 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_mem.c | 104 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_provider.c | 537 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_provider.h | 27 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_qp.c | 579 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb3/iwch_user.h | 8 |
18 files changed, 2019 insertions, 1064 deletions
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig index 77977f55dca..2b6352b8548 100644 --- a/drivers/infiniband/hw/cxgb3/Kconfig +++ b/drivers/infiniband/hw/cxgb3/Kconfig @@ -1,6 +1,6 @@ config INFINIBAND_CXGB3 tristate "Chelsio RDMA Driver" - depends on CHELSIO_T3 && INFINIBAND && INET + depends on CHELSIO_T3 && INET select GENERIC_ALLOCATOR ---help--- This is an iWARP/RDMA driver for the Chelsio T3 1GbE and @@ -10,7 +10,7 @@ config INFINIBAND_CXGB3 our website at <http://www.chelsio.com>. For customer support, please visit our customer support page at - <http://www.chelsio.com/support.htm>. + <http://www.chelsio.com/support.html>. Please send feedback to <linux-bugs@chelsio.com>. diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile index 36b98989b15..2761364185a 100644 --- a/drivers/infiniband/hw/cxgb3/Makefile +++ b/drivers/infiniband/hw/cxgb3/Makefile @@ -1,11 +1,8 @@ -EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3 \ - -I$(TOPDIR)/drivers/infiniband/hw/cxgb3/core +ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb3 obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \ iwch_provider.o iwch.o cxio_hal.o cxio_resource.o -ifdef CONFIG_INFINIBAND_CXGB3_DEBUG -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_INFINIBAND_CXGB3_DEBUG) += -DDEBUG diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c index 75f7b16a271..8bca6b4ec9a 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c +++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c @@ -31,6 +31,7 @@ */ #ifdef DEBUG #include <linux/types.h> +#include <linux/slab.h> #include "common.h" #include "cxgb3_ioctl.h" #include "cxio_hal.h" @@ -45,16 +46,16 @@ void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag) m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); if (!m) { - PDBG("%s couldn't allocate memory.\n", __FUNCTION__); + PDBG("%s couldn't allocate memory.\n", __func__); return; } m->mem_id = MEM_PMRX; m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base; m->len = size; - PDBG("%s TPT addr 0x%x len %d\n", __FUNCTION__, m->addr, m->len); + PDBG("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len); rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); if (rc) { - PDBG("%s toectl returned error %d\n", __FUNCTION__, rc); + PDBG("%s toectl returned error %d\n", __func__, rc); kfree(m); return; } @@ -82,17 +83,17 @@ void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift) m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); if (!m) { - PDBG("%s couldn't allocate memory.\n", __FUNCTION__); + PDBG("%s couldn't allocate memory.\n", __func__); return; } m->mem_id = MEM_PMRX; m->addr = pbl_addr; m->len = size; PDBG("%s PBL addr 0x%x len %d depth %d\n", - __FUNCTION__, m->addr, m->len, npages); + __func__, m->addr, m->len, npages); rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); if (rc) { - PDBG("%s toectl returned error %d\n", __FUNCTION__, rc); + PDBG("%s toectl returned error %d\n", __func__, rc); kfree(m); return; } @@ -144,16 +145,16 @@ void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents) m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); if (!m) { - PDBG("%s couldn't allocate memory.\n", __FUNCTION__); + PDBG("%s couldn't allocate memory.\n", __func__); return; } m->mem_id = MEM_PMRX; m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base; m->len = size; - PDBG("%s RQT addr 0x%x len %d\n", __FUNCTION__, m->addr, m->len); + PDBG("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len); rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); if (rc) { - PDBG("%s toectl returned error %d\n", __FUNCTION__, rc); + PDBG("%s toectl returned error %d\n", __func__, rc); kfree(m); return; } @@ -177,16 +178,16 @@ void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid) m = kmalloc(sizeof(*m) + size, GFP_ATOMIC); if (!m) { - PDBG("%s couldn't allocate memory.\n", __FUNCTION__); + PDBG("%s couldn't allocate memory.\n", __func__); return; } m->mem_id = MEM_CM; m->addr = hwtid * size; m->len = size; - PDBG("%s TCB %d len %d\n", __FUNCTION__, m->addr, m->len); + PDBG("%s TCB %d len %d\n", __func__, m->addr, m->len); rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); if (rc) { - PDBG("%s toectl returned error %d\n", __FUNCTION__, rc); + PDBG("%s toectl returned error %d\n", __func__, rc); kfree(m); return; } diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index 76049afc765..de1c61b417d 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -37,6 +37,8 @@ #include <linux/spinlock.h> #include <linux/pci.h> #include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <net/net_namespace.h> #include "cxio_resource.h" #include "cxio_hal.h" @@ -108,7 +110,6 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) { udelay(1); if (i++ > 1000000) { - BUG_ON(1); printk(KERN_ERR "%s: stalled rnic\n", rdev_p->dev_name); return -EIO; @@ -139,52 +140,56 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) struct t3_modify_qp_wr *wqe; struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); if (!skb) { - PDBG("%s alloc_skb failed\n", __FUNCTION__); + PDBG("%s alloc_skb failed\n", __func__); return -ENOMEM; } wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 1, qpid, 7); + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, + T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7, + T3_SOPEOP); wqe->flags = cpu_to_be32(MODQP_WRITE_EC); sge_cmd = qpid << 8 | 3; wqe->sge_cmd = cpu_to_be64(sge_cmd); skb->priority = CPL_PRIORITY_CONTROL; - return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb)); + return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); } -int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) +int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel) { struct rdma_cq_setup setup; int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); + size += 1; /* one extra page for storing cq-in-err state */ cq->cqid = cxio_hal_get_cqid(rdev_p->rscp); if (!cq->cqid) return -ENOMEM; - cq->sw_queue = kzalloc(size, GFP_KERNEL); - if (!cq->sw_queue) - return -ENOMEM; - cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), - (1UL << (cq->size_log2)) * - sizeof(struct t3_cqe), + if (kernel) { + cq->sw_queue = kzalloc(size, GFP_KERNEL); + if (!cq->sw_queue) + return -ENOMEM; + } + cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size, &(cq->dma_addr), GFP_KERNEL); if (!cq->queue) { kfree(cq->sw_queue); return -ENOMEM; } - pci_unmap_addr_set(cq, mapping, cq->dma_addr); + dma_unmap_addr_set(cq, mapping, cq->dma_addr); memset(cq->queue, 0, size); setup.id = cq->cqid; setup.base_addr = (u64) (cq->dma_addr); setup.size = 1UL << cq->size_log2; setup.credits = 65535; setup.credit_thres = 1; - if (rdev_p->t3cdev_p->type == T3B) + if (rdev_p->t3cdev_p->type != T3A) setup.ovfl_mode = 0; else setup.ovfl_mode = 1; return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); } +#ifdef notyet int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) { struct rdma_cq_setup setup; @@ -196,6 +201,7 @@ int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) setup.ovfl_mode = 1; return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); } +#endif static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) { @@ -224,7 +230,7 @@ static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) } out: mutex_unlock(&uctx->lock); - PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid); + PDBG("%s qpid 0x%x\n", __func__, qpid); return qpid; } @@ -236,7 +242,7 @@ static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid, entry = kmalloc(sizeof *entry, GFP_KERNEL); if (!entry) return; - PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid); + PDBG("%s qpid 0x%x\n", __func__, qpid); entry->qpid = qpid; mutex_lock(&uctx->lock); list_add_tail(&entry->entry, &uctx->qpids); @@ -275,7 +281,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, if (!wq->qpid) return -ENOMEM; - wq->rq = kzalloc(depth * sizeof(u64), GFP_KERNEL); + wq->rq = kzalloc(depth * sizeof(struct t3_swrq), GFP_KERNEL); if (!wq->rq) goto err1; @@ -294,12 +300,13 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, goto err4; memset(wq->queue, 0, depth * sizeof(union t3_wr)); - pci_unmap_addr_set(wq, mapping, wq->dma_addr); + dma_unmap_addr_set(wq, mapping, wq->dma_addr); wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; if (!kernel_domain) wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + (wq->qpid << rdev_p->qpshift); - PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __FUNCTION__, + wq->rdev = rdev_p; + PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__, wq->qpid, wq->doorbell, (unsigned long long) wq->udb); return 0; err4: @@ -321,7 +328,7 @@ int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), (1UL << (cq->size_log2)) * sizeof(struct t3_cqe), cq->queue, - pci_unmap_addr(cq, mapping)); + dma_unmap_addr(cq, mapping)); cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); return err; } @@ -332,7 +339,7 @@ int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq, dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), (1UL << (wq->size_log2)) * sizeof(union t3_wr), wq->queue, - pci_unmap_addr(wq, mapping)); + dma_unmap_addr(wq, mapping)); kfree(wq->sq); cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2)); kfree(wq->rq); @@ -344,7 +351,7 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq) { struct t3_cqe cqe; - PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __FUNCTION__, + PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, wq, cq, cq->sw_rptr, cq->sw_wptr); memset(&cqe, 0, sizeof(cqe)); cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | @@ -358,18 +365,22 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq) cq->sw_wptr++; } -void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) +int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) { u32 ptr; + int flushed = 0; - PDBG("%s wq %p cq %p\n", __FUNCTION__, wq, cq); + PDBG("%s wq %p cq %p\n", __func__, wq, cq); /* flush RQ */ - PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __FUNCTION__, + PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__, wq->rq_rptr, wq->rq_wptr, count); ptr = wq->rq_rptr + count; - while (ptr++ != wq->rq_wptr) + while (ptr++ != wq->rq_wptr) { insert_recv_cqe(wq, cq); + flushed++; + } + return flushed; } static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, @@ -377,7 +388,7 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, { struct t3_cqe cqe; - PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __FUNCTION__, + PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__, wq, cq, cq->sw_rptr, cq->sw_wptr); memset(&cqe, 0, sizeof(cqe)); cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | @@ -393,18 +404,22 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, cq->sw_wptr++; } -void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) +int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) { __u32 ptr; + int flushed = 0; struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2); ptr = wq->sq_rptr + count; - sqp += count; + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); while (ptr != wq->sq_wptr) { + sqp->signaled = 0; insert_sq_cqe(wq, cq, sqp); - sqp++; ptr++; + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); + flushed++; } + return flushed; } /* @@ -414,11 +429,11 @@ void cxio_flush_hw_cq(struct t3_cq *cq) { struct t3_cqe *cqe, *swcqe; - PDBG("%s cq %p cqid 0x%x\n", __FUNCTION__, cq, cq->cqid); + PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid); cqe = cxio_next_hw_cqe(cq); while (cqe) { PDBG("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n", - __FUNCTION__, cq->rptr, cq->sw_wptr); + __func__, cq->rptr, cq->sw_wptr); swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2); *swcqe = *cqe; swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); @@ -439,7 +454,7 @@ static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq) if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe)) return 0; - if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) && + if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) && Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) return 0; @@ -455,12 +470,13 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count) ptr = cq->sw_rptr; while (!Q_EMPTY(ptr, cq->sw_wptr)) { cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); - if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) && + if ((SQ_TYPE(*cqe) || + ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) && (CQE_QPID(*cqe) == wq->qpid)) (*count)++; ptr++; } - PDBG("%s cq %p count %d\n", __FUNCTION__, cq, *count); + PDBG("%s cq %p count %d\n", __func__, cq, *count); } void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count) @@ -469,7 +485,7 @@ void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count) u32 ptr; *count = 0; - PDBG("%s count zero %d\n", __FUNCTION__, *count); + PDBG("%s count zero %d\n", __func__, *count); ptr = cq->sw_rptr; while (!Q_EMPTY(ptr, cq->sw_wptr)) { cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); @@ -478,7 +494,7 @@ void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count) (*count)++; ptr++; } - PDBG("%s cq %p count %d\n", __FUNCTION__, cq, *count); + PDBG("%s cq %p count %d\n", __func__, cq, *count); } static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p) @@ -505,12 +521,12 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) skb = alloc_skb(sizeof(*wqe), GFP_KERNEL); if (!skb) { - PDBG("%s alloc_skb failed\n", __FUNCTION__); + PDBG("%s alloc_skb failed\n", __func__); return -ENOMEM; } err = cxio_hal_init_ctrl_cq(rdev_p); if (err) { - PDBG("%s err %d initializing ctrl_cq\n", __FUNCTION__, err); + PDBG("%s err %d initializing ctrl_cq\n", __func__, err); goto err; } rdev_p->ctrl_qp.workq = dma_alloc_coherent( @@ -520,11 +536,11 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) &(rdev_p->ctrl_qp.dma_addr), GFP_KERNEL); if (!rdev_p->ctrl_qp.workq) { - PDBG("%s dma_alloc_coherent failed\n", __FUNCTION__); + PDBG("%s dma_alloc_coherent failed\n", __func__); err = -ENOMEM; goto err; } - pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping, + dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping, rdev_p->ctrl_qp.dma_addr); rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; memset(rdev_p->ctrl_qp.workq, 0, @@ -548,8 +564,8 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32; wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe)); memset(wqe, 0, sizeof(*wqe)); - build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 1, - T3_CTL_QP_TID, 7); + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0, + T3_CTL_QP_TID, 7, T3_SOPEOP); wqe->flags = cpu_to_be32(MODQP_WRITE_EC); sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; wqe->sge_cmd = cpu_to_be64(sge_cmd); @@ -559,7 +575,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) (unsigned long long) rdev_p->ctrl_qp.dma_addr, rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2); skb->priority = CPL_PRIORITY_CONTROL; - return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb)); + return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); err: kfree_skb(skb); return err; @@ -570,52 +586,52 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) dma_free_coherent(&(rdev_p->rnic_info.pdev->dev), (1UL << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr), rdev_p->ctrl_qp.workq, - pci_unmap_addr(&rdev_p->ctrl_qp, mapping)); + dma_unmap_addr(&rdev_p->ctrl_qp, mapping)); return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID); } /* write len bytes of data into addr (32B aligned address) * If data is NULL, clear len byte of memory to zero. - * caller aquires the ctrl_qp lock before the call + * caller acquires the ctrl_qp lock before the call */ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, - u32 len, void *data, int completion) + u32 len, void *data) { u32 i, nr_wqe, copy_len; u8 *copy_data; - u8 wr_len, utx_len; /* lenght in 8 byte flit */ + u8 wr_len, utx_len; /* length in 8 byte flit */ enum t3_wr_flags flag; __be64 *wqe; u64 utx_cmd; addr &= 0x7FFFFFF; nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */ PDBG("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n", - __FUNCTION__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len, + __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len, nr_wqe, data, addr); utx_len = 3; /* in 32B unit */ for (i = 0; i < nr_wqe; i++) { if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr, T3_CTRL_QP_SIZE_LOG2)) { PDBG("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, " - "wait for more space i %d\n", __FUNCTION__, + "wait for more space i %d\n", __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i); if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, !Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr, T3_CTRL_QP_SIZE_LOG2))) { PDBG("%s ctrl_qp workq interrupted\n", - __FUNCTION__); + __func__); return -ERESTARTSYS; } PDBG("%s ctrl_qp wakeup, continue posting work request " - "i %d\n", __FUNCTION__, i); + "i %d\n", __func__, i); } wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % (1 << T3_CTRL_QP_SIZE_LOG2))); flag = 0; if (i == (nr_wqe - 1)) { /* last WQE */ - flag = completion ? T3_COMPLETION_FLAG : 0; + flag = T3_COMPLETION_FLAG; if (len % 32) utx_len = len / 32 + 1; else @@ -629,7 +645,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, if ((i != 0) && (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) { flag = T3_COMPLETION_FLAG; - PDBG("%s force completion at i %d\n", __FUNCTION__, i); + PDBG("%s force completion at i %d\n", __func__, i); } /* build the utx mem command */ @@ -665,7 +681,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, Q_GENBIT(rdev_p->ctrl_qp.wptr, T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, - wr_len); + wr_len, T3_SOPEOP); if (flag == T3_COMPLETION_FLAG) ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); len -= 96; @@ -674,21 +690,23 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, return 0; } -/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size - * OUT: stag index, actual pbl_size, pbl_addr allocated. +/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr + * OUT: stag index * TBD: shared memory region support */ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, u32 *stag, u8 stag_state, u32 pdid, enum tpt_mem_type type, enum tpt_mem_perm perm, - u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl, - u32 *pbl_size, u32 *pbl_addr) + u32 zbva, u64 to, u32 len, u8 page_size, + u32 pbl_size, u32 pbl_addr) { int err; struct tpt_entry tpt; u32 stag_idx; u32 wptr; - int rereg = (*stag != T3_STAG_UNSET); + + if (cxio_fatal_error(rdev_p)) + return -EIO; stag_state = stag_state > 0; stag_idx = (*stag) >> 8; @@ -700,32 +718,10 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, *stag = (stag_idx << 8) | ((*stag) & 0xFF); } PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", - __FUNCTION__, stag_state, type, pdid, stag_idx); - - if (reset_tpt_entry) - cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3); - else if (!rereg) { - *pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3); - if (!*pbl_addr) { - return -ENOMEM; - } - } + __func__, stag_state, type, pdid, stag_idx); mutex_lock(&rdev_p->ctrl_qp.lock); - /* write PBL first if any - update pbl only if pbl list exist */ - if (pbl) { - - PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", - __FUNCTION__, *pbl_addr, rdev_p->rnic_info.pbl_base, - *pbl_size); - err = cxio_hal_ctrl_qp_write_mem(rdev_p, - (*pbl_addr >> 5), - (*pbl_size << 3), pbl, 0); - if (err) - goto ret; - } - /* write TPT entry */ if (reset_tpt_entry) memset(&tpt, 0, sizeof(tpt)); @@ -736,27 +732,25 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid)); BUG_ON(page_size >= 28); tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) | - F_TPT_MW_BIND_ENABLE | - V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | - V_TPT_PAGE_SIZE(page_size)); - tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 : - cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3)); + ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) | + V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) | + V_TPT_PAGE_SIZE(page_size)); + tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3)); tpt.len = cpu_to_be32(len); tpt.va_hi = cpu_to_be32((u32) (to >> 32)); tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL)); tpt.rsvd_bind_cnt_or_pstag = 0; - tpt.rsvd_pbl_size = reset_tpt_entry ? 0 : - cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2)); + tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2)); } err = cxio_hal_ctrl_qp_write_mem(rdev_p, stag_idx + (rdev_p->rnic_info.tpt_base >> 5), - sizeof(tpt), &tpt, 1); + sizeof(tpt), &tpt); /* release the stag index to free pool */ if (reset_tpt_entry) cxio_hal_put_stag(rdev_p->rscp, stag_idx); -ret: + wptr = rdev_p->ctrl_qp.wptr; mutex_unlock(&rdev_p->ctrl_qp.lock); if (!err) @@ -767,44 +761,74 @@ ret: return err; } +int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, + u32 pbl_addr, u32 pbl_size) +{ + u32 wptr; + int err; + + PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", + __func__, pbl_addr, rdev_p->rnic_info.pbl_base, + pbl_size); + + mutex_lock(&rdev_p->ctrl_qp.lock); + err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3, + pbl); + wptr = rdev_p->ctrl_qp.wptr; + mutex_unlock(&rdev_p->ctrl_qp.lock); + if (err) + return err; + + if (wait_event_interruptible(rdev_p->ctrl_qp.waitq, + SEQ32_GE(rdev_p->ctrl_qp.rptr, + wptr))) + return -ERESTARTSYS; + + return 0; +} + int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, __be64 *pbl, u32 *pbl_size, - u32 *pbl_addr) + u8 page_size, u32 pbl_size, u32 pbl_addr) { *stag = T3_STAG_UNSET; return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl, pbl_size, pbl_addr); + zbva, to, len, page_size, pbl_size, pbl_addr); } int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, __be64 *pbl, u32 *pbl_size, - u32 *pbl_addr) + u8 page_size, u32 pbl_size, u32 pbl_addr) { return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, - zbva, to, len, page_size, pbl, pbl_size, pbl_addr); + zbva, to, len, page_size, pbl_size, pbl_addr); } int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size, u32 pbl_addr) { - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL, - &pbl_size, &pbl_addr); + return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, + pbl_size, pbl_addr); } int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid) { - u32 pbl_size = 0; *stag = T3_STAG_UNSET; return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0, - NULL, &pbl_size, NULL); + 0, 0); } int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) { - return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL, - NULL, NULL); + return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, + 0, 0); +} + +int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr) +{ + *stag = T3_STAG_UNSET; + return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR, + 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr); } int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) @@ -813,7 +837,7 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC); if (!skb) return -ENOMEM; - PDBG("%s rdev_p %p\n", __FUNCTION__, rdev_p); + PDBG("%s rdev_p %p\n", __func__, rdev_p); wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe)); wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT)); wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) | @@ -828,14 +852,17 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) wqe->mpaattrs = attr->mpaattrs; wqe->qpcaps = attr->qpcaps; wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss); - wqe->flags = cpu_to_be32(attr->flags); + wqe->rqe_count = cpu_to_be16(attr->rqe_count); + wqe->flags_rtr_type = cpu_to_be16(attr->flags | + V_RTR_TYPE(attr->rtr_type) | + V_CHAN(attr->chan)); wqe->ord = cpu_to_be32(attr->ord); wqe->ird = cpu_to_be32(attr->ird); wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr); wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size); - wqe->rsvd = 0; + wqe->irs = cpu_to_be32(attr->irs); skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */ - return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb)); + return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb); } void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb) @@ -855,7 +882,7 @@ static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb) struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data; PDBG("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x" " se %0x notify %0x cqbranch %0x creditth %0x\n", - cnt, __FUNCTION__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg), + cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg), RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg), RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg), RSPQ_CREDIT_THRESH(rsp_msg)); @@ -867,7 +894,7 @@ static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb) CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); rdev_p = (struct cxio_rdev *)t3cdev_p->ulp; if (!rdev_p) { - PDBG("%s called by t3cdev %p with null ulp\n", __FUNCTION__, + PDBG("%s called by t3cdev %p with null ulp\n", __func__, t3cdev_p); return 0; } @@ -894,7 +921,7 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p) if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) { return -EBUSY; } - netdev_p = dev_get_by_name(rdev_p->dev_name); + netdev_p = dev_get_by_name(&init_net, rdev_p->dev_name); if (!netdev_p) { return -EINVAL; } @@ -907,29 +934,46 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p) strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name, T3_MAX_DEV_NAME_LEN); } else { - PDBG("%s t3cdev_p or dev_name must be set\n", __FUNCTION__); + PDBG("%s t3cdev_p or dev_name must be set\n", __func__); return -EINVAL; } list_add_tail(&rdev_p->entry, &rdev_list); - PDBG("%s opening rnic dev %s\n", __FUNCTION__, rdev_p->dev_name); + PDBG("%s opening rnic dev %s\n", __func__, rdev_p->dev_name); memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp)); if (!rdev_p->t3cdev_p) - rdev_p->t3cdev_p = T3CDEV(netdev_p); + rdev_p->t3cdev_p = dev2t3cdev(netdev_p); rdev_p->t3cdev_p->ulp = (void *) rdev_p; + + err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO, + &(rdev_p->fw_info)); + if (err) { + printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n", + __func__, rdev_p->t3cdev_p, err); + goto err1; + } + if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) { + printk(KERN_ERR MOD "fatal firmware version mismatch: " + "need version %u but adapter has version %u\n", + CXIO_FW_MAJ, + G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers)); + err = -EINVAL; + goto err1; + } + err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS, &(rdev_p->rnic_info)); if (err) { printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n", - __FUNCTION__, rdev_p->t3cdev_p, err); + __func__, rdev_p->t3cdev_p, err); goto err1; } err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS, &(rdev_p->port_info)); if (err) { printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n", - __FUNCTION__, rdev_p->t3cdev_p, err); + __func__, rdev_p->t3cdev_p, err); goto err1; } @@ -946,7 +990,7 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p) rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1; PDBG("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d " "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n", - __FUNCTION__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base, + __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base, rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p), rdev_p->rnic_info.pbl_base, rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base, @@ -960,7 +1004,7 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p) err = cxio_hal_init_ctrl_qp(rdev_p); if (err) { printk(KERN_ERR "%s error %d initializing ctrl_qp.\n", - __FUNCTION__, err); + __func__, err); goto err1; } err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0, @@ -968,19 +1012,19 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p) T3_MAX_NUM_PD); if (err) { printk(KERN_ERR "%s error %d initializing hal resources.\n", - __FUNCTION__, err); + __func__, err); goto err2; } err = cxio_hal_pblpool_create(rdev_p); if (err) { printk(KERN_ERR "%s error %d initializing pbl mem pool.\n", - __FUNCTION__, err); + __func__, err); goto err3; } err = cxio_hal_rqtpool_create(rdev_p); if (err) { printk(KERN_ERR "%s error %d initializing rqt mem pool.\n", - __FUNCTION__, err); + __func__, err); goto err4; } return 0; @@ -991,6 +1035,7 @@ err3: err2: cxio_hal_destroy_ctrl_qp(rdev_p); err1: + rdev_p->t3cdev_p->ulp = NULL; list_del(&rdev_p->entry); return err; } @@ -1001,9 +1046,9 @@ void cxio_rdev_close(struct cxio_rdev *rdev_p) cxio_hal_pblpool_destroy(rdev_p); cxio_hal_rqtpool_destroy(rdev_p); list_del(&rdev_p->entry); - rdev_p->t3cdev_p->ulp = NULL; cxio_hal_destroy_ctrl_qp(rdev_p); cxio_hal_destroy_resource(rdev_p->rscp); + rdev_p->t3cdev_p->ulp = NULL; } } @@ -1042,7 +1087,7 @@ static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq) * Insert this completed cqe into the swcq. */ PDBG("%s moving cqe into swcq sq idx %ld cq idx %ld\n", - __FUNCTION__, Q_PTR2IDX(ptr, wq->sq_size_log2), + __func__, Q_PTR2IDX(ptr, wq->sq_size_log2), Q_PTR2IDX(cq->sw_wptr, cq->size_log2)); sqp->cqe.header |= htonl(V_CQE_SWCQE(1)); *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) @@ -1111,7 +1156,7 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, PDBG("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x" " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n", - __FUNCTION__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe), + __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe), CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe), CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe), CQE_WRID_LOW(*hw_cqe)); @@ -1134,6 +1179,18 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) { /* + * If this is an unsolicited read response, then the read + * was generated by the kernel driver as part of peer-2-peer + * connection setup. So ignore the completion. + */ + if (!wq->oldest_read) { + if (CQE_STATUS(*hw_cqe)) + wq->error = 1; + ret = -1; + goto skip_cqe; + } + + /* * Don't write to the HWCQ, so create a new read req CQE * in local memory. */ @@ -1172,11 +1229,12 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, } /* incoming SEND with no receive posted failures */ - if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) && + if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) && Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { ret = -1; goto skip_cqe; } + BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe)); goto proc_cqe; } @@ -1191,6 +1249,13 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, * then we complete this with TPT_ERR_MSN and mark the wq in * error. */ + + if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { + wq->error = 1; + ret = -1; + goto skip_cqe; + } + if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) { wq->error = 1; hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN)); @@ -1214,7 +1279,7 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, struct t3_swsq *sqp; PDBG("%s out of order completion going in swsq at idx %ld\n", - __FUNCTION__, + __func__, Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2)); sqp = wq->sq + Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2); @@ -1233,15 +1298,19 @@ proc_cqe: */ if (SQ_TYPE(*hw_cqe)) { wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); - PDBG("%s completing sq idx %ld\n", __FUNCTION__, + PDBG("%s completing sq idx %ld\n", __func__, Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); - *cookie = (wq->sq + - Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id; + *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id; wq->sq_rptr++; } else { - PDBG("%s completing rq idx %ld\n", __FUNCTION__, + PDBG("%s completing rq idx %ld\n", __func__, Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); - *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); + *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id; + if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr) + cxio_hal_pblpool_free(wq->rdev, + wq->rq[Q_PTR2IDX(wq->rq_rptr, + wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE); + BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr)); wq->rq_rptr++; } @@ -1254,11 +1323,11 @@ flush_wq: skip_cqe: if (SW_CQE(*hw_cqe)) { PDBG("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n", - __FUNCTION__, cq, cq->cqid, cq->sw_rptr); + __func__, cq, cq->cqid, cq->sw_rptr); ++cq->sw_rptr; } else { PDBG("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n", - __FUNCTION__, cq, cq->cqid, cq->rptr); + __func__, cq, cq->cqid, cq->rptr); ++cq->rptr; /* diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 99543d63470..78fbe9ffe7f 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -34,6 +34,7 @@ #include <linux/list.h> #include <linux/mutex.h> +#include <linux/kfifo.h> #include "t3_cpl.h" #include "t3cdev.h" @@ -45,19 +46,24 @@ #define T3_CTRL_QP_SIZE_LOG2 8 #define T3_CTRL_CQ_ID 0 -/* TBD */ #define T3_MAX_NUM_RI (1<<15) #define T3_MAX_NUM_QP (1<<15) #define T3_MAX_NUM_CQ (1<<15) #define T3_MAX_NUM_PD (1<<15) #define T3_MAX_PBL_SIZE 256 #define T3_MAX_RQ_SIZE 1024 +#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) +#define T3_MAX_CQ_DEPTH 65536 #define T3_MAX_NUM_STAG (1<<15) +#define T3_MAX_MR_SIZE 0x100000000ULL +#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ #define T3_STAG_UNSET 0xffffffff #define T3_MAX_DEV_NAME_LEN 32 +#define CXIO_FW_MAJ 7 + struct cxio_hal_ctrl_qp { u32 wptr; u32 rptr; @@ -65,18 +71,18 @@ struct cxio_hal_ctrl_qp { wait_queue_head_t waitq;/* wait for RspQ/CQE msg */ union t3_wr *workq; /* the work request queue */ dma_addr_t dma_addr; /* pci bus address of the workq */ - DECLARE_PCI_UNMAP_ADDR(mapping) + DEFINE_DMA_UNMAP_ADDR(mapping); void __iomem *doorbell; }; struct cxio_hal_resource { - struct kfifo *tpt_fifo; + struct kfifo tpt_fifo; spinlock_t tpt_fifo_lock; - struct kfifo *qpid_fifo; + struct kfifo qpid_fifo; spinlock_t qpid_fifo_lock; - struct kfifo *cqid_fifo; + struct kfifo cqid_fifo; spinlock_t cqid_fifo_lock; - struct kfifo *pdid_fifo; + struct kfifo pdid_fifo; spinlock_t pdid_fifo_lock; }; @@ -105,8 +111,16 @@ struct cxio_rdev { struct gen_pool *pbl_pool; struct gen_pool *rqt_pool; struct list_head entry; + struct ch_embedded_info fw_info; + u32 flags; +#define CXIO_ERROR_FATAL 1 }; +static inline int cxio_fatal_error(struct cxio_rdev *rdev_p) +{ + return rdev_p->flags & CXIO_ERROR_FATAL; +} + static inline int cxio_num_stags(struct cxio_rdev *rdev_p) { return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5)); @@ -143,7 +157,7 @@ int cxio_rdev_open(struct cxio_rdev *rdev); void cxio_rdev_close(struct cxio_rdev *rdev); int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq, enum t3_cq_opcode op, u32 credit); -int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq); +int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel); int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq); void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); @@ -153,17 +167,18 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq, int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq, struct cxio_ucontext *uctx); int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode); +int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl, + u32 pbl_addr, u32 pbl_size); int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, __be64 *pbl, u32 *pbl_size, - u32 *pbl_addr); + u8 page_size, u32 pbl_size, u32 pbl_addr); int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, - u8 page_size, __be64 *pbl, u32 *pbl_size, - u32 *pbl_addr); + u8 page_size, u32 pbl_size, u32 pbl_addr); int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, u32 pbl_addr); int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); +int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr); int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); @@ -172,13 +187,14 @@ u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp); void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); int __init cxio_hal_init(void); void __exit cxio_hal_exit(void); -void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); -void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); +int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); +int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count); void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); void cxio_flush_hw_cq(struct t3_cq *cq); int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, u8 *cqe_flushed, u64 *cookie, u32 *credit); +int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb); #define MOD "iw_cxgb3: " #define PDBG(fmt, args...) pr_debug(MOD fmt, ## args) diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c index d3095ae5bc2..c40088ecf9f 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_resource.c +++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c @@ -39,12 +39,12 @@ #include "cxio_resource.h" #include "cxio_hal.h" -static struct kfifo *rhdl_fifo; +static struct kfifo rhdl_fifo; static spinlock_t rhdl_fifo_lock; #define RANDOM_SIZE 16 -static int __cxio_init_resource_fifo(struct kfifo **fifo, +static int __cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t *fifo_lock, u32 nr, u32 skip_low, u32 skip_high, @@ -55,50 +55,51 @@ static int __cxio_init_resource_fifo(struct kfifo **fifo, u32 rarray[16]; spin_lock_init(fifo_lock); - *fifo = kfifo_alloc(nr * sizeof(u32), GFP_KERNEL, fifo_lock); - if (IS_ERR(*fifo)) + if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL)) return -ENOMEM; for (i = 0; i < skip_low + skip_high; i++) - __kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32)); + kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32)); if (random) { j = 0; - random_bytes = random32(); + random_bytes = prandom_u32(); for (i = 0; i < RANDOM_SIZE; i++) rarray[i] = i + skip_low; for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { if (j >= RANDOM_SIZE) { j = 0; - random_bytes = random32(); + random_bytes = prandom_u32(); } idx = (random_bytes >> (j * 2)) & 0xF; - __kfifo_put(*fifo, + kfifo_in(fifo, (unsigned char *) &rarray[idx], sizeof(u32)); rarray[idx] = i; j++; } for (i = 0; i < RANDOM_SIZE; i++) - __kfifo_put(*fifo, + kfifo_in(fifo, (unsigned char *) &rarray[i], sizeof(u32)); } else for (i = skip_low; i < nr - skip_high; i++) - __kfifo_put(*fifo, (unsigned char *) &i, sizeof(u32)); + kfifo_in(fifo, (unsigned char *) &i, sizeof(u32)); for (i = 0; i < skip_low + skip_high; i++) - kfifo_get(*fifo, (unsigned char *) &entry, sizeof(u32)); + if (kfifo_out_locked(fifo, (unsigned char *) &entry, + sizeof(u32), fifo_lock) != sizeof(u32)) + break; return 0; } -static int cxio_init_resource_fifo(struct kfifo **fifo, spinlock_t * fifo_lock, +static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock, u32 nr, u32 skip_low, u32 skip_high) { return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, skip_high, 0)); } -static int cxio_init_resource_fifo_random(struct kfifo **fifo, +static int cxio_init_resource_fifo_random(struct kfifo *fifo, spinlock_t * fifo_lock, u32 nr, u32 skip_low, u32 skip_high) { @@ -113,15 +114,13 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) spin_lock_init(&rdev_p->rscp->qpid_fifo_lock); - rdev_p->rscp->qpid_fifo = kfifo_alloc(T3_MAX_NUM_QP * sizeof(u32), - GFP_KERNEL, - &rdev_p->rscp->qpid_fifo_lock); - if (IS_ERR(rdev_p->rscp->qpid_fifo)) + if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32), + GFP_KERNEL)) return -ENOMEM; for (i = 16; i < T3_MAX_NUM_QP; i++) if (!(i & rdev_p->qpmask)) - __kfifo_put(rdev_p->rscp->qpid_fifo, + kfifo_in(&rdev_p->rscp->qpid_fifo, (unsigned char *) &i, sizeof(u32)); return 0; } @@ -134,7 +133,7 @@ int cxio_hal_init_rhdl_resource(u32 nr_rhdl) void cxio_hal_destroy_rhdl_resource(void) { - kfifo_free(rhdl_fifo); + kfifo_free(&rhdl_fifo); } /* nr_* must be power of 2 */ @@ -167,11 +166,11 @@ int cxio_hal_init_resource(struct cxio_rdev *rdev_p, goto pdid_err; return 0; pdid_err: - kfifo_free(rscp->cqid_fifo); + kfifo_free(&rscp->cqid_fifo); cqid_err: - kfifo_free(rscp->qpid_fifo); + kfifo_free(&rscp->qpid_fifo); qpid_err: - kfifo_free(rscp->tpt_fifo); + kfifo_free(&rscp->tpt_fifo); tpt_err: return -ENOMEM; } @@ -179,69 +178,73 @@ tpt_err: /* * returns 0 if no resource available */ -static u32 cxio_hal_get_resource(struct kfifo *fifo) +static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock) { u32 entry; - if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32))) + if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)) return entry; else return 0; /* fifo emptry */ } -static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry) +static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock, + u32 entry) { - BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0); + BUG_ON( + kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock) + == 0); } u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(rscp->tpt_fifo); + return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock); } void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) { - cxio_hal_put_resource(rscp->tpt_fifo, stag); + cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag); } u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) { - u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo); - PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid); + u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo, + &rscp->qpid_fifo_lock); + PDBG("%s qpid 0x%x\n", __func__, qpid); return qpid; } void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) { - PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid); - cxio_hal_put_resource(rscp->qpid_fifo, qpid); + PDBG("%s qpid 0x%x\n", __func__, qpid); + cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid); } u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(rscp->cqid_fifo); + return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock); } void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) { - cxio_hal_put_resource(rscp->cqid_fifo, cqid); + cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid); } u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) { - return cxio_hal_get_resource(rscp->pdid_fifo); + return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock); } void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid) { - cxio_hal_put_resource(rscp->pdid_fifo, pdid); + cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid); } void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) { - kfifo_free(rscp->tpt_fifo); - kfifo_free(rscp->cqid_fifo); - kfifo_free(rscp->qpid_fifo); - kfifo_free(rscp->pdid_fifo); + kfifo_free(&rscp->tpt_fifo); + kfifo_free(&rscp->cqid_fifo); + kfifo_free(&rscp->qpid_fifo); + kfifo_free(&rscp->pdid_fifo); kfree(rscp); } @@ -250,31 +253,51 @@ void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) */ #define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ -#define PBL_CHUNK 2*1024*1024 u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) { unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size); - PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); return (u32)addr; } void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) { - PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size); + PDBG("%s addr 0x%x size %d\n", __func__, addr, size); gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); } int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) { - unsigned long i; + unsigned pbl_start, pbl_chunk; + rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); - if (rdev_p->pbl_pool) - for (i = rdev_p->rnic_info.pbl_base; - i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1; - i += PBL_CHUNK) - gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1); - return rdev_p->pbl_pool ? 0 : -ENOMEM; + if (!rdev_p->pbl_pool) + return -ENOMEM; + + pbl_start = rdev_p->rnic_info.pbl_base; + pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1; + + while (pbl_start < rdev_p->rnic_info.pbl_top) { + pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1, + pbl_chunk); + if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) { + PDBG("%s failed to add PBL chunk (%x/%x)\n", + __func__, pbl_start, pbl_chunk); + if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { + printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n", + __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start); + return 0; + } + pbl_chunk >>= 1; + } else { + PDBG("%s added PBL chunk (%x/%x)\n", + __func__, pbl_start, pbl_chunk); + pbl_start += pbl_chunk; + } + } + + return 0; } void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) @@ -292,13 +315,13 @@ void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size) { unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6); - PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size << 6); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); return (u32)addr; } void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) { - PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size << 6); + PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6); gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); } diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h index ff7290eacef..83d2e19d31a 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_wr.h +++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h @@ -39,6 +39,9 @@ #define T3_MAX_SGE 4 #define T3_MAX_INLINE 64 +#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3) +#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024) +#define T3_STAG0_PAGE_SHIFT 15 #define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) #define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ @@ -72,7 +75,8 @@ enum t3_wr_opcode { T3_WR_BIND = FW_WROPCODE_RI_BIND_MW, T3_WR_RCV = FW_WROPCODE_RI_RECEIVE, T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT, - T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP + T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP, + T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR } __attribute__ ((packed)); enum t3_rdma_opcode { @@ -89,7 +93,8 @@ enum t3_rdma_opcode { T3_FAST_REGISTER, T3_LOCAL_INV, T3_QP_MOD, - T3_BYPASS + T3_BYPASS, + T3_RDMA_READ_REQ_WITH_INV, } __attribute__ ((packed)); static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) @@ -103,6 +108,7 @@ static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) case T3_WR_BIND: return T3_BIND_MW; case T3_WR_INIT: return T3_RDMA_INIT; case T3_WR_QP_MOD: return T3_QP_MOD; + case T3_WR_FASTREG: return T3_FAST_REGISTER; default: break; } return -1; @@ -170,11 +176,54 @@ struct t3_send_wr { struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ }; +#define T3_MAX_FASTREG_DEPTH 10 +#define T3_MAX_FASTREG_FRAG 10 + +struct t3_fastreg_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + __be32 stag; /* 2 */ + __be32 len; + __be32 va_base_hi; /* 3 */ + __be32 va_base_lo_fbo; + __be32 page_type_perms; /* 4 */ + __be32 reserved1; + __be64 pbl_addrs[0]; /* 5+ */ +}; + +/* + * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this. + */ +struct t3_pbl_frag { + struct fw_riwrh wrh; /* 0 */ + __be64 pbl_addrs[14]; /* 1..14 */ +}; + +#define S_FR_PAGE_COUNT 24 +#define M_FR_PAGE_COUNT 0xff +#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT) +#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT) + +#define S_FR_PAGE_SIZE 16 +#define M_FR_PAGE_SIZE 0x1f +#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE) +#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE) + +#define S_FR_TYPE 8 +#define M_FR_TYPE 0x1 +#define V_FR_TYPE(x) ((x) << S_FR_TYPE) +#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE) + +#define S_FR_PERMS 0 +#define M_FR_PERMS 0xff +#define V_FR_PERMS(x) ((x) << S_FR_PERMS) +#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS) + struct t3_local_inv_wr { struct fw_riwrh wrh; /* 0 */ union t3_wrid wrid; /* 1 */ __be32 stag; /* 2 */ - __be32 reserved3; + __be32 reserved; }; struct t3_rdma_write_wr { @@ -193,7 +242,8 @@ struct t3_rdma_read_wr { struct fw_riwrh wrh; /* 0 */ union t3_wrid wrid; /* 1 */ u8 rdmaop; /* 2 */ - u8 reserved[3]; + u8 local_inv; + u8 reserved[2]; __be32 rem_stag; __be64 rem_to; /* 3 */ __be32 local_stag; /* 4 */ @@ -201,18 +251,6 @@ struct t3_rdma_read_wr { __be64 local_to; /* 5 */ }; -enum t3_addr_type { - T3_VA_BASED_TO = 0x0, - T3_ZERO_BASED_TO = 0x1 -} __attribute__ ((packed)); - -enum t3_mem_perms { - T3_MEM_ACCESS_LOCAL_READ = 0x1, - T3_MEM_ACCESS_LOCAL_WRITE = 0x2, - T3_MEM_ACCESS_REM_READ = 0x4, - T3_MEM_ACCESS_REM_WRITE = 0x8 -} __attribute__ ((packed)); - struct t3_bind_mw_wr { struct fw_riwrh wrh; /* 0 */ union t3_wrid wrid; /* 1 */ @@ -278,6 +316,22 @@ enum t3_qp_caps { uP_RI_QP_STAG0_ENABLE = 0x10 } __attribute__ ((packed)); +enum rdma_init_rtr_types { + RTR_READ = 1, + RTR_WRITE = 2, + RTR_SEND = 3, +}; + +#define S_RTR_TYPE 2 +#define M_RTR_TYPE 0x3 +#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE) +#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE) + +#define S_CHAN 4 +#define M_CHAN 0x3 +#define V_CHAN(x) ((x) << S_CHAN) +#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN) + struct t3_rdma_init_attr { u32 tid; u32 qpid; @@ -293,7 +347,11 @@ struct t3_rdma_init_attr { u32 ird; u64 qp_dma_addr; u32 qp_dma_size; - u32 flags; + enum rdma_init_rtr_types rtr_type; + u16 flags; + u16 rqe_count; + u32 irs; + u32 chan; }; struct t3_rdma_init_wr { @@ -308,13 +366,13 @@ struct t3_rdma_init_wr { u8 mpaattrs; /* 5 */ u8 qpcaps; __be16 ulpdu_size; - __be32 flags; /* bits 31-1 - reservered */ - /* bit 0 - set if RECV posted */ + __be16 flags_rtr_type; + __be16 rqe_count; __be32 ord; /* 6 */ __be32 ird; __be64 qp_dma_addr; /* 7 */ __be32 qp_dma_size; /* 8 */ - u32 rsvd; + __be32 irs; }; struct t3_genbit { @@ -322,8 +380,14 @@ struct t3_genbit { __be64 genbit; }; +struct t3_wq_in_err { + u64 flit[13]; + u64 err; +}; + enum rdma_init_wr_flags { - RECVS_POSTED = 1, + MPA_INITIATOR = (1<<0), + PRIV_QP = (1<<1), }; union t3_wr { @@ -331,13 +395,16 @@ union t3_wr { struct t3_rdma_write_wr write; struct t3_rdma_read_wr read; struct t3_receive_wr recv; + struct t3_fastreg_wr fastreg; + struct t3_pbl_frag pbl_frag; struct t3_local_inv_wr local_inv; struct t3_bind_mw_wr bind; struct t3_bypass_wr bypass; struct t3_rdma_init_wr init; struct t3_modify_qp_wr qp_mod; struct t3_genbit genbit; - u64 flit[16]; + struct t3_wq_in_err wq_in_err; + __be64 flit[16]; }; #define T3_SQ_CQE_FLIT 13 @@ -351,12 +418,18 @@ static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe) return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags)); } +enum t3_wr_hdr_bits { + T3_EOP = 1, + T3_SOP = 2, + T3_SOPEOP = T3_EOP|T3_SOP, +}; + static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op, enum t3_wr_flags flags, u8 genbit, u32 tid, - u8 len) + u8 len, u8 sopeop) { wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) | - V_FW_RIWR_SOPEOP(M_FW_RIWR_SOPEOP) | + V_FW_RIWR_SOPEOP(sopeop) | V_FW_RIWR_FLAGS(flags)); wmb(); wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) | @@ -389,6 +462,7 @@ enum tpt_addr_type { }; enum tpt_mem_perm { + TPT_MW_BIND = 0x10, TPT_LOCAL_READ = 0x8, TPT_LOCAL_WRITE = 0x4, TPT_REMOTE_READ = 0x2, @@ -536,6 +610,12 @@ struct t3_cqe { #define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header))) #define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header))) +#define CQE_SEND_OPCODE(x)( \ + (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \ + (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \ + (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \ + (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV)) + #define CQE_LEN(x) (be32_to_cpu((x).len)) /* used for RQ completion processing */ @@ -600,13 +680,18 @@ struct t3_swsq { int signaled; }; +struct t3_swrq { + __u64 wr_id; + __u32 pbl_addr; +}; + /* * A T3 WQ implements both the SQ and RQ. */ struct t3_wq { - union t3_wr *queue; /* DMA accessable memory */ + union t3_wr *queue; /* DMA accessible memory */ dma_addr_t dma_addr; /* DMA address for HW */ - DECLARE_PCI_UNMAP_ADDR(mapping) /* unmap kruft */ + DEFINE_DMA_UNMAP_ADDR(mapping); /* unmap kruft */ u32 error; /* 1 once we go to ERROR */ u32 qpid; u32 wptr; /* idx to next available WR slot */ @@ -616,14 +701,15 @@ struct t3_wq { u32 sq_wptr; /* sq_wptr - sq_rptr == count of */ u32 sq_rptr; /* pending wrs */ u32 sq_size_log2; /* sq size */ - u64 *rq; /* SW RQ (holds consumer wr_ids */ + struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */ u32 rq_wptr; /* rq_wptr - rq_rptr == count of */ u32 rq_rptr; /* pending wrs */ - u64 *rq_oldest_wr; /* oldest wr on the SW RQ */ + struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */ u32 rq_size_log2; /* rq size */ u32 rq_addr; /* rq adapter address */ void __iomem *doorbell; /* kernel db */ u64 udb; /* user db if any */ + struct cxio_rdev *rdev; }; struct t3_cq { @@ -632,7 +718,7 @@ struct t3_cq { u32 wptr; u32 size_log2; dma_addr_t dma_addr; - DECLARE_PCI_UNMAP_ADDR(mapping) + DEFINE_DMA_UNMAP_ADDR(mapping); struct t3_cqe *queue; struct t3_cqe *sw_queue; u32 sw_rptr; @@ -642,9 +728,40 @@ struct t3_cq { #define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \ CQE_GENBIT(*cqe)) +struct t3_cq_status_page { + u32 cq_err; +}; + +static inline int cxio_cq_in_error(struct t3_cq *cq) +{ + return ((struct t3_cq_status_page *) + &cq->queue[1 << cq->size_log2])->cq_err; +} + +static inline void cxio_set_cq_in_error(struct t3_cq *cq) +{ + ((struct t3_cq_status_page *) + &cq->queue[1 << cq->size_log2])->cq_err = 1; +} + static inline void cxio_set_wq_in_error(struct t3_wq *wq) { - wq->queue->flit[13] = 1; + wq->queue->wq_in_err.err |= 1; +} + +static inline void cxio_disable_wq_db(struct t3_wq *wq) +{ + wq->queue->wq_in_err.err |= 2; +} + +static inline void cxio_enable_wq_db(struct t3_wq *wq) +{ + wq->queue->wq_in_err.err &= ~2; +} + +static inline int cxio_wq_db_enabled(struct t3_wq *wq) +{ + return !(wq->queue->wq_in_err.err & 2); } static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 0315c9d9fce..8e77dc543dd 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -47,42 +47,82 @@ MODULE_DESCRIPTION("Chelsio T3 RDMA Driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); -cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS]; - static void open_rnic_dev(struct t3cdev *); static void close_rnic_dev(struct t3cdev *); +static void iwch_event_handler(struct t3cdev *, u32, u32); struct cxgb3_client t3c_client = { .name = "iw_cxgb3", .add = open_rnic_dev, .remove = close_rnic_dev, .handlers = t3c_handlers, - .redirect = iwch_ep_redirect + .redirect = iwch_ep_redirect, + .event_handler = iwch_event_handler }; static LIST_HEAD(dev_list); static DEFINE_MUTEX(dev_mutex); +static int disable_qp_db(int id, void *p, void *data) +{ + struct iwch_qp *qhp = p; + + cxio_disable_wq_db(&qhp->wq); + return 0; +} + +static int enable_qp_db(int id, void *p, void *data) +{ + struct iwch_qp *qhp = p; + + if (data) + ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid); + cxio_enable_wq_db(&qhp->wq); + return 0; +} + +static void disable_dbs(struct iwch_dev *rnicp) +{ + spin_lock_irq(&rnicp->lock); + idr_for_each(&rnicp->qpidr, disable_qp_db, NULL); + spin_unlock_irq(&rnicp->lock); +} + +static void enable_dbs(struct iwch_dev *rnicp, int ring_db) +{ + spin_lock_irq(&rnicp->lock); + idr_for_each(&rnicp->qpidr, enable_qp_db, + (void *)(unsigned long)ring_db); + spin_unlock_irq(&rnicp->lock); +} + +static void iwch_db_drop_task(struct work_struct *work) +{ + struct iwch_dev *rnicp = container_of(work, struct iwch_dev, + db_drop_task.work); + enable_dbs(rnicp, 1); +} + static void rnic_init(struct iwch_dev *rnicp) { - PDBG("%s iwch_dev %p\n", __FUNCTION__, rnicp); + PDBG("%s iwch_dev %p\n", __func__, rnicp); idr_init(&rnicp->cqidr); idr_init(&rnicp->qpidr); idr_init(&rnicp->mmidr); spin_lock_init(&rnicp->lock); + INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); - rnicp->attr.vendor_id = 0x168; - rnicp->attr.vendor_part_id = 7; rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; - rnicp->attr.max_wrs = (1UL << 24) - 1; + rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; rnicp->attr.max_sge_per_wr = T3_MAX_SGE; rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; - rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1; + rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH; rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; - rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */ + rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK; + rnicp->attr.max_mr_size = T3_MAX_MR_SIZE; rnicp->attr.can_resize_wq = 0; rnicp->attr.max_rdma_reads_per_qp = 8; rnicp->attr.max_rdma_read_resources = @@ -104,11 +144,9 @@ static void rnic_init(struct iwch_dev *rnicp) static void open_rnic_dev(struct t3cdev *tdev) { struct iwch_dev *rnicp; - static int vers_printed; - PDBG("%s t3cdev %p\n", __FUNCTION__, tdev); - if (!vers_printed++) - printk(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n", + PDBG("%s t3cdev %p\n", __func__, tdev); + printk_once(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION); rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp)); if (!rnicp) { @@ -144,10 +182,13 @@ static void open_rnic_dev(struct t3cdev *tdev) static void close_rnic_dev(struct t3cdev *tdev) { struct iwch_dev *dev, *tmp; - PDBG("%s t3cdev %p\n", __FUNCTION__, tdev); + PDBG("%s t3cdev %p\n", __func__, tdev); mutex_lock(&dev_mutex); list_for_each_entry_safe(dev, tmp, &dev_list, entry) { if (dev->rdev.t3cdev_p == tdev) { + dev->rdev.flags = CXIO_ERROR_FATAL; + synchronize_net(); + cancel_delayed_work_sync(&dev->db_drop_task); list_del(&dev->entry); iwch_unregister_device(dev); cxio_rdev_close(&dev->rdev); @@ -161,6 +202,69 @@ static void close_rnic_dev(struct t3cdev *tdev) mutex_unlock(&dev_mutex); } +static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) +{ + struct cxio_rdev *rdev = tdev->ulp; + struct iwch_dev *rnicp; + struct ib_event event; + u32 portnum = port_id + 1; + int dispatch = 0; + + if (!rdev) + return; + rnicp = rdev_to_iwch_dev(rdev); + switch (evt) { + case OFFLOAD_STATUS_DOWN: { + rdev->flags = CXIO_ERROR_FATAL; + synchronize_net(); + event.event = IB_EVENT_DEVICE_FATAL; + dispatch = 1; + break; + } + case OFFLOAD_PORT_DOWN: { + event.event = IB_EVENT_PORT_ERR; + dispatch = 1; + break; + } + case OFFLOAD_PORT_UP: { + event.event = IB_EVENT_PORT_ACTIVE; + dispatch = 1; + break; + } + case OFFLOAD_DB_FULL: { + disable_dbs(rnicp); + break; + } + case OFFLOAD_DB_EMPTY: { + enable_dbs(rnicp, 1); + break; + } + case OFFLOAD_DB_DROP: { + unsigned long delay = 1000; + unsigned short r; + + disable_dbs(rnicp); + get_random_bytes(&r, 2); + delay += r & 1023; + + /* + * delay is between 1000-2023 usecs. + */ + schedule_delayed_work(&rnicp->db_drop_task, + usecs_to_jiffies(delay)); + break; + } + } + + if (dispatch) { + event.device = &rnicp->ibdev; + event.element.port_num = portnum; + ib_dispatch_event(&event); + } + + return; +} + static int __init iwch_init_module(void) { int err; diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index caf4e6007a4..837862287a2 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -36,6 +36,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/idr.h> +#include <linux/workqueue.h> #include <rdma/ib_verbs.h> @@ -48,8 +49,6 @@ struct iwch_qp; struct iwch_mr; struct iwch_rnic_attributes { - u32 vendor_id; - u32 vendor_part_id; u32 max_qps; u32 max_wrs; /* Max for any SQ/RQ */ u32 max_sge_per_wr; @@ -66,6 +65,7 @@ struct iwch_rnic_attributes { * size (4k)^i. Phys block list mode unsupported. */ u32 mem_pgsizes_bitmask; + u64 max_mr_size; u8 can_resize_wq; /* @@ -111,6 +111,7 @@ struct iwch_dev { struct idr mmidr; spinlock_t lock; struct list_head entry; + struct delayed_work db_drop_task; }; static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) @@ -118,6 +119,11 @@ static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) return container_of(ibdev, struct iwch_dev, ibdev); } +static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev) +{ + return container_of(rdev, struct iwch_dev, rdev); +} + static inline int t3b_device(const struct iwch_dev *rhp) { return rhp->rdev.t3cdev_p->type == T3B; @@ -147,19 +153,17 @@ static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr, void *handle, u32 id) { int ret; - u32 newid; - - do { - if (!idr_pre_get(idr, GFP_KERNEL)) { - return -ENOMEM; - } - spin_lock_irq(&rhp->lock); - ret = idr_get_new_above(idr, handle, id, &newid); - BUG_ON(newid != id); - spin_unlock_irq(&rhp->lock); - } while (ret == -EAGAIN); - - return ret; + + idr_preload(GFP_KERNEL); + spin_lock_irq(&rhp->lock); + + ret = idr_alloc(idr, handle, id, id + 1, GFP_NOWAIT); + + spin_unlock_irq(&rhp->lock); + idr_preload_end(); + + BUG_ON(ret == -ENOSPC); + return ret < 0 ? ret : 0; } static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index b2faff5abce..cb78b1e9bcd 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -31,10 +31,12 @@ */ #include <linux/module.h> #include <linux/list.h> +#include <linux/slab.h> #include <linux/workqueue.h> #include <linux/skbuff.h> #include <linux/timer.h> #include <linux/notifier.h> +#include <linux/inetdevice.h> #include <net/neighbour.h> #include <net/netevent.h> @@ -62,46 +64,47 @@ static char *states[] = { NULL, }; -static int ep_timeout_secs = 10; -module_param(ep_timeout_secs, int, 0444); +int peer2peer = 0; +module_param(peer2peer, int, 0644); +MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); + +static int ep_timeout_secs = 60; +module_param(ep_timeout_secs, int, 0644); MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " - "in seconds (default=10)"); + "in seconds (default=60)"); static int mpa_rev = 1; -module_param(mpa_rev, int, 0444); +module_param(mpa_rev, int, 0644); MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " "1 is spec compliant. (default=1)"); static int markers_enabled = 0; -module_param(markers_enabled, int, 0444); +module_param(markers_enabled, int, 0644); MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); static int crc_enabled = 1; -module_param(crc_enabled, int, 0444); +module_param(crc_enabled, int, 0644); MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); static int rcv_win = 256 * 1024; -module_param(rcv_win, int, 0444); +module_param(rcv_win, int, 0644); MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)"); static int snd_win = 32 * 1024; -module_param(snd_win, int, 0444); +module_param(snd_win, int, 0644); MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)"); static unsigned int nocong = 0; -module_param(nocong, uint, 0444); +module_param(nocong, uint, 0644); MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)"); static unsigned int cong_flavor = 1; -module_param(cong_flavor, uint, 0444); +module_param(cong_flavor, uint, 0644); MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)"); -static void process_work(struct work_struct *work); static struct workqueue_struct *workq; -static DECLARE_WORK(skb_work, process_work); static struct sk_buff_head rxq; -static cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS]; static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); static void ep_timeout(unsigned long arg); @@ -109,9 +112,9 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status); static void start_ep_timer(struct iwch_ep *ep) { - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); if (timer_pending(&ep->timer)) { - PDBG("%s stopped / restarted timer ep %p\n", __FUNCTION__, ep); + PDBG("%s stopped / restarted timer ep %p\n", __func__, ep); del_timer_sync(&ep->timer); } else get_ep(&ep->com); @@ -123,11 +126,48 @@ static void start_ep_timer(struct iwch_ep *ep) static void stop_ep_timer(struct iwch_ep *ep) { - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); + if (!timer_pending(&ep->timer)) { + WARN(1, "%s timer stopped when its not running! ep %p state %u\n", + __func__, ep, ep->com.state); + return; + } del_timer_sync(&ep->timer); put_ep(&ep->com); } +static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e) +{ + int error = 0; + struct cxio_rdev *rdev; + + rdev = (struct cxio_rdev *)tdev->ulp; + if (cxio_fatal_error(rdev)) { + kfree_skb(skb); + return -EIO; + } + error = l2t_send(tdev, skb, l2e); + if (error < 0) + kfree_skb(skb); + return error; +} + +int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) +{ + int error = 0; + struct cxio_rdev *rdev; + + rdev = (struct cxio_rdev *)tdev->ulp; + if (cxio_fatal_error(rdev)) { + kfree_skb(skb); + return -EIO; + } + error = cxgb3_ofld_send(tdev, skb); + if (error < 0) + kfree_skb(skb); + return error; +} + static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) { struct cpl_tid_release *req; @@ -139,7 +179,7 @@ static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); skb->priority = CPL_PRIORITY_SETUP; - tdev->send(tdev, skb); + iwch_cxgb3_ofld_send(tdev, skb); return; } @@ -161,8 +201,7 @@ int iwch_quiesce_tid(struct iwch_ep *ep) req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE); skb->priority = CPL_PRIORITY_DATA; - ep->com.tdev->send(ep->com.tdev, skb); - return 0; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); } int iwch_resume_tid(struct iwch_ep *ep) @@ -183,13 +222,12 @@ int iwch_resume_tid(struct iwch_ep *ep) req->val = 0; skb->priority = CPL_PRIORITY_DATA; - ep->com.tdev->send(ep->com.tdev, skb); - return 0; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); } static void set_emss(struct iwch_ep *ep, u16 opt) { - PDBG("%s ep %p opt %u\n", __FUNCTION__, ep, opt); + PDBG("%s ep %p opt %u\n", __func__, ep, opt); ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40; if (G_TCPOPT_TSTAMP(opt)) ep->emss -= 12; @@ -219,7 +257,7 @@ static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) unsigned long flags; spin_lock_irqsave(&epc->lock, flags); - PDBG("%s - %s -> %s\n", __FUNCTION__, states[epc->state], states[new]); + PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]); __state_set(epc, new); spin_unlock_irqrestore(&epc->lock, flags); return; @@ -229,57 +267,37 @@ static void *alloc_ep(int size, gfp_t gfp) { struct iwch_ep_common *epc; - epc = kmalloc(size, gfp); + epc = kzalloc(size, gfp); if (epc) { - memset(epc, 0, size); kref_init(&epc->kref); spin_lock_init(&epc->lock); init_waitqueue_head(&epc->waitq); } - PDBG("%s alloc ep %p\n", __FUNCTION__, epc); + PDBG("%s alloc ep %p\n", __func__, epc); return epc; } void __free_ep(struct kref *kref) { - struct iwch_ep_common *epc; - epc = container_of(kref, struct iwch_ep_common, kref); - PDBG("%s ep %p state %s\n", __FUNCTION__, epc, states[state_read(epc)]); - kfree(epc); + struct iwch_ep *ep; + ep = container_of(container_of(kref, struct iwch_ep_common, kref), + struct iwch_ep, com); + PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); + if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { + cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); + dst_release(ep->dst); + l2t_release(ep->com.tdev, ep->l2t); + } + kfree(ep); } static void release_ep_resources(struct iwch_ep *ep) { - PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid); - cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); - dst_release(ep->dst); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); - if (ep->com.tdev->type == T3B) - release_tid(ep->com.tdev, ep->hwtid, NULL); + PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); + set_bit(RELEASE_RESOURCES, &ep->com.flags); put_ep(&ep->com); } -static void process_work(struct work_struct *work) -{ - struct sk_buff *skb = NULL; - void *ep; - struct t3cdev *tdev; - int ret; - - while ((skb = skb_dequeue(&rxq))) { - ep = *((void **) (skb->cb)); - tdev = *((struct t3cdev **) (skb->cb + sizeof(void *))); - ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep); - if (ret & CPL_RET_BUF_DONE) - kfree_skb(skb); - - /* - * ep was referenced in sched(), and is freed here. - */ - put_ep((struct iwch_ep_common *)ep); - } -} - static int status2errno(int status) { switch (status) { @@ -319,23 +337,12 @@ static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip, __be16 peer_port, u8 tos) { struct rtable *rt; - struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = peer_ip, - .saddr = local_ip, - .tos = tos} - }, - .proto = IPPROTO_TCP, - .uli_u = { - .ports = { - .sport = local_port, - .dport = peer_port} - } - }; - - if (ip_route_output_flow(&rt, &fl, NULL, 0)) + struct flowi4 fl4; + + rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, + peer_port, local_port, IPPROTO_TCP, + tos, 0); + if (IS_ERR(rt)) return NULL; return rt; } @@ -351,7 +358,7 @@ static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu) static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb) { - PDBG("%s t3cdev %p\n", __FUNCTION__, dev); + PDBG("%s t3cdev %p\n", __func__, dev); kfree_skb(skb); } @@ -372,9 +379,9 @@ static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb) { struct cpl_abort_req *req = cplhdr(skb); - PDBG("%s t3cdev %p\n", __FUNCTION__, dev); + PDBG("%s t3cdev %p\n", __func__, dev); req->cmd = CPL_ABORT_NO_RST; - cxgb3_ofld_send(dev, skb); + iwch_cxgb3_ofld_send(dev, skb); } static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) @@ -382,10 +389,10 @@ static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) struct cpl_close_con_req *req; struct sk_buff *skb; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); skb = get_skb(NULL, sizeof(*req), gfp); if (!skb) { - printk(KERN_ERR MOD "%s - failed to alloc skb\n", __FUNCTION__); + printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__); return -ENOMEM; } skb->priority = CPL_PRIORITY_DATA; @@ -394,30 +401,29 @@ static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid)); - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) { struct cpl_abort_req *req; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); skb = get_skb(skb, sizeof(*req), gfp); if (!skb) { printk(KERN_ERR MOD "%s - failed to alloc skb.\n", - __FUNCTION__); + __func__); return -ENOMEM; } skb->priority = CPL_PRIORITY_DATA; set_arp_failure_handler(skb, abort_arp_failure); req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); req->cmd = CPL_ABORT_SEND_RST; - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static int send_connect(struct iwch_ep *ep) @@ -428,12 +434,12 @@ static int send_connect(struct iwch_ep *ep) unsigned int mtu_idx; int wscale; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); if (!skb) { printk(KERN_ERR MOD "%s - failed to alloc skb.\n", - __FUNCTION__); + __func__); return -ENOMEM; } mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst)); @@ -446,7 +452,8 @@ static int send_connect(struct iwch_ep *ep) V_MSS_IDX(mtu_idx) | V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor); + opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | + V_CONG_CONTROL_FLAVOR(cong_flavor); skb->priority = CPL_PRIORITY_SETUP; set_arp_failure_handler(skb, act_open_req_arp_failure); @@ -461,8 +468,7 @@ static int send_connect(struct iwch_ep *ep) req->opt0l = htonl(opt0l); req->params = 0; req->opt2 = htonl(opt2); - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) @@ -472,7 +478,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) struct mpa_message *mpa; int len; - PDBG("%s ep %p pd_len %d\n", __FUNCTION__, ep, ep->plen); + PDBG("%s ep %p pd_len %d\n", __func__, ep, ep->plen); BUG_ON(skb_cloned(skb)); @@ -510,16 +516,16 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) skb_reset_transport_header(skb); len = skb->len; req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); req->wr_lo = htonl(V_WR_TID(ep->hwtid)); req->len = htonl(len); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT); + req->flags = htonl(F_TX_INIT); req->sndseq = htonl(ep->snd_seq); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; - l2t_send(ep->com.tdev, skb, ep->l2t); + iwch_l2t_send(ep->com.tdev, skb, ep->l2t); start_ep_timer(ep); state_set(&ep->com, MPA_REQ_SENT); return; @@ -532,13 +538,13 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) struct mpa_message *mpa; struct sk_buff *skb; - PDBG("%s ep %p plen %d\n", __FUNCTION__, ep, plen); + PDBG("%s ep %p plen %d\n", __func__, ep, plen); mpalen = sizeof(*mpa) + plen; skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); if (!skb) { - printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __FUNCTION__); + printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__); return -ENOMEM; } skb_reserve(skb, sizeof(*req)); @@ -561,17 +567,16 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) set_arp_failure_handler(skb, arp_failure_discard); skb_reset_transport_header(skb); req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); req->wr_lo = htonl(V_WR_TID(ep->hwtid)); req->len = htonl(mpalen); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT); + req->flags = htonl(F_TX_INIT); req->sndseq = htonl(ep->snd_seq); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) @@ -582,13 +587,13 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) int len; struct sk_buff *skb; - PDBG("%s ep %p plen %d\n", __FUNCTION__, ep, plen); + PDBG("%s ep %p plen %d\n", __func__, ep, plen); mpalen = sizeof(*mpa) + plen; skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL); if (!skb) { - printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __FUNCTION__); + printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__); return -ENOMEM; } skb->priority = CPL_PRIORITY_DATA; @@ -613,17 +618,16 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) skb_reset_transport_header(skb); len = skb->len; req = (struct tx_data_wr *) skb_push(skb, sizeof(*req)); - req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)); + req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL); req->wr_lo = htonl(V_WR_TID(ep->hwtid)); req->len = htonl(len); req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) | V_TX_SNDBUF(snd_win>>15)); - req->flags = htonl(F_TX_MORE | F_TX_IMM_ACK | F_TX_INIT); + req->flags = htonl(F_TX_INIT); req->sndseq = htonl(ep->snd_seq); ep->mpa_skb = skb; state_set(&ep->com, MPA_REP_SENT); - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) @@ -632,7 +636,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct cpl_act_establish *req = cplhdr(skb); unsigned int tid = GET_TID(req); - PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, tid); + PDBG("%s ep %p tid %d\n", __func__, ep, tid); dst_confirm(ep->dst); @@ -641,6 +645,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid); ep->snd_seq = ntohl(req->snd_isn); + ep->rcv_seq = ntohl(req->rcv_isn); set_emss(ep, ntohs(req->tcp_opt)); @@ -664,7 +669,7 @@ static void close_complete_upcall(struct iwch_ep *ep) { struct iw_cm_event event; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CLOSE; if (ep->com.cm_id) { @@ -681,7 +686,7 @@ static void peer_close_upcall(struct iwch_ep *ep) { struct iw_cm_event event; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_DISCONNECT; if (ep->com.cm_id) { @@ -695,7 +700,7 @@ static void peer_abort_upcall(struct iwch_ep *ep) { struct iw_cm_event event; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CLOSE; event.status = -ECONNRESET; @@ -713,19 +718,21 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status) { struct iw_cm_event event; - PDBG("%s ep %p status %d\n", __FUNCTION__, ep, status); + PDBG("%s ep %p status %d\n", __func__, ep, status); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REPLY; event.status = status; - event.local_addr = ep->com.local_addr; - event.remote_addr = ep->com.remote_addr; + memcpy(&event.local_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&event.remote_addr, &ep->com.remote_addr, + sizeof(ep->com.remote_addr)); if ((status == 0) || (status == -ECONNREFUSED)) { event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); } if (ep->com.cm_id) { - PDBG("%s ep %p tid %d status %d\n", __FUNCTION__, ep, + PDBG("%s ep %p tid %d status %d\n", __func__, ep, ep->hwtid, status); ep->com.cm_id->event_handler(ep->com.cm_id, &event); } @@ -740,18 +747,27 @@ static void connect_request_upcall(struct iwch_ep *ep) { struct iw_cm_event event; - PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid); + PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REQUEST; - event.local_addr = ep->com.local_addr; - event.remote_addr = ep->com.remote_addr; + memcpy(&event.local_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&event.remote_addr, &ep->com.remote_addr, + sizeof(ep->com.local_addr)); event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); event.provider_data = ep; - if (state_read(&ep->parent_ep->com) != DEAD) + /* + * Until ird/ord negotiation via MPAv2 support is added, send max + * supported values + */ + event.ird = event.ord = 8; + if (state_read(&ep->parent_ep->com) != DEAD) { + get_ep(&ep->com); ep->parent_ep->com.cm_id->event_handler( ep->parent_ep->com.cm_id, &event); + } put_ep(&ep->parent_ep->com); ep->parent_ep = NULL; } @@ -760,11 +776,16 @@ static void established_upcall(struct iwch_ep *ep) { struct iw_cm_event event; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_ESTABLISHED; + /* + * Until ird/ord negotiation via MPAv2 support is added, send max + * supported values + */ + event.ird = event.ord = 8; if (ep->com.cm_id) { - PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid); + PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); } } @@ -774,7 +795,7 @@ static int update_rx_credits(struct iwch_ep *ep, u32 credits) struct cpl_rx_data_ack *req; struct sk_buff *skb; - PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits); + PDBG("%s ep %p credits %u\n", __func__, ep, credits); skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); if (!skb) { printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n"); @@ -786,7 +807,7 @@ static int update_rx_credits(struct iwch_ep *ep, u32 credits) OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid)); req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1)); skb->priority = CPL_PRIORITY_ACK; - ep->com.tdev->send(ep->com.tdev, skb); + iwch_cxgb3_ofld_send(ep->com.tdev, skb); return credits; } @@ -798,7 +819,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) enum iwch_qp_attr_mask mask; int err; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); /* * Stop mpa timer. If it expired, then the state has @@ -880,12 +901,13 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) * the MPA header is valid. */ state_set(&ep->com, FPDU_MODE); + ep->mpa_attr.initiator = 1; ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; ep->mpa_attr.version = mpa_rev; PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, " - "xmit_marker_enabled=%d, version=%d\n", __FUNCTION__, + "xmit_marker_enabled=%d, version=%d\n", __func__, ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); @@ -902,8 +924,14 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) /* bind QP and TID with INIT_WR */ err = iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); - if (!err) - goto out; + if (err) + goto err; + + if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) { + iwch_post_zb_read(ep); + } + + goto out; err: abort_connection(ep, skb, GFP_KERNEL); out: @@ -916,7 +944,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) struct mpa_message *mpa; u16 plen; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); /* * Stop mpa timer. If it expired, then the state has @@ -936,7 +964,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) return; } - PDBG("%s enter (%s line %u)\n", __FUNCTION__, __FILE__, __LINE__); + PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); /* * Copy the new data into our accumulation buffer. @@ -951,7 +979,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) */ if (ep->mpa_pkt_len < sizeof(*mpa)) return; - PDBG("%s enter (%s line %u)\n", __FUNCTION__, __FILE__, __LINE__); + PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); mpa = (struct mpa_message *) ep->mpa_pkt; /* @@ -996,12 +1024,13 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb) * If we get here we have accumulated the entire mpa * start reply message including private data. */ + ep->mpa_attr.initiator = 0; ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; ep->mpa_attr.version = mpa_rev; PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, " - "xmit_marker_enabled=%d, version=%d\n", __FUNCTION__, + "xmit_marker_enabled=%d, version=%d\n", __func__, ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); @@ -1018,11 +1047,14 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct cpl_rx_data *hdr = cplhdr(skb); unsigned int dlen = ntohs(hdr->len); - PDBG("%s ep %p dlen %u\n", __FUNCTION__, ep, dlen); + PDBG("%s ep %p dlen %u\n", __func__, ep, dlen); skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); + ep->rcv_seq += dlen; + BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); + switch (state_read(&ep->com)) { case MPA_REQ_SENT: process_mpa_reply(ep, skb); @@ -1035,7 +1067,7 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) default: printk(KERN_ERR MOD "%s Unexpected streaming data." " ep %p state %d tid %d\n", - __FUNCTION__, ep, state_read(&ep->com), ep->hwtid); + __func__, ep, state_read(&ep->com), ep->hwtid); /* * The ep will timeout and inform the ULP of the failure. @@ -1060,84 +1092,107 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct iwch_ep *ep = ctx; struct cpl_wr_ack *hdr = cplhdr(skb); unsigned int credits = ntohs(hdr->credits); - enum iwch_qp_attr_mask mask; + unsigned long flags; + int post_zb = 0; - PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits); + PDBG("%s ep %p credits %u\n", __func__, ep, credits); - if (credits == 0) + if (credits == 0) { + PDBG("%s 0 credit ack ep %p state %u\n", + __func__, ep, state_read(&ep->com)); return CPL_RET_BUF_DONE; + } + + spin_lock_irqsave(&ep->com.lock, flags); BUG_ON(credits != 1); - BUG_ON(ep->mpa_skb == NULL); - kfree_skb(ep->mpa_skb); - ep->mpa_skb = NULL; dst_confirm(ep->dst); - if (state_read(&ep->com) == MPA_REP_SENT) { - struct iwch_qp_attributes attrs; - - /* bind QP to EP and move to RTS */ - attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ord; - attrs.max_ord = ep->ord; - attrs.llp_stream_handle = ep; - attrs.next_state = IWCH_QP_STATE_RTS; - - /* bind QP and TID with INIT_WR */ - mask = IWCH_QP_ATTR_NEXT_STATE | - IWCH_QP_ATTR_LLP_STREAM_HANDLE | - IWCH_QP_ATTR_MPA_ATTR | - IWCH_QP_ATTR_MAX_IRD | - IWCH_QP_ATTR_MAX_ORD; - - ep->com.rpl_err = iwch_modify_qp(ep->com.qp->rhp, - ep->com.qp, mask, &attrs, 1); - - if (!ep->com.rpl_err) { - state_set(&ep->com, FPDU_MODE); - established_upcall(ep); + if (!ep->mpa_skb) { + PDBG("%s rdma_init wr_ack ep %p state %u\n", + __func__, ep, ep->com.state); + if (ep->mpa_attr.initiator) { + PDBG("%s initiator ep %p state %u\n", + __func__, ep, ep->com.state); + if (peer2peer && ep->com.state == FPDU_MODE) + post_zb = 1; + } else { + PDBG("%s responder ep %p state %u\n", + __func__, ep, ep->com.state); + if (ep->com.state == MPA_REQ_RCVD) { + ep->com.rpl_done = 1; + wake_up(&ep->com.waitq); + } } - - ep->com.rpl_done = 1; - PDBG("waking up ep %p\n", ep); - wake_up(&ep->com.waitq); + } else { + PDBG("%s lsm ack ep %p state %u freeing skb\n", + __func__, ep, ep->com.state); + kfree_skb(ep->mpa_skb); + ep->mpa_skb = NULL; } + spin_unlock_irqrestore(&ep->com.lock, flags); + if (post_zb) + iwch_post_zb_read(ep); return CPL_RET_BUF_DONE; } static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) { struct iwch_ep *ep = ctx; + unsigned long flags; + int release = 0; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); + BUG_ON(!ep); /* * We get 2 abort replies from the HW. The first one must * be ignored except for scribbling that we need one more. */ - if (!(ep->flags & ABORT_REQ_IN_PROGRESS)) { - ep->flags |= ABORT_REQ_IN_PROGRESS; + if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) { return CPL_RET_BUF_DONE; } - close_complete_upcall(ep); - state_set(&ep->com, DEAD); - release_ep_resources(ep); + spin_lock_irqsave(&ep->com.lock, flags); + switch (ep->com.state) { + case ABORTING: + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + break; + default: + printk(KERN_ERR "%s ep %p state %d\n", + __func__, ep, ep->com.state); + break; + } + spin_unlock_irqrestore(&ep->com.lock, flags); + + if (release) + release_ep_resources(ep); return CPL_RET_BUF_DONE; } +/* + * Return whether a failed active open has allocated a TID + */ +static inline int act_open_has_tid(int status) +{ + return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && + status != CPL_ERR_ARP_MISS; +} + static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) { struct iwch_ep *ep = ctx; struct cpl_act_open_rpl *rpl = cplhdr(skb); - PDBG("%s ep %p status %u errno %d\n", __FUNCTION__, ep, rpl->status, + PDBG("%s ep %p status %u errno %d\n", __func__, ep, rpl->status, status2errno(rpl->status)); connect_reply_upcall(ep, status2errno(rpl->status)); state_set(&ep->com, DEAD); - if (ep->com.tdev->type == T3B) + if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status)) release_tid(ep->com.tdev, GET_TID(rpl), NULL); cxgb3_free_atid(ep->com.tdev, ep->atid); dst_release(ep->dst); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); + l2t_release(ep->com.tdev, ep->l2t); put_ep(&ep->com); return CPL_RET_BUF_DONE; } @@ -1147,7 +1202,7 @@ static int listen_start(struct iwch_listen_ep *ep) struct sk_buff *skb; struct cpl_pass_open_req *req; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); if (!skb) { printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n"); @@ -1167,8 +1222,7 @@ static int listen_start(struct iwch_listen_ep *ep) req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK)); skb->priority = 1; - ep->com.tdev->send(ep->com.tdev, skb); - return 0; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); } static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) @@ -1176,7 +1230,7 @@ static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct iwch_listen_ep *ep = ctx; struct cpl_pass_open_rpl *rpl = cplhdr(skb); - PDBG("%s ep %p status %d error %d\n", __FUNCTION__, ep, + PDBG("%s ep %p status %d error %d\n", __func__, ep, rpl->status, status2errno(rpl->status)); ep->com.rpl_err = status2errno(rpl->status); ep->com.rpl_done = 1; @@ -1190,10 +1244,10 @@ static int listen_stop(struct iwch_listen_ep *ep) struct sk_buff *skb; struct cpl_close_listserv_req *req; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); if (!skb) { - printk(KERN_ERR MOD "%s - failed to alloc skb\n", __FUNCTION__); + printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__); return -ENOMEM; } req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req)); @@ -1201,8 +1255,7 @@ static int listen_stop(struct iwch_listen_ep *ep) req->cpu_idx = 0; OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid)); skb->priority = 1; - ep->com.tdev->send(ep->com.tdev, skb); - return 0; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); } static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb, @@ -1211,7 +1264,7 @@ static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb, struct iwch_listen_ep *ep = ctx; struct cpl_close_listserv_rpl *rpl = cplhdr(skb); - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); ep->com.rpl_err = status2errno(rpl->status); ep->com.rpl_done = 1; wake_up(&ep->com.waitq); @@ -1225,7 +1278,7 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) u32 opt0h, opt0l, opt2; int wscale; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); BUG_ON(skb_cloned(skb)); skb_trim(skb, sizeof(*rpl)); skb_get(skb); @@ -1239,7 +1292,8 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) V_MSS_IDX(mtu_idx) | V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor); + opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | + V_CONG_CONTROL_FLAVOR(cong_flavor); rpl = cplhdr(skb); rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); @@ -1250,7 +1304,7 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) rpl->opt2 = htonl(opt2); rpl->rsvd = rpl->opt2; /* workaround for HW bug */ skb->priority = CPL_PRIORITY_SETUP; - l2t_send(ep->com.tdev, skb, ep->l2t); + iwch_l2t_send(ep->com.tdev, skb, ep->l2t); return; } @@ -1258,13 +1312,13 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip, struct sk_buff *skb) { - PDBG("%s t3cdev %p tid %u peer_ip %x\n", __FUNCTION__, tdev, hwtid, + PDBG("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid, peer_ip); BUG_ON(skb_cloned(skb)); skb_trim(skb, sizeof(struct cpl_tid_release)); skb_get(skb); - if (tdev->type == T3B) + if (tdev->type != T3A) release_tid(tdev, hwtid, skb); else { struct cpl_pass_accept_rpl *rpl; @@ -1279,7 +1333,7 @@ static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip, rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); rpl->opt2 = 0; rpl->rsvd = rpl->opt2; - tdev->send(tdev, skb); + iwch_cxgb3_ofld_send(tdev, skb); } } @@ -1293,11 +1347,11 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct rtable *rt; struct iff_mac tim; - PDBG("%s parent ep %p tid %u\n", __FUNCTION__, parent_ep, hwtid); + PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); if (state_read(&parent_ep->com) != LISTEN) { printk(KERN_ERR "%s - listening ep not in LISTEN\n", - __FUNCTION__); + __func__); goto reject; } @@ -1307,15 +1361,8 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) tim.mac_addr = req->dst_mac; tim.vlan_tag = ntohs(req->vlan_tag); if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { - printk(KERN_ERR - "%s bad dst mac %02x %02x %02x %02x %02x %02x\n", - __FUNCTION__, - req->dst_mac[0], - req->dst_mac[1], - req->dst_mac[2], - req->dst_mac[3], - req->dst_mac[4], - req->dst_mac[5]); + printk(KERN_ERR "%s bad dst mac %pM\n", + __func__, req->dst_mac); goto reject; } @@ -1327,22 +1374,22 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid))); if (!rt) { printk(KERN_ERR MOD "%s - failed to find dst entry!\n", - __FUNCTION__); + __func__); goto reject; } - dst = &rt->u.dst; - l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev); + dst = &rt->dst; + l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", - __FUNCTION__); + __func__); dst_release(dst); goto reject; } child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); if (!child_ep) { printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n", - __FUNCTION__); - l2t_release(L2DATA(tdev), l2t); + __func__); + l2t_release(tdev, l2t); dst_release(dst); goto reject; } @@ -1376,8 +1423,9 @@ static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct iwch_ep *ep = ctx; struct cpl_pass_establish *req = cplhdr(skb); - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); ep->snd_seq = ntohl(req->snd_isn); + ep->rcv_seq = ntohl(req->rcv_isn); set_emss(ep, ntohs(req->tcp_opt)); @@ -1396,7 +1444,7 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) int disconnect = 1; int release = 0; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); dst_confirm(ep->dst); spin_lock_irqsave(&ep->com.lock, flags); @@ -1413,10 +1461,14 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) /* * We're gonna mark this puppy DEAD, but keep * the reference on it until the ULP accepts or - * rejects the CR. + * rejects the CR. Also wake up anyone waiting + * in rdma connection migration (see iwch_accept_cr()). */ __state_set(&ep->com, CLOSING); - get_ep(&ep->com); + ep->com.rpl_done = 1; + ep->com.rpl_err = -ECONNRESET; + PDBG("waking up ep %p\n", ep); + wake_up(&ep->com.waitq); break; case MPA_REP_SENT: __state_set(&ep->com, CLOSING); @@ -1483,27 +1535,27 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct sk_buff *rpl_skb; struct iwch_qp_attributes attrs; int ret; - int state; + int release = 0; + unsigned long flags; + + if (is_neg_adv_abort(req->status)) { + PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep, + ep->hwtid); + t3_l2t_send_event(ep->com.tdev, ep->l2t); + return CPL_RET_BUF_DONE; + } /* * We get 2 peer aborts from the HW. The first one must * be ignored except for scribbling that we need one more. */ - if (!(ep->flags & PEER_ABORT_IN_PROGRESS)) { - ep->flags |= PEER_ABORT_IN_PROGRESS; - return CPL_RET_BUF_DONE; - } - - if (is_neg_adv_abort(req->status)) { - PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep, - ep->hwtid); - t3_l2t_send_event(ep->com.tdev, ep->l2t); + if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) { return CPL_RET_BUF_DONE; } - state = state_read(&ep->com); - PDBG("%s ep %p state %u\n", __FUNCTION__, ep, state); - switch (state) { + spin_lock_irqsave(&ep->com.lock, flags); + PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state); + switch (ep->com.state) { case CONNECTING: break; case MPA_REQ_WAIT: @@ -1524,9 +1576,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) /* * We're gonna mark this puppy DEAD, but keep * the reference on it until the ULP accepts or - * rejects the CR. + * rejects the CR. Also wake up anyone waiting + * in rdma connection migration (see iwch_accept_cr()). */ - get_ep(&ep->com); + ep->com.rpl_done = 1; + ep->com.rpl_err = -ECONNRESET; + PDBG("waking up ep %p\n", ep); + wake_up(&ep->com.waitq); break; case MORIBUND: case CLOSING: @@ -1541,29 +1597,33 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) if (ret) printk(KERN_ERR MOD "%s - qp <- error failed!\n", - __FUNCTION__); + __func__); } peer_abort_upcall(ep); break; case ABORTING: break; case DEAD: - PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __FUNCTION__); + PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); + spin_unlock_irqrestore(&ep->com.lock, flags); return CPL_RET_BUF_DONE; default: BUG_ON(1); break; } dst_confirm(ep->dst); + if (ep->com.state != ABORTING) { + __state_set(&ep->com, DEAD); + release = 1; + } + spin_unlock_irqrestore(&ep->com.lock, flags); rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); if (!rpl_skb) { printk(KERN_ERR MOD "%s - cannot allocate skb!\n", - __FUNCTION__); - dst_release(ep->dst); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); - put_ep(&ep->com); - return CPL_RET_BUF_DONE; + __func__); + release = 1; + goto out; } rpl_skb->priority = CPL_PRIORITY_DATA; rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); @@ -1571,11 +1631,10 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); rpl->cmd = CPL_ABORT_NO_RST; - ep->com.tdev->send(ep->com.tdev, rpl_skb); - if (state != ABORTING) { - state_set(&ep->com, DEAD); + iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb); +out: + if (release) release_ep_resources(ep); - } return CPL_RET_BUF_DONE; } @@ -1586,7 +1645,7 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) unsigned long flags; int release = 0; - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); BUG_ON(!ep); /* The cm_id may be null if we failed to connect */ @@ -1609,8 +1668,8 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) release = 1; break; case ABORTING: - break; case DEAD: + break; default: BUG_ON(1); break; @@ -1625,7 +1684,7 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) * T3A does 3 things when a TERM is received: * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet * 2) generate an async event on the QP with the TERMINATE opcode - * 3) post a TERMINATE opcde cqe into the associated CQ. + * 3) post a TERMINATE opcode cqe into the associated CQ. * * For (1), we save the message in the qp for later consumer consumption. * For (2), we move the QP into TERMINATE, post a QP event and disconnect. @@ -1637,9 +1696,12 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) { struct iwch_ep *ep = ctx; - PDBG("%s ep %p\n", __FUNCTION__, ep); + if (state_read(&ep->com) != FPDU_MODE) + return CPL_RET_BUF_DONE; + + PDBG("%s ep %p\n", __func__, ep); skb_pull(skb, sizeof(struct cpl_rdma_terminate)); - PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len); + PDBG("%s saving %d bytes of term msg\n", __func__, skb->len); skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer, skb->len); ep->com.qp->attr.terminate_msg_len = skb->len; @@ -1652,13 +1714,13 @@ static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct cpl_rdma_ec_status *rep = cplhdr(skb); struct iwch_ep *ep = ctx; - PDBG("%s ep %p tid %u status %d\n", __FUNCTION__, ep, ep->hwtid, + PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, rep->status); if (rep->status) { struct iwch_qp_attributes attrs; printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n", - __FUNCTION__, ep->hwtid); + __func__, ep->hwtid); stop_ep_timer(ep); attrs.next_state = IWCH_QP_STATE_ERROR; iwch_modify_qp(ep->com.qp->rhp, @@ -1674,15 +1736,18 @@ static void ep_timeout(unsigned long arg) struct iwch_ep *ep = (struct iwch_ep *)arg; struct iwch_qp_attributes attrs; unsigned long flags; + int abort = 1; spin_lock_irqsave(&ep->com.lock, flags); - PDBG("%s ep %p tid %u state %d\n", __FUNCTION__, ep, ep->hwtid, + PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, ep->com.state); switch (ep->com.state) { case MPA_REQ_SENT: + __state_set(&ep->com, ABORTING); connect_reply_upcall(ep, -ETIMEDOUT); break; case MPA_REQ_WAIT: + __state_set(&ep->com, ABORTING); break; case CLOSING: case MORIBUND: @@ -1692,13 +1757,16 @@ static void ep_timeout(unsigned long arg) ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); } + __state_set(&ep->com, ABORTING); break; default: - BUG(); + WARN(1, "%s unexpected state ep %p state %u\n", + __func__, ep, ep->com.state); + abort = 0; } - __state_set(&ep->com, CLOSING); spin_unlock_irqrestore(&ep->com.lock, flags); - abort_connection(ep, NULL, GFP_ATOMIC); + if (abort) + abort_connection(ep, NULL, GFP_ATOMIC); put_ep(&ep->com); } @@ -1706,7 +1774,7 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { int err; struct iwch_ep *ep = to_ep(cm_id); - PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (state_read(&ep->com) == DEAD) { put_ep(&ep->com); @@ -1719,6 +1787,7 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) err = send_mpa_reject(ep, pdata, pdata_len); err = iwch_ep_disconnect(ep, 0, GFP_KERNEL); } + put_ep(&ep->com); return 0; } @@ -1731,10 +1800,10 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iwch_dev *h = to_iwch_dev(cm_id->device); struct iwch_qp *qp = get_qhp(h, conn_param->qpn); - PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (state_read(&ep->com) == DEAD) { - put_ep(&ep->com); - return -ECONNRESET; + err = -ECONNRESET; + goto err; } BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); @@ -1743,33 +1812,25 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { abort_connection(ep, NULL, GFP_KERNEL); - return -EINVAL; + err = -EINVAL; + goto err; } cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->com.qp = qp; - ep->com.rpl_done = 0; - ep->com.rpl_err = 0; ep->ird = conn_param->ird; ep->ord = conn_param->ord; - PDBG("%s %d ird %d ord %d\n", __FUNCTION__, __LINE__, ep->ird, ep->ord); - get_ep(&ep->com); - err = send_mpa_reply(ep, conn_param->private_data, - conn_param->private_data_len); - if (err) { - ep->com.cm_id = NULL; - ep->com.qp = NULL; - cm_id->rem_ref(cm_id); - abort_connection(ep, NULL, GFP_KERNEL); - put_ep(&ep->com); - return err; - } + + if (peer2peer && ep->ird == 0) + ep->ird = 1; + + PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); /* bind QP to EP and move to RTS */ attrs.mpa_attr = ep->mpa_attr; - attrs.max_ird = ep->ord; + attrs.max_ird = ep->ird; attrs.max_ord = ep->ord; attrs.llp_stream_handle = ep; attrs.next_state = IWCH_QP_STATE_RTS; @@ -1783,30 +1844,70 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); + if (err) + goto err1; - if (err) { - ep->com.cm_id = NULL; - ep->com.qp = NULL; - cm_id->rem_ref(cm_id); - abort_connection(ep, NULL, GFP_KERNEL); - } else { - state_set(&ep->com, FPDU_MODE); - established_upcall(ep); + /* if needed, wait for wr_ack */ + if (iwch_rqes_posted(qp)) { + wait_event(ep->com.waitq, ep->com.rpl_done); + err = ep->com.rpl_err; + if (err) + goto err1; } + + err = send_mpa_reply(ep, conn_param->private_data, + conn_param->private_data_len); + if (err) + goto err1; + + + state_set(&ep->com, FPDU_MODE); + established_upcall(ep); + put_ep(&ep->com); + return 0; +err1: + ep->com.cm_id = NULL; + ep->com.qp = NULL; + cm_id->rem_ref(cm_id); +err: put_ep(&ep->com); return err; } +static int is_loopback_dst(struct iw_cm_id *cm_id) +{ + struct net_device *dev; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + + dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr); + if (!dev) + return 0; + dev_put(dev); + return 1; +} + int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { - int err = 0; struct iwch_dev *h = to_iwch_dev(cm_id->device); struct iwch_ep *ep; struct rtable *rt; + int err = 0; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + + if (cm_id->remote_addr.ss_family != PF_INET) { + err = -ENOSYS; + goto out; + } + + if (is_loopback_dst(cm_id)) { + err = -ENOSYS; + goto out; + } ep = alloc_ep(sizeof(*ep), GFP_KERNEL); if (!ep) { - printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__); + printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); err = -ENOMEM; goto out; } @@ -1817,13 +1918,17 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) conn_param->private_data, ep->plen); ep->ird = conn_param->ird; ep->ord = conn_param->ord; + + if (peer2peer && ep->ord == 0) + ep->ord = 1; + ep->com.tdev = h->rdev.t3cdev_p; cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->com.qp = get_qhp(h, conn_param->qpn); BUG_ON(!ep->com.qp); - PDBG("%s qpn 0x%x qp %p cm_id %p\n", __FUNCTION__, conn_param->qpn, + PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, ep->com.qp, cm_id); /* @@ -1831,49 +1936,48 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) */ ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep); if (ep->atid == -1) { - printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __FUNCTION__); + printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); err = -ENOMEM; goto fail2; } /* find a route */ - rt = find_route(h->rdev.t3cdev_p, - cm_id->local_addr.sin_addr.s_addr, - cm_id->remote_addr.sin_addr.s_addr, - cm_id->local_addr.sin_port, - cm_id->remote_addr.sin_port, IPTOS_LOWDELAY); + rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, laddr->sin_port, + raddr->sin_port, IPTOS_LOWDELAY); if (!rt) { - printk(KERN_ERR MOD "%s - cannot find route.\n", __FUNCTION__); + printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); err = -EHOSTUNREACH; goto fail3; } - ep->dst = &rt->u.dst; - - /* get a l2t entry */ - ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour, - ep->dst->neighbour->dev); + ep->dst = &rt->dst; + ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL, + &raddr->sin_addr.s_addr); if (!ep->l2t) { - printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __FUNCTION__); + printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; goto fail4; } state_set(&ep->com, CONNECTING); ep->tos = IPTOS_LOWDELAY; - ep->com.local_addr = cm_id->local_addr; - ep->com.remote_addr = cm_id->remote_addr; + memcpy(&ep->com.local_addr, &cm_id->local_addr, + sizeof(ep->com.local_addr)); + memcpy(&ep->com.remote_addr, &cm_id->remote_addr, + sizeof(ep->com.remote_addr)); /* send connect request to rnic */ err = send_connect(ep); if (!err) goto out; - l2t_release(L2DATA(h->rdev.t3cdev_p), ep->l2t); + l2t_release(h->rdev.t3cdev_p, ep->l2t); fail4: dst_release(ep->dst); fail3: cxgb3_free_atid(ep->com.tdev, ep->atid); fail2: + cm_id->rem_ref(cm_id); put_ep(&ep->com); out: return err; @@ -1888,25 +1992,31 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) might_sleep(); + if (cm_id->local_addr.ss_family != PF_INET) { + err = -ENOSYS; + goto fail1; + } + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); if (!ep) { - printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__); + printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); err = -ENOMEM; goto fail1; } - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); ep->com.tdev = h->rdev.t3cdev_p; cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->backlog = backlog; - ep->com.local_addr = cm_id->local_addr; + memcpy(&ep->com.local_addr, &cm_id->local_addr, + sizeof(ep->com.local_addr)); /* * Allocate a server TID. */ ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep); if (ep->stid == -1) { - printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __FUNCTION__); + printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); err = -ENOMEM; goto fail2; } @@ -1926,6 +2036,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) fail3: cxgb3_free_stid(ep->com.tdev, ep->stid); fail2: + cm_id->rem_ref(cm_id); put_ep(&ep->com); fail1: out: @@ -1937,15 +2048,18 @@ int iwch_destroy_listen(struct iw_cm_id *cm_id) int err; struct iwch_listen_ep *ep = to_listen_ep(cm_id); - PDBG("%s ep %p\n", __FUNCTION__, ep); + PDBG("%s ep %p\n", __func__, ep); might_sleep(); state_set(&ep->com, DEAD); ep->com.rpl_done = 0; ep->com.rpl_err = 0; err = listen_stop(ep); + if (err) + goto done; wait_event(ep->com.waitq, ep->com.rpl_done); cxgb3_free_stid(ep->com.tdev, ep->stid); +done: err = ep->com.rpl_err; cm_id->rem_ref(cm_id); put_ep(&ep->com); @@ -1957,53 +2071,69 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) int ret=0; unsigned long flags; int close = 0; + int fatal = 0; + struct t3cdev *tdev; + struct cxio_rdev *rdev; spin_lock_irqsave(&ep->com.lock, flags); - PDBG("%s ep %p state %s, abrupt %d\n", __FUNCTION__, ep, + PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, states[ep->com.state], abrupt); - if (ep->com.state == DEAD) { - PDBG("%s already dead ep %p\n", __FUNCTION__, ep); - goto out; - } - - if (abrupt) { - if (ep->com.state != ABORTING) { - ep->com.state = ABORTING; - close = 1; - } - goto out; + tdev = (struct t3cdev *)ep->com.tdev; + rdev = (struct cxio_rdev *)tdev->ulp; + if (cxio_fatal_error(rdev)) { + fatal = 1; + close_complete_upcall(ep); + ep->com.state = DEAD; } - switch (ep->com.state) { case MPA_REQ_WAIT: case MPA_REQ_SENT: case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: - start_ep_timer(ep); - ep->com.state = CLOSING; close = 1; + if (abrupt) + ep->com.state = ABORTING; + else { + ep->com.state = CLOSING; + start_ep_timer(ep); + } + set_bit(CLOSE_SENT, &ep->com.flags); break; case CLOSING: - ep->com.state = MORIBUND; - close = 1; + if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { + close = 1; + if (abrupt) { + stop_ep_timer(ep); + ep->com.state = ABORTING; + } else + ep->com.state = MORIBUND; + } break; case MORIBUND: + case ABORTING: + case DEAD: + PDBG("%s ignoring disconnect ep %p state %u\n", + __func__, ep, ep->com.state); break; default: BUG(); break; } -out: + spin_unlock_irqrestore(&ep->com.lock, flags); if (close) { if (abrupt) ret = send_abort(ep, NULL, gfp); else ret = send_halfclose(ep, gfp); + if (ret) + fatal = 1; } + if (fatal) + release_ep_resources(ep); return ret; } @@ -2015,10 +2145,10 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, if (ep->dst != old) return 0; - PDBG("%s ep %p redirect to dst %p l2t %p\n", __FUNCTION__, ep, new, + PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new, l2t); dst_hold(new); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); + l2t_release(ep->com.tdev, ep->l2t); ep->l2t = l2t; dst_release(old); ep->dst = new; @@ -2027,7 +2157,49 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, /* * All the CM events are handled on a work queue to have a safe context. + * These are the real handlers that are called from the work queue. */ +static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_ESTABLISH] = act_establish, + [CPL_ACT_OPEN_RPL] = act_open_rpl, + [CPL_RX_DATA] = rx_data, + [CPL_TX_DMA_ACK] = tx_ack, + [CPL_ABORT_RPL_RSS] = abort_rpl, + [CPL_ABORT_RPL] = abort_rpl, + [CPL_PASS_OPEN_RPL] = pass_open_rpl, + [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, + [CPL_PASS_ACCEPT_REQ] = pass_accept_req, + [CPL_PASS_ESTABLISH] = pass_establish, + [CPL_PEER_CLOSE] = peer_close, + [CPL_ABORT_REQ_RSS] = peer_abort, + [CPL_CLOSE_CON_RPL] = close_con_rpl, + [CPL_RDMA_TERMINATE] = terminate, + [CPL_RDMA_EC_STATUS] = ec_status, +}; + +static void process_work(struct work_struct *work) +{ + struct sk_buff *skb = NULL; + void *ep; + struct t3cdev *tdev; + int ret; + + while ((skb = skb_dequeue(&rxq))) { + ep = *((void **) (skb->cb)); + tdev = *((struct t3cdev **) (skb->cb + sizeof(void *))); + ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep); + if (ret & CPL_RET_BUF_DONE) + kfree_skb(skb); + + /* + * ep was referenced in sched(), and is freed here. + */ + put_ep((struct iwch_ep_common *)ep); + } +} + +static DECLARE_WORK(skb_work, process_work); + static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) { struct iwch_ep_common *epc = ctx; @@ -2059,6 +2231,29 @@ static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } +/* + * All upcalls from the T3 Core go to sched() to schedule the + * processing on a work queue. + */ +cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_ESTABLISH] = sched, + [CPL_ACT_OPEN_RPL] = sched, + [CPL_RX_DATA] = sched, + [CPL_TX_DMA_ACK] = sched, + [CPL_ABORT_RPL_RSS] = sched, + [CPL_ABORT_RPL] = sched, + [CPL_PASS_OPEN_RPL] = sched, + [CPL_CLOSE_LISTSRV_RPL] = sched, + [CPL_PASS_ACCEPT_REQ] = sched, + [CPL_PASS_ESTABLISH] = sched, + [CPL_PEER_CLOSE] = sched, + [CPL_CLOSE_CON_RPL] = sched, + [CPL_ABORT_REQ_RSS] = sched, + [CPL_RDMA_TERMINATE] = sched, + [CPL_RDMA_EC_STATUS] = sched, + [CPL_SET_TCB_RPL] = set_tcb_rpl, +}; + int __init iwch_cm_init(void) { skb_queue_head_init(&rxq); @@ -2067,46 +2262,6 @@ int __init iwch_cm_init(void) if (!workq) return -ENOMEM; - /* - * All upcalls from the T3 Core go to sched() to - * schedule the processing on a work queue. - */ - t3c_handlers[CPL_ACT_ESTABLISH] = sched; - t3c_handlers[CPL_ACT_OPEN_RPL] = sched; - t3c_handlers[CPL_RX_DATA] = sched; - t3c_handlers[CPL_TX_DMA_ACK] = sched; - t3c_handlers[CPL_ABORT_RPL_RSS] = sched; - t3c_handlers[CPL_ABORT_RPL] = sched; - t3c_handlers[CPL_PASS_OPEN_RPL] = sched; - t3c_handlers[CPL_CLOSE_LISTSRV_RPL] = sched; - t3c_handlers[CPL_PASS_ACCEPT_REQ] = sched; - t3c_handlers[CPL_PASS_ESTABLISH] = sched; - t3c_handlers[CPL_PEER_CLOSE] = sched; - t3c_handlers[CPL_CLOSE_CON_RPL] = sched; - t3c_handlers[CPL_ABORT_REQ_RSS] = sched; - t3c_handlers[CPL_RDMA_TERMINATE] = sched; - t3c_handlers[CPL_RDMA_EC_STATUS] = sched; - t3c_handlers[CPL_SET_TCB_RPL] = set_tcb_rpl; - - /* - * These are the real handlers that are called from a - * work queue. - */ - work_handlers[CPL_ACT_ESTABLISH] = act_establish; - work_handlers[CPL_ACT_OPEN_RPL] = act_open_rpl; - work_handlers[CPL_RX_DATA] = rx_data; - work_handlers[CPL_TX_DMA_ACK] = tx_ack; - work_handlers[CPL_ABORT_RPL_RSS] = abort_rpl; - work_handlers[CPL_ABORT_RPL] = abort_rpl; - work_handlers[CPL_PASS_OPEN_RPL] = pass_open_rpl; - work_handlers[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl; - work_handlers[CPL_PASS_ACCEPT_REQ] = pass_accept_req; - work_handlers[CPL_PASS_ESTABLISH] = pass_establish; - work_handlers[CPL_PEER_CLOSE] = peer_close; - work_handlers[CPL_ABORT_REQ_RSS] = peer_abort; - work_handlers[CPL_CLOSE_CON_RPL] = close_con_rpl; - work_handlers[CPL_RDMA_TERMINATE] = terminate; - work_handlers[CPL_RDMA_EC_STATUS] = ec_status; return 0; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index 21a388c313c..b9efadfffb4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -54,13 +54,14 @@ #define MPA_FLAGS_MASK 0xE0 #define put_ep(ep) { \ - PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__, \ + PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ ep, atomic_read(&((ep)->kref.refcount))); \ + WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ kref_put(&((ep)->kref), __free_ep); \ } #define get_ep(ep) { \ - PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __FUNCTION__, __LINE__, \ + PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \ ep, atomic_read(&((ep)->kref.refcount))); \ kref_get(&((ep)->kref)); \ } @@ -144,8 +145,10 @@ enum iwch_ep_state { }; enum iwch_ep_flags { - PEER_ABORT_IN_PROGRESS = (1 << 0), - ABORT_REQ_IN_PROGRESS = (1 << 1), + PEER_ABORT_IN_PROGRESS = 0, + ABORT_REQ_IN_PROGRESS = 1, + RELEASE_RESOURCES = 2, + CLOSE_SENT = 3, }; struct iwch_ep_common { @@ -160,6 +163,7 @@ struct iwch_ep_common { wait_queue_head_t waitq; int rpl_done; int rpl_err; + unsigned long flags; }; struct iwch_listen_ep { @@ -175,6 +179,7 @@ struct iwch_ep { unsigned int atid; u32 hwtid; u32 snd_seq; + u32 rcv_seq; struct l2t_entry *l2t; struct dst_entry *dst; struct sk_buff *mpa_skb; @@ -186,7 +191,6 @@ struct iwch_ep { u16 plen; u32 ird; u32 ord; - u32 flags; }; static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id) @@ -224,5 +228,6 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, st int __init iwch_cm_init(void); void __exit iwch_cm_term(void); +extern int peer2peer; #endif /* _IWCH_CM_H_ */ diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c index d7624c170ee..cf5474ae68f 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cq.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c @@ -67,7 +67,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); if (t3a_device(chp->rhp) && credit) { - PDBG("%s updating %d cq credits on id %d\n", __FUNCTION__, + PDBG("%s updating %d cq credits on id %d\n", __func__, credit, chp->cq.cqid); cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit); } @@ -81,9 +81,10 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, wc->wr_id = cookie; wc->qp = &qhp->ibqp; wc->vendor_err = CQE_STATUS(cqe); + wc->wc_flags = 0; PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x " - "lo 0x%x cookie 0x%llx\n", __FUNCTION__, + "lo 0x%x cookie 0x%llx\n", __func__, CQE_QPID(cqe), CQE_TYPE(cqe), CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe), CQE_WRID_LOW(cqe), (unsigned long long) cookie); @@ -94,6 +95,11 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, else wc->byte_len = 0; wc->opcode = IB_WC_RECV; + if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV || + CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) { + wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe); + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + } } else { switch (CQE_OPCODE(cqe)) { case T3_RDMA_WRITE: @@ -105,17 +111,20 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, break; case T3_SEND: case T3_SEND_WITH_SE: + case T3_SEND_WITH_INV: + case T3_SEND_WITH_SE_INV: wc->opcode = IB_WC_SEND; break; case T3_BIND_MW: wc->opcode = IB_WC_BIND_MW; break; - /* these aren't supported yet */ - case T3_SEND_WITH_INV: - case T3_SEND_WITH_SE_INV: case T3_LOCAL_INV: + wc->opcode = IB_WC_LOCAL_INV; + break; case T3_FAST_REGISTER: + wc->opcode = IB_WC_FAST_REG_MR; + break; default: printk(KERN_ERR MOD "Unexpected opcode %d " "in the CQE received for QPID=0x%0x\n", diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c index b40676662a8..abcc9e76962 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c @@ -29,7 +29,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include <linux/slab.h> +#include <linux/gfp.h> #include <linux/mman.h> #include <net/sock.h> #include "iwch_provider.h" @@ -46,13 +46,14 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, struct ib_event event; struct iwch_qp_attributes attrs; struct iwch_qp *qhp; + unsigned long flag; spin_lock(&rnicp->lock); qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); if (!qhp) { printk(KERN_ERR "%s unaffiliated error 0x%x qpid 0x%x\n", - __FUNCTION__, CQE_STATUS(rsp_msg->cqe), + __func__, CQE_STATUS(rsp_msg->cqe), CQE_QPID(rsp_msg->cqe)); spin_unlock(&rnicp->lock); return; @@ -61,14 +62,14 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, if ((qhp->attr.state == IWCH_QP_STATE_ERROR) || (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) { PDBG("%s AE received after RTS - " - "qp state %d qpid 0x%x status 0x%x\n", __FUNCTION__, + "qp state %d qpid 0x%x status 0x%x\n", __func__, qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe)); spin_unlock(&rnicp->lock); return; } printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x " - "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__, + "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__, CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); @@ -76,6 +77,14 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, atomic_inc(&qhp->refcnt); spin_unlock(&rnicp->lock); + if (qhp->attr.state == IWCH_QP_STATE_RTS) { + attrs.next_state = IWCH_QP_STATE_TERMINATE; + iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (send_term) + iwch_post_terminate(qhp, rsp_msg); + } + event.event = ib_event; event.device = chp->ibcq.device; if (ib_event == IB_EVENT_CQ_ERR) @@ -86,13 +95,9 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, if (qhp->ibqp.event_handler) (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); - if (qhp->attr.state == IWCH_QP_STATE_RTS) { - attrs.next_state = IWCH_QP_STATE_TERMINATE; - iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (send_term) - iwch_post_terminate(qhp, rsp_msg); - } + spin_lock_irqsave(&chp->comp_handler_lock, flag); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); if (atomic_dec_and_test(&qhp->refcnt)) wake_up(&qhp->wait); @@ -105,6 +110,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) struct iwch_cq *chp; struct iwch_qp *qhp; u32 cqid = RSPQ_CQID(rsp_msg); + unsigned long flag; rnicp = (struct iwch_dev *) rdev_p->ulp; spin_lock(&rnicp->lock); @@ -132,10 +138,10 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) (CQE_STATUS(rsp_msg->cqe) == 0)) { if (SQ_TYPE(rsp_msg->cqe)) { PDBG("%s QPID 0x%x ep %p disconnecting\n", - __FUNCTION__, qhp->wq.qpid, qhp->ep); + __func__, qhp->wq.qpid, qhp->ep); iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC); } else { - PDBG("%s post REQ_ERR AE QPID 0x%x\n", __FUNCTION__, + PDBG("%s post REQ_ERR AE QPID 0x%x\n", __func__, qhp->wq.qpid); post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 0); @@ -168,7 +174,9 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) */ if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) dst_confirm(qhp->ep->dst); + spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); break; case TPT_ERR_STAG: @@ -179,12 +187,6 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) case TPT_ERR_BOUND: case TPT_ERR_INVALIDATE_SHARED_MR: case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - printk(KERN_ERR "%s - CQE Err qpid 0x%x opcode %d status 0x%x " - "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__, - CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), - CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); break; diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index a6c2c4ba29e..5c36ee2809a 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -29,23 +29,34 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include <linux/slab.h> #include <asm/byteorder.h> #include <rdma/iw_cm.h> #include <rdma/ib_verbs.h> #include "cxio_hal.h" +#include "cxio_resource.h" #include "iwch.h" #include "iwch_provider.h" -int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, - int shift, - __be64 *page_list) +static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) { - u32 stag; u32 mmid; + mhp->attr.state = 1; + mhp->attr.stag = stag; + mmid = stag >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); + return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); +} + +int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, + struct iwch_mr *mhp, int shift) +{ + u32 stag; + int ret; if (cxio_register_phys_mem(&rhp->rdev, &stag, mhp->attr.pdid, @@ -53,28 +64,24 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, mhp->attr.zbva, mhp->attr.va_fbo, mhp->attr.len, - shift-12, - page_list, - &mhp->attr.pbl_size, &mhp->attr.pbl_addr)) + shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr)) return -ENOMEM; - mhp->attr.state = 1; - mhp->attr.stag = stag; - mmid = stag >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - insert_handle(rhp, &rhp->mmidr, mhp, mmid); - PDBG("%s mmid 0x%x mhp %p\n", __FUNCTION__, mmid, mhp); - return 0; + + ret = iwch_finish_mem_reg(mhp, stag); + if (ret) + cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + return ret; } int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, struct iwch_mr *mhp, int shift, - __be64 *page_list, int npages) { u32 stag; - u32 mmid; - + int ret; /* We could support this... */ if (npages > mhp->attr.pbl_size) @@ -87,19 +94,43 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, mhp->attr.zbva, mhp->attr.va_fbo, mhp->attr.len, - shift-12, - page_list, - &mhp->attr.pbl_size, &mhp->attr.pbl_addr)) + shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr)) return -ENOMEM; - mhp->attr.state = 1; - mhp->attr.stag = stag; - mmid = stag >> 8; - mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - insert_handle(rhp, &rhp->mmidr, mhp, mmid); - PDBG("%s mmid 0x%x mhp %p\n", __FUNCTION__, mmid, mhp); + + ret = iwch_finish_mem_reg(mhp, stag); + if (ret) + cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + + return ret; +} + +int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) +{ + mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev, + npages << 3); + + if (!mhp->attr.pbl_addr) + return -ENOMEM; + + mhp->attr.pbl_size = npages; + return 0; } +void iwch_free_pbl(struct iwch_mr *mhp) +{ + cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); +} + +int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset) +{ + return cxio_write_pbl(&mhp->rhp->rdev, pages, + mhp->attr.pbl_addr + (offset << 3), npages); +} + int build_phys_page_list(struct ib_phys_buf *buffer_list, int num_phys_buf, u64 *iova_start, @@ -122,6 +153,13 @@ int build_phys_page_list(struct ib_phys_buf *buffer_list, *total_size += buffer_list[i].size; if (i > 0) mask |= buffer_list[i].addr; + else + mask |= buffer_list[i].addr & PAGE_MASK; + if (i != num_phys_buf - 1) + mask |= buffer_list[i].addr + buffer_list[i].size; + else + mask |= (buffer_list[i].addr + buffer_list[i].size + + PAGE_SIZE - 1) & PAGE_MASK; } if (*total_size > 0xFFFFFFFFULL) @@ -129,14 +167,8 @@ int build_phys_page_list(struct ib_phys_buf *buffer_list, /* Find largest page shift we can use to cover buffers */ for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift)) - if (num_phys_buf > 1) { - if ((1ULL << *shift) & mask) - break; - } else - if (1ULL << *shift >= - buffer_list[0].size + - (buffer_list[0].addr & ((1ULL << *shift) - 1))) - break; + if ((1ULL << *shift) & mask) + break; buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); buffer_list[0].addr &= ~0ull << *shift; @@ -162,7 +194,7 @@ int build_phys_page_list(struct ib_phys_buf *buffer_list, ((u64) j << *shift)); PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n", - __FUNCTION__, (unsigned long long) *iova_start, + __func__, (unsigned long long) *iova_start, (unsigned long long) mask, *shift, (unsigned long long) *total_size, *npages); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index e7c2c394803..811b24a539c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -37,8 +37,12 @@ #include <linux/delay.h> #include <linux/errno.h> #include <linux/list.h> +#include <linux/sched.h> #include <linux/spinlock.h> #include <linux/ethtool.h> +#include <linux/rtnetlink.h> +#include <linux/inetdevice.h> +#include <linux/slab.h> #include <asm/io.h> #include <asm/irq.h> @@ -55,13 +59,7 @@ #include "iwch_provider.h" #include "iwch_cm.h" #include "iwch_user.h" - -static int iwch_modify_port(struct ib_device *ibdev, - u8 port, int port_modify_mask, - struct ib_port_modify *props) -{ - return -ENOSYS; -} +#include "common.h" static struct ib_ah *iwch_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr) @@ -100,7 +98,7 @@ static int iwch_dealloc_ucontext(struct ib_ucontext *context) struct iwch_ucontext *ucontext = to_iwch_ucontext(context); struct iwch_mm_entry *mm, *tmp; - PDBG("%s context %p\n", __FUNCTION__, context); + PDBG("%s context %p\n", __func__, context); list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) kfree(mm); cxio_release_ucontext(&rhp->rdev, &ucontext->uctx); @@ -114,7 +112,7 @@ static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev, struct iwch_ucontext *context; struct iwch_dev *rhp = to_iwch_dev(ibdev); - PDBG("%s ibdev %p\n", __FUNCTION__, ibdev); + PDBG("%s ibdev %p\n", __func__, ibdev); context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return ERR_PTR(-ENOMEM); @@ -128,7 +126,7 @@ static int iwch_destroy_cq(struct ib_cq *ib_cq) { struct iwch_cq *chp; - PDBG("%s ib_cq %p\n", __FUNCTION__, ib_cq); + PDBG("%s ib_cq %p\n", __func__, ib_cq); chp = to_iwch_cq(ib_cq); remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid); @@ -149,8 +147,10 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve struct iwch_create_cq_resp uresp; struct iwch_create_cq_req ureq; struct iwch_ucontext *ucontext = NULL; + static int warned; + size_t resplen; - PDBG("%s ib_dev %p entries %d\n", __FUNCTION__, ibdev, entries); + PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); rhp = to_iwch_dev(ibdev); chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) @@ -183,16 +183,21 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve entries = roundup_pow_of_two(entries); chp->cq.size_log2 = ilog2(entries); - if (cxio_create_cq(&rhp->rdev, &chp->cq)) { + if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) { kfree(chp); return ERR_PTR(-ENOMEM); } chp->rhp = rhp; - chp->ibcq.cqe = (1 << chp->cq.size_log2) - 1; + chp->ibcq.cqe = 1 << chp->cq.size_log2; spin_lock_init(&chp->lock); + spin_lock_init(&chp->comp_handler_lock); atomic_set(&chp->refcnt, 1); init_waitqueue_head(&chp->wait); - insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); + if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) { + cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); + kfree(chp); + return ERR_PTR(-ENOMEM); + } if (ucontext) { struct iwch_mm_entry *mm; @@ -208,15 +213,27 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve uresp.key = ucontext->key; ucontext->key += PAGE_SIZE; spin_unlock(&ucontext->mmap_lock); - if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { + mm->key = uresp.key; + mm->addr = virt_to_phys(chp->cq.queue); + if (udata->outlen < sizeof uresp) { + if (!warned++) + printk(KERN_WARNING MOD "Warning - " + "downlevel libcxgb3 (non-fatal).\n"); + mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * + sizeof(struct t3_cqe)); + resplen = sizeof(struct iwch_create_cq_resp_v0); + } else { + mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) * + sizeof(struct t3_cqe)); + uresp.memsize = mm->len; + uresp.reserved = 0; + resplen = sizeof uresp; + } + if (ib_copy_to_udata(udata, &uresp, resplen)) { kfree(mm); iwch_destroy_cq(&chp->ibcq); return ERR_PTR(-EFAULT); } - mm->key = uresp.key; - mm->addr = virt_to_phys(chp->cq.queue); - mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * - sizeof (struct t3_cqe)); insert_mmap(ucontext, mm); } PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n", @@ -232,7 +249,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) struct t3_cq oldcq, newcq; int ret; - PDBG("%s ib_cq %p cqe %d\n", __FUNCTION__, cq, cqe); + PDBG("%s ib_cq %p cqe %d\n", __func__, cq, cqe); /* We don't downsize... */ if (cqe <= cq->cqe) @@ -280,7 +297,7 @@ static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq); if (ret) { printk(KERN_ERR MOD "%s - cxio_destroy_cq failed %d\n", - __FUNCTION__, ret); + __func__, ret); } /* add user hooks here */ @@ -315,7 +332,7 @@ static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) chp->cq.rptr = rptr; } else spin_lock_irqsave(&chp->lock, flag); - PDBG("%s rptr 0x%x\n", __FUNCTION__, chp->cq.rptr); + PDBG("%s rptr 0x%x\n", __func__, chp->cq.rptr); err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0); spin_unlock_irqrestore(&chp->lock, flag); if (err < 0) @@ -336,7 +353,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) struct iwch_ucontext *ucontext; u64 addr; - PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __FUNCTION__, vma->vm_pgoff, + PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff, key, len); if (vma->vm_start & (PAGE_SIZE-1)) { @@ -389,7 +406,7 @@ static int iwch_deallocate_pd(struct ib_pd *pd) php = to_iwch_pd(pd); rhp = php->rhp; - PDBG("%s ibpd %p pdid 0x%x\n", __FUNCTION__, pd, php->pdid); + PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); kfree(php); return 0; @@ -403,7 +420,7 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev, u32 pdid; struct iwch_dev *rhp; - PDBG("%s ibdev %p\n", __FUNCTION__, ibdev); + PDBG("%s ibdev %p\n", __func__, ibdev); rhp = (struct iwch_dev *) ibdev; pdid = cxio_hal_get_pdid(rhp->rdev.rscp); if (!pdid) @@ -421,7 +438,7 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev, return ERR_PTR(-EFAULT); } } - PDBG("%s pdid 0x%0x ptr 0x%p\n", __FUNCTION__, pdid, php); + PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); return &php->ibpd; } @@ -431,7 +448,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr) struct iwch_mr *mhp; u32 mmid; - PDBG("%s ib_mr %p\n", __FUNCTION__, ib_mr); + PDBG("%s ib_mr %p\n", __func__, ib_mr); /* There can be no memory windows */ if (atomic_read(&ib_mr->usecnt)) return -EINVAL; @@ -441,12 +458,13 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr) mmid = mhp->attr.stag >> 8; cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, mhp->attr.pbl_addr); + iwch_free_pbl(mhp); remove_handle(rhp, &rhp->mmidr, mmid); if (mhp->kva) kfree((void *) (unsigned long) mhp->kva); if (mhp->umem) ib_umem_release(mhp->umem); - PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp); + PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp); kfree(mhp); return 0; } @@ -466,7 +484,7 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, struct iwch_mr *mhp; int ret; - PDBG("%s ib_pd %p\n", __FUNCTION__, pd); + PDBG("%s ib_pd %p\n", __func__, pd); php = to_iwch_pd(pd); rhp = php->rhp; @@ -474,6 +492,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, if (!mhp) return ERR_PTR(-ENOMEM); + mhp->rhp = rhp; + /* First check that we have enough alignment */ if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { ret = -EINVAL; @@ -491,7 +511,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, if (ret) goto err; - mhp->rhp = rhp; + ret = iwch_alloc_pbl(mhp, npages); + if (ret) { + kfree(page_list); + goto err_pbl; + } + + ret = iwch_write_pbl(mhp, page_list, npages, 0); + kfree(page_list); + if (ret) + goto err_pbl; + mhp->attr.pdid = php->pdid; mhp->attr.zbva = 0; @@ -501,12 +531,15 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, mhp->attr.len = (u32) total_size; mhp->attr.pbl_size = npages; - ret = iwch_register_mem(rhp, php, mhp, shift, page_list); - kfree(page_list); - if (ret) { - goto err; - } + ret = iwch_register_mem(rhp, php, mhp, shift); + if (ret) + goto err_pbl; + return &mhp->ibmr; + +err_pbl: + iwch_free_pbl(mhp); + err: kfree(mhp); return ERR_PTR(ret); @@ -527,10 +560,10 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr, __be64 *page_list = NULL; int shift = 0; u64 total_size; - int npages; + int npages = 0; int ret; - PDBG("%s ib_mr %p ib_pd %p\n", __FUNCTION__, mr, pd); + PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd); /* There can be no memory windows */ if (atomic_read(&mr->usecnt)) @@ -559,7 +592,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr, return ret; } - ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages); + ret = iwch_reregister_mem(rhp, php, &mh, shift, npages); kfree(page_list); if (ret) { return ret; @@ -585,15 +618,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { __be64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; struct iwch_dev *rhp; struct iwch_pd *php; struct iwch_mr *mhp; struct iwch_reg_user_mr_resp uresp; - - PDBG("%s ib_pd %p\n", __FUNCTION__, pd); + struct scatterlist *sg; + PDBG("%s ib_pd %p\n", __func__, pd); php = to_iwch_pd(pd); rhp = php->rhp; @@ -601,7 +633,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mhp) return ERR_PTR(-ENOMEM); - mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc); + mhp->rhp = rhp; + + mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(mhp->umem)) { err = PTR_ERR(mhp->umem); kfree(mhp); @@ -610,45 +644,58 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = ffs(mhp->umem->page_size) - 1; - n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - n += chunk->nents; + n = mhp->umem->nmap; - pages = kmalloc(n * sizeof(u64), GFP_KERNEL); + err = iwch_alloc_pbl(mhp, n); + if (err) + goto err; + + pages = (__be64 *) __get_free_page(GFP_KERNEL); if (!pages) { err = -ENOMEM; - goto err; + goto err_pbl; } i = n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address( - &chunk->page_list[j]) + + pages[i++] = cpu_to_be64(sg_dma_address(sg) + mhp->umem->page_size * k); + if (i == PAGE_SIZE / sizeof *pages) { + err = iwch_write_pbl(mhp, pages, i, n); + if (err) + goto pbl_done; + n += i; + i = 0; + } } - } + } + + if (i) + err = iwch_write_pbl(mhp, pages, i, n); + +pbl_done: + free_page((unsigned long) pages); + if (err) + goto err_pbl; - mhp->rhp = rhp; mhp->attr.pdid = php->pdid; mhp->attr.zbva = 0; mhp->attr.perms = iwch_ib_to_tpt_access(acc); mhp->attr.va_fbo = virt; mhp->attr.page_size = shift - 12; mhp->attr.len = (u32) length; - mhp->attr.pbl_size = i; - err = iwch_register_mem(rhp, php, mhp, shift, pages); - kfree(pages); + + err = iwch_register_mem(rhp, php, mhp, shift); if (err) - goto err; + goto err_pbl; - if (udata && t3b_device(rhp)) { + if (udata && !t3a_device(rhp)) { uresp.pbl_addr = (mhp->attr.pbl_addr - - rhp->rdev.rnic_info.pbl_base) >> 3; - PDBG("%s user resp pbl_addr 0x%x\n", __FUNCTION__, + rhp->rdev.rnic_info.pbl_base) >> 3; + PDBG("%s user resp pbl_addr 0x%x\n", __func__, uresp.pbl_addr); if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { @@ -660,6 +707,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mhp->ibmr; +err_pbl: + iwch_free_pbl(mhp); + err: ib_umem_release(mhp->umem); kfree(mhp); @@ -672,7 +722,7 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) u64 kva; struct ib_mr *ibmr; - PDBG("%s ib_pd %p\n", __FUNCTION__, pd); + PDBG("%s ib_pd %p\n", __func__, pd); /* * T3 only supports 32 bits of size. @@ -684,7 +734,7 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) return ibmr; } -static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) +static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct iwch_dev *rhp; struct iwch_pd *php; @@ -693,6 +743,9 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) u32 stag = 0; int ret; + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + php = to_iwch_pd(pd); rhp = php->rhp; mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); @@ -708,8 +761,13 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) mhp->attr.type = TPT_MW; mhp->attr.stag = stag; mmid = (stag) >> 8; - insert_handle(rhp, &rhp->mmidr, mhp, mmid); - PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __FUNCTION__, mmid, mhp, stag); + mhp->ibmw.rkey = stag; + if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); + kfree(mhp); + return ERR_PTR(-ENOMEM); + } + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); return &(mhp->ibmw); } @@ -724,11 +782,79 @@ static int iwch_dealloc_mw(struct ib_mw *mw) mmid = (mw->rkey) >> 8; cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); remove_handle(rhp, &rhp->mmidr, mmid); + PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); kfree(mhp); - PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __FUNCTION__, mw, mmid, mhp); return 0; } +static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) +{ + struct iwch_dev *rhp; + struct iwch_pd *php; + struct iwch_mr *mhp; + u32 mmid; + u32 stag = 0; + int ret = 0; + + php = to_iwch_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + goto err; + + mhp->rhp = rhp; + ret = iwch_alloc_pbl(mhp, pbl_depth); + if (ret) + goto err1; + mhp->attr.pbl_size = pbl_depth; + ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, + mhp->attr.pbl_size, mhp->attr.pbl_addr); + if (ret) + goto err2; + mhp->attr.pdid = php->pdid; + mhp->attr.type = TPT_NON_SHARED_MR; + mhp->attr.stag = stag; + mhp->attr.state = 1; + mmid = (stag) >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) + goto err3; + + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); + return &(mhp->ibmr); +err3: + cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); +err2: + iwch_free_pbl(mhp); +err1: + kfree(mhp); +err: + return ERR_PTR(ret); +} + +static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl( + struct ib_device *device, + int page_list_len) +{ + struct ib_fast_reg_page_list *page_list; + + page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64), + GFP_KERNEL); + if (!page_list) + return ERR_PTR(-ENOMEM); + + page_list->page_list = (u64 *)(page_list + 1); + page_list->max_page_list_len = page_list_len; + + return page_list; +} + +static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list) +{ + kfree(page_list); +} + static int iwch_destroy_qp(struct ib_qp *ib_qp) { struct iwch_dev *rhp; @@ -753,7 +879,7 @@ static int iwch_destroy_qp(struct ib_qp *ib_qp) cxio_destroy_qp(&rhp->rdev, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); - PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __FUNCTION__, + PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__, ib_qp, qhp->wq.qpid, qhp); kfree(qhp); return 0; @@ -772,7 +898,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, int wqsize, sqsize, rqsize; struct iwch_ucontext *ucontext; - PDBG("%s ib_pd %p\n", __FUNCTION__, pd); + PDBG("%s ib_pd %p\n", __func__, pd); if (attrs->qp_type != IB_QPT_RC) return ERR_PTR(-EINVAL); php = to_iwch_pd(pd); @@ -804,7 +930,16 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, */ sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); wqsize = roundup_pow_of_two(rqsize + sqsize); - PDBG("%s wqsize %d sqsize %d rqsize %d\n", __FUNCTION__, + + /* + * Kernel users need more wq space for fastreg WRs which can take + * 2 WR fragments. + */ + ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; + if (!ucontext && wqsize < (rqsize + (2 * sqsize))) + wqsize = roundup_pow_of_two(rqsize + + roundup_pow_of_two(attrs->cap.max_send_wr * 2)); + PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__, wqsize, sqsize, rqsize); qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); if (!qhp) @@ -812,14 +947,16 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, qhp->wq.size_log2 = ilog2(wqsize); qhp->wq.rq_size_log2 = ilog2(rqsize); qhp->wq.sq_size_log2 = ilog2(sqsize); - ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { kfree(qhp); return ERR_PTR(-ENOMEM); } + attrs->cap.max_recv_wr = rqsize - 1; attrs->cap.max_send_wr = sqsize; + attrs->cap.max_inline_data = T3_MAX_INLINE; + qhp->rhp = rhp; qhp->attr.pd = php->pdid; qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid; @@ -846,7 +983,13 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, spin_lock_init(&qhp->lock); init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); - insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid); + + if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) { + cxio_destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + kfree(qhp); + return ERR_PTR(-ENOMEM); + } if (udata) { @@ -893,10 +1036,10 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, qhp->ibqp.qp_num = qhp->wq.qpid; init_timer(&(qhp->timer)); PDBG("%s sq_num_entries %d, rq_num_entries %d " - "qpid 0x%0x qhp %p dma_addr 0x%llx size %d\n", - __FUNCTION__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, + "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n", + __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr, - 1 << qhp->wq.size_log2); + 1 << qhp->wq.size_log2, qhp->wq.rq_addr); return &qhp->ibqp; } @@ -908,7 +1051,7 @@ static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum iwch_qp_attr_mask mask = 0; struct iwch_qp_attributes attrs; - PDBG("%s ib_qp %p\n", __FUNCTION__, ibqp); + PDBG("%s ib_qp %p\n", __func__, ibqp); /* iwarp does not support the RTR state */ if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) @@ -941,20 +1084,20 @@ static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, void iwch_qp_add_ref(struct ib_qp *qp) { - PDBG("%s ib_qp %p\n", __FUNCTION__, qp); + PDBG("%s ib_qp %p\n", __func__, qp); atomic_inc(&(to_iwch_qp(qp)->refcnt)); } void iwch_qp_rem_ref(struct ib_qp *qp) { - PDBG("%s ib_qp %p\n", __FUNCTION__, qp); + PDBG("%s ib_qp %p\n", __func__, qp); if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt))) wake_up(&(to_iwch_qp(qp)->wait)); } static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn) { - PDBG("%s ib_dev %p qpn 0x%x\n", __FUNCTION__, dev, qpn); + PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn); } @@ -962,7 +1105,7 @@ static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn) static int iwch_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey) { - PDBG("%s ibdev %p\n", __FUNCTION__, ibdev); + PDBG("%s ibdev %p\n", __func__, ibdev); *pkey = 0; return 0; } @@ -973,7 +1116,7 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port, struct iwch_dev *dev; PDBG("%s ibdev %p, port %d, index %d, gid %p\n", - __FUNCTION__, ibdev, port, index, gid); + __func__, ibdev, port, index, gid); dev = to_iwch_dev(ibdev); BUG_ON(port == 0 || port > 2); memset(&(gid->raw[0]), 0, sizeof(gid->raw)); @@ -981,30 +1124,56 @@ static int iwch_query_gid(struct ib_device *ibdev, u8 port, return 0; } +static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) +{ + struct ethtool_drvinfo info; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; + char *cp, *next; + unsigned fw_maj, fw_min, fw_mic; + + lldev->ethtool_ops->get_drvinfo(lldev, &info); + + next = info.fw_version + 1; + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_maj); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_min); + cp = strsep(&next, "."); + sscanf(cp, "%i", &fw_mic); + + return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) | + (fw_mic & 0xffff); +} + static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { struct iwch_dev *dev; - PDBG("%s ibdev %p\n", __FUNCTION__, ibdev); + PDBG("%s ibdev %p\n", __func__, ibdev); dev = to_iwch_dev(ibdev); memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); + props->hw_ver = dev->rdev.t3cdev_p->type; + props->fw_ver = fw_vers_string_to_u64(dev); props->device_cap_flags = dev->device_cap_flags; + props->page_size_cap = dev->attr.mem_pgsizes_bitmask; props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; - props->max_mr_size = ~0ull; + props->max_mr_size = dev->attr.max_mr_size; props->max_qp = dev->attr.max_qps; props->max_qp_wr = dev->attr.max_wrs; props->max_sge = dev->attr.max_sge_per_wr; props->max_sge_rd = 1; props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; + props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; props->max_cq = dev->attr.max_cqs; props->max_cqe = dev->attr.max_cqes_per_cq; props->max_mr = dev->attr.max_mem_regs; props->max_pd = dev->attr.max_pds; props->local_ca_ack_delay = 0; + props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH; return 0; } @@ -1012,14 +1181,42 @@ static int iwch_query_device(struct ib_device *ibdev, static int iwch_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { - PDBG("%s ibdev %p\n", __FUNCTION__, ibdev); + struct iwch_dev *dev; + struct net_device *netdev; + struct in_device *inetdev; + + PDBG("%s ibdev %p\n", __func__, ibdev); + + dev = to_iwch_dev(ibdev); + netdev = dev->rdev.port_info.lldevs[port-1]; + + memset(props, 0, sizeof(struct ib_port_attr)); props->max_mtu = IB_MTU_4096; - props->lid = 0; - props->lmc = 0; - props->sm_lid = 0; - props->sm_sl = 0; - props->state = IB_PORT_ACTIVE; - props->phys_state = 0; + if (netdev->mtu >= 4096) + props->active_mtu = IB_MTU_4096; + else if (netdev->mtu >= 2048) + props->active_mtu = IB_MTU_2048; + else if (netdev->mtu >= 1024) + props->active_mtu = IB_MTU_1024; + else if (netdev->mtu >= 512) + props->active_mtu = IB_MTU_512; + else + props->active_mtu = IB_MTU_256; + + if (!netif_carrier_ok(netdev)) + props->state = IB_PORT_DOWN; + else { + inetdev = in_dev_get(netdev); + if (inetdev) { + if (inetdev->ifa_list) + props->state = IB_PORT_ACTIVE; + else + props->state = IB_PORT_INIT; + in_dev_put(inetdev); + } else + props->state = IB_PORT_INIT; + } + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_SNMP_TUNNEL_SUP | @@ -1028,65 +1225,122 @@ static int iwch_query_port(struct ib_device *ibdev, IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; - props->qkey_viol_cntr = 0; props->active_width = 2; - props->active_speed = 2; + props->active_speed = IB_SPEED_DDR; props->max_msg_sz = -1; return 0; } -static ssize_t show_rev(struct class_device *cdev, char *buf) +static ssize_t show_rev(struct device *dev, struct device_attribute *attr, + char *buf) { - struct iwch_dev *dev = container_of(cdev, struct iwch_dev, - ibdev.class_dev); - PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev); - return sprintf(buf, "%d\n", dev->rdev.t3cdev_p->type); + struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, + ibdev.dev); + PDBG("%s dev 0x%p\n", __func__, dev); + return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); } -static ssize_t show_fw_ver(struct class_device *cdev, char *buf) +static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf) { - struct iwch_dev *dev = container_of(cdev, struct iwch_dev, - ibdev.class_dev); + struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, + ibdev.dev); struct ethtool_drvinfo info; - struct net_device *lldev = dev->rdev.t3cdev_p->lldev; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev); + PDBG("%s dev 0x%p\n", __func__, dev); lldev->ethtool_ops->get_drvinfo(lldev, &info); return sprintf(buf, "%s\n", info.fw_version); } -static ssize_t show_hca(struct class_device *cdev, char *buf) +static ssize_t show_hca(struct device *dev, struct device_attribute *attr, + char *buf) { - struct iwch_dev *dev = container_of(cdev, struct iwch_dev, - ibdev.class_dev); + struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, + ibdev.dev); struct ethtool_drvinfo info; - struct net_device *lldev = dev->rdev.t3cdev_p->lldev; + struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev); + PDBG("%s dev 0x%p\n", __func__, dev); lldev->ethtool_ops->get_drvinfo(lldev, &info); return sprintf(buf, "%s\n", info.driver); } -static ssize_t show_board(struct class_device *cdev, char *buf) +static ssize_t show_board(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, + ibdev.dev); + PDBG("%s dev 0x%p\n", __func__, dev); + return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor, + iwch_dev->rdev.rnic_info.pdev->device); +} + +static int iwch_get_mib(struct ib_device *ibdev, + union rdma_protocol_stats *stats) { - struct iwch_dev *dev = container_of(cdev, struct iwch_dev, - ibdev.class_dev); - PDBG("%s class dev 0x%p\n", __FUNCTION__, dev); - return sprintf(buf, "%x.%x\n", dev->rdev.rnic_info.pdev->vendor, - dev->rdev.rnic_info.pdev->device); + struct iwch_dev *dev; + struct tp_mib_stats m; + int ret; + + PDBG("%s ibdev %p\n", __func__, ibdev); + dev = to_iwch_dev(ibdev); + ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m); + if (ret) + return -ENOSYS; + + memset(stats, 0, sizeof *stats); + stats->iw.ipInReceives = ((u64) m.ipInReceive_hi << 32) + + m.ipInReceive_lo; + stats->iw.ipInHdrErrors = ((u64) m.ipInHdrErrors_hi << 32) + + m.ipInHdrErrors_lo; + stats->iw.ipInAddrErrors = ((u64) m.ipInAddrErrors_hi << 32) + + m.ipInAddrErrors_lo; + stats->iw.ipInUnknownProtos = ((u64) m.ipInUnknownProtos_hi << 32) + + m.ipInUnknownProtos_lo; + stats->iw.ipInDiscards = ((u64) m.ipInDiscards_hi << 32) + + m.ipInDiscards_lo; + stats->iw.ipInDelivers = ((u64) m.ipInDelivers_hi << 32) + + m.ipInDelivers_lo; + stats->iw.ipOutRequests = ((u64) m.ipOutRequests_hi << 32) + + m.ipOutRequests_lo; + stats->iw.ipOutDiscards = ((u64) m.ipOutDiscards_hi << 32) + + m.ipOutDiscards_lo; + stats->iw.ipOutNoRoutes = ((u64) m.ipOutNoRoutes_hi << 32) + + m.ipOutNoRoutes_lo; + stats->iw.ipReasmTimeout = (u64) m.ipReasmTimeout; + stats->iw.ipReasmReqds = (u64) m.ipReasmReqds; + stats->iw.ipReasmOKs = (u64) m.ipReasmOKs; + stats->iw.ipReasmFails = (u64) m.ipReasmFails; + stats->iw.tcpActiveOpens = (u64) m.tcpActiveOpens; + stats->iw.tcpPassiveOpens = (u64) m.tcpPassiveOpens; + stats->iw.tcpAttemptFails = (u64) m.tcpAttemptFails; + stats->iw.tcpEstabResets = (u64) m.tcpEstabResets; + stats->iw.tcpOutRsts = (u64) m.tcpOutRsts; + stats->iw.tcpCurrEstab = (u64) m.tcpCurrEstab; + stats->iw.tcpInSegs = ((u64) m.tcpInSegs_hi << 32) + + m.tcpInSegs_lo; + stats->iw.tcpOutSegs = ((u64) m.tcpOutSegs_hi << 32) + + m.tcpOutSegs_lo; + stats->iw.tcpRetransSegs = ((u64) m.tcpRetransSeg_hi << 32) + + m.tcpRetransSeg_lo; + stats->iw.tcpInErrs = ((u64) m.tcpInErrs_hi << 32) + + m.tcpInErrs_lo; + stats->iw.tcpRtoMin = (u64) m.tcpRtoMin; + stats->iw.tcpRtoMax = (u64) m.tcpRtoMax; + return 0; } -static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); -static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); -static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); -static struct class_device_attribute *iwch_class_attributes[] = { - &class_device_attr_hw_rev, - &class_device_attr_fw_ver, - &class_device_attr_hca_type, - &class_device_attr_board_id +static struct device_attribute *iwch_class_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver, + &dev_attr_hca_type, + &dev_attr_board_id, }; int iwch_register_device(struct iwch_dev *dev) @@ -1094,14 +1348,17 @@ int iwch_register_device(struct iwch_dev *dev) int ret; int i; - PDBG("%s iwch_dev %p\n", __FUNCTION__, dev); + PDBG("%s iwch_dev %p\n", __func__, dev); strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX); memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); dev->ibdev.owner = THIS_MODULE; - dev->device_cap_flags = - (IB_DEVICE_ZERO_STAG | - IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW); + dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_MGT_EXTENSIONS; + + /* cxgb3 supports STag 0. */ + dev->ibdev.local_dma_lkey = 0; dev->ibdev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | @@ -1128,7 +1385,6 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev); dev->ibdev.query_device = iwch_query_device; dev->ibdev.query_port = iwch_query_port; - dev->ibdev.modify_port = iwch_modify_port; dev->ibdev.query_pkey = iwch_query_pkey; dev->ibdev.query_gid = iwch_query_gid; dev->ibdev.alloc_ucontext = iwch_alloc_ucontext; @@ -1153,19 +1409,22 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.alloc_mw = iwch_alloc_mw; dev->ibdev.bind_mw = iwch_bind_mw; dev->ibdev.dealloc_mw = iwch_dealloc_mw; - + dev->ibdev.alloc_fast_reg_mr = iwch_alloc_fast_reg_mr; + dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl; + dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl; dev->ibdev.attach_mcast = iwch_multicast_attach; dev->ibdev.detach_mcast = iwch_multicast_detach; dev->ibdev.process_mad = iwch_process_mad; - dev->ibdev.req_notify_cq = iwch_arm_cq; dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; + dev->ibdev.get_protocol_stats = iwch_get_mib; + dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; + dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); + if (!dev->ibdev.iwcm) + return -ENOMEM; - dev->ibdev.iwcm = - (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs), - GFP_KERNEL); dev->ibdev.iwcm->connect = iwch_connect; dev->ibdev.iwcm->accept = iwch_accept_cr; dev->ibdev.iwcm->reject = iwch_reject_cr; @@ -1175,13 +1434,13 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref; dev->ibdev.iwcm->get_qp = iwch_get_qp; - ret = ib_register_device(&dev->ibdev); + ret = ib_register_device(&dev->ibdev, NULL); if (ret) goto bail1; for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) { - ret = class_device_create_file(&dev->ibdev.class_dev, - iwch_class_attributes[i]); + ret = device_create_file(&dev->ibdev.dev, + iwch_class_attributes[i]); if (ret) { goto bail2; } @@ -1190,6 +1449,7 @@ int iwch_register_device(struct iwch_dev *dev) bail2: ib_unregister_device(&dev->ibdev); bail1: + kfree(dev->ibdev.iwcm); return ret; } @@ -1197,10 +1457,11 @@ void iwch_unregister_device(struct iwch_dev *dev) { int i; - PDBG("%s iwch_dev %p\n", __FUNCTION__, dev); + PDBG("%s iwch_dev %p\n", __func__, dev); for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) - class_device_remove_file(&dev->ibdev.class_dev, - iwch_class_attributes[i]); + device_remove_file(&dev->ibdev.dev, + iwch_class_attributes[i]); ib_unregister_device(&dev->ibdev); + kfree(dev->ibdev.iwcm); return; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index 48833f3f3bd..87c14b0c5ac 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -103,6 +103,7 @@ struct iwch_cq { struct iwch_dev *rhp; struct t3_cq cq; spinlock_t lock; + spinlock_t comp_handler_lock; atomic_t refcnt; wait_queue_head_t wait; u32 __user *user_rptr_addr; @@ -118,6 +119,7 @@ enum IWCH_QP_FLAGS { }; struct iwch_mpa_attributes { + u8 initiator; u8 recv_marker_enabled; u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */ u8 crc_enabled; @@ -213,7 +215,7 @@ static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext, if (mm->key == key && mm->len == len) { list_del_init(&mm->entry); spin_unlock(&ucontext->mmap_lock); - PDBG("%s key 0x%x addr 0x%llx len %d\n", __FUNCTION__, + PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__, key, (unsigned long long) mm->addr, mm->len); return mm; } @@ -226,7 +228,7 @@ static inline void insert_mmap(struct iwch_ucontext *ucontext, struct iwch_mm_entry *mm) { spin_lock(&ucontext->mmap_lock); - PDBG("%s key 0x%x addr 0x%llx len %d\n", __FUNCTION__, + PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__, mm->key, (unsigned long long) mm->addr, mm->len); list_add_tail(&mm->entry, &ucontext->mmaps); spin_unlock(&ucontext->mmap_lock); @@ -292,15 +294,14 @@ static inline u32 iwch_ib_to_tpt_access(int acc) return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) | + (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) | TPT_LOCAL_READ; } -static inline u32 iwch_ib_to_mwbind_access(int acc) +static inline u32 iwch_ib_to_tpt_bind_access(int acc) { - return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) | - (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) | - (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) | - T3_MEM_ACCESS_LOCAL_READ; + return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0); } enum iwch_mmid_state { @@ -322,6 +323,7 @@ enum iwch_qp_query_flags { IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */ }; +u16 iwch_rqes_posted(struct iwch_qp *qhp); int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, @@ -331,20 +333,19 @@ int iwch_bind_mw(struct ib_qp *qp, struct ib_mw_bind *mw_bind); int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); +int iwch_post_zb_read(struct iwch_ep *ep); int iwch_register_device(struct iwch_dev *dev); void iwch_unregister_device(struct iwch_dev *dev); -int iwch_quiesce_qps(struct iwch_cq *chp); -int iwch_resume_qps(struct iwch_cq *chp); void stop_read_rep_timer(struct iwch_qp *qhp); int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, - struct iwch_mr *mhp, - int shift, - __be64 *page_list); + struct iwch_mr *mhp, int shift); int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, struct iwch_mr *mhp, int shift, - __be64 *page_list, int npages); +int iwch_alloc_pbl(struct iwch_mr *mhp, int npages); +void iwch_free_pbl(struct iwch_mr *mhp); +int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset); int build_phys_page_list(struct ib_phys_buf *buffer_list, int num_phys_buf, u64 *iova_start, diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 714dddbc9a9..b57c0befd96 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -29,14 +29,17 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include <linux/sched.h> +#include <linux/gfp.h> #include "iwch_provider.h" #include "iwch.h" #include "iwch_cm.h" #include "cxio_hal.h" +#include "cxio_resource.h" #define NO_SUPPORT -1 -static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, u8 * flit_cnt) { int i; @@ -44,59 +47,44 @@ static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: if (wr->send_flags & IB_SEND_SOLICITED) wqe->send.rdmaop = T3_SEND_WITH_SE; else wqe->send.rdmaop = T3_SEND; wqe->send.rem_stag = 0; break; -#if 0 /* Not currently supported */ - case TYPE_SEND_INVALIDATE: - case TYPE_SEND_INVALIDATE_IMMEDIATE: - wqe->send.rdmaop = T3_SEND_WITH_INV; - wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); - break; - case TYPE_SEND_SE_INVALIDATE: - wqe->send.rdmaop = T3_SEND_WITH_SE_INV; - wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.rdmaop = T3_SEND_WITH_SE_INV; + else + wqe->send.rdmaop = T3_SEND_WITH_INV; + wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey); break; -#endif default: - break; + return -EINVAL; } if (wr->num_sge > T3_MAX_SGE) return -EINVAL; wqe->send.reserved[0] = 0; wqe->send.reserved[1] = 0; wqe->send.reserved[2] = 0; - if (wr->opcode == IB_WR_SEND_WITH_IMM) { - plen = 4; - wqe->send.sgl[0].stag = wr->imm_data; - wqe->send.sgl[0].len = __constant_cpu_to_be32(0); - wqe->send.num_sgle = __constant_cpu_to_be32(0); - *flit_cnt = 5; - } else { - plen = 0; - for (i = 0; i < wr->num_sge; i++) { - if ((plen + wr->sg_list[i].length) < plen) { - return -EMSGSIZE; - } - plen += wr->sg_list[i].length; - wqe->send.sgl[i].stag = - cpu_to_be32(wr->sg_list[i].lkey); - wqe->send.sgl[i].len = - cpu_to_be32(wr->sg_list[i].length); - wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); - } - wqe->send.num_sgle = cpu_to_be32(wr->num_sge); - *flit_cnt = 4 + ((wr->num_sge) << 1); + plen = 0; + for (i = 0; i < wr->num_sge; i++) { + if ((plen + wr->sg_list[i].length) < plen) + return -EMSGSIZE; + + plen += wr->sg_list[i].length; + wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey); + wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); + wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); } + wqe->send.num_sgle = cpu_to_be32(wr->num_sge); + *flit_cnt = 4 + ((wr->num_sge) << 1); wqe->send.plen = cpu_to_be32(plen); return 0; } -static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt) { int i; @@ -112,9 +100,9 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { plen = 4; - wqe->write.sgl[0].stag = wr->imm_data; - wqe->write.sgl[0].len = __constant_cpu_to_be32(0); - wqe->write.num_sgle = __constant_cpu_to_be32(0); + wqe->write.sgl[0].stag = wr->ex.imm_data; + wqe->write.sgl[0].len = cpu_to_be32(0); + wqe->write.num_sgle = cpu_to_be32(0); *flit_cnt = 6; } else { plen = 0; @@ -137,15 +125,18 @@ static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, +static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, u8 *flit_cnt) { if (wr->num_sge > 1) return -EINVAL; wqe->read.rdmaop = T3_READ_REQ; + if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) + wqe->read.local_inv = 1; + else + wqe->read.local_inv = 0; wqe->read.reserved[0] = 0; wqe->read.reserved[1] = 0; - wqe->read.reserved[2] = 0; wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey); wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr); wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey); @@ -155,48 +146,96 @@ static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -/* - * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now. - */ +static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq) +{ + int i; + __be64 *p; + + if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH) + return -EINVAL; + *wr_cnt = 1; + wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey); + wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length); + wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); + wqe->fastreg.va_base_lo_fbo = + cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); + wqe->fastreg.page_type_perms = cpu_to_be32( + V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) | + V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) | + V_FR_TYPE(TPT_VATO) | + V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags))); + p = &wqe->fastreg.pbl_addrs[0]; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) { + + /* If we need a 2nd WR, then set it up */ + if (i == T3_MAX_FASTREG_FRAG) { + *wr_cnt = 2; + wqe = (union t3_wr *)(wq->queue + + Q_PTR2IDX((wq->wptr+1), wq->size_log2)); + build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0, + Q_GENBIT(wq->wptr + 1, wq->size_log2), + 0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG, + T3_EOP); + + p = &wqe->pbl_frag.pbl_addrs[0]; + } + *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); + } + *flit_cnt = 5 + wr->wr.fast_reg.page_list_len; + if (*flit_cnt > 15) + *flit_cnt = 15; + return 0; +} + +static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt) +{ + wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey); + wqe->local_inv.reserved = 0; + *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3; + return 0; +} + static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, u32 num_sgle, u32 * pbl_addr, u8 * page_size) { int i; struct iwch_mr *mhp; - u32 offset; + u64 offset; for (i = 0; i < num_sgle; i++) { mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8); if (!mhp) { - PDBG("%s %d\n", __FUNCTION__, __LINE__); + PDBG("%s %d\n", __func__, __LINE__); return -EIO; } if (!mhp->attr.state) { - PDBG("%s %d\n", __FUNCTION__, __LINE__); + PDBG("%s %d\n", __func__, __LINE__); return -EIO; } if (mhp->attr.zbva) { - PDBG("%s %d\n", __FUNCTION__, __LINE__); + PDBG("%s %d\n", __func__, __LINE__); return -EIO; } if (sg_list[i].addr < mhp->attr.va_fbo) { - PDBG("%s %d\n", __FUNCTION__, __LINE__); + PDBG("%s %d\n", __func__, __LINE__); return -EINVAL; } if (sg_list[i].addr + ((u64) sg_list[i].length) < sg_list[i].addr) { - PDBG("%s %d\n", __FUNCTION__, __LINE__); + PDBG("%s %d\n", __func__, __LINE__); return -EINVAL; } if (sg_list[i].addr + ((u64) sg_list[i].length) > mhp->attr.va_fbo + ((u64) mhp->attr.len)) { - PDBG("%s %d\n", __FUNCTION__, __LINE__); + PDBG("%s %d\n", __func__, __LINE__); return -EINVAL; } offset = sg_list[i].addr - mhp->attr.va_fbo; - offset += ((u32) mhp->attr.va_fbo) % - (1UL << (12 + mhp->attr.page_size)); + offset += mhp->attr.va_fbo & + ((1UL << (12 + mhp->attr.page_size)) - 1); pbl_addr[i] = ((mhp->attr.pbl_addr - rhp->rdev.rnic_info.pbl_base) >> 3) + (offset >> (12 + mhp->attr.page_size)); @@ -205,15 +244,14 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, return 0; } -static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, +static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, struct ib_recv_wr *wr) { int i, err = 0; - u32 pbl_addr[4]; - u8 page_size[4]; - if (wr->num_sge > T3_MAX_SGE) - return -EINVAL; - err = iwch_sgl2pbl_map(rhp, wr->sg_list, wr->num_sge, pbl_addr, + u32 pbl_addr[T3_MAX_SGE]; + u8 page_size[T3_MAX_SGE]; + + err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr, page_size); if (err) return err; @@ -227,8 +265,8 @@ static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); /* to in the WQE == the offset into the page */ - wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) % - (1UL << (12 + page_size[i]))); + wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) & + ((1UL << (12 + page_size[i])) - 1)); /* pbl_addr is the adapters address in the PBL */ wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); @@ -239,6 +277,73 @@ static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, wqe->recv.sgl[i].to = 0; wqe->recv.pbl_addr[i] = 0; } + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].wr_id = wr->wr_id; + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].pbl_addr = 0; + return 0; +} + +static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe, + struct ib_recv_wr *wr) +{ + int i; + u32 pbl_addr; + u32 pbl_offset; + + + /* + * The T3 HW requires the PBL in the HW recv descriptor to reference + * a PBL entry. So we allocate the max needed PBL memory here and pass + * it to the uP in the recv WR. The uP will build the PBL and setup + * the HW recv descriptor. + */ + pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE); + if (!pbl_addr) + return -ENOMEM; + + /* + * Compute the 8B aligned offset. + */ + pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3; + + wqe->recv.num_sgle = cpu_to_be32(wr->num_sge); + + for (i = 0; i < wr->num_sge; i++) { + + /* + * Use a 128MB page size. This and an imposed 128MB + * sge length limit allows us to require only a 2-entry HW + * PBL for each SGE. This restriction is acceptable since + * since it is not possible to allocate 128MB of contiguous + * DMA coherent memory! + */ + if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN) + return -EINVAL; + wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT; + + /* + * T3 restricts a recv to all zero-stag or all non-zero-stag. + */ + if (wr->sg_list[i].lkey != 0) + return -EINVAL; + wqe->recv.sgl[i].stag = 0; + wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); + wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr); + wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset); + pbl_offset += 2; + } + for (; i < T3_MAX_SGE; i++) { + wqe->recv.pagesz[i] = 0; + wqe->recv.sgl[i].stag = 0; + wqe->recv.sgl[i].len = 0; + wqe->recv.sgl[i].to = 0; + wqe->recv.pbl_addr[i] = 0; + } + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].wr_id = wr->wr_id; + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, + qhp->wq.rq_size_log2)].pbl_addr = pbl_addr; return 0; } @@ -246,7 +351,7 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { int err = 0; - u8 t3_wr_flit_cnt; + u8 uninitialized_var(t3_wr_flit_cnt); enum t3_wr_opcode t3_wr_opcode = 0; enum t3_wr_flags t3_wr_flags; struct iwch_qp *qhp; @@ -255,23 +360,25 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, u32 num_wrs; unsigned long flag; struct t3_swsq *sqp; + int wr_cnt = 1; qhp = to_iwch_qp(ibqp); spin_lock_irqsave(&qhp->lock, flag); if (qhp->attr.state > IWCH_QP_STATE_RTS) { spin_unlock_irqrestore(&qhp->lock, flag); - return -EINVAL; + err = -EINVAL; + goto out; } num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, qhp->wq.sq_size_log2); - if (num_wrs <= 0) { + if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); - return -ENOMEM; + err = -ENOMEM; + goto out; } while (wr) { if (num_wrs == 0) { err = -ENOMEM; - *bad_wr = wr; break; } idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); @@ -279,42 +386,52 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, t3_wr_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_FENCE) - t3_wr_flags |= T3_READ_FENCE_FLAG; if (wr->send_flags & IB_SEND_SIGNALED) t3_wr_flags |= T3_COMPLETION_FLAG; sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); switch (wr->opcode) { case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_FENCE) + t3_wr_flags |= T3_READ_FENCE_FLAG; t3_wr_opcode = T3_WR_SEND; - err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt); + err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt); break; case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: t3_wr_opcode = T3_WR_WRITE; - err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt); + err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt); break; case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: t3_wr_opcode = T3_WR_READ; t3_wr_flags = 0; /* T3 reads are always signaled */ - err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt); + err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt); if (err) break; sqp->read_len = wqe->read.local_len; if (!qhp->wq.oldest_read) qhp->wq.oldest_read = sqp; break; + case IB_WR_FAST_REG_MR: + t3_wr_opcode = T3_WR_FASTREG; + err = build_fastreg(wqe, wr, &t3_wr_flit_cnt, + &wr_cnt, &qhp->wq); + break; + case IB_WR_LOCAL_INV: + if (wr->send_flags & IB_SEND_FENCE) + t3_wr_flags |= T3_LOCAL_FENCE_FLAG; + t3_wr_opcode = T3_WR_INV_STAG; + err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt); + break; default: - PDBG("%s post of type=%d TBD!\n", __FUNCTION__, + PDBG("%s post of type=%d TBD!\n", __func__, wr->opcode); err = -EINVAL; } - if (err) { - *bad_wr = wr; + if (err) break; - } wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; sqp->wr_id = wr->wr_id; sqp->opcode = wr2opcode(t3_wr_opcode); @@ -324,18 +441,24 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, t3_wr_flit_cnt); + 0, t3_wr_flit_cnt, + (wr_cnt == 1) ? T3_SOPEOP : T3_SOP); PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n", - __FUNCTION__, (unsigned long long) wr->wr_id, idx, + __func__, (unsigned long long) wr->wr_id, idx, Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), sqp->opcode); wr = wr->next; num_wrs--; - ++(qhp->wq.wptr); + qhp->wq.wptr += wr_cnt; ++(qhp->wq.sq_wptr); } spin_unlock_irqrestore(&qhp->lock, flag); - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + if (cxio_wq_db_enabled(&qhp->wq)) + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + +out: + if (err) + *bad_wr = wr; return err; } @@ -353,32 +476,39 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (qhp->attr.state > IWCH_QP_STATE_RTS) { spin_unlock_irqrestore(&qhp->lock, flag); - return -EINVAL; + err = -EINVAL; + goto out; } num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr, qhp->wq.rq_size_log2) - 1; if (!wr) { spin_unlock_irqrestore(&qhp->lock, flag); - return -EINVAL; + err = -ENOMEM; + goto out; } while (wr) { + if (wr->num_sge > T3_MAX_SGE) { + err = -EINVAL; + break; + } idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); wqe = (union t3_wr *) (qhp->wq.queue + idx); if (num_wrs) - err = iwch_build_rdma_recv(qhp->rhp, wqe, wr); + if (wr->sg_list[0].lkey) + err = build_rdma_recv(qhp, wqe, wr); + else + err = build_zero_stag_recv(qhp, wqe, wr); else err = -ENOMEM; - if (err) { - *bad_wr = wr; + + if (err) break; - } - qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] = - wr->wr_id; + build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), - 0, sizeof(struct t3_receive_wr) >> 3); + 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x " - "wqe %p \n", __FUNCTION__, (unsigned long long) wr->wr_id, + "wqe %p \n", __func__, (unsigned long long) wr->wr_id, idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); ++(qhp->wq.rq_wptr); ++(qhp->wq.wptr); @@ -386,7 +516,12 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, num_wrs--; } spin_unlock_irqrestore(&qhp->lock, flag); - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + if (cxio_wq_db_enabled(&qhp->wq)) + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + +out: + if (err) + *bad_wr = wr; return err; } @@ -419,12 +554,12 @@ int iwch_bind_mw(struct ib_qp *qp, } num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, qhp->wq.sq_size_log2); - if ((num_wrs) <= 0) { + if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); return -ENOMEM; } idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); - PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __FUNCTION__, idx, + PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx, mw, mw_bind); wqe = (union t3_wr *) (qhp->wq.queue + idx); @@ -432,22 +567,23 @@ int iwch_bind_mw(struct ib_qp *qp, if (mw_bind->send_flags & IB_SEND_SIGNALED) t3_wr_flags = T3_COMPLETION_FLAG; - sgl.addr = mw_bind->addr; - sgl.lkey = mw_bind->mr->lkey; - sgl.length = mw_bind->length; + sgl.addr = mw_bind->bind_info.addr; + sgl.lkey = mw_bind->bind_info.mr->lkey; + sgl.length = mw_bind->bind_info.length; wqe->bind.reserved = 0; - wqe->bind.type = T3_VA_BASED_TO; + wqe->bind.type = TPT_VATO; /* TBD: check perms */ - wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags); - wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey); + wqe->bind.perms = iwch_ib_to_tpt_bind_access( + mw_bind->bind_info.mw_access_flags); + wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey); wqe->bind.mw_stag = cpu_to_be32(mw->rkey); - wqe->bind.mw_len = cpu_to_be32(mw_bind->length); - wqe->bind.mw_va = cpu_to_be64(mw_bind->addr); + wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length); + wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr); err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size); if (err) { spin_unlock_irqrestore(&qhp->lock, flag); - return err; + return err; } wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); @@ -458,15 +594,15 @@ int iwch_bind_mw(struct ib_qp *qp, sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED); wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr); wqe->bind.mr_pagesz = page_size; - wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id; build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, - sizeof(struct t3_bind_mw_wr) >> 3); + sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP); ++(qhp->wq.wptr); ++(qhp->wq.sq_wptr); spin_unlock_irqrestore(&qhp->lock, flag); - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + if (cxio_wq_db_enabled(&qhp->wq)) + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); return err; } @@ -603,6 +739,35 @@ static inline void build_term_codes(struct respQ_msg_t *rsp_msg, } } +int iwch_post_zb_read(struct iwch_ep *ep) +{ + union t3_wr *wqe; + struct sk_buff *skb; + u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; + + PDBG("%s enter\n", __func__); + skb = alloc_skb(40, GFP_KERNEL); + if (!skb) { + printk(KERN_ERR "%s cannot send zb_read!!\n", __func__); + return -ENOMEM; + } + wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr)); + memset(wqe, 0, sizeof(struct t3_rdma_read_wr)); + wqe->read.rdmaop = T3_READ_REQ; + wqe->read.reserved[0] = 0; + wqe->read.reserved[1] = 0; + wqe->read.rem_stag = cpu_to_be32(1); + wqe->read.rem_to = cpu_to_be64(1); + wqe->read.local_stag = cpu_to_be32(1); + wqe->read.local_len = cpu_to_be32(0); + wqe->read.local_to = cpu_to_be64(1); + wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ)); + wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)| + V_FW_RIWR_LEN(flit_cnt)); + skb->priority = CPL_PRIORITY_DATA; + return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb); +} + /* * This posts a TERMINATE with layer=RDMA, type=catastrophic. */ @@ -612,10 +777,10 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) struct terminate_message *term; struct sk_buff *skb; - PDBG("%s %d\n", __FUNCTION__, __LINE__); + PDBG("%s %d\n", __func__, __LINE__); skb = alloc_skb(40, GFP_ATOMIC); if (!skb) { - printk(KERN_ERR "%s cannot send TERMINATE!\n", __FUNCTION__); + printk(KERN_ERR "%s cannot send TERMINATE!\n", __func__); return -ENOMEM; } wqe = (union t3_wr *)skb_put(skb, 40); @@ -628,69 +793,103 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) /* immediate data starts here. */ term = (struct terminate_message *)wqe->send.sgl; build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); - build_fw_riwrh((void *)wqe, T3_WR_SEND, - T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 1, - qhp->ep->hwtid, 5); + wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) | + V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); + wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)); skb->priority = CPL_PRIORITY_DATA; - return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); + return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); } /* * Assumes qhp lock is held. */ -static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) +static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, + struct iwch_cq *schp) { - struct iwch_cq *rchp, *schp; int count; + int flushed; - rchp = get_chp(qhp->rhp, qhp->attr.rcq); - schp = get_chp(qhp->rhp, qhp->attr.scq); - PDBG("%s qhp %p rchp %p schp %p\n", __FUNCTION__, qhp, rchp, schp); + PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); /* take a ref on the qhp since we must release the lock */ atomic_inc(&qhp->refcnt); - spin_unlock_irqrestore(&qhp->lock, *flag); + spin_unlock(&qhp->lock); - /* locking heirarchy: cq lock first, then qp lock. */ - spin_lock_irqsave(&rchp->lock, *flag); + /* locking hierarchy: cq lock first, then qp lock. */ + spin_lock(&rchp->lock); spin_lock(&qhp->lock); cxio_flush_hw_cq(&rchp->cq); cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); - cxio_flush_rq(&qhp->wq, &rchp->cq, count); + flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&rchp->lock, *flag); + spin_unlock(&rchp->lock); + if (flushed) { + spin_lock(&rchp->comp_handler_lock); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock(&rchp->comp_handler_lock); + } - /* locking heirarchy: cq lock first, then qp lock. */ - spin_lock_irqsave(&schp->lock, *flag); + /* locking hierarchy: cq lock first, then qp lock. */ + spin_lock(&schp->lock); spin_lock(&qhp->lock); cxio_flush_hw_cq(&schp->cq); cxio_count_scqes(&schp->cq, &qhp->wq, &count); - cxio_flush_sq(&qhp->wq, &schp->cq, count); + flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&schp->lock, *flag); + spin_unlock(&schp->lock); + if (flushed) { + spin_lock(&schp->comp_handler_lock); + (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + spin_unlock(&schp->comp_handler_lock); + } /* deref */ if (atomic_dec_and_test(&qhp->refcnt)) wake_up(&qhp->wait); - spin_lock_irqsave(&qhp->lock, *flag); + spin_lock(&qhp->lock); } -static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) +static void flush_qp(struct iwch_qp *qhp) { - if (t3b_device(qhp->rhp)) + struct iwch_cq *rchp, *schp; + + rchp = get_chp(qhp->rhp, qhp->attr.rcq); + schp = get_chp(qhp->rhp, qhp->attr.scq); + + if (qhp->ibqp.uobject) { cxio_set_wq_in_error(&qhp->wq); - else - __flush_qp(qhp, flag); + cxio_set_cq_in_error(&rchp->cq); + spin_lock(&rchp->comp_handler_lock); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock(&rchp->comp_handler_lock); + if (schp != rchp) { + cxio_set_cq_in_error(&schp->cq); + spin_lock(&schp->comp_handler_lock); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock(&schp->comp_handler_lock); + } + return; + } + __flush_qp(qhp, rchp, schp); } /* - * Return non zero if at least one RECV was pre-posted. + * Return count of RECV WRs posted */ -static int rqes_posted(struct iwch_qp *qhp) +u16 iwch_rqes_posted(struct iwch_qp *qhp) { - return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV; + union t3_wr *wqe = qhp->wq.queue; + u16 count = 0; + + while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { + count++; + wqe++; + } + PDBG("%s qhp %p count %u\n", __func__, qhp, count); + return count; } static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, @@ -712,32 +911,36 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, (qhp->attr.mpa_attr.xmit_marker_enabled << 1) | (qhp->attr.mpa_attr.crc_enabled << 2); - /* - * XXX - The IWCM doesn't quite handle getting these - * attrs set before going into RTS. For now, just turn - * them on always... - */ -#if 0 - init_attr.qpcaps = qhp->attr.enableRdmaRead | - (qhp->attr.enableRdmaWrite << 1) | - (qhp->attr.enableBind << 2) | - (qhp->attr.enable_stag0_fastreg << 3) | - (qhp->attr.enable_stag0_fastreg << 4); -#else - init_attr.qpcaps = 0x1f; -#endif + init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE | + uP_RI_QP_RDMA_WRITE_ENABLE | + uP_RI_QP_BIND_ENABLE; + if (!qhp->ibqp.uobject) + init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE | + uP_RI_QP_FAST_REGISTER_ENABLE; + init_attr.tcp_emss = qhp->ep->emss; init_attr.ord = qhp->attr.max_ord; init_attr.ird = qhp->attr.max_ird; init_attr.qp_dma_addr = qhp->wq.dma_addr; init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); - init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0; + init_attr.rqe_count = iwch_rqes_posted(qhp); + init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0; + init_attr.chan = qhp->ep->l2t->smt_idx; + if (peer2peer) { + init_attr.rtr_type = RTR_READ; + if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) + init_attr.ord = 1; + if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator) + init_attr.ird = 1; + } else + init_attr.rtr_type = 0; + init_attr.irs = qhp->ep->rcv_seq; PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d " - "flags 0x%x qpcaps 0x%x\n", __FUNCTION__, + "flags 0x%x qpcaps 0x%x\n", __func__, init_attr.rq_addr, init_attr.rq_size, init_attr.flags, init_attr.qpcaps); ret = cxio_rdma_init(&rhp->rdev, &init_attr); - PDBG("%s ret %d\n", __FUNCTION__, ret); + PDBG("%s ret %d\n", __func__, ret); return ret; } @@ -755,7 +958,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, int free = 0; struct iwch_ep *ep = NULL; - PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __FUNCTION__, + PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__, qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state, (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1); @@ -829,7 +1032,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, break; case IWCH_QP_STATE_ERROR: qhp->attr.state = IWCH_QP_STATE_ERROR; - flush_qp(qhp, &flag); + flush_qp(qhp); break; default: ret = -EINVAL; @@ -845,11 +1048,12 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, abort=0; disconnect = 1; ep = qhp->ep; + get_ep(&ep->com); } break; case IWCH_QP_STATE_TERMINATE: qhp->attr.state = IWCH_QP_STATE_TERMINATE; - if (t3b_device(qhp->rhp)) + if (qhp->ibqp.uobject) cxio_set_wq_in_error(&qhp->wq); if (!internal) terminate = 1; @@ -860,6 +1064,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, abort=1; disconnect = 1; ep = qhp->ep; + get_ep(&ep->com); } goto err; break; @@ -875,6 +1080,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, } switch (attrs->next_state) { case IWCH_QP_STATE_IDLE: + flush_qp(qhp); qhp->attr.state = IWCH_QP_STATE_IDLE; qhp->attr.llp_stream_handle = NULL; put_ep(&qhp->ep->com); @@ -900,7 +1106,6 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, goto out; } qhp->attr.state = IWCH_QP_STATE_IDLE; - memset(&qhp->attr, 0, sizeof(qhp->attr)); break; case IWCH_QP_STATE_TERMINATE: if (!internal) { @@ -911,14 +1116,14 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, break; default: printk(KERN_ERR "%s in a bad state %d\n", - __FUNCTION__, qhp->attr.state); + __func__, qhp->attr.state); ret = -EINVAL; goto err; break; } goto out; err: - PDBG("%s disassociating ep %p qpid 0x%x\n", __FUNCTION__, qhp->ep, + PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep, qhp->wq.qpid); /* disassociate the LLP connection */ @@ -929,7 +1134,7 @@ err: free=1; wake_up(&qhp->wait); BUG_ON(!ep); - flush_qp(qhp, &flag); + flush_qp(qhp); out: spin_unlock_irqrestore(&qhp->lock, flag); @@ -941,8 +1146,10 @@ out: * on the EP. This can be a normal close (RTS->CLOSING) or * an abnormal close (RTS/CLOSING->ERROR). */ - if (disconnect) + if (disconnect) { iwch_ep_disconnect(ep, abort, GFP_KERNEL); + put_ep(&ep->com); + } /* * If free is 1, then we've disassociated the EP from the QP @@ -951,62 +1158,6 @@ out: if (free) put_ep(&ep->com); - PDBG("%s exit state %d\n", __FUNCTION__, qhp->attr.state); + PDBG("%s exit state %d\n", __func__, qhp->attr.state); return ret; } - -static int quiesce_qp(struct iwch_qp *qhp) -{ - spin_lock_irq(&qhp->lock); - iwch_quiesce_tid(qhp->ep); - qhp->flags |= QP_QUIESCED; - spin_unlock_irq(&qhp->lock); - return 0; -} - -static int resume_qp(struct iwch_qp *qhp) -{ - spin_lock_irq(&qhp->lock); - iwch_resume_tid(qhp->ep); - qhp->flags &= ~QP_QUIESCED; - spin_unlock_irq(&qhp->lock); - return 0; -} - -int iwch_quiesce_qps(struct iwch_cq *chp) -{ - int i; - struct iwch_qp *qhp; - - for (i=0; i < T3_MAX_NUM_QP; i++) { - qhp = get_qhp(chp->rhp, i); - if (!qhp) - continue; - if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) { - quiesce_qp(qhp); - continue; - } - if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp)) - quiesce_qp(qhp); - } - return 0; -} - -int iwch_resume_qps(struct iwch_cq *chp) -{ - int i; - struct iwch_qp *qhp; - - for (i=0; i < T3_MAX_NUM_QP; i++) { - qhp = get_qhp(chp->rhp, i); - if (!qhp) - continue; - if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) { - resume_qp(qhp); - continue; - } - if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp)) - resume_qp(qhp); - } - return 0; -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h index cb7086f558c..a277c31fcaf 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_user.h +++ b/drivers/infiniband/hw/cxgb3/iwch_user.h @@ -45,10 +45,18 @@ struct iwch_create_cq_req { __u64 user_rptr_addr; }; +struct iwch_create_cq_resp_v0 { + __u64 key; + __u32 cqid; + __u32 size_log2; +}; + struct iwch_create_cq_resp { __u64 key; __u32 cqid; __u32 size_log2; + __u32 memsize; + __u32 reserved; }; struct iwch_create_qp_resp { |
