Diffstat (limited to 'drivers/infiniband/hw')
265 files changed, 112060 insertions, 12543 deletions
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
new file mode 100644
index 00000000000..e900b03531a
--- /dev/null
+++ b/drivers/infiniband/hw/Makefile
@@ -0,0 +1,12 @@
+obj-$(CONFIG_INFINIBAND_MTHCA)		+= mthca/
+obj-$(CONFIG_INFINIBAND_IPATH)		+= ipath/
+obj-$(CONFIG_INFINIBAND_QIB)		+= qib/
+obj-$(CONFIG_INFINIBAND_EHCA)		+= ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100)	+= amso1100/
+obj-$(CONFIG_INFINIBAND_CXGB3)		+= cxgb3/
+obj-$(CONFIG_INFINIBAND_CXGB4)		+= cxgb4/
+obj-$(CONFIG_MLX4_INFINIBAND)		+= mlx4/
+obj-$(CONFIG_MLX5_INFINIBAND)		+= mlx5/
+obj-$(CONFIG_INFINIBAND_NES)		+= nes/
+obj-$(CONFIG_INFINIBAND_OCRDMA)		+= ocrdma/
+obj-$(CONFIG_INFINIBAND_USNIC)		+= usnic/
diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild
index 06964c4af84..950dfabcd89 100644
--- a/drivers/infiniband/hw/amso1100/Kbuild
+++ b/drivers/infiniband/hw/amso1100/Kbuild
@@ -1,6 +1,4 @@
-ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_INFINIBAND_AMSO1100_DEBUG) := -DDEBUG
 
 obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 113f3c03c5b..00400c352c1 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -36,6 +36,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/inetdevice.h>
+#include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/ethtool.h>
 #include <linux/mii.h>
@@ -46,6 +47,8 @@
 #include <linux/tcp.h>
 #include <linux/init.h>
 #include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/prefetch.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -76,7 +79,6 @@ static irqreturn_t c2_interrupt(int irq, void *dev_id);
 static void c2_tx_timeout(struct net_device *netdev);
 static int c2_change_mtu(struct net_device *netdev, int new_mtu);
 static void c2_reset(struct c2_port *c2_port);
-static struct net_device_stats *c2_get_stats(struct net_device *netdev);
 
 static struct pci_device_id c2_pci_table[] = {
 	{ PCI_DEVICE(0x18b8, 0xb001) },
@@ -87,11 +89,7 @@ MODULE_DEVICE_TABLE(pci, c2_pci_table);
 
 static void c2_print_macaddr(struct net_device *netdev)
 {
-	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
-		"IRQ %u\n", netdev->name,
-		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
-		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
-		netdev->irq);
+	pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr, netdev->irq);
 }
 
 static void c2_set_rxbufsize(struct c2_port *c2_port)
@@ -349,7 +347,7 @@ static void c2_tx_clean(struct c2_port *c2_port)
 					    elem->hw_desc + C2_TXP_ADDR);
 			__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_DONE),
 				     elem->hw_desc + C2_TXP_FLAGS);
-			c2_port->netstats.tx_dropped++;
+			c2_port->netdev->stats.tx_dropped++;
 			break;
 		} else {
 			__raw_writew(0,
@@ -457,7 +455,7 @@ static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
 		      elem->hw_desc + C2_RXP_FLAGS);
 
 	pr_debug("packet dropped\n");
-	c2_port->netstats.rx_dropped++;
+	c2_port->netdev->stats.rx_dropped++;
 }
 
 static void c2_rx_interrupt(struct net_device *netdev)
@@ -531,9 +529,8 @@ static void c2_rx_interrupt(struct net_device *netdev)
 
 		netif_rx(skb);
 
-		netdev->last_rx = jiffies;
-		c2_port->netstats.rx_packets++;
-		c2_port->netstats.rx_bytes += buflen;
+		netdev->stats.rx_packets++;
+		netdev->stats.rx_bytes += buflen;
 	}
 
 	/* Save where we left off */
@@ -797,19 +794,16 @@ static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
 			     elem->hw_desc + C2_TXP_FLAGS);
 
-		c2_port->netstats.tx_packets++;
-		c2_port->netstats.tx_bytes += maplen;
+		netdev->stats.tx_packets++;
+		netdev->stats.tx_bytes += maplen;
 
 		/* Loop thru additional data fragments and queue them */
 		if (skb_shinfo(skb)->nr_frags) {
 			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-				skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-				maplen = frag->size;
-				mapaddr =
-					pci_map_page(c2dev->pcidev, frag->page,
-						     frag->page_offset, maplen,
-						     PCI_DMA_TODEVICE);
-
+				const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+				maplen = skb_frag_size(frag);
+				mapaddr = skb_frag_dma_map(&c2dev->pcidev->dev, frag,
+							   0, maplen, DMA_TO_DEVICE);
 				elem = elem->next;
 				elem->skb = NULL;
 				elem->mapaddr = mapaddr;
@@ -823,8 +817,8 @@ static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 				__raw_writew((__force u16) cpu_to_be16(TXP_HTXD_READY),
 					     elem->hw_desc + C2_TXP_FLAGS);
 
-				c2_port->netstats.tx_packets++;
-				c2_port->netstats.tx_bytes += maplen;
+				netdev->stats.tx_packets++;
+				netdev->stats.tx_bytes += maplen;
 			}
 		}
 
@@ -845,13 +839,6 @@ static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	return NETDEV_TX_OK;
 }
 
-static struct net_device_stats *c2_get_stats(struct net_device *netdev)
-{
-	struct c2_port *c2_port = netdev_priv(netdev);
-
-	return &c2_port->netstats;
-}
-
 static void c2_tx_timeout(struct net_device *netdev)
 {
 	struct c2_port *c2_port = netdev_priv(netdev);
@@ -880,6 +867,16 @@ static int c2_change_mtu(struct net_device *netdev, int new_mtu)
 	return ret;
 }
 
+static const struct net_device_ops c2_netdev = {
+	.ndo_open		= c2_up,
+	.ndo_stop		= c2_down,
+	.ndo_start_xmit		= c2_xmit_frame,
+	.ndo_tx_timeout		= c2_tx_timeout,
+	.ndo_change_mtu		= c2_change_mtu,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
 /* Initialize network device */
 static struct net_device *c2_devinit(struct c2_dev *c2dev,
 				     void __iomem * mmio_addr)
@@ -894,12 +891,7 @@ static struct net_device *c2_devinit(struct c2_dev *c2dev,
 
 	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
 
-	netdev->open = c2_up;
-	netdev->stop = c2_down;
-	netdev->hard_start_xmit = c2_xmit_frame;
-	netdev->get_stats = c2_get_stats;
-	netdev->tx_timeout = c2_tx_timeout;
-	netdev->change_mtu = c2_change_mtu;
+	netdev->netdev_ops = &c2_netdev;
 	netdev->watchdog_timeo = C2_TX_TIMEOUT;
 	netdev->irq = c2dev->pcidev->irq;
 
@@ -928,8 +920,7 @@ static struct net_device *c2_devinit(struct c2_dev *c2dev,
 	return netdev;
 }
 
-static int __devinit c2_probe(struct pci_dev *pcidev,
-			      const struct pci_device_id *ent)
+static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
 {
 	int ret = 0, i;
 	unsigned long reg0_start, reg0_flags, reg0_len;
@@ -992,13 +983,13 @@ static int __devinit c2_probe(struct pci_dev *pcidev,
 	}
 
 	if ((sizeof(dma_addr_t) > 4)) {
-		ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+		ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64));
 		if (ret < 0) {
 			printk(KERN_ERR PFX "64b DMA configuration failed\n");
 			goto bail2;
 		}
 	} else {
-		ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+		ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32));
 		if (ret < 0) {
 			printk(KERN_ERR PFX "32b DMA configuration failed\n");
 			goto bail2;
@@ -1091,6 +1082,7 @@ static int __devinit c2_probe(struct pci_dev *pcidev,
 
 	/* Initialize network device */
 	if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
+		ret = -ENOMEM;
 		iounmap(mmio_regs);
 		goto bail4;
 	}
@@ -1160,7 +1152,8 @@ static int __devinit c2_probe(struct pci_dev *pcidev,
 		goto bail10;
 	}
 
-	if (c2_register_device(c2dev))
+	ret = c2_register_device(c2dev);
+	if (ret)
 		goto bail10;
 
 	return 0;
@@ -1199,7 +1192,7 @@ static int __devinit c2_probe(struct pci_dev *pcidev,
 	return ret;
 }
 
-static void __devexit c2_remove(struct pci_dev *pcidev)
+static void c2_remove(struct pci_dev *pcidev)
 {
 	struct c2_dev *c2dev = pci_get_drvdata(pcidev);
 	struct net_device *netdev = c2dev->netdev;
@@ -1244,18 +1237,7 @@ static struct pci_driver c2_pci_driver = {
 	.name = DRV_NAME,
 	.id_table = c2_pci_table,
 	.probe = c2_probe,
-	.remove = __devexit_p(c2_remove),
+	.remove = c2_remove,
 };
 
-static int __init c2_init_module(void)
-{
-	return pci_register_driver(&c2_pci_driver);
-}
-
-static void __exit c2_exit_module(void)
-{
-	pci_unregister_driver(&c2_pci_driver);
-}
-
-module_init(c2_init_module);
-module_exit(c2_exit_module);
+module_pci_driver(c2_pci_driver);
diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h
index d12a24a84fd..d619d735838 100644
--- a/drivers/infiniband/hw/amso1100/c2.h
+++ b/drivers/infiniband/hw/amso1100/c2.h
@@ -250,7 +250,7 @@ struct c2_array {
 struct sp_chunk {
 	struct sp_chunk *next;
 	dma_addr_t dma_addr;
-	DECLARE_PCI_UNMAP_ADDR(mapping);
+	DEFINE_DMA_UNMAP_ADDR(mapping);
 	u16 head;
 	u16 shared_ptr[0];
 };
@@ -265,7 +265,6 @@ struct c2_pd_table {
 struct c2_qp_table {
 	struct idr idr;
 	spinlock_t lock;
-	int last;
 };
 
 struct c2_element {
@@ -369,8 +368,6 @@ struct c2_port {
 	unsigned long mem_size;
 
 	u32 rx_buf_size;
-
-	struct net_device_stats netstats;
 };
 
 /*
@@ -500,16 +497,16 @@ extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
 			struct ib_send_wr **bad_wr);
 extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
 			   struct ib_recv_wr **bad_wr);
-extern void __devinit c2_init_qp_table(struct c2_dev *c2dev);
-extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev);
+extern void c2_init_qp_table(struct c2_dev *c2dev);
+extern void c2_cleanup_qp_table(struct c2_dev *c2dev);
 extern void c2_set_qp_state(struct c2_qp *, int);
 extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
 
 /* PDs */
 extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
 extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
-extern int __devinit c2_init_pd_table(struct c2_dev *c2dev);
-extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev);
+extern int c2_init_pd_table(struct c2_dev *c2dev);
+extern void c2_cleanup_pd_table(struct c2_dev *c2dev);
 
 /* CQs */
 extern int c2_init_cq(struct c2_dev *c2dev, int entries,
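The c2.c hunks above fold the old per-field net_device callbacks (netdev->open, ->stop, ->hard_start_xmit, ...) into a single const struct net_device_ops, which is what the c2_netdev table is. As a hedged illustration of that pattern outside this driver (the mydrv_* names are hypothetical):

	#include <linux/netdevice.h>

	static int mydrv_open(struct net_device *dev) { return 0; }
	static int mydrv_stop(struct net_device *dev) { return 0; }
	static netdev_tx_t mydrv_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		dev_kfree_skb_any(skb);	/* toy transmit: just consume the skb */
		return NETDEV_TX_OK;
	}

	/* One shared const ops table replaces the per-device function pointers. */
	static const struct net_device_ops mydrv_netdev_ops = {
		.ndo_open	= mydrv_open,
		.ndo_stop	= mydrv_stop,
		.ndo_start_xmit	= mydrv_xmit,
	};

	static void mydrv_setup(struct net_device *netdev)
	{
		netdev->netdev_ops = &mydrv_netdev_ops;	/* single assignment */
	}

Because the table is const and shared, a driver can no longer patch individual callbacks at runtime; that is presumably why the hunks wire in the stock eth_mac_addr/eth_validate_addr helpers instead of leaving those entries unset.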
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
index 62af74295db..cedda25232b 100644
--- a/drivers/infiniband/hw/amso1100/c2_ae.c
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -141,7 +141,7 @@ static const char *to_qp_state_str(int state)
 		return "C2_QP_STATE_ERROR";
 	default:
 		return "<invalid QP state>";
-	};
+	}
 }
 
 void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
@@ -155,9 +155,11 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
 	enum c2_event_id event_id;
 	unsigned long flags;
 	int status;
+	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_event.local_addr;
+	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_event.remote_addr;
 
 	/*
-	 * retreive the message
+	 * retrieve the message
 	 */
 	wr = c2_mq_consume(mq);
 	if (!wr)
@@ -206,10 +208,10 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
 	case CCAE_ACTIVE_CONNECT_RESULTS:
 		res = &wr->ae.ae_active_connect_results;
 		cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
-		cm_event.local_addr.sin_addr.s_addr = res->laddr;
-		cm_event.remote_addr.sin_addr.s_addr = res->raddr;
-		cm_event.local_addr.sin_port = res->lport;
-		cm_event.remote_addr.sin_port = res->rport;
+		laddr->sin_addr.s_addr = res->laddr;
+		raddr->sin_addr.s_addr = res->raddr;
+		laddr->sin_port = res->lport;
+		raddr->sin_port = res->rport;
 		if (status == 0) {
 			cm_event.private_data_len =
 				be32_to_cpu(res->private_data_length);
@@ -281,13 +283,18 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
 		}
 		cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
 		cm_event.provider_data = (void*)(unsigned long)req->cr_handle;
-		cm_event.local_addr.sin_addr.s_addr = req->laddr;
-		cm_event.remote_addr.sin_addr.s_addr = req->raddr;
-		cm_event.local_addr.sin_port = req->lport;
-		cm_event.remote_addr.sin_port = req->rport;
+		laddr->sin_addr.s_addr = req->laddr;
+		raddr->sin_addr.s_addr = req->raddr;
+		laddr->sin_port = req->lport;
+		raddr->sin_port = req->rport;
 		cm_event.private_data_len =
 			be32_to_cpu(req->private_data_length);
 		cm_event.private_data = req->private_data;
+		/*
+		 * Until ird/ord negotiation via MPAv2 support is added, send
+		 * max supported values
+		 */
+		cm_event.ird = cm_event.ord = 128;
 
 		if (cm_id->event_handler)
 			cm_id->event_handler(cm_id, &cm_event);
@@ -306,6 +313,7 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
 		if (cq->ibcq.event_handler)
 			cq->ibcq.event_handler(&ib_event,
 					       cq->ibcq.cq_context);
+		break;
 	}
 
 	default:
diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c
index e9110163aef..78d247ec696 100644
--- a/drivers/infiniband/hw/amso1100/c2_alloc.c
+++ b/drivers/infiniband/hw/amso1100/c2_alloc.c
@@ -32,7 +32,6 @@
  */
 
 #include <linux/errno.h>
-#include <linux/slab.h>
 #include <linux/bitmap.h>
 
 #include "c2.h"
@@ -50,7 +49,7 @@ static int c2_alloc_mqsp_chunk(struct c2_dev *c2dev, gfp_t gfp_mask,
 		return -ENOMEM;
 
 	new_head->dma_addr = dma_addr;
-	pci_unmap_addr_set(new_head, mapping, new_head->dma_addr);
+	dma_unmap_addr_set(new_head, mapping, new_head->dma_addr);
 
 	new_head->next = NULL;
 	new_head->head = 0;
@@ -82,7 +81,7 @@ void c2_free_mqsp_pool(struct c2_dev *c2dev, struct sp_chunk *root)
 	while (root) {
 		next = root->next;
 		dma_free_coherent(&c2dev->pcidev->dev, PAGE_SIZE, root,
-				  pci_unmap_addr(root, mapping));
+				  dma_unmap_addr(root, mapping));
 		root = next;
 	}
 }
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c
index 75b93e9b881..23bfa94fbd4 100644
--- a/drivers/infiniband/hw/amso1100/c2_cm.c
+++ b/drivers/infiniband/hw/amso1100/c2_cm.c
@@ -31,6 +31,8 @@
  * SOFTWARE.
  *
  */
+#include <linux/slab.h>
+
 #include "c2.h"
 #include "c2_wr.h"
 #include "c2_vq.h"
@@ -44,6 +46,10 @@ int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
 	struct c2wr_qp_connect_req *wr;	/* variable size needs a malloc. */
 	struct c2_vq_req *vq_req;
 	int err;
+	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+
+	if (cm_id->remote_addr.ss_family != AF_INET)
+		return -ENOSYS;
 
 	ibqp = c2_get_qp(cm_id->device, iw_param->qpn);
 	if (!ibqp)
@@ -89,8 +95,8 @@ int c2_llp_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
 	wr->rnic_handle = c2dev->adapter_handle;
 	wr->qp_handle = qp->adapter_handle;
 
-	wr->remote_addr = cm_id->remote_addr.sin_addr.s_addr;
-	wr->remote_port = cm_id->remote_addr.sin_port;
+	wr->remote_addr = raddr->sin_addr.s_addr;
+	wr->remote_port = raddr->sin_port;
 
 	/*
 	 * Move any private data from the callers's buf into
@@ -133,6 +139,10 @@ int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
 	struct c2wr_ep_listen_create_rep *reply;
 	struct c2_vq_req *vq_req;
 	int err;
+	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+
+	if (cm_id->local_addr.ss_family != AF_INET)
+		return -ENOSYS;
 
 	c2dev = to_c2dev(cm_id->device);
 	if (c2dev == NULL)
@@ -151,8 +161,8 @@ int c2_llp_service_create(struct iw_cm_id *cm_id, int backlog)
 	c2_wr_set_id(&wr, CCWR_EP_LISTEN_CREATE);
 	wr.hdr.context = (u64) (unsigned long) vq_req;
 	wr.rnic_handle = c2dev->adapter_handle;
-	wr.local_addr = cm_id->local_addr.sin_addr.s_addr;
-	wr.local_port = cm_id->local_addr.sin_port;
+	wr.local_addr = laddr->sin_addr.s_addr;
+	wr.local_port = laddr->sin_port;
 	wr.backlog = cpu_to_be32(backlog);
 	wr.user_context = (u64) (unsigned long) cm_id;
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
index bb17cce3cb5..49e0e8533f7 100644
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -35,6 +35,8 @@
  * SOFTWARE.
  *
 */
+#include <linux/gfp.h>
+
 #include "c2.h"
 #include "c2_vq.h"
 #include "c2_status.h"
@@ -133,7 +135,7 @@ static inline int c2_poll_one(struct c2_dev *c2dev,
 	struct c2_qp *qp;
 	int is_recv = 0;
 
-	ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+	ce = c2_mq_consume(&cq->mq);
 	if (!ce) {
 		return -EAGAIN;
 	}
@@ -146,7 +148,7 @@ static inline int c2_poll_one(struct c2_dev *c2dev,
 	while ((qp = (struct c2_qp *) (unsigned long) ce->qp_user_context) == NULL) {
 		c2_mq_free(&cq->mq);
-		ce = (struct c2wr_ce *) c2_mq_consume(&cq->mq);
+		ce = c2_mq_consume(&cq->mq);
 		if (!ce)
 			return -EAGAIN;
 	}
@@ -255,7 +257,7 @@ int c2_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
 static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
 {
 	dma_free_coherent(&c2dev->pcidev->dev, mq->q_size * mq->msg_size,
-			  mq->msg_pool.host, pci_unmap_addr(mq, mapping));
+			  mq->msg_pool.host, dma_unmap_addr(mq, mapping));
 }
 
 static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
@@ -276,7 +278,7 @@ static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
 			 NULL,	/* peer (currently unknown) */
 			 C2_MQ_HOST_TARGET);
 
-	pci_unmap_addr_set(mq, mapping, mq->host_dma);
+	dma_unmap_addr_set(mq, mapping, mq->host_dma);
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
index 3b5095470cb..3a17d9b36db 100644
--- a/drivers/infiniband/hw/amso1100/c2_intr.c
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -62,8 +62,8 @@ void c2_rnic_interrupt(struct c2_dev *c2dev)
 static void handle_mq(struct c2_dev *c2dev, u32 mq_index)
 {
 	if (c2dev->qptr_array[mq_index] == NULL) {
-		pr_debug(KERN_INFO "handle_mq: stray activity for mq_index=%d\n",
-			 mq_index);
+		pr_debug("handle_mq: stray activity for mq_index=%d\n",
+			 mq_index);
 		return;
 	}
 
@@ -169,7 +169,8 @@ static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
 		 * We should never get here, as the adapter should
 		 * never send us a reply that we're not expecting.
 		 */
-		vq_repbuf_free(c2dev, host_msg);
+		if (reply_msg != NULL)
+			vq_repbuf_free(c2dev, host_msg);
 		pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
 		return;
 	}
@@ -183,6 +184,11 @@ static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
 	case IW_CM_EVENT_ESTABLISHED:
 		c2_set_qp_state(req->qp, C2_QP_STATE_RTS);
+		/*
+		 * Until ird/ord negotiation via MPAv2 support is added, send
+		 * max supported values
+		 */
+		cm_event.ird = cm_event.ord = 128;
 	case IW_CM_EVENT_CLOSE:
 
 		/*
diff --git a/drivers/infiniband/hw/amso1100/c2_mm.c b/drivers/infiniband/hw/amso1100/c2_mm.c
index b506fe22b4d..119c4f3d979 100644
--- a/drivers/infiniband/hw/amso1100/c2_mm.c
+++ b/drivers/infiniband/hw/amso1100/c2_mm.c
@@ -30,6 +30,8 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include <linux/slab.h>
+
 #include "c2.h"
 #include "c2_vq.h"
diff --git a/drivers/infiniband/hw/amso1100/c2_mq.h b/drivers/infiniband/hw/amso1100/c2_mq.h
index acede007b94..fc1b9a7cec4 100644
--- a/drivers/infiniband/hw/amso1100/c2_mq.h
+++ b/drivers/infiniband/hw/amso1100/c2_mq.h
@@ -71,7 +71,7 @@ struct c2_mq {
 		u8 __iomem *adapter;
 	} msg_pool;
 	dma_addr_t host_dma;
-	DECLARE_PCI_UNMAP_ADDR(mapping);
+	DEFINE_DMA_UNMAP_ADDR(mapping);
 	u16 hint_count;
 	u16 priv;
 	struct c2_mq_shared __iomem *peer;
diff --git a/drivers/infiniband/hw/amso1100/c2_pd.c b/drivers/infiniband/hw/amso1100/c2_pd.c
index 00c709926c8..f3e81dc357b 100644
--- a/drivers/infiniband/hw/amso1100/c2_pd.c
+++ b/drivers/infiniband/hw/amso1100/c2_pd.c
@@ -34,6 +34,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/errno.h>
 
 #include "c2.h"
@@ -69,7 +70,7 @@ void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
 	spin_unlock(&c2dev->pd_table.lock);
 }
 
-int __devinit c2_init_pd_table(struct c2_dev *c2dev)
+int c2_init_pd_table(struct c2_dev *c2dev)
 {
 
 	c2dev->pd_table.last = 0;
@@ -83,7 +84,7 @@ int __devinit c2_init_pd_table(struct c2_dev *c2dev)
 	return 0;
 }
 
-void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev)
+void c2_cleanup_pd_table(struct c2_dev *c2dev)
 {
 	kfree(c2dev->pd_table.table);
 }
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 2acf9b62cf9..8af33cf1fc4 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -50,6 +50,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/if_arp.h>
 #include <linux/vmalloc.h>
+#include <linux/slab.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -93,19 +94,11 @@ static int c2_query_port(struct ib_device *ibdev,
 	props->pkey_tbl_len = 1;
 	props->qkey_viol_cntr = 0;
 	props->active_width = 1;
-	props->active_speed = 1;
+	props->active_speed = IB_SPEED_SDR;
 
 	return 0;
 }
 
-static int c2_modify_port(struct ib_device *ibdev,
-			  u8 port, int port_modify_mask,
-			  struct ib_port_modify *props)
-{
-	pr_debug("%s:%u\n", __func__, __LINE__);
-	return 0;
-}
-
 static int c2_query_pkey(struct ib_device *ibdev,
 			 u8 port, u16 index, u16 * pkey)
 {
@@ -272,7 +265,6 @@ static struct ib_qp *c2_create_qp(struct ib_pd *pd,
 		pr_debug("%s: Invalid QP type: %d\n", __func__,
 			 init_attr->qp_type);
 		return ERR_PTR(-EINVAL);
-		break;
 	}
 
 	if (err) {
@@ -439,9 +431,9 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	u64 *pages;
 	u64 kva = 0;
 	int shift, n, len;
-	int i, j, k;
+	int i, k, entry;
 	int err = 0;
-	struct ib_umem_chunk *chunk;
+	struct scatterlist *sg;
 	struct c2_pd *c2pd = to_c2pd(pd);
 	struct c2_mr *c2mr;
 
@@ -460,10 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	}
 
 	shift = ffs(c2mr->umem->page_size) - 1;
-
-	n = 0;
-	list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
-		n += chunk->nents;
+	n = c2mr->umem->nmap;
 
 	pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
 	if (!pages) {
@@ -472,14 +461,12 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	}
 
 	i = 0;
-	list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
-		for (j = 0; j < chunk->nmap; ++j) {
-			len = sg_dma_len(&chunk->page_list[j]) >> shift;
-			for (k = 0; k < len; ++k) {
-				pages[i++] =
-					sg_dma_address(&chunk->page_list[j]) +
-					(c2mr->umem->page_size * k);
-			}
+	for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) {
+		len = sg_dma_len(sg) >> shift;
+		for (k = 0; k < len; ++k) {
+			pages[i++] =
+				sg_dma_address(sg) +
+				(c2mr->umem->page_size * k);
 		}
 	}
 
@@ -654,7 +641,7 @@ static int c2_service_destroy(struct iw_cm_id *cm_id)
 static int c2_pseudo_up(struct net_device *netdev)
 {
 	struct in_device *ind;
-	struct c2_dev *c2dev = netdev->priv;
+	struct c2_dev *c2dev = netdev->ml_priv;
 
 	ind = in_dev_get(netdev);
 	if (!ind)
@@ -679,7 +666,7 @@ static int c2_pseudo_down(struct net_device *netdev)
 {
 	struct in_device *ind;
-	struct c2_dev *c2dev = netdev->priv;
+	struct c2_dev *c2dev = netdev->ml_priv;
 
 	ind = in_dev_get(netdev);
 	if (!ind)
@@ -709,26 +696,27 @@ static int c2_pseudo_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 
 static int c2_pseudo_change_mtu(struct net_device *netdev, int new_mtu)
 {
-	int ret = 0;
-
 	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
 		return -EINVAL;
 
 	netdev->mtu = new_mtu;
 
 	/* TODO: Tell rnic about new rmda interface mtu */
-	return ret;
+	return 0;
 }
 
+static const struct net_device_ops c2_pseudo_netdev_ops = {
+	.ndo_open		= c2_pseudo_up,
+	.ndo_stop		= c2_pseudo_down,
+	.ndo_start_xmit		= c2_pseudo_xmit_frame,
+	.ndo_change_mtu		= c2_pseudo_change_mtu,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
 static void setup(struct net_device *netdev)
 {
-	netdev->open = c2_pseudo_up;
-	netdev->stop = c2_pseudo_down;
-	netdev->hard_start_xmit = c2_pseudo_xmit_frame;
-	netdev->get_stats = NULL;
-	netdev->tx_timeout = NULL;
-	netdev->set_mac_address = NULL;
-	netdev->change_mtu = c2_pseudo_change_mtu;
+	netdev->netdev_ops = &c2_pseudo_netdev_ops;
+
 	netdev->watchdog_timeo = 0;
 	netdev->type = ARPHRD_ETHER;
 	netdev->mtu = 1500;
@@ -736,7 +724,6 @@ static void setup(struct net_device *netdev)
 	netdev->addr_len = ETH_ALEN;
 	netdev->tx_queue_len = 0;
 	netdev->flags |= IFF_NOARP;
-	return;
 }
 
 static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
@@ -747,24 +734,21 @@ static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
 	/* change ethxxx to iwxxx */
 	strcpy(name, "iw");
 	strcat(name, &c2dev->netdev->name[3]);
-	netdev = alloc_netdev(sizeof(*netdev), name, setup);
+	netdev = alloc_netdev(0, name, setup);
 	if (!netdev) {
 		printk(KERN_ERR PFX "%s - etherdev alloc failed", __func__);
 		return NULL;
 	}
 
-	netdev->priv = c2dev;
+	netdev->ml_priv = c2dev;
 
 	SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
 
 	memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
 
 	/* Print out the MAC address */
-	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
-		netdev->name,
-		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
-		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
+	pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr);
 
 #if 0
 	/* Disable network packets */
@@ -781,11 +765,11 @@ int c2_register_device(struct c2_dev *dev)
 	/* Register pseudo network device */
 	dev->pseudo_netdev = c2_pseudo_netdev_init(dev);
 	if (!dev->pseudo_netdev)
-		goto out3;
+		goto out;
 
 	ret = register_netdev(dev->pseudo_netdev);
 	if (ret)
-		goto out2;
+		goto out_free_netdev;
 
 	pr_debug("%s:%u\n", __func__, __LINE__);
 	strlcpy(dev->ibdev.name, "amso%d", IB_DEVICE_NAME_MAX);
@@ -817,7 +801,6 @@ int c2_register_device(struct c2_dev *dev)
 	dev->ibdev.dma_device = &dev->pcidev->dev;
 	dev->ibdev.query_device = c2_query_device;
 	dev->ibdev.query_port = c2_query_port;
-	dev->ibdev.modify_port = c2_modify_port;
 	dev->ibdev.query_pkey = c2_query_pkey;
 	dev->ibdev.query_gid = c2_query_gid;
 	dev->ibdev.alloc_ucontext = c2_alloc_ucontext;
@@ -852,6 +835,10 @@ int c2_register_device(struct c2_dev *dev)
 	dev->ibdev.post_recv = c2_post_receive;
 
 	dev->ibdev.iwcm = kmalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
+	if (dev->ibdev.iwcm == NULL) {
+		ret = -ENOMEM;
+		goto out_unregister_netdev;
+	}
 	dev->ibdev.iwcm->add_ref = c2_add_ref;
 	dev->ibdev.iwcm->rem_ref = c2_rem_ref;
 	dev->ibdev.iwcm->get_qp = c2_get_qp;
@@ -861,25 +848,27 @@ int c2_register_device(struct c2_dev *dev)
 	dev->ibdev.iwcm->create_listen = c2_service_create;
 	dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
 
-	ret = ib_register_device(&dev->ibdev);
+	ret = ib_register_device(&dev->ibdev, NULL);
 	if (ret)
-		goto out1;
+		goto out_free_iwcm;
 
 	for (i = 0; i < ARRAY_SIZE(c2_dev_attributes); ++i) {
 		ret = device_create_file(&dev->ibdev.dev,
 					 c2_dev_attributes[i]);
 		if (ret)
-			goto out0;
+			goto out_unregister_ibdev;
 	}
-	goto out3;
+	goto out;
 
-out0:
+out_unregister_ibdev:
 	ib_unregister_device(&dev->ibdev);
-out1:
+out_free_iwcm:
+	kfree(dev->ibdev.iwcm);
+out_unregister_netdev:
 	unregister_netdev(dev->pseudo_netdev);
-out2:
+out_free_netdev:
 	free_netdev(dev->pseudo_netdev);
-out3:
+out:
 	pr_debug("%s:%u ret=%d\n", __func__, __LINE__, ret);
 	return ret;
 }
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
index 1076df2ee96..bf189987711 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.h
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -50,7 +50,7 @@
 
 struct c2_buf_list {
 	void *buf;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
+	DEFINE_DMA_UNMAP_ADDR(mapping);
 };
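Several hunks above (c2.h, c2_mq.h, c2_provider.h, c2_alloc.c, c2_cq.c) swap the PCI-only DECLARE_PCI_UNMAP_ADDR/pci_unmap_addr helpers for their bus-agnostic DMA-API equivalents. A minimal sketch of the idiom, with illustrative my_buf names rather than anything from this driver:

	#include <linux/dma-mapping.h>

	struct my_buf {
		void *cpu_addr;
		size_t len;
		/* expands to a dma_addr_t field only on configurations
		 * that actually need state to unmap */
		DEFINE_DMA_UNMAP_ADDR(mapping);
	};

	static int my_buf_map(struct device *dev, struct my_buf *b)
	{
		dma_addr_t dma = dma_map_single(dev, b->cpu_addr, b->len,
						DMA_TO_DEVICE);

		if (dma_mapping_error(dev, dma))
			return -ENOMEM;
		dma_unmap_addr_set(b, mapping, dma);	/* stash for unmap */
		return 0;
	}

	static void my_buf_unmap(struct device *dev, struct my_buf *b)
	{
		dma_unmap_single(dev, dma_unmap_addr(b, mapping), b->len,
				 DMA_TO_DEVICE);
	}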
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
index a6d89440ad2..86708dee58b 100644
--- a/drivers/infiniband/hw/amso1100/c2_qp.c
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -36,6 +36,7 @@
 */
 
 #include <linux/delay.h>
+#include <linux/gfp.h>
 
 #include "c2.h"
 #include "c2_vq.h"
@@ -381,14 +382,16 @@ static int c2_alloc_qpn(struct c2_dev *c2dev, struct c2_qp *qp)
 {
 	int ret;
 
-	do {
-		spin_lock_irq(&c2dev->qp_table.lock);
-		ret = idr_get_new_above(&c2dev->qp_table.idr, qp,
-					c2dev->qp_table.last++, &qp->qpn);
-		spin_unlock_irq(&c2dev->qp_table.lock);
-	} while ((ret == -EAGAIN) &&
-		 idr_pre_get(&c2dev->qp_table.idr, GFP_KERNEL));
-	return ret;
+	idr_preload(GFP_KERNEL);
+	spin_lock_irq(&c2dev->qp_table.lock);
+
+	ret = idr_alloc_cyclic(&c2dev->qp_table.idr, qp, 0, 0, GFP_NOWAIT);
+	if (ret >= 0)
+		qp->qpn = ret;
+
+	spin_unlock_irq(&c2dev->qp_table.lock);
+	idr_preload_end();
+	return ret < 0 ? ret : 0;
 }
 
 static void c2_free_qpn(struct c2_dev *c2dev, int qpn)
@@ -611,7 +614,7 @@ void c2_free_qp(struct c2_dev *c2dev, struct c2_qp *qp)
 	c2_unlock_cqs(send_cq, recv_cq);
 
 	/*
-	 * Destory qp in the rnic...
+	 * Destroy qp in the rnic...
 	 */
 	destroy_qp(c2dev, qp);
@@ -798,8 +801,10 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
 	u8 actual_sge_count;
 	u32 msg_size;
 
-	if (qp->state > IB_QPS_RTS)
-		return -EINVAL;
+	if (qp->state > IB_QPS_RTS) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	while (ib_wr) {
@@ -930,6 +935,7 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
 		ib_wr = ib_wr->next;
 	}
 
+out:
 	if (err)
 		*bad_wr = ib_wr;
 	return err;
@@ -944,8 +950,10 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
 	unsigned long lock_flags;
 	int err = 0;
 
-	if (qp->state > IB_QPS_RTS)
-		return -EINVAL;
+	if (qp->state > IB_QPS_RTS) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	/*
 	 * Try and post each work request
@@ -998,18 +1006,19 @@ int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
 		ib_wr = ib_wr->next;
 	}
 
+out:
 	if (err)
 		*bad_wr = ib_wr;
 	return err;
 }
 
-void __devinit c2_init_qp_table(struct c2_dev *c2dev)
+void c2_init_qp_table(struct c2_dev *c2dev)
 {
 	spin_lock_init(&c2dev->qp_table.lock);
 	idr_init(&c2dev->qp_table.idr);
 }
 
-void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev)
+void c2_cleanup_qp_table(struct c2_dev *c2dev)
 {
 	idr_destroy(&c2dev->qp_table.idr);
 }
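The c2_alloc_qpn() hunk above replaces the old idr_pre_get()/idr_get_new_above() retry loop with the idr_preload()/idr_alloc_cyclic() pattern. Roughly, under the same locking assumptions as the driver (the my_table names are illustrative):

	#include <linux/idr.h>
	#include <linux/spinlock.h>

	struct my_table {
		struct idr idr;
		spinlock_t lock;
	};

	static int my_alloc_id(struct my_table *tbl, void *ptr)
	{
		int id;

		idr_preload(GFP_KERNEL);	/* may sleep; outside the lock */
		spin_lock_irq(&tbl->lock);
		/* the cyclic variant hands out ids round-robin, continuing
		 * after the last id granted; start=0, end=0 means any id >= 0 */
		id = idr_alloc_cyclic(&tbl->idr, ptr, 0, 0, GFP_NOWAIT);
		spin_unlock_irq(&tbl->lock);
		idr_preload_end();

		return id;	/* >= 0 on success, -ENOMEM/-ENOSPC on failure */
	}

The cyclic allocator keeps its own cursor, which is why the companion c2.h hunk can delete the hand-rolled qp_table.last field.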
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
index dd05c483564..d2a6d961344 100644
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -51,6 +51,7 @@
 #include <linux/mm.h>
 #include <linux/inet.h>
 #include <linux/vmalloc.h>
+#include <linux/slab.h>
 
 #include <linux/route.h>
 
@@ -438,10 +439,10 @@ static int c2_rnic_close(struct c2_dev *c2dev)
 
 /*
  * Called by c2_probe to initialize the RNIC. This principally
- * involves initalizing the various limits and resouce pools that
+ * involves initializing the various limits and resource pools that
  * comprise the RNIC instance.
 */
-int __devinit c2_rnic_init(struct c2_dev *c2dev)
+int c2_rnic_init(struct c2_dev *c2dev)
 {
 	int err;
 	u32 qsize, msgsize;
@@ -458,13 +459,12 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
 			    IB_DEVICE_MEM_WINDOW);
 
 	/* Allocate the qptr_array */
-	c2dev->qptr_array = vmalloc(C2_MAX_CQS * sizeof(void *));
+	c2dev->qptr_array = vzalloc(C2_MAX_CQS * sizeof(void *));
 	if (!c2dev->qptr_array) {
 		return -ENOMEM;
 	}
 
-	/* Inialize the qptr_array */
-	memset(c2dev->qptr_array, 0, C2_MAX_CQS * sizeof(void *));
+	/* Initialize the qptr_array */
 	c2dev->qptr_array[0] = (void *) &c2dev->req_vq;
 	c2dev->qptr_array[1] = (void *) &c2dev->rep_vq;
 	c2dev->qptr_array[2] = (void *) &c2dev->aeq;
@@ -523,7 +523,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
 		err = -ENOMEM;
 		goto bail1;
 	}
-	pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
+	dma_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
 	pr_debug("%s rep_vq va %p dma %llx\n", __func__, q1_pages,
 		 (unsigned long long) c2dev->rep_vq.host_dma);
 	c2_mq_rep_init(&c2dev->rep_vq,
@@ -544,7 +544,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
 		err = -ENOMEM;
 		goto bail2;
 	}
-	pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
+	dma_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
 	pr_debug("%s aeq va %p dma %llx\n", __func__, q2_pages,
 		 (unsigned long long) c2dev->aeq.host_dma);
 	c2_mq_rep_init(&c2dev->aeq,
@@ -576,7 +576,8 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
 		goto bail4;
 
 	/* Initialize cached the adapter limits */
-	if (c2_rnic_query(c2dev, &c2dev->props))
+	err = c2_rnic_query(c2dev, &c2dev->props);
+	if (err)
 		goto bail5;
 
 	/* Initialize the PD pool */
@@ -595,11 +596,11 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
 bail3:
 	dma_free_coherent(&c2dev->pcidev->dev,
 			  c2dev->aeq.q_size * c2dev->aeq.msg_size,
-			  q2_pages, pci_unmap_addr(&c2dev->aeq, mapping));
+			  q2_pages, dma_unmap_addr(&c2dev->aeq, mapping));
 bail2:
 	dma_free_coherent(&c2dev->pcidev->dev,
 			  c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
-			  q1_pages, pci_unmap_addr(&c2dev->rep_vq, mapping));
+			  q1_pages, dma_unmap_addr(&c2dev->rep_vq, mapping));
 bail1:
 	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
 bail0:
@@ -611,7 +612,7 @@ int __devinit c2_rnic_init(struct c2_dev *c2dev)
 /*
 * Called by c2_remove to cleanup the RNIC resources.
 */
-void __devexit c2_rnic_term(struct c2_dev *c2dev)
+void c2_rnic_term(struct c2_dev *c2dev)
 {
 
 	/* Close the open adapter instance */
@@ -636,13 +637,13 @@ void __devexit c2_rnic_term(struct c2_dev *c2dev)
 	dma_free_coherent(&c2dev->pcidev->dev,
 			  c2dev->aeq.q_size * c2dev->aeq.msg_size,
 			  c2dev->aeq.msg_pool.host,
-			  pci_unmap_addr(&c2dev->aeq, mapping));
+			  dma_unmap_addr(&c2dev->aeq, mapping));
 
 	/* Free the verbs reply queue */
 	dma_free_coherent(&c2dev->pcidev->dev,
 			  c2dev->rep_vq.q_size * c2dev->rep_vq.msg_size,
 			  c2dev->rep_vq.msg_pool.host,
-			  pci_unmap_addr(&c2dev->rep_vq, mapping));
+			  dma_unmap_addr(&c2dev->rep_vq, mapping));
 
 	/* Free the MQ shared pointer pool */
 	c2_free_mqsp_pool(c2dev, c2dev->kern_mqsp_pool);
diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c
index 9ce7819b7b2..2ec716fb2ed 100644
--- a/drivers/infiniband/hw/amso1100/c2_vq.c
+++ b/drivers/infiniband/hw/amso1100/c2_vq.c
@@ -107,7 +107,7 @@ struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
 	r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL);
 	if (r) {
 		init_waitqueue_head(&r->wait_object);
-		r->reply_msg = (u64) NULL;
+		r->reply_msg = 0;
 		r->event = 0;
 		r->cm_id = NULL;
 		r->qp = NULL;
@@ -123,7 +123,7 @@ struct c2_vq_req *vq_req_alloc(struct c2_dev *c2dev)
 */
 void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r)
 {
-	r->reply_msg = (u64) NULL;
+	r->reply_msg = 0;
 	if (atomic_dec_and_test(&r->refcnt)) {
 		kfree(r);
 	}
@@ -151,7 +151,7 @@ void vq_req_get(struct c2_dev *c2dev, struct c2_vq_req *r)
 void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r)
 {
 	if (atomic_dec_and_test(&r->refcnt)) {
-		if (r->reply_msg != (u64) NULL)
+		if (r->reply_msg != 0)
 			vq_repbuf_free(c2dev,
 				       (void *) (unsigned long) r->reply_msg);
 		kfree(r);
diff --git a/drivers/infiniband/hw/amso1100/c2_wr.h b/drivers/infiniband/hw/amso1100/c2_wr.h
index c65fbdd6e46..8d4b4ca463c 100644
--- a/drivers/infiniband/hw/amso1100/c2_wr.h
+++ b/drivers/infiniband/hw/amso1100/c2_wr.h
@@ -131,7 +131,7 @@ enum c2wr_ids {
 	 * All the preceding IDs are fixed, and must not change.
 	 * You can add new IDs, but must not remove or reorder
 	 * any IDs. If you do, YOU will ruin any hope of
-	 * compatability between versions.
+	 * compatibility between versions.
 	 */
 	CCWR_LAST,
 
@@ -242,7 +242,7 @@ enum c2_acf {
 /*
 * to fix bug 1815 we define the max size allowable of the
 * terminate message (per the IETF spec).Refer to the IETF
- * protocal specification, section 12.1.6, page 64)
+ * protocol specification, section 12.1.6, page 64)
 * The message is prefixed by 20 types of DDP info.
 *
 * Then the message has 6 bytes for the terminate control
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
index 2acec3fadf6..2b6352b8548 100644
--- a/drivers/infiniband/hw/cxgb3/Kconfig
+++ b/drivers/infiniband/hw/cxgb3/Kconfig
@@ -10,7 +10,7 @@ config INFINIBAND_CXGB3
	  our website at <http://www.chelsio.com>.
 
	  For customer support, please visit our customer support page at
-	  <http://www.chelsio.com/support.htm>.
+	  <http://www.chelsio.com/support.html>.
 
	  Please send feedback to <linux-bugs@chelsio.com>.
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
index 7e7b5a66f04..2761364185a 100644
--- a/drivers/infiniband/hw/cxgb3/Makefile
+++ b/drivers/infiniband/hw/cxgb3/Makefile
@@ -1,10 +1,8 @@
-EXTRA_CFLAGS += -Idrivers/net/cxgb3
+ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb3
 
 obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
 
 iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
	       iwch_provider.o iwch.o cxio_hal.o cxio_resource.o
 
-ifdef CONFIG_INFINIBAND_CXGB3_DEBUG
-EXTRA_CFLAGS += -DDEBUG
-endif
+ccflags-$(CONFIG_INFINIBAND_CXGB3_DEBUG) += -DDEBUG
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index a8d24d53f30..8bca6b4ec9a 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -31,6 +31,7 @@
 */
 #ifdef DEBUG
 #include <linux/types.h>
+#include <linux/slab.h>
 #include "common.h"
 #include "cxgb3_ioctl.h"
 #include "cxio_hal.h"
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index f6d5747153a..de1c61b417d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -37,6 +37,7 @@
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
+#include <linux/slab.h>
 #include <net/net_namespace.h>
 
 #include "cxio_resource.h"
@@ -109,7 +110,6 @@ int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
 		while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
 			udelay(1);
 			if (i++ > 1000000) {
-				BUG_ON(1);
 				printk(KERN_ERR "%s: stalled rnic\n",
				       rdev_p->dev_name);
 				return -EIO;
@@ -152,29 +152,30 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
 	sge_cmd = qpid << 8 | 3;
 	wqe->sge_cmd = cpu_to_be64(sge_cmd);
 	skb->priority = CPL_PRIORITY_CONTROL;
-	return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+	return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
 }
 
-int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
 {
 	struct rdma_cq_setup setup;
 	int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
 
+	size += 1; /* one extra page for storing cq-in-err state */
 	cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
 	if (!cq->cqid)
 		return -ENOMEM;
-	cq->sw_queue = kzalloc(size, GFP_KERNEL);
-	if (!cq->sw_queue)
-		return -ENOMEM;
-	cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
-					     (1UL << (cq->size_log2)) *
-					     sizeof(struct t3_cqe),
+	if (kernel) {
+		cq->sw_queue = kzalloc(size, GFP_KERNEL);
+		if (!cq->sw_queue)
+			return -ENOMEM;
+	}
+	cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size,
					     &(cq->dma_addr), GFP_KERNEL);
 	if (!cq->queue) {
 		kfree(cq->sw_queue);
 		return -ENOMEM;
 	}
-	pci_unmap_addr_set(cq, mapping, cq->dma_addr);
+	dma_unmap_addr_set(cq, mapping, cq->dma_addr);
 	memset(cq->queue, 0, size);
 	setup.id = cq->cqid;
 	setup.base_addr = (u64) (cq->dma_addr);
@@ -188,6 +189,7 @@ int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
 	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
 }
 
+#ifdef notyet
 int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
 {
 	struct rdma_cq_setup setup;
@@ -199,6 +201,7 @@ int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
 	setup.ovfl_mode = 1;
 	return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
 }
+#endif
 
 static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
 {
@@ -297,7 +300,7 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
		goto err4;
 
 	memset(wq->queue, 0, depth * sizeof(union t3_wr));
-	pci_unmap_addr_set(wq, mapping, wq->dma_addr);
+	dma_unmap_addr_set(wq, mapping, wq->dma_addr);
 	wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
 	if (!kernel_domain)
 		wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
@@ -325,7 +328,7 @@ int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
 	dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
			  (1UL << (cq->size_log2))
			  * sizeof(struct t3_cqe), cq->queue,
-			  pci_unmap_addr(cq, mapping));
+			  dma_unmap_addr(cq, mapping));
 	cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
 	return err;
 }
@@ -336,7 +339,7 @@ int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
 	dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
			  (1UL << (wq->size_log2))
			  * sizeof(union t3_wr), wq->queue,
-			  pci_unmap_addr(wq, mapping));
+			  dma_unmap_addr(wq, mapping));
 	kfree(wq->sq);
 	cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2));
 	kfree(wq->rq);
@@ -410,6 +413,7 @@ int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
 	ptr = wq->sq_rptr + count;
 	sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
 	while (ptr != wq->sq_wptr) {
+		sqp->signaled = 0;
 		insert_sq_cqe(wq, cq, sqp);
 		ptr++;
 		sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
@@ -450,7 +454,7 @@ static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
 	if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
 		return 0;
 
-	if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) &&
+	if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) &&
	    Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
 		return 0;
@@ -536,7 +540,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
 		err = -ENOMEM;
 		goto err;
 	}
-	pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
+	dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
			   rdev_p->ctrl_qp.dma_addr);
 	rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
 	memset(rdev_p->ctrl_qp.workq, 0,
@@ -571,7 +575,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
	     (unsigned long long) rdev_p->ctrl_qp.dma_addr,
	     rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
 	skb->priority = CPL_PRIORITY_CONTROL;
-	return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+	return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
 err:
 	kfree_skb(skb);
 	return err;
@@ -582,13 +586,13 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
 	dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
			  (1UL << T3_CTRL_QP_SIZE_LOG2)
			  * sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
-			  pci_unmap_addr(&rdev_p->ctrl_qp, mapping));
+			  dma_unmap_addr(&rdev_p->ctrl_qp, mapping));
 	return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
 }
 
 /* write len bytes of data into addr (32B aligned address)
 * If data is NULL, clear len byte of memory to zero.
- * caller aquires the ctrl_qp lock before the call
+ * caller acquires the ctrl_qp lock before the call
 */
 static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
				      u32 len, void *data)
@@ -701,6 +705,9 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
 	u32 stag_idx;
 	u32 wptr;
 
+	if (cxio_fatal_error(rdev_p))
+		return -EIO;
+
 	stag_state = stag_state > 0;
 	stag_idx = (*stag) >> 8;
 
@@ -725,17 +732,15 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
				      V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
 		BUG_ON(page_size >= 28);
 		tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) |
-				F_TPT_MW_BIND_ENABLE |
-				V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
-				V_TPT_PAGE_SIZE(page_size));
-		tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
-			cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
+			((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) |
+			V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
+			V_TPT_PAGE_SIZE(page_size));
+		tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
 		tpt.len = cpu_to_be32(len);
 		tpt.va_hi = cpu_to_be32((u32) (to >> 32));
 		tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
 		tpt.rsvd_bind_cnt_or_pstag = 0;
-		tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
-			cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
+		tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
 	}
 	err = cxio_hal_ctrl_qp_write_mem(rdev_p,
					 stag_idx +
@@ -848,14 +853,16 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
 	wqe->qpcaps = attr->qpcaps;
 	wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
 	wqe->rqe_count = cpu_to_be16(attr->rqe_count);
-	wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type));
+	wqe->flags_rtr_type = cpu_to_be16(attr->flags |
					  V_RTR_TYPE(attr->rtr_type) |
					  V_CHAN(attr->chan));
 	wqe->ord = cpu_to_be32(attr->ord);
 	wqe->ird = cpu_to_be32(attr->ird);
 	wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
 	wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
 	wqe->irs = cpu_to_be32(attr->irs);
 	skb->priority = 0;	/* 0=>ToeQ; 1=>CtrlQ */
-	return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+	return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
 }
 
 void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
@@ -938,6 +945,23 @@ int cxio_rdev_open(struct cxio_rdev *rdev_p)
 	if (!rdev_p->t3cdev_p)
 		rdev_p->t3cdev_p = dev2t3cdev(netdev_p);
 	rdev_p->t3cdev_p->ulp = (void *) rdev_p;
+
+	err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO,
+					 &(rdev_p->fw_info));
+	if (err) {
+		printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
+		       __func__, rdev_p->t3cdev_p, err);
+		goto err1;
+	}
+	if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) {
+		printk(KERN_ERR MOD "fatal firmware version mismatch: "
+		       "need version %u but adapter has version %u\n",
+		       CXIO_FW_MAJ,
+		       G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers));
+		err = -EINVAL;
+		goto err1;
+	}
+
 	err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
					 &(rdev_p->rnic_info));
 	if (err) {
@@ -1011,6 +1035,7 @@ err3:
 err2:
 	cxio_hal_destroy_ctrl_qp(rdev_p);
 err1:
+	rdev_p->t3cdev_p->ulp = NULL;
 	list_del(&rdev_p->entry);
 	return err;
 }
@@ -1021,9 +1046,9 @@ void cxio_rdev_close(struct cxio_rdev *rdev_p)
 		cxio_hal_pblpool_destroy(rdev_p);
 		cxio_hal_rqtpool_destroy(rdev_p);
 		list_del(&rdev_p->entry);
-		rdev_p->t3cdev_p->ulp = NULL;
 		cxio_hal_destroy_ctrl_qp(rdev_p);
 		cxio_hal_destroy_resource(rdev_p->rscp);
+		rdev_p->t3cdev_p->ulp = NULL;
 	}
 }
 
@@ -1204,11 +1229,12 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
 		}
 
 		/* incoming SEND with no receive posted failures */
-		if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) &&
+		if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) &&
		    Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
 			ret = -1;
 			goto skip_cqe;
 		}
+		BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe));
 		goto proc_cqe;
 	}
 
@@ -1223,6 +1249,13 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
		 * then we complete this with TPT_ERR_MSN and mark the wq in
		 * error.
		 */
+
+		if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
+			wq->error = 1;
+			ret = -1;
+			goto skip_cqe;
+		}
+
 		if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
 			wq->error = 1;
 			hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
@@ -1277,6 +1310,7 @@ proc_cqe:
			cxio_hal_pblpool_free(wq->rdev,
				wq->rq[Q_PTR2IDX(wq->rq_rptr,
				wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
+		BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr));
 		wq->rq_rptr++;
 	}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 656fe47bc84..78fbe9ffe7f 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -34,6 +34,7 @@
 
 #include <linux/list.h>
 #include <linux/mutex.h>
+#include <linux/kfifo.h>
 
 #include "t3_cpl.h"
 #include "t3cdev.h"
@@ -52,7 +53,7 @@
 #define T3_MAX_PBL_SIZE 256
 #define T3_MAX_RQ_SIZE 1024
 #define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
-#define T3_MAX_CQ_DEPTH 8192
+#define T3_MAX_CQ_DEPTH 65536
 #define T3_MAX_NUM_STAG (1<<15)
 #define T3_MAX_MR_SIZE 0x100000000ULL
 #define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
@@ -61,6 +62,8 @@
 
 #define T3_MAX_DEV_NAME_LEN 32
 
+#define CXIO_FW_MAJ 7
+
 struct cxio_hal_ctrl_qp {
 	u32 wptr;
 	u32 rptr;
@@ -68,18 +71,18 @@ struct cxio_hal_ctrl_qp {
 	wait_queue_head_t waitq;/* wait for RspQ/CQE msg */
 	union t3_wr *workq;	/* the work request queue */
 	dma_addr_t dma_addr;	/* pci bus address of the workq */
-	DECLARE_PCI_UNMAP_ADDR(mapping)
+	DEFINE_DMA_UNMAP_ADDR(mapping);
 	void __iomem *doorbell;
 };
 
 struct cxio_hal_resource {
-	struct kfifo *tpt_fifo;
+	struct kfifo tpt_fifo;
 	spinlock_t tpt_fifo_lock;
-	struct kfifo *qpid_fifo;
+	struct kfifo qpid_fifo;
 	spinlock_t qpid_fifo_lock;
-	struct kfifo *cqid_fifo;
+	struct kfifo cqid_fifo;
 	spinlock_t cqid_fifo_lock;
-	struct kfifo *pdid_fifo;
+	struct kfifo pdid_fifo;
 	spinlock_t pdid_fifo_lock;
 };
 
@@ -108,8 +111,16 @@ struct cxio_rdev {
 	struct gen_pool *pbl_pool;
 	struct gen_pool *rqt_pool;
 	struct list_head entry;
+	struct ch_embedded_info fw_info;
+	u32 flags;
+#define CXIO_ERROR_FATAL 1
 };
 
+static inline int cxio_fatal_error(struct cxio_rdev *rdev_p)
+{
+	return rdev_p->flags & CXIO_ERROR_FATAL;
+}
+
 static inline int cxio_num_stags(struct cxio_rdev *rdev_p)
 {
 	return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5));
@@ -146,7 +157,7 @@ int cxio_rdev_open(struct cxio_rdev *rdev);
 void cxio_rdev_close(struct cxio_rdev *rdev);
 int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
		   enum t3_cq_opcode op, u32 credit);
-int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
 int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
 int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
 void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
@@ -183,6 +194,7 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
 void cxio_flush_hw_cq(struct t3_cq *cq);
 int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
		     u8 *cqe_flushed, u64 *cookie, u32 *credit);
+int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb);
 
 #define MOD "iw_cxgb3: "
 #define PDBG(fmt, args...) pr_debug(MOD fmt, ## args)
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index bd233c08765..c40088ecf9f 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -39,12 +39,12 @@
 #include "cxio_resource.h"
 #include "cxio_hal.h"
 
-static struct kfifo *rhdl_fifo;
+static struct kfifo rhdl_fifo;
 static spinlock_t rhdl_fifo_lock;
 
 #define RANDOM_SIZE 16
 
-static int __cxio_init_resource_fifo(struct kfifo **fifo,
+static int __cxio_init_resource_fifo(struct kfifo *fifo,
				   spinlock_t *fifo_lock,
				   u32 nr, u32 skip_low,
				   u32 skip_high,
@@ -55,50 +55,51 @@ static int __cxio_init_resource_fifo(struct kfifo **fifo,
 	u32 rarray[16];
 	spin_lock_init(fifo_lock);
 
-	*fifo = kfifo_alloc(nr * sizeof(u32), GFP_KERNEL, fifo_lock);
-	if (IS_ERR(*fifo))
+	if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL))
		return -ENOMEM;
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		__kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32));
+		kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32));
 	if (random) {
 		j = 0;
-		random_bytes = random32();
+		random_bytes = prandom_u32();
 		for (i = 0; i < RANDOM_SIZE; i++)
			rarray[i] = i + skip_low;
 		for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
			if (j >= RANDOM_SIZE) {
				j = 0;
-				random_bytes = random32();
+				random_bytes = prandom_u32();
			}
			idx = (random_bytes >> (j * 2)) & 0xF;
-			__kfifo_put(*fifo,
+			kfifo_in(fifo,
				(unsigned char *) &rarray[idx],
				sizeof(u32));
			rarray[idx] = i;
			j++;
		}
 		for (i = 0; i < RANDOM_SIZE; i++)
-			__kfifo_put(*fifo,
+			kfifo_in(fifo,
				(unsigned char *) &rarray[i],
				sizeof(u32));
 	} else
		for (i = skip_low; i < nr - skip_high; i++)
-			__kfifo_put(*fifo, (unsigned char *) &i, sizeof(u32));
+			kfifo_in(fifo, (unsigned char *) &i, sizeof(u32));
 
 	for (i = 0; i < skip_low + skip_high; i++)
-		kfifo_get(*fifo, (unsigned char *) &entry, sizeof(u32));
+		if (kfifo_out_locked(fifo, (unsigned char *) &entry,
+				sizeof(u32), fifo_lock) != sizeof(u32))
+			break;
 	return 0;
 }
 
-static int cxio_init_resource_fifo(struct kfifo **fifo, spinlock_t * fifo_lock,
+static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock,
				   u32 nr, u32 skip_low, u32 skip_high)
 {
 	return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
					  skip_high, 0));
 }
 
-static int cxio_init_resource_fifo_random(struct kfifo **fifo,
+static int cxio_init_resource_fifo_random(struct kfifo *fifo,
				   spinlock_t * fifo_lock,
				   u32 nr, u32 skip_low, u32 skip_high)
 {
@@ -113,15 +114,13 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
 
 	spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
 
-	rdev_p->rscp->qpid_fifo = kfifo_alloc(T3_MAX_NUM_QP * sizeof(u32),
-					      GFP_KERNEL,
-					      &rdev_p->rscp->qpid_fifo_lock);
-	if (IS_ERR(rdev_p->rscp->qpid_fifo))
+	if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32),
+					      GFP_KERNEL))
		return -ENOMEM;
 
 	for (i = 16; i < T3_MAX_NUM_QP; i++)
		if (!(i & rdev_p->qpmask))
-			__kfifo_put(rdev_p->rscp->qpid_fifo,
+			kfifo_in(&rdev_p->rscp->qpid_fifo,
				    (unsigned char *) &i, sizeof(u32));
 	return 0;
 }
@@ -134,7 +133,7 @@ int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
 
 void cxio_hal_destroy_rhdl_resource(void)
 {
-	kfifo_free(rhdl_fifo);
+	kfifo_free(&rhdl_fifo);
 }
 
 /* nr_* must be power of 2 */
@@ -167,11 +166,11 @@ int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
		goto pdid_err;
 	return 0;
 pdid_err:
-	kfifo_free(rscp->cqid_fifo);
+	kfifo_free(&rscp->cqid_fifo);
 cqid_err:
-	kfifo_free(rscp->qpid_fifo);
+	kfifo_free(&rscp->qpid_fifo);
 qpid_err:
-	kfifo_free(rscp->tpt_fifo);
+	kfifo_free(&rscp->tpt_fifo);
 tpt_err:
	return -ENOMEM;
 }
@@ -179,33 +178,37 @@ tpt_err:
 /*
 * returns 0 if no resource available
 */
-static u32 cxio_hal_get_resource(struct kfifo *fifo)
+static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock)
 {
 	u32 entry;
-	if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)))
+	if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
		return entry;
	else
		return 0;	/* fifo emptry */
 }
 
-static void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
+static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock,
+		u32 entry)
 {
-	BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0);
+	BUG_ON(
+	kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)
+	== 0);
 }
 
 u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(rscp->tpt_fifo);
+	return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock);
 }
 
 void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
 {
-	cxio_hal_put_resource(rscp->tpt_fifo, stag);
+	cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag);
 }
 
 u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
 {
-	u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo);
+	u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo,
+			&rscp->qpid_fifo_lock);
 	PDBG("%s qpid 0x%x\n", __func__, qpid);
	return qpid;
 }
@@ -213,35 +216,35 @@ u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
 void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
 {
 	PDBG("%s qpid 0x%x\n", __func__, qpid);
-	cxio_hal_put_resource(rscp->qpid_fifo, qpid);
+	cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid);
 }
 
 u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(rscp->cqid_fifo);
+	return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock);
 }
 
 void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
 {
-	cxio_hal_put_resource(rscp->cqid_fifo, cqid);
+	cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid);
 }
 
 u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
 {
-	return cxio_hal_get_resource(rscp->pdid_fifo);
+	return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock);
 }
 
 void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
 {
-	cxio_hal_put_resource(rscp->pdid_fifo, pdid);
+	cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid);
 }
 
 void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
 {
-	kfifo_free(rscp->tpt_fifo);
-	kfifo_free(rscp->cqid_fifo);
-	kfifo_free(rscp->qpid_fifo);
-	kfifo_free(rscp->pdid_fifo);
+	kfifo_free(&rscp->tpt_fifo);
+	kfifo_free(&rscp->cqid_fifo);
+	kfifo_free(&rscp->qpid_fifo);
+	kfifo_free(&rscp->pdid_fifo);
	kfree(rscp);
 }
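The cxio_resource.c hunks above track the kernel's kfifo rework: a fifo is now embedded by value rather than allocated behind a pointer, the spinlock is passed explicitly to the locked variants, and __kfifo_put()/kfifo_get() become kfifo_in()/kfifo_out_locked(). A small sketch of the updated API, with illustrative my_fifo/my_lock names:

	#include <linux/kfifo.h>
	#include <linux/spinlock.h>

	static struct kfifo my_fifo;		/* by value, not struct kfifo * */
	static DEFINE_SPINLOCK(my_lock);

	static int my_fifo_demo(void)
	{
		u32 in = 42, out;
		int ret;

		/* kfifo_alloc() now fills in the caller's struct; 0 on success */
		ret = kfifo_alloc(&my_fifo, 64 * sizeof(u32), GFP_KERNEL);
		if (ret)
			return ret;

		kfifo_in(&my_fifo, (unsigned char *)&in, sizeof(in));
		if (kfifo_out_locked(&my_fifo, (unsigned char *)&out,
				     sizeof(out), &my_lock) != sizeof(out))
			ret = -EIO;	/* fifo was unexpectedly empty */

		kfifo_free(&my_fifo);
		return ret;
	}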
rqe_count; u32 irs; + u32 chan; }; struct t3_rdma_init_wr { @@ -604,6 +610,12 @@ struct t3_cqe { #define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header))) #define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header))) +#define CQE_SEND_OPCODE(x)( \ + (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \ + (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \ + (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \ + (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV)) + #define CQE_LEN(x) (be32_to_cpu((x).len)) /* used for RQ completion processing */ @@ -677,9 +689,9 @@ struct t3_swrq { * A T3 WQ implements both the SQ and RQ. */ struct t3_wq { - union t3_wr *queue; /* DMA accessable memory */ + union t3_wr *queue; /* DMA accessible memory */ dma_addr_t dma_addr; /* DMA address for HW */ - DECLARE_PCI_UNMAP_ADDR(mapping) /* unmap kruft */ + DEFINE_DMA_UNMAP_ADDR(mapping); /* unmap kruft */ u32 error; /* 1 once we go to ERROR */ u32 qpid; u32 wptr; /* idx to next available WR slot */ @@ -706,7 +718,7 @@ struct t3_cq { u32 wptr; u32 size_log2; dma_addr_t dma_addr; - DECLARE_PCI_UNMAP_ADDR(mapping) + DEFINE_DMA_UNMAP_ADDR(mapping); struct t3_cqe *queue; struct t3_cqe *sw_queue; u32 sw_rptr; @@ -716,9 +728,40 @@ struct t3_cq { #define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \ CQE_GENBIT(*cqe)) +struct t3_cq_status_page { + u32 cq_err; +}; + +static inline int cxio_cq_in_error(struct t3_cq *cq) +{ + return ((struct t3_cq_status_page *) + &cq->queue[1 << cq->size_log2])->cq_err; +} + +static inline void cxio_set_cq_in_error(struct t3_cq *cq) +{ + ((struct t3_cq_status_page *) + &cq->queue[1 << cq->size_log2])->cq_err = 1; +} + static inline void cxio_set_wq_in_error(struct t3_wq *wq) { - wq->queue->wq_in_err.err = 1; + wq->queue->wq_in_err.err |= 1; +} + +static inline void cxio_disable_wq_db(struct t3_wq *wq) +{ + wq->queue->wq_in_err.err |= 2; +} + +static inline void cxio_enable_wq_db(struct t3_wq *wq) +{ + wq->queue->wq_in_err.err &= ~2; +} + +static inline int cxio_wq_db_enabled(struct t3_wq *wq) +{ + return !(wq->queue->wq_in_err.err & 2); } static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 4489c89d671..8e77dc543dd 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -47,22 +47,62 @@ MODULE_DESCRIPTION("Chelsio T3 RDMA Driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); -cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS]; - static void open_rnic_dev(struct t3cdev *); static void close_rnic_dev(struct t3cdev *); +static void iwch_event_handler(struct t3cdev *, u32, u32); struct cxgb3_client t3c_client = { .name = "iw_cxgb3", .add = open_rnic_dev, .remove = close_rnic_dev, .handlers = t3c_handlers, - .redirect = iwch_ep_redirect + .redirect = iwch_ep_redirect, + .event_handler = iwch_event_handler }; static LIST_HEAD(dev_list); static DEFINE_MUTEX(dev_mutex); +static int disable_qp_db(int id, void *p, void *data) +{ + struct iwch_qp *qhp = p; + + cxio_disable_wq_db(&qhp->wq); + return 0; +} + +static int enable_qp_db(int id, void *p, void *data) +{ + struct iwch_qp *qhp = p; + + if (data) + ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid); + cxio_enable_wq_db(&qhp->wq); + return 0; +} + +static void disable_dbs(struct iwch_dev *rnicp) +{ + spin_lock_irq(&rnicp->lock); + idr_for_each(&rnicp->qpidr, disable_qp_db, NULL); + spin_unlock_irq(&rnicp->lock); +} + +static 
void enable_dbs(struct iwch_dev *rnicp, int ring_db) +{ + spin_lock_irq(&rnicp->lock); + idr_for_each(&rnicp->qpidr, enable_qp_db, + (void *)(unsigned long)ring_db); + spin_unlock_irq(&rnicp->lock); +} + +static void iwch_db_drop_task(struct work_struct *work) +{ + struct iwch_dev *rnicp = container_of(work, struct iwch_dev, + db_drop_task.work); + enable_dbs(rnicp, 1); +} + static void rnic_init(struct iwch_dev *rnicp) { PDBG("%s iwch_dev %p\n", __func__, rnicp); @@ -70,6 +110,7 @@ static void rnic_init(struct iwch_dev *rnicp) idr_init(&rnicp->qpidr); idr_init(&rnicp->mmidr); spin_lock_init(&rnicp->lock); + INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task); rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; rnicp->attr.max_wrs = T3_MAX_QP_DEPTH; @@ -103,11 +144,9 @@ static void rnic_init(struct iwch_dev *rnicp) static void open_rnic_dev(struct t3cdev *tdev) { struct iwch_dev *rnicp; - static int vers_printed; PDBG("%s t3cdev %p\n", __func__, tdev); - if (!vers_printed++) - printk(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n", + printk_once(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION); rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp)); if (!rnicp) { @@ -147,6 +186,9 @@ static void close_rnic_dev(struct t3cdev *tdev) mutex_lock(&dev_mutex); list_for_each_entry_safe(dev, tmp, &dev_list, entry) { if (dev->rdev.t3cdev_p == tdev) { + dev->rdev.flags = CXIO_ERROR_FATAL; + synchronize_net(); + cancel_delayed_work_sync(&dev->db_drop_task); list_del(&dev->entry); iwch_unregister_device(dev); cxio_rdev_close(&dev->rdev); @@ -160,6 +202,69 @@ static void close_rnic_dev(struct t3cdev *tdev) mutex_unlock(&dev_mutex); } +static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) +{ + struct cxio_rdev *rdev = tdev->ulp; + struct iwch_dev *rnicp; + struct ib_event event; + u32 portnum = port_id + 1; + int dispatch = 0; + + if (!rdev) + return; + rnicp = rdev_to_iwch_dev(rdev); + switch (evt) { + case OFFLOAD_STATUS_DOWN: { + rdev->flags = CXIO_ERROR_FATAL; + synchronize_net(); + event.event = IB_EVENT_DEVICE_FATAL; + dispatch = 1; + break; + } + case OFFLOAD_PORT_DOWN: { + event.event = IB_EVENT_PORT_ERR; + dispatch = 1; + break; + } + case OFFLOAD_PORT_UP: { + event.event = IB_EVENT_PORT_ACTIVE; + dispatch = 1; + break; + } + case OFFLOAD_DB_FULL: { + disable_dbs(rnicp); + break; + } + case OFFLOAD_DB_EMPTY: { + enable_dbs(rnicp, 1); + break; + } + case OFFLOAD_DB_DROP: { + unsigned long delay = 1000; + unsigned short r; + + disable_dbs(rnicp); + get_random_bytes(&r, 2); + delay += r & 1023; + + /* + * delay is between 1000-2023 usecs. 
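
The OFFLOAD_DB_DROP arm above is a classic randomized-backoff idiom: stall every doorbell, then re-enable them after a jittered delay so that several devices recovering from the same drop event do not all ring again in lockstep. A minimal sketch of the arithmetic, assuming a hypothetical recovery_work item in place of the driver's db_drop_task:

#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/workqueue.h>

static struct delayed_work recovery_work;	/* hypothetical work item */

static void schedule_jittered_recovery(void)
{
	unsigned long delay = 1000;		/* floor of 1000 usecs */
	unsigned short r;

	get_random_bytes(&r, sizeof(r));
	delay += r & 1023;			/* add 0-1023 usecs of jitter */

	/* total delay is uniform over [1000, 2023] usecs, per the comment above */
	schedule_delayed_work(&recovery_work, usecs_to_jiffies(delay));
}
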
+ */ + schedule_delayed_work(&rnicp->db_drop_task, + usecs_to_jiffies(delay)); + break; + } + } + + if (dispatch) { + event.device = &rnicp->ibdev; + event.element.port_num = portnum; + ib_dispatch_event(&event); + } + + return; +} + static int __init iwch_init_module(void) { int err; diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index 3773453b2cf..837862287a2 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -36,6 +36,7 @@ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/idr.h> +#include <linux/workqueue.h> #include <rdma/ib_verbs.h> @@ -110,6 +111,7 @@ struct iwch_dev { struct idr mmidr; spinlock_t lock; struct list_head entry; + struct delayed_work db_drop_task; }; static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) @@ -117,6 +119,11 @@ static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) return container_of(ibdev, struct iwch_dev, ibdev); } +static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev) +{ + return container_of(rdev, struct iwch_dev, rdev); +} + static inline int t3b_device(const struct iwch_dev *rhp) { return rhp->rdev.t3cdev_p->type == T3B; @@ -146,19 +153,17 @@ static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr, void *handle, u32 id) { int ret; - int newid; - - do { - if (!idr_pre_get(idr, GFP_KERNEL)) { - return -ENOMEM; - } - spin_lock_irq(&rhp->lock); - ret = idr_get_new_above(idr, handle, id, &newid); - BUG_ON(newid != id); - spin_unlock_irq(&rhp->lock); - } while (ret == -EAGAIN); - - return ret; + + idr_preload(GFP_KERNEL); + spin_lock_irq(&rhp->lock); + + ret = idr_alloc(idr, handle, id, id + 1, GFP_NOWAIT); + + spin_unlock_irq(&rhp->lock); + idr_preload_end(); + + BUG_ON(ret == -ENOSPC); + return ret < 0 ? ret : 0; } static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index c325c44807e..cb78b1e9bcd 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -31,6 +31,7 @@ */ #include <linux/module.h> #include <linux/list.h> +#include <linux/slab.h> #include <linux/workqueue.h> #include <linux/skbuff.h> #include <linux/timer.h> @@ -101,12 +102,9 @@ static unsigned int cong_flavor = 1; module_param(cong_flavor, uint, 0644); MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)"); -static void process_work(struct work_struct *work); static struct workqueue_struct *workq; -static DECLARE_WORK(skb_work, process_work); static struct sk_buff_head rxq; -static cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS]; static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); static void ep_timeout(unsigned long arg); @@ -130,15 +128,46 @@ static void stop_ep_timer(struct iwch_ep *ep) { PDBG("%s ep %p\n", __func__, ep); if (!timer_pending(&ep->timer)) { - printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n", + WARN(1, "%s timer stopped when it's not running! 
ep %p state %u\n", __func__, ep, ep->com.state); - WARN_ON(1); return; } del_timer_sync(&ep->timer); put_ep(&ep->com); } +static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e) +{ + int error = 0; + struct cxio_rdev *rdev; + + rdev = (struct cxio_rdev *)tdev->ulp; + if (cxio_fatal_error(rdev)) { + kfree_skb(skb); + return -EIO; + } + error = l2t_send(tdev, skb, l2e); + if (error < 0) + kfree_skb(skb); + return error; +} + +int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) +{ + int error = 0; + struct cxio_rdev *rdev; + + rdev = (struct cxio_rdev *)tdev->ulp; + if (cxio_fatal_error(rdev)) { + kfree_skb(skb); + return -EIO; + } + error = cxgb3_ofld_send(tdev, skb); + if (error < 0) + kfree_skb(skb); + return error; +} + static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) { struct cpl_tid_release *req; @@ -150,7 +179,7 @@ static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); skb->priority = CPL_PRIORITY_SETUP; - cxgb3_ofld_send(tdev, skb); + iwch_cxgb3_ofld_send(tdev, skb); return; } @@ -172,8 +201,7 @@ int iwch_quiesce_tid(struct iwch_ep *ep) req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE); skb->priority = CPL_PRIORITY_DATA; - cxgb3_ofld_send(ep->com.tdev, skb); - return 0; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); } int iwch_resume_tid(struct iwch_ep *ep) @@ -194,8 +222,7 @@ int iwch_resume_tid(struct iwch_ep *ep) req->val = 0; skb->priority = CPL_PRIORITY_DATA; - cxgb3_ofld_send(ep->com.tdev, skb); - return 0; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); } static void set_emss(struct iwch_ep *ep, u16 opt) @@ -252,42 +279,25 @@ static void *alloc_ep(int size, gfp_t gfp) void __free_ep(struct kref *kref) { - struct iwch_ep_common *epc; - epc = container_of(kref, struct iwch_ep_common, kref); - PDBG("%s ep %p state %s\n", __func__, epc, states[state_read(epc)]); - kfree(epc); + struct iwch_ep *ep; + ep = container_of(container_of(kref, struct iwch_ep_common, kref), + struct iwch_ep, com); + PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); + if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { + cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); + dst_release(ep->dst); + l2t_release(ep->com.tdev, ep->l2t); + } + kfree(ep); } static void release_ep_resources(struct iwch_ep *ep) { PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); - cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid); - dst_release(ep->dst); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); + set_bit(RELEASE_RESOURCES, &ep->com.flags); put_ep(&ep->com); } -static void process_work(struct work_struct *work) -{ - struct sk_buff *skb = NULL; - void *ep; - struct t3cdev *tdev; - int ret; - - while ((skb = skb_dequeue(&rxq))) { - ep = *((void **) (skb->cb)); - tdev = *((struct t3cdev **) (skb->cb + sizeof(void *))); - ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep); - if (ret & CPL_RET_BUF_DONE) - kfree_skb(skb); - - /* - * ep was referenced in sched(), and is freed here. 
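
The rewritten __free_ep() above recovers the full endpoint from its kref with two nested container_of() calls, stepping out one embedding level at a time: the kref is embedded in iwch_ep_common, which is itself embedded in iwch_ep. The general shape of that double step, sketched with hypothetical stand-in types:

#include <linux/kernel.h>	/* container_of() */

struct inner { int count; };			/* stands in for struct kref */
struct middle { struct inner ref; };		/* stands in for iwch_ep_common */
struct outer { long state; struct middle com; };	/* stands in for iwch_ep */

static struct outer *outer_from_inner(struct inner *p)
{
	/* inner -> enclosing middle -> enclosing outer */
	return container_of(container_of(p, struct middle, ref),
			    struct outer, com);
}
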
- */ - put_ep((struct iwch_ep_common *)ep); - } -} - static int status2errno(int status) { switch (status) { @@ -327,23 +337,12 @@ static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip, __be16 peer_port, u8 tos) { struct rtable *rt; - struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = peer_ip, - .saddr = local_ip, - .tos = tos} - }, - .proto = IPPROTO_TCP, - .uli_u = { - .ports = { - .sport = local_port, - .dport = peer_port} - } - }; - - if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0)) + struct flowi4 fl4; + + rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, + peer_port, local_port, IPPROTO_TCP, + tos, 0); + if (IS_ERR(rt)) return NULL; return rt; } @@ -382,7 +381,7 @@ static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb) PDBG("%s t3cdev %p\n", __func__, dev); req->cmd = CPL_ABORT_NO_RST; - cxgb3_ofld_send(dev, skb); + iwch_cxgb3_ofld_send(dev, skb); } static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) @@ -402,8 +401,7 @@ static int send_halfclose(struct iwch_ep *ep, gfp_t gfp) req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid)); - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) @@ -420,12 +418,12 @@ static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp) skb->priority = CPL_PRIORITY_DATA; set_arp_failure_handler(skb, abort_arp_failure); req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); req->cmd = CPL_ABORT_SEND_RST; - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static int send_connect(struct iwch_ep *ep) @@ -454,7 +452,8 @@ static int send_connect(struct iwch_ep *ep) V_MSS_IDX(mtu_idx) | V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor); + opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | + V_CONG_CONTROL_FLAVOR(cong_flavor); skb->priority = CPL_PRIORITY_SETUP; set_arp_failure_handler(skb, act_open_req_arp_failure); @@ -469,8 +468,7 @@ static int send_connect(struct iwch_ep *ep) req->opt0l = htonl(opt0l); req->params = 0; req->opt2 = htonl(opt2); - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) @@ -527,7 +525,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb) req->sndseq = htonl(ep->snd_seq); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; - l2t_send(ep->com.tdev, skb, ep->l2t); + iwch_l2t_send(ep->com.tdev, skb, ep->l2t); start_ep_timer(ep); state_set(&ep->com, MPA_REQ_SENT); return; @@ -578,8 +576,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) req->sndseq = htonl(ep->snd_seq); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) @@ -630,8 +627,7 @@ static int send_mpa_reply(struct 
iwch_ep *ep, const void *pdata, u8 plen) req->sndseq = htonl(ep->snd_seq); ep->mpa_skb = skb; state_set(&ep->com, MPA_REP_SENT); - l2t_send(ep->com.tdev, skb, ep->l2t); - return 0; + return iwch_l2t_send(ep->com.tdev, skb, ep->l2t); } static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) @@ -726,8 +722,10 @@ static void connect_reply_upcall(struct iwch_ep *ep, int status) memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REPLY; event.status = status; - event.local_addr = ep->com.local_addr; - event.remote_addr = ep->com.remote_addr; + memcpy(&event.local_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&event.remote_addr, &ep->com.remote_addr, + sizeof(ep->com.remote_addr)); if ((status == 0) || (status == -ECONNREFUSED)) { event.private_data_len = ep->plen; @@ -752,15 +750,24 @@ static void connect_request_upcall(struct iwch_ep *ep) PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REQUEST; - event.local_addr = ep->com.local_addr; - event.remote_addr = ep->com.remote_addr; + memcpy(&event.local_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&event.remote_addr, &ep->com.remote_addr, + sizeof(ep->com.remote_addr)); event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); event.provider_data = ep; - if (state_read(&ep->parent_ep->com) != DEAD) + /* + * Until ird/ord negotiation via MPAv2 support is added, send max + * supported values + */ + event.ird = event.ord = 8; + if (state_read(&ep->parent_ep->com) != DEAD) { + get_ep(&ep->com); ep->parent_ep->com.cm_id->event_handler( ep->parent_ep->com.cm_id, &event); + } put_ep(&ep->parent_ep->com); ep->parent_ep = NULL; } @@ -772,6 +779,11 @@ static void established_upcall(struct iwch_ep *ep) PDBG("%s ep %p\n", __func__, ep); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_ESTABLISHED; + /* + * Until ird/ord negotiation via MPAv2 support is added, send max + * supported values + */ + event.ird = event.ord = 8; if (ep->com.cm_id) { PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); @@ -795,7 +807,7 @@ static int update_rx_credits(struct iwch_ep *ep, u32 credits) OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid)); req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1)); skb->priority = CPL_PRIORITY_ACK; - cxgb3_ofld_send(ep->com.tdev, skb); + iwch_cxgb3_ofld_send(ep->com.tdev, skb); return credits; } @@ -916,7 +928,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb) goto err; if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) { - iwch_post_zb_read(ep->com.qp); + iwch_post_zb_read(ep); } goto out; @@ -1080,37 +1092,45 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct iwch_ep *ep = ctx; struct cpl_wr_ack *hdr = cplhdr(skb); unsigned int credits = ntohs(hdr->credits); + unsigned long flags; + int post_zb = 0; PDBG("%s ep %p credits %u\n", __func__, ep, credits); if (credits == 0) { - PDBG(KERN_ERR "%s 0 credit ack ep %p state %u\n", - __func__, ep, state_read(&ep->com)); + PDBG("%s 0 credit ack ep %p state %u\n", + __func__, ep, state_read(&ep->com)); return CPL_RET_BUF_DONE; } + spin_lock_irqsave(&ep->com.lock, flags); BUG_ON(credits != 1); dst_confirm(ep->dst); if (!ep->mpa_skb) { PDBG("%s rdma_init wr_ack ep %p state %u\n", - __func__, ep, state_read(&ep->com)); if 
(ep->mpa_attr.initiator) { PDBG("%s initiator ep %p state %u\n", - __func__, ep, state_read(&ep->com)); - if (peer2peer) - iwch_post_zb_read(ep->com.qp); + __func__, ep, ep->com.state); + if (peer2peer && ep->com.state == FPDU_MODE) + post_zb = 1; } else { PDBG("%s responder ep %p state %u\n", - __func__, ep, state_read(&ep->com)); - ep->com.rpl_done = 1; - wake_up(&ep->com.waitq); + __func__, ep, ep->com.state); + if (ep->com.state == MPA_REQ_RCVD) { + ep->com.rpl_done = 1; + wake_up(&ep->com.waitq); + } } } else { PDBG("%s lsm ack ep %p state %u freeing skb\n", - __func__, ep, state_read(&ep->com)); + __func__, ep, ep->com.state); kfree_skb(ep->mpa_skb); ep->mpa_skb = NULL; } + spin_unlock_irqrestore(&ep->com.lock, flags); + if (post_zb) + iwch_post_zb_read(ep); return CPL_RET_BUF_DONE; } @@ -1127,8 +1147,7 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) * We get 2 abort replies from the HW. The first one must * be ignored except for scribbling that we need one more. */ - if (!(ep->flags & ABORT_REQ_IN_PROGRESS)) { - ep->flags |= ABORT_REQ_IN_PROGRESS; + if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) { return CPL_RET_BUF_DONE; } @@ -1173,7 +1192,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) release_tid(ep->com.tdev, GET_TID(rpl), NULL); cxgb3_free_atid(ep->com.tdev, ep->atid); dst_release(ep->dst); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); + l2t_release(ep->com.tdev, ep->l2t); put_ep(&ep->com); return CPL_RET_BUF_DONE; } @@ -1203,8 +1222,7 @@ static int listen_start(struct iwch_listen_ep *ep) req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK)); skb->priority = 1; - cxgb3_ofld_send(ep->com.tdev, skb); - return 0; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); } static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) @@ -1237,8 +1255,7 @@ static int listen_stop(struct iwch_listen_ep *ep) req->cpu_idx = 0; OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid)); skb->priority = 1; - cxgb3_ofld_send(ep->com.tdev, skb); - return 0; + return iwch_cxgb3_ofld_send(ep->com.tdev, skb); } static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb, @@ -1275,7 +1292,8 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) V_MSS_IDX(mtu_idx) | V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor); + opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | + V_CONG_CONTROL_FLAVOR(cong_flavor); rpl = cplhdr(skb); rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); @@ -1286,7 +1304,7 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) rpl->opt2 = htonl(opt2); rpl->rsvd = rpl->opt2; /* workaround for HW bug */ skb->priority = CPL_PRIORITY_SETUP; - l2t_send(ep->com.tdev, skb, ep->l2t); + iwch_l2t_send(ep->com.tdev, skb, ep->l2t); return; } @@ -1315,7 +1333,7 @@ static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip, rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); rpl->opt2 = 0; rpl->rsvd = rpl->opt2; - cxgb3_ofld_send(tdev, skb); + iwch_cxgb3_ofld_send(tdev, skb); } } @@ -1343,15 +1361,8 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) tim.mac_addr = req->dst_mac; tim.vlan_tag = ntohs(req->vlan_tag); if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { - printk(KERN_ERR - "%s bad dst mac %02x %02x %02x %02x 
%02x %02x\n", - __func__, - req->dst_mac[0], - req->dst_mac[1], - req->dst_mac[2], - req->dst_mac[3], - req->dst_mac[4], - req->dst_mac[5]); + printk(KERN_ERR "%s bad dst mac %pM\n", + __func__, req->dst_mac); goto reject; } @@ -1366,8 +1377,8 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) __func__); goto reject; } - dst = &rt->u.dst; - l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev); + dst = &rt->dst; + l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -1378,7 +1389,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) if (!child_ep) { printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n", __func__); - l2t_release(L2DATA(tdev), l2t); + l2t_release(tdev, l2t); dst_release(dst); goto reject; } @@ -1450,10 +1461,14 @@ static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) /* * We're gonna mark this puppy DEAD, but keep * the reference on it until the ULP accepts or - * rejects the CR. + * rejects the CR. Also wake up anyone waiting + * in rdma connection migration (see iwch_accept_cr()). */ __state_set(&ep->com, CLOSING); - get_ep(&ep->com); + ep->com.rpl_done = 1; + ep->com.rpl_err = -ECONNRESET; + PDBG("waking up ep %p\n", ep); + wake_up(&ep->com.waitq); break; case MPA_REP_SENT: __state_set(&ep->com, CLOSING); @@ -1534,8 +1549,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) * We get 2 peer aborts from the HW. The first one must * be ignored except for scribbling that we need one more. */ - if (!(ep->flags & PEER_ABORT_IN_PROGRESS)) { - ep->flags |= PEER_ABORT_IN_PROGRESS; + if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) { return CPL_RET_BUF_DONE; } @@ -1562,9 +1576,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) /* * We're gonna mark this puppy DEAD, but keep * the reference on it until the ULP accepts or - * rejects the CR. + * rejects the CR. Also wake up anyone waiting + * in rdma connection migration (see iwch_accept_cr()). */ - get_ep(&ep->com); + ep->com.rpl_done = 1; + ep->com.rpl_err = -ECONNRESET; + PDBG("waking up ep %p\n", ep); + wake_up(&ep->com.waitq); break; case MORIBUND: case CLOSING: @@ -1613,7 +1631,7 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); rpl->cmd = CPL_ABORT_NO_RST; - cxgb3_ofld_send(ep->com.tdev, rpl_skb); + iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb); out: if (release) release_ep_resources(ep); @@ -1666,7 +1684,7 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) * T3A does 3 things when a TERM is received: * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet * 2) generate an async event on the QP with the TERMINATE opcode - * 3) post a TERMINATE opcde cqe into the associated CQ. + * 3) post a TERMINATE opcode cqe into the associated CQ. * * For (1), we save the message in the qp for later consumer consumption. * For (2), we move the QP into TERMINATE, post a QP event and disconnect. 
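
The abort handlers above (abort_rpl() and peer_abort()) stop open-coding a non-atomic read-then-set on ep->flags and use test_and_set_bit() instead, so the "ignore the first of the two identical hardware events" check cannot race. A hedged sketch of the idiom, with hypothetical names:

#include <linux/bitops.h>

#define SEEN_FIRST_COPY	0	/* a bit number, like PEER_ABORT_IN_PROGRESS */

/* Returns 0 for the first copy of the event, 1 for the second. */
static int second_copy(unsigned long *flags)
{
	/*
	 * test_and_set_bit() atomically sets the bit and returns its old
	 * value, so exactly one caller ever observes 0 - no lock needed.
	 */
	return test_and_set_bit(SEEN_FIRST_COPY, flags);
}

In the patch the first copy simply returns CPL_RET_BUF_DONE so the skb is freed; only the second copy runs the real teardown.
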
@@ -1678,6 +1696,9 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) { struct iwch_ep *ep = ctx; + if (state_read(&ep->com) != FPDU_MODE) + return CPL_RET_BUF_DONE; + PDBG("%s ep %p\n", __func__, ep); skb_pull(skb, sizeof(struct cpl_rdma_terminate)); PDBG("%s saving %d bytes of term msg\n", __func__, skb->len); @@ -1739,9 +1760,8 @@ static void ep_timeout(unsigned long arg) __state_set(&ep->com, ABORTING); break; default: - printk(KERN_ERR "%s unexpected state ep %p state %u\n", + WARN(1, "%s unexpected state ep %p state %u\n", __func__, ep, ep->com.state); - WARN_ON(1); abort = 0; } spin_unlock_irqrestore(&ep->com.lock, flags); @@ -1767,6 +1787,7 @@ int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) err = send_mpa_reject(ep, pdata, pdata_len); err = iwch_ep_disconnect(ep, 0, GFP_KERNEL); } + put_ep(&ep->com); return 0; } @@ -1780,8 +1801,10 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct iwch_qp *qp = get_qhp(h, conn_param->qpn); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) - return -ECONNRESET; + if (state_read(&ep->com) == DEAD) { + err = -ECONNRESET; + goto err; + } BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); BUG_ON(!qp); @@ -1789,20 +1812,21 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { abort_connection(ep, NULL, GFP_KERNEL); - return -EINVAL; + err = -EINVAL; + goto err; } cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->com.qp = qp; - ep->com.rpl_done = 0; - ep->com.rpl_err = 0; ep->ird = conn_param->ird; ep->ord = conn_param->ord; - PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); - get_ep(&ep->com); + if (peer2peer && ep->ird == 0) + ep->ird = 1; + + PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); /* bind QP to EP and move to RTS */ attrs.mpa_attr = ep->mpa_attr; @@ -1821,30 +1845,31 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) err = iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); if (err) - goto err; + goto err1; /* if needed, wait for wr_ack */ if (iwch_rqes_posted(qp)) { wait_event(ep->com.waitq, ep->com.rpl_done); err = ep->com.rpl_err; if (err) - goto err; + goto err1; } err = send_mpa_reply(ep, conn_param->private_data, conn_param->private_data_len); if (err) - goto err; + goto err1; state_set(&ep->com, FPDU_MODE); established_upcall(ep); put_ep(&ep->com); return 0; -err: +err1: ep->com.cm_id = NULL; ep->com.qp = NULL; cm_id->rem_ref(cm_id); +err: put_ep(&ep->com); return err; } @@ -1852,8 +1877,9 @@ err: static int is_loopback_dst(struct iw_cm_id *cm_id) { struct net_device *dev; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; - dev = ip_dev_find(&init_net, cm_id->remote_addr.sin_addr.s_addr); + dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr); if (!dev) return 0; dev_put(dev); @@ -1862,10 +1888,17 @@ static int is_loopback_dst(struct iw_cm_id *cm_id) int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { - int err = 0; struct iwch_dev *h = to_iwch_dev(cm_id->device); struct iwch_ep *ep; struct rtable *rt; + int err = 0; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + + if (cm_id->remote_addr.ss_family != PF_INET) { + err = -ENOSYS; + 
goto out; + } if (is_loopback_dst(cm_id)) { err = -ENOSYS; @@ -1885,6 +1918,10 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) conn_param->private_data, ep->plen); ep->ird = conn_param->ird; ep->ord = conn_param->ord; + + if (peer2peer && ep->ord == 0) + ep->ord = 1; + ep->com.tdev = h->rdev.t3cdev_p; cm_id->add_ref(cm_id); @@ -1905,21 +1942,17 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } /* find a route */ - rt = find_route(h->rdev.t3cdev_p, - cm_id->local_addr.sin_addr.s_addr, - cm_id->remote_addr.sin_addr.s_addr, - cm_id->local_addr.sin_port, - cm_id->remote_addr.sin_port, IPTOS_LOWDELAY); + rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, laddr->sin_port, + raddr->sin_port, IPTOS_LOWDELAY); if (!rt) { printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); err = -EHOSTUNREACH; goto fail3; } - ep->dst = &rt->u.dst; - - /* get a l2t entry */ - ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour, - ep->dst->neighbour->dev); + ep->dst = &rt->dst; + ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL, + &raddr->sin_addr.s_addr); if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; @@ -1928,20 +1961,23 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) state_set(&ep->com, CONNECTING); ep->tos = IPTOS_LOWDELAY; - ep->com.local_addr = cm_id->local_addr; - ep->com.remote_addr = cm_id->remote_addr; + memcpy(&ep->com.local_addr, &cm_id->local_addr, + sizeof(ep->com.local_addr)); + memcpy(&ep->com.remote_addr, &cm_id->remote_addr, + sizeof(ep->com.remote_addr)); /* send connect request to rnic */ err = send_connect(ep); if (!err) goto out; - l2t_release(L2DATA(h->rdev.t3cdev_p), ep->l2t); + l2t_release(h->rdev.t3cdev_p, ep->l2t); fail4: dst_release(ep->dst); fail3: cxgb3_free_atid(ep->com.tdev, ep->atid); fail2: + cm_id->rem_ref(cm_id); put_ep(&ep->com); out: return err; @@ -1956,6 +1992,11 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) might_sleep(); + if (cm_id->local_addr.ss_family != PF_INET) { + err = -ENOSYS; + goto fail1; + } + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); if (!ep) { printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); @@ -1967,7 +2008,8 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->backlog = backlog; - ep->com.local_addr = cm_id->local_addr; + memcpy(&ep->com.local_addr, &cm_id->local_addr, + sizeof(ep->com.local_addr)); /* * Allocate a server TID. 
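
find_route() above drops the hand-built struct flowi in favor of ip_route_output_ports(), which packs the 4-tuple, protocol, and TOS into a struct flowi4 and reports failure via ERR_PTR() rather than through an output parameter. A minimal sketch of that call pattern as the patch uses it:

#include <linux/err.h>
#include <linux/in.h>
#include <net/route.h>

static struct rtable *tcp_route(__be32 local_ip, __be32 peer_ip,
				__be16 local_port, __be16 peer_port, u8 tos)
{
	struct flowi4 fl4;
	struct rtable *rt;

	rt = ip_route_output_ports(&init_net, &fl4, NULL,
				   peer_ip, local_ip,	/* daddr, saddr */
				   peer_port, local_port,
				   IPPROTO_TCP, tos, 0 /* any oif */);

	return IS_ERR(rt) ? NULL : rt;	/* callers here expect NULL on failure */
}
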
@@ -2013,8 +2055,11 @@ int iwch_destroy_listen(struct iw_cm_id *cm_id) ep->com.rpl_done = 0; ep->com.rpl_err = 0; err = listen_stop(ep); + if (err) + goto done; wait_event(ep->com.waitq, ep->com.rpl_done); cxgb3_free_stid(ep->com.tdev, ep->stid); +done: err = ep->com.rpl_err; cm_id->rem_ref(cm_id); put_ep(&ep->com); @@ -2026,12 +2071,22 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) int ret=0; unsigned long flags; int close = 0; + int fatal = 0; + struct t3cdev *tdev; + struct cxio_rdev *rdev; spin_lock_irqsave(&ep->com.lock, flags); PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, states[ep->com.state], abrupt); + tdev = (struct t3cdev *)ep->com.tdev; + rdev = (struct cxio_rdev *)tdev->ulp; + if (cxio_fatal_error(rdev)) { + fatal = 1; + close_complete_upcall(ep); + ep->com.state = DEAD; + } switch (ep->com.state) { case MPA_REQ_WAIT: case MPA_REQ_SENT: @@ -2045,14 +2100,17 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) ep->com.state = CLOSING; start_ep_timer(ep); } + set_bit(CLOSE_SENT, &ep->com.flags); break; case CLOSING: - close = 1; - if (abrupt) { - stop_ep_timer(ep); - ep->com.state = ABORTING; - } else - ep->com.state = MORIBUND; + if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { + close = 1; + if (abrupt) { + stop_ep_timer(ep); + ep->com.state = ABORTING; + } else + ep->com.state = MORIBUND; + } break; case MORIBUND: case ABORTING: @@ -2071,7 +2129,11 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) ret = send_abort(ep, NULL, gfp); else ret = send_halfclose(ep, gfp); + if (ret) + fatal = 1; } + if (fatal) + release_ep_resources(ep); return ret; } @@ -2086,7 +2148,7 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new, l2t); dst_hold(new); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); + l2t_release(ep->com.tdev, ep->l2t); ep->l2t = l2t; dst_release(old); ep->dst = new; @@ -2095,7 +2157,49 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, /* * All the CM events are handled on a work queue to have a safe context. + * These are the real handlers that are called from the work queue. */ +static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_ESTABLISH] = act_establish, + [CPL_ACT_OPEN_RPL] = act_open_rpl, + [CPL_RX_DATA] = rx_data, + [CPL_TX_DMA_ACK] = tx_ack, + [CPL_ABORT_RPL_RSS] = abort_rpl, + [CPL_ABORT_RPL] = abort_rpl, + [CPL_PASS_OPEN_RPL] = pass_open_rpl, + [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, + [CPL_PASS_ACCEPT_REQ] = pass_accept_req, + [CPL_PASS_ESTABLISH] = pass_establish, + [CPL_PEER_CLOSE] = peer_close, + [CPL_ABORT_REQ_RSS] = peer_abort, + [CPL_CLOSE_CON_RPL] = close_con_rpl, + [CPL_RDMA_TERMINATE] = terminate, + [CPL_RDMA_EC_STATUS] = ec_status, +}; + +static void process_work(struct work_struct *work) +{ + struct sk_buff *skb = NULL; + void *ep; + struct t3cdev *tdev; + int ret; + + while ((skb = skb_dequeue(&rxq))) { + ep = *((void **) (skb->cb)); + tdev = *((struct t3cdev **) (skb->cb + sizeof(void *))); + ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep); + if (ret & CPL_RET_BUF_DONE) + kfree_skb(skb); + + /* + * ep was referenced in sched(), and is freed here. 
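
The work_handlers table below replaces the per-opcode runtime assignments deleted at the end of this file with a const array built from C99 designated initializers; any opcode not listed is implicitly NULL, so a dispatcher must either check for NULL or only ever be fed listed opcodes. The pattern in miniature, with hypothetical opcodes and handlers:

typedef int (*handler_fn)(void *ctx);

enum { OP_OPEN, OP_CLOSE, OP_DATA, NUM_OPS };	/* hypothetical opcodes */

static int do_open(void *ctx)  { return 0; }
static int do_close(void *ctx) { return 0; }

/* read-only, sparse dispatch table; OP_DATA stays NULL */
static const handler_fn handlers[NUM_OPS] = {
	[OP_OPEN]  = do_open,
	[OP_CLOSE] = do_close,
};

static int dispatch(int op, void *ctx)
{
	if (op < 0 || op >= NUM_OPS || !handlers[op])
		return -1;	/* unknown or unhandled opcode */
	return handlers[op](ctx);
}
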
+ */ + put_ep((struct iwch_ep_common *)ep); + } +} + +static DECLARE_WORK(skb_work, process_work); + static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) { struct iwch_ep_common *epc = ctx; @@ -2127,6 +2231,29 @@ static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } +/* + * All upcalls from the T3 Core go to sched() to schedule the + * processing on a work queue. + */ +cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_ESTABLISH] = sched, + [CPL_ACT_OPEN_RPL] = sched, + [CPL_RX_DATA] = sched, + [CPL_TX_DMA_ACK] = sched, + [CPL_ABORT_RPL_RSS] = sched, + [CPL_ABORT_RPL] = sched, + [CPL_PASS_OPEN_RPL] = sched, + [CPL_CLOSE_LISTSRV_RPL] = sched, + [CPL_PASS_ACCEPT_REQ] = sched, + [CPL_PASS_ESTABLISH] = sched, + [CPL_PEER_CLOSE] = sched, + [CPL_CLOSE_CON_RPL] = sched, + [CPL_ABORT_REQ_RSS] = sched, + [CPL_RDMA_TERMINATE] = sched, + [CPL_RDMA_EC_STATUS] = sched, + [CPL_SET_TCB_RPL] = set_tcb_rpl, +}; + int __init iwch_cm_init(void) { skb_queue_head_init(&rxq); @@ -2135,46 +2262,6 @@ int __init iwch_cm_init(void) if (!workq) return -ENOMEM; - /* - * All upcalls from the T3 Core go to sched() to - * schedule the processing on a work queue. - */ - t3c_handlers[CPL_ACT_ESTABLISH] = sched; - t3c_handlers[CPL_ACT_OPEN_RPL] = sched; - t3c_handlers[CPL_RX_DATA] = sched; - t3c_handlers[CPL_TX_DMA_ACK] = sched; - t3c_handlers[CPL_ABORT_RPL_RSS] = sched; - t3c_handlers[CPL_ABORT_RPL] = sched; - t3c_handlers[CPL_PASS_OPEN_RPL] = sched; - t3c_handlers[CPL_CLOSE_LISTSRV_RPL] = sched; - t3c_handlers[CPL_PASS_ACCEPT_REQ] = sched; - t3c_handlers[CPL_PASS_ESTABLISH] = sched; - t3c_handlers[CPL_PEER_CLOSE] = sched; - t3c_handlers[CPL_CLOSE_CON_RPL] = sched; - t3c_handlers[CPL_ABORT_REQ_RSS] = sched; - t3c_handlers[CPL_RDMA_TERMINATE] = sched; - t3c_handlers[CPL_RDMA_EC_STATUS] = sched; - t3c_handlers[CPL_SET_TCB_RPL] = set_tcb_rpl; - - /* - * These are the real handlers that are called from a - * work queue. 
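
sched() below defers CPL processing by smuggling context through skb->cb: the interrupt-side handler stores the ep and t3cdev pointers into the skb's 48-byte control buffer, queues the skb on rxq, and kicks skb_work; process_work() above then recovers both pointers in the same order. The stash/unstash halves, as a hedged sketch:

#include <linux/skbuff.h>

struct t3cdev;	/* opaque here; defined by the cxgb3 core */

/* producer (atomic context): stash two pointers into skb->cb */
static void stash_ctx(struct sk_buff *skb, void *ep, struct t3cdev *tdev)
{
	*((void **)skb->cb) = ep;
	*((struct t3cdev **)(skb->cb + sizeof(void *))) = tdev;
}

/* consumer (workqueue context): recover them with matching offsets */
static void unstash_ctx(struct sk_buff *skb, void **ep, struct t3cdev **tdev)
{
	*ep = *((void **)skb->cb);
	*tdev = *((struct t3cdev **)(skb->cb + sizeof(void *)));
}
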
- */ - work_handlers[CPL_ACT_ESTABLISH] = act_establish; - work_handlers[CPL_ACT_OPEN_RPL] = act_open_rpl; - work_handlers[CPL_RX_DATA] = rx_data; - work_handlers[CPL_TX_DMA_ACK] = tx_ack; - work_handlers[CPL_ABORT_RPL_RSS] = abort_rpl; - work_handlers[CPL_ABORT_RPL] = abort_rpl; - work_handlers[CPL_PASS_OPEN_RPL] = pass_open_rpl; - work_handlers[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl; - work_handlers[CPL_PASS_ACCEPT_REQ] = pass_accept_req; - work_handlers[CPL_PASS_ESTABLISH] = pass_establish; - work_handlers[CPL_PEER_CLOSE] = peer_close; - work_handlers[CPL_ABORT_REQ_RSS] = peer_abort; - work_handlers[CPL_CLOSE_CON_RPL] = close_con_rpl; - work_handlers[CPL_RDMA_TERMINATE] = terminate; - work_handlers[CPL_RDMA_EC_STATUS] = ec_status; return 0; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index d7c7e09f099..b9efadfffb4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -145,8 +145,10 @@ enum iwch_ep_state { }; enum iwch_ep_flags { - PEER_ABORT_IN_PROGRESS = (1 << 0), - ABORT_REQ_IN_PROGRESS = (1 << 1), + PEER_ABORT_IN_PROGRESS = 0, + ABORT_REQ_IN_PROGRESS = 1, + RELEASE_RESOURCES = 2, + CLOSE_SENT = 3, }; struct iwch_ep_common { @@ -161,6 +163,7 @@ struct iwch_ep_common { wait_queue_head_t waitq; int rpl_done; int rpl_err; + unsigned long flags; }; struct iwch_listen_ep { @@ -188,7 +191,6 @@ struct iwch_ep { u16 plen; u32 ird; u32 ord; - u32 flags; }; static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id) diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c index 7b67a677172..abcc9e76962 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_ev.c +++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c @@ -29,7 +29,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
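
The iwch_cm.h hunk above is the flip side of the test_and_set_bit() conversion earlier in this patch: the atomic bitops take a bit *number* on an unsigned long, not a mask, which is why PEER_ABORT_IN_PROGRESS changes from (1 << 0) to 0 and the flags field moves into iwch_ep_common as an unsigned long. Passing a mask where a bit number is expected is a classic silent bug; a short sketch of the distinction:

#include <linux/bitops.h>

enum { FLAG_A_MASK = (1 << 0), FLAG_B_MASK = (1 << 1) };	/* old style */
enum { FLAG_A_BIT = 0, FLAG_B_BIT = 1 };			/* new style */

static void flag_styles(unsigned long *flags)
{
	*flags |= FLAG_B_MASK;		/* non-atomic: OR in a mask */

	set_bit(FLAG_A_BIT, flags);	/* atomic: pass the bit number */

	/* passing FLAG_B_MASK (== 2) here would touch bit 2, not bit 1 */
	clear_bit(FLAG_B_BIT, flags);
}
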
*/ -#include <linux/slab.h> +#include <linux/gfp.h> #include <linux/mman.h> #include <net/sock.h> #include "iwch_provider.h" @@ -46,6 +46,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, struct ib_event event; struct iwch_qp_attributes attrs; struct iwch_qp *qhp; + unsigned long flag; spin_lock(&rnicp->lock); qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); @@ -76,6 +77,14 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, atomic_inc(&qhp->refcnt); spin_unlock(&rnicp->lock); + if (qhp->attr.state == IWCH_QP_STATE_RTS) { + attrs.next_state = IWCH_QP_STATE_TERMINATE; + iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (send_term) + iwch_post_terminate(qhp, rsp_msg); + } + event.event = ib_event; event.device = chp->ibcq.device; if (ib_event == IB_EVENT_CQ_ERR) @@ -86,13 +95,9 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp, if (qhp->ibqp.event_handler) (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); - if (qhp->attr.state == IWCH_QP_STATE_RTS) { - attrs.next_state = IWCH_QP_STATE_TERMINATE; - iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, - &attrs, 1); - if (send_term) - iwch_post_terminate(qhp, rsp_msg); - } + spin_lock_irqsave(&chp->comp_handler_lock, flag); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); if (atomic_dec_and_test(&qhp->refcnt)) wake_up(&qhp->wait); @@ -105,6 +110,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) struct iwch_cq *chp; struct iwch_qp *qhp; u32 cqid = RSPQ_CQID(rsp_msg); + unsigned long flag; rnicp = (struct iwch_dev *) rdev_p->ulp; spin_lock(&rnicp->lock); @@ -168,7 +174,9 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) */ if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) dst_confirm(qhp->ep->dst); + spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); break; case TPT_ERR_STAG: @@ -179,12 +187,6 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb) case TPT_ERR_BOUND: case TPT_ERR_INVALIDATE_SHARED_MR: case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: - printk(KERN_ERR "%s - CQE Err qpid 0x%x opcode %d status 0x%x " - "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__, - CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), - CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), - CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); - (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); break; diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c index ec49a5cbdeb..5c36ee2809a 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_mem.c +++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c @@ -29,6 +29,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
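
The iwch_mem.c hunks below start honoring insert_handle()'s return value, which matters because the idr insertion rewritten in iwch.h earlier in this patch can now fail and must trigger unwinding (cxio_dereg_mem() here). That rewrite is the standard idr_preload()/idr_alloc() recipe for claiming one specific id under a spinlock; a hedged, generic sketch:

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/types.h>

/* Insert @handle at exactly @id; returns 0 or a negative errno. */
static int insert_exact_id(struct idr *idr, spinlock_t *lock,
			   void *handle, u32 id)
{
	int ret;

	idr_preload(GFP_KERNEL);	/* may sleep: preallocate idr nodes */
	spin_lock_irq(lock);
	/* the [id, id + 1) window forces idr_alloc() to return id or fail */
	ret = idr_alloc(idr, handle, id, id + 1, GFP_NOWAIT);
	spin_unlock_irq(lock);
	idr_preload_end();

	return ret < 0 ? ret : 0;	/* -ENOSPC means the id was taken */
}
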
*/ +#include <linux/slab.h> #include <asm/byteorder.h> #include <rdma/iw_cm.h> @@ -39,7 +40,7 @@ #include "iwch.h" #include "iwch_provider.h" -static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) +static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) { u32 mmid; @@ -47,14 +48,15 @@ static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag) mhp->attr.stag = stag; mmid = stag >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); + return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); } int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, struct iwch_mr *mhp, int shift) { u32 stag; + int ret; if (cxio_register_phys_mem(&rhp->rdev, &stag, mhp->attr.pdid, @@ -66,9 +68,11 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, mhp->attr.pbl_size, mhp->attr.pbl_addr)) return -ENOMEM; - iwch_finish_mem_reg(mhp, stag); - - return 0; + ret = iwch_finish_mem_reg(mhp, stag); + if (ret) + cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + return ret; } int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, @@ -77,6 +81,7 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, int npages) { u32 stag; + int ret; /* We could support this... */ if (npages > mhp->attr.pbl_size) @@ -93,9 +98,12 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, mhp->attr.pbl_size, mhp->attr.pbl_addr)) return -ENOMEM; - iwch_finish_mem_reg(mhp, stag); + ret = iwch_finish_mem_reg(mhp, stag); + if (ret) + cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); - return 0; + return ret; } int iwch_alloc_pbl(struct iwch_mr *mhp, int npages) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index b89640aa6e1..811b24a539c 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -37,9 +37,12 @@ #include <linux/delay.h> #include <linux/errno.h> #include <linux/list.h> +#include <linux/sched.h> #include <linux/spinlock.h> #include <linux/ethtool.h> #include <linux/rtnetlink.h> +#include <linux/inetdevice.h> +#include <linux/slab.h> #include <asm/io.h> #include <asm/irq.h> @@ -58,13 +61,6 @@ #include "iwch_user.h" #include "common.h" -static int iwch_modify_port(struct ib_device *ibdev, - u8 port, int port_modify_mask, - struct ib_port_modify *props) -{ - return -ENOSYS; -} - static struct ib_ah *iwch_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { @@ -151,6 +147,8 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve struct iwch_create_cq_resp uresp; struct iwch_create_cq_req ureq; struct iwch_ucontext *ucontext = NULL; + static int warned; + size_t resplen; PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); rhp = to_iwch_dev(ibdev); @@ -185,16 +183,21 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve entries = roundup_pow_of_two(entries); chp->cq.size_log2 = ilog2(entries); - if (cxio_create_cq(&rhp->rdev, &chp->cq)) { + if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) { kfree(chp); return ERR_PTR(-ENOMEM); } chp->rhp = rhp; chp->ibcq.cqe = 1 << chp->cq.size_log2; spin_lock_init(&chp->lock); + spin_lock_init(&chp->comp_handler_lock); atomic_set(&chp->refcnt, 1); init_waitqueue_head(&chp->wait); - insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); + if (insert_handle(rhp, 
&rhp->cqidr, chp, chp->cq.cqid)) { + cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); + kfree(chp); + return ERR_PTR(-ENOMEM); + } if (ucontext) { struct iwch_mm_entry *mm; @@ -210,15 +213,27 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve uresp.key = ucontext->key; ucontext->key += PAGE_SIZE; spin_unlock(&ucontext->mmap_lock); - if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { + mm->key = uresp.key; + mm->addr = virt_to_phys(chp->cq.queue); + if (udata->outlen < sizeof uresp) { + if (!warned++) + printk(KERN_WARNING MOD "Warning - " + "downlevel libcxgb3 (non-fatal).\n"); + mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * + sizeof(struct t3_cqe)); + resplen = sizeof(struct iwch_create_cq_resp_v0); + } else { + mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) * + sizeof(struct t3_cqe)); + uresp.memsize = mm->len; + uresp.reserved = 0; + resplen = sizeof uresp; + } + if (ib_copy_to_udata(udata, &uresp, resplen)) { kfree(mm); iwch_destroy_cq(&chp->ibcq); return ERR_PTR(-EFAULT); } - mm->key = uresp.key; - mm->addr = virt_to_phys(chp->cq.queue); - mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * - sizeof (struct t3_cqe)); insert_mmap(ucontext, mm); } PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n", @@ -545,7 +560,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr, __be64 *page_list = NULL; int shift = 0; u64 total_size; - int npages; + int npages = 0; int ret; PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd); @@ -603,14 +618,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { __be64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; struct iwch_dev *rhp; struct iwch_pd *php; struct iwch_mr *mhp; struct iwch_reg_user_mr_resp uresp; - + struct scatterlist *sg; PDBG("%s ib_pd %p\n", __func__, pd); php = to_iwch_pd(pd); @@ -630,9 +644,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = ffs(mhp->umem->page_size) - 1; - n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - n += chunk->nents; + n = mhp->umem->nmap; err = iwch_alloc_pbl(mhp, n); if (err) @@ -646,12 +658,10 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address( - &chunk->page_list[j]) + + pages[i++] = cpu_to_be64(sg_dma_address(sg) + mhp->umem->page_size * k); if (i == PAGE_SIZE / sizeof *pages) { err = iwch_write_pbl(mhp, pages, i, n); @@ -661,7 +671,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = 0; } } - } + } if (i) err = iwch_write_pbl(mhp, pages, i, n); @@ -724,7 +734,7 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) return ibmr; } -static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) +static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct iwch_dev *rhp; struct iwch_pd *php; @@ -733,6 +743,9 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) u32 stag = 0; int ret; + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + php = to_iwch_pd(pd); rhp = php->rhp; mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); @@ -749,7 +762,11 @@ static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) mhp->attr.stag = 
stag; mmid = (stag) >> 8; mhp->ibmw.rkey = stag; - insert_handle(rhp, &rhp->mmidr, mhp, mmid); + if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); + kfree(mhp); + return ERR_PTR(-ENOMEM); + } PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); return &(mhp->ibmw); } @@ -765,8 +782,8 @@ static int iwch_dealloc_mw(struct ib_mw *mw) mmid = (mw->rkey) >> 8; cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); remove_handle(rhp, &rhp->mmidr, mmid); - kfree(mhp); PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); + kfree(mhp); return 0; } @@ -777,37 +794,44 @@ static struct ib_mr *iwch_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) struct iwch_mr *mhp; u32 mmid; u32 stag = 0; - int ret; + int ret = -ENOMEM; php = to_iwch_pd(pd); rhp = php->rhp; mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); if (!mhp) - return ERR_PTR(-ENOMEM); + goto err; mhp->rhp = rhp; ret = iwch_alloc_pbl(mhp, pbl_depth); - if (ret) { - kfree(mhp); - return ERR_PTR(ret); - } + if (ret) + goto err1; mhp->attr.pbl_size = pbl_depth; ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid, mhp->attr.pbl_size, mhp->attr.pbl_addr); - if (ret) + goto err2; mhp->attr.pdid = php->pdid; mhp->attr.type = TPT_NON_SHARED_MR; mhp->attr.stag = stag; mhp->attr.state = 1; mmid = (stag) >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; - insert_handle(rhp, &rhp->mmidr, mhp, mmid); + ret = insert_handle(rhp, &rhp->mmidr, mhp, mmid); + if (ret) + goto err3; + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); return &(mhp->ibmr); +err3: + cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); +err2: + iwch_free_pbl(mhp); +err1: + kfree(mhp); +err: + return ERR_PTR(ret); } static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl( @@ -960,7 +983,13 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, spin_lock_init(&qhp->lock); init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); - insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid); + + if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) { + cxio_destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? 
&ucontext->uctx : &rhp->rdev.uctx); + kfree(qhp); + return ERR_PTR(-ENOMEM); + } if (udata) { @@ -1102,9 +1131,7 @@ static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev) char *cp, *next; unsigned fw_maj, fw_min, fw_mic; - rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); - rtnl_unlock(); next = info.fw_version + 1; cp = strsep(&next, "."); @@ -1154,14 +1181,42 @@ static int iwch_query_device(struct ib_device *ibdev, static int iwch_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { + struct iwch_dev *dev; + struct net_device *netdev; + struct in_device *inetdev; + PDBG("%s ibdev %p\n", __func__, ibdev); + + dev = to_iwch_dev(ibdev); + netdev = dev->rdev.port_info.lldevs[port-1]; + + memset(props, 0, sizeof(struct ib_port_attr)); props->max_mtu = IB_MTU_4096; - props->lid = 0; - props->lmc = 0; - props->sm_lid = 0; - props->sm_sl = 0; - props->state = IB_PORT_ACTIVE; - props->phys_state = 0; + if (netdev->mtu >= 4096) + props->active_mtu = IB_MTU_4096; + else if (netdev->mtu >= 2048) + props->active_mtu = IB_MTU_2048; + else if (netdev->mtu >= 1024) + props->active_mtu = IB_MTU_1024; + else if (netdev->mtu >= 512) + props->active_mtu = IB_MTU_512; + else + props->active_mtu = IB_MTU_256; + + if (!netif_carrier_ok(netdev)) + props->state = IB_PORT_DOWN; + else { + inetdev = in_dev_get(netdev); + if (inetdev) { + if (inetdev->ifa_list) + props->state = IB_PORT_ACTIVE; + else + props->state = IB_PORT_INIT; + in_dev_put(inetdev); + } else + props->state = IB_PORT_INIT; + } + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_SNMP_TUNNEL_SUP | @@ -1170,9 +1225,8 @@ static int iwch_query_port(struct ib_device *ibdev, IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; - props->qkey_viol_cntr = 0; props->active_width = 2; - props->active_speed = 2; + props->active_speed = IB_SPEED_DDR; props->max_msg_sz = -1; return 0; @@ -1187,28 +1241,6 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); } -static int fw_supports_fastreg(struct iwch_dev *iwch_dev) -{ - struct ethtool_drvinfo info; - struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; - char *cp, *next; - unsigned fw_maj, fw_min; - - rtnl_lock(); - lldev->ethtool_ops->get_drvinfo(lldev, &info); - rtnl_unlock(); - - next = info.fw_version+1; - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_maj); - cp = strsep(&next, "."); - sscanf(cp, "%i", &fw_min); - - PDBG("%s maj %u min %u\n", __func__, fw_maj, fw_min); - - return fw_maj > 6 || (fw_maj == 6 && fw_min > 0); -} - static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, char *buf) { struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, @@ -1217,9 +1249,7 @@ static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, ch struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; PDBG("%s dev 0x%p\n", __func__, dev); - rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); - rtnl_unlock(); return sprintf(buf, "%s\n", info.fw_version); } @@ -1232,9 +1262,7 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr, struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev; PDBG("%s dev 0x%p\n", __func__, dev); - rtnl_lock(); lldev->ethtool_ops->get_drvinfo(lldev, &info); - rtnl_unlock(); return sprintf(buf, "%s\n", info.driver); } @@ -1325,12 +1353,12 @@ int iwch_register_device(struct iwch_dev *dev) memset(&dev->ibdev.node_guid, 0, 
sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); dev->ibdev.owner = THIS_MODULE; - dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; + dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_MGT_EXTENSIONS; /* cxgb3 supports STag 0. */ dev->ibdev.local_dma_lkey = 0; - if (fw_supports_fastreg(dev)) - dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; dev->ibdev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | @@ -1357,7 +1385,6 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev); dev->ibdev.query_device = iwch_query_device; dev->ibdev.query_port = iwch_query_port; - dev->ibdev.modify_port = iwch_modify_port; dev->ibdev.query_pkey = iwch_query_pkey; dev->ibdev.query_gid = iwch_query_gid; dev->ibdev.alloc_ucontext = iwch_alloc_ucontext; @@ -1392,6 +1419,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.post_send = iwch_post_send; dev->ibdev.post_recv = iwch_post_receive; dev->ibdev.get_protocol_stats = iwch_get_mib; + dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) @@ -1406,7 +1434,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref; dev->ibdev.iwcm->get_qp = iwch_get_qp; - ret = ib_register_device(&dev->ibdev); + ret = ib_register_device(&dev->ibdev, NULL); if (ret) goto bail1; @@ -1421,6 +1449,7 @@ int iwch_register_device(struct iwch_dev *dev) bail2: ib_unregister_device(&dev->ibdev); bail1: + kfree(dev->ibdev.iwcm); return ret; } @@ -1433,5 +1462,6 @@ void iwch_unregister_device(struct iwch_dev *dev) device_remove_file(&dev->ibdev.dev, iwch_class_attributes[i]); ib_unregister_device(&dev->ibdev); + kfree(dev->ibdev.iwcm); return; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h index f5ceca05c43..87c14b0c5ac 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.h +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h @@ -103,6 +103,7 @@ struct iwch_cq { struct iwch_dev *rhp; struct t3_cq cq; spinlock_t lock; + spinlock_t comp_handler_lock; atomic_t refcnt; wait_queue_head_t wait; u32 __user *user_rptr_addr; @@ -293,9 +294,16 @@ static inline u32 iwch_ib_to_tpt_access(int acc) return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) | + (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) | TPT_LOCAL_READ; } +static inline u32 iwch_ib_to_tpt_bind_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? 
TPT_REMOTE_READ : 0); +} + enum iwch_mmid_state { IWCH_STAG_STATE_VALID, IWCH_STAG_STATE_INVALID @@ -325,11 +333,9 @@ int iwch_bind_mw(struct ib_qp *qp, struct ib_mw_bind *mw_bind); int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); -int iwch_post_zb_read(struct iwch_qp *qhp); +int iwch_post_zb_read(struct iwch_ep *ep); int iwch_register_device(struct iwch_dev *dev); void iwch_unregister_device(struct iwch_dev *dev); -int iwch_quiesce_qps(struct iwch_cq *chp); -int iwch_resume_qps(struct iwch_cq *chp); void stop_read_rep_timer(struct iwch_qp *qhp); int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, struct iwch_mr *mhp, int shift); diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 9a3be3a9d5d..b57c0befd96 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -29,6 +29,8 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include <linux/sched.h> +#include <linux/gfp.h> #include "iwch_provider.h" #include "iwch.h" #include "iwch_cm.h" @@ -99,8 +101,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { plen = 4; wqe->write.sgl[0].stag = wr->ex.imm_data; - wqe->write.sgl[0].len = __constant_cpu_to_be32(0); - wqe->write.num_sgle = __constant_cpu_to_be32(0); + wqe->write.sgl[0].len = cpu_to_be32(0); + wqe->write.num_sgle = cpu_to_be32(0); *flit_cnt = 6; } else { plen = 0; @@ -195,15 +197,12 @@ static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr, return 0; } -/* - * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now. - */ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, u32 num_sgle, u32 * pbl_addr, u8 * page_size) { int i; struct iwch_mr *mhp; - u32 offset; + u64 offset; for (i = 0; i < num_sgle; i++) { mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8); @@ -235,8 +234,8 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, return -EINVAL; } offset = sg_list[i].addr - mhp->attr.va_fbo; - offset += ((u32) mhp->attr.va_fbo) % - (1UL << (12 + mhp->attr.page_size)); + offset += mhp->attr.va_fbo & + ((1UL << (12 + mhp->attr.page_size)) - 1); pbl_addr[i] = ((mhp->attr.pbl_addr - rhp->rdev.rnic_info.pbl_base) >> 3) + (offset >> (12 + mhp->attr.page_size)); @@ -266,8 +265,8 @@ static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe, wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length); /* to in the WQE == the offset into the page */ - wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) % - (1UL << (12 + page_size[i]))); + wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) & + ((1UL << (12 + page_size[i])) - 1)); /* pbl_addr is the adapters address in the PBL */ wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]); @@ -367,18 +366,19 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (qhp->attr.state > IWCH_QP_STATE_RTS) { spin_unlock_irqrestore(&qhp->lock, flag); - return -EINVAL; + err = -EINVAL; + goto out; } num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, qhp->wq.sq_size_log2); - if (num_wrs <= 0) { + if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); - return -ENOMEM; + err = -ENOMEM; + goto out; } while (wr) { if (num_wrs == 0) { err = -ENOMEM; - *bad_wr = wr; break; } idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); @@ 
-430,10 +430,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, wr->opcode); err = -EINVAL; } - if (err) { - *bad_wr = wr; + if (err) break; - } wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; sqp->wr_id = wr->wr_id; sqp->opcode = wr2opcode(t3_wr_opcode); @@ -455,7 +453,12 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ++(qhp->wq.sq_wptr); } spin_unlock_irqrestore(&qhp->lock, flag); - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + if (cxio_wq_db_enabled(&qhp->wq)) + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + +out: + if (err) + *bad_wr = wr; return err; } @@ -473,18 +476,19 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (qhp->attr.state > IWCH_QP_STATE_RTS) { spin_unlock_irqrestore(&qhp->lock, flag); - return -EINVAL; + err = -EINVAL; + goto out; } num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr, qhp->wq.rq_size_log2) - 1; if (!wr) { spin_unlock_irqrestore(&qhp->lock, flag); - return -EINVAL; + err = -ENOMEM; + goto out; } while (wr) { if (wr->num_sge > T3_MAX_SGE) { err = -EINVAL; - *bad_wr = wr; break; } idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); @@ -496,10 +500,10 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, err = build_zero_stag_recv(qhp, wqe, wr); else err = -ENOMEM; - if (err) { - *bad_wr = wr; + + if (err) break; - } + build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP); @@ -512,7 +516,12 @@ int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, num_wrs--; } spin_unlock_irqrestore(&qhp->lock, flag); - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + if (cxio_wq_db_enabled(&qhp->wq)) + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + +out: + if (err) + *bad_wr = wr; return err; } @@ -545,7 +554,7 @@ int iwch_bind_mw(struct ib_qp *qp, } num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, qhp->wq.sq_size_log2); - if ((num_wrs) <= 0) { + if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); return -ENOMEM; } @@ -558,18 +567,19 @@ int iwch_bind_mw(struct ib_qp *qp, if (mw_bind->send_flags & IB_SEND_SIGNALED) t3_wr_flags = T3_COMPLETION_FLAG; - sgl.addr = mw_bind->addr; - sgl.lkey = mw_bind->mr->lkey; - sgl.length = mw_bind->length; + sgl.addr = mw_bind->bind_info.addr; + sgl.lkey = mw_bind->bind_info.mr->lkey; + sgl.length = mw_bind->bind_info.length; wqe->bind.reserved = 0; wqe->bind.type = TPT_VATO; /* TBD: check perms */ - wqe->bind.perms = iwch_ib_to_tpt_access(mw_bind->mw_access_flags); - wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey); + wqe->bind.perms = iwch_ib_to_tpt_bind_access( + mw_bind->bind_info.mw_access_flags); + wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey); wqe->bind.mw_stag = cpu_to_be32(mw->rkey); - wqe->bind.mw_len = cpu_to_be32(mw_bind->length); - wqe->bind.mw_va = cpu_to_be64(mw_bind->addr); + wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length); + wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr); err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size); if (err) { spin_unlock_irqrestore(&qhp->lock, flag); @@ -591,7 +601,8 @@ int iwch_bind_mw(struct ib_qp *qp, ++(qhp->wq.sq_wptr); spin_unlock_irqrestore(&qhp->lock, flag); - ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + if (cxio_wq_db_enabled(&qhp->wq)) + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); return err; } @@ -728,7 +739,7 @@ static inline void build_term_codes(struct respQ_msg_t *rsp_msg, } } -int iwch_post_zb_read(struct 
iwch_qp *qhp) +int iwch_post_zb_read(struct iwch_ep *ep) { union t3_wr *wqe; struct sk_buff *skb; @@ -745,17 +756,16 @@ int iwch_post_zb_read(struct iwch_qp *qhp) wqe->read.rdmaop = T3_READ_REQ; wqe->read.reserved[0] = 0; wqe->read.reserved[1] = 0; - wqe->read.reserved[2] = 0; wqe->read.rem_stag = cpu_to_be32(1); wqe->read.rem_to = cpu_to_be64(1); wqe->read.local_stag = cpu_to_be32(1); wqe->read.local_len = cpu_to_be32(0); wqe->read.local_to = cpu_to_be64(1); wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ)); - wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)| + wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)| V_FW_RIWR_LEN(flit_cnt)); skb->priority = CPL_PRIORITY_DATA; - return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); + return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb); } /* @@ -787,61 +797,82 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)); skb->priority = CPL_PRIORITY_DATA; - return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); + return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb); } /* * Assumes qhp lock is held. */ -static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag) +static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, + struct iwch_cq *schp) { - struct iwch_cq *rchp, *schp; int count; int flushed; - rchp = get_chp(qhp->rhp, qhp->attr.rcq); - schp = get_chp(qhp->rhp, qhp->attr.scq); PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); /* take a ref on the qhp since we must release the lock */ atomic_inc(&qhp->refcnt); - spin_unlock_irqrestore(&qhp->lock, *flag); + spin_unlock(&qhp->lock); - /* locking heirarchy: cq lock first, then qp lock. */ - spin_lock_irqsave(&rchp->lock, *flag); + /* locking hierarchy: cq lock first, then qp lock. */ + spin_lock(&rchp->lock); spin_lock(&qhp->lock); cxio_flush_hw_cq(&rchp->cq); cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&rchp->lock, *flag); - if (flushed) + spin_unlock(&rchp->lock); + if (flushed) { + spin_lock(&rchp->comp_handler_lock); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock(&rchp->comp_handler_lock); + } - /* locking heirarchy: cq lock first, then qp lock. */ - spin_lock_irqsave(&schp->lock, *flag); + /* locking hierarchy: cq lock first, then qp lock. 
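(Acquiring the CQ lock before the QP lock at every site keeps the ordering consistent and avoids ABBA deadlocks.)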
*/ + spin_lock(&schp->lock); spin_lock(&qhp->lock); cxio_flush_hw_cq(&schp->cq); cxio_count_scqes(&schp->cq, &qhp->wq, &count); flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&schp->lock, *flag); - if (flushed) + spin_unlock(&schp->lock); + if (flushed) { + spin_lock(&schp->comp_handler_lock); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + spin_unlock(&schp->comp_handler_lock); + } /* deref */ if (atomic_dec_and_test(&qhp->refcnt)) wake_up(&qhp->wait); - spin_lock_irqsave(&qhp->lock, *flag); + spin_lock(&qhp->lock); } -static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) +static void flush_qp(struct iwch_qp *qhp) { - if (qhp->ibqp.uobject) + struct iwch_cq *rchp, *schp; + + rchp = get_chp(qhp->rhp, qhp->attr.rcq); + schp = get_chp(qhp->rhp, qhp->attr.scq); + + if (qhp->ibqp.uobject) { cxio_set_wq_in_error(&qhp->wq); - else - __flush_qp(qhp, flag); + cxio_set_cq_in_error(&rchp->cq); + spin_lock(&rchp->comp_handler_lock); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock(&rchp->comp_handler_lock); + if (schp != rchp) { + cxio_set_cq_in_error(&schp->cq); + spin_lock(&schp->comp_handler_lock); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock(&schp->comp_handler_lock); + } + return; + } + __flush_qp(qhp, rchp, schp); } @@ -852,7 +883,8 @@ u16 iwch_rqes_posted(struct iwch_qp *qhp) { union t3_wr *wqe = qhp->wq.queue; u16 count = 0; - while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { + + while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { count++; wqe++; } @@ -879,20 +911,13 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, (qhp->attr.mpa_attr.xmit_marker_enabled << 1) | (qhp->attr.mpa_attr.crc_enabled << 2); - /* - * XXX - The IWCM doesn't quite handle getting these - * attrs set before going into RTS. For now, just turn - * them on always... - */ -#if 0 - init_attr.qpcaps = qhp->attr.enableRdmaRead | - (qhp->attr.enableRdmaWrite << 1) | - (qhp->attr.enableBind << 2) | - (qhp->attr.enable_stag0_fastreg << 3) | - (qhp->attr.enable_stag0_fastreg << 4); -#else - init_attr.qpcaps = 0x1f; -#endif + init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE | + uP_RI_QP_RDMA_WRITE_ENABLE | + uP_RI_QP_BIND_ENABLE; + if (!qhp->ibqp.uobject) + init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE | + uP_RI_QP_FAST_REGISTER_ENABLE; + init_attr.tcp_emss = qhp->ep->emss; init_attr.ord = qhp->attr.max_ord; init_attr.ird = qhp->attr.max_ird; @@ -900,8 +925,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); init_attr.rqe_count = iwch_rqes_posted(qhp); init_attr.flags = qhp->attr.mpa_attr.initiator ? 
MPA_INITIATOR : 0; - if (!qhp->ibqp.uobject) - init_attr.flags |= PRIV_QP; + init_attr.chan = qhp->ep->l2t->smt_idx; if (peer2peer) { init_attr.rtr_type = RTR_READ; if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator) @@ -1008,7 +1032,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, break; case IWCH_QP_STATE_ERROR: qhp->attr.state = IWCH_QP_STATE_ERROR; - flush_qp(qhp, &flag); + flush_qp(qhp); break; default: ret = -EINVAL; @@ -1056,7 +1080,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, } switch (attrs->next_state) { case IWCH_QP_STATE_IDLE: - flush_qp(qhp, &flag); + flush_qp(qhp); qhp->attr.state = IWCH_QP_STATE_IDLE; qhp->attr.llp_stream_handle = NULL; put_ep(&qhp->ep->com); @@ -1082,7 +1106,6 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, goto out; } qhp->attr.state = IWCH_QP_STATE_IDLE; - memset(&qhp->attr, 0, sizeof(qhp->attr)); break; case IWCH_QP_STATE_TERMINATE: if (!internal) { @@ -1111,7 +1134,7 @@ err: free=1; wake_up(&qhp->wait); BUG_ON(!ep); - flush_qp(qhp, &flag); + flush_qp(qhp); out: spin_unlock_irqrestore(&qhp->lock, flag); @@ -1138,59 +1161,3 @@ out: PDBG("%s exit state %d\n", __func__, qhp->attr.state); return ret; } - -static int quiesce_qp(struct iwch_qp *qhp) -{ - spin_lock_irq(&qhp->lock); - iwch_quiesce_tid(qhp->ep); - qhp->flags |= QP_QUIESCED; - spin_unlock_irq(&qhp->lock); - return 0; -} - -static int resume_qp(struct iwch_qp *qhp) -{ - spin_lock_irq(&qhp->lock); - iwch_resume_tid(qhp->ep); - qhp->flags &= ~QP_QUIESCED; - spin_unlock_irq(&qhp->lock); - return 0; -} - -int iwch_quiesce_qps(struct iwch_cq *chp) -{ - int i; - struct iwch_qp *qhp; - - for (i=0; i < T3_MAX_NUM_QP; i++) { - qhp = get_qhp(chp->rhp, i); - if (!qhp) - continue; - if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) { - quiesce_qp(qhp); - continue; - } - if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp)) - quiesce_qp(qhp); - } - return 0; -} - -int iwch_resume_qps(struct iwch_cq *chp) -{ - int i; - struct iwch_qp *qhp; - - for (i=0; i < T3_MAX_NUM_QP; i++) { - qhp = get_qhp(chp->rhp, i); - if (!qhp) - continue; - if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) { - resume_qp(qhp); - continue; - } - if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp)) - resume_qp(qhp); - } - return 0; -} diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h index cb7086f558c..a277c31fcaf 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_user.h +++ b/drivers/infiniband/hw/cxgb3/iwch_user.h @@ -45,10 +45,18 @@ struct iwch_create_cq_req { __u64 user_rptr_addr; }; +struct iwch_create_cq_resp_v0 { + __u64 key; + __u32 cqid; + __u32 size_log2; +}; + struct iwch_create_cq_resp { __u64 key; __u32 cqid; __u32 size_log2; + __u32 memsize; + __u32 reserved; }; struct iwch_create_qp_resp { diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig new file mode 100644 index 00000000000..23f38cf2c5c --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -0,0 +1,18 @@ +config INFINIBAND_CXGB4 + tristate "Chelsio T4/T5 RDMA Driver" + depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n) + select GENERIC_ALLOCATOR + ---help--- + This is an iWARP/RDMA driver for the Chelsio T4 and T5 + 1GbE, 10GbE adapters and T5 40GbE adapter. + + For general information about Chelsio and our products, visit + our website at <http://www.chelsio.com>. + + For customer support, please visit our customer support page at + <http://www.chelsio.com/support.html>. 
+ + Please send feedback to <linux-bugs@chelsio.com>. + + To compile this driver as a module, choose M here: the module + will be called iw_cxgb4. diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile new file mode 100644 index 00000000000..e11cf729994 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -0,0 +1,5 @@ +ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 + +obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o + +iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c new file mode 100644 index 00000000000..768a0fb67dd --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -0,0 +1,3933 @@ +/* + * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include <linux/module.h> +#include <linux/list.h> +#include <linux/workqueue.h> +#include <linux/skbuff.h> +#include <linux/timer.h> +#include <linux/notifier.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/if_vlan.h> + +#include <net/neighbour.h> +#include <net/netevent.h> +#include <net/route.h> +#include <net/tcp.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> + +#include <rdma/ib_addr.h> + +#include "iw_cxgb4.h" + +static char *states[] = { + "idle", + "listen", + "connecting", + "mpa_wait_req", + "mpa_req_sent", + "mpa_req_rcvd", + "mpa_rep_sent", + "fpdu_mode", + "aborting", + "closing", + "moribund", + "dead", + NULL, +}; + +static int nocong; +module_param(nocong, int, 0644); +MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)"); + +static int enable_ecn; +module_param(enable_ecn, int, 0644); +MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)"); + +static int dack_mode = 1; +module_param(dack_mode, int, 0644); +MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)"); + +int c4iw_max_read_depth = 8; +module_param(c4iw_max_read_depth, int, 0644); +MODULE_PARM_DESC(c4iw_max_read_depth, "Per-connection max ORD/IRD (default=8)"); + +static int enable_tcp_timestamps; +module_param(enable_tcp_timestamps, int, 0644); +MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)"); + +static int enable_tcp_sack; +module_param(enable_tcp_sack, int, 0644); +MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)"); + +static int enable_tcp_window_scaling = 1; +module_param(enable_tcp_window_scaling, int, 0644); +MODULE_PARM_DESC(enable_tcp_window_scaling, + "Enable tcp window scaling (default=1)"); + +int c4iw_debug; +module_param(c4iw_debug, int, 0644); +MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)"); + +static int peer2peer = 1; +module_param(peer2peer, int, 0644); +MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)"); + +static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; +module_param(p2p_type, int, 0644); +MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: " + "1=RDMA_READ 0=RDMA_WRITE (default 1)"); + +static int ep_timeout_secs = 60; +module_param(ep_timeout_secs, int, 0644); +MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout " + "in seconds (default=60)"); + +static int mpa_rev = 1; +module_param(mpa_rev, int, 0644); +MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " + "1 is RFC 5044 spec compliant, 2 is IETF MPA Peer Connect Draft" + " compliant (default=1)"); + +static int markers_enabled; +module_param(markers_enabled, int, 0644); +MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)"); + +static int crc_enabled = 1; +module_param(crc_enabled, int, 0644); +MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)"); + +static int rcv_win = 256 * 1024; +module_param(rcv_win, int, 0644); +MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)"); + +static int snd_win = 128 * 1024; +module_param(snd_win, int, 0644); +MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)"); + +static struct workqueue_struct *workq; + +static struct sk_buff_head rxq; + +static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp); +static void ep_timeout(unsigned long arg); +static void connect_reply_upcall(struct c4iw_ep *ep, int status); + +static LIST_HEAD(timeout_list); +static spinlock_t timeout_lock; + +static void deref_qp(struct
c4iw_ep *ep) +{ + c4iw_qp_rem_ref(&ep->com.qp->ibqp); + clear_bit(QP_REFERENCED, &ep->com.flags); +} + +static void ref_qp(struct c4iw_ep *ep) +{ + set_bit(QP_REFERENCED, &ep->com.flags); + c4iw_qp_add_ref(&ep->com.qp->ibqp); +} + +static void start_ep_timer(struct c4iw_ep *ep) +{ + PDBG("%s ep %p\n", __func__, ep); + if (timer_pending(&ep->timer)) { + pr_err("%s timer already started! ep %p\n", + __func__, ep); + return; + } + clear_bit(TIMEOUT, &ep->com.flags); + c4iw_get_ep(&ep->com); + ep->timer.expires = jiffies + ep_timeout_secs * HZ; + ep->timer.data = (unsigned long)ep; + ep->timer.function = ep_timeout; + add_timer(&ep->timer); +} + +static int stop_ep_timer(struct c4iw_ep *ep) +{ + PDBG("%s ep %p stopping\n", __func__, ep); + del_timer_sync(&ep->timer); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { + c4iw_put_ep(&ep->com); + return 0; + } + return 1; +} + +static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, + struct l2t_entry *l2e) +{ + int error = 0; + + if (c4iw_fatal_error(rdev)) { + kfree_skb(skb); + PDBG("%s - device in error state - dropping\n", __func__); + return -EIO; + } + error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e); + if (error < 0) + kfree_skb(skb); + return error < 0 ? error : 0; +} + +int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) +{ + int error = 0; + + if (c4iw_fatal_error(rdev)) { + kfree_skb(skb); + PDBG("%s - device in error state - dropping\n", __func__); + return -EIO; + } + error = cxgb4_ofld_send(rdev->lldi.ports[0], skb); + if (error < 0) + kfree_skb(skb); + return error < 0 ? error : 0; +} + +static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) +{ + struct cpl_tid_release *req; + + skb = get_skb(skb, sizeof *req, GFP_KERNEL); + if (!skb) + return; + req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req)); + INIT_TP_WR(req, hwtid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); + set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + c4iw_ofld_send(rdev, skb); + return; +} + +static void set_emss(struct c4iw_ep *ep, u16 opt) +{ + ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - + sizeof(struct iphdr) - sizeof(struct tcphdr); + ep->mss = ep->emss; + if (GET_TCPOPT_TSTAMP(opt)) + ep->emss -= 12; + if (ep->emss < 128) + ep->emss = 128; + if (ep->emss & 7) + PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n", + GET_TCPOPT_MSS(opt), ep->mss, ep->emss); + PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt), + ep->mss, ep->emss); +} + +static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc) +{ + enum c4iw_ep_state state; + + mutex_lock(&epc->mutex); + state = epc->state; + mutex_unlock(&epc->mutex); + return state; +} + +static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) +{ + epc->state = new; +} + +static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) +{ + mutex_lock(&epc->mutex); + PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]); + __state_set(epc, new); + mutex_unlock(&epc->mutex); + return; +} + +static void *alloc_ep(int size, gfp_t gfp) +{ + struct c4iw_ep_common *epc; + + epc = kzalloc(size, gfp); + if (epc) { + kref_init(&epc->kref); + mutex_init(&epc->mutex); + c4iw_init_wr_wait(&epc->wr_wait); + } + PDBG("%s alloc ep %p\n", __func__, epc); + return epc; +} + +void _c4iw_free_ep(struct kref *kref) +{ + struct c4iw_ep *ep; + + ep = container_of(kref, struct c4iw_ep, com.kref); + PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); + if 
(test_bit(QP_REFERENCED, &ep->com.flags)) + deref_qp(ep); + if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); + cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + } + if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) { + print_addr(&ep->com, __func__, "remove_mapinfo/mapping"); + iwpm_remove_mapinfo(&ep->com.local_addr, + &ep->com.mapped_local_addr); + iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW); + } + kfree(ep); +} + +static void release_ep_resources(struct c4iw_ep *ep) +{ + set_bit(RELEASE_RESOURCES, &ep->com.flags); + c4iw_put_ep(&ep->com); +} + +static int status2errno(int status) +{ + switch (status) { + case CPL_ERR_NONE: + return 0; + case CPL_ERR_CONN_RESET: + return -ECONNRESET; + case CPL_ERR_ARP_MISS: + return -EHOSTUNREACH; + case CPL_ERR_CONN_TIMEDOUT: + return -ETIMEDOUT; + case CPL_ERR_TCAM_FULL: + return -ENOMEM; + case CPL_ERR_CONN_EXIST: + return -EADDRINUSE; + default: + return -EIO; + } +} + +/* + * Try and reuse skbs already allocated... + */ +static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) +{ + if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) { + skb_trim(skb, 0); + skb_get(skb); + skb_reset_transport_header(skb); + } else { + skb = alloc_skb(len, gfp); + } + t4_set_arp_err_handler(skb, NULL, NULL); + return skb; +} + +static struct net_device *get_real_dev(struct net_device *egress_dev) +{ + return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev; +} + +static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev) +{ + int i; + + egress_dev = get_real_dev(egress_dev); + for (i = 0; i < dev->rdev.lldi.nports; i++) + if (dev->rdev.lldi.ports[i] == egress_dev) + return 1; + return 0; +} + +static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip, + __u8 *peer_ip, __be16 local_port, + __be16 peer_port, u8 tos, + __u32 sin6_scope_id) +{ + struct dst_entry *dst = NULL; + + if (IS_ENABLED(CONFIG_IPV6)) { + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + memcpy(&fl6.daddr, peer_ip, 16); + memcpy(&fl6.saddr, local_ip, 16); + if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = sin6_scope_id; + dst = ip6_route_output(&init_net, NULL, &fl6); + if (!dst) + goto out; + if (!our_interface(dev, ip6_dst_idev(dst)->dev) && + !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { + dst_release(dst); + dst = NULL; + } + } + +out: + return dst; +} + +static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, + __be32 peer_ip, __be16 local_port, + __be16 peer_port, u8 tos) +{ + struct rtable *rt; + struct flowi4 fl4; + struct neighbour *n; + + rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, + peer_port, local_port, IPPROTO_TCP, + tos, 0); + if (IS_ERR(rt)) + return NULL; + n = dst_neigh_lookup(&rt->dst, &peer_ip); + if (!n) + return NULL; + if (!our_interface(dev, n->dev) && + !(n->dev->flags & IFF_LOOPBACK)) { + dst_release(&rt->dst); + return NULL; + } + neigh_release(n); + return &rt->dst; +} + +static void arp_failure_discard(void *handle, struct sk_buff *skb) +{ + PDBG("%s c4iw_dev %p\n", __func__, handle); + kfree_skb(skb); +} + +/* + * Handle an ARP failure for an active open. 
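+ * Without address resolution the connection cannot come up, so the
+ * handler below tears the endpoint down and reports -EHOSTUNREACH to the CM.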
+ */ +static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) +{ + struct c4iw_ep *ep = handle; + + printk(KERN_ERR MOD "ARP failure during connect\n"); + kfree_skb(skb); + connect_reply_upcall(ep, -EHOSTUNREACH); + state_set(&ep->com, DEAD); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); +} + +/* + * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant + * and send it along. + */ +static void abort_arp_failure(void *handle, struct sk_buff *skb) +{ + struct c4iw_rdev *rdev = handle; + struct cpl_abort_req *req = cplhdr(skb); + + PDBG("%s rdev %p\n", __func__, rdev); + req->cmd = CPL_ABORT_NO_RST; + c4iw_ofld_send(rdev, skb); +} + +static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) +{ + unsigned int flowclen = 80; + struct fw_flowc_wr *flowc; + int i; + + skb = get_skb(skb, flowclen, GFP_KERNEL); + flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen); + + flowc->op_to_nparams = cpu_to_be32(FW_WR_OP(FW_FLOWC_WR) | + FW_FLOWC_WR_NPARAMS(8)); + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(flowclen, + 16)) | FW_WR_FLOWID(ep->hwtid)); + + flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; + flowc->mnemval[0].val = cpu_to_be32(PCI_FUNC(ep->com.dev->rdev.lldi.pdev->devfn) << 8); + flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; + flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan); + flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; + flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan); + flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; + flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid); + flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT; + flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq); + flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; + flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq); + flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; + flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); + flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; + flowc->mnemval[7].val = cpu_to_be32(ep->emss); + /* Pad WR to 16 byte boundary */ + flowc->mnemval[8].mnemonic = 0; + flowc->mnemval[8].val = 0; + for (i = 0; i < 9; i++) { + flowc->mnemval[i].r4[0] = 0; + flowc->mnemval[i].r4[1] = 0; + flowc->mnemval[i].r4[2] = 0; + } + + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + c4iw_ofld_send(&ep->com.dev->rdev, skb); +} + +static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp) +{ + struct cpl_close_con_req *req; + struct sk_buff *skb; + int wrlen = roundup(sizeof *req, 16); + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + skb = get_skb(NULL, wrlen, gfp); + if (!skb) { + printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); + req = (struct cpl_close_con_req *) skb_put(skb, wrlen); + memset(req, 0, wrlen); + INIT_TP_WR(req, ep->hwtid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, + ep->hwtid)); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) +{ + struct cpl_abort_req *req; + int wrlen = roundup(sizeof *req, 16); + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + skb = get_skb(skb, wrlen, gfp); + if (!skb) { + printk(KERN_ERR MOD "%s - failed to alloc skb.\n", + __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_DATA,
ep->txq_idx); + t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure); + req = (struct cpl_abort_req *) skb_put(skb, wrlen); + memset(req, 0, wrlen); + INIT_TP_WR(req, ep->hwtid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); + req->cmd = CPL_ABORT_SEND_RST; + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +/* + * c4iw_form_pm_msg - Form a port mapper message with mapping info + */ +static void c4iw_form_pm_msg(struct c4iw_ep *ep, + struct iwpm_sa_data *pm_msg) +{ + memcpy(&pm_msg->loc_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&pm_msg->rem_addr, &ep->com.remote_addr, + sizeof(ep->com.remote_addr)); +} + +/* + * c4iw_form_reg_msg - Form a port mapper message with dev info + */ +static void c4iw_form_reg_msg(struct c4iw_dev *dev, + struct iwpm_dev_data *pm_msg) +{ + memcpy(pm_msg->dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE); + memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name, + IWPM_IFNAME_SIZE); +} + +static void c4iw_record_pm_msg(struct c4iw_ep *ep, + struct iwpm_sa_data *pm_msg) +{ + memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr, + sizeof(ep->com.mapped_local_addr)); + memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr, + sizeof(ep->com.mapped_remote_addr)); +} + +static void best_mtu(const unsigned short *mtus, unsigned short mtu, + unsigned int *idx, int use_ts) +{ + unsigned short hdr_size = sizeof(struct iphdr) + + sizeof(struct tcphdr) + + (use_ts ? 12 : 0); + unsigned short data_size = mtu - hdr_size; + + cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); +} + +static int send_connect(struct c4iw_ep *ep) +{ + struct cpl_act_open_req *req; + struct cpl_t5_act_open_req *t5_req; + struct cpl_act_open_req6 *req6; + struct cpl_t5_act_open_req6 *t5_req6; + struct sk_buff *skb; + u64 opt0; + u32 opt2; + unsigned int mtu_idx; + int wscale; + int wrlen; + int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? + sizeof(struct cpl_act_open_req) : + sizeof(struct cpl_t5_act_open_req); + int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? + sizeof(struct cpl_act_open_req6) : + sizeof(struct cpl_t5_act_open_req6); + struct sockaddr_in *la = (struct sockaddr_in *) + &ep->com.mapped_local_addr; + struct sockaddr_in *ra = (struct sockaddr_in *) + &ep->com.mapped_remote_addr; + struct sockaddr_in6 *la6 = (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; + struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) + &ep->com.mapped_remote_addr; + int win; + + wrlen = (ep->com.remote_addr.ss_family == AF_INET) ? + roundup(sizev4, 16) : + roundup(sizev6, 16); + + PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid); + + skb = get_skb(NULL, wrlen, GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "%s - failed to alloc skb.\n", + __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); + + best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps); + wscale = compute_wscale(rcv_win); + + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. + */ + win = ep->rcv_win >> 10; + if (win > RCV_BUFSIZ_MASK) + win = RCV_BUFSIZ_MASK; + + opt0 = (nocong ? 
NO_CONG(1) : 0) | + KEEP_ALIVE(1) | + DELACK(1) | + WND_SCALE(wscale) | + MSS_IDX(mtu_idx) | + L2T_IDX(ep->l2t->idx) | + TX_CHAN(ep->tx_chan) | + SMAC_SEL(ep->smac_idx) | + DSCP(ep->tos) | + ULP_MODE(ULP_MODE_TCPDDP) | + RCV_BUFSIZ(win); + opt2 = RX_CHANNEL(0) | + CCTRL_ECN(enable_ecn) | + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + if (enable_tcp_timestamps) + opt2 |= TSTAMPS_EN(1); + if (enable_tcp_sack) + opt2 |= SACK_EN(1); + if (wscale && enable_tcp_window_scaling) + opt2 |= WND_SCALE_EN(1); + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + opt2 |= T5_OPT_2_VALID; + opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + } + t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); + + if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { + if (ep->com.remote_addr.ss_family == AF_INET) { + req = (struct cpl_act_open_req *) skb_put(skb, wrlen); + INIT_TP_WR(req, 0); + OPCODE_TID(req) = cpu_to_be32( + MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + ((ep->rss_qid << 14) | ep->atid))); + req->local_port = la->sin_port; + req->peer_port = ra->sin_port; + req->local_ip = la->sin_addr.s_addr; + req->peer_ip = ra->sin_addr.s_addr; + req->opt0 = cpu_to_be64(opt0); + req->params = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); + req->opt2 = cpu_to_be32(opt2); + } else { + req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen); + + INIT_TP_WR(req6, 0); + OPCODE_TID(req6) = cpu_to_be32( + MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, + ((ep->rss_qid<<14)|ep->atid))); + req6->local_port = la6->sin6_port; + req6->peer_port = ra6->sin6_port; + req6->local_ip_hi = *((__be64 *) + (la6->sin6_addr.s6_addr)); + req6->local_ip_lo = *((__be64 *) + (la6->sin6_addr.s6_addr + 8)); + req6->peer_ip_hi = *((__be64 *) + (ra6->sin6_addr.s6_addr)); + req6->peer_ip_lo = *((__be64 *) + (ra6->sin6_addr.s6_addr + 8)); + req6->opt0 = cpu_to_be64(opt0); + req6->params = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); + req6->opt2 = cpu_to_be32(opt2); + } + } else { + u32 isn = (prandom_u32() & ~7UL) - 1; + + opt2 |= T5_OPT_2_VALID; + opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ + if (peer2peer) + isn += 4; + + if (ep->com.remote_addr.ss_family == AF_INET) { + t5_req = (struct cpl_t5_act_open_req *) + skb_put(skb, wrlen); + INIT_TP_WR(t5_req, 0); + OPCODE_TID(t5_req) = cpu_to_be32( + MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + ((ep->rss_qid << 14) | ep->atid))); + t5_req->local_port = la->sin_port; + t5_req->peer_port = ra->sin_port; + t5_req->local_ip = la->sin_addr.s_addr; + t5_req->peer_ip = ra->sin_addr.s_addr; + t5_req->opt0 = cpu_to_be64(opt0); + t5_req->params = cpu_to_be64(V_FILTER_TUPLE( + cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t))); + t5_req->rsvd = cpu_to_be32(isn); + PDBG("%s snd_isn %u\n", __func__, + be32_to_cpu(t5_req->rsvd)); + t5_req->opt2 = cpu_to_be32(opt2); + } else { + t5_req6 = (struct cpl_t5_act_open_req6 *) + skb_put(skb, wrlen); + INIT_TP_WR(t5_req6, 0); + OPCODE_TID(t5_req6) = cpu_to_be32( + MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, + ((ep->rss_qid<<14)|ep->atid))); + t5_req6->local_port = la6->sin6_port; + t5_req6->peer_port = ra6->sin6_port; + t5_req6->local_ip_hi = *((__be64 *) + (la6->sin6_addr.s6_addr)); + t5_req6->local_ip_lo = *((__be64 *) + (la6->sin6_addr.s6_addr + 8)); + t5_req6->peer_ip_hi = *((__be64 *) + (ra6->sin6_addr.s6_addr)); + t5_req6->peer_ip_lo = *((__be64 *) + (ra6->sin6_addr.s6_addr + 8)); + t5_req6->opt0 = cpu_to_be64(opt0); + t5_req6->params = (__force __be64)cpu_to_be32( + cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + 
ep->l2t)); + t5_req6->rsvd = cpu_to_be32(isn); + PDBG("%s snd_isn %u\n", __func__, + be32_to_cpu(t5_req6->rsvd)); + t5_req6->opt2 = cpu_to_be32(opt2); + } + } + + set_bit(ACT_OPEN_REQ, &ep->com.history); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, + u8 mpa_rev_to_use) +{ + int mpalen, wrlen; + struct fw_ofld_tx_data_wr *req; + struct mpa_message *mpa; + struct mpa_v2_conn_params mpa_v2_params; + + PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen); + + BUG_ON(skb_cloned(skb)); + + mpalen = sizeof(*mpa) + ep->plen; + if (mpa_rev_to_use == 2) + mpalen += sizeof(struct mpa_v2_conn_params); + wrlen = roundup(mpalen + sizeof *req, 16); + skb = get_skb(skb, wrlen, GFP_KERNEL); + if (!skb) { + connect_reply_upcall(ep, -ENOMEM); + return; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + + req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen); + memset(req, 0, wrlen); + req->op_to_immdlen = cpu_to_be32( + FW_WR_OP(FW_OFLD_TX_DATA_WR) | + FW_WR_COMPL(1) | + FW_WR_IMMDLEN(mpalen)); + req->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID(ep->hwtid) | + FW_WR_LEN16(wrlen >> 4)); + req->plen = cpu_to_be32(mpalen); + req->tunnel_to_proxy = cpu_to_be32( + FW_OFLD_TX_DATA_WR_FLUSH(1) | + FW_OFLD_TX_DATA_WR_SHOVE(1)); + + mpa = (struct mpa_message *)(req + 1); + memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); + mpa->flags = (crc_enabled ? MPA_CRC : 0) | + (markers_enabled ? MPA_MARKERS : 0) | + (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0); + mpa->private_data_size = htons(ep->plen); + mpa->revision = mpa_rev_to_use; + if (mpa_rev_to_use == 1) { + ep->tried_with_mpa_v1 = 1; + ep->retry_with_mpa_v1 = 0; + } + + if (mpa_rev_to_use == 2) { + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof (struct mpa_v2_conn_params)); + mpa_v2_params.ird = htons((u16)ep->ird); + mpa_v2_params.ord = htons((u16)ep->ord); + + if (peer2peer) { + mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); + if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_WRITE_RTR); + else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_READ_RTR); + } + memcpy(mpa->private_data, &mpa_v2_params, + sizeof(struct mpa_v2_conn_params)); + + if (ep->plen) + memcpy(mpa->private_data + + sizeof(struct mpa_v2_conn_params), + ep->mpa_pkt + sizeof(*mpa), ep->plen); + } else + if (ep->plen) + memcpy(mpa->private_data, + ep->mpa_pkt + sizeof(*mpa), ep->plen); + + /* + * Reference the mpa skb. This ensures the data area + * will remain in memory until the hw acks the tx. + * Function fw4_ack() will deref it. 
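+ * (The send path consumes one reference when the skb is queued; the
+ * extra skb_get() below keeps ep->mpa_skb valid until that ack.)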
+ */ + skb_get(skb); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); + BUG_ON(ep->mpa_skb); + ep->mpa_skb = skb; + c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + start_ep_timer(ep); + __state_set(&ep->com, MPA_REQ_SENT); + ep->mpa_attr.initiator = 1; + ep->snd_seq += mpalen; + return; +} + +static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) +{ + int mpalen, wrlen; + struct fw_ofld_tx_data_wr *req; + struct mpa_message *mpa; + struct sk_buff *skb; + struct mpa_v2_conn_params mpa_v2_params; + + PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen); + + mpalen = sizeof(*mpa) + plen; + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) + mpalen += sizeof(struct mpa_v2_conn_params); + wrlen = roundup(mpalen + sizeof *req, 16); + + skb = get_skb(NULL, wrlen, GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + + req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen); + memset(req, 0, wrlen); + req->op_to_immdlen = cpu_to_be32( + FW_WR_OP(FW_OFLD_TX_DATA_WR) | + FW_WR_COMPL(1) | + FW_WR_IMMDLEN(mpalen)); + req->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID(ep->hwtid) | + FW_WR_LEN16(wrlen >> 4)); + req->plen = cpu_to_be32(mpalen); + req->tunnel_to_proxy = cpu_to_be32( + FW_OFLD_TX_DATA_WR_FLUSH(1) | + FW_OFLD_TX_DATA_WR_SHOVE(1)); + + mpa = (struct mpa_message *)(req + 1); + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); + mpa->flags = MPA_REJECT; + mpa->revision = ep->mpa_attr.version; + mpa->private_data_size = htons(plen); + + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { + mpa->flags |= MPA_ENHANCED_RDMA_CONN; + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof (struct mpa_v2_conn_params)); + mpa_v2_params.ird = htons(((u16)ep->ird) | + (peer2peer ? MPA_V2_PEER2PEER_MODEL : + 0)); + mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ? + (p2p_type == + FW_RI_INIT_P2PTYPE_RDMA_WRITE ? + MPA_V2_RDMA_WRITE_RTR : p2p_type == + FW_RI_INIT_P2PTYPE_READ_REQ ? + MPA_V2_RDMA_READ_RTR : 0) : 0)); + memcpy(mpa->private_data, &mpa_v2_params, + sizeof(struct mpa_v2_conn_params)); + + if (ep->plen) + memcpy(mpa->private_data + + sizeof(struct mpa_v2_conn_params), pdata, plen); + } else + if (plen) + memcpy(mpa->private_data, pdata, plen); + + /* + * Reference the mpa skb again. This ensures the data area + * will remain in memory until the hw acks the tx. + * Function fw4_ack() will deref it. 
+ */ + skb_get(skb); + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); + BUG_ON(ep->mpa_skb); + ep->mpa_skb = skb; + ep->snd_seq += mpalen; + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) +{ + int mpalen, wrlen; + struct fw_ofld_tx_data_wr *req; + struct mpa_message *mpa; + struct sk_buff *skb; + struct mpa_v2_conn_params mpa_v2_params; + + PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen); + + mpalen = sizeof(*mpa) + plen; + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) + mpalen += sizeof(struct mpa_v2_conn_params); + wrlen = roundup(mpalen + sizeof *req, 16); + + skb = get_skb(NULL, wrlen, GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__); + return -ENOMEM; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + + req = (struct fw_ofld_tx_data_wr *) skb_put(skb, wrlen); + memset(req, 0, wrlen); + req->op_to_immdlen = cpu_to_be32( + FW_WR_OP(FW_OFLD_TX_DATA_WR) | + FW_WR_COMPL(1) | + FW_WR_IMMDLEN(mpalen)); + req->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID(ep->hwtid) | + FW_WR_LEN16(wrlen >> 4)); + req->plen = cpu_to_be32(mpalen); + req->tunnel_to_proxy = cpu_to_be32( + FW_OFLD_TX_DATA_WR_FLUSH(1) | + FW_OFLD_TX_DATA_WR_SHOVE(1)); + + mpa = (struct mpa_message *)(req + 1); + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); + mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | + (markers_enabled ? MPA_MARKERS : 0); + mpa->revision = ep->mpa_attr.version; + mpa->private_data_size = htons(plen); + + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { + mpa->flags |= MPA_ENHANCED_RDMA_CONN; + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof (struct mpa_v2_conn_params)); + mpa_v2_params.ird = htons((u16)ep->ird); + mpa_v2_params.ord = htons((u16)ep->ord); + if (peer2peer && (ep->mpa_attr.p2p_type != + FW_RI_INIT_P2PTYPE_DISABLED)) { + mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); + + if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_WRITE_RTR); + else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) + mpa_v2_params.ord |= + htons(MPA_V2_RDMA_READ_RTR); + } + + memcpy(mpa->private_data, &mpa_v2_params, + sizeof(struct mpa_v2_conn_params)); + + if (ep->plen) + memcpy(mpa->private_data + + sizeof(struct mpa_v2_conn_params), pdata, plen); + } else + if (plen) + memcpy(mpa->private_data, pdata, plen); + + /* + * Reference the mpa skb. This ensures the data area + * will remain in memory until the hw acks the tx. + * Function fw4_ack() will deref it. 
+ */ + skb_get(skb); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); + ep->mpa_skb = skb; + __state_set(&ep->com, MPA_REP_SENT); + ep->snd_seq += mpalen; + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_act_establish *req = cplhdr(skb); + unsigned int tid = GET_TID(req); + unsigned int atid = GET_TID_TID(ntohl(req->tos_atid)); + struct tid_info *t = dev->rdev.lldi.tids; + + ep = lookup_atid(t, atid); + + PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid, + be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn)); + + mutex_lock(&ep->com.mutex); + dst_confirm(ep->dst); + + /* setup the hwtid for this connection */ + ep->hwtid = tid; + cxgb4_insert_tid(t, ep, tid); + insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); + + ep->snd_seq = be32_to_cpu(req->snd_isn); + ep->rcv_seq = be32_to_cpu(req->rcv_isn); + + set_emss(ep, ntohs(req->tcp_opt)); + + /* dealloc the atid */ + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); + cxgb4_free_atid(t, atid); + set_bit(ACT_ESTAB, &ep->com.history); + + /* start MPA negotiation */ + send_flowc(ep, NULL); + if (ep->retry_with_mpa_v1) + send_mpa_req(ep, skb, 1); + else + send_mpa_req(ep, skb, mpa_rev); + mutex_unlock(&ep->com.mutex); + return 0; +} + +static void close_complete_upcall(struct c4iw_ep *ep, int status) +{ + struct iw_cm_event event; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CLOSE; + event.status = status; + if (ep->com.cm_id) { + PDBG("close complete delivered ep %p cm_id %p tid %u\n", + ep, ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + set_bit(CLOSE_UPCALL, &ep->com.history); + } +} + +static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) +{ + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + __state_set(&ep->com, ABORTING); + set_bit(ABORT_CONN, &ep->com.history); + return send_abort(ep, skb, gfp); +} + +static void peer_close_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_DISCONNECT; + if (ep->com.cm_id) { + PDBG("peer close delivered ep %p cm_id %p tid %u\n", + ep, ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(DISCONN_UPCALL, &ep->com.history); + } +} + +static void peer_abort_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CLOSE; + event.status = -ECONNRESET; + if (ep->com.cm_id) { + PDBG("abort delivered ep %p cm_id %p tid %u\n", ep, + ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + set_bit(ABORT_UPCALL, &ep->com.history); + } +} + +static void connect_reply_upcall(struct c4iw_ep *ep, int status) +{ + struct iw_cm_event event; + + PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REPLY; + event.status = status; + memcpy(&event.local_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&event.remote_addr, &ep->com.remote_addr, + sizeof(ep->com.remote_addr)); + + if ((status == 
0) || (status == -ECONNREFUSED)) { + if (!ep->tried_with_mpa_v1) { + /* this means MPA_v2 is used */ + event.private_data_len = ep->plen - + sizeof(struct mpa_v2_conn_params); + event.private_data = ep->mpa_pkt + + sizeof(struct mpa_message) + + sizeof(struct mpa_v2_conn_params); + } else { + /* this means MPA_v1 is used */ + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + + sizeof(struct mpa_message); + } + } + + PDBG("%s ep %p tid %u status %d\n", __func__, ep, + ep->hwtid, status); + set_bit(CONN_RPL_UPCALL, &ep->com.history); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + + if (status < 0) { + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + } +} + +static int connect_request_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + int ret; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REQUEST; + memcpy(&event.local_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&event.remote_addr, &ep->com.remote_addr, + sizeof(ep->com.remote_addr)); + event.provider_data = ep; + if (!ep->tried_with_mpa_v1) { + /* this means MPA_v2 is used */ + event.ord = ep->ord; + event.ird = ep->ird; + event.private_data_len = ep->plen - + sizeof(struct mpa_v2_conn_params); + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) + + sizeof(struct mpa_v2_conn_params); + } else { + /* this means MPA_v1 is used. Send max supported */ + event.ord = c4iw_max_read_depth; + event.ird = c4iw_max_read_depth; + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); + } + c4iw_get_ep(&ep->com); + ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, + &event); + if (ret) + c4iw_put_ep(&ep->com); + set_bit(CONNREQ_UPCALL, &ep->com.history); + c4iw_put_ep(&ep->parent_ep->com); + return ret; +} + +static void established_upcall(struct c4iw_ep *ep) +{ + struct iw_cm_event event; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_ESTABLISHED; + event.ird = ep->ird; + event.ord = ep->ord; + if (ep->com.cm_id) { + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + set_bit(ESTAB_UPCALL, &ep->com.history); + } +} + +static int update_rx_credits(struct c4iw_ep *ep, u32 credits) +{ + struct cpl_rx_data_ack *req; + struct sk_buff *skb; + int wrlen = roundup(sizeof *req, 16); + + PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); + skb = get_skb(NULL, wrlen, GFP_KERNEL); + if (!skb) { + printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n"); + return 0; + } + + /* + * If we couldn't specify the entire rcv window at connection setup + * due to the limit in the number of bits in the RCV_BUFSIZ field, + * then add the overage in to the credits returned. 
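+ * (RCV_BUFSIZ is expressed in 1KB units, hence the RCV_BUFSIZ_MASK * 1024
+ * scaling below.)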
+ */ + if (ep->rcv_win > RCV_BUFSIZ_MASK * 1024) + credits += ep->rcv_win - RCV_BUFSIZ_MASK * 1024; + + req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen); + memset(req, 0, wrlen); + INIT_TP_WR(req, ep->hwtid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, + ep->hwtid)); + req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK(1) | + F_RX_DACK_CHANGE | + V_RX_DACK_MODE(dack_mode)); + set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx); + c4iw_ofld_send(&ep->com.dev->rdev, skb); + return credits; +} + +static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) +{ + struct mpa_message *mpa; + struct mpa_v2_conn_params *mpa_v2_params; + u16 plen; + u16 resp_ird, resp_ord; + u8 rtr_mismatch = 0, insuff_ird = 0; + struct c4iw_qp_attributes attrs; + enum c4iw_qp_attr_mask mask; + int err; + int disconnect = 0; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + + /* + * Stop mpa timer. If it expired, then + * we ignore the MPA reply. process_timeout() + * will abort the connection. + */ + if (stop_ep_timer(ep)) + return 0; + + /* + * If we get more than the supported amount of private data + * then we must fail this connection. + */ + if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { + err = -EINVAL; + goto err; + } + + /* + * copy the new data into our accumulation buffer. + */ + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); + ep->mpa_pkt_len += skb->len; + + /* + * if we don't even have the mpa message, then bail. + */ + if (ep->mpa_pkt_len < sizeof(*mpa)) + return 0; + mpa = (struct mpa_message *) ep->mpa_pkt; + + /* Validate MPA header. */ + if (mpa->revision > mpa_rev) { + printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," + " Received = %d\n", __func__, mpa_rev, mpa->revision); + err = -EPROTO; + goto err; + } + if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { + err = -EPROTO; + goto err; + } + + plen = ntohs(mpa->private_data_size); + + /* + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) { + err = -EPROTO; + goto err; + } + + /* + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + err = -EPROTO; + goto err; + } + + ep->plen = (u8) plen; + + /* + * If we don't have all the pdata yet, then bail. + * We'll continue processing when more data arrives. + */ + if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) + return 0; + + if (mpa->flags & MPA_REJECT) { + err = -ECONNREFUSED; + goto err; + } + + /* + * If we get here we have accumulated the entire mpa + * start reply message including private data. And + * the MPA header is valid. + */ + __state_set(&ep->com, FPDU_MODE); + ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; + ep->mpa_attr.recv_marker_enabled = markers_enabled; + ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; + ep->mpa_attr.version = mpa->revision; + ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; + + if (mpa->revision == 2) { + ep->mpa_attr.enhanced_rdma_conn = + mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; + if (ep->mpa_attr.enhanced_rdma_conn) { + mpa_v2_params = (struct mpa_v2_conn_params *) + (ep->mpa_pkt + sizeof(*mpa)); + resp_ird = ntohs(mpa_v2_params->ird) & + MPA_V2_IRD_ORD_MASK; + resp_ord = ntohs(mpa_v2_params->ord) & + MPA_V2_IRD_ORD_MASK; + + /* + * This is a double-check.
Ideally, the checks below are + * not required since the ird/ord negotiation has been + * taken care of in c4iw_accept_cr + */ + if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) { + err = -ENOMEM; + ep->ird = resp_ord; + ep->ord = resp_ird; + insuff_ird = 1; + } + + if (ntohs(mpa_v2_params->ird) & + MPA_V2_PEER2PEER_MODEL) { + if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_WRITE_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_RDMA_WRITE; + else if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_READ_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_READ_REQ; + } + } + } else if (mpa->revision == 1) + if (peer2peer) + ep->mpa_attr.p2p_type = p2p_type; + + PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, " + "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = " + "%d\n", __func__, ep->mpa_attr.crc_enabled, + ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, + ep->mpa_attr.p2p_type, p2p_type); + + /* + * If the responder's RTR does not match that of the initiator, assign + * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not + * generated when moving QP to RTS state. + * A TERM message will be sent after the QP has moved to RTS state + */ + if ((ep->mpa_attr.version == 2) && peer2peer && + (ep->mpa_attr.p2p_type != p2p_type)) { + ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; + rtr_mismatch = 1; + } + + attrs.mpa_attr = ep->mpa_attr; + attrs.max_ird = ep->ird; + attrs.max_ord = ep->ord; + attrs.llp_stream_handle = ep; + attrs.next_state = C4IW_QP_STATE_RTS; + + mask = C4IW_QP_ATTR_NEXT_STATE | + C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR | + C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD; + + /* bind QP and TID with INIT_WR */ + err = c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, mask, &attrs, 1); + if (err) + goto err; + + /* + * If the responder's RTR requirement did not match what the initiator + * supports, generate a TERM message + */ + if (rtr_mismatch) { + printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__); + attrs.layer_etype = LAYER_MPA | DDP_LLP; + attrs.ecode = MPA_NOMATCH_RTR; + attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; + err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + err = -ENOMEM; + disconnect = 1; + goto out; + } + + /* + * Generate a TERM if the initiator IRD is not sufficient for the + * responder-provided ORD. Currently, we behave the same way even when + * the responder-provided IRD is insufficient with regard to the + * initiator ORD. + */ + if (insuff_ird) { + printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n", + __func__); + attrs.layer_etype = LAYER_MPA | DDP_LLP; + attrs.ecode = MPA_INSUFF_IRD; + attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; + err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + err = -ENOMEM; + disconnect = 1; + goto out; + } + goto out; +err: + __state_set(&ep->com, ABORTING); + send_abort(ep, skb, GFP_KERNEL); +out: + connect_reply_upcall(ep, err); + return disconnect; +} + +static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) +{ + struct mpa_message *mpa; + struct mpa_v2_conn_params *mpa_v2_params; + u16 plen; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + + /* + * If we get more than the supported amount of private data + * then we must fail this connection.
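+ * (ep->mpa_pkt is a fixed-size accumulation buffer, so an MPA start
+ * message that would overflow it is treated as a protocol error.)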
+ */ + if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { + (void)stop_ep_timer(ep); + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); + + /* + * Copy the new data into our accumulation buffer. + */ + skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]), + skb->len); + ep->mpa_pkt_len += skb->len; + + /* + * If we don't even have the mpa message, then bail. + * We'll continue processing when more data arrives. + */ + if (ep->mpa_pkt_len < sizeof(*mpa)) + return; + + PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); + mpa = (struct mpa_message *) ep->mpa_pkt; + + /* + * Validate MPA Header. + */ + if (mpa->revision > mpa_rev) { + printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," + " Received = %d\n", __func__, mpa_rev, mpa->revision); + (void)stop_ep_timer(ep); + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { + (void)stop_ep_timer(ep); + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + plen = ntohs(mpa->private_data_size); + + /* + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) { + (void)stop_ep_timer(ep); + abort_connection(ep, skb, GFP_KERNEL); + return; + } + + /* + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + (void)stop_ep_timer(ep); + abort_connection(ep, skb, GFP_KERNEL); + return; + } + ep->plen = (u8) plen; + + /* + * If we don't have all the pdata yet, then bail. + */ + if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) + return; + + /* + * If we get here we have accumulated the entire mpa + * start request message including private data. + */ + ep->mpa_attr.initiator = 0; + ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; + ep->mpa_attr.recv_marker_enabled = markers_enabled; + ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; + ep->mpa_attr.version = mpa->revision; + if (mpa->revision == 1) + ep->tried_with_mpa_v1 = 1; + ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; + + if (mpa->revision == 2) { + ep->mpa_attr.enhanced_rdma_conn = + mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; + if (ep->mpa_attr.enhanced_rdma_conn) { + mpa_v2_params = (struct mpa_v2_conn_params *) + (ep->mpa_pkt + sizeof(*mpa)); + ep->ird = ntohs(mpa_v2_params->ird) & + MPA_V2_IRD_ORD_MASK; + ep->ord = ntohs(mpa_v2_params->ord) & + MPA_V2_IRD_ORD_MASK; + if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) + if (peer2peer) { + if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_WRITE_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_RDMA_WRITE; + else if (ntohs(mpa_v2_params->ord) & + MPA_V2_RDMA_READ_RTR) + ep->mpa_attr.p2p_type = + FW_RI_INIT_P2PTYPE_READ_REQ; + } + } + } else if (mpa->revision == 1) + if (peer2peer) + ep->mpa_attr.p2p_type = p2p_type; + + PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, " + "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__, + ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, + ep->mpa_attr.p2p_type); + + /* + * If the endpoint timer already expired, then we ignore + * the start request. process_timeout() will abort + * the connection.
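+ * (stop_ep_timer() returns nonzero if the TIMEOUT flag was already set,
+ * i.e. the timer has already fired.)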
+ */ + if (!stop_ep_timer(ep)) { + __state_set(&ep->com, MPA_REQ_RCVD); + + /* drive upcall */ + mutex_lock(&ep->parent_ep->com.mutex); + if (ep->parent_ep->com.state != DEAD) { + if (connect_request_upcall(ep)) + abort_connection(ep, skb, GFP_KERNEL); + } else { + abort_connection(ep, skb, GFP_KERNEL); + } + mutex_unlock(&ep->parent_ep->com.mutex); + } + return; +} + +static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_rx_data *hdr = cplhdr(skb); + unsigned int dlen = ntohs(hdr->len); + unsigned int tid = GET_TID(hdr); + struct tid_info *t = dev->rdev.lldi.tids; + __u8 status = hdr->status; + int disconnect = 0; + + ep = lookup_tid(t, tid); + if (!ep) + return 0; + PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); + skb_pull(skb, sizeof(*hdr)); + skb_trim(skb, dlen); + mutex_lock(&ep->com.mutex); + + /* update RX credits */ + update_rx_credits(ep, dlen); + + switch (ep->com.state) { + case MPA_REQ_SENT: + ep->rcv_seq += dlen; + disconnect = process_mpa_reply(ep, skb); + break; + case MPA_REQ_WAIT: + ep->rcv_seq += dlen; + process_mpa_request(ep, skb); + break; + case FPDU_MODE: { + struct c4iw_qp_attributes attrs; + BUG_ON(!ep->com.qp); + if (status) + pr_err("%s Unexpected streaming data." \ + " qpid %u ep %p state %d tid %u status %d\n", + __func__, ep->com.qp->wq.sq.qid, ep, + ep->com.state, ep->hwtid, status); + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + disconnect = 1; + break; + } + default: + break; + } + mutex_unlock(&ep->com.mutex); + if (disconnect) + c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + return 0; +} + +static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_abort_rpl_rss *rpl = cplhdr(skb); + int release = 0; + unsigned int tid = GET_TID(rpl); + struct tid_info *t = dev->rdev.lldi.tids; + + ep = lookup_tid(t, tid); + if (!ep) { + printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n"); + return 0; + } + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + mutex_lock(&ep->com.mutex); + switch (ep->com.state) { + case ABORTING: + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + __state_set(&ep->com, DEAD); + release = 1; + break; + default: + printk(KERN_ERR "%s ep %p state %d\n", + __func__, ep, ep->com.state); + break; + } + mutex_unlock(&ep->com.mutex); + + if (release) + release_ep_resources(ep); + return 0; +} + +static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) +{ + struct sk_buff *skb; + struct fw_ofld_connection_wr *req; + unsigned int mtu_idx; + int wscale; + struct sockaddr_in *sin; + int win; + + skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR)); + req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.filter = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); + sin = (struct sockaddr_in *)&ep->com.mapped_local_addr; + req->le.lport = sin->sin_port; + req->le.u.ipv4.lip = sin->sin_addr.s_addr; + sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr; + req->le.pport = sin->sin_port; + req->le.u.ipv4.pip = sin->sin_addr.s_addr; + req->tcb.t_state_to_astid = + htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) | + V_FW_OFLD_CONNECTION_WR_ASTID(atid)); + req->tcb.cplrxdataack_cplpassacceptrpl = + htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK); + 
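+ /* + * Fill in the rest of the TCB template. The opt0/opt2 values + * built below carry the same MSS, window-scale and congestion + * options that a regular active open would use, so the + * firmware-driven connection comes up like one initiated with a + * normal connect request. + */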
req->tcb.tx_max = (__force __be32) jiffies; + req->tcb.rcv_adv = htons(1); + best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps); + wscale = compute_wscale(rcv_win); + + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. + */ + win = ep->rcv_win >> 10; + if (win > RCV_BUFSIZ_MASK) + win = RCV_BUFSIZ_MASK; + + req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) | + (nocong ? NO_CONG(1) : 0) | + KEEP_ALIVE(1) | + DELACK(1) | + WND_SCALE(wscale) | + MSS_IDX(mtu_idx) | + L2T_IDX(ep->l2t->idx) | + TX_CHAN(ep->tx_chan) | + SMAC_SEL(ep->smac_idx) | + DSCP(ep->tos) | + ULP_MODE(ULP_MODE_TCPDDP) | + RCV_BUFSIZ(win)); + req->tcb.opt2 = (__force __be32) (PACE(1) | + TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | + RX_CHANNEL(0) | + CCTRL_ECN(enable_ecn) | + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid)); + if (enable_tcp_timestamps) + req->tcb.opt2 |= (__force __be32) TSTAMPS_EN(1); + if (enable_tcp_sack) + req->tcb.opt2 |= (__force __be32) SACK_EN(1); + if (wscale && enable_tcp_window_scaling) + req->tcb.opt2 |= (__force __be32) WND_SCALE_EN(1); + req->tcb.opt0 = cpu_to_be64((__force u64) req->tcb.opt0); + req->tcb.opt2 = cpu_to_be32((__force u32) req->tcb.opt2); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); + set_bit(ACT_OFLD_CONN, &ep->com.history); + c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); +} + +/* + * Return whether a failed active open has allocated a TID + */ +static inline int act_open_has_tid(int status) +{ + return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && + status != CPL_ERR_ARP_MISS; +} + +/* Returns whether a CPL status conveys negative advice. + */ +static int is_neg_adv(unsigned int status) +{ + return status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE || + status == CPL_ERR_KEEPALV_NEG_ADVICE; +} + +static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) +{ + ep->snd_win = snd_win; + ep->rcv_win = rcv_win; + PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win); +} + +#define ACT_OPEN_RETRY_COUNT 2 + +static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, + struct dst_entry *dst, struct c4iw_dev *cdev, + bool clear_mpa_v1) +{ + struct neighbour *n; + int err, step; + struct net_device *pdev; + + n = dst_neigh_lookup(dst, peer_ip); + if (!n) + return -ENODEV; + + rcu_read_lock(); + err = -ENOMEM; + if (n->dev->flags & IFF_LOOPBACK) { + if (iptype == 4) + pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip); + else if (IS_ENABLED(CONFIG_IPV6)) + for_each_netdev(&init_net, pdev) { + if (ipv6_chk_addr(&init_net, + (struct in6_addr *)peer_ip, + pdev, 1)) + break; + } + else + pdev = NULL; + + if (!pdev) { + err = -ENODEV; + goto out; + } + ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, + n, pdev, 0); + if (!ep->l2t) + goto out; + ep->mtu = pdev->mtu; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + step = cdev->rdev.lldi.ntxq / + cdev->rdev.lldi.nchan; + ep->txq_idx = cxgb4_port_idx(pdev) * step; + step = cdev->rdev.lldi.nrxq / + cdev->rdev.lldi.nchan; + ep->ctrlq_idx = cxgb4_port_idx(pdev); + ep->rss_qid = cdev->rdev.lldi.rxq_ids[ + cxgb4_port_idx(pdev) * step]; + set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); + dev_put(pdev); + } else { + pdev = get_real_dev(n->dev); + ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, + n, pdev, 0); + if (!ep->l2t) + goto out; + ep->mtu = dst_mtu(dst); + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = 
(cxgb4_port_viid(pdev) & 0x7F) << 1; + step = cdev->rdev.lldi.ntxq / + cdev->rdev.lldi.nchan; + ep->txq_idx = cxgb4_port_idx(pdev) * step; + ep->ctrlq_idx = cxgb4_port_idx(pdev); + step = cdev->rdev.lldi.nrxq / + cdev->rdev.lldi.nchan; + ep->rss_qid = cdev->rdev.lldi.rxq_ids[ + cxgb4_port_idx(pdev) * step]; + set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); + + if (clear_mpa_v1) { + ep->retry_with_mpa_v1 = 0; + ep->tried_with_mpa_v1 = 0; + } + } + err = 0; +out: + rcu_read_unlock(); + + neigh_release(n); + + return err; +} + +static int c4iw_reconnect(struct c4iw_ep *ep) +{ + int err = 0; + struct sockaddr_in *laddr = (struct sockaddr_in *) + &ep->com.cm_id->local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *) + &ep->com.cm_id->remote_addr; + struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *) + &ep->com.cm_id->local_addr; + struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *) + &ep->com.cm_id->remote_addr; + int iptype; + __u8 *ra; + + PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); + init_timer(&ep->timer); + + /* + * Allocate an active TID to initiate a TCP connection. + */ + ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); + if (ep->atid == -1) { + pr_err("%s - cannot alloc atid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid); + + /* find a route */ + if (ep->com.cm_id->local_addr.ss_family == AF_INET) { + ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, laddr->sin_port, + raddr->sin_port, 0); + iptype = 4; + ra = (__u8 *)&raddr->sin_addr; + } else { + ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, raddr6->sin6_port, 0, + raddr6->sin6_scope_id); + iptype = 6; + ra = (__u8 *)&raddr6->sin6_addr; + } + if (!ep->dst) { + pr_err("%s - cannot find route.\n", __func__); + err = -EHOSTUNREACH; + goto fail3; + } + err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false); + if (err) { + pr_err("%s - cannot alloc l2e.\n", __func__); + goto fail4; + } + + PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + ep->l2t->idx); + + state_set(&ep->com, CONNECTING); + ep->tos = 0; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + cxgb4_l2t_release(ep->l2t); +fail4: + dst_release(ep->dst); +fail3: + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); +fail2: + /* + * remember to send notification to upper layer. + * We are in here so the upper layer is not aware that this is + * re-connect attempt and so, upper layer is still waiting for + * response of 1st connect request. 
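+ * Reporting -ECONNRESET through connect_reply_upcall() below + * completes that pending request just as a failed first attempt + * would.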
+ */ + connect_reply_upcall(ep, -ECONNRESET); + c4iw_put_ep(&ep->com); +out: + return err; +} + +static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_act_open_rpl *rpl = cplhdr(skb); + unsigned int atid = GET_TID_TID(GET_AOPEN_ATID( + ntohl(rpl->atid_status))); + struct tid_info *t = dev->rdev.lldi.tids; + int status = GET_AOPEN_STATUS(ntohl(rpl->atid_status)); + struct sockaddr_in *la; + struct sockaddr_in *ra; + struct sockaddr_in6 *la6; + struct sockaddr_in6 *ra6; + + ep = lookup_atid(t, atid); + la = (struct sockaddr_in *)&ep->com.mapped_local_addr; + ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr; + la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr; + + PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, + status, status2errno(status)); + + if (is_neg_adv(status)) { + printk(KERN_WARNING MOD "Connection problems for atid %u\n", + atid); + return 0; + } + + set_bit(ACT_OPEN_RPL, &ep->com.history); + + /* + * Log interesting failures. + */ + switch (status) { + case CPL_ERR_CONN_RESET: + case CPL_ERR_CONN_TIMEDOUT: + break; + case CPL_ERR_TCAM_FULL: + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.tcam_full++; + mutex_unlock(&dev->rdev.stats.lock); + if (ep->com.local_addr.ss_family == AF_INET && + dev->rdev.lldi.enable_fw_ofld_conn) { + send_fw_act_open_req(ep, + GET_TID_TID(GET_AOPEN_ATID( + ntohl(rpl->atid_status)))); + return 0; + } + break; + case CPL_ERR_CONN_EXIST: + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + set_bit(ACT_RETRY_INUSE, &ep->com.history); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, + atid); + cxgb4_free_atid(t, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_reconnect(ep); + return 0; + } + break; + default: + if (ep->com.local_addr.ss_family == AF_INET) { + pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n", + atid, status, status2errno(status), + &la->sin_addr.s_addr, ntohs(la->sin_port), + &ra->sin_addr.s_addr, ntohs(ra->sin_port)); + } else { + pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n", + atid, status, status2errno(status), + la6->sin6_addr.s6_addr, ntohs(la6->sin6_port), + ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port)); + } + break; + } + + connect_reply_upcall(ep, status2errno(status)); + state_set(&ep->com, DEAD); + + if (status && act_open_has_tid(status)) + cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl)); + + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); + cxgb4_free_atid(t, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); + + return 0; +} + +static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_pass_open_rpl *rpl = cplhdr(skb); + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int stid = GET_TID(rpl); + struct c4iw_listen_ep *ep = lookup_stid(t, stid); + + if (!ep) { + PDBG("%s stid %d lookup failure!\n", __func__, stid); + goto out; + } + PDBG("%s ep %p status %d error %d\n", __func__, ep, + rpl->status, status2errno(rpl->status)); + c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); + +out: + return 0; +} + +static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_close_listsvr_rpl *rpl = cplhdr(skb); + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int stid = GET_TID(rpl); + struct c4iw_listen_ep *ep = lookup_stid(t, stid); + + PDBG("%s ep %p\n", __func__, ep); + 
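+ /* + * Wake the thread blocked in c4iw_destroy_listen(), handing it + * the CPL status so the remove-server path can return an errno. + */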
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); + return 0; +} + +static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, + struct cpl_pass_accept_req *req) +{ + struct cpl_pass_accept_rpl *rpl; + unsigned int mtu_idx; + u64 opt0; + u32 opt2; + int wscale; + struct cpl_t5_pass_accept_rpl *rpl5 = NULL; + int win; + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + BUG_ON(skb_cloned(skb)); + + skb_get(skb); + rpl = cplhdr(skb); + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + skb_trim(skb, roundup(sizeof(*rpl5), 16)); + rpl5 = (void *)rpl; + INIT_TP_WR(rpl5, ep->hwtid); + } else { + skb_trim(skb, sizeof(*rpl)); + INIT_TP_WR(rpl, ep->hwtid); + } + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, + ep->hwtid)); + + best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps && req->tcpopt.tstamp); + wscale = compute_wscale(rcv_win); + + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. + */ + win = ep->rcv_win >> 10; + if (win > RCV_BUFSIZ_MASK) + win = RCV_BUFSIZ_MASK; + opt0 = (nocong ? NO_CONG(1) : 0) | + KEEP_ALIVE(1) | + DELACK(1) | + WND_SCALE(wscale) | + MSS_IDX(mtu_idx) | + L2T_IDX(ep->l2t->idx) | + TX_CHAN(ep->tx_chan) | + SMAC_SEL(ep->smac_idx) | + DSCP(ep->tos >> 2) | + ULP_MODE(ULP_MODE_TCPDDP) | + RCV_BUFSIZ(win); + opt2 = RX_CHANNEL(0) | + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + + if (enable_tcp_timestamps && req->tcpopt.tstamp) + opt2 |= TSTAMPS_EN(1); + if (enable_tcp_sack && req->tcpopt.sack) + opt2 |= SACK_EN(1); + if (wscale && enable_tcp_window_scaling) + opt2 |= WND_SCALE_EN(1); + if (enable_ecn) { + const struct tcphdr *tcph; + u32 hlen = ntohl(req->hdr_len); + + tcph = (const void *)(req + 1) + G_ETH_HDR_LEN(hlen) + + G_IP_HDR_LEN(hlen); + if (tcph->ece && tcph->cwr) + opt2 |= CCTRL_ECN(1); + } + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + u32 isn = (prandom_u32() & ~7UL) - 1; + opt2 |= T5_OPT_2_VALID; + opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ + rpl5 = (void *)rpl; + memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16)); + if (peer2peer) + isn += 4; + rpl5->iss = cpu_to_be32(isn); + PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss)); + } + + rpl->opt0 = cpu_to_be64(opt0); + rpl->opt2 = cpu_to_be32(opt2); + set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); + c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); + + return; +} + +static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) +{ + PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid); + BUG_ON(skb_cloned(skb)); + skb_trim(skb, sizeof(struct cpl_tid_release)); + release_tid(&dev->rdev, hwtid, skb); + return; +} + +static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype, + __u8 *local_ip, __u8 *peer_ip, + __be16 *local_port, __be16 *peer_port) +{ + int eth_len = G_ETH_HDR_LEN(be32_to_cpu(req->hdr_len)); + int ip_len = G_IP_HDR_LEN(be32_to_cpu(req->hdr_len)); + struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); + struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); + struct tcphdr *tcp = (struct tcphdr *) + ((u8 *)(req + 1) + eth_len + ip_len); + + if (ip->version == 4) { + PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__, + ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source), + ntohs(tcp->dest)); + *iptype = 4; + memcpy(peer_ip, &ip->saddr, 4); + memcpy(local_ip, &ip->daddr, 4); + } else 
{ + PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__, + ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source), + ntohs(tcp->dest)); + *iptype = 6; + memcpy(peer_ip, ip6->saddr.s6_addr, 16); + memcpy(local_ip, ip6->daddr.s6_addr, 16); + } + *peer_port = tcp->source; + *local_port = tcp->dest; + + return; +} + +static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *child_ep = NULL, *parent_ep; + struct cpl_pass_accept_req *req = cplhdr(skb); + unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid)); + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int hwtid = GET_TID(req); + struct dst_entry *dst; + __u8 local_ip[16], peer_ip[16]; + __be16 local_port, peer_port; + int err; + u16 peer_mss = ntohs(req->tcpopt.mss); + int iptype; + unsigned short hdrs; + + parent_ep = lookup_stid(t, stid); + if (!parent_ep) { + PDBG("%s connect request on invalid stid %d\n", __func__, stid); + goto reject; + } + + if (state_read(&parent_ep->com) != LISTEN) { + printk(KERN_ERR "%s - listening ep not in LISTEN\n", + __func__); + goto reject; + } + + get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port); + + /* Find output route */ + if (iptype == 4) { + PDBG("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n" + , __func__, parent_ep, hwtid, + local_ip, peer_ip, ntohs(local_port), + ntohs(peer_port), peer_mss); + dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, + local_port, peer_port, + GET_POPEN_TOS(ntohl(req->tos_stid))); + } else { + PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" + , __func__, parent_ep, hwtid, + local_ip, peer_ip, ntohs(local_port), + ntohs(peer_port), peer_mss); + dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port, + PASS_OPEN_TOS(ntohl(req->tos_stid)), + ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_scope_id); + } + if (!dst) { + printk(KERN_ERR MOD "%s - failed to find dst entry!\n", + __func__); + goto reject; + } + + child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); + if (!child_ep) { + printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n", + __func__); + dst_release(dst); + goto reject; + } + + err = import_ep(child_ep, iptype, peer_ip, dst, dev, false); + if (err) { + printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", + __func__); + dst_release(dst); + kfree(child_ep); + goto reject; + } + + hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) + + ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 
12 : 0); + if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) + child_ep->mtu = peer_mss + hdrs; + + state_set(&child_ep->com, CONNECTING); + child_ep->com.dev = dev; + child_ep->com.cm_id = NULL; + if (iptype == 4) { + struct sockaddr_in *sin = (struct sockaddr_in *) + &child_ep->com.local_addr; + sin->sin_family = PF_INET; + sin->sin_port = local_port; + sin->sin_addr.s_addr = *(__be32 *)local_ip; + sin = (struct sockaddr_in *)&child_ep->com.remote_addr; + sin->sin_family = PF_INET; + sin->sin_port = peer_port; + sin->sin_addr.s_addr = *(__be32 *)peer_ip; + } else { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) + &child_ep->com.local_addr; + sin6->sin6_family = PF_INET6; + sin6->sin6_port = local_port; + memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); + sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr; + sin6->sin6_family = PF_INET6; + sin6->sin6_port = peer_port; + memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16); + } + c4iw_get_ep(&parent_ep->com); + child_ep->parent_ep = parent_ep; + child_ep->tos = GET_POPEN_TOS(ntohl(req->tos_stid)); + child_ep->dst = dst; + child_ep->hwtid = hwtid; + + PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__, + child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid); + + init_timer(&child_ep->timer); + cxgb4_insert_tid(t, child_ep, hwtid); + insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid); + accept_cr(child_ep, skb, req); + set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); + goto out; +reject: + reject_cr(dev, hwtid, skb); +out: + return 0; +} + +static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_pass_establish *req = cplhdr(skb); + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(req); + + ep = lookup_tid(t, tid); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + ep->snd_seq = be32_to_cpu(req->snd_isn); + ep->rcv_seq = be32_to_cpu(req->rcv_isn); + + PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid, + ntohs(req->tcp_opt)); + + set_emss(ep, ntohs(req->tcp_opt)); + + dst_confirm(ep->dst); + state_set(&ep->com, MPA_REQ_WAIT); + start_ep_timer(ep); + send_flowc(ep, skb); + set_bit(PASS_ESTAB, &ep->com.history); + + return 0; +} + +static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_peer_close *hdr = cplhdr(skb); + struct c4iw_ep *ep; + struct c4iw_qp_attributes attrs; + int disconnect = 1; + int release = 0; + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(hdr); + int ret; + + ep = lookup_tid(t, tid); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + dst_confirm(ep->dst); + + set_bit(PEER_CLOSE, &ep->com.history); + mutex_lock(&ep->com.mutex); + switch (ep->com.state) { + case MPA_REQ_WAIT: + __state_set(&ep->com, CLOSING); + break; + case MPA_REQ_SENT: + __state_set(&ep->com, CLOSING); + connect_reply_upcall(ep, -ECONNRESET); + break; + case MPA_REQ_RCVD: + + /* + * We're gonna mark this puppy DEAD, but keep + * the reference on it until the ULP accepts or + * rejects the CR. Also wake up anyone waiting + * in rdma connection migration (see c4iw_accept_cr()). 
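+ * The endpoint then sits in CLOSING until the ULP either + * accepts or rejects the connect request.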
+ */ + __state_set(&ep->com, CLOSING); + PDBG("waking up ep %p tid %u\n", ep, ep->hwtid); + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + break; + case MPA_REP_SENT: + __state_set(&ep->com, CLOSING); + PDBG("waking up ep %p tid %u\n", ep, ep->hwtid); + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + break; + case FPDU_MODE: + start_ep_timer(ep); + __state_set(&ep->com, CLOSING); + attrs.next_state = C4IW_QP_STATE_CLOSING; + ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + if (ret != -ECONNRESET) { + peer_close_upcall(ep); + disconnect = 1; + } + break; + case ABORTING: + disconnect = 0; + break; + case CLOSING: + __state_set(&ep->com, MORIBUND); + disconnect = 0; + break; + case MORIBUND: + (void)stop_ep_timer(ep); + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = C4IW_QP_STATE_IDLE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } + close_complete_upcall(ep, 0); + __state_set(&ep->com, DEAD); + release = 1; + disconnect = 0; + break; + case DEAD: + disconnect = 0; + break; + default: + BUG_ON(1); + } + mutex_unlock(&ep->com.mutex); + if (disconnect) + c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + if (release) + release_ep_resources(ep); + return 0; +} + +static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_abort_req_rss *req = cplhdr(skb); + struct c4iw_ep *ep; + struct cpl_abort_rpl *rpl; + struct sk_buff *rpl_skb; + struct c4iw_qp_attributes attrs; + int ret; + int release = 0; + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(req); + + ep = lookup_tid(t, tid); + if (is_neg_adv(req->status)) { + PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, + ep->hwtid); + return 0; + } + PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, + ep->com.state); + set_bit(PEER_ABORT, &ep->com.history); + + /* + * Wake up any threads in rdma_init() or rdma_fini(). + * However, this is not needed if com state is just + * MPA_REQ_SENT + */ + if (ep->com.state != MPA_REQ_SENT) + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + + mutex_lock(&ep->com.mutex); + switch (ep->com.state) { + case CONNECTING: + break; + case MPA_REQ_WAIT: + (void)stop_ep_timer(ep); + break; + case MPA_REQ_SENT: + (void)stop_ep_timer(ep); + if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1)) + connect_reply_upcall(ep, -ECONNRESET); + else { + /* + * we just don't send notification upwards because we + * want to retry with mpa_v1 without upper layers even + * knowing it. + * + * do some housekeeping so as to re-initiate the + * connection + */ + PDBG("%s: mpa_rev=%d. 
Retrying with mpav1\n", __func__, + mpa_rev); + ep->retry_with_mpa_v1 = 1; + } + break; + case MPA_REP_SENT: + break; + case MPA_REQ_RCVD: + break; + case MORIBUND: + case CLOSING: + stop_ep_timer(ep); + /*FALLTHROUGH*/ + case FPDU_MODE: + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = C4IW_QP_STATE_ERROR; + ret = c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (ret) + printk(KERN_ERR MOD + "%s - qp <- error failed!\n", + __func__); + } + peer_abort_upcall(ep); + break; + case ABORTING: + break; + case DEAD: + PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); + mutex_unlock(&ep->com.mutex); + return 0; + default: + BUG_ON(1); + break; + } + dst_confirm(ep->dst); + if (ep->com.state != ABORTING) { + __state_set(&ep->com, DEAD); + /* we don't release if we want to retry with mpa_v1 */ + if (!ep->retry_with_mpa_v1) + release = 1; + } + mutex_unlock(&ep->com.mutex); + + rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); + if (!rpl_skb) { + printk(KERN_ERR MOD "%s - cannot allocate skb!\n", + __func__); + release = 1; + goto out; + } + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); + INIT_TP_WR(rpl, ep->hwtid); + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); + rpl->cmd = CPL_ABORT_NO_RST; + c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb); +out: + if (release) + release_ep_resources(ep); + else if (ep->retry_with_mpa_v1) { + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); + cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_reconnect(ep); + } + + return 0; +} + +static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct c4iw_qp_attributes attrs; + struct cpl_close_con_rpl *rpl = cplhdr(skb); + int release = 0; + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(rpl); + + ep = lookup_tid(t, tid); + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + BUG_ON(!ep); + + /* The cm_id may be null if we failed to connect */ + mutex_lock(&ep->com.mutex); + switch (ep->com.state) { + case CLOSING: + __state_set(&ep->com, MORIBUND); + break; + case MORIBUND: + (void)stop_ep_timer(ep); + if ((ep->com.cm_id) && (ep->com.qp)) { + attrs.next_state = C4IW_QP_STATE_IDLE; + c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + close_complete_upcall(ep, 0); + __state_set(&ep->com, DEAD); + release = 1; + break; + case ABORTING: + case DEAD: + break; + default: + BUG_ON(1); + break; + } + mutex_unlock(&ep->com.mutex); + if (release) + release_ep_resources(ep); + return 0; +} + +static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_rdma_terminate *rpl = cplhdr(skb); + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(rpl); + struct c4iw_ep *ep; + struct c4iw_qp_attributes attrs; + + ep = lookup_tid(t, tid); + BUG_ON(!ep); + + if (ep && ep->com.qp) { + printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid, + ep->com.qp->wq.sq.qid); + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } else + printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid); + + return 0; +} + +/* + * Upcall from the adapter indicating data has been transmitted. + * For us its just the single MPA request or reply. We can now free + * the skb holding the mpa message. 
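+ * An ack that carries no credits completes nothing we sent and + * is ignored below.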
+ */ +static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct c4iw_ep *ep; + struct cpl_fw4_ack *hdr = cplhdr(skb); + u8 credits = hdr->credits; + unsigned int tid = GET_TID(hdr); + struct tid_info *t = dev->rdev.lldi.tids; + + + ep = lookup_tid(t, tid); + PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); + if (credits == 0) { + PDBG("%s 0 credit ack ep %p tid %u state %u\n", + __func__, ep, ep->hwtid, state_read(&ep->com)); + return 0; + } + + dst_confirm(ep->dst); + if (ep->mpa_skb) { + PDBG("%s last streaming msg ack ep %p tid %u state %u " + "initiator %u freeing skb\n", __func__, ep, ep->hwtid, + state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0); + kfree_skb(ep->mpa_skb); + ep->mpa_skb = NULL; + } + return 0; +} + +int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + int err = 0; + int disconnect = 0; + struct c4iw_ep *ep = to_ep(cm_id); + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + + mutex_lock(&ep->com.mutex); + if (ep->com.state == DEAD) { + mutex_unlock(&ep->com.mutex); + c4iw_put_ep(&ep->com); + return -ECONNRESET; + } + set_bit(ULP_REJECT, &ep->com.history); + BUG_ON(ep->com.state != MPA_REQ_RCVD); + if (mpa_rev == 0) + abort_connection(ep, NULL, GFP_KERNEL); + else { + err = send_mpa_reject(ep, pdata, pdata_len); + disconnect = 1; + } + mutex_unlock(&ep->com.mutex); + if (disconnect) + err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + c4iw_put_ep(&ep->com); + return 0; +} + +int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + int err; + struct c4iw_qp_attributes attrs; + enum c4iw_qp_attr_mask mask; + struct c4iw_ep *ep = to_ep(cm_id); + struct c4iw_dev *h = to_c4iw_dev(cm_id->device); + struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); + + PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); + + mutex_lock(&ep->com.mutex); + if (ep->com.state == DEAD) { + err = -ECONNRESET; + goto err; + } + + BUG_ON(ep->com.state != MPA_REQ_RCVD); + BUG_ON(!qp); + + set_bit(ULP_ACCEPT, &ep->com.history); + if ((conn_param->ord > c4iw_max_read_depth) || + (conn_param->ird > c4iw_max_read_depth)) { + abort_connection(ep, NULL, GFP_KERNEL); + err = -EINVAL; + goto err; + } + + if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { + if (conn_param->ord > ep->ird) { + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + send_mpa_reject(ep, conn_param->private_data, + conn_param->private_data_len); + abort_connection(ep, NULL, GFP_KERNEL); + err = -ENOMEM; + goto err; + } + if (conn_param->ird > ep->ord) { + if (!ep->ord) + conn_param->ird = 1; + else { + abort_connection(ep, NULL, GFP_KERNEL); + err = -ENOMEM; + goto err; + } + } + + } + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + + if (ep->mpa_attr.version != 2) + if (peer2peer && ep->ird == 0) + ep->ird = 1; + + PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); + + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->com.qp = qp; + ref_qp(ep); + + /* bind QP to EP and move to RTS */ + attrs.mpa_attr = ep->mpa_attr; + attrs.max_ird = ep->ird; + attrs.max_ord = ep->ord; + attrs.llp_stream_handle = ep; + attrs.next_state = C4IW_QP_STATE_RTS; + + /* bind QP and TID with INIT_WR */ + mask = C4IW_QP_ATTR_NEXT_STATE | + C4IW_QP_ATTR_LLP_STREAM_HANDLE | + C4IW_QP_ATTR_MPA_ATTR | + C4IW_QP_ATTR_MAX_IRD | + C4IW_QP_ATTR_MAX_ORD; + + err = c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, mask, &attrs, 1); + if (err) + goto err1; + err = send_mpa_reply(ep, conn_param->private_data, + 
conn_param->private_data_len); + if (err) + goto err1; + + __state_set(&ep->com, FPDU_MODE); + established_upcall(ep); + mutex_unlock(&ep->com.mutex); + c4iw_put_ep(&ep->com); + return 0; +err1: + ep->com.cm_id = NULL; + cm_id->rem_ref(cm_id); +err: + mutex_unlock(&ep->com.mutex); + c4iw_put_ep(&ep->com); + return err; +} + +static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) +{ + struct in_device *ind; + int found = 0; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + + ind = in_dev_get(dev->rdev.lldi.ports[0]); + if (!ind) + return -EADDRNOTAVAIL; + for_primary_ifa(ind) { + laddr->sin_addr.s_addr = ifa->ifa_address; + raddr->sin_addr.s_addr = ifa->ifa_address; + found = 1; + break; + } + endfor_ifa(ind); + in_dev_put(ind); + return found ? 0 : -EADDRNOTAVAIL; +} + +static int get_lladdr(struct net_device *dev, struct in6_addr *addr, + unsigned char banned_flags) +{ + struct inet6_dev *idev; + int err = -EADDRNOTAVAIL; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev != NULL) { + struct inet6_ifaddr *ifp; + + read_lock_bh(&idev->lock); + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & banned_flags)) { + memcpy(addr, &ifp->addr, 16); + err = 0; + break; + } + } + read_unlock_bh(&idev->lock); + } + rcu_read_unlock(); + return err; +} + +static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) +{ + struct in6_addr uninitialized_var(addr); + struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->local_addr; + struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->remote_addr; + + if (get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) { + memcpy(la6->sin6_addr.s6_addr, &addr, 16); + memcpy(ra6->sin6_addr.s6_addr, &addr, 16); + return 0; + } + return -EADDRNOTAVAIL; +} + +int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); + struct c4iw_ep *ep; + int err = 0; + struct sockaddr_in *laddr; + struct sockaddr_in *raddr; + struct sockaddr_in6 *laddr6; + struct sockaddr_in6 *raddr6; + struct iwpm_dev_data pm_reg_msg; + struct iwpm_sa_data pm_msg; + __u8 *ra; + int iptype; + int iwpm_err = 0; + + if ((conn_param->ord > c4iw_max_read_depth) || + (conn_param->ird > c4iw_max_read_depth)) { + err = -EINVAL; + goto out; + } + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); + if (!ep) { + printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); + err = -ENOMEM; + goto out; + } + init_timer(&ep->timer); + ep->plen = conn_param->private_data_len; + if (ep->plen) + memcpy(ep->mpa_pkt + sizeof(struct mpa_message), + conn_param->private_data, ep->plen); + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + + if (peer2peer && ep->ord == 0) + ep->ord = 1; + + cm_id->add_ref(cm_id); + ep->com.dev = dev; + ep->com.cm_id = cm_id; + ep->com.qp = get_qhp(dev, conn_param->qpn); + if (!ep->com.qp) { + PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); + err = -EINVAL; + goto fail1; + } + ref_qp(ep); + PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, + ep->com.qp, cm_id); + + /* + * Allocate an active TID to initiate a TCP connection. 
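+ * cxgb4_alloc_atid() returns -1 when the aTID table is + * exhausted; that is mapped to -ENOMEM below.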
+ */ + ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep); + if (ep->atid == -1) { + printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); + err = -ENOMEM; + goto fail1; + } + insert_handle(dev, &dev->atid_idr, ep, ep->atid); + + memcpy(&ep->com.local_addr, &cm_id->local_addr, + sizeof(ep->com.local_addr)); + memcpy(&ep->com.remote_addr, &cm_id->remote_addr, + sizeof(ep->com.remote_addr)); + + /* No port mapper available, go with the specified peer information */ + memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr, + sizeof(ep->com.mapped_local_addr)); + memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr, + sizeof(ep->com.mapped_remote_addr)); + + c4iw_form_reg_msg(dev, &pm_reg_msg); + iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW); + if (iwpm_err) { + PDBG("%s: Port Mapper reg pid fail (err = %d).\n", + __func__, iwpm_err); + } + if (iwpm_valid_pid() && !iwpm_err) { + c4iw_form_pm_msg(ep, &pm_msg); + iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW); + if (iwpm_err) + PDBG("%s: Port Mapper query fail (err = %d).\n", + __func__, iwpm_err); + else + c4iw_record_pm_msg(ep, &pm_msg); + } + if (iwpm_create_mapinfo(&ep->com.local_addr, + &ep->com.mapped_local_addr, RDMA_NL_C4IW)) { + iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW); + err = -ENOMEM; + goto fail1; + } + print_addr(&ep->com, __func__, "add_query/create_mapinfo"); + set_bit(RELEASE_MAPINFO, &ep->com.flags); + + laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr; + raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr; + laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + raddr6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr; + + if (cm_id->remote_addr.ss_family == AF_INET) { + iptype = 4; + ra = (__u8 *)&raddr->sin_addr; + + /* + * Handle loopback requests to INADDR_ANY. + */ + if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) { + err = pick_local_ipaddrs(dev, cm_id); + if (err) + goto fail1; + } + + /* find a route */ + PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", + __func__, &laddr->sin_addr, ntohs(laddr->sin_port), + ra, ntohs(raddr->sin_port)); + ep->dst = find_route(dev, laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, laddr->sin_port, + raddr->sin_port, 0); + } else { + iptype = 6; + ra = (__u8 *)&raddr6->sin6_addr; + + /* + * Handle loopback requests to INADDR_ANY. 
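+ * (here its IPv6 counterpart, the unspecified address); + * pick_local_ip6addrs() substitutes the port's link-local + * address for both endpoints.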
+ */ + if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { + err = pick_local_ip6addrs(dev, cm_id); + if (err) + goto fail1; + } + + /* find a route */ + PDBG("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n", + __func__, laddr6->sin6_addr.s6_addr, + ntohs(laddr6->sin6_port), + raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port)); + ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, raddr6->sin6_port, 0, + raddr6->sin6_scope_id); + } + if (!ep->dst) { + printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); + err = -EHOSTUNREACH; + goto fail2; + } + + err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true); + if (err) { + printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); + goto fail3; + } + + PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", + __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid, + ep->l2t->idx); + + state_set(&ep->com, CONNECTING); + ep->tos = 0; + + /* send connect request to rnic */ + err = send_connect(ep); + if (!err) + goto out; + + cxgb4_l2t_release(ep->l2t); +fail3: + dst_release(ep->dst); +fail2: + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); +fail1: + cm_id->rem_ref(cm_id); + c4iw_put_ep(&ep->com); +out: + return err; +} + +static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) +{ + int err; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; + + c4iw_init_wr_wait(&ep->com.wr_wait); + err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0], + ep->stid, &sin6->sin6_addr, + sin6->sin6_port, + ep->com.dev->rdev.lldi.rxq_ids[0]); + if (!err) + err = c4iw_wait_for_reply(&ep->com.dev->rdev, + &ep->com.wr_wait, + 0, 0, __func__); + if (err) + pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n", + err, ep->stid, + sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port)); + return err; +} + +static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) +{ + int err; + struct sockaddr_in *sin = (struct sockaddr_in *) + &ep->com.mapped_local_addr; + + if (dev->rdev.lldi.enable_fw_ofld_conn) { + do { + err = cxgb4_create_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + sin->sin_addr.s_addr, sin->sin_port, 0, + ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0); + if (err == -EBUSY) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(100)); + } + } while (err == -EBUSY); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], + ep->stid, sin->sin_addr.s_addr, sin->sin_port, + 0, ep->com.dev->rdev.lldi.rxq_ids[0]); + if (!err) + err = c4iw_wait_for_reply(&ep->com.dev->rdev, + &ep->com.wr_wait, + 0, 0, __func__); + } + if (err) + pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n" + , err, ep->stid, + &sin->sin_addr, ntohs(sin->sin_port)); + return err; +} + +int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + int err = 0; + struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); + struct c4iw_listen_ep *ep; + struct iwpm_dev_data pm_reg_msg; + struct iwpm_sa_data pm_msg; + int iwpm_err = 0; + + might_sleep(); + + ep = alloc_ep(sizeof(*ep), GFP_KERNEL); + if (!ep) { + printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); + err = -ENOMEM; + goto fail1; + } + PDBG("%s ep %p\n", __func__, ep); + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->com.dev = dev; + ep->backlog = backlog; + 
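+ /* + * Record the address the ULP asked to listen on; the mapped + * copy set up below may be rewritten by the iWarp port mapper. + */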
memcpy(&ep->com.local_addr, &cm_id->local_addr, + sizeof(ep->com.local_addr)); + + /* + * Allocate a server TID. + */ + if (dev->rdev.lldi.enable_fw_ofld_conn && + ep->com.local_addr.ss_family == AF_INET) + ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, + cm_id->local_addr.ss_family, ep); + else + ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, + cm_id->local_addr.ss_family, ep); + + if (ep->stid == -1) { + printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__); + err = -ENOMEM; + goto fail2; + } + insert_handle(dev, &dev->stid_idr, ep, ep->stid); + + /* No port mapper available, go with the specified info */ + memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr, + sizeof(ep->com.mapped_local_addr)); + + c4iw_form_reg_msg(dev, &pm_reg_msg); + iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW); + if (iwpm_err) { + PDBG("%s: Port Mapper reg pid fail (err = %d).\n", + __func__, iwpm_err); + } + if (iwpm_valid_pid() && !iwpm_err) { + memcpy(&pm_msg.loc_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW); + if (iwpm_err) + PDBG("%s: Port Mapper query fail (err = %d).\n", + __func__, iwpm_err); + else + memcpy(&ep->com.mapped_local_addr, + &pm_msg.mapped_loc_addr, + sizeof(ep->com.mapped_local_addr)); + } + if (iwpm_create_mapinfo(&ep->com.local_addr, + &ep->com.mapped_local_addr, RDMA_NL_C4IW)) { + err = -ENOMEM; + goto fail3; + } + print_addr(&ep->com, __func__, "add_mapping/create_mapinfo"); + + set_bit(RELEASE_MAPINFO, &ep->com.flags); + state_set(&ep->com, LISTEN); + if (ep->com.local_addr.ss_family == AF_INET) + err = create_server4(dev, ep); + else + err = create_server6(dev, ep); + if (!err) { + cm_id->provider_data = ep; + goto out; + } + +fail3: + cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, + ep->com.local_addr.ss_family); +fail2: + cm_id->rem_ref(cm_id); + c4iw_put_ep(&ep->com); +fail1: +out: + return err; +} + +int c4iw_destroy_listen(struct iw_cm_id *cm_id) +{ + int err; + struct c4iw_listen_ep *ep = to_listen_ep(cm_id); + + PDBG("%s ep %p\n", __func__, ep); + + might_sleep(); + state_set(&ep->com, DEAD); + if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn && + ep->com.local_addr.ss_family == AF_INET) { + err = cxgb4_remove_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.dev->rdev.lldi.rxq_ids[0], 0); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = cxgb4_remove_server( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.dev->rdev.lldi.rxq_ids[0], 0); + if (err) + goto done; + err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, + 0, 0, __func__); + } + remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); + cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, + ep->com.local_addr.ss_family); +done: + cm_id->rem_ref(cm_id); + c4iw_put_ep(&ep->com); + return err; +} + +int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) +{ + int ret = 0; + int close = 0; + int fatal = 0; + struct c4iw_rdev *rdev; + + mutex_lock(&ep->com.mutex); + + PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, + states[ep->com.state], abrupt); + + rdev = &ep->com.dev->rdev; + if (c4iw_fatal_error(rdev)) { + fatal = 1; + close_complete_upcall(ep, -EIO); + ep->com.state = DEAD; + } + switch (ep->com.state) { + case MPA_REQ_WAIT: + case MPA_REQ_SENT: + case MPA_REQ_RCVD: + case MPA_REP_SENT: + case FPDU_MODE: + close = 1; + if (abrupt) + ep->com.state = ABORTING; + else { + ep->com.state = CLOSING; + start_ep_timer(ep); + } + set_bit(CLOSE_SENT, &ep->com.flags); + 
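+ /* + * CLOSE_SENT ensures we issue at most one half-close: if + * c4iw_ep_disconnect() runs again while CLOSING, the + * test_and_set_bit() below finds the bit already set. + */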
break; + case CLOSING: + if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { + close = 1; + if (abrupt) { + (void)stop_ep_timer(ep); + ep->com.state = ABORTING; + } else + ep->com.state = MORIBUND; + } + break; + case MORIBUND: + case ABORTING: + case DEAD: + PDBG("%s ignoring disconnect ep %p state %u\n", + __func__, ep, ep->com.state); + break; + default: + BUG(); + break; + } + + if (close) { + if (abrupt) { + set_bit(EP_DISC_ABORT, &ep->com.history); + close_complete_upcall(ep, -ECONNRESET); + ret = send_abort(ep, NULL, gfp); + } else { + set_bit(EP_DISC_CLOSE, &ep->com.history); + ret = send_halfclose(ep, gfp); + } + if (ret) + fatal = 1; + } + mutex_unlock(&ep->com.mutex); + if (fatal) + release_ep_resources(ep); + return ret; +} + +static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct c4iw_ep *ep; + int atid = be32_to_cpu(req->tid); + + ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, + (__force u32) req->tid); + if (!ep) + return; + + switch (req->retval) { + case FW_ENOMEM: + set_bit(ACT_RETRY_NOMEM, &ep->com.history); + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + send_fw_act_open_req(ep, atid); + return; + } + case FW_EADDRINUSE: + set_bit(ACT_RETRY_INUSE, &ep->com.history); + if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) { + send_fw_act_open_req(ep, atid); + return; + } + break; + default: + pr_info("%s unexpected ofld conn wr retval %d\n", + __func__, req->retval); + break; + } + pr_err("active ofld_connect_wr failure %d atid %d\n", + req->retval, atid); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.act_ofld_conn_fails++; + mutex_unlock(&dev->rdev.stats.lock); + connect_reply_upcall(ep, status2errno(req->retval)); + state_set(&ep->com, DEAD); + remove_handle(dev, &dev->atid_idr, atid); + cxgb4_free_atid(dev->rdev.lldi.tids, atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); +} + +static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, + struct cpl_fw6_msg_ofld_connection_wr_rpl *req) +{ + struct sk_buff *rpl_skb; + struct cpl_pass_accept_req *cpl; + int ret; + + rpl_skb = (struct sk_buff *)(unsigned long)req->cookie; + BUG_ON(!rpl_skb); + if (req->retval) { + PDBG("%s passive open failure %d\n", __func__, req->retval); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.pas_ofld_conn_fails++; + mutex_unlock(&dev->rdev.stats.lock); + kfree_skb(rpl_skb); + } else { + cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb); + OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, + (__force u32) htonl( + (__force u32) req->tid))); + ret = pass_accept_req(dev, rpl_skb); + if (!ret) + kfree_skb(rpl_skb); + } + return; +} + +static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_fw6_msg *rpl = cplhdr(skb); + struct cpl_fw6_msg_ofld_connection_wr_rpl *req; + + switch (rpl->type) { + case FW6_TYPE_CQE: + c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]); + break; + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: + req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data; + switch (req->t_state) { + case TCP_SYN_SENT: + active_ofld_conn_reply(dev, skb, req); + break; + case TCP_SYN_RECV: + passive_ofld_conn_reply(dev, skb, req); + break; + default: + pr_err("%s unexpected ofld conn wr state %d\n", + __func__, req->t_state); + break; + } + break; + } + return 0; +} + +static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) +{ + u32 l2info; + u16 vlantag, len, hdr_len, 
eth_hdr_len; + u8 intf; + struct cpl_rx_pkt *cpl = cplhdr(skb); + struct cpl_pass_accept_req *req; + struct tcp_options_received tmp_opt; + struct c4iw_dev *dev; + + dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); + /* Store values from cpl_rx_pkt in a temporary location. */ + vlantag = (__force u16) cpl->vlan; + len = (__force u16) cpl->len; + l2info = (__force u32) cpl->l2info; + hdr_len = (__force u16) cpl->hdr_len; + intf = cpl->iff; + + __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); + + /* + * We need to parse the TCP options from the SYN packet + * to generate the cpl_pass_accept_req. + */ + memset(&tmp_opt, 0, sizeof(tmp_opt)); + tcp_clear_options(&tmp_opt); + tcp_parse_options(skb, &tmp_opt, 0, NULL); + + req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->l2info = cpu_to_be16(V_SYN_INTF(intf) | + V_SYN_MAC_IDX(G_RX_MACIDX( + (__force int) htonl(l2info))) | + F_SYN_XACT_MATCH); + eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? + G_RX_ETHHDR_LEN((__force int) htonl(l2info)) : + G_RX_T5_ETHHDR_LEN((__force int) htonl(l2info)); + req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN( + (__force int) htonl(l2info))) | + V_TCP_HDR_LEN(G_RX_TCPHDR_LEN( + (__force int) htons(hdr_len))) | + V_IP_HDR_LEN(G_RX_IPHDR_LEN( + (__force int) htons(hdr_len))) | + V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(eth_hdr_len))); + req->vlan = (__force __be16) vlantag; + req->len = (__force __be16) len; + req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) | + PASS_OPEN_TOS(tos)); + req->tcpopt.mss = htons(tmp_opt.mss_clamp); + if (tmp_opt.wscale_ok) + req->tcpopt.wsf = tmp_opt.snd_wscale; + req->tcpopt.tstamp = tmp_opt.saw_tstamp; + if (tmp_opt.sack_ok) + req->tcpopt.sack = 1; + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0)); + return; +} + +static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, + __be32 laddr, __be16 lport, + __be32 raddr, __be16 rport, + u32 rcv_isn, u32 filter, u16 window, + u32 rss_qid, u8 port_id) +{ + struct sk_buff *req_skb; + struct fw_ofld_connection_wr *req; + struct cpl_pass_accept_req *cpl = cplhdr(skb); + int ret; + + req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); + req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req)); + memset(req, 0, sizeof(*req)); + req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1)); + req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); + req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL); + req->le.filter = (__force __be32) filter; + req->le.lport = lport; + req->le.pport = rport; + req->le.u.ipv4.lip = laddr; + req->le.u.ipv4.pip = raddr; + req->tcb.rcv_nxt = htonl(rcv_isn + 1); + req->tcb.rcv_adv = htons(window); + req->tcb.t_state_to_astid = + htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_RECV) | + V_FW_OFLD_CONNECTION_WR_RCV_SCALE(cpl->tcpopt.wsf) | + V_FW_OFLD_CONNECTION_WR_ASTID( + GET_PASS_OPEN_TID(ntohl(cpl->tos_stid)))); + + /* + * We store the qid in opt2 which will be used by the firmware + * to send us the wr response. + */ + req->tcb.opt2 = htonl(V_RSS_QUEUE(rss_qid)); + + /* + * We initialize the MSS index in the TCB to 0xF so that, when the + * driver sends the cpl_pass_accept_rpl, the TCB picks up the + * correct value. If this were 0, TP would ignore any value > 0 + * for the MSS index. 
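+ * (0xF is the all-ones value of the 4-bit MSS index field in + * opt0, so any real index supplied later can override it.)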
+ */ + req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF)); + req->cookie = (unsigned long)skb; + + set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); + ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); + if (ret < 0) { + pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__, + ret); + kfree_skb(skb); + kfree_skb(req_skb); + } +} + +/* + * Handler for CPL_RX_PKT messages, which arrive when a filter, rather + * than a server TID, is being used to redirect SYN packets. Packets + * that hit the filter are redirected to the offload queue and the + * driver tries to establish the connection using a firmware work + * request. + */ +static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) +{ + int stid; + unsigned int filter; + struct ethhdr *eh = NULL; + struct vlan_ethhdr *vlan_eh = NULL; + struct iphdr *iph; + struct tcphdr *tcph; + struct rss_header *rss = (void *)skb->data; + struct cpl_rx_pkt *cpl = (void *)skb->data; + struct cpl_pass_accept_req *req = (void *)(rss + 1); + struct l2t_entry *e; + struct dst_entry *dst; + struct c4iw_ep *lep; + u16 window; + struct port_info *pi; + struct net_device *pdev; + u16 rss_qid, eth_hdr_len; + int step; + u32 tx_chan; + struct neighbour *neigh; + + /* Drop all non-SYN packets */ + if (!(cpl->l2info & cpu_to_be32(F_RXF_SYN))) + goto reject; + + /* + * Drop all packets which did not hit the filter. + * Unlikely to happen. + */ + if (!(rss->filter_hit && rss->filter_tid)) + goto reject; + + /* + * Calculate the server TID from the filter hit index in cpl_rx_pkt. + */ + stid = (__force int) cpu_to_be32((__force u32) rss->hash_val); + + lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); + if (!lep) { + PDBG("%s connect request on invalid stid %d\n", __func__, stid); + goto reject; + } + + eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? 
+ G_RX_ETHHDR_LEN(htonl(cpl->l2info)) : + G_RX_T5_ETHHDR_LEN(htonl(cpl->l2info)); + if (eth_hdr_len == ETH_HLEN) { + eh = (struct ethhdr *)(req + 1); + iph = (struct iphdr *)(eh + 1); + } else { + vlan_eh = (struct vlan_ethhdr *)(req + 1); + iph = (struct iphdr *)(vlan_eh + 1); + skb->vlan_tci = ntohs(cpl->vlan); + } + + if (iph->version != 0x4) + goto reject; + + tcph = (struct tcphdr *)(iph + 1); + skb_set_network_header(skb, (void *)iph - (void *)rss); + skb_set_transport_header(skb, (void *)tcph - (void *)rss); + skb_get(skb); + + PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__, + ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), + ntohs(tcph->source), iph->tos); + + dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, + iph->tos); + if (!dst) { + pr_err("%s - failed to find dst entry!\n", + __func__); + goto reject; + } + neigh = dst_neigh_lookup_skb(dst, skb); + + if (!neigh) { + pr_err("%s - failed to allocate neigh!\n", + __func__); + goto free_dst; + } + + if (neigh->dev->flags & IFF_LOOPBACK) { + pdev = ip_dev_find(&init_net, iph->daddr); + e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, + pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + tx_chan = cxgb4_port_chan(pdev); + dev_put(pdev); + } else { + pdev = get_real_dev(neigh->dev); + e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, + pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + tx_chan = cxgb4_port_chan(pdev); + } + neigh_release(neigh); + if (!e) { + pr_err("%s - failed to allocate l2t entry!\n", + __func__); + goto free_dst; + } + + step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; + rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; + window = (__force u16) htons((__force u16)tcph->window); + + /* Calculate the filter portion for the LE region. */ + filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple( + dev->rdev.lldi.ports[0], + e)); + + /* + * Synthesize the cpl_pass_accept_req. We have everything except the + * TID. Once the firmware sends a reply with the TID, we update the + * TID field in the cpl and pass it through the regular + * cpl_pass_accept_req path. + */ + build_cpl_pass_accept_req(skb, stid, iph->tos); + send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr, + tcph->source, ntohl(tcph->seq), filter, window, + rss_qid, pi->port_id); + cxgb4_l2t_release(e); +free_dst: + dst_release(dst); +reject: + return 0; +} + +/* + * These are the real handlers that are called from a + * work queue. 
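+ * sched() queues incoming CPLs on rxq and process_work() + * dispatches them through this table in process context, where + * it is safe to sleep.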
+ */ +static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_ESTABLISH] = act_establish, + [CPL_ACT_OPEN_RPL] = act_open_rpl, + [CPL_RX_DATA] = rx_data, + [CPL_ABORT_RPL_RSS] = abort_rpl, + [CPL_ABORT_RPL] = abort_rpl, + [CPL_PASS_OPEN_RPL] = pass_open_rpl, + [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl, + [CPL_PASS_ACCEPT_REQ] = pass_accept_req, + [CPL_PASS_ESTABLISH] = pass_establish, + [CPL_PEER_CLOSE] = peer_close, + [CPL_ABORT_REQ_RSS] = peer_abort, + [CPL_CLOSE_CON_RPL] = close_con_rpl, + [CPL_RDMA_TERMINATE] = terminate, + [CPL_FW4_ACK] = fw4_ack, + [CPL_FW6_MSG] = deferred_fw6_msg, + [CPL_RX_PKT] = rx_pkt +}; + +static void process_timeout(struct c4iw_ep *ep) +{ + struct c4iw_qp_attributes attrs; + int abort = 1; + + mutex_lock(&ep->com.mutex); + PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, + ep->com.state); + set_bit(TIMEDOUT, &ep->com.history); + switch (ep->com.state) { + case MPA_REQ_SENT: + __state_set(&ep->com, ABORTING); + connect_reply_upcall(ep, -ETIMEDOUT); + break; + case MPA_REQ_WAIT: + __state_set(&ep->com, ABORTING); + break; + case CLOSING: + case MORIBUND: + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = C4IW_QP_STATE_ERROR; + c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + __state_set(&ep->com, ABORTING); + close_complete_upcall(ep, -ETIMEDOUT); + break; + case ABORTING: + case DEAD: + + /* + * These states are expected if the ep timed out at the same + * time as another thread was calling stop_ep_timer(). + * So we silently do nothing for these states. + */ + abort = 0; + break; + default: + WARN(1, "%s unexpected state ep %p tid %u state %u\n", + __func__, ep, ep->hwtid, ep->com.state); + abort = 0; + } + if (abort) + abort_connection(ep, NULL, GFP_KERNEL); + mutex_unlock(&ep->com.mutex); + c4iw_put_ep(&ep->com); +} + +static void process_timedout_eps(void) +{ + struct c4iw_ep *ep; + + spin_lock_irq(&timeout_lock); + while (!list_empty(&timeout_list)) { + struct list_head *tmp; + + tmp = timeout_list.next; + list_del(tmp); + tmp->next = NULL; + tmp->prev = NULL; + spin_unlock_irq(&timeout_lock); + ep = list_entry(tmp, struct c4iw_ep, entry); + process_timeout(ep); + spin_lock_irq(&timeout_lock); + } + spin_unlock_irq(&timeout_lock); +} + +static void process_work(struct work_struct *work) +{ + struct sk_buff *skb = NULL; + struct c4iw_dev *dev; + struct cpl_act_establish *rpl; + unsigned int opcode; + int ret; + + process_timedout_eps(); + while ((skb = skb_dequeue(&rxq))) { + rpl = cplhdr(skb); + dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); + opcode = rpl->ot.opcode; + + BUG_ON(!work_handlers[opcode]); + ret = work_handlers[opcode](dev, skb); + if (!ret) + kfree_skb(skb); + process_timedout_eps(); + } +} + +static DECLARE_WORK(skb_work, process_work); + +static void ep_timeout(unsigned long arg) +{ + struct c4iw_ep *ep = (struct c4iw_ep *)arg; + int kickit = 0; + + spin_lock(&timeout_lock); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { + /* + * Only insert if it is not already on the list. + */ + if (!ep->entry.next) { + list_add_tail(&ep->entry, &timeout_list); + kickit = 1; + } + } + spin_unlock(&timeout_lock); + if (kickit) + queue_work(workq, &skb_work); +} + +/* + * All the CM events are handled on a work queue to have a safe context. + */ +static int sched(struct c4iw_dev *dev, struct sk_buff *skb) +{ + + /* + * Save dev in the skb->cb area. 
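+ * The pointer is stored at offset sizeof(void *) in cb and + * process_work() reads it back from the same offset.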
+ */ + *((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev; + + /* + * Queue the skb and schedule the worker thread. + */ + skb_queue_tail(&rxq, skb); + queue_work(workq, &skb_work); + return 0; +} + +static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_set_tcb_rpl *rpl = cplhdr(skb); + + if (rpl->status != CPL_ERR_NONE) { + printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u " + "for tid %u\n", rpl->status, GET_TID(rpl)); + } + kfree_skb(skb); + return 0; +} + +static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_fw6_msg *rpl = cplhdr(skb); + struct c4iw_wr_wait *wr_waitp; + int ret; + + PDBG("%s type %u\n", __func__, rpl->type); + + switch (rpl->type) { + case FW6_TYPE_WR_RPL: + ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); + wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; + PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret); + if (wr_waitp) + c4iw_wake_up(wr_waitp, ret ? -ret : 0); + kfree_skb(skb); + break; + case FW6_TYPE_CQE: + case FW6_TYPE_OFLD_CONNECTION_WR_RPL: + sched(dev, skb); + break; + default: + printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__, + rpl->type); + kfree_skb(skb); + break; + } + return 0; +} + +static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) +{ + struct cpl_abort_req_rss *req = cplhdr(skb); + struct c4iw_ep *ep; + struct tid_info *t = dev->rdev.lldi.tids; + unsigned int tid = GET_TID(req); + + ep = lookup_tid(t, tid); + if (!ep) { + printk(KERN_WARNING MOD + "Abort on non-existent endpoint, tid %d\n", tid); + kfree_skb(skb); + return 0; + } + if (is_neg_adv(req->status)) { + PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, + ep->hwtid); + kfree_skb(skb); + return 0; + } + PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, + ep->com.state); + + /* + * Wake up any threads in rdma_init() or rdma_fini(). + * However, if we are on MPAv2 and want to retry with MPAv1 + * then, don't wake up yet. + */ + if (mpa_rev == 2 && !ep->tried_with_mpa_v1) { + if (ep->com.state != MPA_REQ_SENT) + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + } else + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + sched(dev, skb); + return 0; +} + +/* + * Most upcalls from the T4 Core go to sched() to + * schedule the processing on a work queue. + */ +c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = { + [CPL_ACT_ESTABLISH] = sched, + [CPL_ACT_OPEN_RPL] = sched, + [CPL_RX_DATA] = sched, + [CPL_ABORT_RPL_RSS] = sched, + [CPL_ABORT_RPL] = sched, + [CPL_PASS_OPEN_RPL] = sched, + [CPL_CLOSE_LISTSRV_RPL] = sched, + [CPL_PASS_ACCEPT_REQ] = sched, + [CPL_PASS_ESTABLISH] = sched, + [CPL_PEER_CLOSE] = sched, + [CPL_CLOSE_CON_RPL] = sched, + [CPL_ABORT_REQ_RSS] = peer_abort_intr, + [CPL_RDMA_TERMINATE] = sched, + [CPL_FW4_ACK] = sched, + [CPL_SET_TCB_RPL] = set_tcb_rpl, + [CPL_FW6_MSG] = fw6_msg, + [CPL_RX_PKT] = sched +}; + +int __init c4iw_cm_init(void) +{ + spin_lock_init(&timeout_lock); + skb_queue_head_init(&rxq); + + workq = create_singlethread_workqueue("iw_cxgb4"); + if (!workq) + return -ENOMEM; + + return 0; +} + +void c4iw_cm_term(void) +{ + WARN_ON(!list_empty(&timeout_list)); + flush_workqueue(workq); + destroy_workqueue(workq); +} diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c new file mode 100644 index 00000000000..c04292c950f --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -0,0 +1,1011 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "iw_cxgb4.h" + +static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, + struct c4iw_dev_ucontext *uctx) +{ + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + struct c4iw_wr_wait wr_wait; + struct sk_buff *skb; + int ret; + + wr_len = sizeof *res_wr + sizeof *res; + skb = alloc_skb(wr_len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32( + FW_WR_OP(FW_RI_RES_WR) | + V_FW_RI_RES_WR_NRES(1) | + FW_WR_COMPL(1)); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (unsigned long) &wr_wait; + res = res_wr->res; + res->u.cq.restype = FW_RI_RES_TYPE_CQ; + res->u.cq.op = FW_RI_RES_OP_RESET; + res->u.cq.iqid = cpu_to_be32(cq->cqid); + + c4iw_init_wr_wait(&wr_wait); + ret = c4iw_ofld_send(rdev, skb); + if (!ret) { + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + } + + kfree(cq->sw_queue); + dma_free_coherent(&(rdev->lldi.pdev->dev), + cq->memsize, cq->queue, + dma_unmap_addr(cq, mapping)); + c4iw_put_cqid(rdev, cq->cqid, uctx); + return ret; +} + +static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, + struct c4iw_dev_ucontext *uctx) +{ + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + int user = (uctx != &rdev->uctx); + struct c4iw_wr_wait wr_wait; + int ret; + struct sk_buff *skb; + + cq->cqid = c4iw_get_cqid(rdev, uctx); + if (!cq->cqid) { + ret = -ENOMEM; + goto err1; + } + + if (!user) { + cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL); + if (!cq->sw_queue) { + ret = -ENOMEM; + goto err2; + } + } + cq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, cq->memsize, + &cq->dma_addr, GFP_KERNEL); + if (!cq->queue) { + ret = -ENOMEM; + goto err3; + } + dma_unmap_addr_set(cq, mapping, cq->dma_addr); + memset(cq->queue, 0, cq->memsize); + + /* build fw_ri_res_wr */ + wr_len = sizeof *res_wr + sizeof *res; + + skb = alloc_skb(wr_len, GFP_KERNEL); + if (!skb) { + ret = -ENOMEM; + goto err4; + } + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, 
wr_len); + res_wr->op_nres = cpu_to_be32( + FW_WR_OP(FW_RI_RES_WR) | + V_FW_RI_RES_WR_NRES(1) | + FW_WR_COMPL(1)); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (unsigned long) &wr_wait; + res = res_wr->res; + res->u.cq.restype = FW_RI_RES_TYPE_CQ; + res->u.cq.op = FW_RI_RES_OP_WRITE; + res->u.cq.iqid = cpu_to_be32(cq->cqid); + res->u.cq.iqandst_to_iqandstindex = cpu_to_be32( + V_FW_RI_RES_WR_IQANUS(0) | + V_FW_RI_RES_WR_IQANUD(1) | + F_FW_RI_RES_WR_IQANDST | + V_FW_RI_RES_WR_IQANDSTINDEX( + rdev->lldi.ciq_ids[cq->vector])); + res->u.cq.iqdroprss_to_iqesize = cpu_to_be16( + F_FW_RI_RES_WR_IQDROPRSS | + V_FW_RI_RES_WR_IQPCIECH(2) | + V_FW_RI_RES_WR_IQINTCNTTHRESH(0) | + F_FW_RI_RES_WR_IQO | + V_FW_RI_RES_WR_IQESIZE(1)); + res->u.cq.iqsize = cpu_to_be16(cq->size); + res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); + + c4iw_init_wr_wait(&wr_wait); + + ret = c4iw_ofld_send(rdev, skb); + if (ret) + goto err4; + PDBG("%s wait_event wr_wait %p\n", __func__, &wr_wait); + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + if (ret) + goto err4; + + cq->gen = 1; + cq->gts = rdev->lldi.gts_reg; + cq->rdev = rdev; + if (user) { + cq->ugts = (u64)pci_resource_start(rdev->lldi.pdev, 2) + + (cq->cqid << rdev->cqshift); + cq->ugts &= PAGE_MASK; + } + return 0; +err4: + dma_free_coherent(&rdev->lldi.pdev->dev, cq->memsize, cq->queue, + dma_unmap_addr(cq, mapping)); +err3: + kfree(cq->sw_queue); +err2: + c4iw_put_cqid(rdev, cq->cqid, uctx); +err1: + return ret; +} + +static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) +{ + struct t4_cqe cqe; + + PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__, + wq, cq, cq->sw_cidx, cq->sw_pidx); + memset(&cqe, 0, sizeof(cqe)); + cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) | + V_CQE_OPCODE(FW_RI_SEND) | + V_CQE_TYPE(0) | + V_CQE_SWCQE(1) | + V_CQE_QPID(wq->sq.qid)); + cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen)); + cq->sw_queue[cq->sw_pidx] = cqe; + t4_swcq_produce(cq); +} + +int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count) +{ + int flushed = 0; + int in_use = wq->rq.in_use - count; + + BUG_ON(in_use < 0); + PDBG("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__, + wq, cq, wq->rq.in_use, count); + while (in_use--) { + insert_recv_cqe(wq, cq); + flushed++; + } + return flushed; +} + +static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq, + struct t4_swsqe *swcqe) +{ + struct t4_cqe cqe; + + PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__, + wq, cq, cq->sw_cidx, cq->sw_pidx); + memset(&cqe, 0, sizeof(cqe)); + cqe.header = cpu_to_be32(V_CQE_STATUS(T4_ERR_SWFLUSH) | + V_CQE_OPCODE(swcqe->opcode) | + V_CQE_TYPE(1) | + V_CQE_SWCQE(1) | + V_CQE_QPID(wq->sq.qid)); + CQE_WRID_SQ_IDX(&cqe) = swcqe->idx; + cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen)); + cq->sw_queue[cq->sw_pidx] = cqe; + t4_swcq_produce(cq); +} + +static void advance_oldest_read(struct t4_wq *wq); + +int c4iw_flush_sq(struct c4iw_qp *qhp) +{ + int flushed = 0; + struct t4_wq *wq = &qhp->wq; + struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq); + struct t4_cq *cq = &chp->cq; + int idx; + struct t4_swsqe *swsqe; + + if (wq->sq.flush_cidx == -1) + wq->sq.flush_cidx = wq->sq.cidx; + idx = wq->sq.flush_cidx; + BUG_ON(idx >= wq->sq.size); + while (idx != wq->sq.pidx) { + swsqe = &wq->sq.sw_sq[idx]; + BUG_ON(swsqe->flushed); + swsqe->flushed = 1; + insert_sq_cqe(wq, cq, swsqe); + if (wq->sq.oldest_read == swsqe) { + BUG_ON(swsqe->opcode != FW_RI_READ_REQ); + advance_oldest_read(wq); + } + 
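		/* insert_sq_cqe() queued one more flush CQE for this WR */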
+		flushed++;
+		if (++idx == wq->sq.size)
+			idx = 0;
+	}
+	wq->sq.flush_cidx += flushed;
+	if (wq->sq.flush_cidx >= wq->sq.size)
+		wq->sq.flush_cidx -= wq->sq.size;
+	return flushed;
+}
+
+static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
+{
+	struct t4_swsqe *swsqe;
+	int cidx;
+
+	if (wq->sq.flush_cidx == -1)
+		wq->sq.flush_cidx = wq->sq.cidx;
+	cidx = wq->sq.flush_cidx;
+	BUG_ON(cidx > wq->sq.size);
+
+	while (cidx != wq->sq.pidx) {
+		swsqe = &wq->sq.sw_sq[cidx];
+		if (!swsqe->signaled) {
+			if (++cidx == wq->sq.size)
+				cidx = 0;
+		} else if (swsqe->complete) {
+
+			BUG_ON(swsqe->flushed);
+
+			/*
+			 * Insert this completed cqe into the swcq.
+			 */
+			PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
+			     __func__, cidx, cq->sw_pidx);
+			swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
+			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
+			t4_swcq_produce(cq);
+			swsqe->flushed = 1;
+			if (++cidx == wq->sq.size)
+				cidx = 0;
+			wq->sq.flush_cidx = cidx;
+		} else
+			break;
+	}
+}
+
+static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
+				struct t4_cqe *read_cqe)
+{
+	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
+	read_cqe->len = htonl(wq->sq.oldest_read->read_len);
+	read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
+				 V_CQE_SWCQE(SW_CQE(hw_cqe)) |
+				 V_CQE_OPCODE(FW_RI_READ_REQ) |
+				 V_CQE_TYPE(1));
+	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
+}
+
+static void advance_oldest_read(struct t4_wq *wq)
+{
+
+	u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1;
+
+	if (rptr == wq->sq.size)
+		rptr = 0;
+	while (rptr != wq->sq.pidx) {
+		wq->sq.oldest_read = &wq->sq.sw_sq[rptr];
+
+		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
+			return;
+		if (++rptr == wq->sq.size)
+			rptr = 0;
+	}
+	wq->sq.oldest_read = NULL;
+}
+
+/*
+ * Move all CQEs from the HWCQ into the SWCQ.
+ * Deal with out-of-order completions and completions that complete
+ * prior unsignaled WRs.
+ */
+void c4iw_flush_hw_cq(struct c4iw_cq *chp)
+{
+	struct t4_cqe *hw_cqe, *swcqe, read_cqe;
+	struct c4iw_qp *qhp;
+	struct t4_swsqe *swsqe;
+	int ret;
+
+	PDBG("%s cqid 0x%x\n", __func__, chp->cq.cqid);
+	ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+
+	/*
+	 * This logic is similar to poll_cq(), but not quite the same
+	 * unfortunately.  Need to move pertinent HW CQEs to the SW CQ but
+	 * also do any translation magic that poll_cq() normally does.
+	 */
+	while (!ret) {
+		qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));
+
+		/*
+		 * drop CQEs with no associated QP
+		 */
+		if (qhp == NULL)
+			goto next_cqe;
+
+		if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
+			goto next_cqe;
+
+		if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
+
+			/* If we have reached here because of async
+			 * event or other error, and have egress error
+			 * then drop
+			 */
+			if (CQE_TYPE(hw_cqe) == 1)
+				goto next_cqe;
+
+			/* drop peer2peer RTR reads.
+			 */
+			if (CQE_WRID_STAG(hw_cqe) == 1)
+				goto next_cqe;
+
+			/*
+			 * Eat completions for unsignaled read WRs.
+			 */
+			if (!qhp->wq.sq.oldest_read->signaled) {
+				advance_oldest_read(&qhp->wq);
+				goto next_cqe;
+			}
+
+			/*
+			 * Don't write to the HWCQ, create a new read req CQE
+			 * in local memory and move it into the swcq.
+			 */
+			create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
+			hw_cqe = &read_cqe;
+			advance_oldest_read(&qhp->wq);
+		}
+
+		/* if it's an SQ completion, then do the magic to move all the
+		 * unsignaled and now in-order completions into the swcq.
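+		 * flush_completed_wrs() does the actual walk and
+		 * promotion.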
+		 */
+		if (SQ_TYPE(hw_cqe)) {
+			swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
+			swsqe->cqe = *hw_cqe;
+			swsqe->complete = 1;
+			flush_completed_wrs(&qhp->wq, &chp->cq);
+		} else {
+			swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
+			*swcqe = *hw_cqe;
+			swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
+			t4_swcq_produce(&chp->cq);
+		}
+next_cqe:
+		t4_hwcq_consume(&chp->cq);
+		ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+	}
+}
+
+static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
+{
+	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
+		return 0;
+
+	if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
+		return 0;
+
+	if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
+		return 0;
+
+	if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
+		return 0;
+	return 1;
+}
+
+void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
+{
+	struct t4_cqe *cqe;
+	u32 ptr;
+
+	*count = 0;
+	PDBG("%s count zero %d\n", __func__, *count);
+	ptr = cq->sw_cidx;
+	while (ptr != cq->sw_pidx) {
+		cqe = &cq->sw_queue[ptr];
+		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
+		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
+			(*count)++;
+		if (++ptr == cq->size)
+			ptr = 0;
+	}
+	PDBG("%s cq %p count %d\n", __func__, cq, *count);
+}
+
+/*
+ * poll_cq
+ *
+ * Caller must:
+ *	check the validity of the first CQE,
+ *	supply the wq associated with the qpid.
+ *
+ * credit: cq credit to return to sge.
+ * cqe_flushed: 1 iff the CQE is flushed.
+ * cqe: copy of the polled CQE.
+ *
+ * return value:
+ *	0	    CQE returned ok.
+ *	-EAGAIN     CQE skipped, try again.
+ *	-EOVERFLOW  CQ overflow detected.
+ */
+static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
+		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
+{
+	int ret = 0;
+	struct t4_cqe *hw_cqe, read_cqe;
+
+	*cqe_flushed = 0;
+	*credit = 0;
+	ret = t4_next_cqe(cq, &hw_cqe);
+	if (ret)
+		return ret;
+
+	PDBG("%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x"
+	     " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+	     __func__, CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
+	     CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
+	     CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
+	     CQE_WRID_LOW(hw_cqe));
+
+	/*
+	 * skip cqe's not affiliated with a QP.
+	 */
+	if (wq == NULL) {
+		ret = -EAGAIN;
+		goto skip_cqe;
+	}
+
+	/*
+	 * skip hw cqe's if the wq is flushed.
+	 */
+	if (wq->flushed && !SW_CQE(hw_cqe)) {
+		ret = -EAGAIN;
+		goto skip_cqe;
+	}
+
+	/*
+	 * skip TERMINATE cqes...
+	 */
+	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
+		ret = -EAGAIN;
+		goto skip_cqe;
+	}
+
+	/*
+	 * Gotta tweak READ completions:
+	 *	1) the cqe doesn't contain the sq_wptr from the wr.
+	 *	2) opcode not reflected from the wr.
+	 *	3) read_len not reflected from the wr.
+	 *	4) cq_type is RQ_TYPE not SQ_TYPE.
+	 */
+	if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {
+
+		/* If we have reached here because of async
+		 * event or other error, and have egress error
+		 * then drop
+		 */
+		if (CQE_TYPE(hw_cqe) == 1) {
+			if (CQE_STATUS(hw_cqe))
+				t4_set_wq_in_error(wq);
+			ret = -EAGAIN;
+			goto skip_cqe;
+		}
+
+		/* If this is an unsolicited read response, then the read
+		 * was generated by the kernel driver as part of peer-2-peer
+		 * connection setup. So ignore the completion.
+		 */
+		if (CQE_WRID_STAG(hw_cqe) == 1) {
+			if (CQE_STATUS(hw_cqe))
+				t4_set_wq_in_error(wq);
+			ret = -EAGAIN;
+			goto skip_cqe;
+		}
+
+		/*
+		 * Eat completions for unsignaled read WRs.
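+		 * No work completion was requested for these, so just
+		 * step oldest_read past them and drop the CQE.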
+		 */
+		if (!wq->sq.oldest_read->signaled) {
+			advance_oldest_read(wq);
+			ret = -EAGAIN;
+			goto skip_cqe;
+		}
+
+		/*
+		 * Don't write to the HWCQ, so create a new read req CQE
+		 * in local memory.
+		 */
+		create_read_req_cqe(wq, hw_cqe, &read_cqe);
+		hw_cqe = &read_cqe;
+		advance_oldest_read(wq);
+	}
+
+	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
+		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
+		t4_set_wq_in_error(wq);
+	}
+
+	/*
+	 * RECV completion.
+	 */
+	if (RQ_TYPE(hw_cqe)) {
+
+		/*
+		 * HW only validates 4 bits of MSN.  So we must validate that
+		 * the MSN in the SEND is the next expected MSN.  If it's not,
+		 * then we complete this with T4_ERR_MSN and mark the wq in
+		 * error.
+		 */
+
+		if (t4_rq_empty(wq)) {
+			t4_set_wq_in_error(wq);
+			ret = -EAGAIN;
+			goto skip_cqe;
+		}
+		if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
+			t4_set_wq_in_error(wq);
+			hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN));
+			goto proc_cqe;
+		}
+		goto proc_cqe;
+	}
+
+	/*
+	 * If we get here it's a send completion.
+	 *
+	 * Handle out of order completion. These get stuffed
+	 * in the SW SQ. Then the SW SQ is walked to move any
+	 * now in-order completions into the SW CQ.  This handles
+	 * 2 cases:
+	 *	1) reaping unsignaled WRs when the first subsequent
+	 *	   signaled WR is completed.
+	 *	2) out of order read completions.
+	 */
+	if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
+		struct t4_swsqe *swsqe;
+
+		PDBG("%s out of order completion going in sw_sq at idx %u\n",
+		     __func__, CQE_WRID_SQ_IDX(hw_cqe));
+		swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
+		swsqe->cqe = *hw_cqe;
+		swsqe->complete = 1;
+		ret = -EAGAIN;
+		goto flush_wq;
+	}
+
+proc_cqe:
+	*cqe = *hw_cqe;
+
+	/*
+	 * Reap the associated WR(s) that are freed up with this
+	 * completion.
+	 */
+	if (SQ_TYPE(hw_cqe)) {
+		int idx = CQE_WRID_SQ_IDX(hw_cqe);
+		BUG_ON(idx >= wq->sq.size);
+
+		/*
+		 * Account for any unsignaled completions completed by
+		 * this signaled completion.  In this case, cidx points
+		 * to the first unsignaled one, and idx points to the
+		 * signaled one.  So adjust in_use based on this delta.
+		 * if this is not completing any unsignaled wrs, then the
+		 * delta will be 0. Handle wrapping also!
+		 */
+		if (idx < wq->sq.cidx)
+			wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
+		else
+			wq->sq.in_use -= idx - wq->sq.cidx;
+		BUG_ON(wq->sq.in_use <= 0 || wq->sq.in_use >= wq->sq.size);
+
+		wq->sq.cidx = (uint16_t)idx;
+		PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
+		*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
+		t4_sq_consume(wq);
+	} else {
+		PDBG("%s completing rq idx %u\n", __func__, wq->rq.cidx);
+		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
+		BUG_ON(t4_rq_empty(wq));
+		t4_rq_consume(wq);
+		goto skip_cqe;
+	}
+
+flush_wq:
+	/*
+	 * Flush any completed cqes that are now in-order.
+	 */
+	flush_completed_wrs(wq, cq);
+
+skip_cqe:
+	if (SW_CQE(hw_cqe)) {
+		PDBG("%s cq %p cqid 0x%x skip sw cqe cidx %u\n",
+		     __func__, cq, cq->cqid, cq->sw_cidx);
+		t4_swcq_consume(cq);
+	} else {
+		PDBG("%s cq %p cqid 0x%x skip hw cqe cidx %u\n",
+		     __func__, cq, cq->cqid, cq->cidx);
+		t4_hwcq_consume(cq);
+	}
+	return ret;
+}
+
+/*
+ * Get one cq entry from c4iw and map it to openib.
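+ * (i.e. translate a single t4_cqe into an ib_wc work completion)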
+ * + * Returns: + * 0 cqe returned + * -ENODATA EMPTY; + * -EAGAIN caller must try again + * any other -errno fatal error + */ +static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +{ + struct c4iw_qp *qhp = NULL; + struct t4_cqe uninitialized_var(cqe), *rd_cqe; + struct t4_wq *wq; + u32 credit = 0; + u8 cqe_flushed; + u64 cookie = 0; + int ret; + + ret = t4_next_cqe(&chp->cq, &rd_cqe); + + if (ret) + return ret; + + qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); + if (!qhp) + wq = NULL; + else { + spin_lock(&qhp->lock); + wq = &(qhp->wq); + } + ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); + if (ret) + goto out; + + wc->wr_id = cookie; + wc->qp = &qhp->ibqp; + wc->vendor_err = CQE_STATUS(&cqe); + wc->wc_flags = 0; + + PDBG("%s qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x " + "lo 0x%x cookie 0x%llx\n", __func__, CQE_QPID(&cqe), + CQE_TYPE(&cqe), CQE_OPCODE(&cqe), CQE_STATUS(&cqe), CQE_LEN(&cqe), + CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe), (unsigned long long)cookie); + + if (CQE_TYPE(&cqe) == 0) { + if (!CQE_STATUS(&cqe)) + wc->byte_len = CQE_LEN(&cqe); + else + wc->byte_len = 0; + wc->opcode = IB_WC_RECV; + if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV || + CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { + wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + } + } else { + switch (CQE_OPCODE(&cqe)) { + case FW_RI_RDMA_WRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case FW_RI_READ_REQ: + wc->opcode = IB_WC_RDMA_READ; + wc->byte_len = CQE_LEN(&cqe); + break; + case FW_RI_SEND_WITH_INV: + case FW_RI_SEND_WITH_SE_INV: + wc->opcode = IB_WC_SEND; + wc->wc_flags |= IB_WC_WITH_INVALIDATE; + break; + case FW_RI_SEND: + case FW_RI_SEND_WITH_SE: + wc->opcode = IB_WC_SEND; + break; + case FW_RI_BIND_MW: + wc->opcode = IB_WC_BIND_MW; + break; + + case FW_RI_LOCAL_INV: + wc->opcode = IB_WC_LOCAL_INV; + break; + case FW_RI_FAST_REGISTER: + wc->opcode = IB_WC_FAST_REG_MR; + break; + default: + printk(KERN_ERR MOD "Unexpected opcode %d " + "in the CQE received for QPID=0x%0x\n", + CQE_OPCODE(&cqe), CQE_QPID(&cqe)); + ret = -EINVAL; + goto out; + } + } + + if (cqe_flushed) + wc->status = IB_WC_WR_FLUSH_ERR; + else { + + switch (CQE_STATUS(&cqe)) { + case T4_ERR_SUCCESS: + wc->status = IB_WC_SUCCESS; + break; + case T4_ERR_STAG: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case T4_ERR_PDID: + wc->status = IB_WC_LOC_PROT_ERR; + break; + case T4_ERR_QPID: + case T4_ERR_ACCESS: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case T4_ERR_WRAP: + wc->status = IB_WC_GENERAL_ERR; + break; + case T4_ERR_BOUND: + wc->status = IB_WC_LOC_LEN_ERR; + break; + case T4_ERR_INVALIDATE_SHARED_MR: + case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND: + wc->status = IB_WC_MW_BIND_ERR; + break; + case T4_ERR_CRC: + case T4_ERR_MARKER: + case T4_ERR_PDU_LEN_ERR: + case T4_ERR_OUT_OF_RQE: + case T4_ERR_DDP_VERSION: + case T4_ERR_RDMA_VERSION: + case T4_ERR_DDP_QUEUE_NUM: + case T4_ERR_MSN: + case T4_ERR_TBIT: + case T4_ERR_MO: + case T4_ERR_MSN_RANGE: + case T4_ERR_IRD_OVERFLOW: + case T4_ERR_OPCODE: + case T4_ERR_INTERNAL_ERR: + wc->status = IB_WC_FATAL_ERR; + break; + case T4_ERR_SWFLUSH: + wc->status = IB_WC_WR_FLUSH_ERR; + break; + default: + printk(KERN_ERR MOD + "Unexpected cqe_status 0x%x for QPID=0x%0x\n", + CQE_STATUS(&cqe), CQE_QPID(&cqe)); + ret = -EINVAL; + } + } +out: + if (wq) + spin_unlock(&qhp->lock); + return ret; +} + +int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct c4iw_cq *chp; + unsigned long flags; 
+	int npolled;
+	int err = 0;
+
+	chp = to_c4iw_cq(ibcq);
+
+	spin_lock_irqsave(&chp->lock, flags);
+	for (npolled = 0; npolled < num_entries; ++npolled) {
+		do {
+			err = c4iw_poll_cq_one(chp, wc + npolled);
+		} while (err == -EAGAIN);
+		if (err)
+			break;
+	}
+	spin_unlock_irqrestore(&chp->lock, flags);
+	return !err || err == -ENODATA ? npolled : err;
+}
+
+int c4iw_destroy_cq(struct ib_cq *ib_cq)
+{
+	struct c4iw_cq *chp;
+	struct c4iw_ucontext *ucontext;
+
+	PDBG("%s ib_cq %p\n", __func__, ib_cq);
+	chp = to_c4iw_cq(ib_cq);
+
+	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
+	atomic_dec(&chp->refcnt);
+	wait_event(chp->wait, !atomic_read(&chp->refcnt));
+
+	ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
+				  : NULL;
+	destroy_cq(&chp->rhp->rdev, &chp->cq,
+		   ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx);
+	kfree(chp);
+	return 0;
+}
+
+struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
+			     int vector, struct ib_ucontext *ib_context,
+			     struct ib_udata *udata)
+{
+	struct c4iw_dev *rhp;
+	struct c4iw_cq *chp;
+	struct c4iw_create_cq_resp uresp;
+	struct c4iw_ucontext *ucontext = NULL;
+	int ret;
+	size_t memsize, hwentries;
+	struct c4iw_mm_entry *mm, *mm2;
+
+	PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
+
+	rhp = to_c4iw_dev(ibdev);
+
+	if (vector >= rhp->rdev.lldi.nciq)
+		return ERR_PTR(-EINVAL);
+
+	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
+	if (!chp)
+		return ERR_PTR(-ENOMEM);
+
+	if (ib_context)
+		ucontext = to_c4iw_ucontext(ib_context);
+
+	/* account for the status page. */
+	entries++;
+
+	/* IQ needs one extra entry to differentiate full vs empty. */
+	entries++;
+
+	/*
+	 * entries must be a multiple of 16 for HW.
+	 */
+	entries = roundup(entries, 16);
+
+	/*
+	 * Make actual HW queue 2x to avoid cidx_inc overflows.
+	 */
+	hwentries = min(entries * 2, T4_MAX_IQ_SIZE);
+
+	/*
+	 * Make HW queue at least 64 entries so GTS updates aren't too
+	 * frequent.
+	 */
+	if (hwentries < 64)
+		hwentries = 64;
+
+	memsize = hwentries * sizeof *chp->cq.queue;
+
+	/*
+	 * memsize must be a multiple of the page size if it's a user cq.
+	 */
+	if (ucontext) {
+		memsize = roundup(memsize, PAGE_SIZE);
+		hwentries = memsize / sizeof *chp->cq.queue;
+		while (hwentries > T4_MAX_IQ_SIZE) {
+			memsize -= PAGE_SIZE;
+			hwentries = memsize / sizeof *chp->cq.queue;
+		}
+	}
+	chp->cq.size = hwentries;
+	chp->cq.memsize = memsize;
+	chp->cq.vector = vector;
+
+	ret = create_cq(&rhp->rdev, &chp->cq,
+			ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+	if (ret)
+		goto err1;
+
+	chp->rhp = rhp;
+	chp->cq.size--;				/* status page */
+	chp->ibcq.cqe = entries - 2;
+	spin_lock_init(&chp->lock);
+	spin_lock_init(&chp->comp_handler_lock);
+	atomic_set(&chp->refcnt, 1);
+	init_waitqueue_head(&chp->wait);
+	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+	if (ret)
+		goto err2;
+
+	if (ucontext) {
+		mm = kmalloc(sizeof *mm, GFP_KERNEL);
+		if (!mm) {
+			ret = -ENOMEM;
+			goto err3;
+		}
+		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+		if (!mm2) {
+			ret = -ENOMEM;
+			goto err4;
+		}
+
+		uresp.qid_mask = rhp->rdev.cqmask;
+		uresp.cqid = chp->cq.cqid;
+		uresp.size = chp->cq.size;
+		uresp.memsize = chp->cq.memsize;
+		spin_lock(&ucontext->mmap_lock);
+		uresp.key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		uresp.gts_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		spin_unlock(&ucontext->mmap_lock);
+		ret = ib_copy_to_udata(udata, &uresp,
+				       sizeof(uresp) - sizeof(uresp.reserved));
+		if (ret)
+			goto err5;
+
+		mm->key = uresp.key;
+		mm->addr = virt_to_phys(chp->cq.queue);
+		mm->len = chp->cq.memsize;
+		insert_mmap(ucontext, mm);
+
+		mm2->key = uresp.gts_key;
+		mm2->addr = chp->cq.ugts;
+		mm2->len = PAGE_SIZE;
+		insert_mmap(ucontext, mm2);
+	}
+	PDBG("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
+	     __func__, chp->cq.cqid, chp, chp->cq.size,
+	     chp->cq.memsize,
+	     (unsigned long long) chp->cq.dma_addr);
+	return &chp->ibcq;
+err5:
+	kfree(mm2);
+err4:
+	kfree(mm);
+err3:
+	remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
+err2:
+	destroy_cq(&chp->rhp->rdev, &chp->cq,
+		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+err1:
+	kfree(chp);
+	return ERR_PTR(ret);
+}
+
+int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+	return -ENOSYS;
+}
+
+int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+	struct c4iw_cq *chp;
+	int ret;
+	unsigned long flag;
+
+	chp = to_c4iw_cq(ibcq);
+	spin_lock_irqsave(&chp->lock, flag);
+	ret = t4_arm_cq(&chp->cq,
+			(flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+	spin_unlock_irqrestore(&chp->lock, flag);
+	if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
+		ret = 0;
+	return ret;
+}
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
new file mode 100644
index 00000000000..7db82b24302
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -0,0 +1,1362 @@
+/*
+ * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/debugfs.h> +#include <linux/vmalloc.h> + +#include <rdma/ib_verbs.h> + +#include "iw_cxgb4.h" + +#define DRV_VERSION "0.1" + +MODULE_AUTHOR("Steve Wise"); +MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); + +static int allow_db_fc_on_t5; +module_param(allow_db_fc_on_t5, int, 0644); +MODULE_PARM_DESC(allow_db_fc_on_t5, + "Allow DB Flow Control on T5 (default = 0)"); + +static int allow_db_coalescing_on_t5; +module_param(allow_db_coalescing_on_t5, int, 0644); +MODULE_PARM_DESC(allow_db_coalescing_on_t5, + "Allow DB Coalescing on T5 (default = 0)"); + +struct uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; + struct c4iw_dev *dev; +}; + +static LIST_HEAD(uld_ctx_list); +static DEFINE_MUTEX(dev_mutex); + +#define DB_FC_RESUME_SIZE 64 +#define DB_FC_RESUME_DELAY 1 +#define DB_FC_DRAIN_THRESH 0 + +static struct dentry *c4iw_debugfs_root; + +struct c4iw_debugfs_data { + struct c4iw_dev *devp; + char *buf; + int bufsize; + int pos; +}; + +/* registered cxgb4 netlink callbacks */ +static struct ibnl_client_cbs c4iw_nl_cb_table[] = { + [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, + [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, + [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, + [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, + [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, + [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} +}; + +static int count_idrs(int id, void *p, void *data) +{ + int *countp = data; + + *countp = *countp + 1; + return 0; +} + +static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct c4iw_debugfs_data *d = file->private_data; + + return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos); +} + +static int dump_qp(int id, void *p, void *data) +{ + struct c4iw_qp *qp = p; + struct c4iw_debugfs_data *qpd = data; + int space; + int cc; + + if (id != qp->wq.sq.qid) + return 0; + + space = qpd->bufsize - qpd->pos - 1; + if (space == 0) + return 1; + + if (qp->ep) { + if (qp->ep->com.local_addr.ss_family == AF_INET) { + struct sockaddr_in *lsin = (struct sockaddr_in *) + &qp->ep->com.local_addr; + struct sockaddr_in *rsin = (struct sockaddr_in *) + &qp->ep->com.remote_addr; + struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) + &qp->ep->com.mapped_local_addr; + struct sockaddr_in *mapped_rsin = (struct sockaddr_in *) + &qp->ep->com.mapped_remote_addr; + + cc = snprintf(qpd->buf + qpd->pos, space, + "rc qp sq id %u rq id %u state %u " + "onchip %u ep tid %u state %u " + "%pI4:%u/%u->%pI4:%u/%u\n", + qp->wq.sq.qid, qp->wq.rq.qid, + (int)qp->attr.state, + qp->wq.sq.flags & T4_SQ_ONCHIP, + qp->ep->hwtid, (int)qp->ep->com.state, + &lsin->sin_addr, ntohs(lsin->sin_port), + ntohs(mapped_lsin->sin_port), + &rsin->sin_addr, ntohs(rsin->sin_port), + ntohs(mapped_rsin->sin_port)); + } else { + struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) + &qp->ep->com.local_addr; + struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *) + &qp->ep->com.remote_addr; + struct sockaddr_in6 *mapped_lsin6 = + (struct sockaddr_in6 *) + 
&qp->ep->com.mapped_local_addr; + struct sockaddr_in6 *mapped_rsin6 = + (struct sockaddr_in6 *) + &qp->ep->com.mapped_remote_addr; + + cc = snprintf(qpd->buf + qpd->pos, space, + "rc qp sq id %u rq id %u state %u " + "onchip %u ep tid %u state %u " + "%pI6:%u/%u->%pI6:%u/%u\n", + qp->wq.sq.qid, qp->wq.rq.qid, + (int)qp->attr.state, + qp->wq.sq.flags & T4_SQ_ONCHIP, + qp->ep->hwtid, (int)qp->ep->com.state, + &lsin6->sin6_addr, + ntohs(lsin6->sin6_port), + ntohs(mapped_lsin6->sin6_port), + &rsin6->sin6_addr, + ntohs(rsin6->sin6_port), + ntohs(mapped_rsin6->sin6_port)); + } + } else + cc = snprintf(qpd->buf + qpd->pos, space, + "qp sq id %u rq id %u state %u onchip %u\n", + qp->wq.sq.qid, qp->wq.rq.qid, + (int)qp->attr.state, + qp->wq.sq.flags & T4_SQ_ONCHIP); + if (cc < space) + qpd->pos += cc; + return 0; +} + +static int qp_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *qpd = file->private_data; + if (!qpd) { + printk(KERN_INFO "%s null qpd?\n", __func__); + return 0; + } + vfree(qpd->buf); + kfree(qpd); + return 0; +} + +static int qp_open(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *qpd; + int ret = 0; + int count = 1; + + qpd = kmalloc(sizeof *qpd, GFP_KERNEL); + if (!qpd) { + ret = -ENOMEM; + goto out; + } + qpd->devp = inode->i_private; + qpd->pos = 0; + + spin_lock_irq(&qpd->devp->lock); + idr_for_each(&qpd->devp->qpidr, count_idrs, &count); + spin_unlock_irq(&qpd->devp->lock); + + qpd->bufsize = count * 128; + qpd->buf = vmalloc(qpd->bufsize); + if (!qpd->buf) { + ret = -ENOMEM; + goto err1; + } + + spin_lock_irq(&qpd->devp->lock); + idr_for_each(&qpd->devp->qpidr, dump_qp, qpd); + spin_unlock_irq(&qpd->devp->lock); + + qpd->buf[qpd->pos++] = 0; + file->private_data = qpd; + goto out; +err1: + kfree(qpd); +out: + return ret; +} + +static const struct file_operations qp_debugfs_fops = { + .owner = THIS_MODULE, + .open = qp_open, + .release = qp_release, + .read = debugfs_read, + .llseek = default_llseek, +}; + +static int dump_stag(int id, void *p, void *data) +{ + struct c4iw_debugfs_data *stagd = data; + int space; + int cc; + + space = stagd->bufsize - stagd->pos - 1; + if (space == 0) + return 1; + + cc = snprintf(stagd->buf + stagd->pos, space, "0x%x\n", id<<8); + if (cc < space) + stagd->pos += cc; + return 0; +} + +static int stag_release(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *stagd = file->private_data; + if (!stagd) { + printk(KERN_INFO "%s null stagd?\n", __func__); + return 0; + } + kfree(stagd->buf); + kfree(stagd); + return 0; +} + +static int stag_open(struct inode *inode, struct file *file) +{ + struct c4iw_debugfs_data *stagd; + int ret = 0; + int count = 1; + + stagd = kmalloc(sizeof *stagd, GFP_KERNEL); + if (!stagd) { + ret = -ENOMEM; + goto out; + } + stagd->devp = inode->i_private; + stagd->pos = 0; + + spin_lock_irq(&stagd->devp->lock); + idr_for_each(&stagd->devp->mmidr, count_idrs, &count); + spin_unlock_irq(&stagd->devp->lock); + + stagd->bufsize = count * sizeof("0x12345678\n"); + stagd->buf = kmalloc(stagd->bufsize, GFP_KERNEL); + if (!stagd->buf) { + ret = -ENOMEM; + goto err1; + } + + spin_lock_irq(&stagd->devp->lock); + idr_for_each(&stagd->devp->mmidr, dump_stag, stagd); + spin_unlock_irq(&stagd->devp->lock); + + stagd->buf[stagd->pos++] = 0; + file->private_data = stagd; + goto out; +err1: + kfree(stagd); +out: + return ret; +} + +static const struct file_operations stag_debugfs_fops = { + .owner = THIS_MODULE, + .open = stag_open, + .release = stag_release, + 
.read = debugfs_read, + .llseek = default_llseek, +}; + +static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"}; + +static int stats_show(struct seq_file *seq, void *v) +{ + struct c4iw_dev *dev = seq->private; + + seq_printf(seq, " Object: %10s %10s %10s %10s\n", "Total", "Current", + "Max", "Fail"); + seq_printf(seq, " PDID: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur, + dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail); + seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur, + dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail); + seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur, + dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail); + seq_printf(seq, " PBLMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur, + dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail); + seq_printf(seq, " RQTMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur, + dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail); + seq_printf(seq, " OCQPMEM: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur, + dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail); + seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full); + seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty); + seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop); + seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n", + db_state_str[dev->db_state], + dev->rdev.stats.db_state_transitions, + dev->rdev.stats.db_fc_interruptions); + seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full); + seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n", + dev->rdev.stats.act_ofld_conn_fails); + seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n", + dev->rdev.stats.pas_ofld_conn_fails); + return 0; +} + +static int stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, stats_show, inode->i_private); +} + +static ssize_t stats_clear(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private; + + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.pd.max = 0; + dev->rdev.stats.pd.fail = 0; + dev->rdev.stats.qid.max = 0; + dev->rdev.stats.qid.fail = 0; + dev->rdev.stats.stag.max = 0; + dev->rdev.stats.stag.fail = 0; + dev->rdev.stats.pbl.max = 0; + dev->rdev.stats.pbl.fail = 0; + dev->rdev.stats.rqt.max = 0; + dev->rdev.stats.rqt.fail = 0; + dev->rdev.stats.ocqp.max = 0; + dev->rdev.stats.ocqp.fail = 0; + dev->rdev.stats.db_full = 0; + dev->rdev.stats.db_empty = 0; + dev->rdev.stats.db_drop = 0; + dev->rdev.stats.db_state_transitions = 0; + dev->rdev.stats.tcam_full = 0; + dev->rdev.stats.act_ofld_conn_fails = 0; + dev->rdev.stats.pas_ofld_conn_fails = 0; + mutex_unlock(&dev->rdev.stats.lock); + return count; +} + +static const struct file_operations stats_debugfs_fops = { + .owner = THIS_MODULE, + .open = stats_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, + .write = stats_clear, +}; + +static int dump_ep(int id, void *p, void *data) +{ + struct c4iw_ep *ep = p; + struct c4iw_debugfs_data *epd = data; + int space; + int cc; + + space = epd->bufsize - epd->pos - 1; + if (space == 0) + return 1; + + if (ep->com.local_addr.ss_family == AF_INET) { + struct sockaddr_in *lsin = (struct sockaddr_in 
*)
+			&ep->com.local_addr;
+		struct sockaddr_in *rsin = (struct sockaddr_in *)
+			&ep->com.remote_addr;
+		struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+			&ep->com.mapped_local_addr;
+		struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
+			&ep->com.mapped_remote_addr;
+
+		cc = snprintf(epd->buf + epd->pos, space,
+			      "ep %p cm_id %p qp %p state %d flags 0x%lx "
+			      "history 0x%lx hwtid %d atid %d "
+			      "%pI4:%d/%d <-> %pI4:%d/%d\n",
+			      ep, ep->com.cm_id, ep->com.qp,
+			      (int)ep->com.state, ep->com.flags,
+			      ep->com.history, ep->hwtid, ep->atid,
+			      &lsin->sin_addr, ntohs(lsin->sin_port),
+			      ntohs(mapped_lsin->sin_port),
+			      &rsin->sin_addr, ntohs(rsin->sin_port),
+			      ntohs(mapped_rsin->sin_port));
+	} else {
+		struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
+			&ep->com.local_addr;
+		struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
+			&ep->com.remote_addr;
+		struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+			&ep->com.mapped_local_addr;
+		struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
+			&ep->com.mapped_remote_addr;
+
+		cc = snprintf(epd->buf + epd->pos, space,
+			      "ep %p cm_id %p qp %p state %d flags 0x%lx "
+			      "history 0x%lx hwtid %d atid %d "
+			      "%pI6:%d/%d <-> %pI6:%d/%d\n",
+			      ep, ep->com.cm_id, ep->com.qp,
+			      (int)ep->com.state, ep->com.flags,
+			      ep->com.history, ep->hwtid, ep->atid,
+			      &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
+			      ntohs(mapped_lsin6->sin6_port),
+			      &rsin6->sin6_addr, ntohs(rsin6->sin6_port),
+			      ntohs(mapped_rsin6->sin6_port));
+	}
+	if (cc < space)
+		epd->pos += cc;
+	return 0;
+}
+
+static int dump_listen_ep(int id, void *p, void *data)
+{
+	struct c4iw_listen_ep *ep = p;
+	struct c4iw_debugfs_data *epd = data;
+	int space;
+	int cc;
+
+	space = epd->bufsize - epd->pos - 1;
+	if (space == 0)
+		return 1;
+
+	if (ep->com.local_addr.ss_family == AF_INET) {
+		struct sockaddr_in *lsin = (struct sockaddr_in *)
+			&ep->com.local_addr;
+		struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+			&ep->com.mapped_local_addr;
+
+		cc = snprintf(epd->buf + epd->pos, space,
+			      "ep %p cm_id %p state %d flags 0x%lx stid %d "
+			      "backlog %d %pI4:%d/%d\n",
+			      ep, ep->com.cm_id, (int)ep->com.state,
+			      ep->com.flags, ep->stid, ep->backlog,
+			      &lsin->sin_addr, ntohs(lsin->sin_port),
+			      ntohs(mapped_lsin->sin_port));
+	} else {
+		struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
+			&ep->com.local_addr;
+		struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+			&ep->com.mapped_local_addr;
+
+		cc = snprintf(epd->buf + epd->pos, space,
+			      "ep %p cm_id %p state %d flags 0x%lx stid %d "
+			      "backlog %d %pI6:%d/%d\n",
+			      ep, ep->com.cm_id, (int)ep->com.state,
+			      ep->com.flags, ep->stid, ep->backlog,
+			      &lsin6->sin6_addr, ntohs(lsin6->sin6_port),
+			      ntohs(mapped_lsin6->sin6_port));
+	}
+	if (cc < space)
+		epd->pos += cc;
+	return 0;
+}
+
+static int ep_release(struct inode *inode, struct file *file)
+{
+	struct c4iw_debugfs_data *epd = file->private_data;
+	if (!epd) {
+		pr_info("%s null epd?\n", __func__);
+		return 0;
+	}
+	vfree(epd->buf);
+	kfree(epd);
+	return 0;
+}
+
+static int ep_open(struct inode *inode, struct file *file)
+{
+	struct c4iw_debugfs_data *epd;
+	int ret = 0;
+	int count = 1;
+
+	epd = kmalloc(sizeof(*epd), GFP_KERNEL);
+	if (!epd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	epd->devp = inode->i_private;
+	epd->pos = 0;
+
+	spin_lock_irq(&epd->devp->lock);
+	idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count);
+	idr_for_each(&epd->devp->atid_idr, count_idrs, &count);
+	idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
+	spin_unlock_irq(&epd->devp->lock);
+
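+	/* rough sizing: assume ~160 bytes per formatted endpoint line */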
+	epd->bufsize = count * 160;
+	epd->buf = vmalloc(epd->bufsize);
+	if (!epd->buf) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	spin_lock_irq(&epd->devp->lock);
+	idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd);
+	idr_for_each(&epd->devp->atid_idr, dump_ep, epd);
+	idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd);
+	spin_unlock_irq(&epd->devp->lock);
+
+	file->private_data = epd;
+	goto out;
+err1:
+	kfree(epd);
+out:
+	return ret;
+}
+
+static const struct file_operations ep_debugfs_fops = {
+	.owner = THIS_MODULE,
+	.open = ep_open,
+	.release = ep_release,
+	.read = debugfs_read,
+};
+
+static int setup_debugfs(struct c4iw_dev *devp)
+{
+	struct dentry *de;
+
+	if (!devp->debugfs_root)
+		return -1;
+
+	de = debugfs_create_file("qps", S_IWUSR, devp->debugfs_root,
+				 (void *)devp, &qp_debugfs_fops);
+	if (de && de->d_inode)
+		de->d_inode->i_size = 4096;
+
+	de = debugfs_create_file("stags", S_IWUSR, devp->debugfs_root,
+				 (void *)devp, &stag_debugfs_fops);
+	if (de && de->d_inode)
+		de->d_inode->i_size = 4096;
+
+	de = debugfs_create_file("stats", S_IWUSR, devp->debugfs_root,
+				 (void *)devp, &stats_debugfs_fops);
+	if (de && de->d_inode)
+		de->d_inode->i_size = 4096;
+
+	de = debugfs_create_file("eps", S_IWUSR, devp->debugfs_root,
+				 (void *)devp, &ep_debugfs_fops);
+	if (de && de->d_inode)
+		de->d_inode->i_size = 4096;
+
+	return 0;
+}
+
+void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
+			       struct c4iw_dev_ucontext *uctx)
+{
+	struct list_head *pos, *nxt;
+	struct c4iw_qid_list *entry;
+
+	mutex_lock(&uctx->lock);
+	list_for_each_safe(pos, nxt, &uctx->qpids) {
+		entry = list_entry(pos, struct c4iw_qid_list, entry);
+		list_del_init(&entry->entry);
+		if (!(entry->qid & rdev->qpmask)) {
+			c4iw_put_resource(&rdev->resource.qid_table,
+					  entry->qid);
+			mutex_lock(&rdev->stats.lock);
+			rdev->stats.qid.cur -= rdev->qpmask + 1;
+			mutex_unlock(&rdev->stats.lock);
+		}
+		kfree(entry);
+	}
+
+	list_for_each_safe(pos, nxt, &uctx->cqids) {
+		entry = list_entry(pos, struct c4iw_qid_list, entry);
+		list_del_init(&entry->entry);
+		kfree(entry);
+	}
+	mutex_unlock(&uctx->lock);
+}
+
+void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
+			    struct c4iw_dev_ucontext *uctx)
+{
+	INIT_LIST_HEAD(&uctx->qpids);
+	INIT_LIST_HEAD(&uctx->cqids);
+	mutex_init(&uctx->lock);
+}
+
+/* Caller takes care of locking if needed */
+static int c4iw_rdev_open(struct c4iw_rdev *rdev)
+{
+	int err;
+
+	c4iw_init_dev_ucontext(rdev, &rdev->uctx);
+
+	/*
+	 * qpshift is the number of bits to shift the qpid left in order
+	 * to get the correct address of the doorbell for that qp.
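+	 * Each BAR2 page holds lldi.udb_density doorbells, hence
+	 * qpshift = PAGE_SHIFT - ilog2(udb_density) below.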
+	 */
+	rdev->qpshift = PAGE_SHIFT - ilog2(rdev->lldi.udb_density);
+	rdev->qpmask = rdev->lldi.udb_density - 1;
+	rdev->cqshift = PAGE_SHIFT - ilog2(rdev->lldi.ucq_density);
+	rdev->cqmask = rdev->lldi.ucq_density - 1;
+	PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
+	     "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
+	     "qp qid start %u size %u cq qid start %u size %u\n",
+	     __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
+	     rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
+	     rdev->lldi.vr->pbl.start,
+	     rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
+	     rdev->lldi.vr->rq.size,
+	     rdev->lldi.vr->qp.start,
+	     rdev->lldi.vr->qp.size,
+	     rdev->lldi.vr->cq.start,
+	     rdev->lldi.vr->cq.size);
+	PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu "
+	     "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
+	     (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
+	     (u64)pci_resource_start(rdev->lldi.pdev, 2),
+	     rdev->lldi.db_reg,
+	     rdev->lldi.gts_reg,
+	     rdev->qpshift, rdev->qpmask,
+	     rdev->cqshift, rdev->cqmask);
+
+	if (c4iw_num_stags(rdev) == 0) {
+		err = -EINVAL;
+		goto err1;
+	}
+
+	rdev->stats.pd.total = T4_MAX_NUM_PD;
+	rdev->stats.stag.total = rdev->lldi.vr->stag.size;
+	rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
+	rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
+	rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
+	rdev->stats.qid.total = rdev->lldi.vr->qp.size;
+
+	err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
+	if (err) {
+		printk(KERN_ERR MOD "error %d initializing resources\n", err);
+		goto err1;
+	}
+	err = c4iw_pblpool_create(rdev);
+	if (err) {
+		printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
+		goto err2;
+	}
+	err = c4iw_rqtpool_create(rdev);
+	if (err) {
+		printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
+		goto err3;
+	}
+	err = c4iw_ocqp_pool_create(rdev);
+	if (err) {
+		printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
+		goto err4;
+	}
+	rdev->status_page = (struct t4_dev_status_page *)
+			    __get_free_page(GFP_KERNEL);
+	if (!rdev->status_page) {
+		pr_err(MOD "error allocating status page\n");
+		err = -ENOMEM;
+		goto err4;
+	}
+	rdev->status_page->db_off = 0;
+	return 0;
+err4:
+	c4iw_rqtpool_destroy(rdev);
+err3:
+	c4iw_pblpool_destroy(rdev);
+err2:
+	c4iw_destroy_resource(&rdev->resource);
+err1:
+	return err;
+}
+
+static void c4iw_rdev_close(struct c4iw_rdev *rdev)
+{
+	free_page((unsigned long)rdev->status_page);
+	c4iw_pblpool_destroy(rdev);
+	c4iw_rqtpool_destroy(rdev);
+	c4iw_destroy_resource(&rdev->resource);
+}
+
+static void c4iw_dealloc(struct uld_ctx *ctx)
+{
+	c4iw_rdev_close(&ctx->dev->rdev);
+	idr_destroy(&ctx->dev->cqidr);
+	idr_destroy(&ctx->dev->qpidr);
+	idr_destroy(&ctx->dev->mmidr);
+	idr_destroy(&ctx->dev->hwtid_idr);
+	idr_destroy(&ctx->dev->stid_idr);
+	idr_destroy(&ctx->dev->atid_idr);
+	if (ctx->dev->rdev.bar2_kva)
+		iounmap(ctx->dev->rdev.bar2_kva);
+	if (ctx->dev->rdev.oc_mw_kva)
+		iounmap(ctx->dev->rdev.oc_mw_kva);
+	ib_dealloc_device(&ctx->dev->ibdev);
+	ctx->dev = NULL;
+}
+
+static void c4iw_remove(struct uld_ctx *ctx)
+{
+	PDBG("%s c4iw_dev %p\n", __func__, ctx->dev);
+	c4iw_unregister_device(ctx->dev);
+	c4iw_dealloc(ctx);
+}
+
+static int rdma_supported(const struct cxgb4_lld_info *infop)
+{
+	return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
+	       infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
+	       infop->vr->cq.size > 0;
+}
+
+static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
+{
+	struct c4iw_dev *devp;
+	int ret;
+
+	if (!rdma_supported(infop))
{ + printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n", + pci_name(infop->pdev)); + return ERR_PTR(-ENOSYS); + } + if (!ocqp_supported(infop)) + pr_info("%s: On-Chip Queues not supported on this device.\n", + pci_name(infop->pdev)); + + devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp)); + if (!devp) { + printk(KERN_ERR MOD "Cannot allocate ib device\n"); + return ERR_PTR(-ENOMEM); + } + devp->rdev.lldi = *infop; + + /* + * For T5 devices, we map all of BAR2 with WC. + * For T4 devices with onchip qp mem, we map only that part + * of BAR2 with WC. + */ + devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2); + if (is_t5(devp->rdev.lldi.adapter_type)) { + devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa, + pci_resource_len(devp->rdev.lldi.pdev, 2)); + if (!devp->rdev.bar2_kva) { + pr_err(MOD "Unable to ioremap BAR2\n"); + ib_dealloc_device(&devp->ibdev); + return ERR_PTR(-EINVAL); + } + } else if (ocqp_supported(infop)) { + devp->rdev.oc_mw_pa = + pci_resource_start(devp->rdev.lldi.pdev, 2) + + pci_resource_len(devp->rdev.lldi.pdev, 2) - + roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size); + devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa, + devp->rdev.lldi.vr->ocq.size); + if (!devp->rdev.oc_mw_kva) { + pr_err(MOD "Unable to ioremap onchip mem\n"); + ib_dealloc_device(&devp->ibdev); + return ERR_PTR(-EINVAL); + } + } + + PDBG(KERN_INFO MOD "ocq memory: " + "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n", + devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size, + devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva); + + ret = c4iw_rdev_open(&devp->rdev); + if (ret) { + printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret); + ib_dealloc_device(&devp->ibdev); + return ERR_PTR(ret); + } + + idr_init(&devp->cqidr); + idr_init(&devp->qpidr); + idr_init(&devp->mmidr); + idr_init(&devp->hwtid_idr); + idr_init(&devp->stid_idr); + idr_init(&devp->atid_idr); + spin_lock_init(&devp->lock); + mutex_init(&devp->rdev.stats.lock); + mutex_init(&devp->db_mutex); + INIT_LIST_HEAD(&devp->db_fc_list); + + if (c4iw_debugfs_root) { + devp->debugfs_root = debugfs_create_dir( + pci_name(devp->rdev.lldi.pdev), + c4iw_debugfs_root); + setup_debugfs(devp); + } + + + return devp; +} + +static void *c4iw_uld_add(const struct cxgb4_lld_info *infop) +{ + struct uld_ctx *ctx; + static int vers_printed; + int i; + + if (!vers_printed++) + pr_info("Chelsio T4/T5 RDMA Driver - version %s\n", + DRV_VERSION); + + ctx = kzalloc(sizeof *ctx, GFP_KERNEL); + if (!ctx) { + ctx = ERR_PTR(-ENOMEM); + goto out; + } + ctx->lldi = *infop; + + PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n", + __func__, pci_name(ctx->lldi.pdev), + ctx->lldi.nchan, ctx->lldi.nrxq, + ctx->lldi.ntxq, ctx->lldi.nports); + + mutex_lock(&dev_mutex); + list_add_tail(&ctx->entry, &uld_ctx_list); + mutex_unlock(&dev_mutex); + + for (i = 0; i < ctx->lldi.nrxq; i++) + PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]); +out: + return ctx; +} + +static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl, + const __be64 *rsp, + u32 pktshift) +{ + struct sk_buff *skb; + + /* + * Allocate space for cpl_pass_accept_req which will be synthesized by + * driver. Once the driver synthesizes the request the skb will go + * through the regular cpl_pass_accept_req processing. + * The math here assumes sizeof cpl_pass_accept_req >= sizeof + * cpl_rx_pkt. 
+	 */
+	skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
+			sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return NULL;
+
+	__skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) +
+		  sizeof(struct rss_header) - pktshift);
+
+	/*
+	 * This skb will contain:
+	 *   rss_header from the rspq descriptor (1 flit)
+	 *   cpl_rx_pkt struct from the rspq descriptor (2 flits)
+	 *   space for the difference between the size of an
+	 *      rx_pkt and pass_accept_req cpl (1 flit)
+	 *   the packet data from the gl
+	 */
+	skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) +
+				sizeof(struct rss_header));
+	skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) +
+				       sizeof(struct cpl_pass_accept_req),
+				       gl->va + pktshift,
+				       gl->tot_len - pktshift);
+	return skb;
+}
+
+static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
+			      const __be64 *rsp)
+{
+	unsigned int opcode = *(u8 *)rsp;
+	struct sk_buff *skb;
+
+	if (opcode != CPL_RX_PKT)
+		goto out;
+
+	skb = copy_gl_to_skb_pkt(gl, rsp, dev->rdev.lldi.sge_pktshift);
+	if (skb == NULL)
+		goto out;
+
+	if (c4iw_handlers[opcode] == NULL) {
+		pr_info("%s no handler opcode 0x%x...\n", __func__,
+			opcode);
+		kfree_skb(skb);
+		goto out;
+	}
+	c4iw_handlers[opcode](dev, skb);
+	return 1;
+out:
+	return 0;
+}
+
+static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
+			       const struct pkt_gl *gl)
+{
+	struct uld_ctx *ctx = handle;
+	struct c4iw_dev *dev = ctx->dev;
+	struct sk_buff *skb;
+	u8 opcode;
+
+	if (gl == NULL) {
+		/* omit RSS and rsp_ctrl at end of descriptor */
+		unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8;
+
+		skb = alloc_skb(256, GFP_ATOMIC);
+		if (!skb)
+			goto nomem;
+		__skb_put(skb, len);
+		skb_copy_to_linear_data(skb, &rsp[1], len);
+	} else if (gl == CXGB4_MSG_AN) {
+		const struct rsp_ctrl *rc = (void *)rsp;
+
+		u32 qid = be32_to_cpu(rc->pldbuflen_qid);
+		c4iw_ev_handler(dev, qid);
+		return 0;
+	} else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) {
+		if (recv_rx_pkt(dev, gl, rsp))
+			return 0;
+
+		pr_info("%s: unexpected FL contents at %p, " \
+			"RSS %#llx, FL %#llx, len %u\n",
+			pci_name(ctx->lldi.pdev), gl->va,
+			(unsigned long long)be64_to_cpu(*rsp),
+			(unsigned long long)be64_to_cpu(
+				*(__force __be64 *)gl->va),
+			gl->tot_len);
+
+		return 0;
+	} else {
+		skb = cxgb4_pktgl_to_skb(gl, 128, 128);
+		if (unlikely(!skb))
+			goto nomem;
+	}
+
+	opcode = *(u8 *)rsp;
+	if (c4iw_handlers[opcode]) {
+		c4iw_handlers[opcode](dev, skb);
+	} else {
+		pr_info("%s no handler opcode 0x%x...\n", __func__,
+			opcode);
+		kfree_skb(skb);
+	}
+
+	return 0;
+nomem:
+	return -1;
+}
+
+static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
+{
+	struct uld_ctx *ctx = handle;
+
+	PDBG("%s new_state %u\n", __func__, new_state);
+	switch (new_state) {
+	case CXGB4_STATE_UP:
+		printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
+		if (!ctx->dev) {
+			int ret;
+
+			ctx->dev = c4iw_alloc(&ctx->lldi);
+			if (IS_ERR(ctx->dev)) {
+				printk(KERN_ERR MOD
+				       "%s: initialization failed: %ld\n",
+				       pci_name(ctx->lldi.pdev),
+				       PTR_ERR(ctx->dev));
+				ctx->dev = NULL;
+				break;
+			}
+			ret = c4iw_register_device(ctx->dev);
+			if (ret) {
+				printk(KERN_ERR MOD
+				       "%s: RDMA registration failed: %d\n",
+				       pci_name(ctx->lldi.pdev), ret);
+				c4iw_dealloc(ctx);
+			}
+		}
+		break;
+	case CXGB4_STATE_DOWN:
+		printk(KERN_INFO MOD "%s: Down\n",
+		       pci_name(ctx->lldi.pdev));
+		if (ctx->dev)
+			c4iw_remove(ctx);
+		break;
+	case CXGB4_STATE_START_RECOVERY:
+		printk(KERN_INFO MOD "%s: Fatal Error\n",
+
pci_name(ctx->lldi.pdev)); + if (ctx->dev) { + struct ib_event event; + + ctx->dev->rdev.flags |= T4_FATAL_ERROR; + memset(&event, 0, sizeof event); + event.event = IB_EVENT_DEVICE_FATAL; + event.device = &ctx->dev->ibdev; + ib_dispatch_event(&event); + c4iw_remove(ctx); + } + break; + case CXGB4_STATE_DETACH: + printk(KERN_INFO MOD "%s: Detach\n", + pci_name(ctx->lldi.pdev)); + if (ctx->dev) + c4iw_remove(ctx); + break; + } + return 0; +} + +static int disable_qp_db(int id, void *p, void *data) +{ + struct c4iw_qp *qp = p; + + t4_disable_wq_db(&qp->wq); + return 0; +} + +static void stop_queues(struct uld_ctx *ctx) +{ + unsigned long flags; + + spin_lock_irqsave(&ctx->dev->lock, flags); + ctx->dev->rdev.stats.db_state_transitions++; + ctx->dev->db_state = STOPPED; + if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) + idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); + else + ctx->dev->rdev.status_page->db_off = 1; + spin_unlock_irqrestore(&ctx->dev->lock, flags); +} + +static int enable_qp_db(int id, void *p, void *data) +{ + struct c4iw_qp *qp = p; + + t4_enable_wq_db(&qp->wq); + return 0; +} + +static void resume_rc_qp(struct c4iw_qp *qp) +{ + spin_lock(&qp->lock); + t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, + is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + qp->wq.sq.wq_pidx_inc = 0; + t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, + is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + qp->wq.rq.wq_pidx_inc = 0; + spin_unlock(&qp->lock); +} + +static void resume_a_chunk(struct uld_ctx *ctx) +{ + int i; + struct c4iw_qp *qp; + + for (i = 0; i < DB_FC_RESUME_SIZE; i++) { + qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp, + db_fc_entry); + list_del_init(&qp->db_fc_entry); + resume_rc_qp(qp); + if (list_empty(&ctx->dev->db_fc_list)) + break; + } +} + +static void resume_queues(struct uld_ctx *ctx) +{ + spin_lock_irq(&ctx->dev->lock); + if (ctx->dev->db_state != STOPPED) + goto out; + ctx->dev->db_state = FLOW_CONTROL; + while (1) { + if (list_empty(&ctx->dev->db_fc_list)) { + WARN_ON(ctx->dev->db_state != FLOW_CONTROL); + ctx->dev->db_state = NORMAL; + ctx->dev->rdev.stats.db_state_transitions++; + if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) { + idr_for_each(&ctx->dev->qpidr, enable_qp_db, + NULL); + } else { + ctx->dev->rdev.status_page->db_off = 0; + } + break; + } else { + if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) + < (ctx->dev->rdev.lldi.dbfifo_int_thresh << + DB_FC_DRAIN_THRESH)) { + resume_a_chunk(ctx); + } + if (!list_empty(&ctx->dev->db_fc_list)) { + spin_unlock_irq(&ctx->dev->lock); + if (DB_FC_RESUME_DELAY) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(DB_FC_RESUME_DELAY); + } + spin_lock_irq(&ctx->dev->lock); + if (ctx->dev->db_state != FLOW_CONTROL) + break; + } + } + } +out: + if (ctx->dev->db_state != NORMAL) + ctx->dev->rdev.stats.db_fc_interruptions++; + spin_unlock_irq(&ctx->dev->lock); +} + +struct qp_list { + unsigned idx; + struct c4iw_qp **qps; +}; + +static int add_and_ref_qp(int id, void *p, void *data) +{ + struct qp_list *qp_listp = data; + struct c4iw_qp *qp = p; + + c4iw_qp_add_ref(&qp->ibqp); + qp_listp->qps[qp_listp->idx++] = qp; + return 0; +} + +static int count_qps(int id, void *p, void *data) +{ + unsigned *countp = data; + (*countp)++; + return 0; +} + +static void deref_qps(struct qp_list *qp_list) +{ + int idx; + + for (idx = 0; idx < qp_list->idx; idx++) + c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp); +} + +static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) +{ + int idx; + int 
ret; + + for (idx = 0; idx < qp_list->idx; idx++) { + struct c4iw_qp *qp = qp_list->qps[idx]; + + spin_lock_irq(&qp->rhp->lock); + spin_lock(&qp->lock); + ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], + qp->wq.sq.qid, + t4_sq_host_wq_pidx(&qp->wq), + t4_sq_wq_size(&qp->wq)); + if (ret) { + pr_err(MOD "%s: Fatal error - " + "DB overflow recovery failed - " + "error syncing SQ qid %u\n", + pci_name(ctx->lldi.pdev), qp->wq.sq.qid); + spin_unlock(&qp->lock); + spin_unlock_irq(&qp->rhp->lock); + return; + } + qp->wq.sq.wq_pidx_inc = 0; + + ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], + qp->wq.rq.qid, + t4_rq_host_wq_pidx(&qp->wq), + t4_rq_wq_size(&qp->wq)); + + if (ret) { + pr_err(MOD "%s: Fatal error - " + "DB overflow recovery failed - " + "error syncing RQ qid %u\n", + pci_name(ctx->lldi.pdev), qp->wq.rq.qid); + spin_unlock(&qp->lock); + spin_unlock_irq(&qp->rhp->lock); + return; + } + qp->wq.rq.wq_pidx_inc = 0; + spin_unlock(&qp->lock); + spin_unlock_irq(&qp->rhp->lock); + + /* Wait for the dbfifo to drain */ + while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(10)); + } + } +} + +static void recover_queues(struct uld_ctx *ctx) +{ + int count = 0; + struct qp_list qp_list; + int ret; + + /* slow everybody down */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(1000)); + + /* flush the SGE contexts */ + ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]); + if (ret) { + printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", + pci_name(ctx->lldi.pdev)); + return; + } + + /* Count active queues so we can build a list of queues to recover */ + spin_lock_irq(&ctx->dev->lock); + WARN_ON(ctx->dev->db_state != STOPPED); + ctx->dev->db_state = RECOVERY; + idr_for_each(&ctx->dev->qpidr, count_qps, &count); + + qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC); + if (!qp_list.qps) { + printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", + pci_name(ctx->lldi.pdev)); + spin_unlock_irq(&ctx->dev->lock); + return; + } + qp_list.idx = 0; + + /* add and ref each qp so it doesn't get freed */ + idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list); + + spin_unlock_irq(&ctx->dev->lock); + + /* now traverse the list in a safe context to recover the db state */ + recover_lost_dbs(ctx, &qp_list); + + /* we're almost done! deref the qps and clean up */ + deref_qps(&qp_list); + kfree(qp_list.qps); + + spin_lock_irq(&ctx->dev->lock); + WARN_ON(ctx->dev->db_state != RECOVERY); + ctx->dev->db_state = STOPPED; + spin_unlock_irq(&ctx->dev->lock); +} + +static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
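 +/* LLD doorbell notifications: stop user queues on DB_FULL, resume them on DB_EMPTY, recover dropped doorbells on DB_DROP. */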
+{ + struct uld_ctx *ctx = handle; + + switch (control) { + case CXGB4_CONTROL_DB_FULL: + stop_queues(ctx); + ctx->dev->rdev.stats.db_full++; + break; + case CXGB4_CONTROL_DB_EMPTY: + resume_queues(ctx); + mutex_lock(&ctx->dev->rdev.stats.lock); + ctx->dev->rdev.stats.db_empty++; + mutex_unlock(&ctx->dev->rdev.stats.lock); + break; + case CXGB4_CONTROL_DB_DROP: + recover_queues(ctx); + mutex_lock(&ctx->dev->rdev.stats.lock); + ctx->dev->rdev.stats.db_drop++; + mutex_unlock(&ctx->dev->rdev.stats.lock); + break; + default: + printk(KERN_WARNING MOD "%s: unknown control cmd %u\n", + pci_name(ctx->lldi.pdev), control); + break; + } + return 0; +} + +static struct cxgb4_uld_info c4iw_uld_info = { + .name = DRV_NAME, + .add = c4iw_uld_add, + .rx_handler = c4iw_uld_rx_handler, + .state_change = c4iw_uld_state_change, + .control = c4iw_uld_control, +}; + +static int __init c4iw_init_module(void) +{ + int err; + + err = c4iw_cm_init(); + if (err) + return err; + + c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL); + if (!c4iw_debugfs_root) + printk(KERN_WARNING MOD + "could not create debugfs entry, continuing\n"); + + if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS, + c4iw_nl_cb_table)) + pr_err("%s[%u]: Failed to add netlink callback\n" + , __func__, __LINE__); + + err = iwpm_init(RDMA_NL_C4IW); + if (err) { + pr_err("port mapper initialization failed with %d\n", err); + ibnl_remove_client(RDMA_NL_C4IW); + c4iw_cm_term(); + debugfs_remove_recursive(c4iw_debugfs_root); + return err; + } + + cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info); + + return 0; +} + +static void __exit c4iw_exit_module(void) +{ + struct uld_ctx *ctx, *tmp; + + mutex_lock(&dev_mutex); + list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) { + if (ctx->dev) + c4iw_remove(ctx); + kfree(ctx); + } + mutex_unlock(&dev_mutex); + cxgb4_unregister_uld(CXGB4_ULD_RDMA); + iwpm_exit(RDMA_NL_C4IW); + ibnl_remove_client(RDMA_NL_C4IW); + c4iw_cm_term(); + debugfs_remove_recursive(c4iw_debugfs_root); +} + +module_init(c4iw_init_module); +module_exit(c4iw_exit_module); diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c new file mode 100644 index 00000000000..d61d0a18f78 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/slab.h> +#include <linux/mman.h> +#include <net/sock.h> + +#include "iw_cxgb4.h" + +static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, + struct c4iw_qp *qhp, + struct t4_cqe *err_cqe, + enum ib_event_type ib_event) +{ + struct ib_event event; + struct c4iw_qp_attributes attrs; + unsigned long flag; + + printk(KERN_ERR MOD "AE qpid 0x%x opcode %d status 0x%x " + "type %d wrid.hi 0x%x wrid.lo 0x%x\n", + CQE_QPID(err_cqe), CQE_OPCODE(err_cqe), + CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), + CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); + + if (qhp->attr.state == C4IW_QP_STATE_RTS) { + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(qhp->rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, + &attrs, 0); + } + + event.event = ib_event; + event.device = chp->ibcq.device; + if (ib_event == IB_EVENT_CQ_ERR) + event.element.cq = &chp->ibcq; + else + event.element.qp = &qhp->ibqp; + if (qhp->ibqp.event_handler) + (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); + + spin_lock_irqsave(&chp->comp_handler_lock, flag); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); +} + +void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) +{ + struct c4iw_cq *chp; + struct c4iw_qp *qhp; + u32 cqid; + + spin_lock_irq(&dev->lock); + qhp = get_qhp(dev, CQE_QPID(err_cqe)); + if (!qhp) { + printk(KERN_ERR MOD "BAD AE qpid 0x%x opcode %d " + "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", + CQE_QPID(err_cqe), + CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), + CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), + CQE_WRID_LOW(err_cqe)); + spin_unlock_irq(&dev->lock); + goto out; + } + + if (SQ_TYPE(err_cqe)) + cqid = qhp->attr.scq; + else + cqid = qhp->attr.rcq; + chp = get_chp(dev, cqid); + if (!chp) { + printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d " + "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n", + cqid, CQE_QPID(err_cqe), + CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), + CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), + CQE_WRID_LOW(err_cqe)); + spin_unlock_irq(&dev->lock); + goto out; + } + + c4iw_qp_add_ref(&qhp->ibqp); + atomic_inc(&chp->refcnt); + spin_unlock_irq(&dev->lock); + + /* Bad incoming write */ + if (RQ_TYPE(err_cqe) && + (CQE_OPCODE(err_cqe) == FW_RI_RDMA_WRITE)) { + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_REQ_ERR); + goto done; + } + + switch (CQE_STATUS(err_cqe)) { + + /* Completion Events */ + case T4_ERR_SUCCESS: + printk(KERN_ERR MOD "AE with status 0!\n"); + break; + + case T4_ERR_STAG: + case T4_ERR_PDID: + case T4_ERR_QPID: + case T4_ERR_ACCESS: + case T4_ERR_WRAP: + case T4_ERR_BOUND: + case T4_ERR_INVALIDATE_SHARED_MR: + case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND: + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_ACCESS_ERR); + break; + + /* Device Fatal Errors */ + case T4_ERR_ECC: + case T4_ERR_ECC_PSTAG: + case T4_ERR_INTERNAL_ERR: + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_DEVICE_FATAL); + break; + + /* QP Fatal Errors */ + case T4_ERR_OUT_OF_RQE: + case T4_ERR_PBL_ADDR_BOUND: + case T4_ERR_CRC: + case T4_ERR_MARKER: + case T4_ERR_PDU_LEN_ERR: + case T4_ERR_DDP_VERSION: + case T4_ERR_RDMA_VERSION: + case T4_ERR_OPCODE: + case T4_ERR_DDP_QUEUE_NUM: + case T4_ERR_MSN: + case T4_ERR_TBIT: + 
case T4_ERR_MO: + case T4_ERR_MSN_GAP: + case T4_ERR_MSN_RANGE: + case T4_ERR_RQE_ADDR_BOUND: + case T4_ERR_IRD_OVERFLOW: + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL); + break; + + default: + printk(KERN_ERR MOD "Unknown T4 status 0x%x QPID 0x%x\n", + CQE_STATUS(err_cqe), qhp->wq.sq.qid); + post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL); + break; + } +done: + if (atomic_dec_and_test(&chp->refcnt)) + wake_up(&chp->wait); + c4iw_qp_rem_ref(&qhp->ibqp); +out: + return; +} + +int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) +{ + struct c4iw_cq *chp; + unsigned long flag; + + chp = get_chp(dev, qid); + if (chp) { + spin_lock_irqsave(&chp->comp_handler_lock, flag); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + spin_unlock_irqrestore(&chp->comp_handler_lock, flag); + } else + PDBG("%s unknown cqid 0x%x\n", __func__, qid); + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c new file mode 100644 index 00000000000..0161ae6ad62 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/id_table.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2011 Chelsio Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/kernel.h> +#include <linux/random.h> +#include "iw_cxgb4.h" + +#define RANDOM_SKIP 16 + +/* + * Trivial bitmap-based allocator. If the random flag is set, the + * allocator is designed to: + * - pseudo-randomize the id returned such that it is not trivially predictable. 
+ * - avoid reuse of recently used id (at the expense of predictability) + */ +u32 c4iw_id_alloc(struct c4iw_id_table *alloc) +{ + unsigned long flags; + u32 obj; + + spin_lock_irqsave(&alloc->lock, flags); + + obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last); + if (obj >= alloc->max) + obj = find_first_zero_bit(alloc->table, alloc->max); + + if (obj < alloc->max) { + if (alloc->flags & C4IW_ID_TABLE_F_RANDOM) + alloc->last += prandom_u32() % RANDOM_SKIP; + else + alloc->last = obj + 1; + if (alloc->last >= alloc->max) + alloc->last = 0; + set_bit(obj, alloc->table); + obj += alloc->start; + } else + obj = -1; + + spin_unlock_irqrestore(&alloc->lock, flags); + return obj; +} + +void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj) +{ + unsigned long flags; + + obj -= alloc->start; + BUG_ON((int)obj < 0); + + spin_lock_irqsave(&alloc->lock, flags); + clear_bit(obj, alloc->table); + spin_unlock_irqrestore(&alloc->lock, flags); +} + +int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, + u32 reserved, u32 flags) +{ + int i; + + alloc->start = start; + alloc->flags = flags; + if (flags & C4IW_ID_TABLE_F_RANDOM) + alloc->last = prandom_u32() % RANDOM_SKIP; + else + alloc->last = 0; + alloc->max = num; + spin_lock_init(&alloc->lock); + alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof(long), + GFP_KERNEL); + if (!alloc->table) + return -ENOMEM; + + bitmap_zero(alloc->table, num); + if (!(alloc->flags & C4IW_ID_TABLE_F_EMPTY)) + for (i = 0; i < reserved; ++i) + set_bit(i, alloc->table); + + return 0; +} + +void c4iw_id_table_free(struct c4iw_id_table *alloc) +{ + kfree(alloc->table); +} diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h new file mode 100644 index 00000000000..361fff7a074 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -0,0 +1,1000 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __IW_CXGB4_H__ +#define __IW_CXGB4_H__ + +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/idr.h> +#include <linux/completion.h> +#include <linux/netdevice.h> +#include <linux/sched.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/inet.h> +#include <linux/wait.h> +#include <linux/kref.h> +#include <linux/timer.h> +#include <linux/io.h> + +#include <asm/byteorder.h> + +#include <net/net_namespace.h> + +#include <rdma/ib_verbs.h> +#include <rdma/iw_cm.h> +#include <rdma/rdma_netlink.h> +#include <rdma/iw_portmap.h> + +#include "cxgb4.h" +#include "cxgb4_uld.h" +#include "l2t.h" +#include "user.h" + +#define DRV_NAME "iw_cxgb4" +#define MOD DRV_NAME ":" + +extern int c4iw_debug; +#define PDBG(fmt, args...) \ +do { \ + if (c4iw_debug) \ + printk(MOD fmt, ## args); \ +} while (0) + +#include "t4.h" + +#define PBL_OFF(rdev_p, a) ((a) - (rdev_p)->lldi.vr->pbl.start) +#define RQT_OFF(rdev_p, a) ((a) - (rdev_p)->lldi.vr->rq.start) + +static inline void *cplhdr(struct sk_buff *skb) +{ + return skb->data; +} + +#define C4IW_ID_TABLE_F_RANDOM 1 /* Pseudo-randomize the id's returned */ +#define C4IW_ID_TABLE_F_EMPTY 2 /* Table is initially empty */ + +struct c4iw_id_table { + u32 flags; + u32 start; /* logical minimal id */ + u32 last; /* hint for find */ + u32 max; + spinlock_t lock; + unsigned long *table; +}; + +struct c4iw_resource { + struct c4iw_id_table tpt_table; + struct c4iw_id_table qid_table; + struct c4iw_id_table pdid_table; +}; + +struct c4iw_qid_list { + struct list_head entry; + u32 qid; +}; + +struct c4iw_dev_ucontext { + struct list_head qpids; + struct list_head cqids; + struct mutex lock; +}; + +enum c4iw_rdev_flags { + T4_FATAL_ERROR = (1<<0), + T4_STATUS_PAGE_DISABLED = (1<<1), +}; + +struct c4iw_stat { + u64 total; + u64 cur; + u64 max; + u64 fail; +}; + +struct c4iw_stats { + struct mutex lock; + struct c4iw_stat qid; + struct c4iw_stat pd; + struct c4iw_stat stag; + struct c4iw_stat pbl; + struct c4iw_stat rqt; + struct c4iw_stat ocqp; + u64 db_full; + u64 db_empty; + u64 db_drop; + u64 db_state_transitions; + u64 db_fc_interruptions; + u64 tcam_full; + u64 act_ofld_conn_fails; + u64 pas_ofld_conn_fails; +}; + +struct c4iw_rdev { + struct c4iw_resource resource; + unsigned long qpshift; + u32 qpmask; + unsigned long cqshift; + u32 cqmask; + struct c4iw_dev_ucontext uctx; + struct gen_pool *pbl_pool; + struct gen_pool *rqt_pool; + struct gen_pool *ocqp_pool; + u32 flags; + struct cxgb4_lld_info lldi; + unsigned long bar2_pa; + void __iomem *bar2_kva; + unsigned long oc_mw_pa; + void __iomem *oc_mw_kva; + struct c4iw_stats stats; + struct t4_dev_status_page *status_page; +}; + +static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) +{ + return rdev->flags & T4_FATAL_ERROR; +} + +static inline int c4iw_num_stags(struct c4iw_rdev *rdev) +{ + return min((int)T4_MAX_NUM_STAG, (int)(rdev->lldi.vr->stag.size >> 5)); +} + +#define C4IW_WR_TO (30*HZ) + +struct c4iw_wr_wait { + struct completion completion; + int ret; +}; + +static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) +{ + wr_waitp->ret = 0; + init_completion(&wr_waitp->completion); +} + +static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret) +{ + wr_waitp->ret = ret; + complete(&wr_waitp->completion); +} + +static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, + struct c4iw_wr_wait *wr_waitp, + u32 hwtid, u32 qpid, + const char *func) +{ + unsigned to = C4IW_WR_TO; + int ret; + + do { + ret = 
wait_for_completion_timeout(&wr_waitp->completion, to); + if (!ret) { + printk(KERN_ERR MOD "%s - Device %s not responding - " + "tid %u qpid %u\n", func, + pci_name(rdev->lldi.pdev), hwtid, qpid); + if (c4iw_fatal_error(rdev)) { + wr_waitp->ret = -EIO; + break; + } + to = to << 2; + } + } while (!ret); + if (wr_waitp->ret) + PDBG("%s: FW reply %d tid %u qpid %u\n", + pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid); + return wr_waitp->ret; +} + +enum db_state { + NORMAL = 0, + FLOW_CONTROL = 1, + RECOVERY = 2, + STOPPED = 3 +}; + +struct c4iw_dev { + struct ib_device ibdev; + struct c4iw_rdev rdev; + u32 device_cap_flags; + struct idr cqidr; + struct idr qpidr; + struct idr mmidr; + spinlock_t lock; + struct mutex db_mutex; + struct dentry *debugfs_root; + enum db_state db_state; + struct idr hwtid_idr; + struct idr atid_idr; + struct idr stid_idr; + struct list_head db_fc_list; +}; + +static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct c4iw_dev, ibdev); +} + +static inline struct c4iw_dev *rdev_to_c4iw_dev(struct c4iw_rdev *rdev) +{ + return container_of(rdev, struct c4iw_dev, rdev); +} + +static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid) +{ + return idr_find(&rhp->cqidr, cqid); +} + +static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid) +{ + return idr_find(&rhp->qpidr, qpid); +} + +static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid) +{ + return idr_find(&rhp->mmidr, mmid); +} + +static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, + void *handle, u32 id, int lock) +{ + int ret; + + if (lock) { + idr_preload(GFP_KERNEL); + spin_lock_irq(&rhp->lock); + } + + ret = idr_alloc(idr, handle, id, id + 1, GFP_ATOMIC); + + if (lock) { + spin_unlock_irq(&rhp->lock); + idr_preload_end(); + } + + BUG_ON(ret == -ENOSPC); + return ret < 0 ? 
ret : 0; +} + +static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, + void *handle, u32 id) +{ + return _insert_handle(rhp, idr, handle, id, 1); +} + +static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr, + void *handle, u32 id) +{ + return _insert_handle(rhp, idr, handle, id, 0); +} + +static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr, + u32 id, int lock) +{ + if (lock) + spin_lock_irq(&rhp->lock); + idr_remove(idr, id); + if (lock) + spin_unlock_irq(&rhp->lock); +} + +static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id) +{ + _remove_handle(rhp, idr, id, 1); +} + +static inline void remove_handle_nolock(struct c4iw_dev *rhp, + struct idr *idr, u32 id) +{ + _remove_handle(rhp, idr, id, 0); +} + +struct c4iw_pd { + struct ib_pd ibpd; + u32 pdid; + struct c4iw_dev *rhp; +}; + +static inline struct c4iw_pd *to_c4iw_pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct c4iw_pd, ibpd); +} + +struct tpt_attributes { + u64 len; + u64 va_fbo; + enum fw_ri_mem_perms perms; + u32 stag; + u32 pdid; + u32 qpid; + u32 pbl_addr; + u32 pbl_size; + u32 state:1; + u32 type:2; + u32 rsvd:1; + u32 remote_invaliate_disable:1; + u32 zbva:1; + u32 mw_bind_enable:1; + u32 page_size:5; +}; + +struct c4iw_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct c4iw_dev *rhp; + u64 kva; + struct tpt_attributes attr; +}; + +static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct c4iw_mr, ibmr); +} + +struct c4iw_mw { + struct ib_mw ibmw; + struct c4iw_dev *rhp; + u64 kva; + struct tpt_attributes attr; +}; + +static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct c4iw_mw, ibmw); +} + +struct c4iw_fr_page_list { + struct ib_fast_reg_page_list ibpl; + DEFINE_DMA_UNMAP_ADDR(mapping); + dma_addr_t dma_addr; + struct c4iw_dev *dev; + int pll_len; +}; + +static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( + struct ib_fast_reg_page_list *ibpl) +{ + return container_of(ibpl, struct c4iw_fr_page_list, ibpl); +} + +struct c4iw_cq { + struct ib_cq ibcq; + struct c4iw_dev *rhp; + struct t4_cq cq; + spinlock_t lock; + spinlock_t comp_handler_lock; + atomic_t refcnt; + wait_queue_head_t wait; +}; + +static inline struct c4iw_cq *to_c4iw_cq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct c4iw_cq, ibcq); +} + +struct c4iw_mpa_attributes { + u8 initiator; + u8 recv_marker_enabled; + u8 xmit_marker_enabled; + u8 crc_enabled; + u8 enhanced_rdma_conn; + u8 version; + u8 p2p_type; +}; + +struct c4iw_qp_attributes { + u32 scq; + u32 rcq; + u32 sq_num_entries; + u32 rq_num_entries; + u32 sq_max_sges; + u32 sq_max_sges_rdma_write; + u32 rq_max_sges; + u32 state; + u8 enable_rdma_read; + u8 enable_rdma_write; + u8 enable_bind; + u8 enable_mmid0_fastreg; + u32 max_ord; + u32 max_ird; + u32 pd; + u32 next_state; + char terminate_buffer[52]; + u32 terminate_msg_len; + u8 is_terminate_local; + struct c4iw_mpa_attributes mpa_attr; + struct c4iw_ep *llp_stream_handle; + u8 layer_etype; + u8 ecode; + u16 sq_db_inc; + u16 rq_db_inc; + u8 send_term; +}; + +struct c4iw_qp { + struct ib_qp ibqp; + struct list_head db_fc_entry; + struct c4iw_dev *rhp; + struct c4iw_ep *ep; + struct c4iw_qp_attributes attr; + struct t4_wq wq; + spinlock_t lock; + struct mutex mutex; + atomic_t refcnt; + wait_queue_head_t wait; + struct timer_list timer; + int sq_sig_all; +}; + +static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) +{ + return 
container_of(ibqp, struct c4iw_qp, ibqp); +} + +struct c4iw_ucontext { + struct ib_ucontext ibucontext; + struct c4iw_dev_ucontext uctx; + u32 key; + spinlock_t mmap_lock; + struct list_head mmaps; +}; + +static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) +{ + return container_of(c, struct c4iw_ucontext, ibucontext); +} + +struct c4iw_mm_entry { + struct list_head entry; + u64 addr; + u32 key; + unsigned len; +}; + +static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext, + u32 key, unsigned len) +{ + struct list_head *pos, *nxt; + struct c4iw_mm_entry *mm; + + spin_lock(&ucontext->mmap_lock); + list_for_each_safe(pos, nxt, &ucontext->mmaps) { + + mm = list_entry(pos, struct c4iw_mm_entry, entry); + if (mm->key == key && mm->len == len) { + list_del_init(&mm->entry); + spin_unlock(&ucontext->mmap_lock); + PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__, + key, (unsigned long long) mm->addr, mm->len); + return mm; + } + } + spin_unlock(&ucontext->mmap_lock); + return NULL; +} + +static inline void insert_mmap(struct c4iw_ucontext *ucontext, + struct c4iw_mm_entry *mm) +{ + spin_lock(&ucontext->mmap_lock); + PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__, + mm->key, (unsigned long long) mm->addr, mm->len); + list_add_tail(&mm->entry, &ucontext->mmaps); + spin_unlock(&ucontext->mmap_lock); +} + +enum c4iw_qp_attr_mask { + C4IW_QP_ATTR_NEXT_STATE = 1 << 0, + C4IW_QP_ATTR_SQ_DB = 1<<1, + C4IW_QP_ATTR_RQ_DB = 1<<2, + C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, + C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, + C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, + C4IW_QP_ATTR_MAX_ORD = 1 << 11, + C4IW_QP_ATTR_MAX_IRD = 1 << 12, + C4IW_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22, + C4IW_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23, + C4IW_QP_ATTR_MPA_ATTR = 1 << 24, + C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25, + C4IW_QP_ATTR_VALID_MODIFY = (C4IW_QP_ATTR_ENABLE_RDMA_READ | + C4IW_QP_ATTR_ENABLE_RDMA_WRITE | + C4IW_QP_ATTR_MAX_ORD | + C4IW_QP_ATTR_MAX_IRD | + C4IW_QP_ATTR_LLP_STREAM_HANDLE | + C4IW_QP_ATTR_STREAM_MSG_BUFFER | + C4IW_QP_ATTR_MPA_ATTR | + C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE) +}; + +int c4iw_modify_qp(struct c4iw_dev *rhp, + struct c4iw_qp *qhp, + enum c4iw_qp_attr_mask mask, + struct c4iw_qp_attributes *attrs, + int internal); + +enum c4iw_qp_state { + C4IW_QP_STATE_IDLE, + C4IW_QP_STATE_RTS, + C4IW_QP_STATE_ERROR, + C4IW_QP_STATE_TERMINATE, + C4IW_QP_STATE_CLOSING, + C4IW_QP_STATE_TOT +}; + +static inline int c4iw_convert_state(enum ib_qp_state ib_state) +{ + switch (ib_state) { + case IB_QPS_RESET: + case IB_QPS_INIT: + return C4IW_QP_STATE_IDLE; + case IB_QPS_RTS: + return C4IW_QP_STATE_RTS; + case IB_QPS_SQD: + return C4IW_QP_STATE_CLOSING; + case IB_QPS_SQE: + return C4IW_QP_STATE_TERMINATE; + case IB_QPS_ERR: + return C4IW_QP_STATE_ERROR; + default: + return -1; + } +} + +static inline int to_ib_qp_state(int c4iw_qp_state) +{ + switch (c4iw_qp_state) { + case C4IW_QP_STATE_IDLE: + return IB_QPS_INIT; + case C4IW_QP_STATE_RTS: + return IB_QPS_RTS; + case C4IW_QP_STATE_CLOSING: + return IB_QPS_SQD; + case C4IW_QP_STATE_TERMINATE: + return IB_QPS_SQE; + case C4IW_QP_STATE_ERROR: + return IB_QPS_ERR; + } + return IB_QPS_ERR; +} + +static inline u32 c4iw_ib_to_tpt_access(int a) +{ + return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | + (a & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0) | + (a & IB_ACCESS_LOCAL_WRITE ? 
FW_RI_MEM_ACCESS_LOCAL_WRITE : 0) | + FW_RI_MEM_ACCESS_LOCAL_READ; +} + +static inline u32 c4iw_ib_to_tpt_bind_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0); +} + +enum c4iw_mmid_state { + C4IW_STAG_STATE_VALID, + C4IW_STAG_STATE_INVALID +}; + +#define C4IW_NODE_DESC "cxgb4 Chelsio Communications" + +#define MPA_KEY_REQ "MPA ID Req Frame" +#define MPA_KEY_REP "MPA ID Rep Frame" + +#define MPA_MAX_PRIVATE_DATA 256 +#define MPA_ENHANCED_RDMA_CONN 0x10 +#define MPA_REJECT 0x20 +#define MPA_CRC 0x40 +#define MPA_MARKERS 0x80 +#define MPA_FLAGS_MASK 0xE0 + +#define MPA_V2_PEER2PEER_MODEL 0x8000 +#define MPA_V2_ZERO_LEN_FPDU_RTR 0x4000 +#define MPA_V2_RDMA_WRITE_RTR 0x8000 +#define MPA_V2_RDMA_READ_RTR 0x4000 +#define MPA_V2_IRD_ORD_MASK 0x3FFF + +#define c4iw_put_ep(ep) { \ + PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ + ep, atomic_read(&((ep)->kref.refcount))); \ + WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ + kref_put(&((ep)->kref), _c4iw_free_ep); \ +} + +#define c4iw_get_ep(ep) { \ + PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \ + ep, atomic_read(&((ep)->kref.refcount))); \ + kref_get(&((ep)->kref)); \ +} +void _c4iw_free_ep(struct kref *kref); + +struct mpa_message { + u8 key[16]; + u8 flags; + u8 revision; + __be16 private_data_size; + u8 private_data[0]; +}; + +struct mpa_v2_conn_params { + __be16 ird; + __be16 ord; +}; + +struct terminate_message { + u8 layer_etype; + u8 ecode; + __be16 hdrct_rsvd; + u8 len_hdrs[0]; +}; + +#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) + +enum c4iw_layers_types { + LAYER_RDMAP = 0x00, + LAYER_DDP = 0x10, + LAYER_MPA = 0x20, + RDMAP_LOCAL_CATA = 0x00, + RDMAP_REMOTE_PROT = 0x01, + RDMAP_REMOTE_OP = 0x02, + DDP_LOCAL_CATA = 0x00, + DDP_TAGGED_ERR = 0x01, + DDP_UNTAGGED_ERR = 0x02, + DDP_LLP = 0x03 +}; + +enum c4iw_rdma_ecodes { + RDMAP_INV_STAG = 0x00, + RDMAP_BASE_BOUNDS = 0x01, + RDMAP_ACC_VIOL = 0x02, + RDMAP_STAG_NOT_ASSOC = 0x03, + RDMAP_TO_WRAP = 0x04, + RDMAP_INV_VERS = 0x05, + RDMAP_INV_OPCODE = 0x06, + RDMAP_STREAM_CATA = 0x07, + RDMAP_GLOBAL_CATA = 0x08, + RDMAP_CANT_INV_STAG = 0x09, + RDMAP_UNSPECIFIED = 0xff +}; + +enum c4iw_ddp_ecodes { + DDPT_INV_STAG = 0x00, + DDPT_BASE_BOUNDS = 0x01, + DDPT_STAG_NOT_ASSOC = 0x02, + DDPT_TO_WRAP = 0x03, + DDPT_INV_VERS = 0x04, + DDPU_INV_QN = 0x01, + DDPU_INV_MSN_NOBUF = 0x02, + DDPU_INV_MSN_RANGE = 0x03, + DDPU_INV_MO = 0x04, + DDPU_MSG_TOOBIG = 0x05, + DDPU_INV_VERS = 0x06 +}; + +enum c4iw_mpa_ecodes { + MPA_CRC_ERR = 0x02, + MPA_MARKER_ERR = 0x03, + MPA_LOCAL_CATA = 0x05, + MPA_INSUFF_IRD = 0x06, + MPA_NOMATCH_RTR = 0x07, +}; + +enum c4iw_ep_state { + IDLE = 0, + LISTEN, + CONNECTING, + MPA_REQ_WAIT, + MPA_REQ_SENT, + MPA_REQ_RCVD, + MPA_REP_SENT, + FPDU_MODE, + ABORTING, + CLOSING, + MORIBUND, + DEAD, +}; + +enum c4iw_ep_flags { + PEER_ABORT_IN_PROGRESS = 0, + ABORT_REQ_IN_PROGRESS = 1, + RELEASE_RESOURCES = 2, + CLOSE_SENT = 3, + TIMEOUT = 4, + QP_REFERENCED = 5, + RELEASE_MAPINFO = 6, +}; + +enum c4iw_ep_history { + ACT_OPEN_REQ = 0, + ACT_OFLD_CONN = 1, + ACT_OPEN_RPL = 2, + ACT_ESTAB = 3, + PASS_ACCEPT_REQ = 4, + PASS_ESTAB = 5, + ABORT_UPCALL = 6, + ESTAB_UPCALL = 7, + CLOSE_UPCALL = 8, + ULP_ACCEPT = 9, + ULP_REJECT = 10, + TIMEDOUT = 11, + PEER_ABORT = 12, + PEER_CLOSE = 13, + CONNREQ_UPCALL = 14, + ABORT_CONN = 15, + DISCONN_UPCALL = 16, + EP_DISC_CLOSE = 17, + EP_DISC_ABORT = 18, + CONN_RPL_UPCALL = 19, + 
ACT_RETRY_NOMEM = 20, + ACT_RETRY_INUSE = 21 +}; + +struct c4iw_ep_common { + struct iw_cm_id *cm_id; + struct c4iw_qp *qp; + struct c4iw_dev *dev; + enum c4iw_ep_state state; + struct kref kref; + struct mutex mutex; + struct sockaddr_storage local_addr; + struct sockaddr_storage remote_addr; + struct sockaddr_storage mapped_local_addr; + struct sockaddr_storage mapped_remote_addr; + struct c4iw_wr_wait wr_wait; + unsigned long flags; + unsigned long history; +}; + +struct c4iw_listen_ep { + struct c4iw_ep_common com; + unsigned int stid; + int backlog; +}; + +struct c4iw_ep { + struct c4iw_ep_common com; + struct c4iw_ep *parent_ep; + struct timer_list timer; + struct list_head entry; + unsigned int atid; + u32 hwtid; + u32 snd_seq; + u32 rcv_seq; + struct l2t_entry *l2t; + struct dst_entry *dst; + struct sk_buff *mpa_skb; + struct c4iw_mpa_attributes mpa_attr; + u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA]; + unsigned int mpa_pkt_len; + u32 ird; + u32 ord; + u32 smac_idx; + u32 tx_chan; + u32 mtu; + u16 mss; + u16 emss; + u16 plen; + u16 rss_qid; + u16 txq_idx; + u16 ctrlq_idx; + u8 tos; + u8 retry_with_mpa_v1; + u8 tried_with_mpa_v1; + unsigned int retry_count; + int snd_win; + int rcv_win; +}; + +static inline void print_addr(struct c4iw_ep_common *epc, const char *func, + const char *msg) +{ + +#define SINA(a) (&(((struct sockaddr_in *)(a))->sin_addr.s_addr)) +#define SINP(a) ntohs(((struct sockaddr_in *)(a))->sin_port) +#define SIN6A(a) (&(((struct sockaddr_in6 *)(a))->sin6_addr)) +#define SIN6P(a) ntohs(((struct sockaddr_in6 *)(a))->sin6_port) + + if (c4iw_debug) { + switch (epc->local_addr.ss_family) { + case AF_INET: + PDBG("%s %s %pI4:%u/%u <-> %pI4:%u/%u\n", + func, msg, SINA(&epc->local_addr), + SINP(&epc->local_addr), + SINP(&epc->mapped_local_addr), + SINA(&epc->remote_addr), + SINP(&epc->remote_addr), + SINP(&epc->mapped_remote_addr)); + break; + case AF_INET6: + PDBG("%s %s %pI6:%u/%u <-> %pI6:%u/%u\n", + func, msg, SIN6A(&epc->local_addr), + SIN6P(&epc->local_addr), + SIN6P(&epc->mapped_local_addr), + SIN6A(&epc->remote_addr), + SIN6P(&epc->remote_addr), + SIN6P(&epc->mapped_remote_addr)); + break; + default: + break; + } + } +#undef SINA +#undef SINP +#undef SIN6A +#undef SIN6P +} + +static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) +{ + return cm_id->provider_data; +} + +static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) +{ + return cm_id->provider_data; +} + +static inline int compute_wscale(int win) +{ + int wscale = 0; + + while (wscale < 14 && (65535<<wscale) < win) + wscale++; + return wscale; +} + +static inline int ocqp_supported(const struct cxgb4_lld_info *infop) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) + return infop->vr->ocq.size > 0; +#else + return 0; +#endif +} + +u32 c4iw_id_alloc(struct c4iw_id_table *alloc); +void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj); +int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, + u32 reserved, u32 flags); +void c4iw_id_table_free(struct c4iw_id_table *alloc); + +typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb); + +int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, + struct l2t_entry *l2t); +void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid, + struct c4iw_dev_ucontext *uctx); +u32 c4iw_get_resource(struct c4iw_id_table *id_table); +void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry); +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, 
u32 nr_pdid); +int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); +int c4iw_pblpool_create(struct c4iw_rdev *rdev); +int c4iw_rqtpool_create(struct c4iw_rdev *rdev); +int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev); +void c4iw_pblpool_destroy(struct c4iw_rdev *rdev); +void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev); +void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev); +void c4iw_destroy_resource(struct c4iw_resource *rscp); +int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev); +int c4iw_register_device(struct c4iw_dev *dev); +void c4iw_unregister_device(struct c4iw_dev *dev); +int __init c4iw_cm_init(void); +void c4iw_cm_term(void); +void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, + struct c4iw_dev_ucontext *uctx); +void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, + struct c4iw_dev_ucontext *uctx); +int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); +int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); +int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog); +int c4iw_destroy_listen(struct iw_cm_id *cm_id); +int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); +int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); +void c4iw_qp_add_ref(struct ib_qp *qp); +void c4iw_qp_rem_ref(struct ib_qp *qp); +void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list); +struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl( + struct ib_device *device, + int page_list_len); +struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth); +int c4iw_dealloc_mw(struct ib_mw *mw); +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, + u64 length, u64 virt, int acc, + struct ib_udata *udata); +struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); +struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, + int acc, + u64 *iova_start); +int c4iw_reregister_phys_mem(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, + int acc, u64 *iova_start); +int c4iw_dereg_mr(struct ib_mr *ib_mr); +int c4iw_destroy_cq(struct ib_cq *ib_cq); +struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, + int vector, + struct ib_ucontext *ib_context, + struct ib_udata *udata); +int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); +int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +int c4iw_destroy_qp(struct ib_qp *ib_qp); +struct ib_qp *c4iw_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *attrs, + struct ib_udata *udata); +int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr); +struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn); +u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size); +void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size); +u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size); +void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); +u32 c4iw_ocqp_pool_alloc(struct 
c4iw_rdev *rdev, int size); +void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size); +int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb); +void c4iw_flush_hw_cq(struct c4iw_cq *chp); +void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); +int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp); +int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count); +int c4iw_flush_sq(struct c4iw_qp *qhp); +int c4iw_ev_handler(struct c4iw_dev *rnicp, u32 qid); +u16 c4iw_rqes_posted(struct c4iw_qp *qhp); +int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe); +u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); +void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid, + struct c4iw_dev_ucontext *uctx); +u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); +void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid, + struct c4iw_dev_ucontext *uctx); +void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe); + +extern struct cxgb4_client t4c_client; +extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; +extern int c4iw_max_read_depth; +extern int db_fc_threshold; +extern int db_coalescing_threshold; +extern int use_dsgl; + + +#endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c new file mode 100644 index 00000000000..ec7a2988a70 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -0,0 +1,955 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <rdma/ib_umem.h> +#include <linux/atomic.h> + +#include "iw_cxgb4.h" + +int use_dsgl = 0; +module_param(use_dsgl, int, 0644); +MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)"); + +#define T4_ULPTX_MIN_IO 32 +#define C4IW_MAX_INLINE_SIZE 96 +#define T4_ULPTX_MAX_DMA 1024 +#define C4IW_INLINE_THRESHOLD 128 + +static int inline_threshold = C4IW_INLINE_THRESHOLD; +module_param(inline_threshold, int, 0644); +MODULE_PARM_DESC(inline_threshold, "inline vs dsgl threshold (default=128)"); + +static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, + u32 len, dma_addr_t data, int wait) +{ + struct sk_buff *skb; + struct ulp_mem_io *req; + struct ulptx_sgl *sgl; + u8 wr_len; + int ret = 0; + struct c4iw_wr_wait wr_wait; + + addr &= 0x7FFFFFF; + + if (wait) + c4iw_init_wr_wait(&wr_wait); + wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16); + + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + req = (struct ulp_mem_io *)__skb_put(skb, wr_len); + memset(req, 0, wr_len); + INIT_ULPTX_WR(req, wr_len, 0, 0); + req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) | + (wait ? FW_WR_COMPL(1) : 0)); + req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L; + req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); + req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE)); + req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1)); + req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN(len>>5)); + req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16)); + req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR(addr)); + + sgl = (struct ulptx_sgl *)(req + 1); + sgl->cmd_nsge = cpu_to_be32(ULPTX_CMD(ULP_TX_SC_DSGL) | + ULPTX_NSGE(1)); + sgl->len0 = cpu_to_be32(len); + sgl->addr0 = cpu_to_be64(data); + + ret = c4iw_ofld_send(rdev, skb); + if (ret) + return ret; + if (wait) + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + return ret; +} + +static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data) +{ + struct sk_buff *skb; + struct ulp_mem_io *req; + struct ulptx_idata *sc; + u8 wr_len, *to_dp, *from_dp; + int copy_len, num_wqe, i, ret = 0; + struct c4iw_wr_wait wr_wait; + __be32 cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE)); + + if (is_t4(rdev->lldi.adapter_type)) + cmd |= cpu_to_be32(ULP_MEMIO_ORDER(1)); + else + cmd |= cpu_to_be32(V_T5_ULP_MEMIO_IMM(1)); + + addr &= 0x7FFFFFF; + PDBG("%s addr 0x%x len %u\n", __func__, addr, len); + num_wqe = DIV_ROUND_UP(len, C4IW_MAX_INLINE_SIZE); + c4iw_init_wr_wait(&wr_wait); + for (i = 0; i < num_wqe; i++) { + + copy_len = len > C4IW_MAX_INLINE_SIZE ? 
C4IW_MAX_INLINE_SIZE : + len; + wr_len = roundup(sizeof *req + sizeof *sc + + roundup(copy_len, T4_ULPTX_MIN_IO), 16); + + skb = alloc_skb(wr_len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + req = (struct ulp_mem_io *)__skb_put(skb, wr_len); + memset(req, 0, wr_len); + INIT_ULPTX_WR(req, wr_len, 0, 0); + + if (i == (num_wqe-1)) { + req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) | + FW_WR_COMPL(1)); + req->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait; + } else + req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR)); + req->wr.wr_mid = cpu_to_be32( + FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); + + req->cmd = cmd; + req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN( + DIV_ROUND_UP(copy_len, T4_ULPTX_MIN_IO))); + req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), + 16)); + req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR(addr + i * 3)); + + sc = (struct ulptx_idata *)(req + 1); + sc->cmd_more = cpu_to_be32(ULPTX_CMD(ULP_TX_SC_IMM)); + sc->len = cpu_to_be32(roundup(copy_len, T4_ULPTX_MIN_IO)); + + to_dp = (u8 *)(sc + 1); + from_dp = (u8 *)data + i * C4IW_MAX_INLINE_SIZE; + if (data) + memcpy(to_dp, from_dp, copy_len); + else + memset(to_dp, 0, copy_len); + if (copy_len % T4_ULPTX_MIN_IO) + memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO - + (copy_len % T4_ULPTX_MIN_IO)); + ret = c4iw_ofld_send(rdev, skb); + if (ret) + return ret; + len -= C4IW_MAX_INLINE_SIZE; + } + + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + return ret; +} + +static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data) +{ + u32 remain = len; + u32 dmalen; + int ret = 0; + dma_addr_t daddr; + dma_addr_t save; + + daddr = dma_map_single(&rdev->lldi.pdev->dev, data, len, DMA_TO_DEVICE); + if (dma_mapping_error(&rdev->lldi.pdev->dev, daddr)) + return -1; + save = daddr; + + while (remain > inline_threshold) { + if (remain < T4_ULPTX_MAX_DMA) { + if (remain & ~T4_ULPTX_MIN_IO) + dmalen = remain & ~(T4_ULPTX_MIN_IO-1); + else + dmalen = remain; + } else + dmalen = T4_ULPTX_MAX_DMA; + remain -= dmalen; + ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr, + !remain); + if (ret) + goto out; + addr += dmalen >> 5; + data += dmalen; + daddr += dmalen; + } + if (remain) + ret = _c4iw_write_mem_inline(rdev, addr, remain, data); +out: + dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE); + return ret; +} + +/* + * write len bytes of data into addr (32B aligned address) + * If data is NULL, clear len byte of memory to zero. + */ +static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data) +{ + if (is_t5(rdev->lldi.adapter_type) && use_dsgl) { + if (len > inline_threshold) { + if (_c4iw_write_mem_dma(rdev, addr, len, data)) { + printk_ratelimited(KERN_WARNING + "%s: dma map" + " failure (non fatal)\n", + pci_name(rdev->lldi.pdev)); + return _c4iw_write_mem_inline(rdev, addr, len, + data); + } else + return 0; + } else + return _c4iw_write_mem_inline(rdev, addr, len, data); + } else + return _c4iw_write_mem_inline(rdev, addr, len, data); +} + +/* + * Build and write a TPT entry. 
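 + * (The TPT is the adapter's translation and protection table; each + * entry records a stag's PD, access permissions, VA/FBO, length and + * backing PBL.)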
+ * IN: stag key, pdid, perm, bind_enabled, zbva, to, len, page_size, + * pbl_size and pbl_addr + * OUT: stag index + */ +static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, + u32 *stag, u8 stag_state, u32 pdid, + enum fw_ri_stag_type type, enum fw_ri_mem_perms perm, + int bind_enabled, u32 zbva, u64 to, + u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr) +{ + int err; + struct fw_ri_tpte tpt; + u32 stag_idx; + static atomic_t key; + + if (c4iw_fatal_error(rdev)) + return -EIO; + + stag_state = stag_state > 0; + stag_idx = (*stag) >> 8; + + if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) { + stag_idx = c4iw_get_resource(&rdev->resource.tpt_table); + if (!stag_idx) { + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.fail++; + mutex_unlock(&rdev->stats.lock); + return -ENOMEM; + } + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.cur += 32; + if (rdev->stats.stag.cur > rdev->stats.stag.max) + rdev->stats.stag.max = rdev->stats.stag.cur; + mutex_unlock(&rdev->stats.lock); + *stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff); + } + PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", + __func__, stag_state, type, pdid, stag_idx); + + /* write TPT entry */ + if (reset_tpt_entry) + memset(&tpt, 0, sizeof(tpt)); + else { + tpt.valid_to_pdid = cpu_to_be32(F_FW_RI_TPTE_VALID | + V_FW_RI_TPTE_STAGKEY((*stag & M_FW_RI_TPTE_STAGKEY)) | + V_FW_RI_TPTE_STAGSTATE(stag_state) | + V_FW_RI_TPTE_STAGTYPE(type) | V_FW_RI_TPTE_PDID(pdid)); + tpt.locread_to_qpid = cpu_to_be32(V_FW_RI_TPTE_PERM(perm) | + (bind_enabled ? F_FW_RI_TPTE_MWBINDEN : 0) | + V_FW_RI_TPTE_ADDRTYPE((zbva ? FW_RI_ZERO_BASED_TO : + FW_RI_VA_BASED_TO))| + V_FW_RI_TPTE_PS(page_size)); + tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( + V_FW_RI_TPTE_PBLADDR(PBL_OFF(rdev, pbl_addr)>>3)); + tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); + tpt.va_hi = cpu_to_be32((u32)(to >> 32)); + tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); + tpt.dca_mwbcnt_pstag = cpu_to_be32(0); + tpt.len_hi = cpu_to_be32((u32)(len >> 32)); + } + err = write_adapter_mem(rdev, stag_idx + + (rdev->lldi.vr->stag.start >> 5), + sizeof(tpt), &tpt); + + if (reset_tpt_entry) { + c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.cur -= 32; + mutex_unlock(&rdev->stats.lock); + } + return err; +} + +static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl, + u32 pbl_addr, u32 pbl_size) +{ + int err; + + PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n", + __func__, pbl_addr, rdev->lldi.vr->pbl.start, + pbl_size); + + err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl); + return err; +} + +static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size, + u32 pbl_addr) +{ + return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, + pbl_size, pbl_addr); +} + +static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid) +{ + *stag = T4_STAG_UNSET; + return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0, + 0UL, 0, 0, 0, 0); +} + +static int deallocate_window(struct c4iw_rdev *rdev, u32 stag) +{ + return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0, + 0); +} + +static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid, + u32 pbl_size, u32 pbl_addr) +{ + *stag = T4_STAG_UNSET; + return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0, + 0UL, 0, 0, pbl_size, pbl_addr); +} + +static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) +{ + u32 mmid; + + 
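/* Mark the MR valid and hash it by MMID, the upper 24 bits of the stag. */ +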
mhp->attr.state = 1; + mhp->attr.stag = stag; + mmid = stag >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp); + return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid); +} + +static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, + struct c4iw_mr *mhp, int shift) +{ + u32 stag = T4_STAG_UNSET; + int ret; + + ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid, + FW_RI_STAG_NSMR, mhp->attr.perms, + mhp->attr.mw_bind_enable, mhp->attr.zbva, + mhp->attr.va_fbo, mhp->attr.len, shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr); + if (ret) + return ret; + + ret = finish_mem_reg(mhp, stag); + if (ret) + dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + return ret; +} + +static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php, + struct c4iw_mr *mhp, int shift, int npages) +{ + u32 stag; + int ret; + + if (npages > mhp->attr.pbl_size) + return -ENOMEM; + + stag = mhp->attr.stag; + ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid, + FW_RI_STAG_NSMR, mhp->attr.perms, + mhp->attr.mw_bind_enable, mhp->attr.zbva, + mhp->attr.va_fbo, mhp->attr.len, shift - 12, + mhp->attr.pbl_size, mhp->attr.pbl_addr); + if (ret) + return ret; + + ret = finish_mem_reg(mhp, stag); + if (ret) + dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + + return ret; +} + +static int alloc_pbl(struct c4iw_mr *mhp, int npages) +{ + mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev, + npages << 3); + + if (!mhp->attr.pbl_addr) + return -ENOMEM; + + mhp->attr.pbl_size = npages; + + return 0; +} + +static int build_phys_page_list(struct ib_phys_buf *buffer_list, + int num_phys_buf, u64 *iova_start, + u64 *total_size, int *npages, + int *shift, __be64 **page_list) +{ + u64 mask; + int i, j, n; + + mask = 0; + *total_size = 0; + for (i = 0; i < num_phys_buf; ++i) { + if (i != 0 && buffer_list[i].addr & ~PAGE_MASK) + return -EINVAL; + if (i != 0 && i != num_phys_buf - 1 && + (buffer_list[i].size & ~PAGE_MASK)) + return -EINVAL; + *total_size += buffer_list[i].size; + if (i > 0) + mask |= buffer_list[i].addr; + else + mask |= buffer_list[i].addr & PAGE_MASK; + if (i != num_phys_buf - 1) + mask |= buffer_list[i].addr + buffer_list[i].size; + else + mask |= (buffer_list[i].addr + buffer_list[i].size + + PAGE_SIZE - 1) & PAGE_MASK; + } + + if (*total_size > 0xFFFFFFFFULL) + return -ENOMEM; + + /* Find largest page shift we can use to cover buffers */ + for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift)) + if ((1ULL << *shift) & mask) + break; + + buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); + buffer_list[0].addr &= ~0ull << *shift; + + *npages = 0; + for (i = 0; i < num_phys_buf; ++i) + *npages += (buffer_list[i].size + + (1ULL << *shift) - 1) >> *shift; + + if (!*npages) + return -EINVAL; + + *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL); + if (!*page_list) + return -ENOMEM; + + n = 0; + for (i = 0; i < num_phys_buf; ++i) + for (j = 0; + j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift; + ++j) + (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr + + ((u64) j << *shift)); + + PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n", + __func__, (unsigned long long)*iova_start, + (unsigned long long)mask, *shift, (unsigned long long)*total_size, + *npages); + + return 0; + +} + +int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask, + struct ib_pd *pd, struct ib_phys_buf *buffer_list, + int num_phys_buf, int 
acc, u64 *iova_start)
+{
+
+	struct c4iw_mr mh, *mhp;
+	struct c4iw_pd *php;
+	struct c4iw_dev *rhp;
+	__be64 *page_list = NULL;
+	int shift = 0;
+	u64 total_size;
+	int npages = 0;
+	int ret;
+
+	PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
+
+	/* There can be no memory windows */
+	if (atomic_read(&mr->usecnt))
+		return -EINVAL;
+
+	mhp = to_c4iw_mr(mr);
+	rhp = mhp->rhp;
+	php = to_c4iw_pd(mr->pd);
+
+	/* make sure we are on the same adapter */
+	if (rhp != php->rhp)
+		return -EINVAL;
+
+	memcpy(&mh, mhp, sizeof *mhp);
+
+	if (mr_rereg_mask & IB_MR_REREG_PD)
+		php = to_c4iw_pd(pd);
+	if (mr_rereg_mask & IB_MR_REREG_ACCESS) {
+		mh.attr.perms = c4iw_ib_to_tpt_access(acc);
+		mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) ==
+					 IB_ACCESS_MW_BIND;
+	}
+	if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+		ret = build_phys_page_list(buffer_list, num_phys_buf,
+					   iova_start,
+					   &total_size, &npages,
+					   &shift, &page_list);
+		if (ret)
+			return ret;
+	}
+
+	ret = reregister_mem(rhp, php, &mh, shift, npages);
+	kfree(page_list);
+	if (ret)
+		return ret;
+	if (mr_rereg_mask & IB_MR_REREG_PD)
+		mhp->attr.pdid = php->pdid;
+	if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+		mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
+	if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+		mhp->attr.zbva = 0;
+		mhp->attr.va_fbo = *iova_start;
+		mhp->attr.page_size = shift - 12;
+		mhp->attr.len = (u32) total_size;
+		mhp->attr.pbl_size = npages;
+	}
+
+	return 0;
+}
+
+struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
+				     struct ib_phys_buf *buffer_list,
+				     int num_phys_buf, int acc, u64 *iova_start)
+{
+	__be64 *page_list;
+	int shift;
+	u64 total_size;
+	int npages;
+	struct c4iw_dev *rhp;
+	struct c4iw_pd *php;
+	struct c4iw_mr *mhp;
+	int ret;
+
+	PDBG("%s ib_pd %p\n", __func__, pd);
+	php = to_c4iw_pd(pd);
+	rhp = php->rhp;
+
+	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+	if (!mhp)
+		return ERR_PTR(-ENOMEM);
+
+	mhp->rhp = rhp;
+
+	/* First check that we have enough alignment */
+	if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (num_phys_buf > 1 &&
+	    ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
+				   &total_size, &npages, &shift,
+				   &page_list);
+	if (ret)
+		goto err;
+
+	ret = alloc_pbl(mhp, npages);
+	if (ret) {
+		kfree(page_list);
+		goto err;
+	}
+
+	ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
+			npages);
+	kfree(page_list);
+	if (ret)
+		goto err_pbl;
+
+	mhp->attr.pdid = php->pdid;
+	mhp->attr.zbva = 0;
+
+	mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
+	mhp->attr.va_fbo = *iova_start;
+	mhp->attr.page_size = shift - 12;
+
+	mhp->attr.len = (u32) total_size;
+	mhp->attr.pbl_size = npages;
+	ret = register_mem(rhp, php, mhp, shift);
+	if (ret)
+		goto err_pbl;
+
+	return &mhp->ibmr;
+
+err_pbl:
+	c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+			  mhp->attr.pbl_size << 3);
+
+err:
+	kfree(mhp);
+	return ERR_PTR(ret);
+
+}
+
+struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	struct c4iw_dev *rhp;
+	struct c4iw_pd *php;
+	struct c4iw_mr *mhp;
+	int ret;
+	u32 stag = T4_STAG_UNSET;
+
+	PDBG("%s ib_pd %p\n", __func__, pd);
+	php = to_c4iw_pd(pd);
+	rhp = php->rhp;
+
+	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+	if (!mhp)
+		return ERR_PTR(-ENOMEM);
+
+	mhp->rhp = rhp;
+	mhp->attr.pdid = php->pdid;
+	mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
+	mhp->attr.mw_bind_enable = (acc&IB_ACCESS_MW_BIND) == IB_ACCESS_MW_BIND;
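+	/*
+	 * Editor's note: the remaining attributes describe a full-memory
+	 * DMA region -- VA/FBO of 0, length ~0 and no PBL -- so the stag
+	 * written to the TPT covers all memory the adapter can DMA to.
+	 */
+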
mhp->attr.zbva = 0; + mhp->attr.va_fbo = 0; + mhp->attr.page_size = 0; + mhp->attr.len = ~0UL; + mhp->attr.pbl_size = 0; + + ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid, + FW_RI_STAG_NSMR, mhp->attr.perms, + mhp->attr.mw_bind_enable, 0, 0, ~0UL, 0, 0, 0); + if (ret) + goto err1; + + ret = finish_mem_reg(mhp, stag); + if (ret) + goto err2; + return &mhp->ibmr; +err2: + dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); +err1: + kfree(mhp); + return ERR_PTR(ret); +} + +struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int acc, struct ib_udata *udata) +{ + __be64 *pages; + int shift, n, len; + int i, k, entry; + int err = 0; + struct scatterlist *sg; + struct c4iw_dev *rhp; + struct c4iw_pd *php; + struct c4iw_mr *mhp; + + PDBG("%s ib_pd %p\n", __func__, pd); + + if (length == ~0ULL) + return ERR_PTR(-EINVAL); + + if ((length + start) < start) + return ERR_PTR(-EINVAL); + + php = to_c4iw_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + + mhp->rhp = rhp; + + mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); + if (IS_ERR(mhp->umem)) { + err = PTR_ERR(mhp->umem); + kfree(mhp); + return ERR_PTR(err); + } + + shift = ffs(mhp->umem->page_size) - 1; + + n = mhp->umem->nmap; + err = alloc_pbl(mhp, n); + if (err) + goto err; + + pages = (__be64 *) __get_free_page(GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err_pbl; + } + + i = n = 0; + + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = cpu_to_be64(sg_dma_address(sg) + + mhp->umem->page_size * k); + if (i == PAGE_SIZE / sizeof *pages) { + err = write_pbl(&mhp->rhp->rdev, + pages, + mhp->attr.pbl_addr + (n << 3), i); + if (err) + goto pbl_done; + n += i; + i = 0; + } + } + } + + if (i) + err = write_pbl(&mhp->rhp->rdev, pages, + mhp->attr.pbl_addr + (n << 3), i); + +pbl_done: + free_page((unsigned long) pages); + if (err) + goto err_pbl; + + mhp->attr.pdid = php->pdid; + mhp->attr.zbva = 0; + mhp->attr.perms = c4iw_ib_to_tpt_access(acc); + mhp->attr.va_fbo = virt; + mhp->attr.page_size = shift - 12; + mhp->attr.len = length; + + err = register_mem(rhp, php, mhp, shift); + if (err) + goto err_pbl; + + return &mhp->ibmr; + +err_pbl: + c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); + +err: + ib_umem_release(mhp->umem); + kfree(mhp); + return ERR_PTR(err); +} + +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +{ + struct c4iw_dev *rhp; + struct c4iw_pd *php; + struct c4iw_mw *mhp; + u32 mmid; + u32 stag = 0; + int ret; + + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + + php = to_c4iw_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) + return ERR_PTR(-ENOMEM); + ret = allocate_window(&rhp->rdev, &stag, php->pdid); + if (ret) { + kfree(mhp); + return ERR_PTR(ret); + } + mhp->rhp = rhp; + mhp->attr.pdid = php->pdid; + mhp->attr.type = FW_RI_STAG_MW; + mhp->attr.stag = stag; + mmid = (stag) >> 8; + mhp->ibmw.rkey = stag; + if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + deallocate_window(&rhp->rdev, mhp->attr.stag); + kfree(mhp); + return ERR_PTR(-ENOMEM); + } + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); + return &(mhp->ibmw); +} + +int c4iw_dealloc_mw(struct ib_mw *mw) +{ + struct c4iw_dev *rhp; + struct c4iw_mw *mhp; + u32 mmid; + + mhp = to_c4iw_mw(mw); + rhp = mhp->rhp; + mmid 
= (mw->rkey) >> 8; + remove_handle(rhp, &rhp->mmidr, mmid); + deallocate_window(&rhp->rdev, mhp->attr.stag); + kfree(mhp); + PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); + return 0; +} + +struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth) +{ + struct c4iw_dev *rhp; + struct c4iw_pd *php; + struct c4iw_mr *mhp; + u32 mmid; + u32 stag = 0; + int ret = 0; + + php = to_c4iw_pd(pd); + rhp = php->rhp; + mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); + if (!mhp) { + ret = -ENOMEM; + goto err; + } + + mhp->rhp = rhp; + ret = alloc_pbl(mhp, pbl_depth); + if (ret) + goto err1; + mhp->attr.pbl_size = pbl_depth; + ret = allocate_stag(&rhp->rdev, &stag, php->pdid, + mhp->attr.pbl_size, mhp->attr.pbl_addr); + if (ret) + goto err2; + mhp->attr.pdid = php->pdid; + mhp->attr.type = FW_RI_STAG_NSMR; + mhp->attr.stag = stag; + mhp->attr.state = 1; + mmid = (stag) >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { + ret = -ENOMEM; + goto err3; + } + + PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag); + return &(mhp->ibmr); +err3: + dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); +err2: + c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); +err1: + kfree(mhp); +err: + return ERR_PTR(ret); +} + +struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, + int page_list_len) +{ + struct c4iw_fr_page_list *c4pl; + struct c4iw_dev *dev = to_c4iw_dev(device); + dma_addr_t dma_addr; + int pll_len = roundup(page_list_len * sizeof(u64), 32); + + c4pl = kmalloc(sizeof(*c4pl), GFP_KERNEL); + if (!c4pl) + return ERR_PTR(-ENOMEM); + + c4pl->ibpl.page_list = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev, + pll_len, &dma_addr, + GFP_KERNEL); + if (!c4pl->ibpl.page_list) { + kfree(c4pl); + return ERR_PTR(-ENOMEM); + } + dma_unmap_addr_set(c4pl, mapping, dma_addr); + c4pl->dma_addr = dma_addr; + c4pl->dev = dev; + c4pl->pll_len = pll_len; + + PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", + __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, + &c4pl->dma_addr); + + return &c4pl->ibpl; +} + +void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) +{ + struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); + + PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", + __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, + &c4pl->dma_addr); + + dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, + c4pl->pll_len, + c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping)); + kfree(c4pl); +} + +int c4iw_dereg_mr(struct ib_mr *ib_mr) +{ + struct c4iw_dev *rhp; + struct c4iw_mr *mhp; + u32 mmid; + + PDBG("%s ib_mr %p\n", __func__, ib_mr); + /* There can be no memory windows */ + if (atomic_read(&ib_mr->usecnt)) + return -EINVAL; + + mhp = to_c4iw_mr(ib_mr); + rhp = mhp->rhp; + mmid = mhp->attr.stag >> 8; + remove_handle(rhp, &rhp->mmidr, mmid); + dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + if (mhp->attr.pbl_size) + c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, + mhp->attr.pbl_size << 3); + if (mhp->kva) + kfree((void *) (unsigned long) mhp->kva); + if (mhp->umem) + ib_umem_release(mhp->umem); + PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp); + kfree(mhp); + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c new file mode 100644 index 00000000000..b1d305338de --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -0,0 +1,586 @@ 
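+/*
+ * Editor's note: provider.c registers the cxgb4 device with the IB core
+ * and implements the ucontext, PD, mmap and query verbs; the QP, CQ and
+ * MR entry points it wires up live in the sibling files of this patch.
+ */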
+/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/device.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/ethtool.h> +#include <linux/rtnetlink.h> +#include <linux/inetdevice.h> +#include <linux/io.h> + +#include <asm/irq.h> +#include <asm/byteorder.h> + +#include <rdma/iw_cm.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h> + +#include "iw_cxgb4.h" + +static int fastreg_support = 1; +module_param(fastreg_support, int, 0644); +MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); + +static struct ib_ah *c4iw_ah_create(struct ib_pd *pd, + struct ib_ah_attr *ah_attr) +{ + return ERR_PTR(-ENOSYS); +} + +static int c4iw_ah_destroy(struct ib_ah *ah) +{ + return -ENOSYS; +} + +static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + return -ENOSYS; +} + +static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + return -ENOSYS; +} + +static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, + u8 port_num, struct ib_wc *in_wc, + struct ib_grh *in_grh, struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + return -ENOSYS; +} + +static int c4iw_dealloc_ucontext(struct ib_ucontext *context) +{ + struct c4iw_dev *rhp = to_c4iw_dev(context->device); + struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context); + struct c4iw_mm_entry *mm, *tmp; + + PDBG("%s context %p\n", __func__, context); + list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry) + kfree(mm); + c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx); + kfree(ucontext); + return 0; +} + +static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct c4iw_ucontext *context; + struct c4iw_dev *rhp = to_c4iw_dev(ibdev); + static int warned; + struct c4iw_alloc_ucontext_resp uresp; + int ret = 0; + struct c4iw_mm_entry *mm = NULL; + + PDBG("%s ibdev %p\n", __func__, ibdev); + context = 
kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
+	INIT_LIST_HEAD(&context->mmaps);
+	spin_lock_init(&context->mmap_lock);
+
+	if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
+		if (!warned++)
+			pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled.\n");
+		rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED;
+	} else {
+		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
+		if (!mm) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
+
+		uresp.status_page_size = PAGE_SIZE;
+
+		spin_lock(&context->mmap_lock);
+		uresp.status_page_key = context->key;
+		context->key += PAGE_SIZE;
+		spin_unlock(&context->mmap_lock);
+
+		ret = ib_copy_to_udata(udata, &uresp,
+				       sizeof(uresp) - sizeof(uresp.reserved));
+		if (ret)
+			goto err_mm;
+
+		mm->key = uresp.status_page_key;
+		mm->addr = virt_to_phys(rhp->rdev.status_page);
+		mm->len = PAGE_SIZE;
+		insert_mmap(context, mm);
+	}
+	return &context->ibucontext;
+err_mm:
+	kfree(mm);
+err_free:
+	kfree(context);
+err:
+	return ERR_PTR(ret);
+}
+
+static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	int len = vma->vm_end - vma->vm_start;
+	u32 key = vma->vm_pgoff << PAGE_SHIFT;
+	struct c4iw_rdev *rdev;
+	int ret = 0;
+	struct c4iw_mm_entry *mm;
+	struct c4iw_ucontext *ucontext;
+	u64 addr;
+
+	PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
+	     key, len);
+
+	if (vma->vm_start & (PAGE_SIZE-1))
+		return -EINVAL;
+
+	rdev = &(to_c4iw_dev(context->device)->rdev);
+	ucontext = to_c4iw_ucontext(context);
+
+	mm = remove_mmap(ucontext, key, len);
+	if (!mm)
+		return -EINVAL;
+	addr = mm->addr;
+	kfree(mm);
+
+	if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) &&
+	    (addr < (pci_resource_start(rdev->lldi.pdev, 0) +
+		    pci_resource_len(rdev->lldi.pdev, 0)))) {
+
+		/*
+		 * MA_SYNC register...
+		 */
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		ret = io_remap_pfn_range(vma, vma->vm_start,
+					 addr >> PAGE_SHIFT,
+					 len, vma->vm_page_prot);
+	} else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) &&
+		   (addr < (pci_resource_start(rdev->lldi.pdev, 2) +
+		    pci_resource_len(rdev->lldi.pdev, 2)))) {
+
+		/*
+		 * Map user DB or OCQP memory...
+		 */
+		if (addr >= rdev->oc_mw_pa)
+			vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot);
+		else {
+			if (is_t5(rdev->lldi.adapter_type))
+				vma->vm_page_prot =
+					t4_pgprot_wc(vma->vm_page_prot);
+			else
+				vma->vm_page_prot =
+					pgprot_noncached(vma->vm_page_prot);
+		}
+		ret = io_remap_pfn_range(vma, vma->vm_start,
+					 addr >> PAGE_SHIFT,
+					 len, vma->vm_page_prot);
+	} else {
+
+		/*
+		 * Map WQ or CQ contig dma memory...
+ */ + ret = remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } + + return ret; +} + +static int c4iw_deallocate_pd(struct ib_pd *pd) +{ + struct c4iw_dev *rhp; + struct c4iw_pd *php; + + php = to_c4iw_pd(pd); + rhp = php->rhp; + PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); + c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid); + mutex_lock(&rhp->rdev.stats.lock); + rhp->rdev.stats.pd.cur--; + mutex_unlock(&rhp->rdev.stats.lock); + kfree(php); + return 0; +} + +static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct c4iw_pd *php; + u32 pdid; + struct c4iw_dev *rhp; + + PDBG("%s ibdev %p\n", __func__, ibdev); + rhp = (struct c4iw_dev *) ibdev; + pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table); + if (!pdid) + return ERR_PTR(-EINVAL); + php = kzalloc(sizeof(*php), GFP_KERNEL); + if (!php) { + c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid); + return ERR_PTR(-ENOMEM); + } + php->pdid = pdid; + php->rhp = rhp; + if (context) { + if (ib_copy_to_udata(udata, &php->pdid, sizeof(u32))) { + c4iw_deallocate_pd(&php->ibpd); + return ERR_PTR(-EFAULT); + } + } + mutex_lock(&rhp->rdev.stats.lock); + rhp->rdev.stats.pd.cur++; + if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max) + rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur; + mutex_unlock(&rhp->rdev.stats.lock); + PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); + return &php->ibpd; +} + +static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + PDBG("%s ibdev %p\n", __func__, ibdev); + *pkey = 0; + return 0; +} + +static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct c4iw_dev *dev; + + PDBG("%s ibdev %p, port %d, index %d, gid %p\n", + __func__, ibdev, port, index, gid); + dev = to_c4iw_dev(ibdev); + BUG_ON(port == 0); + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); + memcpy(&(gid->raw[0]), dev->rdev.lldi.ports[port-1]->dev_addr, 6); + return 0; +} + +static int c4iw_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + + struct c4iw_dev *dev; + PDBG("%s ibdev %p\n", __func__, ibdev); + + dev = to_c4iw_dev(ibdev); + memset(props, 0, sizeof *props); + memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); + props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type); + props->fw_ver = dev->rdev.lldi.fw_vers; + props->device_cap_flags = dev->device_cap_flags; + props->page_size_cap = T4_PAGESIZE_MASK; + props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor; + props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device; + props->max_mr_size = T4_MAX_MR_SIZE; + props->max_qp = T4_MAX_NUM_QP; + props->max_qp_wr = T4_MAX_QP_DEPTH; + props->max_sge = T4_MAX_RECV_SGE; + props->max_sge_rd = 1; + props->max_qp_rd_atom = c4iw_max_read_depth; + props->max_qp_init_rd_atom = c4iw_max_read_depth; + props->max_cq = T4_MAX_NUM_CQ; + props->max_cqe = T4_MAX_CQ_DEPTH; + props->max_mr = c4iw_num_stags(&dev->rdev); + props->max_pd = T4_MAX_NUM_PD; + props->local_ca_ack_delay = 0; + props->max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl); + + return 0; +} + +static int c4iw_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct c4iw_dev *dev; + struct net_device *netdev; + struct in_device *inetdev; + + PDBG("%s ibdev %p\n", __func__, ibdev); + + dev = to_c4iw_dev(ibdev); + netdev = dev->rdev.lldi.ports[port-1]; + + memset(props, 0, sizeof(struct ib_port_attr)); 
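+	/*
+	 * Editor's note: iWARP runs over Ethernet, so the active IB MTU
+	 * below is derived by rounding the netdev MTU down to the nearest
+	 * IB enum value (256..4096), and the port state is inferred from
+	 * the netdev carrier and its configured IP addresses.
+	 */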
+	props->max_mtu = IB_MTU_4096;
+	if (netdev->mtu >= 4096)
+		props->active_mtu = IB_MTU_4096;
+	else if (netdev->mtu >= 2048)
+		props->active_mtu = IB_MTU_2048;
+	else if (netdev->mtu >= 1024)
+		props->active_mtu = IB_MTU_1024;
+	else if (netdev->mtu >= 512)
+		props->active_mtu = IB_MTU_512;
+	else
+		props->active_mtu = IB_MTU_256;
+
+	if (!netif_carrier_ok(netdev))
+		props->state = IB_PORT_DOWN;
+	else {
+		inetdev = in_dev_get(netdev);
+		if (inetdev) {
+			if (inetdev->ifa_list)
+				props->state = IB_PORT_ACTIVE;
+			else
+				props->state = IB_PORT_INIT;
+			in_dev_put(inetdev);
+		} else
+			props->state = IB_PORT_INIT;
+	}
+
+	props->port_cap_flags =
+	    IB_PORT_CM_SUP |
+	    IB_PORT_SNMP_TUNNEL_SUP |
+	    IB_PORT_REINIT_SUP |
+	    IB_PORT_DEVICE_MGMT_SUP |
+	    IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->active_width = 2;
+	props->active_speed = IB_SPEED_DDR;
+	props->max_msg_sz = -1;
+
+	return 0;
+}
+
+static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
+						 ibdev.dev);
+	PDBG("%s dev 0x%p\n", __func__, dev);
+	return sprintf(buf, "%d\n",
+		       CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
+}
+
+static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
+						 ibdev.dev);
+	PDBG("%s dev 0x%p\n", __func__, dev);
+
+	return sprintf(buf, "%u.%u.%u.%u\n",
+		       FW_HDR_FW_VER_MAJOR_GET(c4iw_dev->rdev.lldi.fw_vers),
+		       FW_HDR_FW_VER_MINOR_GET(c4iw_dev->rdev.lldi.fw_vers),
+		       FW_HDR_FW_VER_MICRO_GET(c4iw_dev->rdev.lldi.fw_vers),
+		       FW_HDR_FW_VER_BUILD_GET(c4iw_dev->rdev.lldi.fw_vers));
+}
+
+static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
+						 ibdev.dev);
+	struct ethtool_drvinfo info;
+	struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0];
+
+	PDBG("%s dev 0x%p\n", __func__, dev);
+	lldev->ethtool_ops->get_drvinfo(lldev, &info);
+	return sprintf(buf, "%s\n", info.driver);
+}
+
+static ssize_t show_board(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
+						 ibdev.dev);
+	PDBG("%s dev 0x%p\n", __func__, dev);
+	return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
+		       c4iw_dev->rdev.lldi.pdev->device);
+}
+
+static int c4iw_get_mib(struct ib_device *ibdev,
+			union rdma_protocol_stats *stats)
+{
+	struct tp_tcp_stats v4, v6;
+	struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev);
+
+	cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6);
+	memset(stats, 0, sizeof *stats);
+	stats->iw.tcpInSegs = v4.tcpInSegs + v6.tcpInSegs;
+	stats->iw.tcpOutSegs = v4.tcpOutSegs + v6.tcpOutSegs;
+	stats->iw.tcpRetransSegs = v4.tcpRetransSegs + v6.tcpRetransSegs;
+	stats->iw.tcpOutRsts = v4.tcpOutRsts + v6.tcpOutRsts;
+
+	return 0;
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct device_attribute *c4iw_class_attributes[] = {
+	&dev_attr_hw_rev,
+	&dev_attr_fw_ver,
+	&dev_attr_hca_type,
+	&dev_attr_board_id,
+};
+
+int c4iw_register_device(struct c4iw_dev *dev)
+{
+	int ret;
+	int i;
+
+	PDBG("%s c4iw_dev %p\n", __func__, dev);
+	BUG_ON(!dev->rdev.lldi.ports[0]);
+	strlcpy(dev->ibdev.name,
"cxgb4_%d", IB_DEVICE_NAME_MAX); + memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); + memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); + dev->ibdev.owner = THIS_MODULE; + dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; + if (fastreg_support) + dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + dev->ibdev.local_dma_lkey = 0; + dev->ibdev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV); + dev->ibdev.node_type = RDMA_NODE_RNIC; + memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); + dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; + dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; + dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev); + dev->ibdev.query_device = c4iw_query_device; + dev->ibdev.query_port = c4iw_query_port; + dev->ibdev.query_pkey = c4iw_query_pkey; + dev->ibdev.query_gid = c4iw_query_gid; + dev->ibdev.alloc_ucontext = c4iw_alloc_ucontext; + dev->ibdev.dealloc_ucontext = c4iw_dealloc_ucontext; + dev->ibdev.mmap = c4iw_mmap; + dev->ibdev.alloc_pd = c4iw_allocate_pd; + dev->ibdev.dealloc_pd = c4iw_deallocate_pd; + dev->ibdev.create_ah = c4iw_ah_create; + dev->ibdev.destroy_ah = c4iw_ah_destroy; + dev->ibdev.create_qp = c4iw_create_qp; + dev->ibdev.modify_qp = c4iw_ib_modify_qp; + dev->ibdev.query_qp = c4iw_ib_query_qp; + dev->ibdev.destroy_qp = c4iw_destroy_qp; + dev->ibdev.create_cq = c4iw_create_cq; + dev->ibdev.destroy_cq = c4iw_destroy_cq; + dev->ibdev.resize_cq = c4iw_resize_cq; + dev->ibdev.poll_cq = c4iw_poll_cq; + dev->ibdev.get_dma_mr = c4iw_get_dma_mr; + dev->ibdev.reg_phys_mr = c4iw_register_phys_mem; + dev->ibdev.rereg_phys_mr = c4iw_reregister_phys_mem; + dev->ibdev.reg_user_mr = c4iw_reg_user_mr; + dev->ibdev.dereg_mr = c4iw_dereg_mr; + dev->ibdev.alloc_mw = c4iw_alloc_mw; + dev->ibdev.bind_mw = c4iw_bind_mw; + dev->ibdev.dealloc_mw = c4iw_dealloc_mw; + dev->ibdev.alloc_fast_reg_mr = c4iw_alloc_fast_reg_mr; + dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl; + dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl; + dev->ibdev.attach_mcast = c4iw_multicast_attach; + dev->ibdev.detach_mcast = c4iw_multicast_detach; + dev->ibdev.process_mad = c4iw_process_mad; + dev->ibdev.req_notify_cq = c4iw_arm_cq; + dev->ibdev.post_send = c4iw_post_send; + dev->ibdev.post_recv = c4iw_post_receive; + dev->ibdev.get_protocol_stats = c4iw_get_mib; + dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; + + dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); + if (!dev->ibdev.iwcm) + return -ENOMEM; + + dev->ibdev.iwcm->connect = c4iw_connect; + dev->ibdev.iwcm->accept = c4iw_accept_cr; + dev->ibdev.iwcm->reject = c4iw_reject_cr; + dev->ibdev.iwcm->create_listen = c4iw_create_listen; + dev->ibdev.iwcm->destroy_listen = c4iw_destroy_listen; + 
dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref; + dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref; + dev->ibdev.iwcm->get_qp = c4iw_get_qp; + + ret = ib_register_device(&dev->ibdev, NULL); + if (ret) + goto bail1; + + for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) { + ret = device_create_file(&dev->ibdev.dev, + c4iw_class_attributes[i]); + if (ret) + goto bail2; + } + return 0; +bail2: + ib_unregister_device(&dev->ibdev); +bail1: + kfree(dev->ibdev.iwcm); + return ret; +} + +void c4iw_unregister_device(struct c4iw_dev *dev) +{ + int i; + + PDBG("%s c4iw_dev %p\n", __func__, dev); + for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) + device_remove_file(&dev->ibdev.dev, + c4iw_class_attributes[i]); + ib_unregister_device(&dev->ibdev); + kfree(dev->ibdev.iwcm); + return; +} diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c new file mode 100644 index 00000000000..086f62f5dc9 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -0,0 +1,1808 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include <linux/module.h>
+
+#include "iw_cxgb4.h"
+
+static int db_delay_usecs = 1;
+module_param(db_delay_usecs, int, 0644);
+MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain");
+
+static int ocqp_support = 1;
+module_param(ocqp_support, int, 0644);
+MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
+
+int db_fc_threshold = 1000;
+module_param(db_fc_threshold, int, 0644);
+MODULE_PARM_DESC(db_fc_threshold,
+		 "QP count/threshold that triggers"
+		 " automatic db flow control mode (default = 1000)");
+
+int db_coalescing_threshold;
+module_param(db_coalescing_threshold, int, 0644);
+MODULE_PARM_DESC(db_coalescing_threshold,
+		 "QP count/threshold that triggers"
+		 " disabling db coalescing (default = 0)");
+
+static int max_fr_immd = T4_MAX_FR_IMMD;
+module_param(max_fr_immd, int, 0644);
+MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immediate");
+
+static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
+{
+	unsigned long flag;
+	spin_lock_irqsave(&qhp->lock, flag);
+	qhp->attr.state = state;
+	spin_unlock_irqrestore(&qhp->lock, flag);
+}
+
+static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+	c4iw_ocqp_pool_free(rdev, sq->dma_addr, sq->memsize);
+}
+
+static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+	dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue,
+			  pci_unmap_addr(sq, mapping));
+}
+
+static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+	if (t4_sq_onchip(sq))
+		dealloc_oc_sq(rdev, sq);
+	else
+		dealloc_host_sq(rdev, sq);
+}
+
+static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+	if (!ocqp_support || !ocqp_supported(&rdev->lldi))
+		return -ENOSYS;
+	sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize);
+	if (!sq->dma_addr)
+		return -ENOMEM;
+	sq->phys_addr = rdev->oc_mw_pa + sq->dma_addr -
+			rdev->lldi.vr->ocq.start;
+	sq->queue = (__force union t4_wr *)(rdev->oc_mw_kva + sq->dma_addr -
+					    rdev->lldi.vr->ocq.start);
+	sq->flags |= T4_SQ_ONCHIP;
+	return 0;
+}
+
+static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+	sq->queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), sq->memsize,
+				       &(sq->dma_addr), GFP_KERNEL);
+	if (!sq->queue)
+		return -ENOMEM;
+	sq->phys_addr = virt_to_phys(sq->queue);
+	pci_unmap_addr_set(sq, mapping, sq->dma_addr);
+	return 0;
+}
+
+static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user)
+{
+	int ret = -ENOSYS;
+	if (user)
+		ret = alloc_oc_sq(rdev, sq);
+	if (ret)
+		ret = alloc_host_sq(rdev, sq);
+	return ret;
+}
+
+static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
+		      struct c4iw_dev_ucontext *uctx)
+{
+	/*
+	 * uP clears EQ contexts when the connection exits rdma mode,
+	 * so no need to post a RESET WR for these EQs.
+ */ + dma_free_coherent(&(rdev->lldi.pdev->dev), + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); + dealloc_sq(rdev, &wq->sq); + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); + kfree(wq->rq.sw_rq); + kfree(wq->sq.sw_sq); + c4iw_put_qpid(rdev, wq->rq.qid, uctx); + c4iw_put_qpid(rdev, wq->sq.qid, uctx); + return 0; +} + +static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, + struct t4_cq *rcq, struct t4_cq *scq, + struct c4iw_dev_ucontext *uctx) +{ + int user = (uctx != &rdev->uctx); + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + struct c4iw_wr_wait wr_wait; + struct sk_buff *skb; + int ret = 0; + int eqsize; + + wq->sq.qid = c4iw_get_qpid(rdev, uctx); + if (!wq->sq.qid) + return -ENOMEM; + + wq->rq.qid = c4iw_get_qpid(rdev, uctx); + if (!wq->rq.qid) { + ret = -ENOMEM; + goto free_sq_qid; + } + + if (!user) { + wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq, + GFP_KERNEL); + if (!wq->sq.sw_sq) { + ret = -ENOMEM; + goto free_rq_qid; + } + + wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq, + GFP_KERNEL); + if (!wq->rq.sw_rq) { + ret = -ENOMEM; + goto free_sw_sq; + } + } + + /* + * RQT must be a power of 2. + */ + wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size); + wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); + if (!wq->rq.rqt_hwaddr) { + ret = -ENOMEM; + goto free_sw_rq; + } + + ret = alloc_sq(rdev, &wq->sq, user); + if (ret) + goto free_hwaddr; + memset(wq->sq.queue, 0, wq->sq.memsize); + dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); + + wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), + wq->rq.memsize, &(wq->rq.dma_addr), + GFP_KERNEL); + if (!wq->rq.queue) { + ret = -ENOMEM; + goto free_sq; + } + PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", + __func__, wq->sq.queue, + (unsigned long long)virt_to_phys(wq->sq.queue), + wq->rq.queue, + (unsigned long long)virt_to_phys(wq->rq.queue)); + memset(wq->rq.queue, 0, wq->rq.memsize); + dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); + + wq->db = rdev->lldi.db_reg; + wq->gts = rdev->lldi.gts_reg; + if (user || is_t5(rdev->lldi.adapter_type)) { + u32 off; + + off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK; + if (user) { + wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off); + } else { + off += 128 * (wq->sq.qid & rdev->qpmask) + 8; + wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off); + } + off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK; + if (user) { + wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off); + } else { + off += 128 * (wq->rq.qid & rdev->qpmask) + 8; + wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off); + } + } + wq->rdev = rdev; + wq->rq.msn = 1; + + /* build fw_ri_res_wr */ + wr_len = sizeof *res_wr + 2 * sizeof *res; + + skb = alloc_skb(wr_len, GFP_KERNEL); + if (!skb) { + ret = -ENOMEM; + goto free_dma; + } + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32( + FW_WR_OP(FW_RI_RES_WR) | + V_FW_RI_RES_WR_NRES(2) | + FW_WR_COMPL(1)); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (unsigned long) &wr_wait; + res = res_wr->res; + res->u.sqrq.restype = FW_RI_RES_TYPE_SQ; + res->u.sqrq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size. 
+ */ + eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES; + + res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( + V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ + V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ + V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ + (t4_sq_onchip(&wq->sq) ? F_FW_RI_RES_WR_ONCHIP : 0) | + V_FW_RI_RES_WR_IQID(scq->cqid)); + res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( + V_FW_RI_RES_WR_DCAEN(0) | + V_FW_RI_RES_WR_DCACPU(0) | + V_FW_RI_RES_WR_FBMIN(2) | + V_FW_RI_RES_WR_FBMAX(2) | + V_FW_RI_RES_WR_CIDXFTHRESHO(0) | + V_FW_RI_RES_WR_CIDXFTHRESH(0) | + V_FW_RI_RES_WR_EQSIZE(eqsize)); + res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); + res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); + res++; + res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; + res->u.sqrq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size. + */ + eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + T4_EQ_STATUS_ENTRIES; + res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( + V_FW_RI_RES_WR_HOSTFCMODE(0) | /* no host cidx updates */ + V_FW_RI_RES_WR_CPRIO(0) | /* don't keep in chip cache */ + V_FW_RI_RES_WR_PCIECHN(0) | /* set by uP at ri_init time */ + V_FW_RI_RES_WR_IQID(rcq->cqid)); + res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( + V_FW_RI_RES_WR_DCAEN(0) | + V_FW_RI_RES_WR_DCACPU(0) | + V_FW_RI_RES_WR_FBMIN(2) | + V_FW_RI_RES_WR_FBMAX(2) | + V_FW_RI_RES_WR_CIDXFTHRESHO(0) | + V_FW_RI_RES_WR_CIDXFTHRESH(0) | + V_FW_RI_RES_WR_EQSIZE(eqsize)); + res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); + res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); + + c4iw_init_wr_wait(&wr_wait); + + ret = c4iw_ofld_send(rdev, skb); + if (ret) + goto free_dma; + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); + if (ret) + goto free_dma; + + PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%lx rqudb 0x%lx\n", + __func__, wq->sq.qid, wq->rq.qid, wq->db, + (__force unsigned long) wq->sq.udb, + (__force unsigned long) wq->rq.udb); + + return 0; +free_dma: + dma_free_coherent(&(rdev->lldi.pdev->dev), + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); +free_sq: + dealloc_sq(rdev, &wq->sq); +free_hwaddr: + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); +free_sw_rq: + kfree(wq->rq.sw_rq); +free_sw_sq: + kfree(wq->sq.sw_sq); +free_rq_qid: + c4iw_put_qpid(rdev, wq->rq.qid, uctx); +free_sq_qid: + c4iw_put_qpid(rdev, wq->sq.qid, uctx); + return ret; +} + +static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, + struct ib_send_wr *wr, int max, u32 *plenp) +{ + u8 *dstp, *srcp; + u32 plen = 0; + int i; + int rem, len; + + dstp = (u8 *)immdp->data; + for (i = 0; i < wr->num_sge; i++) { + if ((plen + wr->sg_list[i].length) > max) + return -EMSGSIZE; + srcp = (u8 *)(unsigned long)wr->sg_list[i].addr; + plen += wr->sg_list[i].length; + rem = wr->sg_list[i].length; + while (rem) { + if (dstp == (u8 *)&sq->queue[sq->size]) + dstp = (u8 *)sq->queue; + if (rem <= (u8 *)&sq->queue[sq->size] - dstp) + len = rem; + else + len = (u8 *)&sq->queue[sq->size] - dstp; + memcpy(dstp, srcp, len); + dstp += len; + srcp += len; + rem -= len; + } + } + len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp); + if (len) + memset(dstp, 0, len); + immdp->op = FW_RI_DATA_IMMD; + immdp->r1 = 0; + immdp->r2 = 0; + immdp->immdlen = cpu_to_be32(plen); + *plenp = plen; + return 0; +} + +static int build_isgl(__be64 *queue_start, __be64 *queue_end, + struct fw_ri_isgl *isglp, struct ib_sge *sg_list, + int num_sge, u32 *plenp) + +{ + int i; + u32 plen 
= 0; + __be64 *flitp = (__be64 *)isglp->sge; + + for (i = 0; i < num_sge; i++) { + if ((plen + sg_list[i].length) < plen) + return -EMSGSIZE; + plen += sg_list[i].length; + *flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) | + sg_list[i].length); + if (++flitp == queue_end) + flitp = queue_start; + *flitp = cpu_to_be64(sg_list[i].addr); + if (++flitp == queue_end) + flitp = queue_start; + } + *flitp = (__force __be64)0; + isglp->op = FW_RI_DATA_ISGL; + isglp->r1 = 0; + isglp->nsge = cpu_to_be16(num_sge); + isglp->r2 = 0; + if (plenp) + *plenp = plen; + return 0; +} + +static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) +{ + u32 plen; + int size; + int ret; + + if (wr->num_sge > T4_MAX_SEND_SGE) + return -EINVAL; + switch (wr->opcode) { + case IB_WR_SEND: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.sendop_pkd = cpu_to_be32( + V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE)); + else + wqe->send.sendop_pkd = cpu_to_be32( + V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND)); + wqe->send.stag_inv = 0; + break; + case IB_WR_SEND_WITH_INV: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.sendop_pkd = cpu_to_be32( + V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_SE_INV)); + else + wqe->send.sendop_pkd = cpu_to_be32( + V_FW_RI_SEND_WR_SENDOP(FW_RI_SEND_WITH_INV)); + wqe->send.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); + break; + + default: + return -EINVAL; + } + wqe->send.r3 = 0; + wqe->send.r4 = 0; + + plen = 0; + if (wr->num_sge) { + if (wr->send_flags & IB_SEND_INLINE) { + ret = build_immd(sq, wqe->send.u.immd_src, wr, + T4_MAX_SEND_INLINE, &plen); + if (ret) + return ret; + size = sizeof wqe->send + sizeof(struct fw_ri_immd) + + plen; + } else { + ret = build_isgl((__be64 *)sq->queue, + (__be64 *)&sq->queue[sq->size], + wqe->send.u.isgl_src, + wr->sg_list, wr->num_sge, &plen); + if (ret) + return ret; + size = sizeof wqe->send + sizeof(struct fw_ri_isgl) + + wr->num_sge * sizeof(struct fw_ri_sge); + } + } else { + wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD; + wqe->send.u.immd_src[0].r1 = 0; + wqe->send.u.immd_src[0].r2 = 0; + wqe->send.u.immd_src[0].immdlen = 0; + size = sizeof wqe->send + sizeof(struct fw_ri_immd); + plen = 0; + } + *len16 = DIV_ROUND_UP(size, 16); + wqe->send.plen = cpu_to_be32(plen); + return 0; +} + +static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) +{ + u32 plen; + int size; + int ret; + + if (wr->num_sge > T4_MAX_SEND_SGE) + return -EINVAL; + wqe->write.r2 = 0; + wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey); + wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr); + if (wr->num_sge) { + if (wr->send_flags & IB_SEND_INLINE) { + ret = build_immd(sq, wqe->write.u.immd_src, wr, + T4_MAX_WRITE_INLINE, &plen); + if (ret) + return ret; + size = sizeof wqe->write + sizeof(struct fw_ri_immd) + + plen; + } else { + ret = build_isgl((__be64 *)sq->queue, + (__be64 *)&sq->queue[sq->size], + wqe->write.u.isgl_src, + wr->sg_list, wr->num_sge, &plen); + if (ret) + return ret; + size = sizeof wqe->write + sizeof(struct fw_ri_isgl) + + wr->num_sge * sizeof(struct fw_ri_sge); + } + } else { + wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD; + wqe->write.u.immd_src[0].r1 = 0; + wqe->write.u.immd_src[0].r2 = 0; + wqe->write.u.immd_src[0].immdlen = 0; + size = sizeof wqe->write + sizeof(struct fw_ri_immd); + plen = 0; + } + *len16 = DIV_ROUND_UP(size, 16); + wqe->write.plen = cpu_to_be32(plen); + return 0; +} + +static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +{ 
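+	/*
+	 * Editor's note: a READ with no SGE is encoded below as a 0-byte
+	 * read using dummy stag values (2), so the firmware still sees a
+	 * well-formed read request.
+	 */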
+ if (wr->num_sge > 1) + return -EINVAL; + if (wr->num_sge) { + wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey); + wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr + >> 32)); + wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr); + wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey); + wqe->read.plen = cpu_to_be32(wr->sg_list[0].length); + wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr + >> 32)); + wqe->read.to_sink_lo = cpu_to_be32((u32)(wr->sg_list[0].addr)); + } else { + wqe->read.stag_src = cpu_to_be32(2); + wqe->read.to_src_hi = 0; + wqe->read.to_src_lo = 0; + wqe->read.stag_sink = cpu_to_be32(2); + wqe->read.plen = 0; + wqe->read.to_sink_hi = 0; + wqe->read.to_sink_lo = 0; + } + wqe->read.r2 = 0; + wqe->read.r5 = 0; + *len16 = DIV_ROUND_UP(sizeof wqe->read, 16); + return 0; +} + +static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, + struct ib_recv_wr *wr, u8 *len16) +{ + int ret; + + ret = build_isgl((__be64 *)qhp->wq.rq.queue, + (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size], + &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); + if (ret) + return ret; + *len16 = DIV_ROUND_UP(sizeof wqe->recv + + wr->num_sge * sizeof(struct fw_ri_sge), 16); + return 0; +} + +static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16, u8 t5dev) +{ + + struct fw_ri_immd *imdp; + __be64 *p; + int i; + int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); + int rem; + + if (wr->wr.fast_reg.page_list_len > + t4_max_fr_depth(use_dsgl)) + return -EINVAL; + + wqe->fr.qpbinde_to_dcacpu = 0; + wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12; + wqe->fr.addr_type = FW_RI_VA_BASED_TO; + wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags); + wqe->fr.len_hi = 0; + wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length); + wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey); + wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); + wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & + 0xffffffff); + + if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { + struct c4iw_fr_page_list *c4pl = + to_c4iw_fr_page_list(wr->wr.fast_reg.page_list); + struct fw_ri_dsgl *sglp; + + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + wr->wr.fast_reg.page_list->page_list[i] = (__force u64) + cpu_to_be64((u64) + wr->wr.fast_reg.page_list->page_list[i]); + } + + sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); + sglp->op = FW_RI_DATA_DSGL; + sglp->r1 = 0; + sglp->nsge = cpu_to_be16(1); + sglp->addr0 = cpu_to_be64(c4pl->dma_addr); + sglp->len0 = cpu_to_be32(pbllen); + + *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16); + } else { + imdp = (struct fw_ri_immd *)(&wqe->fr + 1); + imdp->op = FW_RI_DATA_IMMD; + imdp->r1 = 0; + imdp->r2 = 0; + imdp->immdlen = cpu_to_be32(pbllen); + p = (__be64 *)(imdp + 1); + rem = pbllen; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + *p = cpu_to_be64( + (u64)wr->wr.fast_reg.page_list->page_list[i]); + rem -= sizeof(*p); + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + BUG_ON(rem < 0); + while (rem) { + *p = 0; + rem -= sizeof(*p); + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp) + + pbllen, 16); + } + return 0; +} + +static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, + u8 *len16) +{ + wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); + wqe->inv.r2 = 0; + *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16); + 
return 0; +} + +void c4iw_qp_add_ref(struct ib_qp *qp) +{ + PDBG("%s ib_qp %p\n", __func__, qp); + atomic_inc(&(to_c4iw_qp(qp)->refcnt)); +} + +void c4iw_qp_rem_ref(struct ib_qp *qp) +{ + PDBG("%s ib_qp %p\n", __func__, qp); + if (atomic_dec_and_test(&(to_c4iw_qp(qp)->refcnt))) + wake_up(&(to_c4iw_qp(qp)->wait)); +} + +static void add_to_fc_list(struct list_head *head, struct list_head *entry) +{ + if (list_empty(entry)) + list_add_tail(entry, head); +} + +static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) +{ + unsigned long flags; + + spin_lock_irqsave(&qhp->rhp->lock, flags); + spin_lock(&qhp->lock); + if (qhp->rhp->db_state == NORMAL) + t4_ring_sq_db(&qhp->wq, inc, + is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); + else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.sq.wq_pidx_inc += inc; + } + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&qhp->rhp->lock, flags); + return 0; +} + +static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) +{ + unsigned long flags; + + spin_lock_irqsave(&qhp->rhp->lock, flags); + spin_lock(&qhp->lock); + if (qhp->rhp->db_state == NORMAL) + t4_ring_rq_db(&qhp->wq, inc, + is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); + else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.rq.wq_pidx_inc += inc; + } + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&qhp->rhp->lock, flags); + return 0; +} + +int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + int err = 0; + u8 len16 = 0; + enum fw_wr_opcodes fw_opcode = 0; + enum fw_ri_wr_flags fw_flags; + struct c4iw_qp *qhp; + union t4_wr *wqe = NULL; + u32 num_wrs; + struct t4_swsqe *swsqe; + unsigned long flag; + u16 idx = 0; + + qhp = to_c4iw_qp(ibqp); + spin_lock_irqsave(&qhp->lock, flag); + if (t4_wq_in_error(&qhp->wq)) { + spin_unlock_irqrestore(&qhp->lock, flag); + return -EINVAL; + } + num_wrs = t4_sq_avail(&qhp->wq); + if (num_wrs == 0) { + spin_unlock_irqrestore(&qhp->lock, flag); + return -ENOMEM; + } + while (wr) { + if (num_wrs == 0) { + err = -ENOMEM; + *bad_wr = wr; + break; + } + wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + + qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); + + fw_flags = 0; + if (wr->send_flags & IB_SEND_SOLICITED) + fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; + if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all) + fw_flags |= FW_RI_COMPLETION_FLAG; + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + switch (wr->opcode) { + case IB_WR_SEND_WITH_INV: + case IB_WR_SEND: + if (wr->send_flags & IB_SEND_FENCE) + fw_flags |= FW_RI_READ_FENCE_FLAG; + fw_opcode = FW_RI_SEND_WR; + if (wr->opcode == IB_WR_SEND) + swsqe->opcode = FW_RI_SEND; + else + swsqe->opcode = FW_RI_SEND_WITH_INV; + err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); + break; + case IB_WR_RDMA_WRITE: + fw_opcode = FW_RI_RDMA_WRITE_WR; + swsqe->opcode = FW_RI_RDMA_WRITE; + err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16); + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + fw_opcode = FW_RI_RDMA_READ_WR; + swsqe->opcode = FW_RI_READ_REQ; + if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) + fw_flags = FW_RI_RDMA_READ_INVALIDATE; + else + fw_flags = 0; + err = build_rdma_read(wqe, wr, &len16); + if (err) + break; + swsqe->read_len = wr->sg_list[0].length; + if (!qhp->wq.sq.oldest_read) + qhp->wq.sq.oldest_read = swsqe; + break; + case IB_WR_FAST_REG_MR: + fw_opcode = FW_RI_FR_NSMR_WR; + swsqe->opcode = FW_RI_FAST_REGISTER; + err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16, + is_t5( + 
qhp->rhp->rdev.lldi.adapter_type) ? + 1 : 0); + break; + case IB_WR_LOCAL_INV: + if (wr->send_flags & IB_SEND_FENCE) + fw_flags |= FW_RI_LOCAL_FENCE_FLAG; + fw_opcode = FW_RI_INV_LSTAG_WR; + swsqe->opcode = FW_RI_LOCAL_INV; + err = build_inv_stag(wqe, wr, &len16); + break; + default: + PDBG("%s post of type=%d TBD!\n", __func__, + wr->opcode); + err = -EINVAL; + } + if (err) { + *bad_wr = wr; + break; + } + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + swsqe->flushed = 0; + swsqe->wr_id = wr->wr_id; + + init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); + + PDBG("%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n", + __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx, + swsqe->opcode, swsqe->read_len); + wr = wr->next; + num_wrs--; + t4_sq_produce(&qhp->wq, len16); + idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + } + if (!qhp->rhp->rdev.status_page->db_off) { + t4_ring_sq_db(&qhp->wq, idx, + is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); + ring_kernel_sq_db(qhp, idx); + } + return err; +} + +int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + int err = 0; + struct c4iw_qp *qhp; + union t4_recv_wr *wqe = NULL; + u32 num_wrs; + u8 len16 = 0; + unsigned long flag; + u16 idx = 0; + + qhp = to_c4iw_qp(ibqp); + spin_lock_irqsave(&qhp->lock, flag); + if (t4_wq_in_error(&qhp->wq)) { + spin_unlock_irqrestore(&qhp->lock, flag); + return -EINVAL; + } + num_wrs = t4_rq_avail(&qhp->wq); + if (num_wrs == 0) { + spin_unlock_irqrestore(&qhp->lock, flag); + return -ENOMEM; + } + while (wr) { + if (wr->num_sge > T4_MAX_RECV_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } + wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue + + qhp->wq.rq.wq_pidx * + T4_EQ_ENTRY_SIZE); + if (num_wrs) + err = build_rdma_recv(qhp, wqe, wr, &len16); + else + err = -ENOMEM; + if (err) { + *bad_wr = wr; + break; + } + + qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id; + + wqe->recv.opcode = FW_RI_RECV_WR; + wqe->recv.r1 = 0; + wqe->recv.wrid = qhp->wq.rq.pidx; + wqe->recv.r2[0] = 0; + wqe->recv.r2[1] = 0; + wqe->recv.r2[2] = 0; + wqe->recv.len16 = len16; + PDBG("%s cookie 0x%llx pidx %u\n", __func__, + (unsigned long long) wr->wr_id, qhp->wq.rq.pidx); + t4_rq_produce(&qhp->wq, len16); + idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + wr = wr->next; + num_wrs--; + } + if (!qhp->rhp->rdev.status_page->db_off) { + t4_ring_rq_db(&qhp->wq, idx, + is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); + ring_kernel_rq_db(qhp, idx); + } + return err; +} + +int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind) +{ + return -ENOSYS; +} + +static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type, + u8 *ecode) +{ + int status; + int tagged; + int opcode; + int rqtype; + int send_inv; + + if (!err_cqe) { + *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; + *ecode = 0; + return; + } + + status = CQE_STATUS(err_cqe); + opcode = CQE_OPCODE(err_cqe); + rqtype = RQ_TYPE(err_cqe); + send_inv = (opcode == FW_RI_SEND_WITH_INV) || + (opcode == FW_RI_SEND_WITH_SE_INV); + tagged = (opcode == FW_RI_RDMA_WRITE) || + (rqtype && (opcode == FW_RI_READ_RESP)); + + switch (status) { + case T4_ERR_STAG: + if (send_inv) { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + 
*ecode = RDMAP_CANT_INV_STAG; + } else { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_INV_STAG; + } + break; + case T4_ERR_PDID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + if ((opcode == FW_RI_SEND_WITH_INV) || + (opcode == FW_RI_SEND_WITH_SE_INV)) + *ecode = RDMAP_CANT_INV_STAG; + else + *ecode = RDMAP_STAG_NOT_ASSOC; + break; + case T4_ERR_QPID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_STAG_NOT_ASSOC; + break; + case T4_ERR_ACCESS: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_ACC_VIOL; + break; + case T4_ERR_WRAP: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_TO_WRAP; + break; + case T4_ERR_BOUND: + if (tagged) { + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_BASE_BOUNDS; + } else { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_BASE_BOUNDS; + } + break; + case T4_ERR_INVALIDATE_SHARED_MR: + case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_CANT_INV_STAG; + break; + case T4_ERR_ECC: + case T4_ERR_ECC_PSTAG: + case T4_ERR_INTERNAL_ERR: + *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; + *ecode = 0; + break; + case T4_ERR_OUT_OF_RQE: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MSN_NOBUF; + break; + case T4_ERR_PBL_ADDR_BOUND: + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_BASE_BOUNDS; + break; + case T4_ERR_CRC: + *layer_type = LAYER_MPA|DDP_LLP; + *ecode = MPA_CRC_ERR; + break; + case T4_ERR_MARKER: + *layer_type = LAYER_MPA|DDP_LLP; + *ecode = MPA_MARKER_ERR; + break; + case T4_ERR_PDU_LEN_ERR: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_MSG_TOOBIG; + break; + case T4_ERR_DDP_VERSION: + if (tagged) { + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_INV_VERS; + } else { + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_VERS; + } + break; + case T4_ERR_RDMA_VERSION: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_INV_VERS; + break; + case T4_ERR_OPCODE: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_INV_OPCODE; + break; + case T4_ERR_DDP_QUEUE_NUM: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_QN; + break; + case T4_ERR_MSN: + case T4_ERR_MSN_GAP: + case T4_ERR_MSN_RANGE: + case T4_ERR_IRD_OVERFLOW: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MSN_RANGE; + break; + case T4_ERR_TBIT: + *layer_type = LAYER_DDP|DDP_LOCAL_CATA; + *ecode = 0; + break; + case T4_ERR_MO: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MO; + break; + default: + *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; + *ecode = 0; + break; + } +} + +static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, + gfp_t gfp) +{ + struct fw_ri_wr *wqe; + struct sk_buff *skb; + struct terminate_message *term; + + PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, + qhp->ep->hwtid); + + skb = alloc_skb(sizeof *wqe, gfp); + if (!skb) + return; + set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); + + wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); + memset(wqe, 0, sizeof *wqe); + wqe->op_compl = cpu_to_be32(FW_WR_OP(FW_RI_INIT_WR)); + wqe->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID(qhp->ep->hwtid) | + FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); + + wqe->u.terminate.type = FW_RI_TYPE_TERMINATE; + wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term); + term = (struct terminate_message *)wqe->u.terminate.termmsg; + if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) { + term->layer_etype = 
qhp->attr.layer_etype; + term->ecode = qhp->attr.ecode; + } else + build_term_codes(err_cqe, &term->layer_etype, &term->ecode); + c4iw_ofld_send(&qhp->rhp->rdev, skb); +} + +/* + * Assumes qhp lock is held. + */ +static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, + struct c4iw_cq *schp) +{ + int count; + int flushed; + unsigned long flag; + + PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); + + /* locking hierarchy: cq lock first, then qp lock. */ + spin_lock_irqsave(&rchp->lock, flag); + spin_lock(&qhp->lock); + + if (qhp->wq.flushed) { + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); + return; + } + qhp->wq.flushed = 1; + + c4iw_flush_hw_cq(rchp); + c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); + flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); + if (flushed) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + + /* locking hierarchy: cq lock first, then qp lock. */ + spin_lock_irqsave(&schp->lock, flag); + spin_lock(&qhp->lock); + if (schp != rchp) + c4iw_flush_hw_cq(schp); + flushed = c4iw_flush_sq(qhp); + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&schp->lock, flag); + if (flushed) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } +} + +static void flush_qp(struct c4iw_qp *qhp) +{ + struct c4iw_cq *rchp, *schp; + unsigned long flag; + + rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + schp = to_c4iw_cq(qhp->ibqp.send_cq); + + t4_set_wq_in_error(&qhp->wq); + if (qhp->ibqp.uobject) { + t4_set_cq_in_error(&rchp->cq); + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + if (schp != rchp) { + t4_set_cq_in_error(&schp->cq); + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } + return; + } + __flush_qp(qhp, rchp, schp); +} + +static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, + struct c4iw_ep *ep) +{ + struct fw_ri_wr *wqe; + int ret; + struct sk_buff *skb; + + PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, + ep->hwtid); + + skb = alloc_skb(sizeof *wqe, GFP_KERNEL); + if (!skb) + return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); + + wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); + memset(wqe, 0, sizeof *wqe); + wqe->op_compl = cpu_to_be32( + FW_WR_OP(FW_RI_INIT_WR) | + FW_WR_COMPL(1)); + wqe->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID(ep->hwtid) | + FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); + wqe->cookie = (unsigned long) &ep->com.wr_wait; + + wqe->u.fini.type = FW_RI_TYPE_FINI; + ret = c4iw_ofld_send(&rhp->rdev, skb); + if (ret) + goto out; + + ret = c4iw_wait_for_reply(&rhp->rdev, &ep->com.wr_wait, qhp->ep->hwtid, + qhp->wq.sq.qid, __func__); +out: + PDBG("%s ret %d\n", __func__, ret); + return ret; +} + +static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init) +{ + PDBG("%s p2p_type = %d\n", __func__, p2p_type); + memset(&init->u, 0, sizeof init->u); + switch (p2p_type) { + case FW_RI_INIT_P2PTYPE_RDMA_WRITE: + init->u.write.opcode = FW_RI_RDMA_WRITE_WR; + init->u.write.stag_sink = 
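/*
 * The RTR message is a zero-length peer-to-peer operation; the
 * STag/TO values of 1 below appear to be dummies whose only job
 * is to give the firmware a well-formed WR for the MPA startup
 * handshake.
 */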
cpu_to_be32(1); + init->u.write.to_sink = cpu_to_be64(1); + init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD; + init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write + + sizeof(struct fw_ri_immd), + 16); + break; + case FW_RI_INIT_P2PTYPE_READ_REQ: + init->u.write.opcode = FW_RI_RDMA_READ_WR; + init->u.read.stag_src = cpu_to_be32(1); + init->u.read.to_src_lo = cpu_to_be32(1); + init->u.read.stag_sink = cpu_to_be32(1); + init->u.read.to_sink_lo = cpu_to_be32(1); + init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16); + break; + } +} + +static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) +{ + struct fw_ri_wr *wqe; + int ret; + struct sk_buff *skb; + + PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid, + qhp->ep->hwtid); + + skb = alloc_skb(sizeof *wqe, GFP_KERNEL); + if (!skb) + return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); + + wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe)); + memset(wqe, 0, sizeof *wqe); + wqe->op_compl = cpu_to_be32( + FW_WR_OP(FW_RI_INIT_WR) | + FW_WR_COMPL(1)); + wqe->flowid_len16 = cpu_to_be32( + FW_WR_FLOWID(qhp->ep->hwtid) | + FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16))); + + wqe->cookie = (unsigned long) &qhp->ep->com.wr_wait; + + wqe->u.init.type = FW_RI_TYPE_INIT; + wqe->u.init.mpareqbit_p2ptype = + V_FW_RI_WR_MPAREQBIT(qhp->attr.mpa_attr.initiator) | + V_FW_RI_WR_P2PTYPE(qhp->attr.mpa_attr.p2p_type); + wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE; + if (qhp->attr.mpa_attr.recv_marker_enabled) + wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE; + if (qhp->attr.mpa_attr.xmit_marker_enabled) + wqe->u.init.mpa_attrs |= FW_RI_MPA_TX_MARKER_ENABLE; + if (qhp->attr.mpa_attr.crc_enabled) + wqe->u.init.mpa_attrs |= FW_RI_MPA_CRC_ENABLE; + + wqe->u.init.qp_caps = FW_RI_QP_RDMA_READ_ENABLE | + FW_RI_QP_RDMA_WRITE_ENABLE | + FW_RI_QP_BIND_ENABLE; + if (!qhp->ibqp.uobject) + wqe->u.init.qp_caps |= FW_RI_QP_FAST_REGISTER_ENABLE | + FW_RI_QP_STAG0_ENABLE; + wqe->u.init.nrqe = cpu_to_be16(t4_rqes_posted(&qhp->wq)); + wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd); + wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid); + wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid); + wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); + wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq); + wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq); + wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord); + wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird); + wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq); + wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq); + wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); + wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - + rhp->rdev.lldi.vr->rq.start); + if (qhp->attr.mpa_attr.initiator) + build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); + + ret = c4iw_ofld_send(&rhp->rdev, skb); + if (ret) + goto out; + + ret = c4iw_wait_for_reply(&rhp->rdev, &qhp->ep->com.wr_wait, + qhp->ep->hwtid, qhp->wq.sq.qid, __func__); +out: + PDBG("%s ret %d\n", __func__, ret); + return ret; +} + +int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, + enum c4iw_qp_attr_mask mask, + struct c4iw_qp_attributes *attrs, + int internal) +{ + int ret = 0; + struct c4iw_qp_attributes newattr = qhp->attr; + int disconnect = 0; + int terminate = 0; + int abort = 0; + int free = 0; + struct c4iw_ep *ep = NULL; + + PDBG("%s qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n", __func__, + qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state, + (mask & C4IW_QP_ATTR_NEXT_STATE) ? 
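/*
 * -1 is traced when the caller requested no state transition.
 * Note that rdma_init() and rdma_fini() above both sleep on
 * ep->com.wr_wait until the firmware acknowledges the
 * FW_RI_INIT WR they post.
 */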
attrs->next_state : -1); + + mutex_lock(&qhp->mutex); + + /* Process attr changes if in IDLE */ + if (mask & C4IW_QP_ATTR_VALID_MODIFY) { + if (qhp->attr.state != C4IW_QP_STATE_IDLE) { + ret = -EIO; + goto out; + } + if (mask & C4IW_QP_ATTR_ENABLE_RDMA_READ) + newattr.enable_rdma_read = attrs->enable_rdma_read; + if (mask & C4IW_QP_ATTR_ENABLE_RDMA_WRITE) + newattr.enable_rdma_write = attrs->enable_rdma_write; + if (mask & C4IW_QP_ATTR_ENABLE_RDMA_BIND) + newattr.enable_bind = attrs->enable_bind; + if (mask & C4IW_QP_ATTR_MAX_ORD) { + if (attrs->max_ord > c4iw_max_read_depth) { + ret = -EINVAL; + goto out; + } + newattr.max_ord = attrs->max_ord; + } + if (mask & C4IW_QP_ATTR_MAX_IRD) { + if (attrs->max_ird > c4iw_max_read_depth) { + ret = -EINVAL; + goto out; + } + newattr.max_ird = attrs->max_ird; + } + qhp->attr = newattr; + } + + if (mask & C4IW_QP_ATTR_SQ_DB) { + ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc); + goto out; + } + if (mask & C4IW_QP_ATTR_RQ_DB) { + ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc); + goto out; + } + + if (!(mask & C4IW_QP_ATTR_NEXT_STATE)) + goto out; + if (qhp->attr.state == attrs->next_state) + goto out; + + switch (qhp->attr.state) { + case C4IW_QP_STATE_IDLE: + switch (attrs->next_state) { + case C4IW_QP_STATE_RTS: + if (!(mask & C4IW_QP_ATTR_LLP_STREAM_HANDLE)) { + ret = -EINVAL; + goto out; + } + if (!(mask & C4IW_QP_ATTR_MPA_ATTR)) { + ret = -EINVAL; + goto out; + } + qhp->attr.mpa_attr = attrs->mpa_attr; + qhp->attr.llp_stream_handle = attrs->llp_stream_handle; + qhp->ep = qhp->attr.llp_stream_handle; + set_state(qhp, C4IW_QP_STATE_RTS); + + /* + * Ref the endpoint here and deref when we + * disassociate the endpoint from the QP. This + * happens in CLOSING->IDLE transition or *->ERROR + * transition. + */ + c4iw_get_ep(&qhp->ep->com); + ret = rdma_init(rhp, qhp); + if (ret) + goto err; + break; + case C4IW_QP_STATE_ERROR: + set_state(qhp, C4IW_QP_STATE_ERROR); + flush_qp(qhp); + break; + default: + ret = -EINVAL; + goto out; + } + break; + case C4IW_QP_STATE_RTS: + switch (attrs->next_state) { + case C4IW_QP_STATE_CLOSING: + BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); + t4_set_wq_in_error(&qhp->wq); + set_state(qhp, C4IW_QP_STATE_CLOSING); + ep = qhp->ep; + if (!internal) { + abort = 0; + disconnect = 1; + c4iw_get_ep(&qhp->ep->com); + } + ret = rdma_fini(rhp, qhp, ep); + if (ret) + goto err; + break; + case C4IW_QP_STATE_TERMINATE: + t4_set_wq_in_error(&qhp->wq); + set_state(qhp, C4IW_QP_STATE_TERMINATE); + qhp->attr.layer_etype = attrs->layer_etype; + qhp->attr.ecode = attrs->ecode; + ep = qhp->ep; + if (!internal) { + c4iw_get_ep(&qhp->ep->com); + terminate = 1; + disconnect = 1; + } else { + terminate = qhp->attr.send_term; + ret = rdma_fini(rhp, qhp, ep); + if (ret) + goto err; + } + break; + case C4IW_QP_STATE_ERROR: + t4_set_wq_in_error(&qhp->wq); + set_state(qhp, C4IW_QP_STATE_ERROR); + if (!internal) { + abort = 1; + disconnect = 1; + ep = qhp->ep; + c4iw_get_ep(&qhp->ep->com); + } + goto err; + break; + default: + ret = -EINVAL; + goto out; + } + break; + case C4IW_QP_STATE_CLOSING: + if (!internal) { + ret = -EINVAL; + goto out; + } + switch (attrs->next_state) { + case C4IW_QP_STATE_IDLE: + flush_qp(qhp); + set_state(qhp, C4IW_QP_STATE_IDLE); + qhp->attr.llp_stream_handle = NULL; + c4iw_put_ep(&qhp->ep->com); + qhp->ep = NULL; + wake_up(&qhp->wait); + break; + case C4IW_QP_STATE_ERROR: + goto err; + default: + ret = -EINVAL; + goto err; + } + break; + case C4IW_QP_STATE_ERROR: + if (attrs->next_state != C4IW_QP_STATE_IDLE) { + 
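/*
 * The only legal exit from ERROR is back to IDLE, and only
 * once both work queues have drained (checked just below).
 */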
ret = -EINVAL; + goto out; + } + if (!t4_sq_empty(&qhp->wq) || !t4_rq_empty(&qhp->wq)) { + ret = -EINVAL; + goto out; + } + set_state(qhp, C4IW_QP_STATE_IDLE); + break; + case C4IW_QP_STATE_TERMINATE: + if (!internal) { + ret = -EINVAL; + goto out; + } + goto err; + break; + default: + printk(KERN_ERR "%s in a bad state %d\n", + __func__, qhp->attr.state); + ret = -EINVAL; + goto err; + break; + } + goto out; +err: + PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep, + qhp->wq.sq.qid); + + /* disassociate the LLP connection */ + qhp->attr.llp_stream_handle = NULL; + if (!ep) + ep = qhp->ep; + qhp->ep = NULL; + set_state(qhp, C4IW_QP_STATE_ERROR); + free = 1; + abort = 1; + wake_up(&qhp->wait); + BUG_ON(!ep); + flush_qp(qhp); +out: + mutex_unlock(&qhp->mutex); + + if (terminate) + post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL); + + /* + * If disconnect is 1, then we need to initiate a disconnect + * on the EP. This can be a normal close (RTS->CLOSING) or + * an abnormal close (RTS/CLOSING->ERROR). + */ + if (disconnect) { + c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC : + GFP_KERNEL); + c4iw_put_ep(&ep->com); + } + + /* + * If free is 1, then we've disassociated the EP from the QP + * and we need to dereference the EP. + */ + if (free) + c4iw_put_ep(&ep->com); + PDBG("%s exit state %d\n", __func__, qhp->attr.state); + return ret; +} + +int c4iw_destroy_qp(struct ib_qp *ib_qp) +{ + struct c4iw_dev *rhp; + struct c4iw_qp *qhp; + struct c4iw_qp_attributes attrs; + struct c4iw_ucontext *ucontext; + + qhp = to_c4iw_qp(ib_qp); + rhp = qhp->rhp; + + attrs.next_state = C4IW_QP_STATE_ERROR; + if (qhp->attr.state == C4IW_QP_STATE_TERMINATE) + c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + else + c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + wait_event(qhp->wait, !qhp->ep); + + remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); + atomic_dec(&qhp->refcnt); + wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); + + spin_lock_irq(&rhp->lock); + if (!list_empty(&qhp->db_fc_entry)) + list_del_init(&qhp->db_fc_entry); + spin_unlock_irq(&rhp->lock); + + ucontext = ib_qp->uobject ? + to_c4iw_ucontext(ib_qp->uobject->context) : NULL; + destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + + PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid); + kfree(qhp); + return 0; +} + +struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_qp *qhp; + struct c4iw_pd *php; + struct c4iw_cq *schp; + struct c4iw_cq *rchp; + struct c4iw_create_qp_resp uresp; + unsigned int sqsize, rqsize; + struct c4iw_ucontext *ucontext; + int ret; + struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; + + PDBG("%s ib_pd %p\n", __func__, pd); + + if (attrs->qp_type != IB_QPT_RC) + return ERR_PTR(-EINVAL); + + php = to_c4iw_pd(pd); + rhp = php->rhp; + schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid); + rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid); + if (!schp || !rchp) + return ERR_PTR(-EINVAL); + + if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) + return ERR_PTR(-EINVAL); + + rqsize = roundup(attrs->cap.max_recv_wr + 1, 16); + if (rqsize > T4_MAX_RQ_SIZE) + return ERR_PTR(-E2BIG); + + sqsize = roundup(attrs->cap.max_send_wr + 1, 16); + if (sqsize > T4_MAX_SQ_SIZE) + return ERR_PTR(-E2BIG); + + ucontext = pd->uobject ? 
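/*
 * A non-NULL uobject marks a user mode QP: queue memory and
 * doorbells are exported to the process through the mmap keys
 * built further down. Kernel QPs fall back to the rdev's own
 * uctx.
 */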
to_c4iw_ucontext(pd->uobject->context) : NULL; + + qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); + if (!qhp) + return ERR_PTR(-ENOMEM); + qhp->wq.sq.size = sqsize; + qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue; + qhp->wq.sq.flush_cidx = -1; + qhp->wq.rq.size = rqsize; + qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue; + + if (ucontext) { + qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE); + qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE); + } + + PDBG("%s sqsize %u sqmemsize %zu rqsize %u rqmemsize %zu\n", + __func__, sqsize, qhp->wq.sq.memsize, rqsize, qhp->wq.rq.memsize); + + ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + if (ret) + goto err1; + + attrs->cap.max_recv_wr = rqsize - 1; + attrs->cap.max_send_wr = sqsize - 1; + attrs->cap.max_inline_data = T4_MAX_SEND_INLINE; + + qhp->rhp = rhp; + qhp->attr.pd = php->pdid; + qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid; + qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid; + qhp->attr.sq_num_entries = attrs->cap.max_send_wr; + qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; + qhp->attr.sq_max_sges = attrs->cap.max_send_sge; + qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; + qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + qhp->attr.state = C4IW_QP_STATE_IDLE; + qhp->attr.next_state = C4IW_QP_STATE_IDLE; + qhp->attr.enable_rdma_read = 1; + qhp->attr.enable_rdma_write = 1; + qhp->attr.enable_bind = 1; + qhp->attr.max_ord = 1; + qhp->attr.max_ird = 1; + qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; + spin_lock_init(&qhp->lock); + mutex_init(&qhp->mutex); + init_waitqueue_head(&qhp->wait); + atomic_set(&qhp->refcnt, 1); + + ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); + if (ret) + goto err2; + + if (udata) { + mm1 = kmalloc(sizeof *mm1, GFP_KERNEL); + if (!mm1) { + ret = -ENOMEM; + goto err3; + } + mm2 = kmalloc(sizeof *mm2, GFP_KERNEL); + if (!mm2) { + ret = -ENOMEM; + goto err4; + } + mm3 = kmalloc(sizeof *mm3, GFP_KERNEL); + if (!mm3) { + ret = -ENOMEM; + goto err5; + } + mm4 = kmalloc(sizeof *mm4, GFP_KERNEL); + if (!mm4) { + ret = -ENOMEM; + goto err6; + } + if (t4_sq_onchip(&qhp->wq.sq)) { + mm5 = kmalloc(sizeof *mm5, GFP_KERNEL); + if (!mm5) { + ret = -ENOMEM; + goto err7; + } + uresp.flags = C4IW_QPF_ONCHIP; + } else + uresp.flags = 0; + uresp.qid_mask = rhp->rdev.qpmask; + uresp.sqid = qhp->wq.sq.qid; + uresp.sq_size = qhp->wq.sq.size; + uresp.sq_memsize = qhp->wq.sq.memsize; + uresp.rqid = qhp->wq.rq.qid; + uresp.rq_size = qhp->wq.rq.size; + uresp.rq_memsize = qhp->wq.rq.memsize; + spin_lock(&ucontext->mmap_lock); + if (mm5) { + uresp.ma_sync_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } else { + uresp.ma_sync_key = 0; + } + uresp.sq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.rq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.sq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.rq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + spin_unlock(&ucontext->mmap_lock); + ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); + if (ret) + goto err8; + mm1->key = uresp.sq_key; + mm1->addr = qhp->wq.sq.phys_addr; + mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize); + insert_mmap(ucontext, mm1); + mm2->key = uresp.rq_key; + mm2->addr = virt_to_phys(qhp->wq.rq.queue); + mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); + insert_mmap(ucontext, mm2); + mm3->key = uresp.sq_db_gts_key; + mm3->addr = (__force unsigned long) 
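/*
 * Each key is a distinct, page-sized pseudo mmap offset handed
 * back in uresp; insert_mmap() records key -> (addr, len) so
 * the driver's mmap path can resolve it when the process maps
 * its queues. mm3/mm4 cover the SQ/RQ user doorbell (udb) pages.
 */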
qhp->wq.sq.udb; + mm3->len = PAGE_SIZE; + insert_mmap(ucontext, mm3); + mm4->key = uresp.rq_db_gts_key; + mm4->addr = (__force unsigned long) qhp->wq.rq.udb; + mm4->len = PAGE_SIZE; + insert_mmap(ucontext, mm4); + if (mm5) { + mm5->key = uresp.ma_sync_key; + mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0) + + A_PCIE_MA_SYNC) & PAGE_MASK; + mm5->len = PAGE_SIZE; + insert_mmap(ucontext, mm5); + } + } + qhp->ibqp.qp_num = qhp->wq.sq.qid; + init_timer(&(qhp->timer)); + INIT_LIST_HEAD(&qhp->db_fc_entry); + PDBG("%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x\n", + __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, + qhp->wq.sq.qid); + return &qhp->ibqp; +err8: + kfree(mm5); +err7: + kfree(mm4); +err6: + kfree(mm3); +err5: + kfree(mm2); +err4: + kfree(mm1); +err3: + remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); +err2: + destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); +err1: + kfree(qhp); + return ERR_PTR(ret); +} + +int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_qp *qhp; + enum c4iw_qp_attr_mask mask = 0; + struct c4iw_qp_attributes attrs; + + PDBG("%s ib_qp %p\n", __func__, ibqp); + + /* iwarp does not support the RTR state */ + if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR)) + attr_mask &= ~IB_QP_STATE; + + /* Make sure we still have something left to do */ + if (!attr_mask) + return 0; + + memset(&attrs, 0, sizeof attrs); + qhp = to_c4iw_qp(ibqp); + rhp = qhp->rhp; + + attrs.next_state = c4iw_convert_state(attr->qp_state); + attrs.enable_rdma_read = (attr->qp_access_flags & + IB_ACCESS_REMOTE_READ) ? 1 : 0; + attrs.enable_rdma_write = (attr->qp_access_flags & + IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; + + + mask |= (attr_mask & IB_QP_STATE) ? C4IW_QP_ATTR_NEXT_STATE : 0; + mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? + (C4IW_QP_ATTR_ENABLE_RDMA_READ | + C4IW_QP_ATTR_ENABLE_RDMA_WRITE | + C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0; + + /* + * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for + * ringing the queue db when we're in DB_FULL mode. + * Only allow this on T4 devices. + */ + attrs.sq_db_inc = attr->sq_psn; + attrs.rq_db_inc = attr->rq_psn; + mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; + mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; + if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && + (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB))) + return -EINVAL; + + return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); +} + +struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) +{ + PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); + return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); +} + +int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + struct c4iw_qp *qhp = to_c4iw_qp(ibqp); + + memset(attr, 0, sizeof *attr); + memset(init_attr, 0, sizeof *init_attr); + attr->qp_state = to_ib_qp_state(qhp->attr.state); + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c new file mode 100644 index 00000000000..67df71a7012 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* Crude resource management */ +#include <linux/spinlock.h> +#include <linux/genalloc.h> +#include <linux/ratelimit.h> +#include "iw_cxgb4.h" + +static int c4iw_init_qid_table(struct c4iw_rdev *rdev) +{ + u32 i; + + if (c4iw_id_table_alloc(&rdev->resource.qid_table, + rdev->lldi.vr->qp.start, + rdev->lldi.vr->qp.size, + rdev->lldi.vr->qp.size, 0)) + return -ENOMEM; + + for (i = rdev->lldi.vr->qp.start; + i < rdev->lldi.vr->qp.start + rdev->lldi.vr->qp.size; i++) + if (!(i & rdev->qpmask)) + c4iw_id_free(&rdev->resource.qid_table, i); + return 0; +} + +/* nr_* must be power of 2 */ +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) +{ + int err = 0; + err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1, + C4IW_ID_TABLE_F_RANDOM); + if (err) + goto tpt_err; + err = c4iw_init_qid_table(rdev); + if (err) + goto qid_err; + err = c4iw_id_table_alloc(&rdev->resource.pdid_table, 0, + nr_pdid, 1, 0); + if (err) + goto pdid_err; + return 0; + pdid_err: + c4iw_id_table_free(&rdev->resource.qid_table); + qid_err: + c4iw_id_table_free(&rdev->resource.tpt_table); + tpt_err: + return -ENOMEM; +} + +/* + * returns 0 if no resource available + */ +u32 c4iw_get_resource(struct c4iw_id_table *id_table) +{ + u32 entry; + entry = c4iw_id_alloc(id_table); + if (entry == (u32)(-1)) + return 0; + return entry; +} + +void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry) +{ + PDBG("%s entry 0x%x\n", __func__, entry); + c4iw_id_free(id_table, entry); +} + +u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) +{ + struct c4iw_qid_list *entry; + u32 qid; + int i; + + mutex_lock(&uctx->lock); + if (!list_empty(&uctx->cqids)) { + entry = list_entry(uctx->cqids.next, struct c4iw_qid_list, + entry); + list_del(&entry->entry); + qid = entry->qid; + kfree(entry); + } else { + qid = c4iw_get_resource(&rdev->resource.qid_table); + if (!qid) + goto out; + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.cur += rdev->qpmask + 1; + mutex_unlock(&rdev->stats.lock); + for (i = qid+1; i & rdev->qpmask; i++) { + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + goto out; + entry->qid = i; + list_add_tail(&entry->entry, &uctx->cqids); + } + + /* + * now put the same ids on the qp list since they all + * map to the same db/gts 
page. + */ + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + goto out; + entry->qid = qid; + list_add_tail(&entry->entry, &uctx->qpids); + for (i = qid+1; i & rdev->qpmask; i++) { + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + goto out; + entry->qid = i; + list_add_tail(&entry->entry, &uctx->qpids); + } + } +out: + mutex_unlock(&uctx->lock); + PDBG("%s qid 0x%x\n", __func__, qid); + mutex_lock(&rdev->stats.lock); + if (rdev->stats.qid.cur > rdev->stats.qid.max) + rdev->stats.qid.max = rdev->stats.qid.cur; + mutex_unlock(&rdev->stats.lock); + return qid; +} + +void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid, + struct c4iw_dev_ucontext *uctx) +{ + struct c4iw_qid_list *entry; + + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + return; + PDBG("%s qid 0x%x\n", __func__, qid); + entry->qid = qid; + mutex_lock(&uctx->lock); + list_add_tail(&entry->entry, &uctx->cqids); + mutex_unlock(&uctx->lock); +} + +u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) +{ + struct c4iw_qid_list *entry; + u32 qid; + int i; + + mutex_lock(&uctx->lock); + if (!list_empty(&uctx->qpids)) { + entry = list_entry(uctx->qpids.next, struct c4iw_qid_list, + entry); + list_del(&entry->entry); + qid = entry->qid; + kfree(entry); + } else { + qid = c4iw_get_resource(&rdev->resource.qid_table); + if (!qid) { + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.fail++; + mutex_unlock(&rdev->stats.lock); + goto out; + } + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.cur += rdev->qpmask + 1; + mutex_unlock(&rdev->stats.lock); + for (i = qid+1; i & rdev->qpmask; i++) { + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + goto out; + entry->qid = i; + list_add_tail(&entry->entry, &uctx->qpids); + } + + /* + * now put the same ids on the cq list since they all + * map to the same db/gts page. + */ + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + goto out; + entry->qid = qid; + list_add_tail(&entry->entry, &uctx->cqids); + for (i = qid; i & rdev->qpmask; i++) { + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + goto out; + entry->qid = i; + list_add_tail(&entry->entry, &uctx->cqids); + } + } +out: + mutex_unlock(&uctx->lock); + PDBG("%s qid 0x%x\n", __func__, qid); + mutex_lock(&rdev->stats.lock); + if (rdev->stats.qid.cur > rdev->stats.qid.max) + rdev->stats.qid.max = rdev->stats.qid.cur; + mutex_unlock(&rdev->stats.lock); + return qid; +} + +void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid, + struct c4iw_dev_ucontext *uctx) +{ + struct c4iw_qid_list *entry; + + entry = kmalloc(sizeof *entry, GFP_KERNEL); + if (!entry) + return; + PDBG("%s qid 0x%x\n", __func__, qid); + entry->qid = qid; + mutex_lock(&uctx->lock); + list_add_tail(&entry->entry, &uctx->qpids); + mutex_unlock(&uctx->lock); +} + +void c4iw_destroy_resource(struct c4iw_resource *rscp) +{ + c4iw_id_table_free(&rscp->tpt_table); + c4iw_id_table_free(&rscp->qid_table); + c4iw_id_table_free(&rscp->pdid_table); +} + +/* + * PBL Memory Manager. Uses Linux generic allocator. 
+ */ + +#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ + +u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size) +{ + unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); + mutex_lock(&rdev->stats.lock); + if (addr) { + rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT); + if (rdev->stats.pbl.cur > rdev->stats.pbl.max) + rdev->stats.pbl.max = rdev->stats.pbl.cur; + } else + rdev->stats.pbl.fail++; + mutex_unlock(&rdev->stats.lock); + return (u32)addr; +} + +void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) +{ + PDBG("%s addr 0x%x size %d\n", __func__, addr, size); + mutex_lock(&rdev->stats.lock); + rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT); + mutex_unlock(&rdev->stats.lock); + gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size); +} + +int c4iw_pblpool_create(struct c4iw_rdev *rdev) +{ + unsigned pbl_start, pbl_chunk, pbl_top; + + rdev->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1); + if (!rdev->pbl_pool) + return -ENOMEM; + + pbl_start = rdev->lldi.vr->pbl.start; + pbl_chunk = rdev->lldi.vr->pbl.size; + pbl_top = pbl_start + pbl_chunk; + + while (pbl_start < pbl_top) { + pbl_chunk = min(pbl_top - pbl_start + 1, pbl_chunk); + if (gen_pool_add(rdev->pbl_pool, pbl_start, pbl_chunk, -1)) { + PDBG("%s failed to add PBL chunk (%x/%x)\n", + __func__, pbl_start, pbl_chunk); + if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) { + printk(KERN_WARNING MOD + "Failed to add all PBL chunks (%x/%x)\n", + pbl_start, + pbl_top - pbl_start); + return 0; + } + pbl_chunk >>= 1; + } else { + PDBG("%s added PBL chunk (%x/%x)\n", + __func__, pbl_start, pbl_chunk); + pbl_start += pbl_chunk; + } + } + + return 0; +} + +void c4iw_pblpool_destroy(struct c4iw_rdev *rdev) +{ + gen_pool_destroy(rdev->pbl_pool); +} + +/* + * RQT Memory Manager. Uses Linux generic allocator. 
+ */ + +#define MIN_RQT_SHIFT 10 /* 1KB == min RQT size (16 entries) */ + +u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) +{ + unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); + if (!addr) + pr_warn_ratelimited(MOD "%s: Out of RQT memory\n", + pci_name(rdev->lldi.pdev)); + mutex_lock(&rdev->stats.lock); + if (addr) { + rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT); + if (rdev->stats.rqt.cur > rdev->stats.rqt.max) + rdev->stats.rqt.max = rdev->stats.rqt.cur; + } else + rdev->stats.rqt.fail++; + mutex_unlock(&rdev->stats.lock); + return (u32)addr; +} + +void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) +{ + PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6); + mutex_lock(&rdev->stats.lock); + rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT); + mutex_unlock(&rdev->stats.lock); + gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6); +} + +int c4iw_rqtpool_create(struct c4iw_rdev *rdev) +{ + unsigned rqt_start, rqt_chunk, rqt_top; + + rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); + if (!rdev->rqt_pool) + return -ENOMEM; + + rqt_start = rdev->lldi.vr->rq.start; + rqt_chunk = rdev->lldi.vr->rq.size; + rqt_top = rqt_start + rqt_chunk; + + while (rqt_start < rqt_top) { + rqt_chunk = min(rqt_top - rqt_start + 1, rqt_chunk); + if (gen_pool_add(rdev->rqt_pool, rqt_start, rqt_chunk, -1)) { + PDBG("%s failed to add RQT chunk (%x/%x)\n", + __func__, rqt_start, rqt_chunk); + if (rqt_chunk <= 1024 << MIN_RQT_SHIFT) { + printk(KERN_WARNING MOD + "Failed to add all RQT chunks (%x/%x)\n", + rqt_start, rqt_top - rqt_start); + return 0; + } + rqt_chunk >>= 1; + } else { + PDBG("%s added RQT chunk (%x/%x)\n", + __func__, rqt_start, rqt_chunk); + rqt_start += rqt_chunk; + } + } + return 0; +} + +void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) +{ + gen_pool_destroy(rdev->rqt_pool); +} + +/* + * On-Chip QP Memory. 
+ */ +#define MIN_OCQP_SHIFT 12 /* 4KB == min ocqp size */ + +u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size) +{ + unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size); + PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); + if (addr) { + mutex_lock(&rdev->stats.lock); + rdev->stats.ocqp.cur += roundup(size, 1 << MIN_OCQP_SHIFT); + if (rdev->stats.ocqp.cur > rdev->stats.ocqp.max) + rdev->stats.ocqp.max = rdev->stats.ocqp.cur; + mutex_unlock(&rdev->stats.lock); + } + return (u32)addr; +} + +void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size) +{ + PDBG("%s addr 0x%x size %d\n", __func__, addr, size); + mutex_lock(&rdev->stats.lock); + rdev->stats.ocqp.cur -= roundup(size, 1 << MIN_OCQP_SHIFT); + mutex_unlock(&rdev->stats.lock); + gen_pool_free(rdev->ocqp_pool, (unsigned long)addr, size); +} + +int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev) +{ + unsigned start, chunk, top; + + rdev->ocqp_pool = gen_pool_create(MIN_OCQP_SHIFT, -1); + if (!rdev->ocqp_pool) + return -ENOMEM; + + start = rdev->lldi.vr->ocq.start; + chunk = rdev->lldi.vr->ocq.size; + top = start + chunk; + + while (start < top) { + chunk = min(top - start + 1, chunk); + if (gen_pool_add(rdev->ocqp_pool, start, chunk, -1)) { + PDBG("%s failed to add OCQP chunk (%x/%x)\n", + __func__, start, chunk); + if (chunk <= 1024 << MIN_OCQP_SHIFT) { + printk(KERN_WARNING MOD + "Failed to add all OCQP chunks (%x/%x)\n", + start, top - start); + return 0; + } + chunk >>= 1; + } else { + PDBG("%s added OCQP chunk (%x/%x)\n", + __func__, start, chunk); + start += chunk; + } + } + return 0; +} + +void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev) +{ + gen_pool_destroy(rdev->ocqp_pool); +} diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h new file mode 100644 index 00000000000..68b0a6bf4eb --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -0,0 +1,680 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __T4_H__ +#define __T4_H__ + +#include "t4_hw.h" +#include "t4_regs.h" +#include "t4_msg.h" +#include "t4fw_ri_api.h" + +#define T4_MAX_NUM_QP 65536 +#define T4_MAX_NUM_CQ 65536 +#define T4_MAX_NUM_PD 65536 +#define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1) +#define T4_MAX_EQ_SIZE (65520 - T4_EQ_STATUS_ENTRIES) +#define T4_MAX_IQ_SIZE (65520 - 1) +#define T4_MAX_RQ_SIZE (8192 - T4_EQ_STATUS_ENTRIES) +#define T4_MAX_SQ_SIZE (T4_MAX_EQ_SIZE - 1) +#define T4_MAX_QP_DEPTH (T4_MAX_RQ_SIZE - 1) +#define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1) +#define T4_MAX_NUM_STAG (1<<15) +#define T4_MAX_MR_SIZE (~0ULL) +#define T4_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ +#define T4_STAG_UNSET 0xffffffff +#define T4_FW_MAJ 0 +#define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1) +#define A_PCIE_MA_SYNC 0x30b4 + +struct t4_status_page { + __be32 rsvd1; /* flit 0 - hw owns */ + __be16 rsvd2; + __be16 qid; + __be16 cidx; + __be16 pidx; + u8 qp_err; /* flit 1 - sw owns */ + u8 db_off; + u8 pad; + u16 host_wq_pidx; + u16 host_cidx; + u16 host_pidx; +}; + +#define T4_EQ_ENTRY_SIZE 64 + +#define T4_SQ_NUM_SLOTS 5 +#define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS) +#define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ + sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) +#define T4_MAX_SEND_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ + sizeof(struct fw_ri_immd))) +#define T4_MAX_WRITE_INLINE ((T4_SQ_NUM_BYTES - \ + sizeof(struct fw_ri_rdma_write_wr) - \ + sizeof(struct fw_ri_immd))) +#define T4_MAX_WRITE_SGE ((T4_SQ_NUM_BYTES - \ + sizeof(struct fw_ri_rdma_write_wr) - \ + sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) +#define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \ + sizeof(struct fw_ri_immd)) & ~31UL) +#define T4_MAX_FR_IMMD_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) +#define T4_MAX_FR_DSGL 1024 +#define T4_MAX_FR_DSGL_DEPTH (T4_MAX_FR_DSGL / sizeof(u64)) + +static inline int t4_max_fr_depth(int use_dsgl) +{ + return use_dsgl ? 
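/*
 * A 5-slot (320 byte) SQ entry caps an immediate fast-register
 * PBL at T4_MAX_FR_IMMD / 8 page addresses, whereas the DSGL
 * form lets the hardware fetch up to 1024 / 8 = 128 addresses
 * by DMA - hence the deeper limit when use_dsgl is set.
 */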
T4_MAX_FR_DSGL_DEPTH : T4_MAX_FR_IMMD_DEPTH; +} + +#define T4_RQ_NUM_SLOTS 2 +#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) +#define T4_MAX_RECV_SGE 4 + +union t4_wr { + struct fw_ri_res_wr res; + struct fw_ri_wr ri; + struct fw_ri_rdma_write_wr write; + struct fw_ri_send_wr send; + struct fw_ri_rdma_read_wr read; + struct fw_ri_bind_mw_wr bind; + struct fw_ri_fr_nsmr_wr fr; + struct fw_ri_inv_lstag_wr inv; + struct t4_status_page status; + __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; +}; + +union t4_recv_wr { + struct fw_ri_recv_wr recv; + struct t4_status_page status; + __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS]; +}; + +static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid, + enum fw_wr_opcodes opcode, u8 flags, u8 len16) +{ + wqe->send.opcode = (u8)opcode; + wqe->send.flags = flags; + wqe->send.wrid = wrid; + wqe->send.r1[0] = 0; + wqe->send.r1[1] = 0; + wqe->send.r1[2] = 0; + wqe->send.len16 = len16; +} + +/* CQE/AE status codes */ +#define T4_ERR_SUCCESS 0x0 +#define T4_ERR_STAG 0x1 /* STAG invalid: either the */ + /* STAG is offlimt, being 0, */ + /* or STAG_key mismatch */ +#define T4_ERR_PDID 0x2 /* PDID mismatch */ +#define T4_ERR_QPID 0x3 /* QPID mismatch */ +#define T4_ERR_ACCESS 0x4 /* Invalid access right */ +#define T4_ERR_WRAP 0x5 /* Wrap error */ +#define T4_ERR_BOUND 0x6 /* base and bounds voilation */ +#define T4_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */ + /* shared memory region */ +#define T4_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */ + /* shared memory region */ +#define T4_ERR_ECC 0x9 /* ECC error detected */ +#define T4_ERR_ECC_PSTAG 0xA /* ECC error detected when */ + /* reading PSTAG for a MW */ + /* Invalidate */ +#define T4_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */ + /* software error */ +#define T4_ERR_SWFLUSH 0xC /* SW FLUSHED */ +#define T4_ERR_CRC 0x10 /* CRC error */ +#define T4_ERR_MARKER 0x11 /* Marker error */ +#define T4_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */ +#define T4_ERR_OUT_OF_RQE 0x13 /* out of RQE */ +#define T4_ERR_DDP_VERSION 0x14 /* wrong DDP version */ +#define T4_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */ +#define T4_ERR_OPCODE 0x16 /* invalid rdma opcode */ +#define T4_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */ +#define T4_ERR_MSN 0x18 /* MSN error */ +#define T4_ERR_TBIT 0x19 /* tag bit not set correctly */ +#define T4_ERR_MO 0x1A /* MO not 0 for TERMINATE */ + /* or READ_REQ */ +#define T4_ERR_MSN_GAP 0x1B +#define T4_ERR_MSN_RANGE 0x1C +#define T4_ERR_IRD_OVERFLOW 0x1D +#define T4_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */ + /* software error */ +#define T4_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */ + /* mismatch) */ +/* + * CQE defs + */ +struct t4_cqe { + __be32 header; + __be32 len; + union { + struct { + __be32 stag; + __be32 msn; + } rcqe; + struct { + u32 nada1; + u16 nada2; + u16 cidx; + } scqe; + struct { + __be32 wrid_hi; + __be32 wrid_low; + } gen; + } u; + __be64 reserved; + __be64 bits_type_ts; +}; + +/* macros for flit 0 of the cqe */ + +#define S_CQE_QPID 12 +#define M_CQE_QPID 0xFFFFF +#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID) +#define V_CQE_QPID(x) ((x)<<S_CQE_QPID) + +#define S_CQE_SWCQE 11 +#define M_CQE_SWCQE 0x1 +#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE) +#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE) + +#define S_CQE_STATUS 5 +#define M_CQE_STATUS 0x1F +#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS) +#define 
V_CQE_STATUS(x) ((x)<<S_CQE_STATUS) + +#define S_CQE_TYPE 4 +#define M_CQE_TYPE 0x1 +#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE) +#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE) + +#define S_CQE_OPCODE 0 +#define M_CQE_OPCODE 0xF +#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE) +#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE) + +#define SW_CQE(x) (G_CQE_SWCQE(be32_to_cpu((x)->header))) +#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x)->header))) +#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x)->header))) +#define SQ_TYPE(x) (CQE_TYPE((x))) +#define RQ_TYPE(x) (!CQE_TYPE((x))) +#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x)->header))) +#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x)->header))) + +#define CQE_SEND_OPCODE(x)( \ + (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND) || \ + (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_SE) || \ + (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_INV) || \ + (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_SE_INV)) + +#define CQE_LEN(x) (be32_to_cpu((x)->len)) + +/* used for RQ completion processing */ +#define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag)) +#define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn)) + +/* used for SQ completion processing */ +#define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) + +/* generic accessor macros */ +#define CQE_WRID_HI(x) ((x)->u.gen.wrid_hi) +#define CQE_WRID_LOW(x) ((x)->u.gen.wrid_low) + +/* macros for flit 3 of the cqe */ +#define S_CQE_GENBIT 63 +#define M_CQE_GENBIT 0x1 +#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT) +#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT) + +#define S_CQE_OVFBIT 62 +#define M_CQE_OVFBIT 0x1 +#define G_CQE_OVFBIT(x) ((((x) >> S_CQE_OVFBIT)) & M_CQE_OVFBIT) + +#define S_CQE_IQTYPE 60 +#define M_CQE_IQTYPE 0x3 +#define G_CQE_IQTYPE(x) ((((x) >> S_CQE_IQTYPE)) & M_CQE_IQTYPE) + +#define M_CQE_TS 0x0fffffffffffffffULL +#define G_CQE_TS(x) ((x) & M_CQE_TS) + +#define CQE_OVFBIT(x) ((unsigned)G_CQE_OVFBIT(be64_to_cpu((x)->bits_type_ts))) +#define CQE_GENBIT(x) ((unsigned)G_CQE_GENBIT(be64_to_cpu((x)->bits_type_ts))) +#define CQE_TS(x) (G_CQE_TS(be64_to_cpu((x)->bits_type_ts))) + +struct t4_swsqe { + u64 wr_id; + struct t4_cqe cqe; + int read_len; + int opcode; + int complete; + int signaled; + u16 idx; + int flushed; +}; + +static inline pgprot_t t4_pgprot_wc(pgprot_t prot) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) + return pgprot_writecombine(prot); +#else + return pgprot_noncached(prot); +#endif +} + +enum { + T4_SQ_ONCHIP = (1<<0), +}; + +struct t4_sq { + union t4_wr *queue; + dma_addr_t dma_addr; + DEFINE_DMA_UNMAP_ADDR(mapping); + unsigned long phys_addr; + struct t4_swsqe *sw_sq; + struct t4_swsqe *oldest_read; + u64 __iomem *udb; + size_t memsize; + u32 qid; + u16 in_use; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; + u16 wq_pidx_inc; + u16 flags; + short flush_cidx; +}; + +struct t4_swrqe { + u64 wr_id; +}; + +struct t4_rq { + union t4_recv_wr *queue; + dma_addr_t dma_addr; + DEFINE_DMA_UNMAP_ADDR(mapping); + struct t4_swrqe *sw_rq; + u64 __iomem *udb; + size_t memsize; + u32 qid; + u32 msn; + u32 rqt_hwaddr; + u16 rqt_size; + u16 in_use; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; + u16 wq_pidx_inc; +}; + +struct t4_wq { + struct t4_sq sq; + struct t4_rq rq; + void __iomem *db; + void __iomem *gts; + struct c4iw_rdev *rdev; + int flushed; +}; + +static inline int t4_rqes_posted(struct t4_wq *wq) +{ + return wq->rq.in_use; +} + +static inline int 
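/*
 * Ring bookkeeping: pidx/cidx wrap at 'size', in_use counts
 * posted-but-unconsumed entries, and full means in_use ==
 * size - 1, so one slot is always held back. The status page
 * lives one entry past the ring, at queue[size].
 */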
t4_rq_empty(struct t4_wq *wq) +{ + return wq->rq.in_use == 0; +} + +static inline int t4_rq_full(struct t4_wq *wq) +{ + return wq->rq.in_use == (wq->rq.size - 1); +} + +static inline u32 t4_rq_avail(struct t4_wq *wq) +{ + return wq->rq.size - 1 - wq->rq.in_use; +} + +static inline void t4_rq_produce(struct t4_wq *wq, u8 len16) +{ + wq->rq.in_use++; + if (++wq->rq.pidx == wq->rq.size) + wq->rq.pidx = 0; + wq->rq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + if (wq->rq.wq_pidx >= wq->rq.size * T4_RQ_NUM_SLOTS) + wq->rq.wq_pidx %= wq->rq.size * T4_RQ_NUM_SLOTS; +} + +static inline void t4_rq_consume(struct t4_wq *wq) +{ + wq->rq.in_use--; + wq->rq.msn++; + if (++wq->rq.cidx == wq->rq.size) + wq->rq.cidx = 0; +} + +static inline u16 t4_rq_host_wq_pidx(struct t4_wq *wq) +{ + return wq->rq.queue[wq->rq.size].status.host_wq_pidx; +} + +static inline u16 t4_rq_wq_size(struct t4_wq *wq) +{ + return wq->rq.size * T4_RQ_NUM_SLOTS; +} + +static inline int t4_sq_onchip(struct t4_sq *sq) +{ + return sq->flags & T4_SQ_ONCHIP; +} + +static inline int t4_sq_empty(struct t4_wq *wq) +{ + return wq->sq.in_use == 0; +} + +static inline int t4_sq_full(struct t4_wq *wq) +{ + return wq->sq.in_use == (wq->sq.size - 1); +} + +static inline u32 t4_sq_avail(struct t4_wq *wq) +{ + return wq->sq.size - 1 - wq->sq.in_use; +} + +static inline void t4_sq_produce(struct t4_wq *wq, u8 len16) +{ + wq->sq.in_use++; + if (++wq->sq.pidx == wq->sq.size) + wq->sq.pidx = 0; + wq->sq.wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); + if (wq->sq.wq_pidx >= wq->sq.size * T4_SQ_NUM_SLOTS) + wq->sq.wq_pidx %= wq->sq.size * T4_SQ_NUM_SLOTS; +} + +static inline void t4_sq_consume(struct t4_wq *wq) +{ + BUG_ON(wq->sq.in_use < 1); + if (wq->sq.cidx == wq->sq.flush_cidx) + wq->sq.flush_cidx = -1; + wq->sq.in_use--; + if (++wq->sq.cidx == wq->sq.size) + wq->sq.cidx = 0; +} + +static inline u16 t4_sq_host_wq_pidx(struct t4_wq *wq) +{ + return wq->sq.queue[wq->sq.size].status.host_wq_pidx; +} + +static inline u16 t4_sq_wq_size(struct t4_wq *wq) +{ + return wq->sq.size * T4_SQ_NUM_SLOTS; +} + +/* This function copies 64 byte coalesced work request to memory + * mapped BAR2 space. For coalesced WRs, the SGE fetches data + * from the FIFO instead of from Host. + */ +static inline void pio_copy(u64 __iomem *dst, u64 *src) +{ + int count = 8; + + while (count) { + writeq(*src, dst); + src++; + dst++; + count--; + } +} + +static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, + union t4_wr *wqe) +{ + + /* Flush host queue memory writes. */ + wmb(); + if (t5) { + if (inc == 1 && wqe) { + PDBG("%s: WC wq->sq.pidx = %d\n", + __func__, wq->sq.pidx); + pio_copy(wq->sq.udb + 7, (void *)wqe); + } else { + PDBG("%s: DB wq->sq.pidx = %d\n", + __func__, wq->sq.pidx); + writel(PIDX_T5(inc), wq->sq.udb); + } + + /* Flush user doorbell area writes. */ + wmb(); + return; + } + writel(QID(wq->sq.qid) | PIDX(inc), wq->db); +} + +static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5, + union t4_recv_wr *wqe) +{ + + /* Flush host queue memory writes. */ + wmb(); + if (t5) { + if (inc == 1 && wqe) { + PDBG("%s: WC wq->rq.pidx = %d\n", + __func__, wq->rq.pidx); + pio_copy(wq->rq.udb + 7, (void *)wqe); + } else { + PDBG("%s: DB wq->rq.pidx = %d\n", + __func__, wq->rq.pidx); + writel(PIDX_T5(inc), wq->rq.udb); + } + + /* Flush user doorbell area writes. 
*/ + wmb(); + return; + } + writel(QID(wq->rq.qid) | PIDX(inc), wq->db); +} + +static inline int t4_wq_in_error(struct t4_wq *wq) +{ + return wq->rq.queue[wq->rq.size].status.qp_err; +} + +static inline void t4_set_wq_in_error(struct t4_wq *wq) +{ + wq->rq.queue[wq->rq.size].status.qp_err = 1; +} + +static inline void t4_disable_wq_db(struct t4_wq *wq) +{ + wq->rq.queue[wq->rq.size].status.db_off = 1; +} + +static inline void t4_enable_wq_db(struct t4_wq *wq) +{ + wq->rq.queue[wq->rq.size].status.db_off = 0; +} + +static inline int t4_wq_db_enabled(struct t4_wq *wq) +{ + return !wq->rq.queue[wq->rq.size].status.db_off; +} + +struct t4_cq { + struct t4_cqe *queue; + dma_addr_t dma_addr; + DEFINE_DMA_UNMAP_ADDR(mapping); + struct t4_cqe *sw_queue; + void __iomem *gts; + struct c4iw_rdev *rdev; + u64 ugts; + size_t memsize; + __be64 bits_type_ts; + u32 cqid; + int vector; + u16 size; /* including status page */ + u16 cidx; + u16 sw_pidx; + u16 sw_cidx; + u16 sw_in_use; + u16 cidx_inc; + u8 gen; + u8 error; +}; + +static inline int t4_arm_cq(struct t4_cq *cq, int se) +{ + u32 val; + + while (cq->cidx_inc > CIDXINC_MASK) { + val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) | + INGRESSQID(cq->cqid); + writel(val, cq->gts); + cq->cidx_inc -= CIDXINC_MASK; + } + val = SEINTARM(se) | CIDXINC(cq->cidx_inc) | TIMERREG(6) | + INGRESSQID(cq->cqid); + writel(val, cq->gts); + cq->cidx_inc = 0; + return 0; +} + +static inline void t4_swcq_produce(struct t4_cq *cq) +{ + cq->sw_in_use++; + if (cq->sw_in_use == cq->size) { + PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid); + cq->error = 1; + BUG_ON(1); + } + if (++cq->sw_pidx == cq->size) + cq->sw_pidx = 0; +} + +static inline void t4_swcq_consume(struct t4_cq *cq) +{ + BUG_ON(cq->sw_in_use < 1); + cq->sw_in_use--; + if (++cq->sw_cidx == cq->size) + cq->sw_cidx = 0; +} + +static inline void t4_hwcq_consume(struct t4_cq *cq) +{ + cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts; + if (++cq->cidx_inc == (cq->size >> 4) || cq->cidx_inc == CIDXINC_MASK) { + u32 val; + + val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) | + INGRESSQID(cq->cqid); + writel(val, cq->gts); + cq->cidx_inc = 0; + } + if (++cq->cidx == cq->size) { + cq->cidx = 0; + cq->gen ^= 1; + } +} + +static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe) +{ + return (CQE_GENBIT(cqe) == cq->gen); +} + +static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) +{ + int ret; + u16 prev_cidx; + + if (cq->cidx == 0) + prev_cidx = cq->size - 1; + else + prev_cidx = cq->cidx - 1; + + if (cq->queue[prev_cidx].bits_type_ts != cq->bits_type_ts) { + ret = -EOVERFLOW; + cq->error = 1; + printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid); + BUG_ON(1); + } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) { + + /* Ensure CQE is flushed to memory */ + rmb(); + *cqe = &cq->queue[cq->cidx]; + ret = 0; + } else + ret = -ENODATA; + return ret; +} + +static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq) +{ + if (cq->sw_in_use == cq->size) { + PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid); + cq->error = 1; + BUG_ON(1); + return NULL; + } + if (cq->sw_in_use) + return &cq->sw_queue[cq->sw_cidx]; + return NULL; +} + +static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe) +{ + int ret = 0; + + if (cq->error) + ret = -ENODATA; + else if (cq->sw_in_use) + *cqe = &cq->sw_queue[cq->sw_cidx]; + else + ret = t4_next_hw_cqe(cq, cqe); + return ret; +} + +static inline int t4_cq_in_error(struct t4_cq *cq) +{ + return ((struct 
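/*
 * Like the WQs, the CQ borrows the status page one entry past
 * the end of its ring; flush_qp() sets qp_err here for user
 * QPs so that user mode consumers can observe the error state
 * directly.
 */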
t4_status_page *)&cq->queue[cq->size])->qp_err; +} + +static inline void t4_set_cq_in_error(struct t4_cq *cq) +{ + ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1; +} +#endif + +struct t4_dev_status_page { + u8 db_off; +}; diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h new file mode 100644 index 00000000000..91289a051af --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -0,0 +1,854 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _T4FW_RI_API_H_ +#define _T4FW_RI_API_H_ + +#include "t4fw_api.h" + +enum fw_ri_wr_opcode { + FW_RI_RDMA_WRITE = 0x0, /* IETF RDMAP v1.0 ... */ + FW_RI_READ_REQ = 0x1, + FW_RI_READ_RESP = 0x2, + FW_RI_SEND = 0x3, + FW_RI_SEND_WITH_INV = 0x4, + FW_RI_SEND_WITH_SE = 0x5, + FW_RI_SEND_WITH_SE_INV = 0x6, + FW_RI_TERMINATE = 0x7, + FW_RI_RDMA_INIT = 0x8, /* CHELSIO RI specific ... 
*/ + FW_RI_BIND_MW = 0x9, + FW_RI_FAST_REGISTER = 0xa, + FW_RI_LOCAL_INV = 0xb, + FW_RI_QP_MODIFY = 0xc, + FW_RI_BYPASS = 0xd, + FW_RI_RECEIVE = 0xe, + + FW_RI_SGE_EC_CR_RETURN = 0xf +}; + +enum fw_ri_wr_flags { + FW_RI_COMPLETION_FLAG = 0x01, + FW_RI_NOTIFICATION_FLAG = 0x02, + FW_RI_SOLICITED_EVENT_FLAG = 0x04, + FW_RI_READ_FENCE_FLAG = 0x08, + FW_RI_LOCAL_FENCE_FLAG = 0x10, + FW_RI_RDMA_READ_INVALIDATE = 0x20 +}; + +enum fw_ri_mpa_attrs { + FW_RI_MPA_RX_MARKER_ENABLE = 0x01, + FW_RI_MPA_TX_MARKER_ENABLE = 0x02, + FW_RI_MPA_CRC_ENABLE = 0x04, + FW_RI_MPA_IETF_ENABLE = 0x08 +}; + +enum fw_ri_qp_caps { + FW_RI_QP_RDMA_READ_ENABLE = 0x01, + FW_RI_QP_RDMA_WRITE_ENABLE = 0x02, + FW_RI_QP_BIND_ENABLE = 0x04, + FW_RI_QP_FAST_REGISTER_ENABLE = 0x08, + FW_RI_QP_STAG0_ENABLE = 0x10 +}; + +enum fw_ri_addr_type { + FW_RI_ZERO_BASED_TO = 0x00, + FW_RI_VA_BASED_TO = 0x01 +}; + +enum fw_ri_mem_perms { + FW_RI_MEM_ACCESS_REM_WRITE = 0x01, + FW_RI_MEM_ACCESS_REM_READ = 0x02, + FW_RI_MEM_ACCESS_REM = 0x03, + FW_RI_MEM_ACCESS_LOCAL_WRITE = 0x04, + FW_RI_MEM_ACCESS_LOCAL_READ = 0x08, + FW_RI_MEM_ACCESS_LOCAL = 0x0C +}; + +enum fw_ri_stag_type { + FW_RI_STAG_NSMR = 0x00, + FW_RI_STAG_SMR = 0x01, + FW_RI_STAG_MW = 0x02, + FW_RI_STAG_MW_RELAXED = 0x03 +}; + +enum fw_ri_data_op { + FW_RI_DATA_IMMD = 0x81, + FW_RI_DATA_DSGL = 0x82, + FW_RI_DATA_ISGL = 0x83 +}; + +enum fw_ri_sgl_depth { + FW_RI_SGL_DEPTH_MAX_SQ = 16, + FW_RI_SGL_DEPTH_MAX_RQ = 4 +}; + +struct fw_ri_dsge_pair { + __be32 len[2]; + __be64 addr[2]; +}; + +struct fw_ri_dsgl { + __u8 op; + __u8 r1; + __be16 nsge; + __be32 len0; + __be64 addr0; +#ifndef C99_NOT_SUPPORTED + struct fw_ri_dsge_pair sge[0]; +#endif +}; + +struct fw_ri_sge { + __be32 stag; + __be32 len; + __be64 to; +}; + +struct fw_ri_isgl { + __u8 op; + __u8 r1; + __be16 nsge; + __be32 r2; +#ifndef C99_NOT_SUPPORTED + struct fw_ri_sge sge[0]; +#endif +}; + +struct fw_ri_immd { + __u8 op; + __u8 r1; + __be16 r2; + __be32 immdlen; +#ifndef C99_NOT_SUPPORTED + __u8 data[0]; +#endif +}; + +struct fw_ri_tpte { + __be32 valid_to_pdid; + __be32 locread_to_qpid; + __be32 nosnoop_pbladdr; + __be32 len_lo; + __be32 va_hi; + __be32 va_lo_fbo; + __be32 dca_mwbcnt_pstag; + __be32 len_hi; +}; + +#define S_FW_RI_TPTE_VALID 31 +#define M_FW_RI_TPTE_VALID 0x1 +#define V_FW_RI_TPTE_VALID(x) ((x) << S_FW_RI_TPTE_VALID) +#define G_FW_RI_TPTE_VALID(x) \ + (((x) >> S_FW_RI_TPTE_VALID) & M_FW_RI_TPTE_VALID) +#define F_FW_RI_TPTE_VALID V_FW_RI_TPTE_VALID(1U) + +#define S_FW_RI_TPTE_STAGKEY 23 +#define M_FW_RI_TPTE_STAGKEY 0xff +#define V_FW_RI_TPTE_STAGKEY(x) ((x) << S_FW_RI_TPTE_STAGKEY) +#define G_FW_RI_TPTE_STAGKEY(x) \ + (((x) >> S_FW_RI_TPTE_STAGKEY) & M_FW_RI_TPTE_STAGKEY) + +#define S_FW_RI_TPTE_STAGSTATE 22 +#define M_FW_RI_TPTE_STAGSTATE 0x1 +#define V_FW_RI_TPTE_STAGSTATE(x) ((x) << S_FW_RI_TPTE_STAGSTATE) +#define G_FW_RI_TPTE_STAGSTATE(x) \ + (((x) >> S_FW_RI_TPTE_STAGSTATE) & M_FW_RI_TPTE_STAGSTATE) +#define F_FW_RI_TPTE_STAGSTATE V_FW_RI_TPTE_STAGSTATE(1U) + +#define S_FW_RI_TPTE_STAGTYPE 20 +#define M_FW_RI_TPTE_STAGTYPE 0x3 +#define V_FW_RI_TPTE_STAGTYPE(x) ((x) << S_FW_RI_TPTE_STAGTYPE) +#define G_FW_RI_TPTE_STAGTYPE(x) \ + (((x) >> S_FW_RI_TPTE_STAGTYPE) & M_FW_RI_TPTE_STAGTYPE) + +#define S_FW_RI_TPTE_PDID 0 +#define M_FW_RI_TPTE_PDID 0xfffff +#define V_FW_RI_TPTE_PDID(x) ((x) << S_FW_RI_TPTE_PDID) +#define G_FW_RI_TPTE_PDID(x) \ + (((x) >> S_FW_RI_TPTE_PDID) & M_FW_RI_TPTE_PDID) + +#define S_FW_RI_TPTE_PERM 28 +#define M_FW_RI_TPTE_PERM 0xf +#define V_FW_RI_TPTE_PERM(x) ((x) << 
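/*
 * Accessor convention used throughout these headers: S_* is
 * the bit shift, M_* the field mask, V_*(x) packs a value into
 * place, G_*(x) extracts it, and F_* is the one-bit flag form.
 */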
S_FW_RI_TPTE_PERM) +#define G_FW_RI_TPTE_PERM(x) \ + (((x) >> S_FW_RI_TPTE_PERM) & M_FW_RI_TPTE_PERM) + +#define S_FW_RI_TPTE_REMINVDIS 27 +#define M_FW_RI_TPTE_REMINVDIS 0x1 +#define V_FW_RI_TPTE_REMINVDIS(x) ((x) << S_FW_RI_TPTE_REMINVDIS) +#define G_FW_RI_TPTE_REMINVDIS(x) \ + (((x) >> S_FW_RI_TPTE_REMINVDIS) & M_FW_RI_TPTE_REMINVDIS) +#define F_FW_RI_TPTE_REMINVDIS V_FW_RI_TPTE_REMINVDIS(1U) + +#define S_FW_RI_TPTE_ADDRTYPE 26 +#define M_FW_RI_TPTE_ADDRTYPE 1 +#define V_FW_RI_TPTE_ADDRTYPE(x) ((x) << S_FW_RI_TPTE_ADDRTYPE) +#define G_FW_RI_TPTE_ADDRTYPE(x) \ + (((x) >> S_FW_RI_TPTE_ADDRTYPE) & M_FW_RI_TPTE_ADDRTYPE) +#define F_FW_RI_TPTE_ADDRTYPE V_FW_RI_TPTE_ADDRTYPE(1U) + +#define S_FW_RI_TPTE_MWBINDEN 25 +#define M_FW_RI_TPTE_MWBINDEN 0x1 +#define V_FW_RI_TPTE_MWBINDEN(x) ((x) << S_FW_RI_TPTE_MWBINDEN) +#define G_FW_RI_TPTE_MWBINDEN(x) \ + (((x) >> S_FW_RI_TPTE_MWBINDEN) & M_FW_RI_TPTE_MWBINDEN) +#define F_FW_RI_TPTE_MWBINDEN V_FW_RI_TPTE_MWBINDEN(1U) + +#define S_FW_RI_TPTE_PS 20 +#define M_FW_RI_TPTE_PS 0x1f +#define V_FW_RI_TPTE_PS(x) ((x) << S_FW_RI_TPTE_PS) +#define G_FW_RI_TPTE_PS(x) \ + (((x) >> S_FW_RI_TPTE_PS) & M_FW_RI_TPTE_PS) + +#define S_FW_RI_TPTE_QPID 0 +#define M_FW_RI_TPTE_QPID 0xfffff +#define V_FW_RI_TPTE_QPID(x) ((x) << S_FW_RI_TPTE_QPID) +#define G_FW_RI_TPTE_QPID(x) \ + (((x) >> S_FW_RI_TPTE_QPID) & M_FW_RI_TPTE_QPID) + +#define S_FW_RI_TPTE_NOSNOOP 30 +#define M_FW_RI_TPTE_NOSNOOP 0x1 +#define V_FW_RI_TPTE_NOSNOOP(x) ((x) << S_FW_RI_TPTE_NOSNOOP) +#define G_FW_RI_TPTE_NOSNOOP(x) \ + (((x) >> S_FW_RI_TPTE_NOSNOOP) & M_FW_RI_TPTE_NOSNOOP) +#define F_FW_RI_TPTE_NOSNOOP V_FW_RI_TPTE_NOSNOOP(1U) + +#define S_FW_RI_TPTE_PBLADDR 0 +#define M_FW_RI_TPTE_PBLADDR 0x1fffffff +#define V_FW_RI_TPTE_PBLADDR(x) ((x) << S_FW_RI_TPTE_PBLADDR) +#define G_FW_RI_TPTE_PBLADDR(x) \ + (((x) >> S_FW_RI_TPTE_PBLADDR) & M_FW_RI_TPTE_PBLADDR) + +#define S_FW_RI_TPTE_DCA 24 +#define M_FW_RI_TPTE_DCA 0x1f +#define V_FW_RI_TPTE_DCA(x) ((x) << S_FW_RI_TPTE_DCA) +#define G_FW_RI_TPTE_DCA(x) \ + (((x) >> S_FW_RI_TPTE_DCA) & M_FW_RI_TPTE_DCA) + +#define S_FW_RI_TPTE_MWBCNT_PSTAG 0 +#define M_FW_RI_TPTE_MWBCNT_PSTAG 0xffffff +#define V_FW_RI_TPTE_MWBCNT_PSTAT(x) \ + ((x) << S_FW_RI_TPTE_MWBCNT_PSTAG) +#define G_FW_RI_TPTE_MWBCNT_PSTAG(x) \ + (((x) >> S_FW_RI_TPTE_MWBCNT_PSTAG) & M_FW_RI_TPTE_MWBCNT_PSTAG) + +enum fw_ri_res_type { + FW_RI_RES_TYPE_SQ, + FW_RI_RES_TYPE_RQ, + FW_RI_RES_TYPE_CQ, +}; + +enum fw_ri_res_op { + FW_RI_RES_OP_WRITE, + FW_RI_RES_OP_RESET, +}; + +struct fw_ri_res { + union fw_ri_restype { + struct fw_ri_res_sqrq { + __u8 restype; + __u8 op; + __be16 r3; + __be32 eqid; + __be32 r4[2]; + __be32 fetchszm_to_iqid; + __be32 dcaen_to_eqsize; + __be64 eqaddr; + } sqrq; + struct fw_ri_res_cq { + __u8 restype; + __u8 op; + __be16 r3; + __be32 iqid; + __be32 r4[2]; + __be32 iqandst_to_iqandstindex; + __be16 iqdroprss_to_iqesize; + __be16 iqsize; + __be64 iqaddr; + __be32 iqns_iqro; + __be32 r6_lo; + __be64 r7; + } cq; + } u; +}; + +struct fw_ri_res_wr { + __be32 op_nres; + __be32 len16_pkd; + __u64 cookie; +#ifndef C99_NOT_SUPPORTED + struct fw_ri_res res[0]; +#endif +}; + +#define S_FW_RI_RES_WR_NRES 0 +#define M_FW_RI_RES_WR_NRES 0xff +#define V_FW_RI_RES_WR_NRES(x) ((x) << S_FW_RI_RES_WR_NRES) +#define G_FW_RI_RES_WR_NRES(x) \ + (((x) >> S_FW_RI_RES_WR_NRES) & M_FW_RI_RES_WR_NRES) + +#define S_FW_RI_RES_WR_FETCHSZM 26 +#define M_FW_RI_RES_WR_FETCHSZM 0x1 +#define V_FW_RI_RES_WR_FETCHSZM(x) ((x) << S_FW_RI_RES_WR_FETCHSZM) +#define G_FW_RI_RES_WR_FETCHSZM(x) \ + (((x) >> 
S_FW_RI_RES_WR_FETCHSZM) & M_FW_RI_RES_WR_FETCHSZM) +#define F_FW_RI_RES_WR_FETCHSZM V_FW_RI_RES_WR_FETCHSZM(1U) + +#define S_FW_RI_RES_WR_STATUSPGNS 25 +#define M_FW_RI_RES_WR_STATUSPGNS 0x1 +#define V_FW_RI_RES_WR_STATUSPGNS(x) ((x) << S_FW_RI_RES_WR_STATUSPGNS) +#define G_FW_RI_RES_WR_STATUSPGNS(x) \ + (((x) >> S_FW_RI_RES_WR_STATUSPGNS) & M_FW_RI_RES_WR_STATUSPGNS) +#define F_FW_RI_RES_WR_STATUSPGNS V_FW_RI_RES_WR_STATUSPGNS(1U) + +#define S_FW_RI_RES_WR_STATUSPGRO 24 +#define M_FW_RI_RES_WR_STATUSPGRO 0x1 +#define V_FW_RI_RES_WR_STATUSPGRO(x) ((x) << S_FW_RI_RES_WR_STATUSPGRO) +#define G_FW_RI_RES_WR_STATUSPGRO(x) \ + (((x) >> S_FW_RI_RES_WR_STATUSPGRO) & M_FW_RI_RES_WR_STATUSPGRO) +#define F_FW_RI_RES_WR_STATUSPGRO V_FW_RI_RES_WR_STATUSPGRO(1U) + +#define S_FW_RI_RES_WR_FETCHNS 23 +#define M_FW_RI_RES_WR_FETCHNS 0x1 +#define V_FW_RI_RES_WR_FETCHNS(x) ((x) << S_FW_RI_RES_WR_FETCHNS) +#define G_FW_RI_RES_WR_FETCHNS(x) \ + (((x) >> S_FW_RI_RES_WR_FETCHNS) & M_FW_RI_RES_WR_FETCHNS) +#define F_FW_RI_RES_WR_FETCHNS V_FW_RI_RES_WR_FETCHNS(1U) + +#define S_FW_RI_RES_WR_FETCHRO 22 +#define M_FW_RI_RES_WR_FETCHRO 0x1 +#define V_FW_RI_RES_WR_FETCHRO(x) ((x) << S_FW_RI_RES_WR_FETCHRO) +#define G_FW_RI_RES_WR_FETCHRO(x) \ + (((x) >> S_FW_RI_RES_WR_FETCHRO) & M_FW_RI_RES_WR_FETCHRO) +#define F_FW_RI_RES_WR_FETCHRO V_FW_RI_RES_WR_FETCHRO(1U) + +#define S_FW_RI_RES_WR_HOSTFCMODE 20 +#define M_FW_RI_RES_WR_HOSTFCMODE 0x3 +#define V_FW_RI_RES_WR_HOSTFCMODE(x) ((x) << S_FW_RI_RES_WR_HOSTFCMODE) +#define G_FW_RI_RES_WR_HOSTFCMODE(x) \ + (((x) >> S_FW_RI_RES_WR_HOSTFCMODE) & M_FW_RI_RES_WR_HOSTFCMODE) + +#define S_FW_RI_RES_WR_CPRIO 19 +#define M_FW_RI_RES_WR_CPRIO 0x1 +#define V_FW_RI_RES_WR_CPRIO(x) ((x) << S_FW_RI_RES_WR_CPRIO) +#define G_FW_RI_RES_WR_CPRIO(x) \ + (((x) >> S_FW_RI_RES_WR_CPRIO) & M_FW_RI_RES_WR_CPRIO) +#define F_FW_RI_RES_WR_CPRIO V_FW_RI_RES_WR_CPRIO(1U) + +#define S_FW_RI_RES_WR_ONCHIP 18 +#define M_FW_RI_RES_WR_ONCHIP 0x1 +#define V_FW_RI_RES_WR_ONCHIP(x) ((x) << S_FW_RI_RES_WR_ONCHIP) +#define G_FW_RI_RES_WR_ONCHIP(x) \ + (((x) >> S_FW_RI_RES_WR_ONCHIP) & M_FW_RI_RES_WR_ONCHIP) +#define F_FW_RI_RES_WR_ONCHIP V_FW_RI_RES_WR_ONCHIP(1U) + +#define S_FW_RI_RES_WR_PCIECHN 16 +#define M_FW_RI_RES_WR_PCIECHN 0x3 +#define V_FW_RI_RES_WR_PCIECHN(x) ((x) << S_FW_RI_RES_WR_PCIECHN) +#define G_FW_RI_RES_WR_PCIECHN(x) \ + (((x) >> S_FW_RI_RES_WR_PCIECHN) & M_FW_RI_RES_WR_PCIECHN) + +#define S_FW_RI_RES_WR_IQID 0 +#define M_FW_RI_RES_WR_IQID 0xffff +#define V_FW_RI_RES_WR_IQID(x) ((x) << S_FW_RI_RES_WR_IQID) +#define G_FW_RI_RES_WR_IQID(x) \ + (((x) >> S_FW_RI_RES_WR_IQID) & M_FW_RI_RES_WR_IQID) + +#define S_FW_RI_RES_WR_DCAEN 31 +#define M_FW_RI_RES_WR_DCAEN 0x1 +#define V_FW_RI_RES_WR_DCAEN(x) ((x) << S_FW_RI_RES_WR_DCAEN) +#define G_FW_RI_RES_WR_DCAEN(x) \ + (((x) >> S_FW_RI_RES_WR_DCAEN) & M_FW_RI_RES_WR_DCAEN) +#define F_FW_RI_RES_WR_DCAEN V_FW_RI_RES_WR_DCAEN(1U) + +#define S_FW_RI_RES_WR_DCACPU 26 +#define M_FW_RI_RES_WR_DCACPU 0x1f +#define V_FW_RI_RES_WR_DCACPU(x) ((x) << S_FW_RI_RES_WR_DCACPU) +#define G_FW_RI_RES_WR_DCACPU(x) \ + (((x) >> S_FW_RI_RES_WR_DCACPU) & M_FW_RI_RES_WR_DCACPU) + +#define S_FW_RI_RES_WR_FBMIN 23 +#define M_FW_RI_RES_WR_FBMIN 0x7 +#define V_FW_RI_RES_WR_FBMIN(x) ((x) << S_FW_RI_RES_WR_FBMIN) +#define G_FW_RI_RES_WR_FBMIN(x) \ + (((x) >> S_FW_RI_RES_WR_FBMIN) & M_FW_RI_RES_WR_FBMIN) + +#define S_FW_RI_RES_WR_FBMAX 20 +#define M_FW_RI_RES_WR_FBMAX 0x7 +#define V_FW_RI_RES_WR_FBMAX(x) ((x) << S_FW_RI_RES_WR_FBMAX) +#define G_FW_RI_RES_WR_FBMAX(x) \ + (((x) >> 
S_FW_RI_RES_WR_FBMAX) & M_FW_RI_RES_WR_FBMAX) + +#define S_FW_RI_RES_WR_CIDXFTHRESHO 19 +#define M_FW_RI_RES_WR_CIDXFTHRESHO 0x1 +#define V_FW_RI_RES_WR_CIDXFTHRESHO(x) ((x) << S_FW_RI_RES_WR_CIDXFTHRESHO) +#define G_FW_RI_RES_WR_CIDXFTHRESHO(x) \ + (((x) >> S_FW_RI_RES_WR_CIDXFTHRESHO) & M_FW_RI_RES_WR_CIDXFTHRESHO) +#define F_FW_RI_RES_WR_CIDXFTHRESHO V_FW_RI_RES_WR_CIDXFTHRESHO(1U) + +#define S_FW_RI_RES_WR_CIDXFTHRESH 16 +#define M_FW_RI_RES_WR_CIDXFTHRESH 0x7 +#define V_FW_RI_RES_WR_CIDXFTHRESH(x) ((x) << S_FW_RI_RES_WR_CIDXFTHRESH) +#define G_FW_RI_RES_WR_CIDXFTHRESH(x) \ + (((x) >> S_FW_RI_RES_WR_CIDXFTHRESH) & M_FW_RI_RES_WR_CIDXFTHRESH) + +#define S_FW_RI_RES_WR_EQSIZE 0 +#define M_FW_RI_RES_WR_EQSIZE 0xffff +#define V_FW_RI_RES_WR_EQSIZE(x) ((x) << S_FW_RI_RES_WR_EQSIZE) +#define G_FW_RI_RES_WR_EQSIZE(x) \ + (((x) >> S_FW_RI_RES_WR_EQSIZE) & M_FW_RI_RES_WR_EQSIZE) + +#define S_FW_RI_RES_WR_IQANDST 15 +#define M_FW_RI_RES_WR_IQANDST 0x1 +#define V_FW_RI_RES_WR_IQANDST(x) ((x) << S_FW_RI_RES_WR_IQANDST) +#define G_FW_RI_RES_WR_IQANDST(x) \ + (((x) >> S_FW_RI_RES_WR_IQANDST) & M_FW_RI_RES_WR_IQANDST) +#define F_FW_RI_RES_WR_IQANDST V_FW_RI_RES_WR_IQANDST(1U) + +#define S_FW_RI_RES_WR_IQANUS 14 +#define M_FW_RI_RES_WR_IQANUS 0x1 +#define V_FW_RI_RES_WR_IQANUS(x) ((x) << S_FW_RI_RES_WR_IQANUS) +#define G_FW_RI_RES_WR_IQANUS(x) \ + (((x) >> S_FW_RI_RES_WR_IQANUS) & M_FW_RI_RES_WR_IQANUS) +#define F_FW_RI_RES_WR_IQANUS V_FW_RI_RES_WR_IQANUS(1U) + +#define S_FW_RI_RES_WR_IQANUD 12 +#define M_FW_RI_RES_WR_IQANUD 0x3 +#define V_FW_RI_RES_WR_IQANUD(x) ((x) << S_FW_RI_RES_WR_IQANUD) +#define G_FW_RI_RES_WR_IQANUD(x) \ + (((x) >> S_FW_RI_RES_WR_IQANUD) & M_FW_RI_RES_WR_IQANUD) + +#define S_FW_RI_RES_WR_IQANDSTINDEX 0 +#define M_FW_RI_RES_WR_IQANDSTINDEX 0xfff +#define V_FW_RI_RES_WR_IQANDSTINDEX(x) ((x) << S_FW_RI_RES_WR_IQANDSTINDEX) +#define G_FW_RI_RES_WR_IQANDSTINDEX(x) \ + (((x) >> S_FW_RI_RES_WR_IQANDSTINDEX) & M_FW_RI_RES_WR_IQANDSTINDEX) + +#define S_FW_RI_RES_WR_IQDROPRSS 15 +#define M_FW_RI_RES_WR_IQDROPRSS 0x1 +#define V_FW_RI_RES_WR_IQDROPRSS(x) ((x) << S_FW_RI_RES_WR_IQDROPRSS) +#define G_FW_RI_RES_WR_IQDROPRSS(x) \ + (((x) >> S_FW_RI_RES_WR_IQDROPRSS) & M_FW_RI_RES_WR_IQDROPRSS) +#define F_FW_RI_RES_WR_IQDROPRSS V_FW_RI_RES_WR_IQDROPRSS(1U) + +#define S_FW_RI_RES_WR_IQGTSMODE 14 +#define M_FW_RI_RES_WR_IQGTSMODE 0x1 +#define V_FW_RI_RES_WR_IQGTSMODE(x) ((x) << S_FW_RI_RES_WR_IQGTSMODE) +#define G_FW_RI_RES_WR_IQGTSMODE(x) \ + (((x) >> S_FW_RI_RES_WR_IQGTSMODE) & M_FW_RI_RES_WR_IQGTSMODE) +#define F_FW_RI_RES_WR_IQGTSMODE V_FW_RI_RES_WR_IQGTSMODE(1U) + +#define S_FW_RI_RES_WR_IQPCIECH 12 +#define M_FW_RI_RES_WR_IQPCIECH 0x3 +#define V_FW_RI_RES_WR_IQPCIECH(x) ((x) << S_FW_RI_RES_WR_IQPCIECH) +#define G_FW_RI_RES_WR_IQPCIECH(x) \ + (((x) >> S_FW_RI_RES_WR_IQPCIECH) & M_FW_RI_RES_WR_IQPCIECH) + +#define S_FW_RI_RES_WR_IQDCAEN 11 +#define M_FW_RI_RES_WR_IQDCAEN 0x1 +#define V_FW_RI_RES_WR_IQDCAEN(x) ((x) << S_FW_RI_RES_WR_IQDCAEN) +#define G_FW_RI_RES_WR_IQDCAEN(x) \ + (((x) >> S_FW_RI_RES_WR_IQDCAEN) & M_FW_RI_RES_WR_IQDCAEN) +#define F_FW_RI_RES_WR_IQDCAEN V_FW_RI_RES_WR_IQDCAEN(1U) + +#define S_FW_RI_RES_WR_IQDCACPU 6 +#define M_FW_RI_RES_WR_IQDCACPU 0x1f +#define V_FW_RI_RES_WR_IQDCACPU(x) ((x) << S_FW_RI_RES_WR_IQDCACPU) +#define G_FW_RI_RES_WR_IQDCACPU(x) \ + (((x) >> S_FW_RI_RES_WR_IQDCACPU) & M_FW_RI_RES_WR_IQDCACPU) + +#define S_FW_RI_RES_WR_IQINTCNTTHRESH 4 +#define M_FW_RI_RES_WR_IQINTCNTTHRESH 0x3 +#define V_FW_RI_RES_WR_IQINTCNTTHRESH(x) \ + ((x) << 
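/*
 * Illustrative packing (hypothetical values, modelled on how a driver
 * would fill struct fw_ri_res for an SQ/RQ): each *_to_* member
 * collapses several of the fields defined above into one big-endian
 * word by OR-ing the V_* helpers together, e.g.
 *
 *      res->u.sqrq.fetchszm_to_iqid =
 *              cpu_to_be32(V_FW_RI_RES_WR_HOSTFCMODE(0) |
 *                          V_FW_RI_RES_WR_CPRIO(0) |
 *                          V_FW_RI_RES_WR_PCIECHN(0) |
 *                          V_FW_RI_RES_WR_IQID(scq_id));
 *      res->u.sqrq.dcaen_to_eqsize =
 *              cpu_to_be32(V_FW_RI_RES_WR_FBMIN(2) |
 *                          V_FW_RI_RES_WR_FBMAX(3) |
 *                          V_FW_RI_RES_WR_EQSIZE(eq_size));
 */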
S_FW_RI_RES_WR_IQINTCNTTHRESH) +#define G_FW_RI_RES_WR_IQINTCNTTHRESH(x) \ + (((x) >> S_FW_RI_RES_WR_IQINTCNTTHRESH) & M_FW_RI_RES_WR_IQINTCNTTHRESH) + +#define S_FW_RI_RES_WR_IQO 3 +#define M_FW_RI_RES_WR_IQO 0x1 +#define V_FW_RI_RES_WR_IQO(x) ((x) << S_FW_RI_RES_WR_IQO) +#define G_FW_RI_RES_WR_IQO(x) \ + (((x) >> S_FW_RI_RES_WR_IQO) & M_FW_RI_RES_WR_IQO) +#define F_FW_RI_RES_WR_IQO V_FW_RI_RES_WR_IQO(1U) + +#define S_FW_RI_RES_WR_IQCPRIO 2 +#define M_FW_RI_RES_WR_IQCPRIO 0x1 +#define V_FW_RI_RES_WR_IQCPRIO(x) ((x) << S_FW_RI_RES_WR_IQCPRIO) +#define G_FW_RI_RES_WR_IQCPRIO(x) \ + (((x) >> S_FW_RI_RES_WR_IQCPRIO) & M_FW_RI_RES_WR_IQCPRIO) +#define F_FW_RI_RES_WR_IQCPRIO V_FW_RI_RES_WR_IQCPRIO(1U) + +#define S_FW_RI_RES_WR_IQESIZE 0 +#define M_FW_RI_RES_WR_IQESIZE 0x3 +#define V_FW_RI_RES_WR_IQESIZE(x) ((x) << S_FW_RI_RES_WR_IQESIZE) +#define G_FW_RI_RES_WR_IQESIZE(x) \ + (((x) >> S_FW_RI_RES_WR_IQESIZE) & M_FW_RI_RES_WR_IQESIZE) + +#define S_FW_RI_RES_WR_IQNS 31 +#define M_FW_RI_RES_WR_IQNS 0x1 +#define V_FW_RI_RES_WR_IQNS(x) ((x) << S_FW_RI_RES_WR_IQNS) +#define G_FW_RI_RES_WR_IQNS(x) \ + (((x) >> S_FW_RI_RES_WR_IQNS) & M_FW_RI_RES_WR_IQNS) +#define F_FW_RI_RES_WR_IQNS V_FW_RI_RES_WR_IQNS(1U) + +#define S_FW_RI_RES_WR_IQRO 30 +#define M_FW_RI_RES_WR_IQRO 0x1 +#define V_FW_RI_RES_WR_IQRO(x) ((x) << S_FW_RI_RES_WR_IQRO) +#define G_FW_RI_RES_WR_IQRO(x) \ + (((x) >> S_FW_RI_RES_WR_IQRO) & M_FW_RI_RES_WR_IQRO) +#define F_FW_RI_RES_WR_IQRO V_FW_RI_RES_WR_IQRO(1U) + +struct fw_ri_rdma_write_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __be64 r2; + __be32 plen; + __be32 stag_sink; + __be64 to_sink; +#ifndef C99_NOT_SUPPORTED + union { + struct fw_ri_immd immd_src[0]; + struct fw_ri_isgl isgl_src[0]; + } u; +#endif +}; + +struct fw_ri_send_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __be32 sendop_pkd; + __be32 stag_inv; + __be32 plen; + __be32 r3; + __be64 r4; +#ifndef C99_NOT_SUPPORTED + union { + struct fw_ri_immd immd_src[0]; + struct fw_ri_isgl isgl_src[0]; + } u; +#endif +}; + +#define S_FW_RI_SEND_WR_SENDOP 0 +#define M_FW_RI_SEND_WR_SENDOP 0xf +#define V_FW_RI_SEND_WR_SENDOP(x) ((x) << S_FW_RI_SEND_WR_SENDOP) +#define G_FW_RI_SEND_WR_SENDOP(x) \ + (((x) >> S_FW_RI_SEND_WR_SENDOP) & M_FW_RI_SEND_WR_SENDOP) + +struct fw_ri_rdma_read_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __be64 r2; + __be32 stag_sink; + __be32 to_sink_hi; + __be32 to_sink_lo; + __be32 plen; + __be32 stag_src; + __be32 to_src_hi; + __be32 to_src_lo; + __be32 r5; +}; + +struct fw_ri_recv_wr { + __u8 opcode; + __u8 r1; + __u16 wrid; + __u8 r2[3]; + __u8 len16; + struct fw_ri_isgl isgl; +}; + +struct fw_ri_bind_mw_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u8 qpbinde_to_dcacpu; + __u8 pgsz_shift; + __u8 addr_type; + __u8 mem_perms; + __be32 stag_mr; + __be32 stag_mw; + __be32 r3; + __be64 len_mw; + __be64 va_fbo; + __be64 r4; +}; + +#define S_FW_RI_BIND_MW_WR_QPBINDE 6 +#define M_FW_RI_BIND_MW_WR_QPBINDE 0x1 +#define V_FW_RI_BIND_MW_WR_QPBINDE(x) ((x) << S_FW_RI_BIND_MW_WR_QPBINDE) +#define G_FW_RI_BIND_MW_WR_QPBINDE(x) \ + (((x) >> S_FW_RI_BIND_MW_WR_QPBINDE) & M_FW_RI_BIND_MW_WR_QPBINDE) +#define F_FW_RI_BIND_MW_WR_QPBINDE V_FW_RI_BIND_MW_WR_QPBINDE(1U) + +#define S_FW_RI_BIND_MW_WR_NS 5 +#define M_FW_RI_BIND_MW_WR_NS 0x1 +#define V_FW_RI_BIND_MW_WR_NS(x) ((x) << S_FW_RI_BIND_MW_WR_NS) +#define G_FW_RI_BIND_MW_WR_NS(x) \ + (((x) >> S_FW_RI_BIND_MW_WR_NS) & M_FW_RI_BIND_MW_WR_NS) +#define 
F_FW_RI_BIND_MW_WR_NS V_FW_RI_BIND_MW_WR_NS(1U) + +#define S_FW_RI_BIND_MW_WR_DCACPU 0 +#define M_FW_RI_BIND_MW_WR_DCACPU 0x1f +#define V_FW_RI_BIND_MW_WR_DCACPU(x) ((x) << S_FW_RI_BIND_MW_WR_DCACPU) +#define G_FW_RI_BIND_MW_WR_DCACPU(x) \ + (((x) >> S_FW_RI_BIND_MW_WR_DCACPU) & M_FW_RI_BIND_MW_WR_DCACPU) + +struct fw_ri_fr_nsmr_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u8 qpbinde_to_dcacpu; + __u8 pgsz_shift; + __u8 addr_type; + __u8 mem_perms; + __be32 stag; + __be32 len_hi; + __be32 len_lo; + __be32 va_hi; + __be32 va_lo_fbo; +}; + +#define S_FW_RI_FR_NSMR_WR_QPBINDE 6 +#define M_FW_RI_FR_NSMR_WR_QPBINDE 0x1 +#define V_FW_RI_FR_NSMR_WR_QPBINDE(x) ((x) << S_FW_RI_FR_NSMR_WR_QPBINDE) +#define G_FW_RI_FR_NSMR_WR_QPBINDE(x) \ + (((x) >> S_FW_RI_FR_NSMR_WR_QPBINDE) & M_FW_RI_FR_NSMR_WR_QPBINDE) +#define F_FW_RI_FR_NSMR_WR_QPBINDE V_FW_RI_FR_NSMR_WR_QPBINDE(1U) + +#define S_FW_RI_FR_NSMR_WR_NS 5 +#define M_FW_RI_FR_NSMR_WR_NS 0x1 +#define V_FW_RI_FR_NSMR_WR_NS(x) ((x) << S_FW_RI_FR_NSMR_WR_NS) +#define G_FW_RI_FR_NSMR_WR_NS(x) \ + (((x) >> S_FW_RI_FR_NSMR_WR_NS) & M_FW_RI_FR_NSMR_WR_NS) +#define F_FW_RI_FR_NSMR_WR_NS V_FW_RI_FR_NSMR_WR_NS(1U) + +#define S_FW_RI_FR_NSMR_WR_DCACPU 0 +#define M_FW_RI_FR_NSMR_WR_DCACPU 0x1f +#define V_FW_RI_FR_NSMR_WR_DCACPU(x) ((x) << S_FW_RI_FR_NSMR_WR_DCACPU) +#define G_FW_RI_FR_NSMR_WR_DCACPU(x) \ + (((x) >> S_FW_RI_FR_NSMR_WR_DCACPU) & M_FW_RI_FR_NSMR_WR_DCACPU) + +struct fw_ri_inv_lstag_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __be32 r2; + __be32 stag_inv; +}; + +enum fw_ri_type { + FW_RI_TYPE_INIT, + FW_RI_TYPE_FINI, + FW_RI_TYPE_TERMINATE +}; + +enum fw_ri_init_p2ptype { + FW_RI_INIT_P2PTYPE_RDMA_WRITE = FW_RI_RDMA_WRITE, + FW_RI_INIT_P2PTYPE_READ_REQ = FW_RI_READ_REQ, + FW_RI_INIT_P2PTYPE_SEND = FW_RI_SEND, + FW_RI_INIT_P2PTYPE_SEND_WITH_INV = FW_RI_SEND_WITH_INV, + FW_RI_INIT_P2PTYPE_SEND_WITH_SE = FW_RI_SEND_WITH_SE, + FW_RI_INIT_P2PTYPE_SEND_WITH_SE_INV = FW_RI_SEND_WITH_SE_INV, + FW_RI_INIT_P2PTYPE_DISABLED = 0xf, +}; + +struct fw_ri_wr { + __be32 op_compl; + __be32 flowid_len16; + __u64 cookie; + union fw_ri { + struct fw_ri_init { + __u8 type; + __u8 mpareqbit_p2ptype; + __u8 r4[2]; + __u8 mpa_attrs; + __u8 qp_caps; + __be16 nrqe; + __be32 pdid; + __be32 qpid; + __be32 sq_eqid; + __be32 rq_eqid; + __be32 scqid; + __be32 rcqid; + __be32 ord_max; + __be32 ird_max; + __be32 iss; + __be32 irs; + __be32 hwrqsize; + __be32 hwrqaddr; + __be64 r5; + union fw_ri_init_p2p { + struct fw_ri_rdma_write_wr write; + struct fw_ri_rdma_read_wr read; + struct fw_ri_send_wr send; + } u; + } init; + struct fw_ri_fini { + __u8 type; + __u8 r3[7]; + __be64 r4; + } fini; + struct fw_ri_terminate { + __u8 type; + __u8 r3[3]; + __be32 immdlen; + __u8 termmsg[40]; + } terminate; + } u; +}; + +#define S_FW_RI_WR_MPAREQBIT 7 +#define M_FW_RI_WR_MPAREQBIT 0x1 +#define V_FW_RI_WR_MPAREQBIT(x) ((x) << S_FW_RI_WR_MPAREQBIT) +#define G_FW_RI_WR_MPAREQBIT(x) \ + (((x) >> S_FW_RI_WR_MPAREQBIT) & M_FW_RI_WR_MPAREQBIT) +#define F_FW_RI_WR_MPAREQBIT V_FW_RI_WR_MPAREQBIT(1U) + +#define S_FW_RI_WR_P2PTYPE 0 +#define M_FW_RI_WR_P2PTYPE 0xf +#define V_FW_RI_WR_P2PTYPE(x) ((x) << S_FW_RI_WR_P2PTYPE) +#define G_FW_RI_WR_P2PTYPE(x) \ + (((x) >> S_FW_RI_WR_P2PTYPE) & M_FW_RI_WR_P2PTYPE) + +struct tcp_options { + __be16 mss; + __u8 wsf; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8:4; + __u8 unknown:1; + __u8:1; + __u8 sack:1; + __u8 tstamp:1; +#else + __u8 tstamp:1; + __u8 sack:1; + __u8:1; + __u8 unknown:1; + __u8:4; 
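/*
 * Note: the bitfields above are declared twice because C leaves
 * bitfield ordering to the ABI -- on a little-endian host the first
 * field declared occupies the least significant bits, on a big-endian
 * host the most significant.  Mirroring the declarations under
 * __LITTLE_ENDIAN_BITFIELD/__BIG_ENDIAN_BITFIELD keeps tcpopt.tstamp,
 * tcpopt.sack etc. pointing at the same wire bits on either kind of
 * host.
 */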
+#endif +}; + +struct cpl_pass_accept_req { + union opcode_tid ot; + __be16 rsvd; + __be16 len; + __be32 hdr_len; + __be16 vlan; + __be16 l2info; + __be32 tos_stid; + struct tcp_options tcpopt; +}; + +/* cpl_pass_accept_req.hdr_len fields */ +#define S_SYN_RX_CHAN 0 +#define M_SYN_RX_CHAN 0xF +#define V_SYN_RX_CHAN(x) ((x) << S_SYN_RX_CHAN) +#define G_SYN_RX_CHAN(x) (((x) >> S_SYN_RX_CHAN) & M_SYN_RX_CHAN) + +#define S_TCP_HDR_LEN 10 +#define M_TCP_HDR_LEN 0x3F +#define V_TCP_HDR_LEN(x) ((x) << S_TCP_HDR_LEN) +#define G_TCP_HDR_LEN(x) (((x) >> S_TCP_HDR_LEN) & M_TCP_HDR_LEN) + +#define S_IP_HDR_LEN 16 +#define M_IP_HDR_LEN 0x3FF +#define V_IP_HDR_LEN(x) ((x) << S_IP_HDR_LEN) +#define G_IP_HDR_LEN(x) (((x) >> S_IP_HDR_LEN) & M_IP_HDR_LEN) + +#define S_ETH_HDR_LEN 26 +#define M_ETH_HDR_LEN 0x1F +#define V_ETH_HDR_LEN(x) ((x) << S_ETH_HDR_LEN) +#define G_ETH_HDR_LEN(x) (((x) >> S_ETH_HDR_LEN) & M_ETH_HDR_LEN) + +/* cpl_pass_accept_req.l2info fields */ +#define S_SYN_MAC_IDX 0 +#define M_SYN_MAC_IDX 0x1FF +#define V_SYN_MAC_IDX(x) ((x) << S_SYN_MAC_IDX) +#define G_SYN_MAC_IDX(x) (((x) >> S_SYN_MAC_IDX) & M_SYN_MAC_IDX) + +#define S_SYN_XACT_MATCH 9 +#define V_SYN_XACT_MATCH(x) ((x) << S_SYN_XACT_MATCH) +#define F_SYN_XACT_MATCH V_SYN_XACT_MATCH(1U) + +#define S_SYN_INTF 12 +#define M_SYN_INTF 0xF +#define V_SYN_INTF(x) ((x) << S_SYN_INTF) +#define G_SYN_INTF(x) (((x) >> S_SYN_INTF) & M_SYN_INTF) + +struct ulptx_idata { + __be32 cmd_more; + __be32 len; +}; + +#define S_ULPTX_NSGE 0 +#define M_ULPTX_NSGE 0xFFFF +#define V_ULPTX_NSGE(x) ((x) << S_ULPTX_NSGE) + +#define S_RX_DACK_MODE 29 +#define M_RX_DACK_MODE 0x3 +#define V_RX_DACK_MODE(x) ((x) << S_RX_DACK_MODE) +#define G_RX_DACK_MODE(x) (((x) >> S_RX_DACK_MODE) & M_RX_DACK_MODE) + +#define S_RX_DACK_CHANGE 31 +#define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE) +#define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U) + +enum { /* TCP congestion control algorithms */ + CONG_ALG_RENO, + CONG_ALG_TAHOE, + CONG_ALG_NEWRENO, + CONG_ALG_HIGHSPEED +}; + +#define S_CONG_CNTRL 14 +#define M_CONG_CNTRL 0x3 +#define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL) +#define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL) + +#define CONG_CNTRL_VALID (1 << 18) +#define T5_OPT_2_VALID (1 << 31) + +#endif /* _T4FW_RI_API_H_ */ diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h new file mode 100644 index 00000000000..cbd0ce17072 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __C4IW_USER_H__ +#define __C4IW_USER_H__ + +#define C4IW_UVERBS_ABI_VERSION 2 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ +struct c4iw_create_cq_resp { + __u64 key; + __u64 gts_key; + __u64 memsize; + __u32 cqid; + __u32 size; + __u32 qid_mask; + __u32 reserved; /* explicit padding (optional for i386) */ +}; + + +enum { + C4IW_QPF_ONCHIP = (1<<0) +}; + +struct c4iw_create_qp_resp { + __u64 ma_sync_key; + __u64 sq_key; + __u64 rq_key; + __u64 sq_db_gts_key; + __u64 rq_db_gts_key; + __u64 sq_memsize; + __u64 rq_memsize; + __u32 sqid; + __u32 rqid; + __u32 sq_size; + __u32 rq_size; + __u32 qid_mask; + __u32 flags; +}; + +struct c4iw_alloc_ucontext_resp { + __u64 status_page_key; + __u32 status_page_size; + __u32 reserved; /* explicit padding (optional for i386) */ +}; +#endif diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 56735ea2fc5..465926319f3 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -41,6 +41,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> + #include "ehca_tools.h" #include "ehca_iverbs.h" #include "hcp_if.h" diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 1e9e99a1393..bd45e0f3923 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -112,7 +112,7 @@ struct ehca_sport { struct ehca_shca { struct ib_device ib_device; - struct of_device *ofdev; + struct platform_device *ofdev; u8 num_ports; int hw_level; struct list_head shca_list; @@ -128,6 +128,8 @@ struct ehca_shca { /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ u32 hca_cap_mr_pgsize; int max_mtu; + int max_num_qps; + int max_num_cqs; atomic_t num_cqs; atomic_t num_qps; }; @@ -156,6 +158,30 @@ struct ehca_mod_qp_parm { #define EHCA_MOD_QP_PARM_MAX 4 +#define QMAP_IDX_MASK 0xFFFFULL + +/* struct for tracking if cqes have been reported to the application */ +struct ehca_qmap_entry { + u16 app_wr_id; + u8 reported; + u8 cqe_req; +}; + +struct ehca_queue_map { + struct ehca_qmap_entry *map; + unsigned int entries; + unsigned int tail; + unsigned int left_to_poll; + unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */ +}; + +/* function to calculate the next index for the qmap */ +static inline unsigned int next_index(unsigned int cur_index, unsigned int limit) +{ + unsigned int temp = cur_index + 1; + return (temp == limit) ? 
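/*
 * (wrap-around without a division: the queue map is a ring that
 * shadows one work queue, so advancing an index is always "+1 modulo
 * the number of entries", and this compare-and-reset form avoids the
 * cost of the % operator on the fast path)
 */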
0 : temp; +} + struct ehca_qp { union { struct ib_qp ib_qp; @@ -165,7 +191,9 @@ struct ehca_qp { enum ehca_ext_qp_type ext_type; enum ib_qp_state state; struct ipz_queue ipz_squeue; + struct ehca_queue_map sq_map; struct ipz_queue ipz_rqueue; + struct ehca_queue_map rq_map; struct h_galpas galpas; u32 qkey; u32 real_qp_num; @@ -194,6 +222,9 @@ struct ehca_qp { u32 packet_count; atomic_t nr_events; /* events seen */ wait_queue_head_t wait_completion; + int mig_armed; + struct list_head sq_err_node; + struct list_head rq_err_node; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) @@ -223,6 +254,8 @@ struct ehca_cq { /* mmap counter for resources mapped into user space */ u32 mm_count_queue; u32 mm_count_galpa; + struct list_head sqp_err_list; + struct list_head rqp_err_list; }; enum ehca_mr_flag { @@ -289,7 +322,7 @@ struct ehca_mr_pginfo { } phy; struct { /* type EHCA_MR_PGI_USER section */ struct ib_umem *region; - struct ib_umem_chunk *next_chunk; + struct scatterlist *next_sg; u64 next_nmap; } usr; struct { /* type EHCA_MR_PGI_FMR section */ @@ -342,11 +375,12 @@ extern rwlock_t ehca_qp_idr_lock; extern rwlock_t ehca_cq_idr_lock; extern struct idr ehca_qp_idr; extern struct idr ehca_cq_idr; +extern spinlock_t shca_list_lock; extern int ehca_static_rate; extern int ehca_port_act_time; -extern int ehca_use_hp_mr; -extern int ehca_scaling_code; +extern bool ehca_use_hp_mr; +extern bool ehca_scaling_code; extern int ehca_lock_hcalls; extern int ehca_nr_ports; extern int ehca_max_cq; diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h index 1798e6466bd..689c35786dd 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h @@ -165,7 +165,6 @@ struct hcp_modify_qp_control_block { #define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) #define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) #define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) -#define MQPCB_QP_STATE EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) #define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) #define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) @@ -176,60 +175,33 @@ struct hcp_modify_qp_control_block { #define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) #define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) #define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) -#define MQPCB_PATH_MTU EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) -#define MQPCB_MAX_STATIC_RATE EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) -#define MQPCB_DLID EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) -#define MQPCB_RNR_RETRY_COUNT EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) -#define MQPCB_SOURCE_PATH_BITS EHCA_BMASK_IBM(25, 31) #define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) -#define MQPCB_TRAFFIC_CLASS EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) -#define MQPCB_HOP_LIMIT EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) -#define MQPCB_SOURCE_GID_IDX EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) -#define MQPCB_FLOW_LABEL EHCA_BMASK_IBM(12, 31) #define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) #define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) -#define MQPCB_SERVICE_LEVEL_AL EHCA_BMASK_IBM(28, 31) #define MQPCB_MASK_SEND_GRH_FLAG_AL 
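/*
 * Note: EHCA_BMASK_IBM(from, to) encodes a field in IBM bit order,
 * where bit 0 is the most significant bit of the 64-bit doubleword.
 * Only the MQPCB_MASK_* attribute-valid bits remain in use; the
 * value-layout companions being deleted here and below had no users
 * left in the driver.
 */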
EHCA_BMASK_IBM(32, 32) -#define MQPCB_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(31, 31) #define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) -#define MQPCB_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) -#define MQPCB_TIMEOUT_AL EHCA_BMASK_IBM(27, 31) #define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) -#define MQPCB_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) -#define MQPCB_DLID_AL EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) -#define MQPCB_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(29, 31) #define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) -#define MQPCB_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(25, 31) #define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) -#define MQPCB_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) -#define MQPCB_HOP_LIMIT_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) -#define MQPCB_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(24, 31) #define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) -#define MQPCB_FLOW_LABEL_AL EHCA_BMASK_IBM(12, 31) #define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) #define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) -#define MQPCB_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) -#define MQPCB_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) -#define MQPCB_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(31, 31) -#define MQPCB_QP_NUMBER EHCA_BMASK_IBM( 8, 31) #define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) -#define MQPCB_QP_ENABLE EHCA_BMASK_IBM(31, 31) #define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) -#define MQPCB_CURR_SRQ_LIMIT EHCA_BMASK_IBM(16, 31) #define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) #define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 5540b276a33..8cc83753776 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -43,6 +43,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> + #include "ehca_iverbs.h" #include "ehca_classes.h" #include "ehca_irq.h" @@ -126,15 +128,15 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, void *vpage; u32 counter; u64 rpage, cqx_fec, h_ret; - int ipz_rc, ret, i; + int ipz_rc, i; unsigned long flags; if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) return ERR_PTR(-EINVAL); - if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) { + if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) { ehca_err(device, "Unable to create CQ, max number of %i " - "CQs reached.", ehca_max_cq); + "CQs reached.", shca->max_num_cqs); ehca_err(device, "To increase the maximum number of CQs " "use the number_of_cqs module parameter.\n"); return ERR_PTR(-ENOSPC); @@ -161,32 +163,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, adapter_handle = shca->ipz_hca_handle; param.eq_handle = shca->eq.ipz_eq_handle; - do { - if (!idr_pre_get(&ehca_cq_idr, GFP_KERNEL)) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Can't reserve idr nr. 
device=%p", - device); - goto create_cq_exit1; - } - - write_lock_irqsave(&ehca_cq_idr_lock, flags); - ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - } while (ret == -EAGAIN); + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_cq_idr_lock, flags); + my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + idr_preload_end(); - if (ret) { + if (my_cq->token < 0) { cq = ERR_PTR(-ENOMEM); ehca_err(device, "Can't allocate new idr entry. device=%p", device); goto create_cq_exit1; } - if (my_cq->token > 0x1FFFFFF) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Invalid number of cq. device=%p", device); - goto create_cq_exit2; - } - /* * CQs maximum depth is 4GB-64, but we need additional 20 as buffer * for receiving errors CQEs. @@ -196,7 +185,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, if (h_ret != H_SUCCESS) { ehca_err(device, "hipz_h_alloc_resource_cq() failed " - "h_ret=%li device=%p", h_ret, device); + "h_ret=%lli device=%p", h_ret, device); cq = ERR_PTR(ehca2ib_return_code(h_ret)); goto create_cq_exit2; } @@ -218,7 +207,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, cq = ERR_PTR(-EAGAIN); goto create_cq_exit4; } - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); h_ret = hipz_h_register_rpage_cq(adapter_handle, my_cq->ipz_cq_handle, @@ -232,7 +221,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, if (h_ret < H_SUCCESS) { ehca_err(device, "hipz_h_register_rpage_cq() failed " - "ehca_cq=%p cq_num=%x h_ret=%li counter=%i " + "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i " "act_pages=%i", my_cq, my_cq->cq_number, h_ret, counter, param.act_pages); cq = ERR_PTR(-EINVAL); @@ -244,7 +233,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, if ((h_ret != H_SUCCESS) || vpage) { ehca_err(device, "Registration of pages not " "complete ehca_cq=%p cq_num=%x " - "h_ret=%li", my_cq, my_cq->cq_number, + "h_ret=%lli", my_cq, my_cq->cq_number, h_ret); cq = ERR_PTR(-EAGAIN); goto create_cq_exit4; @@ -252,7 +241,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, } else { if (h_ret != H_PAGE_REGISTERED) { ehca_err(device, "Registration of page failed " - "ehca_cq=%p cq_num=%x h_ret=%li " + "ehca_cq=%p cq_num=%x h_ret=%lli " "counter=%i act_pages=%i", my_cq, my_cq->cq_number, h_ret, counter, param.act_pages); @@ -266,7 +255,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, gal = my_cq->galpas.kernel; cqx_fec = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_fec)); - ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%lx", + ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx", my_cq, my_cq->cq_number, cqx_fec); my_cq->ib_cq.cqe = my_cq->nr_of_entries = @@ -276,6 +265,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, for (i = 0; i < QP_HASHTAB_LEN; i++) INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + INIT_LIST_HEAD(&my_cq->sqp_err_list); + INIT_LIST_HEAD(&my_cq->rqp_err_list); + if (context) { struct ipz_queue *ipz_queue = &my_cq->ipz_queue; struct ehca_create_cq_resp resp; @@ -291,6 +283,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { ehca_err(device, "Copy to udata failed."); + cq = ERR_PTR(-EFAULT); goto 
create_cq_exit4; } } @@ -304,7 +297,7 @@ create_cq_exit3: h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); if (h_ret != H_SUCCESS) ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " - "cq_num=%x h_ret=%li", my_cq, my_cq->cq_number, h_ret); + "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret); create_cq_exit2: write_lock_irqsave(&ehca_cq_idr_lock, flags); @@ -352,7 +345,7 @@ int ehca_destroy_cq(struct ib_cq *cq) h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); if (h_ret == H_R_STATE) { /* cq in err: read err data and destroy it forcibly */ - ehca_dbg(device, "ehca_cq=%p cq_num=%x ressource=%lx in err " + ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err " "state. Try to delete it forcibly.", my_cq, cq_num, my_cq->ipz_cq_handle.handle); ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); @@ -362,7 +355,7 @@ int ehca_destroy_cq(struct ib_cq *cq) cq_num); } if (h_ret != H_SUCCESS) { - ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%li " + ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli " "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); return ehca2ib_return_code(h_ret); } diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 49660dfa186..90da6747d39 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -101,7 +101,7 @@ int ehca_create_eq(struct ehca_shca *shca, if (!vpage) goto create_eq_exit2; - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, eq->ipz_eq_handle, &eq->pf, @@ -113,7 +113,7 @@ int ehca_create_eq(struct ehca_shca *shca, if (h_ret != H_SUCCESS || vpage) goto create_eq_exit2; } else { - if (h_ret != H_PAGE_REGISTERED || !vpage) + if (h_ret != H_PAGE_REGISTERED) goto create_eq_exit2; } } @@ -122,21 +122,21 @@ int ehca_create_eq(struct ehca_shca *shca, /* register interrupt handlers and initialize work queues */ if (type == EHCA_EQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, - IRQF_DISABLED, "ehca_eq", + 0, "ehca_eq", (void *)shca); if (ret < 0) ehca_err(ib_dev, "Can't map interrupt handler."); - - tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); } else if (type == EHCA_NEQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, - IRQF_DISABLED, "ehca_neq", + 0, "ehca_neq", (void *)shca); if (ret < 0) ehca_err(ib_dev, "Can't map interrupt handler."); - - tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); } eq->is_initialized = 1; @@ -169,12 +169,15 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) unsigned long flags; u64 h_ret; - spin_lock_irqsave(&eq->spinlock, flags); ibmebus_free_irq(eq->ist, (void *)shca); - h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + spin_lock_irqsave(&shca_list_lock, flags); + eq->is_initialized = 0; + spin_unlock_irqrestore(&shca_list_lock, flags); - spin_unlock_irqrestore(&eq->spinlock, flags); + tasklet_kill(&eq->interrupt_task); + + h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "Can't free EQ resources."); diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index bc3b37d2070..9ed4d258830 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -39,6 +39,8 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <linux/gfp.h> + #include "ehca_tools.h" #include "ehca_iverbs.h" #include "hcp_if.h" @@ -114,7 +116,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) } props->max_pkeys = 16; - props->local_ca_ack_delay = min_t(u8, rblock->local_ca_ack_delay, 255); + /* Some FW versions say 0 here; insert sensible value in that case */ + props->local_ca_ack_delay = rblock->local_ca_ack_delay ? + min_t(u8, rblock->local_ca_ack_delay, 255) : 12; props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); @@ -229,7 +233,7 @@ int ehca_query_port(struct ib_device *ibdev, props->phys_state = 5; props->state = rblock->state; props->active_width = IB_WIDTH_12X; - props->active_speed = 0x1; + props->active_speed = IB_SPEED_SDR; } query_port1: @@ -317,7 +321,7 @@ int ehca_query_gid(struct ib_device *ibdev, u8 port, ib_device); struct hipz_query_port *rblock; - if (index > 255) { + if (index < 0 || index > 255) { ehca_err(&shca->ib_device, "Invalid index: %x.", index); return -EINVAL; } @@ -391,7 +395,7 @@ int ehca_modify_port(struct ib_device *ibdev, hret = hipz_h_modify_port(shca->ipz_hca_handle, port, cap, props->init_type, port_modify_mask); if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Modify port failed h_ret=%li", + ehca_err(&shca->ib_device, "Modify port failed h_ret=%lli", hret); ret = -EINVAL; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 0792d930c48..8615d7cf7e0 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -41,6 +41,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> +#include <linux/smpboot.h> + #include "ehca_classes.h" #include "ehca_irq.h" #include "ehca_iverbs.h" @@ -99,7 +102,7 @@ static void print_error_data(struct ehca_shca *shca, void *data, return; ehca_err(&shca->ib_device, - "QP 0x%x (resource=%lx) has errors.", + "QP 0x%x (resource=%llx) has errors.", qp->ib_qp.qp_num, resource); break; } @@ -108,21 +111,21 @@ static void print_error_data(struct ehca_shca *shca, void *data, struct ehca_cq *cq = (struct ehca_cq *)data; ehca_err(&shca->ib_device, - "CQ 0x%x (resource=%lx) has errors.", + "CQ 0x%x (resource=%llx) has errors.", cq->cq_number, resource); break; } default: ehca_err(&shca->ib_device, - "Unknown error type: %lx on %s.", + "Unknown error type: %llx on %s.", type, shca->ib_device.name); break; } - ehca_err(&shca->ib_device, "Error data is available: %lx.", resource); + ehca_err(&shca->ib_device, "Error data is available: %llx.", resource); ehca_err(&shca->ib_device, "EHCA ----- error data begin " "---------------------------------------------------"); - ehca_dmp(rblock, length, "resource=%lx", resource); + ehca_dmp(rblock, length, "resource=%llx", resource); ehca_err(&shca->ib_device, "EHCA ----- error data end " "----------------------------------------------------"); @@ -152,7 +155,7 @@ int ehca_error_data(struct ehca_shca *shca, void *data, if (ret == H_R_STATE) ehca_err(&shca->ib_device, - "No error data is available: %lx.", resource); + "No error data is available: %llx.", resource); else if (ret == H_SUCCESS) { int length; @@ -164,7 +167,7 @@ int ehca_error_data(struct ehca_shca *shca, void *data, print_error_data(shca, data, rblock, length); } else ehca_err(&shca->ib_device, - "Error data could not be fetched: %lx", resource); + "Error data could not be fetched: %llx", resource); 
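/*
 * Note: the %lx -> %llx conversions in this file and below go with
 * powerpc's move to the asm-generic int-ll64.h integer types, after
 * which u64 is 'unsigned long long' on 64-bit powerpc as well and
 * printing it with a plain %lx triggers printk format warnings.
 */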
ehca_free_fw_ctrlblock(rblock); @@ -178,6 +181,10 @@ static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, { struct ib_event event; + /* PATH_MIG without the QP ever having been armed is false alarm */ + if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) + return; + event.device = &shca->ib_device; event.event = event_type; @@ -355,36 +362,48 @@ static void notify_port_conf_change(struct ehca_shca *shca, int port_num) *old_attr = new_attr; } +/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ +static int replay_modify_qp(struct ehca_sport *sport) +{ + int aqp1_destroyed; + unsigned long flags; + + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + + aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; + + if (sport->ibqp_sqp[IB_QPT_SMI]) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); + if (!aqp1_destroyed) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); + + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + + return aqp1_destroyed; +} + static void parse_ec(struct ehca_shca *shca, u64 eqe) { u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); u8 spec_event; struct ehca_sport *sport = &shca->sport[port - 1]; - unsigned long flags; switch (ec) { case 0x30: /* port availability change */ if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - int suppress_event; - /* replay modify_qp for sqps */ - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - suppress_event = !sport->ibqp_sqp[IB_QPT_GSI]; - if (sport->ibqp_sqp[IB_QPT_SMI]) - ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); - if (!suppress_event) - ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - - /* AQP1 was destroyed, ignore this event */ - if (suppress_event) - break; + /* only replay modify_qp calls in autodetect mode; + * if AQP1 was destroyed, the port is already down + * again and we can drop the event. 
+ */ + if (ehca_nr_ports < 0) + if (replay_modify_qp(sport)) + break; sport->port_state = IB_PORT_ACTIVE; dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, "is active"); - ehca_query_sma_attr(shca, port, - &sport->saved_attr); + ehca_query_sma_attr(shca, port, &sport->saved_attr); } else { sport->port_state = IB_PORT_DOWN; dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, @@ -463,13 +482,13 @@ void ehca_tasklet_neq(unsigned long data) struct ehca_eqe *eqe; u64 ret; - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + eqe = ehca_poll_eq(shca, &shca->neq); while (eqe) { if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) parse_ec(shca, eqe->entry); - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq); + eqe = ehca_poll_eq(shca, &shca->neq); } ret = hipz_h_reset_event(shca->ipz_hca_handle, @@ -498,7 +517,7 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) struct ehca_cq *cq; eqe_value = eqe->entry; - ehca_dbg(&shca->ib_device, "eqe_value=%lx", eqe_value); + ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value); if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { ehca_dbg(&shca->ib_device, "Got completion event"); token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); @@ -532,11 +551,10 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) struct ehca_eq *eq = &shca->eq; struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; u64 eqe_value, ret; - unsigned long flags; int eqe_cnt, i; int eq_empty = 0; - spin_lock_irqsave(&eq->irq_spinlock, flags); + spin_lock(&eq->irq_spinlock); if (is_irq) { const int max_query_cnt = 100; int query_cnt = 0; @@ -556,8 +574,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) eqe_cnt = 0; do { u32 token; - eqe_cache[eqe_cnt].eqe = - (struct ehca_eqe *)ehca_poll_eq(shca, eq); + eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); if (!eqe_cache[eqe_cnt].eqe) break; eqe_value = eqe_cache[eqe_cnt].eqe->entry; @@ -587,7 +604,7 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) ret = hipz_h_eoi(eq->ist); if (ret != H_SUCCESS) ehca_err(&shca->ib_device, - "bad return code EOI -rc = %ld\n", ret); + "bad return code EOI -rc = %lld\n", ret); ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); } if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) @@ -621,14 +638,14 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq) goto unlock_irq_spinlock; do { struct ehca_eqe *eqe; - eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq); + eqe = ehca_poll_eq(shca, &shca->eq); if (!eqe) break; process_eqe(shca, eqe); } while (1); unlock_irq_spinlock: - spin_unlock_irqrestore(&eq->irq_spinlock, flags); + spin_unlock(&eq->irq_spinlock); } void ehca_tasklet_eq(unsigned long data) @@ -636,27 +653,30 @@ void ehca_tasklet_eq(unsigned long data) ehca_process_eq((struct ehca_shca*)data, 1); } -static inline int find_next_online_cpu(struct ehca_comp_pool *pool) +static int find_next_online_cpu(struct ehca_comp_pool *pool) { int cpu; unsigned long flags; WARN_ON_ONCE(!in_interrupt()); if (ehca_debug_level >= 3) - ehca_dmp(&cpu_online_map, sizeof(cpumask_t), ""); + ehca_dmp(cpu_online_mask, cpumask_size(), ""); spin_lock_irqsave(&pool->last_cpu_lock, flags); - cpu = next_cpu(pool->last_cpu, cpu_online_map); - if (cpu == NR_CPUS) - cpu = first_cpu(cpu_online_map); - pool->last_cpu = cpu; + do { + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + pool->last_cpu = cpu; + } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); 
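/*
 * Note: besides moving from the old next_cpu()/cpu_online_map API to
 * cpumask_next()/cpu_online_mask (with nr_cpu_ids as the end marker),
 * the loop above now also skips CPUs whose completion task is marked
 * !active, i.e. whose smpboot thread is currently parked, so new CQ
 * work is only handed to CPUs that can actually process it.
 */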
spin_unlock_irqrestore(&pool->last_cpu_lock, flags); return cpu; } static void __queue_comp_task(struct ehca_cq *__cq, - struct ehca_cpu_comp_task *cct) + struct ehca_cpu_comp_task *cct, + struct task_struct *thread) { unsigned long flags; @@ -667,7 +687,7 @@ static void __queue_comp_task(struct ehca_cq *__cq, __cq->nr_callbacks++; list_add_tail(&__cq->entry, &cct->cq_list); cct->cq_jobs++; - wake_up(&cct->wait_queue); + wake_up_process(thread); } else __cq->nr_callbacks++; @@ -679,6 +699,7 @@ static void queue_comp_task(struct ehca_cq *__cq) { int cpu_id; struct ehca_cpu_comp_task *cct; + struct task_struct *thread; int cq_jobs; unsigned long flags; @@ -686,7 +707,8 @@ static void queue_comp_task(struct ehca_cq *__cq) BUG_ON(!cpu_online(cpu_id)); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); spin_lock_irqsave(&cct->task_lock, flags); cq_jobs = cct->cq_jobs; @@ -694,28 +716,25 @@ static void queue_comp_task(struct ehca_cq *__cq) if (cq_jobs > 0) { cpu_id = find_next_online_cpu(pool); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - BUG_ON(!cct); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); } - - __queue_comp_task(__cq, cct); + __queue_comp_task(__cq, cct, thread); } static void run_comp_task(struct ehca_cpu_comp_task *cct) { struct ehca_cq *cq; - unsigned long flags; - - spin_lock_irqsave(&cct->task_lock, flags); while (!list_empty(&cct->cq_list)) { cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - spin_unlock_irqrestore(&cct->task_lock, flags); + spin_unlock_irq(&cct->task_lock); comp_event_callback(cq); if (atomic_dec_and_test(&cq->nr_events)) wake_up(&cq->wait_completion); - spin_lock_irqsave(&cct->task_lock, flags); + spin_lock_irq(&cct->task_lock); spin_lock(&cq->task_lock); cq->nr_callbacks--; if (!cq->nr_callbacks) { @@ -724,159 +743,76 @@ static void run_comp_task(struct ehca_cpu_comp_task *cct) } spin_unlock(&cq->task_lock); } - - spin_unlock_irqrestore(&cct->task_lock, flags); } -static int comp_task(void *__cct) +static void comp_task_park(unsigned int cpu) { - struct ehca_cpu_comp_task *cct = __cct; - int cql_empty; - DECLARE_WAITQUEUE(wait, current); - - set_current_state(TASK_INTERRUPTIBLE); - while (!kthread_should_stop()) { - add_wait_queue(&cct->wait_queue, &wait); - - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (cql_empty) - schedule(); - else - __set_current_state(TASK_RUNNING); - - remove_wait_queue(&cct->wait_queue, &wait); + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + struct ehca_cpu_comp_task *target; + struct task_struct *thread; + struct ehca_cq *cq, *tmp; + LIST_HEAD(list); - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - spin_unlock_irq(&cct->task_lock); - if (!cql_empty) - run_comp_task(__cct); + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + list_splice_init(&cct->cq_list, &list); + spin_unlock_irq(&cct->task_lock); - set_current_state(TASK_INTERRUPTIBLE); + cpu = find_next_online_cpu(pool); + target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); + spin_lock_irq(&target->task_lock); + list_for_each_entry_safe(cq, tmp, &list, entry) { + list_del(&cq->entry); + __queue_comp_task(cq, target, thread); } - __set_current_state(TASK_RUNNING); - - return 0; + spin_unlock_irq(&target->task_lock); } -static struct 
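/*
 * Note: comp_task_park() above runs when a CPU's completion thread is
 * parked for hotplug -- it marks the per-CPU task inactive, detaches
 * the pending CQ list under the lock, and re-queues every entry on the
 * next online CPU via __queue_comp_task(), so no completion callback
 * is lost while the CPU is offline.
 */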
task_struct *create_comp_task(struct ehca_comp_pool *pool, - int cpu) +static void comp_task_stop(unsigned int cpu, bool online) { - struct ehca_cpu_comp_task *cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - spin_lock_init(&cct->task_lock); - INIT_LIST_HEAD(&cct->cq_list); - init_waitqueue_head(&cct->wait_queue); - cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu); - - return cct->task; -} - -static void destroy_comp_task(struct ehca_comp_pool *pool, - int cpu) -{ - struct ehca_cpu_comp_task *cct; - struct task_struct *task; - unsigned long flags_cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - spin_lock_irqsave(&cct->task_lock, flags_cct); + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - task = cct->task; - cct->task = NULL; + spin_lock_irq(&cct->task_lock); cct->cq_jobs = 0; - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); - - if (task) - kthread_stop(task); + cct->active = 0; + WARN_ON(!list_empty(&cct->cq_list)); + spin_unlock_irq(&cct->task_lock); } -static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu) +static int comp_task_should_run(unsigned int cpu) { struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - LIST_HEAD(list); - struct ehca_cq *cq; - unsigned long flags_cct; - - spin_lock_irqsave(&cct->task_lock, flags_cct); - - list_splice_init(&cct->cq_list, &list); - - while (!list_empty(&list)) { - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - - list_del(&cq->entry); - __queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks, - smp_processor_id())); - } - - spin_unlock_irqrestore(&cct->task_lock, flags_cct); + return cct->cq_jobs; } -static int __cpuinit comp_pool_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) +static void comp_task(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; - struct ehca_cpu_comp_task *cct; + struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); + int cql_empty; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); - if (!create_comp_task(pool, cpu)) { - ehca_gen_err("Can't create comp_task for cpu: %x", cpu); - return NOTIFY_BAD; - } - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, any_online_cpu(cpu_online_map)); - destroy_comp_task(pool, cpu); - break; - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - kthread_bind(cct->task, cpu); - wake_up_process(cct->task); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); - break; - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); - destroy_comp_task(pool, cpu); - take_over_work(pool, cpu); - break; + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + if (!cql_empty) { + __set_current_state(TASK_RUNNING); + run_comp_task(cct); } - - return NOTIFY_OK; + spin_unlock_irq(&cct->task_lock); } -static struct notifier_block comp_pool_callback_nb __cpuinitdata = { - .notifier_call = comp_pool_callback, - .priority = 0, +static struct smp_hotplug_thread comp_pool_threads = { + .thread_should_run = comp_task_should_run, + .thread_fn = 
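/*
 * Note: struct smp_hotplug_thread replaces the hand-rolled per-CPU
 * kthreads and the CPU-hotplug notifier deleted above.  The smpboot
 * core creates one "ehca_comp/%u" thread per CPU, polls
 * .thread_should_run from the thread loop, calls .thread_fn while it
 * returns true, and parks the thread around CPU offline -- the point
 * at which comp_task_park() migrates any queued CQs.
 */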
comp_task, + .thread_comm = "ehca_comp/%u", + .cleanup = comp_task_stop, + .park = comp_task_park, }; int ehca_create_comp_pool(void) { - int cpu; - struct task_struct *task; + int cpu, ret = -ENOMEM; if (!ehca_scaling_code) return 0; @@ -886,42 +822,49 @@ int ehca_create_comp_pool(void) return -ENOMEM; spin_lock_init(&pool->last_cpu_lock); - pool->last_cpu = any_online_cpu(cpu_online_map); + pool->last_cpu = cpumask_any(cpu_online_mask); pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); - if (pool->cpu_comp_tasks == NULL) { - kfree(pool); - return -EINVAL; - } + if (!pool->cpu_comp_tasks) + goto out_pool; - for_each_online_cpu(cpu) { - task = create_comp_task(pool, cpu); - if (task) { - kthread_bind(task, cpu); - wake_up_process(task); - } + pool->cpu_comp_threads = alloc_percpu(struct task_struct *); + if (!pool->cpu_comp_threads) + goto out_tasks; + + for_each_present_cpu(cpu) { + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); } - register_hotcpu_notifier(&comp_pool_callback_nb); + comp_pool_threads.store = pool->cpu_comp_threads; + ret = smpboot_register_percpu_thread(&comp_pool_threads); + if (ret) + goto out_threads; - printk(KERN_INFO "eHCA scaling code enabled\n"); + pr_info("eHCA scaling code enabled\n"); + return ret; - return 0; +out_threads: + free_percpu(pool->cpu_comp_threads); +out_tasks: + free_percpu(pool->cpu_comp_tasks); +out_pool: + kfree(pool); + return ret; } void ehca_destroy_comp_pool(void) { - int i; - if (!ehca_scaling_code) return; - unregister_hotcpu_notifier(&comp_pool_callback_nb); + smpboot_unregister_percpu_thread(&comp_pool_threads); - for (i = 0; i < NR_CPUS; i++) { - if (cpu_online(i)) - destroy_comp_task(pool, i); - } + free_percpu(pool->cpu_comp_threads); free_percpu(pool->cpu_comp_tasks); kfree(pool); } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h index 3346cb06cea..5370199f08c 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ b/drivers/infiniband/hw/ehca/ehca_irq.h @@ -58,15 +58,15 @@ void ehca_tasklet_eq(unsigned long data); void ehca_process_eq(struct ehca_shca *shca, int is_irq); struct ehca_cpu_comp_task { - wait_queue_head_t wait_queue; struct list_head cq_list; - struct task_struct *task; spinlock_t task_lock; int cq_jobs; + int active; }; struct ehca_comp_pool { - struct ehca_cpu_comp_task *cpu_comp_tasks; + struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; + struct task_struct * __percpu *cpu_comp_threads; int last_cpu; spinlock_t last_cpu_lock; }; diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index a8a2ea585d2..22f79afa7fc 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -95,7 +95,7 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int ehca_dereg_mr(struct ib_mr *mr); -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd); +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind); @@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data); int ehca_calc_ipd(struct ehca_shca *shca, int port, enum ib_rate path_rate, u32 *ipd); +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); + #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c 
b/drivers/infiniband/hw/ehca/ehca_main.c index 598844d2edc..cd8d290a09f 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -44,29 +44,31 @@ #include <linux/slab.h> #endif +#include <linux/notifier.h> +#include <linux/memory.h> #include "ehca_classes.h" #include "ehca_iverbs.h" #include "ehca_mrmw.h" #include "ehca_tools.h" #include "hcp_if.h" -#define HCAD_VERSION "0026" +#define HCAD_VERSION "0029" MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); MODULE_VERSION(HCAD_VERSION); -static int ehca_open_aqp1 = 0; +static bool ehca_open_aqp1 = 0; static int ehca_hw_level = 0; -static int ehca_poll_all_eqs = 1; +static bool ehca_poll_all_eqs = 1; int ehca_debug_level = 0; -int ehca_nr_ports = 2; -int ehca_use_hp_mr = 0; +int ehca_nr_ports = -1; +bool ehca_use_hp_mr = 0; int ehca_port_act_time = 30; int ehca_static_rate = -1; -int ehca_scaling_code = 0; +bool ehca_scaling_code = 0; int ehca_lock_hcalls = -1; int ehca_max_cq = -1; int ehca_max_qp = -1; @@ -80,7 +82,7 @@ module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); -module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); +module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); @@ -93,8 +95,8 @@ MODULE_PARM_DESC(hw_level, "Hardware level (0: autosensing (default), " "0x10..0x14: eHCA, 0x20..0x23: eHCA2)"); MODULE_PARM_DESC(nr_ports, - "number of connected ports (-1: autodetect, 1: port one only, " - "2: two ports (default)"); + "number of connected ports (-1: autodetect (default), " + "1: port one only, 2: two ports)"); MODULE_PARM_DESC(use_hp_mr, "Use high performance MRs (default: no)"); MODULE_PARM_DESC(port_act_time, @@ -121,7 +123,7 @@ DEFINE_IDR(ehca_qp_idr); DEFINE_IDR(ehca_cq_idr); static LIST_HEAD(shca_list); /* list of all registered ehcas */ -static DEFINE_SPINLOCK(shca_list_lock); +DEFINE_SPINLOCK(shca_list_lock); static struct timer_list poll_eqs_timer; @@ -209,6 +211,7 @@ static int ehca_create_slab_caches(void) if (!ctblk_cache) { ehca_gen_err("Cannot create ctblk SLAB cache."); ehca_cleanup_small_qp_cache(); + ret = -ENOMEM; goto create_slab_caches6; } #endif @@ -289,8 +292,9 @@ static int ehca_sense_attributes(struct ehca_shca *shca) }; ehca_gen_dbg("Probing adapter %s...", - shca->ofdev->node->full_name); + shca->ofdev->dev.of_node->full_name); - loc_code = of_get_property(shca->ofdev->node, "ibm,loc-code", NULL); + loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code", + NULL); if (loc_code) ehca_gen_dbg(" ... location code=%s", loc_code); @@ -302,7 +306,7 @@ static int ehca_sense_attributes(struct ehca_shca *shca) h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); if (h_ret != H_SUCCESS) { - ehca_gen_err("Cannot query device properties. h_ret=%li", + ehca_gen_err("Cannot query device properties.
h_ret=%lli", h_ret); ret = -EPERM; goto sense_attributes1; @@ -357,7 +361,8 @@ static int ehca_sense_attributes(struct ehca_shca *shca) * a firmware property, so it's valid across all adapters */ if (ehca_lock_hcalls == -1) - ehca_lock_hcalls = !(shca->hca_cap & HCA_CAP_H_ALLOC_RES_SYNC); + ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC, + shca->hca_cap); /* translate supported MR page sizes; always support 4K */ shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; @@ -366,29 +371,30 @@ static int ehca_sense_attributes(struct ehca_shca *shca) shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; /* Set maximum number of CQs and QPs to calculate EQ size */ - if (ehca_max_qp == -1) - ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES); - else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) { - ehca_gen_err("Requested number of QPs is out of range (1 - %i) " - "specified by HW", rblock->max_qp); - ret = -EINVAL; - goto sense_attributes1; + if (shca->max_num_qps == -1) + shca->max_num_qps = min_t(int, rblock->max_qp, + EHCA_MAX_NUM_QUEUES); + else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) { + ehca_gen_warn("The requested number of QPs is out of range " + "(1 - %i) specified by HW. Value is set to %i", + rblock->max_qp, rblock->max_qp); + shca->max_num_qps = rblock->max_qp; } - if (ehca_max_cq == -1) - ehca_max_cq = min_t(int, rblock->max_cq, EHCA_MAX_NUM_QUEUES); - else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) { - ehca_gen_err("Requested number of CQs is out of range (1 - %i) " - "specified by HW", rblock->max_cq); - ret = -EINVAL; - goto sense_attributes1; + if (shca->max_num_cqs == -1) + shca->max_num_cqs = min_t(int, rblock->max_cq, + EHCA_MAX_NUM_QUEUES); + else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) { + ehca_gen_warn("The requested number of CQs is out of range " + "(1 - %i) specified by HW. Value is set to %i", + rblock->max_cq, rblock->max_cq); } /* query max MTU from first port -- it's the same for all ports */ port = (struct hipz_query_port *)rblock; h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); if (h_ret != H_SUCCESS) { - ehca_gen_err("Cannot query port properties. h_ret=%li", + ehca_gen_err("Cannot query port properties. 
h_ret=%lli", h_ret); ret = -EPERM; goto sense_attributes1; @@ -503,6 +509,7 @@ static int ehca_init_device(struct ehca_shca *shca) shca->ib_device.detach_mcast = ehca_detach_mcast; shca->ib_device.process_mad = ehca_process_mad; shca->ib_device.mmap = ehca_mmap; + shca->ib_device.dma_ops = &ehca_dma_mapping_ops; if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { shca->ib_device.uverbs_cmd_mask |= @@ -619,7 +626,7 @@ static struct attribute_group ehca_drv_attr_grp = { .attrs = ehca_drv_attrs }; -static struct attribute_group *ehca_drv_attr_groups[] = { +static const struct attribute_group *ehca_drv_attr_groups[] = { &ehca_drv_attr_grp, NULL, }; @@ -633,7 +640,7 @@ static ssize_t ehca_show_##name(struct device *dev, \ struct hipz_query_hca *rblock; \ int data; \ \ - shca = dev->driver_data; \ + shca = dev_get_drvdata(dev); \ \ rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ if (!rblock) { \ @@ -677,9 +684,9 @@ static ssize_t ehca_show_adapter_handle(struct device *dev, struct device_attribute *attr, char *buf) { - struct ehca_shca *shca = dev->driver_data; + struct ehca_shca *shca = dev_get_drvdata(dev); - return sprintf(buf, "%lx\n", shca->ipz_hca_handle.handle); + return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle); } static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); @@ -707,24 +714,24 @@ static struct attribute_group ehca_dev_attr_grp = { .attrs = ehca_dev_attrs }; -static int __devinit ehca_probe(struct of_device *dev, - const struct of_device_id *id) +static int ehca_probe(struct platform_device *dev) { struct ehca_shca *shca; const u64 *handle; struct ib_pd *ibpd; int ret, i, eq_size; + unsigned long flags; - handle = of_get_property(dev->node, "ibm,hca-handle", NULL); + handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL); if (!handle) { ehca_gen_err("Cannot get eHCA handle for adapter: %s.", - dev->node->full_name); + dev->dev.of_node->full_name); return -ENODEV; } if (!(*handle)) { ehca_gen_err("Wrong eHCA handle for adapter: %s.", - dev->node->full_name); + dev->dev.of_node->full_name); return -ENODEV; } @@ -733,15 +740,19 @@ static int __devinit ehca_probe(struct of_device *dev, ehca_gen_err("Cannot allocate shca memory."); return -ENOMEM; } + mutex_init(&shca->modify_mutex); atomic_set(&shca->num_cqs, 0); atomic_set(&shca->num_qps, 0); + shca->max_num_qps = ehca_max_qp; + shca->max_num_cqs = ehca_max_cq; + for (i = 0; i < ARRAY_SIZE(shca->sport); i++) spin_lock_init(&shca->sport[i].mod_sqp_lock); shca->ofdev = dev; shca->ipz_hca_handle.handle = *handle; - dev->dev.driver_data = shca; + dev_set_drvdata(&dev->dev, shca); ret = ehca_sense_attributes(shca); if (ret < 0) { @@ -755,7 +766,7 @@ static int __devinit ehca_probe(struct of_device *dev, goto probe1; } - eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp; + eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps; /* create event queues */ ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); if (ret) { @@ -789,7 +800,7 @@ static int __devinit ehca_probe(struct of_device *dev, goto probe5; } - ret = ib_register_device(&shca->ib_device); + ret = ib_register_device(&shca->ib_device, NULL); if (ret) { ehca_err(&shca->ib_device, "ib_register_device() failed ret=%i", ret); @@ -823,9 +834,9 @@ static int __devinit ehca_probe(struct of_device *dev, ehca_err(&shca->ib_device, "Cannot create device attributes ret=%d", ret); - spin_lock(&shca_list_lock); + spin_lock_irqsave(&shca_list_lock, flags); list_add(&shca->shca_list, &shca_list); - spin_unlock(&shca_list_lock); + 
spin_unlock_irqrestore(&shca_list_lock, flags); return 0; @@ -868,9 +879,10 @@ probe1: return -EINVAL; } -static int __devexit ehca_remove(struct of_device *dev) +static int ehca_remove(struct platform_device *dev) { - struct ehca_shca *shca = dev->dev.driver_data; + struct ehca_shca *shca = dev_get_drvdata(&dev->dev); + unsigned long flags; int ret; sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp); @@ -908,9 +920,9 @@ static int __devexit ehca_remove(struct of_device *dev) ib_dealloc_device(&shca->ib_device); - spin_lock(&shca_list_lock); + spin_lock_irqsave(&shca_list_lock, flags); list_del(&shca->shca_list); - spin_unlock(&shca_list_lock); + spin_unlock_irqrestore(&shca_list_lock, flags); return ret; } @@ -925,13 +937,14 @@ static struct of_device_id ehca_device_table[] = { }; MODULE_DEVICE_TABLE(of, ehca_device_table); -static struct of_platform_driver ehca_driver = { - .name = "ehca", - .match_table = ehca_device_table, +static struct platform_driver ehca_driver = { .probe = ehca_probe, .remove = ehca_remove, - .driver = { + .driver = { + .name = "ehca", + .owner = THIS_MODULE, .groups = ehca_drv_attr_groups, + .of_match_table = ehca_device_table, }, }; @@ -946,7 +959,7 @@ void ehca_poll_eqs(unsigned long data) struct ehca_eq *eq = &shca->eq; int max = 3; volatile u64 q_ofs, q_ofs2; - u64 flags; + unsigned long flags; spin_lock_irqsave(&eq->spinlock, flags); q_ofs = eq->ipz_queue.current_q_offset; spin_unlock_irqrestore(&eq->spinlock, flags); @@ -964,6 +977,41 @@ void ehca_poll_eqs(unsigned long data) spin_unlock(&shca_list_lock); } +static int ehca_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + static unsigned long ehca_dmem_warn_time; + unsigned long flags; + + switch (action) { + case MEM_CANCEL_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_ONLINE: + case MEM_OFFLINE: + return NOTIFY_OK; + case MEM_GOING_ONLINE: + case MEM_GOING_OFFLINE: + /* only ok if no hca is attached to the lpar */ + spin_lock_irqsave(&shca_list_lock, flags); + if (list_empty(&shca_list)) { + spin_unlock_irqrestore(&shca_list_lock, flags); + return NOTIFY_OK; + } else { + spin_unlock_irqrestore(&shca_list_lock, flags); + if (printk_timed_ratelimit(&ehca_dmem_warn_time, + 30 * 1000)) + ehca_gen_err("DMEM operations are not allowed " + "in conjunction with eHCA"); + return NOTIFY_BAD; + } + } + return NOTIFY_OK; +} + +static struct notifier_block ehca_mem_nb = { + .notifier_call = ehca_mem_notifier, +}; + static int __init ehca_module_init(void) { int ret; @@ -984,11 +1032,23 @@ static int __init ehca_module_init(void) goto module_init1; } + ret = ehca_create_busmap(); + if (ret) { + ehca_gen_err("Cannot create busmap."); + goto module_init2; + } + ret = ibmebus_register_driver(&ehca_driver); if (ret) { ehca_gen_err("Cannot register eHCA device driver"); ret = -EINVAL; - goto module_init2; + goto module_init3; + } + + ret = register_memory_notifier(&ehca_mem_nb); + if (ret) { + ehca_gen_err("Failed registering memory add/remove notifier"); + goto module_init4; } if (ehca_poll_all_eqs != 1) { @@ -1003,6 +1063,12 @@ static int __init ehca_module_init(void) return 0; +module_init4: + ibmebus_unregister_driver(&ehca_driver); + +module_init3: + ehca_destroy_busmap(); + module_init2: ehca_destroy_slab_caches(); @@ -1018,6 +1084,10 @@ static void __exit ehca_module_exit(void) ibmebus_unregister_driver(&ehca_driver); + unregister_memory_notifier(&ehca_mem_nb); + + ehca_destroy_busmap(); + ehca_destroy_slab_caches(); ehca_destroy_comp_pool(); diff --git 
a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c index e3ef0264ccc..120aedf9f98 100644 --- a/drivers/infiniband/hw/ehca/ehca_mcast.c +++ b/drivers/infiniband/hw/ehca/ehca_mcast.c @@ -88,7 +88,7 @@ int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (h_ret != H_SUCCESS) ehca_err(ibqp->device, "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() failed " - "h_ret=%li", my_qp, ibqp->qp_num, h_ret); + "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); return ehca2ib_return_code(h_ret); } @@ -125,7 +125,7 @@ int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (h_ret != H_SUCCESS) ehca_err(ibqp->device, "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " - "h_ret=%li", my_qp, ibqp->qp_num, h_ret); + "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); return ehca2ib_return_code(h_ret); } diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index f974367cad4..3488e8c9fcb 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -40,6 +40,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> #include <rdma/ib_umem.h> #include "ehca_iverbs.h" @@ -53,6 +54,38 @@ /* max number of rpages (per hcall register_rpages) */ #define MAX_RPAGES 512 +/* DMEM toleration management */ +#define EHCA_SECTSHIFT SECTION_SIZE_BITS +#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT) +#define EHCA_HUGEPAGESHIFT 34 +#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT) +#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) +#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL +#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */ +#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2) +#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT) +#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */ +#define EHCA_DIR_MAP_SIZE (0x10000) +#define EHCA_ENT_MAP_SIZE (0x10000) +#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1) + +static unsigned long ehca_mr_len; + +/* + * Memory map data structures + */ +struct ehca_dir_bmap { + u64 ent[EHCA_MAP_ENTRIES]; +}; +struct ehca_top_bmap { + struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES]; +}; +struct ehca_bmap { + struct ehca_top_bmap *top[EHCA_MAP_ENTRIES]; +}; + +static struct ehca_bmap *ehca_bmap; + static struct kmem_cache *mr_cache; static struct kmem_cache *mw_cache; @@ -68,6 +101,8 @@ enum ehca_mr_pgsize { #define EHCA_MR_PGSHIFT1M 20 #define EHCA_MR_PGSHIFT16M 24 +static u64 ehca_map_vaddr(void *caddr); + static u32 ehca_encode_hwpage_size(u32 pgsize) { int log = ilog2(pgsize); @@ -77,7 +112,7 @@ static u32 ehca_encode_hwpage_size(u32 pgsize) static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) { - return 1UL << ilog2(shca->hca_cap_mr_pgsize); + return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); } static struct ehca_mr *ehca_mr_new(void) @@ -135,7 +170,8 @@ struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) goto get_dma_mr_exit0; } - ret = ehca_reg_maxmr(shca, e_maxmr, (u64 *)KERNELBASE, + ret = ehca_reg_maxmr(shca, e_maxmr, + (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), mr_access_flags, e_pd, &e_maxmr->ib.ib_mr.lkey, &e_maxmr->ib.ib_mr.rkey); @@ -204,7 +240,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, } if ((size == 0) || (((u64)iova_start + size) < (u64)iova_start)) { - ehca_err(pd->device, "bad input values: size=%lx iova_start=%p", + ehca_err(pd->device, "bad input values: size=%llx iova_start=%p", size, iova_start); ib_mr = ERR_PTR(-EINVAL); goto 
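/*
 * Illustrative note (not part of this patch): the three-level busmap above
 * splits a linear memory-section number into three 13-bit indices, so each
 * level holds EHCA_MAP_ENTRIES == 8192 slots and, at 8 bytes per slot,
 * exactly fills the fixed 0x10000 (64 KB) map block size. Assuming
 * SECTION_SIZE_BITS == 24 (16 MB sections, the usual ppc64 value), one
 * ehca_dir_bmap then covers 8192 * 16 MB = 128 GB of bus space. A minimal
 * sketch of the index split:
 *
 *	unsigned long section;	// linear section number
 *	int top = (section >> EHCA_TOP_INDEX_SHIFT) & EHCA_INDEX_MASK;
 *	int dir = (section >> EHCA_DIR_INDEX_SHIFT) & EHCA_INDEX_MASK;
 *	int idx = section & EHCA_INDEX_MASK;
 *	// slot: ehca_bmap->top[top]->dir[dir]->ent[idx]
 */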
reg_phys_mr_exit0; @@ -251,7 +287,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); if (ret) { ib_mr = ERR_PTR(ret); goto reg_phys_mr_exit1; @@ -309,8 +345,8 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } if (length == 0 || virt + length < virt) { - ehca_err(pd->device, "bad input values: length=%lx " - "virt_base=%lx", length, virt); + ehca_err(pd->device, "bad input values: length=%llx " + "virt_base=%llx", length, virt); ib_mr = ERR_PTR(-EINVAL); goto reg_user_mr_exit0; } @@ -364,16 +400,13 @@ reg_user_mr_fallback: pginfo.num_hwpages = num_hwpages; pginfo.u.usr.region = e_mr->umem; pginfo.next_hwpage = e_mr->umem->offset / hwpage_size; - pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, - (&e_mr->umem->chunk_list), - list); - + pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { ehca_warn(pd->device, "failed to register mr " - "with hwpage_size=%lx", hwpage_size); + "with hwpage_size=%llx", hwpage_size); ehca_info(pd->device, "try to register mr with " "kpage_size=%lx", PAGE_SIZE); /* @@ -509,7 +542,7 @@ int ehca_rereg_phys_mr(struct ib_mr *mr, goto rereg_phys_mr_exit1; if ((new_size == 0) || (((u64)iova_start + new_size) < (u64)iova_start)) { - ehca_err(mr->device, "bad input values: new_size=%lx " + ehca_err(mr->device, "bad input values: new_size=%llx " "iova_start=%p", new_size, iova_start); ret = -EINVAL; goto rereg_phys_mr_exit1; @@ -580,8 +613,8 @@ int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); if (h_ret != H_SUCCESS) { - ehca_err(mr->device, "hipz_mr_query failed, h_ret=%li mr=%p " - "hca_hndl=%lx mr_hndl=%lx lkey=%x", + ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p " + "hca_hndl=%llx mr_hndl=%llx lkey=%x", h_ret, mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, mr->lkey); ret = ehca2ib_return_code(h_ret); @@ -630,8 +663,8 @@ int ehca_dereg_mr(struct ib_mr *mr) /* TODO: BUSY: MR still has bound window(s) */ h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); if (h_ret != H_SUCCESS) { - ehca_err(mr->device, "hipz_free_mr failed, h_ret=%li shca=%p " - "e_mr=%p hca_hndl=%lx mr_hndl=%lx mr->lkey=%x", + ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p " + "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x", h_ret, shca, e_mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, mr->lkey); ret = ehca2ib_return_code(h_ret); @@ -652,7 +685,7 @@ dereg_mr_exit0: /*----------------------------------------------------------------------*/ -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct ib_mw *ib_mw; u64 h_ret; @@ -662,6 +695,9 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) container_of(pd->device, struct ehca_shca, ib_device); struct ehca_mw_hipzout_parms hipzout; + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + e_mw = ehca_mw_new(); if (!e_mw) { ib_mw = ERR_PTR(-ENOMEM); @@ -671,8 +707,8 @@ struct ib_mw *ehca_alloc_mw(struct ib_pd *pd) h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, e_pd->fw_pd, &hipzout); if (h_ret != 
H_SUCCESS) { - ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%li " - "shca=%p hca_hndl=%lx mw=%p", + ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli " + "shca=%p hca_hndl=%llx mw=%p", h_ret, shca, shca->ipz_hca_handle.handle, e_mw); ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); goto alloc_mw_exit1; @@ -713,8 +749,8 @@ int ehca_dealloc_mw(struct ib_mw *mw) h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); if (h_ret != H_SUCCESS) { - ehca_err(mw->device, "hipz_free_mw failed, h_ret=%li shca=%p " - "mw=%p rkey=%x hca_hndl=%lx mw_hndl=%lx", + ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p " + "mw=%p rkey=%x hca_hndl=%llx mw_hndl=%llx", h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, e_mw->ipz_mw_handle.handle); return ehca2ib_return_code(h_ret); @@ -794,7 +830,7 @@ struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, ret = ehca_reg_mr(shca, e_fmr, NULL, fmr_attr->max_pages * (1 << fmr_attr->page_shift), mr_access_flags, e_pd, &pginfo, - &tmp_lkey, &tmp_rkey); + &tmp_lkey, &tmp_rkey, EHCA_REG_MR); if (ret) { ib_fmr = ERR_PTR(ret); goto alloc_fmr_exit1; @@ -840,7 +876,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, goto map_phys_fmr_exit0; if (iova % e_fmr->fmr_page_size) { /* only whole-numbered pages */ - ehca_err(fmr->device, "bad iova, iova=%lx fmr_page_size=%x", + ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x", iova, e_fmr->fmr_page_size); ret = -EINVAL; goto map_phys_fmr_exit0; @@ -878,7 +914,7 @@ int ehca_map_phys_fmr(struct ib_fmr *fmr, map_phys_fmr_exit0: if (ret) ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x " - "iova=%lx", ret, fmr, page_list, list_len, iova); + "iova=%llx", ret, fmr, page_list, list_len, iova); return ret; } /* end ehca_map_phys_fmr() */ @@ -897,11 +933,6 @@ int ehca_unmap_fmr(struct list_head *fmr_list) /* check all FMR belong to same SHCA, and check internal flag */ list_for_each_entry(ib_fmr, fmr_list, list) { prev_shca = shca; - if (!ib_fmr) { - ehca_gen_err("bad fmr=%p in list", ib_fmr); - ret = -EINVAL; - goto unmap_fmr_exit0; - } shca = container_of(ib_fmr->device, struct ehca_shca, ib_device); e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); @@ -964,8 +995,8 @@ int ehca_dealloc_fmr(struct ib_fmr *fmr) h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); if (h_ret != H_SUCCESS) { - ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%li e_fmr=%p " - "hca_hndl=%lx fmr_hndl=%lx fmr->lkey=%x", + ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p " + "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x", h_ret, e_fmr, shca->ipz_hca_handle.handle, e_fmr->ipz_mr_handle.handle, fmr->lkey); ret = ehca2ib_return_code(h_ret); @@ -983,6 +1014,10 @@ free_fmr_exit0: /*----------------------------------------------------------------------*/ +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo); + int ehca_reg_mr(struct ehca_shca *shca, struct ehca_mr *e_mr, u64 *iova_start, @@ -991,7 +1026,8 @@ int ehca_reg_mr(struct ehca_shca *shca, struct ehca_pd *e_pd, struct ehca_mr_pginfo *pginfo, u32 *lkey, /*OUT*/ - u32 *rkey) /*OUT*/ + u32 *rkey, /*OUT*/ + enum ehca_reg_type reg_type) { int ret; u64 h_ret; @@ -1007,15 +1043,21 @@ int ehca_reg_mr(struct ehca_shca *shca, (u64)iova_start, size, hipz_acl, e_pd->fw_pd, &hipzout); if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%li " - "hca_hndl=%lx", h_ret, shca->ipz_hca_handle.handle); + ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli " 
+ "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle); ret = ehca2ib_return_code(h_ret); goto ehca_reg_mr_exit0; } e_mr->ipz_mr_handle = hipzout.handle; - ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + if (reg_type == EHCA_REG_BUSMAP_MR) + ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo); + else if (reg_type == EHCA_REG_MR) + ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + else + ret = -EINVAL; + if (ret) goto ehca_reg_mr_exit1; @@ -1033,9 +1075,9 @@ int ehca_reg_mr(struct ehca_shca *shca, ehca_reg_mr_exit1: h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "h_ret=%li shca=%p e_mr=%p " - "iova_start=%p size=%lx acl=%x e_pd=%p lkey=%x " - "pginfo=%p num_kpages=%lx num_hwpages=%lx ret=%i", + ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p " + "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x " + "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i", h_ret, shca, e_mr, iova_start, size, acl, e_pd, hipzout.lkey, pginfo, pginfo->num_kpages, pginfo->num_hwpages, ret); @@ -1045,8 +1087,8 @@ ehca_reg_mr_exit1: ehca_reg_mr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " - "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " - "num_kpages=%lx num_hwpages=%lx", + "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " + "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, pginfo->num_kpages, pginfo->num_hwpages); return ret; @@ -1094,7 +1136,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, } if (rnum > 1) { - rpage = virt_to_abs(kpage); + rpage = __pa(kpage); if (!rpage) { ehca_err(&shca->ib_device, "kpage=%p i=%x", kpage, i); @@ -1116,8 +1158,8 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, */ if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "last " - "hipz_reg_rpage_mr failed, h_ret=%li " - "e_mr=%p i=%x hca_hndl=%lx mr_hndl=%lx" + "hipz_reg_rpage_mr failed, h_ret=%lli " + "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx" " lkey=%x", h_ret, e_mr, i, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, @@ -1128,8 +1170,8 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, ret = 0; } else if (h_ret != H_PAGE_REGISTERED) { ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " - "h_ret=%li e_mr=%p i=%x lkey=%x hca_hndl=%lx " - "mr_hndl=%lx", h_ret, e_mr, i, + "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx " + "mr_hndl=%llx", h_ret, e_mr, i, e_mr->ib.ib_mr.lkey, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle); @@ -1145,7 +1187,7 @@ ehca_reg_mr_rpages_exit1: ehca_reg_mr_rpages_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p " - "num_kpages=%lx num_hwpages=%lx", ret, shca, e_mr, + "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr, pginfo, pginfo->num_kpages, pginfo->num_hwpages); return ret; } /* end ehca_reg_mr_rpages() */ @@ -1184,12 +1226,12 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); if (ret) { ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " - "pginfo=%p type=%x num_kpages=%lx num_hwpages=%lx " + "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx " "kpage=%p", e_mr, pginfo, pginfo->type, pginfo->num_kpages, pginfo->num_hwpages, kpage); goto ehca_rereg_mr_rereg1_exit1; } - rpage = virt_to_abs(kpage); + rpage = __pa(kpage); if (!rpage) { ehca_err(&shca->ib_device, "kpage=%p", kpage); ret = -EFAULT; @@ -1205,13 +1247,13 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, * (MW bound or MR is shared) */ ehca_warn(&shca->ib_device, 
"hipz_h_reregister_pmr failed " - "(Rereg1), h_ret=%li e_mr=%p", h_ret, e_mr); + "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr); *pginfo = pginfo_save; ret = -EAGAIN; } else if ((u64 *)hipzout.vaddr != iova_start) { ehca_err(&shca->ib_device, "PHYP changed iova_start in " - "rereg_pmr, iova_start=%p iova_start_out=%lx e_mr=%p " - "mr_handle=%lx lkey=%x lkey_out=%x", iova_start, + "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p " + "mr_handle=%llx lkey=%x lkey_out=%x", iova_start, hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle, e_mr->ib.ib_mr.lkey, hipzout.lkey); ret = -EFAULT; @@ -1235,7 +1277,7 @@ ehca_rereg_mr_rereg1_exit1: ehca_rereg_mr_rereg1_exit0: if ( ret && (ret != -EAGAIN) ) ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x " - "pginfo=%p num_kpages=%lx num_hwpages=%lx", + "pginfo=%p num_kpages=%llx num_hwpages=%llx", ret, *lkey, *rkey, pginfo, pginfo->num_kpages, pginfo->num_hwpages); return ret; @@ -1263,7 +1305,7 @@ int ehca_rereg_mr(struct ehca_shca *shca, (e_mr->num_hwpages > MAX_RPAGES) || (pginfo->num_hwpages > e_mr->num_hwpages)) { ehca_dbg(&shca->ib_device, "Rereg3 case, " - "pginfo->num_hwpages=%lx e_mr->num_hwpages=%x", + "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x", pginfo->num_hwpages, e_mr->num_hwpages); rereg_1_hcall = 0; rereg_3_hcall = 1; @@ -1295,7 +1337,7 @@ int ehca_rereg_mr(struct ehca_shca *shca, h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%li e_mr=%p hca_hndl=%lx mr_hndl=%lx " + "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx " "mr->lkey=%x", h_ret, e_mr, shca->ipz_hca_handle.handle, e_mr->ipz_mr_handle.handle, @@ -1316,7 +1358,7 @@ int ehca_rereg_mr(struct ehca_shca *shca, e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, - e_pd, pginfo, lkey, rkey); + e_pd, pginfo, lkey, rkey, EHCA_REG_MR); if (ret) { u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; memcpy(&e_mr->flags, &(save_mr.flags), @@ -1328,8 +1370,8 @@ int ehca_rereg_mr(struct ehca_shca *shca, ehca_rereg_mr_exit0: if (ret) ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " - "iova_start=%p size=%lx acl=%x e_pd=%p pginfo=%p " - "num_kpages=%lx lkey=%x rkey=%x rereg_1_hcall=%x " + "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " + "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x " "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, rereg_1_hcall, rereg_3_hcall); @@ -1371,8 +1413,8 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, * FMRs are not shared and no MW bound to FMRs */ ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " - "(Rereg1), h_ret=%li e_fmr=%p hca_hndl=%lx " - "mr_hndl=%lx lkey=%x lkey_out=%x", + "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx " + "mr_hndl=%llx lkey=%x lkey_out=%x", h_ret, e_fmr, shca->ipz_hca_handle.handle, e_fmr->ipz_mr_handle.handle, e_fmr->ib.ib_fmr.lkey, hipzout.lkey); @@ -1383,7 +1425,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%li e_fmr=%p hca_hndl=%lx mr_hndl=%lx " + "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx " "lkey=%x", h_ret, e_fmr, shca->ipz_hca_handle.handle, e_fmr->ipz_mr_handle.handle, @@ -1409,7 +1451,7 @@ int ehca_unmap_one_fmr(struct ehca_shca *shca, ret = ehca_reg_mr(shca, e_fmr, NULL, (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), e_fmr->acl, e_pd, &pginfo, &tmp_lkey, - 
&tmp_rkey); + &tmp_rkey, EHCA_REG_MR); if (ret) { u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; memcpy(&e_fmr->flags, &(save_mr.flags), @@ -1447,9 +1489,9 @@ int ehca_reg_smr(struct ehca_shca *shca, (u64)iova_start, hipz_acl, e_pd->fw_pd, &hipzout); if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%li " + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " - "e_pd=%p hca_hndl=%lx mr_hndl=%lx lkey=%x", + "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, shca->ipz_hca_handle.handle, e_origmr->ipz_mr_handle.handle, @@ -1478,6 +1520,90 @@ ehca_reg_smr_exit0: } /* end ehca_reg_smr() */ /*----------------------------------------------------------------------*/ +static inline void *ehca_calc_sectbase(int top, int dir, int idx) +{ + unsigned long ret = idx; + ret |= dir << EHCA_DIR_INDEX_SHIFT; + ret |= top << EHCA_TOP_INDEX_SHIFT; + return __va(ret << SECTION_SIZE_BITS); +} + +#define ehca_bmap_valid(entry) \ + ((u64)entry != (u64)EHCA_INVAL_ADDR) + +static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 h_ret = 0; + unsigned long page = 0; + u64 rpage = __pa(kpage); + int page_count; + + void *sectbase = ehca_calc_sectbase(top, dir, idx); + if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) { + ehca_err(&shca->ib_device, "reg_mr_section will probably fail:" + "hwpage_size does not fit to " + "section start address"); + } + page_count = EHCA_SECTSIZE / pginfo->hwpage_size; + + while (page < page_count) { + u64 rnum; + for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); + rnum++) { + void *pg = sectbase + ((page++) * pginfo->hwpage_size); + kpage[rnum] = __pa(pg); + } + + h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); + + if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) { + ehca_err(&shca->ib_device, "register_rpage_mr failed"); + return h_ret; + } + } + return h_ret; +} + +static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int idx; + + for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx])) + continue; + + hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr, + pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca, + struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int dir; + + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} /* register internal max-MR to internal SHCA */ int ehca_reg_internal_maxmr( @@ -1495,6 +1621,11 @@ int ehca_reg_internal_maxmr( u32 num_hwpages; u64 hw_pgsize; + if (!ehca_bmap) { + ret = -EFAULT; + goto ehca_reg_internal_maxmr_exit0; + } + e_mr = ehca_mr_new(); if (!e_mr) { ehca_err(&shca->ib_device, "out of memory"); @@ -1504,8 +1635,8 @@ int ehca_reg_internal_maxmr( e_mr->flags |= EHCA_MR_FLAG_MAXMR; /* register internal max-MR on HCA */ - size_maxmr = 
(u64)high_memory - PAGE_OFFSET; - iova_start = (u64 *)KERNELBASE; + size_maxmr = ehca_mr_len; + iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); ib_pbuf.addr = 0; ib_pbuf.size = size_maxmr; num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, @@ -1524,10 +1655,10 @@ int ehca_reg_internal_maxmr( ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); + &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR); if (ret) { ehca_err(&shca->ib_device, "reg of internal max MR failed, " - "e_mr=%p iova_start=%p size_maxmr=%lx num_kpages=%x " + "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x " "num_hwpages=%x", e_mr, iova_start, size_maxmr, num_kpages, num_hwpages); goto ehca_reg_internal_maxmr_exit1; @@ -1573,8 +1704,8 @@ int ehca_reg_maxmr(struct ehca_shca *shca, (u64)iova_start, hipz_acl, e_pd->fw_pd, &hipzout); if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%li " - "e_origmr=%p hca_hndl=%lx mr_hndl=%lx lkey=%x", + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " + "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", h_ret, e_origmr, shca->ipz_hca_handle.handle, e_origmr->ipz_mr_handle.handle, e_origmr->ib.ib_mr.lkey); @@ -1651,28 +1782,28 @@ int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, /* check first buffer */ if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) { ehca_gen_err("iova_start/addr mismatch, iova_start=%p " - "pbuf->addr=%lx pbuf->size=%lx", + "pbuf->addr=%llx pbuf->size=%llx", iova_start, pbuf->addr, pbuf->size); return -EINVAL; } if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && (num_phys_buf > 1)) { - ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%lx " - "pbuf->size=%lx", pbuf->addr, pbuf->size); + ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx " + "pbuf->size=%llx", pbuf->addr, pbuf->size); return -EINVAL; } for (i = 0; i < num_phys_buf; i++) { if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { - ehca_gen_err("bad address, i=%x pbuf->addr=%lx " - "pbuf->size=%lx", + ehca_gen_err("bad address, i=%x pbuf->addr=%llx " + "pbuf->size=%llx", i, pbuf->addr, pbuf->size); return -EINVAL; } if (((i > 0) && /* not 1st */ (i < (num_phys_buf - 1)) && /* not last */ (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { - ehca_gen_err("bad size, i=%x pbuf->size=%lx", + ehca_gen_err("bad size, i=%x pbuf->size=%llx", i, pbuf->size); return -EINVAL; } @@ -1705,7 +1836,7 @@ int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, page = page_list; for (i = 0; i < list_len; i++) { if (*page % e_fmr->fmr_page_size) { - ehca_gen_err("bad page, i=%x *page=%lx page=%p fmr=%p " + ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p " "fmr_page_size=%x", i, *page, page, e_fmr, e_fmr->fmr_page_size); return -EINVAL; @@ -1724,62 +1855,39 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - struct ib_umem_chunk *prev_chunk; - struct ib_umem_chunk *chunk; u64 pgaddr; - u32 i = 0; u32 j = 0; int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; - - /* loop over desired chunk entries */ - chunk = pginfo->u.usr.next_chunk; - prev_chunk = pginfo->u.usr.next_chunk; - list_for_each_entry_continue( - chunk, (&(pginfo->u.usr.region->chunk_list)), list) { - for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { - pgaddr = page_to_pfn(sg_page(&chunk->page_list[i])) - << PAGE_SHIFT ; - *kpage = phys_to_abs(pgaddr + - (pginfo->next_hwpage * - pginfo->hwpage_size)); - if ( !(*kpage) ) { - 
ehca_gen_err("pgaddr=%lx " - "chunk->page_list[i]=%lx " - "i=%x next_hwpage=%lx", - pgaddr, (u64)sg_dma_address( - &chunk->page_list[i]), - i, pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - kpage++; - if (pginfo->next_hwpage % hwpages_per_kpage == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.usr.next_nmap)++; - pginfo->next_hwpage = 0; - i++; - } - j++; - if (j >= number) break; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { + pgaddr = page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT; + *kpage = pgaddr + (pginfo->next_hwpage * + pginfo->hwpage_size); + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx " + "sg_dma_address=%llx " + "entry=%llx next_hwpage=%llx", + pgaddr, (u64)sg_dma_address(*sg), + pginfo->u.usr.next_nmap, + pginfo->next_hwpage); + return -EFAULT; } - if ((pginfo->u.usr.next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % hwpages_per_kpage == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + *sg = sg_next(*sg); + } + j++; + if (j >= number) break; - else - prev_chunk = chunk; } - pginfo->u.usr.next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->u.usr.region->chunk_list)), - list); + return ret; } @@ -1787,20 +1895,19 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, * check given pages for contiguous layout * last page addr is returned in prev_pgaddr for further check */ -static int ehca_check_kpages_per_ate(struct scatterlist *page_list, - int start_idx, int end_idx, +static int ehca_check_kpages_per_ate(struct scatterlist **sg, + int num_pages, u64 *prev_pgaddr) { - int t; - for (t = start_idx; t <= end_idx; t++) { - u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT; + for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { + u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; if (ehca_debug_level >= 3) - ehca_gen_dbg("chunk_page=%lx value=%016lx", pgaddr, - *(u64 *)abs_to_virt(phys_to_abs(pgaddr))); + ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, + *(u64 *)__va(pgaddr)); if (pgaddr - PAGE_SIZE != *prev_pgaddr) { - ehca_gen_err("uncontiguous page found pgaddr=%lx " - "prev_pgaddr=%lx page_list_i=%x", - pgaddr, *prev_pgaddr, t); + ehca_gen_err("uncontiguous page found pgaddr=%llx " + "prev_pgaddr=%llx entries_left_in_hwpage=%x", + pgaddr, *prev_pgaddr, num_pages); return -EINVAL; } *prev_pgaddr = pgaddr; @@ -1814,113 +1921,80 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - struct ib_umem_chunk *prev_chunk; - struct ib_umem_chunk *chunk; u64 pgaddr, prev_pgaddr; - u32 i = 0; u32 j = 0; int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; int nr_kpages = kpages_per_hwpage; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { - /* loop over desired chunk entries */ - chunk = pginfo->u.usr.next_chunk; - prev_chunk = pginfo->u.usr.next_chunk; - list_for_each_entry_continue( - chunk, (&(pginfo->u.usr.region->chunk_list)), list) { - for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { - if (nr_kpages == kpages_per_hwpage) { - pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i])) - << PAGE_SHIFT ); - *kpage = phys_to_abs(pgaddr); - if ( !(*kpage) ) { - 
ehca_gen_err("pgaddr=%lx i=%x", - pgaddr, i); + if (nr_kpages == kpages_per_hwpage) { + pgaddr = (page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT); + *kpage = pgaddr; + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx entry=%llx", + pgaddr, pginfo->u.usr.next_nmap); + ret = -EFAULT; + return ret; + } + /* + * The first page in a hwpage must be aligned; + * the first MR page is exempt from this rule. + */ + if (pgaddr & (pginfo->hwpage_size - 1)) { + if (pginfo->hwpage_cnt) { + ehca_gen_err( + "invalid alignment " + "pgaddr=%llx entry=%llx " + "mr_pgsize=%llx", + pgaddr, pginfo->u.usr.next_nmap, + pginfo->hwpage_size); ret = -EFAULT; return ret; } - /* - * The first page in a hwpage must be aligned; - * the first MR page is exempt from this rule. - */ - if (pgaddr & (pginfo->hwpage_size - 1)) { - if (pginfo->hwpage_cnt) { - ehca_gen_err( - "invalid alignment " - "pgaddr=%lx i=%x " - "mr_pgsize=%lx", - pgaddr, i, - pginfo->hwpage_size); - ret = -EFAULT; - return ret; - } - /* first MR page */ - pginfo->kpage_cnt = - (pgaddr & - (pginfo->hwpage_size - 1)) >> - PAGE_SHIFT; - nr_kpages -= pginfo->kpage_cnt; - *kpage = phys_to_abs( - pgaddr & - ~(pginfo->hwpage_size - 1)); - } - if (ehca_debug_level >= 3) { - u64 val = *(u64 *)abs_to_virt( - phys_to_abs(pgaddr)); - ehca_gen_dbg("kpage=%lx chunk_page=%lx " - "value=%016lx", - *kpage, pgaddr, val); - } - prev_pgaddr = pgaddr; - i++; - pginfo->kpage_cnt++; - pginfo->u.usr.next_nmap++; - nr_kpages--; - if (!nr_kpages) - goto next_kpage; - continue; + /* first MR page */ + pginfo->kpage_cnt = + (pgaddr & + (pginfo->hwpage_size - 1)) >> + PAGE_SHIFT; + nr_kpages -= pginfo->kpage_cnt; + *kpage = pgaddr & + ~(pginfo->hwpage_size - 1); } - if (i + nr_kpages > chunk->nmap) { - ret = ehca_check_kpages_per_ate( - chunk->page_list, i, - chunk->nmap - 1, &prev_pgaddr); - if (ret) return ret; - pginfo->kpage_cnt += chunk->nmap - i; - pginfo->u.usr.next_nmap += chunk->nmap - i; - nr_kpages -= chunk->nmap - i; - break; + if (ehca_debug_level >= 3) { + u64 val = *(u64 *)__va(pgaddr); + ehca_gen_dbg("kpage=%llx page=%llx " + "value=%016llx", + *kpage, pgaddr, val); } + prev_pgaddr = pgaddr; + *sg = sg_next(*sg); + pginfo->kpage_cnt++; + pginfo->u.usr.next_nmap++; + nr_kpages--; + if (!nr_kpages) + goto next_kpage; + continue; + } + + ret = ehca_check_kpages_per_ate(sg, nr_kpages, + &prev_pgaddr); + if (ret) + return ret; + pginfo->kpage_cnt += nr_kpages; + pginfo->u.usr.next_nmap += nr_kpages; - ret = ehca_check_kpages_per_ate(chunk->page_list, i, - i + nr_kpages - 1, - &prev_pgaddr); - if (ret) return ret; - i += nr_kpages; - pginfo->kpage_cnt += nr_kpages; - pginfo->u.usr.next_nmap += nr_kpages; next_kpage: - nr_kpages = kpages_per_hwpage; - (pginfo->hwpage_cnt)++; - kpage++; - j++; - if (j >= number) break; - } - if ((pginfo->u.usr.next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + nr_kpages = kpages_per_hwpage; + (pginfo->hwpage_cnt)++; + kpage++; + j++; + if (j >= number) break; - else - prev_chunk = chunk; } - pginfo->u.usr.next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->u.usr.region->chunk_list)), - list); + return ret; } @@ -1944,21 +2018,20 @@ static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, if ((pginfo->kpage_cnt >= pginfo->num_kpages) || (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { ehca_gen_err("kpage_cnt >= num_kpages, " - "kpage_cnt=%lx 
num_kpages=%lx " - "hwpage_cnt=%lx " - "num_hwpages=%lx i=%x", + "kpage_cnt=%llx num_kpages=%llx " + "hwpage_cnt=%llx " + "num_hwpages=%llx i=%x", pginfo->kpage_cnt, pginfo->num_kpages, pginfo->hwpage_cnt, pginfo->num_hwpages, i); return -EFAULT; } - *kpage = phys_to_abs( - (pbuf->addr & ~(pginfo->hwpage_size - 1)) + - (pginfo->next_hwpage * pginfo->hwpage_size)); + *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) + + (pginfo->next_hwpage * pginfo->hwpage_size); if ( !(*kpage) && pbuf->addr ) { - ehca_gen_err("pbuf->addr=%lx pbuf->size=%lx " - "next_hwpage=%lx", pbuf->addr, + ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " + "next_hwpage=%llx", pbuf->addr, pbuf->size, pginfo->next_hwpage); return -EFAULT; } @@ -1993,11 +2066,11 @@ static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, /* loop over desired page_list entries */ fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; for (i = 0; i < number; i++) { - *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) + - pginfo->next_hwpage * pginfo->hwpage_size); + *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) + + pginfo->next_hwpage * pginfo->hwpage_size; if ( !(*kpage) ) { - ehca_gen_err("*fmrlist=%lx fmrlist=%p " - "next_listelem=%lx next_hwpage=%lx", + ehca_gen_err("*fmrlist=%llx fmrlist=%p " + "next_listelem=%llx next_hwpage=%llx", *fmrlist, fmrlist, pginfo->u.fmr.next_listelem, pginfo->next_hwpage); @@ -2021,11 +2094,10 @@ static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, u64 prev = *kpage; /* check if adrs are contiguous */ for (j = 1; j < cnt_per_hwpage; j++) { - u64 p = phys_to_abs(fmrlist[j] & - ~(pginfo->hwpage_size - 1)); + u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1); if (prev + pginfo->u.fmr.fmr_pgsize != p) { ehca_gen_err("uncontiguous fmr pages " - "found prev=%lx p=%lx " + "found prev=%llx p=%llx " "idx=%x", prev, p, i + j); return -EINVAL; } @@ -2077,8 +2149,8 @@ int ehca_mr_is_maxmr(u64 size, u64 *iova_start) { /* a MR is treated as max-MR only if it fits following: */ - if ((size == ((u64)high_memory - PAGE_OFFSET)) && - (iova_start == (void *)KERNELBASE)) { + if ((size == ehca_mr_len) && + (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { ehca_gen_dbg("this is a max-MR"); return 1; } else @@ -2184,3 +2256,338 @@ void ehca_cleanup_mrmw_cache(void) if (mw_cache) kmem_cache_destroy(mw_cache); } + +static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap, + int dir) +{ + if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) { + ehca_top_bmap->dir[dir] = + kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL); + if (!ehca_top_bmap->dir[dir]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE); + } + return 0; +} + +static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir) +{ + if (!ehca_bmap_valid(ehca_bmap->top[top])) { + ehca_bmap->top[top] = + kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL); + if (!ehca_bmap->top[top]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE); + } + return ehca_init_top_bmap(ehca_bmap->top[top], dir); +} + +static inline int ehca_calc_index(unsigned long i, unsigned long s) +{ + return (i >> s) & EHCA_INDEX_MASK; +} + +void ehca_destroy_busmap(void) +{ + int top, dir; + + if (!ehca_bmap) + return; + + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + for (dir = 0; dir < 
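/*
 * Note (illustrative, not part of this patch): the 0xFF memset in the init
 * helpers above makes every unwritten 64-bit slot read back as
 * 0xFFFFFFFFFFFFFFFF == EHCA_INVAL_ADDR, so ehca_bmap_valid() doubles as an
 * "is this level populated?" test:
 *
 *	if (!ehca_bmap_valid(ehca_bmap->top[top]))
 *		continue;	// level was never allocated
 *
 * which is the skip pattern used by this teardown loop and by the
 * registration walks above.
 */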
EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + kfree(ehca_bmap->top[top]->dir[dir]); + } + + kfree(ehca_bmap->top[top]); + } + + kfree(ehca_bmap); + ehca_bmap = NULL; +} + +static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages) +{ + unsigned long i, start_section, end_section; + int top, dir, idx; + + if (!nr_pages) + return 0; + + if (!ehca_bmap) { + ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL); + if (!ehca_bmap) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); + } + + start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE; + end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; + for (i = start_section; i < end_section; i++) { + int ret; + top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); + dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT); + idx = i & EHCA_INDEX_MASK; + + ret = ehca_init_bmap(ehca_bmap, top, dir); + if (ret) { + ehca_destroy_busmap(); + return ret; + } + ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len; + ehca_mr_len += EHCA_SECTSIZE; + } + return 0; +} + +static int ehca_is_hugepage(unsigned long pfn) +{ + int page_order; + + if (pfn & EHCA_HUGEPAGE_PFN_MASK) + return 0; + + page_order = compound_order(pfn_to_page(pfn)); + if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT) + return 0; + + return 1; +} + +static int ehca_create_busmap_callback(unsigned long initial_pfn, + unsigned long total_nr_pages, void *arg) +{ + int ret; + unsigned long pfn, start_pfn, end_pfn, nr_pages; + + if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE) + return ehca_update_busmap(initial_pfn, total_nr_pages); + + /* Given chunk is >= 16GB -> check for hugepages */ + start_pfn = initial_pfn; + end_pfn = initial_pfn + total_nr_pages; + pfn = start_pfn; + + while (pfn < end_pfn) { + if (ehca_is_hugepage(pfn)) { + /* Add mem found in front of the hugepage */ + nr_pages = pfn - start_pfn; + ret = ehca_update_busmap(start_pfn, nr_pages); + if (ret) + return ret; + /* Skip the hugepage */ + pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE); + start_pfn = pfn; + } else + pfn += (EHCA_SECTSIZE / PAGE_SIZE); + } + + /* Add mem found behind the hugepage(s) */ + nr_pages = pfn - start_pfn; + return ehca_update_busmap(start_pfn, nr_pages); +} + +int ehca_create_busmap(void) +{ + int ret; + + ehca_mr_len = 0; + ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, + ehca_create_busmap_callback); + return ret; +} + +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int top; + u64 hret, *kpage; + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + return -ENOMEM; + } + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo); + if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) + break; + } + + ehca_free_fw_ctrlblock(kpage); + + if (hret == H_SUCCESS) + return 0; /* Everything is fine */ + else { + ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, " + "h_ret=%lli e_mr=%p top=%x lkey=%x " + "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + return ehca2ib_return_code(hret); + } +} + +static u64 ehca_map_vaddr(void *caddr) +{ + int top, dir, idx; + unsigned long abs_addr, offset; + u64 entry; + + if 
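/*
 * Illustrative sketch (not part of this patch): without its validity
 * checks, the translation below is a three-level lookup keyed on the
 * physical address plus an OR of the in-section offset. Because
 * ehca_update_busmap() hands out bus offsets in EHCA_SECTSIZE steps, every
 * valid entry is section-aligned, so the OR behaves like an add:
 *
 *	unsigned long pa = __pa(caddr);
 *	top = ehca_calc_index(pa, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
 *	dir = ehca_calc_index(pa, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT);
 *	idx = ehca_calc_index(pa, EHCA_SECTSHIFT);
 *	return ehca_bmap->top[top]->dir[dir]->ent[idx] |
 *	       ((unsigned long)caddr & (EHCA_SECTSIZE - 1));
 */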
(!ehca_bmap) + return EHCA_INVAL_ADDR; + + abs_addr = __pa(caddr); + top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top])) + return EHCA_INVAL_ADDR; + + dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + return EHCA_INVAL_ADDR; + + idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT); + + entry = ehca_bmap->top[top]->dir[dir]->ent[idx]; + if (ehca_bmap_valid(entry)) { + offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1); + return entry | offset; + } else + return EHCA_INVAL_ADDR; +} + +static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == EHCA_INVAL_ADDR; +} + +static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr, + size_t size, enum dma_data_direction direction) +{ + if (cpu_addr) + return ehca_map_vaddr(cpu_addr); + else + return EHCA_INVAL_ADDR; +} + +static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + u64 addr; + + if (offset + size > PAGE_SIZE) + return EHCA_INVAL_ADDR; + + addr = ehca_map_vaddr(page_address(page)); + if (!ehca_dma_mapping_error(dev, addr)) + addr += offset; + + return addr; +} + +static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) { + u64 addr; + addr = ehca_map_vaddr(sg_virt(sg)); + if (ehca_dma_mapping_error(dev, addr)) + return 0; + + sg->dma_address = addr; + sg->dma_length = sg->length; + } + return nents; +} + +static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); +} + +static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + dma_sync_single_for_device(dev->dma_device, addr, size, dir); +} + +static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + u64 dma_addr; + + p = alloc_pages(flag, get_order(size)); + if (p) { + addr = page_address(p); + dma_addr = ehca_map_vaddr(addr); + if (ehca_dma_mapping_error(dev, dma_addr)) { + free_pages((unsigned long)addr, get_order(size)); + return NULL; + } + if (dma_handle) + *dma_handle = dma_addr; + return addr; + } + return NULL; +} + +static void ehca_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + if (cpu_addr && size) + free_pages((unsigned long)cpu_addr, get_order(size)); +} + + +struct ib_dma_mapping_ops ehca_dma_mapping_ops = { + .mapping_error = ehca_dma_mapping_error, + .map_single = ehca_dma_map_single, + .unmap_single = ehca_dma_unmap_single, + .map_page = ehca_dma_map_page, + .unmap_page = ehca_dma_unmap_page, + .map_sg = 
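/*
 * Usage note (illustrative, not part of this patch): once
 * ehca_init_device() points ib_device.dma_ops at this table, the generic
 * ib_dma_* wrappers dispatch here instead of the real DMA API, e.g.:
 *
 *	u64 h = ib_dma_map_single(ibdev, buf, len, DMA_TO_DEVICE);
 *	if (ib_dma_mapping_error(ibdev, h))	// catches EHCA_INVAL_ADDR
 *		return -ENOMEM;
 *
 * so consumers receive busmap offsets without any eHCA-specific calls.
 */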
ehca_dma_map_sg, + .unmap_sg = ehca_dma_unmap_sg, + .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, + .sync_single_for_device = ehca_dma_sync_single_for_device, + .alloc_coherent = ehca_dma_alloc_coherent, + .free_coherent = ehca_dma_free_coherent, +}; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h index bc8f4e31c12..50d8b51306d 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.h +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.h @@ -42,6 +42,11 @@ #ifndef _EHCA_MRMW_H_ #define _EHCA_MRMW_H_ +enum ehca_reg_type { + EHCA_REG_MR, + EHCA_REG_BUSMAP_MR +}; + int ehca_reg_mr(struct ehca_shca *shca, struct ehca_mr *e_mr, u64 *iova_start, @@ -50,7 +55,8 @@ int ehca_reg_mr(struct ehca_shca *shca, struct ehca_pd *e_pd, struct ehca_mr_pginfo *pginfo, u32 *lkey, - u32 *rkey); + u32 *rkey, + enum ehca_reg_type reg_type); int ehca_reg_mr_rpages(struct ehca_shca *shca, struct ehca_mr *e_mr, @@ -118,4 +124,9 @@ void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, void ehca_mr_deletenew(struct ehca_mr *mr); +int ehca_create_busmap(void); + +void ehca_destroy_busmap(void); + +extern struct ib_dma_mapping_ops ehca_dma_mapping_ops; #endif /*_EHCA_MRMW_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c index 2fe554855fa..351577a6670 100644 --- a/drivers/infiniband/hw/ehca/ehca_pd.c +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -38,6 +38,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> + #include "ehca_tools.h" #include "ehca_iverbs.h" diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h index 818803057eb..90c4efa6758 100644 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ b/drivers/infiniband/hw/ehca/ehca_qes.h @@ -46,7 +46,7 @@ #include "ehca_tools.h" -/* virtual scatter gather entry to specify remote adresses with length */ +/* virtual scatter gather entry to specify remote addresses with length */ struct ehca_vsgentry { u64 vaddr; u32 lkey; @@ -148,7 +148,7 @@ struct ehca_wqe { u32 immediate_data; union { struct { - u64 remote_virtual_adress; + u64 remote_virtual_address; u32 rkey; u32 reserved; u64 atomic_1st_op_dma_len; @@ -213,6 +213,7 @@ struct ehca_wqe { #define WC_STATUS_ERROR_BIT 0x80000000 #define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 #define WC_STATUS_PURGE_BIT 0x10 +#define WC_SEND_RECEIVE_BIT 0x80 struct ehca_cqe { u64 work_request_id; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 3f59587338e..2e89356c46f 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -43,6 +43,8 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <linux/slab.h> + #include "ehca_classes.h" #include "ehca_tools.h" #include "ehca_qes.h" @@ -55,9 +57,7 @@ static struct kmem_cache *qp_cache; /* * attributes not supported by query qp */ -#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \ - IB_QP_MAX_QP_RD_ATOMIC | \ - IB_QP_ACCESS_FLAGS | \ +#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \ IB_QP_EN_SQD_ASYNC_NOTIFY) /* @@ -251,7 +251,7 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) return ST_UD; case IB_QPT_RAW_IPV6: return -EINVAL; - case IB_QPT_RAW_ETY: + case IB_QPT_RAW_ETHERTYPE: return -EINVAL; default: ehca_gen_err("Invalid ibqptype=%x", ibqptype); @@ -321,7 +321,7 @@ static inline int init_qp_queue(struct ehca_shca *shca, ret = -EINVAL; goto init_qp_queue1; } - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, my_qp->ipz_qp_handle, @@ -331,7 +331,7 @@ static inline int init_qp_queue(struct ehca_shca *shca, if (cnt == (nr_q_pages - 1)) { /* last page! */ if (h_ret != expected_hret) { ehca_err(ib_dev, "hipz_qp_register_rpage() " - "h_ret=%li", h_ret); + "h_ret=%lli", h_ret); ret = ehca2ib_return_code(h_ret); goto init_qp_queue1; } @@ -345,7 +345,7 @@ static inline int init_qp_queue(struct ehca_shca *shca, } else { if (h_ret != H_PAGE_REGISTERED) { ehca_err(ib_dev, "hipz_qp_register_rpage() " - "h_ret=%li", h_ret); + "h_ret=%lli", h_ret); ret = ehca2ib_return_code(h_ret); goto init_qp_queue1; } @@ -396,6 +396,54 @@ static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, queue->is_small = (queue->page_size != 0); } +/* needs to be called with cq->spinlock held */ +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) +{ + struct list_head *list, *node; + + /* TODO: support low latency QPs */ + if (qp->ext_type == EQPT_LLQP) + return; + + if (on_sq) { + list = &qp->send_cq->sqp_err_list; + node = &qp->sq_err_node; + } else { + list = &qp->recv_cq->rqp_err_list; + node = &qp->rq_err_node; + } + + if (list_empty(node)) + list_add_tail(node, list); + + return; +} + +static void del_from_err_list(struct ehca_cq *cq, struct list_head *node) +{ + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + + if (!list_empty(node)) + list_del_init(node); + + spin_unlock_irqrestore(&cq->spinlock, flags); +} + +static void reset_queue_map(struct ehca_queue_map *qmap) +{ + int i; + + qmap->tail = qmap->entries - 1; + qmap->left_to_poll = 0; + qmap->next_wqe_idx = 0; + for (i = 0; i < qmap->entries; i++) { + qmap->map[i].reported = 1; + qmap->map[i].cqe_req = 0; + } +} + /* * Create an ib_qp struct that is either a QP or an SRQ, depending on * the value of the is_srq parameter. 
If init_attr and srq_init_attr share @@ -407,13 +455,13 @@ static struct ehca_qp *internal_create_qp( struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata, int is_srq) { - struct ehca_qp *my_qp; + struct ehca_qp *my_qp, *my_srq = NULL; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); struct ib_ucontext *context = NULL; u64 h_ret; - int is_llqp = 0, has_srq = 0; + int is_llqp = 0, has_srq = 0, is_user = 0; int qp_type, max_send_sge, max_recv_sge, ret; /* h_call's out parameters */ @@ -421,9 +469,9 @@ static struct ehca_qp *internal_create_qp( u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; unsigned long flags; - if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) { + if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) { ehca_err(pd->device, "Unable to create QP, max number of %i " - "QPs reached.", ehca_max_qp); + "QPs reached.", shca->max_num_qps); ehca_err(pd->device, "To increase the maximum number of QPs " "use the number_of_qps module parameter.\n"); return ERR_PTR(-ENOSPC); @@ -456,8 +504,13 @@ static struct ehca_qp *internal_create_qp( /* handle SRQ base QPs */ if (init_attr->srq) { - struct ehca_qp *my_srq = - container_of(init_attr->srq, struct ehca_qp, ib_srq); + my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); + + if (qp_type == IB_QPT_UC) { + ehca_err(pd->device, "UC with SRQ not supported"); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } has_srq = 1; parms.ext_type = EQPT_SRQBASE; @@ -556,9 +609,6 @@ static struct ehca_qp *internal_create_qp( } } - if (pd->uobject && udata) - context = pd->uobject->context; - my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); if (!my_qp) { ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); @@ -566,6 +616,11 @@ static struct ehca_qp *internal_create_qp( return ERR_PTR(-ENOMEM); } + if (pd->uobject && udata) { + is_user = 1; + context = pd->uobject->context; + } + atomic_set(&my_qp->nr_events, 0); init_waitqueue_head(&my_qp->wait_completion); spin_lock_init(&my_qp->spinlock_s); @@ -581,30 +636,26 @@ static struct ehca_qp *internal_create_qp( my_qp->send_cq = container_of(init_attr->send_cq, struct ehca_cq, ib_cq); - do { - if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) { - ret = -ENOMEM; - ehca_err(pd->device, "Can't reserve idr resources."); - goto create_qp_exit0; - } + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_qp_idr_lock, flags); - write_lock_irqsave(&ehca_qp_idr_lock, flags); - ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - } while (ret == -EAGAIN); + ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT); + if (ret >= 0) + my_qp->token = ret; - if (ret) { - ret = -ENOMEM; - ehca_err(pd->device, "Can't allocate new idr entry."); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + idr_preload_end(); + if (ret < 0) { + if (ret == -ENOSPC) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid number of qp"); + } else { + ret = -ENOMEM; + ehca_err(pd->device, "Can't allocate new idr entry."); + } goto create_qp_exit0; } - if (my_qp->token > 0x1FFFFFF) { - ret = -EINVAL; - ehca_err(pd->device, "Invalid number of qp"); - goto create_qp_exit1; - } - if (has_srq) parms.srq_token = my_qp->token; @@ -654,9 +705,9 @@ static struct ehca_qp *internal_create_qp( (parms.squeue.is_small || parms.rqueue.is_small); } - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms); + h_ret = 
hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); if (h_ret != H_SUCCESS) { - ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%li", + ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli", h_ret); ret = ehca2ib_return_code(h_ret); goto create_qp_exit1; @@ -715,6 +766,21 @@ static struct ehca_qp *internal_create_qp( "and pages ret=%i", ret); goto create_qp_exit2; } + + if (!is_user) { + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / + my_qp->ipz_squeue.qe_size; + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->sq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit3; + } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); + } } if (HAS_RQ(my_qp)) { @@ -724,8 +790,28 @@ static struct ehca_qp *internal_create_qp( if (ret) { ehca_err(pd->device, "Couldn't initialize rqueue " "and pages ret=%i", ret); - goto create_qp_exit3; + goto create_qp_exit4; + } + if (!is_user) { + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->rq_map); } + } else if (init_attr->srq && !is_user) { + /* this is a base QP, use the queue map of the SRQ */ + my_qp->rq_map = my_srq->rq_map; + INIT_LIST_HEAD(&my_qp->rq_err_node); + + my_qp->ipz_rqueue = my_srq->ipz_rqueue; } if (is_srq) { @@ -770,7 +856,7 @@ static struct ehca_qp *internal_create_qp( if (!my_qp->mod_qp_parm) { ehca_err(pd->device, "Could not alloc mod_qp_parm"); - goto create_qp_exit4; + goto create_qp_exit5; } } } @@ -779,8 +865,13 @@ static struct ehca_qp *internal_create_qp( if (qp_type == IB_QPT_GSI) { h_ret = ehca_define_sqp(shca, my_qp, init_attr); if (h_ret != H_SUCCESS) { + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + /* the QP pointer is no longer valid */ + shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = + NULL; ret = ehca2ib_return_code(h_ret); - goto create_qp_exit5; + goto create_qp_exit6; } } @@ -789,7 +880,7 @@ static struct ehca_qp *internal_create_qp( if (ret) { ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%i", ret); - goto create_qp_exit5; + goto create_qp_exit7; } } @@ -815,22 +906,30 @@ static struct ehca_qp *internal_create_qp( if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit6; + goto create_qp_exit8; } } return my_qp; -create_qp_exit6: +create_qp_exit8: ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); -create_qp_exit5: +create_qp_exit7: kfree(my_qp->mod_qp_parm); -create_qp_exit4: +create_qp_exit6: + if (HAS_RQ(my_qp) && !is_user) + vfree(my_qp->rq_map.map); + +create_qp_exit5: if (HAS_RQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); +create_qp_exit4: + if (HAS_SQ(my_qp) && !is_user) + vfree(my_qp->sq_map.map); + create_qp_exit3: if (HAS_SQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); @@ -874,6 +973,9 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, struct hcp_modify_qp_control_block *mqpcb; u64 hret, update_mask; + if (srq_init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + /* For common attributes, 
internal_create_qp() takes its info * out of qp_init_attr, so copy all common attrs there. */ @@ -912,7 +1014,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { ehca_err(pd->device, "Could not modify SRQ to INIT " - "ehca_qp=%p qp_num=%x h_ret=%li", + "ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, my_qp->real_qp_num, hret); goto create_srq2; } @@ -926,7 +1028,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { ehca_err(pd->device, "Could not enable SRQ " - "ehca_qp=%p qp_num=%x h_ret=%li", + "ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, my_qp->real_qp_num, hret); goto create_srq2; } @@ -940,7 +1042,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd, mqpcb, my_qp->galpas.kernel); if (hret != H_SUCCESS) { ehca_err(pd->device, "Could not modify SRQ to RTR " - "ehca_qp=%p qp_num=%x h_ret=%li", + "ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, my_qp->real_qp_num, hret); goto create_srq2; } @@ -980,7 +1082,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, &bad_send_wqe_p, NULL, 2); if (h_ret != H_SUCCESS) { ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed" - " ehca_qp=%p qp_num=%x h_ret=%li", + " ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, qp_num, h_ret); return ehca2ib_return_code(h_ret); } @@ -988,7 +1090,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); /* convert wqe pointer to vadr */ - bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p); + bad_send_wqe_v = __va((u64)bad_send_wqe_p); if (ehca_debug_level >= 2) ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); squeue = &my_qp->ipz_squeue; @@ -1021,6 +1123,111 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, return 0; } +static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, + struct ehca_queue_map *qmap) +{ + void *wqe_v; + u64 q_ofs; + u32 wqe_idx; + unsigned int tail_idx; + + /* convert real to abs address */ + wqe_p = wqe_p & (~(1UL << 63)); + + wqe_v = __va(wqe_p); + + if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { + ehca_gen_err("Invalid offset for calculating left cqes " + "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v); + return -EFAULT; + } + + tail_idx = next_index(qmap->tail, qmap->entries); + wqe_idx = q_ofs / ipz_queue->qe_size; + + /* check all processed wqes, whether a cqe is requested or not */ + while (tail_idx != wqe_idx) { + if (qmap->map[tail_idx].cqe_req) + qmap->left_to_poll++; + tail_idx = next_index(tail_idx, qmap->entries); + } + /* save index in queue, where we have to start flushing */ + qmap->next_wqe_idx = wqe_idx; + return 0; +} + +static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) +{ + u64 h_ret; + void *send_wqe_p, *recv_wqe_p; + int ret; + unsigned long flags; + int qp_num = my_qp->ib_qp.qp_num; + + /* this hcall is not supported on base QPs */ + if (my_qp->ext_type != EQPT_SRQBASE) { + /* get send and receive wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &send_wqe_p, &recv_wqe_p, 4); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "disable_and_get_wqe() " + "failed ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + + /* + * acquire lock to ensure that nobody is polling the cq which + * could mean that the qmap->tail pointer is in an + * inconsistent state. 
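calc_left_cqes() above walks the ring from the saved tail up to the WQE the hardware stopped at, counting only entries that requested a completion. The same walk as a standalone sketch; next_index() is defined elsewhere in the driver and is assumed here to be the usual wrap-around increment:

static inline unsigned int example_next_index(unsigned int i, unsigned int n)
{
        return (i + 1) % n;     /* assumed to match the driver's next_index() */
}

static unsigned int example_count_left_cqes(const struct ehca_qmap_entry *map,
                                            unsigned int entries,
                                            unsigned int tail,
                                            unsigned int wqe_idx)
{
        unsigned int i = example_next_index(tail, entries);
        unsigned int left = 0;

        while (i != wqe_idx) {
                if (map[i].cqe_req)     /* count only WQEs that wanted a CQE */
                        left++;
                i = example_next_index(i, entries);
        }
        return left;            /* becomes qmap->left_to_poll */
}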
+ */ + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue, + &my_qp->sq_map); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + if (ret) + return ret; + + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue, + &my_qp->rq_map); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + if (ret) + return ret; + } else { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + my_qp->sq_map.left_to_poll = 0; + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + my_qp->rq_map.left_to_poll = 0; + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + } + + /* this assures flush cqes being generated only for pending wqes */ + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 1); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + if (HAS_RQ(my_qp)) { + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 0); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, + flags); + } + } + + return 0; +} + /* * internal_modify_qp with circumvention to handle aqp0 properly * smi_reset2init indicates if this is an internal reset-to-init-call for @@ -1041,6 +1248,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, u64 update_mask; u64 h_ret; int bad_wqe_cnt = 0; + int is_user = 0; int squeue_locked = 0; unsigned long flags = 0; @@ -1058,11 +1266,13 @@ static int internal_modify_qp(struct ib_qp *ibqp, mqpcb, my_qp->galpas.kernel); if (h_ret != H_SUCCESS) { ehca_err(ibqp->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%li", + "ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, ibqp->qp_num, h_ret); ret = ehca2ib_return_code(h_ret); goto modify_qp_exit1; } + if (ibqp->uobject) + is_user = 1; qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); @@ -1119,7 +1329,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; if (!smi_reset2init && !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, - attr_mask)) { + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) { ret = -EINVAL; ehca_err(ibqp->device, "Invalid qp transition new_state=%x cur_state=%x " @@ -1460,6 +1670,8 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit2; } mqpcb->path_migration_state = attr->path_mig_state + 1; + if (attr->path_mig_state == IB_MIG_REARM) + my_qp->mig_armed = 1; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); } @@ -1485,7 +1697,7 @@ static int internal_modify_qp(struct ib_qp *ibqp, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); - ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%li " + ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli " "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); goto modify_qp_exit2; } @@ -1518,22 +1730,39 @@ static int internal_modify_qp(struct ib_qp *ibqp, ret = ehca2ib_return_code(h_ret); ehca_err(ibqp->device, "ENABLE in context of " "RESET_2_INIT failed! 
Maybe you didn't get " - "a LID h_ret=%li ehca_qp=%p qp_num=%x", + "a LID h_ret=%lli ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); goto modify_qp_exit2; } } + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR) + && !is_user) { + ret = check_for_left_cqes(my_qp, shca); + if (ret) + goto modify_qp_exit2; + } if (statetrans == IB_QPST_ANY2RESET) { ipz_qeit_reset(&my_qp->ipz_rqueue); ipz_qeit_reset(&my_qp->ipz_squeue); + + if (qp_cur_state == IB_QPS_ERR && !is_user) { + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + if (HAS_RQ(my_qp)) + del_from_err_list(my_qp->recv_cq, + &my_qp->rq_err_node); + } + if (!is_user) + reset_queue_map(&my_qp->sq_map); + + if (HAS_RQ(my_qp) && !is_user) + reset_queue_map(&my_qp->rq_map); } if (attr_mask & IB_QP_QKEY) my_qp->qkey = attr->qkey; - my_qp->state = qp_new_state; - modify_qp_exit2: if (squeue_locked) { /* this means: sqe -> rts */ spin_unlock_irqrestore(&my_qp->spinlock_s, flags); @@ -1549,6 +1778,8 @@ modify_qp_exit1: int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + int ret = 0; + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, ib_device); struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); @@ -1595,12 +1826,18 @@ int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, attr->qp_state, my_qp->init_attr.port_num, ibqp->qp_type); spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - return 0; + goto out; } spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); } - return internal_modify_qp(ibqp, attr, attr_mask, 0); + ret = internal_modify_qp(ibqp, attr, attr_mask, 0); + +out: + if ((ret == 0) && (attr_mask & IB_QP_STATE)) + my_qp->state = attr->qp_state; + + return ret; } void ehca_recover_sqp(struct ib_qp *sqp) @@ -1681,7 +1918,7 @@ int ehca_query_qp(struct ib_qp *qp, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); ehca_err(qp->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%li", + "ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, qp->qp_num, h_ret); goto query_qp_exit1; } @@ -1724,19 +1961,13 @@ int ehca_query_qp(struct ib_qp *qp, qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; qp_attr->dest_qp_num = qpcb->dest_qp_nr; - qp_attr->pkey_index = - EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->prim_p_key_idx); - - qp_attr->port_num = - EHCA_BMASK_GET(MQPCB_PRIM_PHYS_PORT, qpcb->prim_phys_port); - + qp_attr->pkey_index = qpcb->prim_p_key_idx; + qp_attr->port_num = qpcb->prim_phys_port; qp_attr->timeout = qpcb->timeout; qp_attr->retry_cnt = qpcb->retry_count; qp_attr->rnr_retry = qpcb->rnr_retry_count; - qp_attr->alt_pkey_index = - EHCA_BMASK_GET(MQPCB_PRIM_P_KEY_IDX, qpcb->alt_p_key_idx); - + qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; qp_attr->alt_port_num = qpcb->alt_phys_port; qp_attr->alt_timeout = qpcb->timeout_al; @@ -1823,8 +2054,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, update_mask |= EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) | EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); - mqpcb->curr_srq_limit = - EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit); + mqpcb->curr_srq_limit = attr->srq_limit; mqpcb->qp_aff_asyn_ev_log_reg = EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); } @@ -1846,7 +2076,7 @@ int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); - ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%li " + ehca_err(ibsrq->device, 
"hipz_h_modify_qp() failed h_ret=%lli " "ehca_qp=%p qp_num=%x", h_ret, my_qp, my_qp->real_qp_num); } @@ -1880,15 +2110,14 @@ int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) if (h_ret != H_SUCCESS) { ret = ehca2ib_return_code(h_ret); ehca_err(srq->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%li", + "ehca_qp=%p qp_num=%x h_ret=%lli", my_qp, my_qp->real_qp_num, h_ret); goto query_srq_exit1; } srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; srq_attr->max_sge = 3; - srq_attr->srq_limit = EHCA_BMASK_GET( - MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit); + srq_attr->srq_limit = qpcb->curr_srq_limit; if (ehca_debug_level >= 2) ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); @@ -1910,10 +2139,12 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, int ret; u64 h_ret; u8 port_num; + int is_user = 0; enum ib_qp_type qp_type; unsigned long flags; if (uobject) { + is_user = 1; if (my_qp->mm_count_galpa || my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { ehca_err(dev, "Resources still referenced in " @@ -1936,12 +2167,22 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, idr_remove(&ehca_qp_idr, my_qp->token); write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + /* + * SRQs will never get into an error list and do not have a recv_cq, + * so we need to skip them here. + */ + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) + del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); + + if (HAS_SQ(my_qp) && !is_user) + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + /* now wait until all pending events have completed */ wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); if (h_ret != H_SUCCESS) { - ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li " + ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli " "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); return ehca2ib_return_code(h_ret); } @@ -1961,7 +2202,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, if (qp_type == IB_QPT_GSI) { struct ib_event event; ehca_info(dev, "device %s: port %x is inactive.", - shca->ib_device.name, port_num); + shca->ib_device.name, port_num); event.device = &shca->ib_device; event.event = IB_EVENT_PORT_ERR; event.element.port_num = port_num; @@ -1969,10 +2210,16 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, ib_dispatch_event(&event); } - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - if (HAS_SQ(my_qp)) + if (!is_user) + vfree(my_qp->rq_map.map); + } + if (HAS_SQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); + if (!is_user) + vfree(my_qp->sq_map.map); + } kmem_cache_free(qp_cache, my_qp); atomic_dec(&shca->num_qps); return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index dd9bc68f1c7..47f94984353 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -42,7 +42,6 @@ */ -#include <asm-powerpc/system.h> #include "ehca_classes.h" #include "ehca_tools.h" #include "ehca_qes.h" @@ -53,9 +52,25 @@ /* in RC traffic, insert an empty RDMA READ every this many packets */ #define ACK_CIRC_THRESHOLD 2000000 +static u64 replace_wr_id(u64 wr_id, u16 idx) +{ + u64 ret; + + ret = wr_id & ~QMAP_IDX_MASK; + ret |= idx & QMAP_IDX_MASK; + + return ret; +} + +static u16 get_app_wr_id(u64 wr_id) +{ + return wr_id & QMAP_IDX_MASK; +} + static inline int 
ehca_write_rwqe(struct ipz_queue *ipz_rqueue, struct ehca_wqe *wqe_p, - struct ib_recv_wr *recv_wr) + struct ib_recv_wr *recv_wr, + u32 rq_map_idx) { u8 cnt_ds; if (unlikely((recv_wr->num_sge < 0) || @@ -69,7 +84,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = recv_wr->wr_id; + wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); wqe_p->nr_of_data_seg = recv_wr->num_sge; for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { @@ -120,7 +135,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) mad_hdr->attr_mod); } for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = (u8 *)abs_to_virt(sge->addr); + u8 *data = __va(sge->addr); ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " "lkey=%x", idx, j, data, sge->length, sge->lkey); @@ -139,12 +154,14 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) static inline int ehca_write_swqe(struct ehca_qp *qp, struct ehca_wqe *wqe_p, const struct ib_send_wr *send_wr, + u32 sq_map_idx, int hidden) { u32 idx; u64 dma_length; struct ehca_av *my_av; u32 remote_qkey = send_wr->wr.ud.remote_qkey; + struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; if (unlikely((send_wr->num_sge < 0) || (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { @@ -157,7 +174,11 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = send_wr->wr_id; + wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); + + qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 0; switch (send_wr->opcode) { case IB_WR_SEND: @@ -182,8 +203,10 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, if ((send_wr->send_flags & IB_SEND_SIGNALED || qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) - && !hidden) + && !hidden) { wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; + qmap_entry->cqe_req = 1; + } if (send_wr->opcode == IB_WR_SEND_WITH_IMM || send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { @@ -245,7 +268,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* no break is intentional here */ case IB_QPT_RC: /* TODO: atomic not implemented */ - wqe_p->u.nud.remote_virtual_adress = + wqe_p->u.nud.remote_virtual_address = send_wr->wr.rdma.remote_addr; wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; @@ -376,33 +399,36 @@ static inline void map_ib_wc_status(u32 cqe_status, static inline int post_one_send(struct ehca_qp *my_qp, struct ib_send_wr *cur_send_wr, - struct ib_send_wr **bad_send_wr, int hidden) { struct ehca_wqe *wqe_p; int ret; + u32 sq_map_idx; u64 start_offset = my_qp->ipz_squeue.current_q_offset; /* get pointer next to free WQE */ wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); if (unlikely(!wqe_p)) { /* too many posted work requests: queue overflow */ - if (bad_send_wr) - *bad_send_wr = cur_send_wr; ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " "qp_num=%x", my_qp->ib_qp.qp_num); return -ENOMEM; } + + /* + * Get the index of the WQE in the send queue. The same index is used + * for writing into the sq_map. 
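replace_wr_id() and get_app_wr_id() above split the 64-bit wr_id: while a WQE is in flight its low bits carry the queue-map slot, and the consumer's original low bits are parked in the qmap entry. A round-trip sketch; QMAP_IDX_MASK is defined outside this hunk, so a 16-bit mask is assumed purely for illustration:

#include <linux/types.h>

#define EXAMPLE_QMAP_IDX_MASK 0xffffULL         /* assumed width */

static void example_wr_id_roundtrip(void)
{
        u64 app_wr_id = 0xdeadbeefcafe0042ULL;  /* consumer-chosen wr_id */
        u16 map_idx = 0x0123;                   /* slot in sq_map/rq_map */

        /* on post: park the app's low bits, put the slot index in the WQE */
        u16 saved = app_wr_id & EXAMPLE_QMAP_IDX_MASK;  /* get_app_wr_id() */
        u64 hw_id = (app_wr_id & ~EXAMPLE_QMAP_IDX_MASK) | map_idx;

        /* on completion: rebuild the original wr_id for the consumer */
        u64 restored = (hw_id & ~EXAMPLE_QMAP_IDX_MASK) | saved;

        BUG_ON(restored != app_wr_id);          /* replace_wr_id() round-trips */
}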
+ */ + sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; + /* write a SEND WQE into the QUEUE */ - ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, hidden); + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); /* * if something failed, * reset the free entry pointer to the start value */ if (unlikely(ret)) { my_qp->ipz_squeue.current_q_offset = start_offset; - if (bad_send_wr) - *bad_send_wr = cur_send_wr; ehca_err(my_qp->ib_qp.device, "Could not write WQE " "qp_num=%x", my_qp->ib_qp.qp_num); return -EINVAL; @@ -416,7 +442,6 @@ int ehca_post_send(struct ib_qp *qp, struct ib_send_wr **bad_send_wr) { struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - struct ib_send_wr *cur_send_wr; int wqe_cnt = 0; int ret = 0; unsigned long flags; @@ -425,7 +450,8 @@ int ehca_post_send(struct ib_qp *qp, if (unlikely(my_qp->state < IB_QPS_RTS)) { ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", my_qp->state, qp->qp_num); - return -EINVAL; + ret = -EINVAL; + goto out; } /* LOCK the QUEUE */ @@ -444,24 +470,21 @@ int ehca_post_send(struct ib_qp *qp, struct ib_send_wr circ_wr; memset(&circ_wr, 0, sizeof(circ_wr)); circ_wr.opcode = IB_WR_RDMA_READ; - post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */ + post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ wqe_cnt++; ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); my_qp->message_count = my_qp->packet_count = 0; } /* loop processes list of send reqs */ - for (cur_send_wr = send_wr; cur_send_wr != NULL; - cur_send_wr = cur_send_wr->next) { - ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0); + while (send_wr) { + ret = post_one_send(my_qp, send_wr, 0); if (unlikely(ret)) { - /* if one or more WQEs were successful, don't fail */ - if (wqe_cnt) - ret = 0; goto post_send_exit0; } wqe_cnt++; - } /* eof for cur_send_wr */ + send_wr = send_wr->next; + } post_send_exit0: iosync(); /* serialize GAL register access */ @@ -471,6 +494,10 @@ post_send_exit0: my_qp, qp->qp_num, wqe_cnt, ret); my_qp->message_count += wqe_cnt; spin_unlock_irqrestore(&my_qp->spinlock_s, flags); + +out: + if (ret) + *bad_send_wr = send_wr; return ret; } @@ -479,56 +506,64 @@ static int internal_post_recv(struct ehca_qp *my_qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr) { - struct ib_recv_wr *cur_recv_wr; struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; + u32 rq_map_idx; unsigned long flags; + struct ehca_qmap_entry *qmap_entry; if (unlikely(!HAS_RQ(my_qp))) { ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", my_qp, my_qp->real_qp_num, my_qp->ext_type); - return -ENODEV; + ret = -ENODEV; + goto out; } /* LOCK the QUEUE */ spin_lock_irqsave(&my_qp->spinlock_r, flags); - /* loop processes list of send reqs */ - for (cur_recv_wr = recv_wr; cur_recv_wr != NULL; - cur_recv_wr = cur_recv_wr->next) { + /* loop processes list of recv reqs */ + while (recv_wr) { u64 start_offset = my_qp->ipz_rqueue.current_q_offset; /* get pointer next to free WQE */ wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); if (unlikely(!wqe_p)) { /* too many posted work requests: queue overflow */ - if (bad_recv_wr) - *bad_recv_wr = cur_recv_wr; - if (wqe_cnt == 0) { - ret = -ENOMEM; - ehca_err(dev, "Too many posted WQEs " - "qp_num=%x", my_qp->real_qp_num); - } + ret = -ENOMEM; + ehca_err(dev, "Too many posted WQEs " + "qp_num=%x", my_qp->real_qp_num); goto post_recv_exit0; } + /* + * Get the index of the WQE in the recv queue. The same index + * is used for writing into the rq_map. 
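With the restructured ehca_post_send() above, a failure is no longer masked to success after a partial post; instead *bad_send_wr always points at the first work request that was not consumed. A hedged caller-side sketch of how that contract is used:

#include <linux/printk.h>
#include <rdma/ib_verbs.h>

static int example_post_list(struct ib_qp *qp, struct ib_send_wr *first)
{
        struct ib_send_wr *bad = NULL;
        int ret = ib_post_send(qp, first, &bad);

        if (ret)        /* resubmission, if any, restarts at 'bad' */
                pr_err("post_send failed (%d), first unposted wr_id=%llx\n",
                       ret, bad ? (unsigned long long)bad->wr_id : 0ULL);
        return ret;
}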
+ */ + rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; + /* write a RECV WQE into the QUEUE */ - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr); + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, + rq_map_idx); /* * if something failed, * reset the free entry pointer to the start value */ if (unlikely(ret)) { my_qp->ipz_rqueue.current_q_offset = start_offset; - *bad_recv_wr = cur_recv_wr; - if (wqe_cnt == 0) { - ret = -EINVAL; - ehca_err(dev, "Could not write WQE " - "qp_num=%x", my_qp->real_qp_num); - } + ret = -EINVAL; + ehca_err(dev, "Could not write WQE " + "qp_num=%x", my_qp->real_qp_num); goto post_recv_exit0; } + + qmap_entry = &my_qp->rq_map.map[rq_map_idx]; + qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 1; + wqe_cnt++; - } /* eof for cur_recv_wr */ + recv_wr = recv_wr->next; + } /* eof for recv_wr */ post_recv_exit0: iosync(); /* serialize GAL register access */ @@ -537,6 +572,11 @@ post_recv_exit0: ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", my_qp, my_qp->real_qp_num, wqe_cnt, ret); spin_unlock_irqrestore(&my_qp->spinlock_r, flags); + +out: + if (ret) + *bad_recv_wr = recv_wr; + return ret; } @@ -550,6 +590,7 @@ int ehca_post_recv(struct ib_qp *qp, if (unlikely(my_qp->state == IB_QPS_RESET)) { ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", my_qp->state, qp->qp_num); + *bad_recv_wr = recv_wr; return -EINVAL; } @@ -583,13 +624,15 @@ static const u8 ib_wc_opcode[255] = { /* internal function to poll one entry of cq */ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) { - int ret = 0; + int ret = 0, qmap_tail_idx; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); struct ehca_cqe *cqe; struct ehca_qp *my_qp; + struct ehca_qmap_entry *qmap_entry; + struct ehca_queue_map *qmap; int cqe_count = 0, is_error; -poll_cq_one_read_cqe: +repoll: cqe = (struct ehca_cqe *) ipz_qeit_get_inc_valid(&my_cq->ipz_queue); if (!cqe) { @@ -617,7 +660,7 @@ poll_cq_one_read_cqe: ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", my_cq->cq_number, cqe->local_qp_number); /* ignore this purged cqe */ - goto poll_cq_one_read_cqe; + goto repoll; } spin_lock_irqsave(&qp->spinlock_s, flags); purgeflag = qp->sqerr_purgeflag; @@ -636,7 +679,7 @@ poll_cq_one_read_cqe: * that caused sqe and turn off purge flag */ qp->sqerr_purgeflag = 0; - goto poll_cq_one_read_cqe; + goto repoll; } } @@ -654,8 +697,62 @@ poll_cq_one_read_cqe: my_cq, my_cq->cq_number); } - /* we got a completion! */ - wc->wr_id = cqe->work_request_id; + read_lock(&ehca_qp_idr_lock); + my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); + read_unlock(&ehca_qp_idr_lock); + if (!my_qp) + goto repoll; + wc->qp = &my_qp->ib_qp; + + qmap_tail_idx = get_app_wr_id(cqe->work_request_id); + if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) + /* We got a send completion. */ + qmap = &my_qp->sq_map; + else + /* We got a receive completion. 
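ehca_poll_cq_one() above now resolves the owning QP from the CQE's qp_token before touching the queue maps, under the same rwlock that guards token allocation. The lookup, lifted into a standalone helper for clarity (the driver open-codes it):

static struct ehca_qp *example_lookup_qp(u32 token)
{
        struct ehca_qp *qp;

        read_lock(&ehca_qp_idr_lock);
        qp = idr_find(&ehca_qp_idr, token);
        read_unlock(&ehca_qp_idr_lock);

        return qp;      /* NULL if the QP vanished; poll_cq_one just repolls */
}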
*/ + qmap = &my_qp->rq_map; + + /* advance the tail pointer */ + qmap->tail = qmap_tail_idx; + + if (is_error) { + /* + * set left_to_poll to 0 because in error state, we will not + * get any additional CQEs + */ + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); + my_qp->sq_map.left_to_poll = 0; + ehca_add_to_err_list(my_qp, 1); + + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); + my_qp->rq_map.left_to_poll = 0; + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + + qmap_entry = &qmap->map[qmap_tail_idx]; + if (qmap_entry->reported) { + ehca_warn(cq->device, "Double cqe on qp_num=%#x", + my_qp->real_qp_num); + /* found a double cqe, discard it and read next one */ + goto repoll; + } + + wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); + qmap_entry->reported = 1; + + /* if left_to_poll is decremented to 0, add the QP to the error list */ + if (qmap->left_to_poll > 0) { + qmap->left_to_poll--; + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + ehca_add_to_err_list(my_qp, 1); + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + } /* eval ib_wc_opcode */ wc->opcode = ib_wc_opcode[cqe->optype]-1; @@ -667,7 +764,7 @@ poll_cq_one_read_cqe: ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", my_cq, my_cq->cq_number); /* update also queue adder to throw away this entry!!! */ - goto poll_cq_one_exit0; + goto repoll; } /* eval ib_wc_status */ @@ -678,17 +775,16 @@ poll_cq_one_read_cqe: } else wc->status = IB_WC_SUCCESS; - read_lock(&ehca_qp_idr_lock); - my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); - wc->qp = &my_qp->ib_qp; - read_unlock(&ehca_qp_idr_lock); - wc->byte_len = cqe->nr_bytes_transferred; wc->pkey_index = cqe->pkey_index; wc->slid = cqe->rlid; wc->dlid_path_bits = cqe->dlid; wc->src_qp = cqe->remote_qp_number; - wc->wc_flags = cqe->w_completion_flags; + /* + * HW has "Immed data present" and "GRH present" in bits 6 and 5. + * SW defines those in bits 1 and 0, so we can just shift and mask. 
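Spelled out, the shift-and-mask below moves CQE bit 5 (GRH present) onto IB_WC_GRH (bit 0) and CQE bit 6 (immediate present) onto IB_WC_WITH_IMM (bit 1), matching ib_verbs.h of this period; an equivalence sketch:

#include <rdma/ib_verbs.h>

static int example_translate_wc_flags(u32 hw_flags)
{
        int wc_flags = (hw_flags >> 5) & 3;

        /* bit-by-bit equivalent of the expression used below */
        WARN_ON(wc_flags != (((hw_flags & 0x20) ? IB_WC_GRH : 0) |
                             ((hw_flags & 0x40) ? IB_WC_WITH_IMM : 0)));
        return wc_flags;
}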
+ */ + wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); wc->sl = cqe->service_level; @@ -699,13 +795,89 @@ poll_cq_one_exit0: return ret; } +static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, + struct ib_wc *wc, int num_entries, + struct ipz_queue *ipz_queue, int on_sq) +{ + int nr = 0; + struct ehca_wqe *wqe; + u64 offset; + struct ehca_queue_map *qmap; + struct ehca_qmap_entry *qmap_entry; + + if (on_sq) + qmap = &my_qp->sq_map; + else + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + while ((nr < num_entries) && (qmap_entry->reported == 0)) { + /* generate flush CQE */ + + memset(wc, 0, sizeof(*wc)); + + offset = qmap->next_wqe_idx * ipz_queue->qe_size; + wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); + if (!wqe) { + ehca_err(cq->device, "Invalid wqe offset=%#llx on " + "qp_num=%#x", offset, my_qp->real_qp_num); + return nr; + } + + wc->wr_id = replace_wr_id(wqe->work_request_id, + qmap_entry->app_wr_id); + + if (on_sq) { + switch (wqe->optype) { + case WQE_OPTYPE_SEND: + wc->opcode = IB_WC_SEND; + break; + case WQE_OPTYPE_RDMAWRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case WQE_OPTYPE_RDMAREAD: + wc->opcode = IB_WC_RDMA_READ; + break; + default: + ehca_err(cq->device, "Invalid optype=%x", + wqe->optype); + return nr; + } + } else + wc->opcode = IB_WC_RECV; + + if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { + wc->ex.imm_data = wqe->immediate_data; + wc->wc_flags |= IB_WC_WITH_IMM; + } + + wc->status = IB_WC_WR_FLUSH_ERR; + + wc->qp = &my_qp->ib_qp; + + /* mark as reported and advance next_wqe pointer */ + qmap_entry->reported = 1; + qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, + qmap->entries); + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + wc++; nr++; + } + + return nr; + +} + int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); int nr; + struct ehca_qp *err_qp; struct ib_wc *current_wc = wc; int ret = 0; unsigned long flags; + int entries_left = num_entries; if (num_entries < 1) { ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " @@ -715,15 +887,40 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) } spin_lock_irqsave(&my_cq->spinlock, flags); - for (nr = 0; nr < num_entries; nr++) { + + /* generate flush cqes for send queues */ + list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_squeue, 1); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + /* generate flush cqes for receive queues */ + list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_rqueue, 0); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + for (nr = 0; nr < entries_left; nr++) { ret = ehca_poll_cq_one(cq, current_wc); if (ret) break; current_wc++; } /* eof for nr */ + entries_left -= nr; + spin_unlock_irqrestore(&my_cq->spinlock, flags); if (ret == -EAGAIN || !ret) - ret = nr; + ret = num_entries - entries_left; poll_cq_exit0: return ret; diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c index 706d97ad555..dba8f9f8b99 100644 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ b/drivers/infiniband/hw/ehca/ehca_sqp.c @@ -46,11 +46,11 @@ #include "ehca_iverbs.h" #include "hcp_if.h" -#define 
IB_MAD_STATUS_REDIRECT __constant_htons(0x0002) -#define IB_MAD_STATUS_UNSUP_VERSION __constant_htons(0x0004) -#define IB_MAD_STATUS_UNSUP_METHOD __constant_htons(0x0008) +#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002) +#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004) +#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008) -#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001) +#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) /** * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue @@ -85,7 +85,7 @@ u64 ehca_define_sqp(struct ehca_shca *shca, if (ret != H_SUCCESS) { ehca_err(&shca->ib_device, - "Can't define AQP1 for port %x. h_ret=%li", + "Can't define AQP1 for port %x. h_ret=%lli", port, ret); return ret; } @@ -125,14 +125,30 @@ struct ib_perf { u8 data[192]; } __attribute__ ((packed)); +/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */ +struct tcslfl { + u32 tc:8; + u32 sl:4; + u32 fl:20; +} __attribute__ ((packed)); + +/* IP Version/TC/FL packed into 32 bits, as in GRH */ +struct vertcfl { + u32 ver:4; + u32 tc:8; + u32 fl:20; +} __attribute__ ((packed)); static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { struct ib_perf *in_perf = (struct ib_perf *)in_mad; struct ib_perf *out_perf = (struct ib_perf *)out_mad; struct ib_class_port_info *poi = (struct ib_class_port_info *)out_perf->data; + struct tcslfl *tcslfl = + (struct tcslfl *)&poi->redirect_tcslfl; struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, ib_device); struct ehca_sport *sport = &shca->sport[port_num - 1]; @@ -158,10 +174,29 @@ static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, poi->base_version = 1; poi->class_version = 1; poi->resp_time_value = 18; - poi->redirect_lid = sport->saved_attr.lid; - poi->redirect_qp = sport->pma_qp_nr; + + /* copy local routing information from WC where applicable */ + tcslfl->sl = in_wc->sl; + poi->redirect_lid = + sport->saved_attr.lid | in_wc->dlid_path_bits; + poi->redirect_qp = sport->pma_qp_nr; poi->redirect_qkey = IB_QP1_QKEY; - poi->redirect_pkey = IB_DEFAULT_PKEY_FULL; + + ehca_query_pkey(ibdev, port_num, in_wc->pkey_index, + &poi->redirect_pkey); + + /* if request was globally routed, copy route info */ + if (in_grh) { + struct vertcfl *vertcfl = + (struct vertcfl *)&in_grh->version_tclass_flow; + memcpy(poi->redirect_gid, in_grh->dgid.raw, + sizeof(poi->redirect_gid)); + tcslfl->tc = vertcfl->tc; + tcslfl->fl = vertcfl->fl; + } else + /* else only fill in default GID */ + ehca_query_gid(ibdev, port_num, 0, + (union ib_gid *)&poi->redirect_gid); ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", sport->saved_attr.lid, sport->pma_qp_nr); @@ -183,12 +218,11 @@ perf_reply: int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, - struct ib_mad *in_mad, - struct ib_mad *out_mad) + struct ib_mad *in_mad, struct ib_mad *out_mad) { int ret; - if (!port_num || port_num > ibdev->phys_port_cnt) + if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) return IB_MAD_RESULT_FAILURE; /* accept only pma request */ @@ -196,7 +230,8 @@ int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS; ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); - ret = ehca_process_perf(ibdev, port_num, in_mad, out_mad); + ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh, + in_mad, out_mad); return ret; } 
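In the ehca_sqp.c hunk above, __constant_htons() gives way to cpu_to_be16(), which constant-folds on literals, carries the sparse-checked __be16 type, and therefore still works in contexts that need compile-time constants. A sketch, assuming the usual byteorder headers:

#include <linux/types.h>
#include <linux/printk.h>
#include <asm/byteorder.h>

static void example_check_mad_status(__be16 status)
{
        switch (status) {
        case cpu_to_be16(0x0004):       /* IB_MAD_STATUS_UNSUP_VERSION */
                pr_debug("unsupported MAD class version\n");
                break;
        default:
                break;
        }
}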
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index ec950bf8c47..d280b12aae6 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -54,13 +54,11 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/vmalloc.h> -#include <linux/version.h> #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/device.h> -#include <asm/atomic.h> -#include <asm/abs_addr.h> +#include <linux/atomic.h> #include <asm/ibmebus.h> #include <asm/io.h> #include <asm/pgtable.h> @@ -117,7 +115,7 @@ extern int ehca_debug_level; unsigned char *deb = (unsigned char *)(adr); \ for (x = 0; x < l; x += 16) { \ printk(KERN_INFO "EHCA_DMP:%s " format \ - " adr=%p ofs=%04x %016lx %016lx\n", \ + " adr=%p ofs=%04x %016llx %016llx\n", \ __func__, ##args, deb, x, \ *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ deb += 16; \ diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c index e43ed8f8a0c..1a1d5d99fcf 100644 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c @@ -40,6 +40,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> + #include "ehca_classes.h" #include "ehca_iverbs.h" #include "ehca_mrmw.h" @@ -95,7 +97,7 @@ static void ehca_mm_close(struct vm_area_struct *vma) vma->vm_start, vma->vm_end, *count); } -static struct vm_operations_struct vm_ops = { +static const struct vm_operations_struct vm_ops = { .open = ehca_mm_open, .close = ehca_mm_close, }; @@ -114,8 +116,8 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, physical = galpas->user.fw_handle; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - ehca_gen_dbg("vsize=%lx physical=%lx", vsize, physical); - /* VM_IO | VM_RESERVED are set by remap_pfn_range() */ + ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); + /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, vma->vm_page_prot); if (unlikely(ret)) { @@ -137,7 +139,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, u64 start, ofs; struct page *page; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; start = vma->vm_start; for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index 415d3a465de..89517ffb438 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -90,26 +90,6 @@ static DEFINE_SPINLOCK(hcall_lock); -static u32 get_longbusy_msecs(int longbusy_rc) -{ - switch (longbusy_rc) { - case H_LONG_BUSY_ORDER_1_MSEC: - return 1; - case H_LONG_BUSY_ORDER_10_MSEC: - return 10; - case H_LONG_BUSY_ORDER_100_MSEC: - return 100; - case H_LONG_BUSY_ORDER_1_SEC: - return 1000; - case H_LONG_BUSY_ORDER_10_SEC: - return 10000; - case H_LONG_BUSY_ORDER_100_SEC: - return 100000; - default: - return 1; - } -} - static long ehca_plpar_hcall_norets(unsigned long opcode, unsigned long arg1, unsigned long arg2, @@ -226,7 +206,7 @@ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, u32 *eq_ist) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; u64 allocate_controls; /* resource type */ @@ -249,7 +229,7 @@ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, 
*eq_ist = (u32)outs[5]; if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resource - ret=%li ", ret); + ehca_gen_err("Not enough resource - ret=%lli ", ret); return ret; } @@ -269,8 +249,9 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, struct ehca_cq *cq, struct ehca_alloc_cq_parms *param) { + int rc; u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ @@ -283,21 +264,33 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, param->act_nr_of_entries = (u32)outs[3]; param->act_pages = (u32)outs[4]; - if (ret == H_SUCCESS) - hcp_galpas_ctor(&cq->galpas, outs[5], outs[6]); + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[5]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. ret=%li", ret); + ehca_gen_err("Not enough resources. ret=%lli", ret); return ret; } u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms) + struct ehca_alloc_qp_parms *parms, int is_user) { + int rc; u64 ret; u64 allocate_controls, max_r10_reg, r11, r12; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; allocate_controls = EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type) @@ -358,11 +351,22 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, parms->rqueue.queue_size = (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); - if (ret == H_SUCCESS) - hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]); + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[6]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + parms->qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. ret=%li", ret); + ehca_gen_err("Not enough resources. 
ret=%lli", ret); return ret; } @@ -372,7 +376,7 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, struct hipz_query_port *query_port_response_block) { u64 ret; - u64 r_cb = virt_to_abs(query_port_response_block); + u64 r_cb = __pa(query_port_response_block); if (r_cb & (EHCA_PAGESIZE-1)) { ehca_gen_err("response block not page aligned"); @@ -414,7 +418,7 @@ u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, struct hipz_query_hca *query_hca_rblock) { - u64 r_cb = virt_to_abs(query_hca_rblock); + u64 r_cb = __pa(query_hca_rblock); if (r_cb & (EHCA_PAGESIZE-1)) { ehca_gen_err("response_block=%p not page aligned", @@ -454,7 +458,7 @@ u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, const u64 count) { if (count != 1) { - ehca_gen_err("Ppage counter=%lx", count); + ehca_gen_err("Ppage counter=%llx", count); return H_PARAMETER; } return hipz_h_register_rpage(adapter_handle, @@ -489,7 +493,7 @@ u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, const struct h_galpa gal) { if (count != 1) { - ehca_gen_err("Page counter=%lx", count); + ehca_gen_err("Page counter=%llx", count); return H_PARAMETER; } @@ -508,7 +512,7 @@ u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, const struct h_galpa galpa) { if (count > 1) { - ehca_gen_err("Page counter=%lx", count); + ehca_gen_err("Page counter=%llx", count); return H_PARAMETER; } @@ -525,7 +529,7 @@ u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, int dis_and_get_function_code) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, adapter_handle.handle, /* r4 */ @@ -548,16 +552,16 @@ u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, struct h_galpa gal) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_MODIFY_QP, outs, adapter_handle.handle, /* r4 */ qp_handle.handle, /* r5 */ update_mask, /* r6 */ - virt_to_abs(mqpcb), /* r7 */ + __pa(mqpcb), /* r7 */ 0, 0, 0, 0, 0); if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Insufficient resources ret=%li", ret); + ehca_gen_err("Insufficient resources ret=%lli", ret); return ret; } @@ -571,7 +575,7 @@ u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, return ehca_plpar_hcall_norets(H_QUERY_QP, adapter_handle.handle, /* r4 */ qp_handle.handle, /* r5 */ - virt_to_abs(qqpcb), /* r6 */ + __pa(qqpcb), /* r6 */ 0, 0, 0, 0); } @@ -579,7 +583,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, struct ehca_qp *qp) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = hcp_galpas_dtor(&qp->galpas); if (ret) { @@ -593,7 +597,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, qp->ipz_qp_handle.handle, /* r6 */ 0, 0, 0, 0, 0, 0); if (ret == H_HARDWARE) - ehca_gen_err("HCA not operational. ret=%li", ret); + ehca_gen_err("HCA not operational. ret=%lli", ret); ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, adapter_handle.handle, /* r4 */ @@ -601,7 +605,7 @@ u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, 0, 0, 0, 0, 0); if (ret == H_RESOURCE) - ehca_gen_err("Resource still in use. ret=%li", ret); + ehca_gen_err("Resource still in use. 
ret=%lli", ret); return ret; } @@ -625,7 +629,7 @@ u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, u32 * bma_qp_nr) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs, adapter_handle.handle, /* r4 */ @@ -636,7 +640,7 @@ u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, *bma_qp_nr = (u32)outs[1]; if (ret == H_ALIAS_EXIST) - ehca_gen_err("AQP1 already exists. ret=%li", ret); + ehca_gen_err("AQP1 already exists. ret=%lli", ret); return ret; } @@ -658,7 +662,7 @@ u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, 0, 0); if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. ret=%li", ret); + ehca_gen_err("Not enough resources. ret=%lli", ret); return ret; } @@ -697,7 +701,7 @@ u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, 0, 0, 0, 0); if (ret == H_RESOURCE) - ehca_gen_err("H_FREE_RESOURCE failed ret=%li ", ret); + ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret); return ret; } @@ -719,7 +723,7 @@ u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, 0, 0, 0, 0, 0); if (ret == H_RESOURCE) - ehca_gen_err("Resource in use. ret=%li ", ret); + ehca_gen_err("Resource in use. ret=%lli ", ret); return ret; } @@ -733,7 +737,7 @@ u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, struct ehca_mr_hipzout_parms *outparms) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ @@ -763,7 +767,7 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, if (count > 1) { u64 *kpage; int i; - kpage = (u64 *)abs_to_virt(logical_address_of_page); + kpage = __va(logical_address_of_page); for (i = 0; i < count; i++) ehca_gen_dbg("kpage[%d]=%p", i, (void *)kpage[i]); @@ -774,9 +778,9 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { ehca_gen_err("logical_address_of_page not on a 4k boundary " - "adapter_handle=%lx mr=%p mr_handle=%lx " + "adapter_handle=%llx mr=%p mr_handle=%llx " "pagesize=%x queue_type=%x " - "logical_address_of_page=%lx count=%lx", + "logical_address_of_page=%llx count=%llx", adapter_handle.handle, mr, mr->ipz_mr_handle.handle, pagesize, queue_type, logical_address_of_page, count); @@ -794,7 +798,7 @@ u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, struct ehca_mr_hipzout_parms *outparms) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_QUERY_MR, outs, adapter_handle.handle, /* r4 */ @@ -828,7 +832,7 @@ u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, struct ehca_mr_hipzout_parms *outparms) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs, adapter_handle.handle, /* r4 */ @@ -855,7 +859,7 @@ u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, struct ehca_mr_hipzout_parms *outparms) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs, adapter_handle.handle, /* r4 */ @@ -877,7 +881,7 @@ u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, struct ehca_mw_hipzout_parms *outparms) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + 
unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, adapter_handle.handle, /* r4 */ @@ -895,7 +899,7 @@ u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, struct ehca_mw_hipzout_parms *outparms) { u64 ret; - u64 outs[PLPAR_HCALL9_BUFSIZE]; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; ret = ehca_plpar_hcall9(H_QUERY_MW, outs, adapter_handle.handle, /* r4 */ @@ -920,7 +924,7 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, void *rblock, unsigned long *byte_count) { - u64 r_cb = virt_to_abs(rblock); + u64 r_cb = __pa(rblock); if (r_cb & (EHCA_PAGESIZE-1)) { ehca_gen_err("rblock not page aligned."); diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h index 2c3c6e0ea5c..a46e514c367 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ b/drivers/infiniband/hw/ehca/hcp_if.h @@ -49,7 +49,7 @@ #include "hipz_hw.h" /* - * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initalize + * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize * resources, create the empty EQPT (ring). */ u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, @@ -78,7 +78,7 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, * initialize resources, create empty QPPTs (2 rings). */ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms); + struct ehca_alloc_qp_parms *parms, int is_user); u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, const u8 port_id, diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c index 214821095cb..077376ff3d2 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ b/drivers/infiniband/hw/ehca/hcp_phyp.c @@ -42,10 +42,9 @@ #include "ehca_classes.h" #include "hipz_hw.h" -int hcall_map_page(u64 physaddr, u64 *mapaddr) +u64 hcall_map_page(u64 physaddr) { - *mapaddr = (u64)(ioremap(physaddr, EHCA_PAGESIZE)); - return 0; + return (u64)ioremap(physaddr, EHCA_PAGESIZE); } int hcall_unmap_page(u64 mapaddr) @@ -54,12 +53,15 @@ int hcall_unmap_page(u64 mapaddr) return 0; } -int hcp_galpas_ctor(struct h_galpas *galpas, +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user) { - int ret = hcall_map_page(paddr_kernel, &galpas->kernel.fw_handle); - if (ret) - return ret; + if (!is_user) { + galpas->kernel.fw_handle = hcall_map_page(paddr_kernel); + if (!galpas->kernel.fw_handle) + return -ENOMEM; + } else + galpas->kernel.fw_handle = 0; galpas->user.fw_handle = paddr_user; diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h index 5305c2a3ed9..d1b02991024 100644 --- a/drivers/infiniband/hw/ehca/hcp_phyp.h +++ b/drivers/infiniband/hw/ehca/hcp_phyp.h @@ -78,12 +78,12 @@ static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) *(volatile u64 __force *)addr = value; } -int hcp_galpas_ctor(struct h_galpas *galpas, +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, u64 paddr_kernel, u64 paddr_user); int hcp_galpas_dtor(struct h_galpas *galpas); -int hcall_map_page(u64 physaddr, u64 * mapaddr); +u64 hcall_map_page(u64 physaddr); int hcall_unmap_page(u64 mapaddr); diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index 661f8db6270..8d594517cd2 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -38,6 +38,8 @@ * 
POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/slab.h> + #include "ehca_tools.h" #include "ipz_pt_fn.h" #include "ehca_classes.h" @@ -79,7 +81,7 @@ int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) { int i; for (i = 0; i < queue->queue_length / queue->pagesize; i++) { - u64 page = (u64)virt_to_abs(queue->queue_pages[i]); + u64 page = __pa(queue->queue_pages[i]); if (addr >= page && addr < page + queue->pagesize) { *q_offset = addr - page + i * queue->pagesize; return 0; @@ -163,6 +165,7 @@ static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) out: ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); + mutex_unlock(&pd->lock); return 0; } @@ -219,12 +222,15 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, queue->small_page = NULL; /* allocate queue page pointers */ - queue->queue_pages = vmalloc(nr_of_pages * sizeof(void *)); + queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), + GFP_KERNEL | __GFP_NOWARN); if (!queue->queue_pages) { - ehca_gen_err("Couldn't allocate queue page list"); - return 0; + queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("Couldn't allocate queue page list"); + return 0; + } } - memset(queue->queue_pages, 0, nr_of_pages * sizeof(void *)); /* allocate actual queue pages */ if (is_small) { @@ -239,7 +245,10 @@ int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, ipz_queue_ctor_exit0: ehca_gen_err("Couldn't alloc pages queue=%p " "nr_of_pages=%x", queue, nr_of_pages); - vfree(queue->queue_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); return 0; } @@ -261,7 +270,10 @@ int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) free_page((unsigned long)queue->queue_pages[i]); } - vfree(queue->queue_pages); + if (is_vmalloc_addr(queue->queue_pages)) + vfree(queue->queue_pages); + else + kfree(queue->queue_pages); return 1; } diff --git a/drivers/infiniband/hw/ipath/Kconfig b/drivers/infiniband/hw/ipath/Kconfig index 3c7968f25ec..1d9bb115cbf 100644 --- a/drivers/infiniband/hw/ipath/Kconfig +++ b/drivers/infiniband/hw/ipath/Kconfig @@ -1,9 +1,11 @@ config INFINIBAND_IPATH - tristate "QLogic InfiniPath Driver" - depends on 64BIT && NET + tristate "QLogic HTX HCA support" + depends on 64BIT && NET && HT_IRQ ---help--- - This is a driver for QLogic InfiniPath host channel adapters, + This is a driver for the obsolete QLogic Hyper-Transport + IB host channel adapter (model QHT7140), including InfiniBand verbs support. This driver allows these devices to be used with both kernel upper level protocols such as IP-over-InfiniBand as well as with userspace applications (in conjunction with InfiniBand userspace access). + For QLogic PCIe QLE based cards, use the QIB driver instead. 
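The ipz_queue_ctor()/ipz_queue_dtor() changes above hand-roll the try-kmalloc-then-vmalloc pattern that later kernels expose as kvzalloc()/kvfree(); a minimal sketch of the same pattern:

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>                   /* is_vmalloc_addr() */

static void *example_kvzalloc(size_t size)
{
        /* prefer physically contiguous memory, but fail without a warning */
        void *p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);

        return p ? p : vzalloc(size);
}

static void example_kvfree(void *p)
{
        if (is_vmalloc_addr(p))
                vfree(p);
        else
                kfree(p);
}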
diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile index bf945006198..4496f2820c9 100644 --- a/drivers/infiniband/hw/ipath/Makefile +++ b/drivers/infiniband/hw/ipath/Makefile @@ -1,4 +1,4 @@ -EXTRA_CFLAGS += -DIPATH_IDSTR='"QLogic kernel.org driver"' \ +ccflags-y := -DIPATH_IDSTR='"QLogic kernel.org driver"' \ -DIPATH_KERN_TYPE=0 obj-$(CONFIG_INFINIBAND_IPATH) += ib_ipath.o @@ -29,13 +29,9 @@ ib_ipath-y := \ ipath_user_pages.o \ ipath_user_sdma.o \ ipath_verbs_mcast.o \ - ipath_verbs.o \ - ipath_iba7220.o \ - ipath_sd7220.o \ - ipath_sd7220_img.o + ipath_verbs.o ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o -ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba6120.o ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c index d385e4168c9..0416c6c0e12 100644 --- a/drivers/infiniband/hw/ipath/ipath_cq.c +++ b/drivers/infiniband/hw/ipath/ipath_cq.c @@ -32,6 +32,7 @@ */ #include <linux/err.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index d4ce8b63e19..45802e97332 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -45,6 +45,7 @@ #include <linux/pci.h> #include <linux/vmalloc.h> #include <linux/fs.h> +#include <linux/export.h> #include <asm/uaccess.h> #include "ipath_kernel.h" @@ -65,7 +66,8 @@ static const struct file_operations diag_file_ops = { .write = ipath_diag_write, .read = ipath_diag_read, .open = ipath_diag_open, - .release = ipath_diag_release + .release = ipath_diag_release, + .llseek = default_llseek, }; static ssize_t ipath_diagpkt_write(struct file *fp, @@ -75,6 +77,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp, static const struct file_operations diagpkt_file_ops = { .owner = THIS_MODULE, .write = ipath_diagpkt_write, + .llseek = noop_llseek, }; static atomic_t diagpkt_count = ATOMIC_INIT(0); @@ -323,7 +326,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp, size_t count, loff_t *off) { u32 __iomem *piobuf; - u32 plen, clen, pbufn; + u32 plen, pbufn, maxlen_reserve; struct ipath_diag_pkt odp; struct ipath_diag_xpkt dp; u32 *tmpbuf = NULL; @@ -332,42 +335,24 @@ static ssize_t ipath_diagpkt_write(struct file *fp, u64 val; u32 l_state, lt_state; /* LinkState, LinkTrainingState */ - if (count < sizeof(odp)) { - ret = -EINVAL; - goto bail; - } if (count == sizeof(dp)) { if (copy_from_user(&dp, data, sizeof(dp))) { ret = -EFAULT; goto bail; } - } else if (copy_from_user(&odp, data, sizeof(odp))) { - ret = -EFAULT; - goto bail; - } - - /* - * Due to padding/alignment issues (lessened with new struct) - * the old and new structs are the same length. We need to - * disambiguate them, which we can do because odp.len has never - * been less than the total of LRH+BTH+DETH so far, while - * dp.unit (same offset) unit is unlikely to get that high. - * Similarly, dp.data, the pointer to user at the same offset - * as odp.unit, is almost certainly at least one (512byte)page - * "above" NULL. The if-block below can be omitted if compatibility - * between a new driver and older diagnostic code is unimportant. - * compatibility the other direction (new diags, old driver) is - * handled in the diagnostic code, with a warning. - */ - if (dp.unit >= 20 && dp.data < 512) { - /* very probable version mismatch. 
Fix it up */ - memcpy(&odp, &dp, sizeof(odp)); - /* We got a legacy dp, copy elements to dp */ + } else if (count == sizeof(odp)) { + if (copy_from_user(&odp, data, sizeof(odp))) { + ret = -EFAULT; + goto bail; + } + dp.len = odp.len; dp.unit = odp.unit; dp.data = odp.data; - dp.len = odp.len; - dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */ + dp.pbc_wd = 0; + } else { + ret = -EINVAL; + goto bail; } /* send count must be an exact number of dwords */ @@ -376,7 +361,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp, goto bail; } - clen = dp.len >> 2; + plen = dp.len >> 2; dd = ipath_lookup(dp.unit); if (!dd || !(dd->ipath_flags & IPATH_PRESENT) || @@ -419,16 +404,22 @@ static ssize_t ipath_diagpkt_write(struct file *fp, goto bail; } - /* need total length before first word written */ - /* +1 word is for the qword padding */ - plen = sizeof(u32) + dp.len; - - if ((plen + 4) > dd->ipath_ibmaxlen) { + /* + * need total length before first word written, plus 2 Dwords. One Dword + * is for padding so we get the full user data when not aligned on + * a word boundary. The other Dword is to make sure we have room for the + * ICRC which gets tacked on later. + */ + maxlen_reserve = 2 * sizeof(u32); + if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) { ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n", - plen - 4, dd->ipath_ibmaxlen); + dp.len, dd->ipath_ibmaxlen); ret = -EINVAL; - goto bail; /* before writing pbc */ + goto bail; } + + plen = sizeof(u32) + dp.len; + tmpbuf = vmalloc(plen); if (!tmpbuf) { dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, " @@ -470,11 +461,11 @@ static ssize_t ipath_diagpkt_write(struct file *fp, */ if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) { ipath_flush_wc(); - __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1); + __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1); ipath_flush_wc(); - __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); } else - __iowrite32_copy(piobuf + 2, tmpbuf, clen); + __iowrite32_copy(piobuf + 2, tmpbuf, plen); ipath_flush_wc(); diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c index e90a0ea538a..123a8c05353 100644 --- a/drivers/infiniband/hw/ipath/ipath_dma.c +++ b/drivers/infiniband/hw/ipath/ipath_dma.c @@ -31,6 +31,7 @@ */ #include <linux/scatterlist.h> +#include <linux/gfp.h> #include <rdma/ib_verbs.h> #include "ipath_verbs.h" @@ -114,6 +115,10 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl, ret = 0; break; } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif } return ret; } @@ -125,21 +130,6 @@ static void ipath_unmap_sg(struct ib_device *dev, BUG_ON(!valid_dma_direction(direction)); } -static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - u64 addr = (u64) page_address(sg_page(sg)); - - if (addr) - addr += sg->offset; - return addr; -} - -static unsigned int ipath_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg->length; -} - static void ipath_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, @@ -175,17 +165,15 @@ static void ipath_dma_free_coherent(struct ib_device *dev, size_t size, } struct ib_dma_mapping_ops ipath_dma_mapping_ops = { - ipath_mapping_error, - ipath_dma_map_single, - ipath_dma_unmap_single, - ipath_dma_map_page, - ipath_dma_unmap_page, - ipath_map_sg, - ipath_unmap_sg, - ipath_sg_dma_address, - ipath_sg_dma_len, - ipath_sync_single_for_cpu, - 
ipath_sync_single_for_device, - ipath_dma_alloc_coherent, - ipath_dma_free_coherent + .mapping_error = ipath_mapping_error, + .map_single = ipath_dma_map_single, + .unmap_single = ipath_dma_unmap_single, + .map_page = ipath_dma_map_page, + .unmap_page = ipath_dma_unmap_page, + .map_sg = ipath_map_sg, + .unmap_sg = ipath_unmap_sg, + .sync_single_for_cpu = ipath_sync_single_for_cpu, + .sync_single_for_device = ipath_sync_single_for_device, + .alloc_coherent = ipath_dma_alloc_coherent, + .free_coherent = ipath_dma_free_coherent }; diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index daad09a4591..bd0caedafe9 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include <linux/sched.h> #include <linux/spinlock.h> #include <linux/idr.h> #include <linux/pci.h> @@ -38,6 +39,9 @@ #include <linux/delay.h> #include <linux/netdevice.h> #include <linux/vmalloc.h> +#include <linux/bitmap.h> +#include <linux/slab.h> +#include <linux/module.h> #include "ipath_kernel.h" #include "ipath_verbs.h" @@ -123,24 +127,18 @@ const char *ipath_ibcstatus_str[] = { "LTState1C", "LTState1D", "LTState1E", "LTState1F" }; -static void __devexit ipath_remove_one(struct pci_dev *); -static int __devinit ipath_init_one(struct pci_dev *, - const struct pci_device_id *); +static void ipath_remove_one(struct pci_dev *); +static int ipath_init_one(struct pci_dev *, const struct pci_device_id *); /* Only needed for registration, nothing else needs this info */ #define PCI_VENDOR_ID_PATHSCALE 0x1fc1 -#define PCI_VENDOR_ID_QLOGIC 0x1077 #define PCI_DEVICE_ID_INFINIPATH_HT 0xd -#define PCI_DEVICE_ID_INFINIPATH_PE800 0x10 -#define PCI_DEVICE_ID_INFINIPATH_7220 0x7220 /* Number of seconds before our card status check... 
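The struct ib_dma_mapping_ops hunk above converts a positional initializer to designated initializers: each callback is bound by field name, so dropping members (here the sg_dma_address/sg_dma_len slots) or reordering the struct cannot silently shift later entries into the wrong slots. A toy illustration (the struct and functions here are hypothetical):

struct demo_ops {
	int  (*map)(void *buf);
	void (*unmap)(void *buf);
	void (*sync)(void *buf);
};

static int  demo_map(void *buf)   { return 0; }
static void demo_unmap(void *buf) { }

/* .sync is deliberately omitted; with designated initializers it is
 * simply left NULL rather than stealing the next positional value. */
static const struct demo_ops demo = {
	.map   = demo_map,
	.unmap = demo_unmap,
};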
*/ #define STATUS_TIMEOUT 60 static const struct pci_device_id ipath_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, - { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) }, - { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_INFINIPATH_7220) }, { 0, } }; @@ -149,7 +147,7 @@ MODULE_DEVICE_TABLE(pci, ipath_pci_tbl); static struct pci_driver ipath_driver = { .name = IPATH_DRV_NAME, .probe = ipath_init_one, - .remove = __devexit_p(ipath_remove_one), + .remove = ipath_remove_one, .id_table = ipath_pci_tbl, .driver = { .groups = ipath_driver_attr_groups, @@ -196,22 +194,17 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) struct ipath_devdata *dd; int ret; - if (!idr_pre_get(&unit_table, GFP_KERNEL)) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } - - dd = vmalloc(sizeof(*dd)); + dd = vzalloc(sizeof(*dd)); if (!dd) { dd = ERR_PTR(-ENOMEM); goto bail; } - memset(dd, 0, sizeof(*dd)); dd->ipath_unit = -1; + idr_preload(GFP_KERNEL); spin_lock_irqsave(&ipath_devs_lock, flags); - ret = idr_get_new(&unit_table, dd, &dd->ipath_unit); + ret = idr_alloc(&unit_table, dd, 0, 0, GFP_NOWAIT); if (ret < 0) { printk(KERN_ERR IPATH_DRV_NAME ": Could not allocate unit ID: error %d\n", -ret); @@ -219,6 +212,7 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) dd = ERR_PTR(ret); goto bail_unlock; } + dd->ipath_unit = ret; dd->pcidev = pdev; pci_set_drvdata(pdev, dd); @@ -227,7 +221,7 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) bail_unlock: spin_unlock_irqrestore(&ipath_devs_lock, flags); - + idr_preload_end(); bail: return dd; } @@ -392,14 +386,14 @@ done: ipath_enable_armlaunch(dd); } -static int __devinit ipath_init_one(struct pci_dev *pdev, - const struct pci_device_id *ent) +static void cleanup_device(struct ipath_devdata *dd); + +static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret, len, j; struct ipath_devdata *dd; unsigned long long addr; u32 bar0 = 0, bar1 = 0; - u8 rev; dd = ipath_alloc_devdata(pdev); if (IS_ERR(dd)) { @@ -470,14 +464,14 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, goto bail_disable; } - ret = pci_set_dma_mask(pdev, DMA_64BIT_MASK); + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (ret) { /* * if the 64 bit setup fails, try 32 bit. Some systems * do not setup 64 bit maps on systems with 2GB or less * memory installed. */ - ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (ret) { dev_info(&pdev->dev, "Unable to set DMA mask for unit %u: %d\n", @@ -486,7 +480,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, } else { ipath_dbg("No 64bit DMA mask, used 32 bit mask\n"); - ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (ret) dev_info(&pdev->dev, "Unable to set DMA consistent mask " @@ -496,7 +490,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, } } else { - ret = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (ret) dev_info(&pdev->dev, "Unable to set DMA consistent mask " @@ -518,30 +512,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, /* setup the chip-specific functions, as early as possible. 
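ipath_alloc_devdata() above switches from the retired idr_pre_get()/idr_get_new() pair to idr_preload()/idr_alloc(): nodes are preallocated outside the spinlock, so the GFP_NOWAIT allocation inside the critical section cannot sleep. A condensed sketch of the idiom (table and lock names are illustrative):

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

static DEFINE_IDR(unit_table);
static DEFINE_SPINLOCK(unit_lock);

/* Returns the allocated unit id (>= 0) or a negative errno. */
static int alloc_unit(void *ptr)
{
	unsigned long flags;
	int id;

	idr_preload(GFP_KERNEL);	/* may sleep; pins preallocated nodes */
	spin_lock_irqsave(&unit_lock, flags);
	id = idr_alloc(&unit_table, ptr, 0, 0, GFP_NOWAIT); /* 0, 0: any id */
	spin_unlock_irqrestore(&unit_lock, flags);
	idr_preload_end();
	return id;
}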
*/ switch (ent->device) { case PCI_DEVICE_ID_INFINIPATH_HT: -#ifdef CONFIG_HT_IRQ ipath_init_iba6110_funcs(dd); break; -#else - ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if " - "CONFIG_HT_IRQ is not enabled\n", ent->device); - return -ENODEV; -#endif - case PCI_DEVICE_ID_INFINIPATH_PE800: -#ifdef CONFIG_PCI_MSI - ipath_init_iba6120_funcs(dd); - break; -#else - ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if " - "CONFIG_PCI_MSI is not enabled\n", ent->device); - return -ENODEV; -#endif - case PCI_DEVICE_ID_INFINIPATH_7220: -#ifndef CONFIG_PCI_MSI - ipath_dbg("CONFIG_PCI_MSI is not enabled, " - "using INTx for unit %u\n", dd->ipath_unit); -#endif - ipath_init_iba7220_funcs(dd); - break; + default: ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " "failing\n", ent->device); @@ -551,9 +524,8 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, for (j = 0; j < 6; j++) { if (!pdev->resource[j].start) continue; - ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n", - j, (unsigned long long)pdev->resource[j].start, - (unsigned long long)pdev->resource[j].end, + ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n", + j, &pdev->resource[j], (unsigned long long)pci_resource_len(pdev, j)); } @@ -563,13 +535,7 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, goto bail_regions; } - ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev); - if (ret) { - ipath_dev_err(dd, "Failed to read PCI revision ID unit " - "%u: err %d\n", dd->ipath_unit, -ret); - goto bail_regions; /* shouldn't ever happen */ - } - dd->ipath_pcirev = rev; + dd->ipath_pcirev = pdev->revision; #if defined(__powerpc__) /* There isn't a generic way to specify writethrough mappings */ @@ -639,8 +605,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev, goto bail; bail_irqsetup: - if (pdev->irq) - free_irq(pdev->irq, dd); + cleanup_device(dd); + + if (dd->ipath_irq) + dd->ipath_f_free_irq(dd); + + if (dd->ipath_f_cleanup) + dd->ipath_f_cleanup(dd); bail_iounmap: iounmap((volatile void __iomem *) dd->ipath_kregbase); @@ -658,9 +629,11 @@ bail: return ret; } -static void __devexit cleanup_device(struct ipath_devdata *dd) +static void cleanup_device(struct ipath_devdata *dd) { int port; + struct ipath_portdata **tmp; + unsigned long flags; if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { /* can't do anything more with chip; needs re-init */ @@ -742,23 +715,24 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) /* * free any resources still in use (usually just kernel ports) - * at unload; we do for portcnt, not cfgports, because cfgports - * could have changed while we were loaded. + * at unload; we do for portcnt, because that's what we allocate. + * We acquire lock to be really paranoid that ipath_pd isn't being + * accessed from some interrupt-related code (that should not happen, + * but best to be sure). 
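ipath_init_one() above also drops a config-space round trip: the PCI core reads the revision byte once at enumeration and caches it, so dd->ipath_pcirev can come straight from pdev->revision with no error path. A sketch:

#include <linux/pci.h>

/* pci_dev->revision is filled in by the PCI core at probe time, replacing
 * pci_read_config_byte(pdev, PCI_REVISION_ID, &rev) and its error check. */
static u8 chip_revision(const struct pci_dev *pdev)
{
	return pdev->revision;
}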
*/ + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); + tmp = dd->ipath_pd; + dd->ipath_pd = NULL; + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); for (port = 0; port < dd->ipath_portcnt; port++) { - struct ipath_portdata *pd = dd->ipath_pd[port]; - dd->ipath_pd[port] = NULL; + struct ipath_portdata *pd = tmp[port]; + tmp[port] = NULL; /* debugging paranoia */ ipath_free_pddata(dd, pd); } - kfree(dd->ipath_pd); - /* - * debuggability, in case some cleanup path tries to use it - * after this - */ - dd->ipath_pd = NULL; + kfree(tmp); } -static void __devexit ipath_remove_one(struct pci_dev *pdev) +static void ipath_remove_one(struct pci_dev *pdev) { struct ipath_devdata *dd = pci_get_drvdata(pdev); @@ -770,7 +744,7 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev) */ ipath_shutdown_device(dd); - flush_scheduled_work(); + flush_workqueue(ib_wq); if (dd->verbs_dev) ipath_unregister_ib_device(dd->verbs_dev); @@ -1259,7 +1233,7 @@ reloop: */ ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf" " %x, len %x hdrq+%x rhf: %Lx\n", - etail, tlen, l, + etail, tlen, l, (unsigned long long) le64_to_cpu(*(__le64 *) rhf_addr)); if (ipath_debug & __IPATH_ERRPKTDBG) { u32 j, *d, dw = rsize-2; @@ -1457,7 +1431,8 @@ static void ipath_reset_availshadow(struct ipath_devdata *dd) 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */ if (oldval != dd->ipath_pioavailshadow[i]) ipath_dbg("shadow[%d] was %Lx, now %lx\n", - i, oldval, dd->ipath_pioavailshadow[i]); + i, (unsigned long long) oldval, + dd->ipath_pioavailshadow[i]); } spin_unlock_irqrestore(&ipath_pioavail_lock, flags); } @@ -1692,7 +1667,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, unsigned len, int avail) { unsigned long flags; - unsigned end, cnt = 0, next; + unsigned end, cnt = 0; /* There are two bits per send buffer (busy and generation) */ start *= 2; @@ -1743,12 +1718,7 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, if (dd->ipath_pioupd_thresh) { end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); - next = find_first_bit(dd->ipath_pioavailkernel, end); - while (next < end) { - cnt++; - next = find_next_bit(dd->ipath_pioavailkernel, end, - next + 1); - } + cnt = bitmap_weight(dd->ipath_pioavailkernel, end); } spin_unlock_irqrestore(&ipath_pioavail_lock, flags); @@ -2411,7 +2381,7 @@ void ipath_shutdown_device(struct ipath_devdata *dd) /* * clear SerdesEnable and turn the leds off; do this here because * we are unloading, so don't count on interrupts to move along - * Turn the LEDs off explictly for the same reason. + * Turn the LEDs off explicitly for the same reason. */ dd->ipath_f_quiet_serdes(dd); @@ -2530,11 +2500,6 @@ static int __init infinipath_init(void) * the PCI subsystem. 
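In ipath_chg_pioavailkernel() above, the hand-rolled find_first_bit()/find_next_bit() counting loop collapses into one bitmap_weight() call, which returns how many of the first nbits bits of a bitmap are set. An equivalent sketch (the function name is hypothetical):

#include <linux/bitmap.h>

/* Count the set bits in the first @nbits of @map; exactly what the
 * removed loop computed one find_next_bit() call at a time. */
static unsigned int busy_buffer_count(const unsigned long *map,
				      unsigned int nbits)
{
	return bitmap_weight(map, nbits);
}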
*/ idr_init(&unit_table); - if (!idr_pre_get(&unit_table, GFP_KERNEL)) { - printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n"); - ret = -ENOMEM; - goto bail; - } ret = pci_register_driver(&ipath_driver); if (ret < 0) { @@ -2585,6 +2550,7 @@ int ipath_reset_device(int unit) { int ret, i; struct ipath_devdata *dd = ipath_lookup(unit); + unsigned long flags; if (!dd) { ret = -ENODEV; @@ -2610,18 +2576,21 @@ int ipath_reset_device(int unit) goto bail; } + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); if (dd->ipath_pd) for (i = 1; i < dd->ipath_cfgports; i++) { - if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) { - ipath_dbg("unit %u port %d is in use " - "(PID %u cmd %s), can't reset\n", - unit, i, - pid_nr(dd->ipath_pd[i]->port_pid), - dd->ipath_pd[i]->port_comm); - ret = -EBUSY; - goto bail; - } + if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) + continue; + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); + ipath_dbg("unit %u port %d is in use " + "(PID %u cmd %s), can't reset\n", + unit, i, + pid_nr(dd->ipath_pd[i]->port_pid), + dd->ipath_pd[i]->port_comm); + ret = -EBUSY; + goto bail; } + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); if (dd->ipath_flags & IPATH_HAS_SEND_DMA) teardown_sdma(dd); @@ -2655,9 +2624,12 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig) { int i, sub, any = 0; struct pid *pid; + unsigned long flags; if (!dd->ipath_pd) return 0; + + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); for (i = 1; i < dd->ipath_cfgports; i++) { if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) continue; @@ -2681,6 +2653,7 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig) any++; } } + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); return any; } @@ -2703,7 +2676,7 @@ static void ipath_hol_signal_up(struct ipath_devdata *dd) * to prevent HoL blocking, then start the HoL timer that * periodically continues, then stop procs, so they can detect * link down if they want, and do something about it. - * Timer may already be running, so use __mod_timer, not add_timer. + * Timer may already be running, so use mod_timer, not add_timer. 
*/ void ipath_hol_down(struct ipath_devdata *dd) { @@ -2712,7 +2685,7 @@ void ipath_hol_down(struct ipath_devdata *dd) dd->ipath_hol_next = IPATH_HOL_DOWNCONT; dd->ipath_hol_timer.expires = jiffies + msecs_to_jiffies(ipath_hol_timeout_ms); - __mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); + mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); } /* @@ -2751,7 +2724,7 @@ void ipath_hol_event(unsigned long opaque) else { dd->ipath_hol_timer.expires = jiffies + msecs_to_jiffies(ipath_hol_timeout_ms); - __mod_timer(&dd->ipath_hol_timer, + mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); } } diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index dc37277f1c8..fc7181985e8 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c @@ -772,8 +772,8 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) "0x%x, not 0x%x\n", csum, ifp->if_csum); goto done; } - if (*(__be64 *) ifp->if_guid == 0ULL || - *(__be64 *) ifp->if_guid == __constant_cpu_to_be64(-1LL)) { + if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) || + *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) { ipath_dev_err(dd, "Invalid GUID %llx from flash; " "ignoring\n", *(unsigned long long *) ifp->if_guid); diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c index 35f301c88b5..6d7f453b4d0 100644 --- a/drivers/infiniband/hw/ipath/ipath_file_ops.c +++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c @@ -35,11 +35,14 @@ #include <linux/poll.h> #include <linux/cdev.h> #include <linux/swap.h> +#include <linux/export.h> #include <linux/vmalloc.h> +#include <linux/slab.h> #include <linux/highmem.h> #include <linux/io.h> +#include <linux/aio.h> #include <linux/jiffies.h> -#include <linux/smp_lock.h> +#include <linux/cpu.h> #include <asm/pgtable.h> #include "ipath_kernel.h" @@ -62,7 +65,8 @@ static const struct file_operations ipath_file_ops = { .open = ipath_open, .release = ipath_close, .poll = ipath_poll, - .mmap = ipath_mmap + .mmap = ipath_mmap, + .llseek = noop_llseek, }; /* @@ -223,8 +227,13 @@ static int ipath_get_base_info(struct file *fp, (unsigned long long) kinfo->spi_subport_rcvhdr_base); } - kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) / - dd->ipath_palign; + /* + * All user buffers are 2KB buffers. If we ever support + * giving 4KB buffers to user processes, this will need some + * work. 
+ */ + kinfo->spi_pioindex = (kinfo->spi_piobufbase - + (dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign; kinfo->spi_pioalign = dd->ipath_palign; kinfo->spi_qpair = IPATH_KD_QP; @@ -1146,7 +1155,7 @@ static int ipath_file_vma_fault(struct vm_area_struct *vma, return 0; } -static struct vm_operations_struct ipath_file_vm_ops = { +static const struct vm_operations_struct ipath_file_vm_ops = { .fault = ipath_file_vma_fault, }; @@ -1217,7 +1226,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; vma->vm_ops = &ipath_file_vm_ops; - vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; ret = 1; bail: @@ -1524,7 +1533,7 @@ static int init_subports(struct ipath_devdata *dd, } num_subports = uinfo->spu_subport_cnt; - pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports); + pd->subport_uregbase = vzalloc(PAGE_SIZE * num_subports); if (!pd->subport_uregbase) { ret = -ENOMEM; goto bail; @@ -1532,13 +1541,13 @@ static int init_subports(struct ipath_devdata *dd, /* Note: pd->port_rcvhdrq_size isn't initialized yet. */ size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * sizeof(u32), PAGE_SIZE) * num_subports; - pd->subport_rcvhdr_base = vmalloc(size); + pd->subport_rcvhdr_base = vzalloc(size); if (!pd->subport_rcvhdr_base) { ret = -ENOMEM; goto bail_ureg; } - pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks * + pd->subport_rcvegrbuf = vzalloc(pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size * num_subports); if (!pd->subport_rcvegrbuf) { @@ -1550,11 +1559,6 @@ static int init_subports(struct ipath_devdata *dd, pd->port_subport_id = uinfo->spu_subport_id; pd->active_slaves = 1; set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag); - memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports); - memset(pd->subport_rcvhdr_base, 0, size); - memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks * - pd->port_rcvegrbuf_size * - num_subports); goto bail; bail_rhdr: @@ -1611,7 +1615,7 @@ static int try_alloc_port(struct ipath_devdata *dd, int port, pd->port_cnt = 1; port_fp(fp) = pd; pd->port_pid = get_pid(task_pid(current)); - strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); + strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); ipath_stats.sps_ports++; ret = 0; } else @@ -1674,7 +1678,7 @@ static int find_best_unit(struct file *fp, * InfiniPath chip to that processor (we assume reasonable connectivity, * for now). This code assumes that if affinity has been set * before this point, that at most one cpu is set; for now this - * is reasonable. I check for both cpus_empty() and cpus_full(), + * is reasonable. I check for both cpumask_empty() and cpumask_full(), * in case some kernel variant sets none of the bits when no * affinity is set. 2.6.11 and 12 kernels have all present * cpus set. Some day we'll have to fix it up further to handle @@ -1683,17 +1687,19 @@ static int find_best_unit(struct file *fp, * information. There may be some issues with dual core numbering * as well. This needs more work prior to release. 
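try_alloc_port() above trades strncpy() for strlcpy(): strncpy() leaves the destination unterminated whenever the source fills the buffer, while strlcpy() always writes a NUL, truncating if needed. A sketch (the helper is hypothetical):

#include <linux/string.h>

/* Copy a process name into a fixed buffer; strlcpy() guarantees dst is
 * NUL-terminated even when strlen(src) >= dstsz, unlike strncpy(). */
static void record_comm(char *dst, size_t dstsz, const char *src)
{
	strlcpy(dst, src, dstsz);
}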
*/ - if (!cpus_empty(current->cpus_allowed) && - !cpus_full(current->cpus_allowed)) { + if (!cpumask_empty(tsk_cpus_allowed(current)) && + !cpumask_full(tsk_cpus_allowed(current))) { int ncpus = num_online_cpus(), curcpu = -1, nset = 0; - for (i = 0; i < ncpus; i++) - if (cpu_isset(i, current->cpus_allowed)) { + get_online_cpus(); + for_each_online_cpu(i) + if (cpumask_test_cpu(i, tsk_cpus_allowed(current))) { ipath_cdbg(PROC, "%s[%u] affinity set for " "cpu %d/%d\n", current->comm, current->pid, i, ncpus); curcpu = i; nset++; } + put_online_cpus(); if (curcpu != -1 && nset != ncpus) { if (npresent) { prefunit = curcpu / (ncpus / npresent); @@ -1816,7 +1822,6 @@ done: static int ipath_open(struct inode *in, struct file *fp) { /* The real work is performed later in ipath_assign_port() */ - cycle_kernel_lock(); fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL); return fp->private_data ? 0 : -ENOMEM; } @@ -1860,9 +1865,9 @@ static int ipath_assign_port(struct file *fp, goto done_chk_sdma; } - i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE; + i_minor = iminor(file_inode(fp)) - IPATH_USER_MINOR_BASE; ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n", - (long)fp->f_path.dentry->d_inode->i_rdev, i_minor); + (long)file_inode(fp)->i_rdev, i_minor); if (i_minor) ret = find_free_port(i_minor - 1, fp, uinfo); @@ -1972,7 +1977,7 @@ static int ipath_do_user_init(struct file *fp, * 0 to 1. So for those chips, we turn it off and then back on. * This will (very briefly) affect any other open ports, but the * duration is very short, and therefore isn't an issue. We - * explictly set the in-memory tail copy to 0 beforehand, so we + * explicitly set the in-memory tail copy to 0 beforehand, so we * don't have to wait to be sure the DMA update has happened * (chip resets head/tail to 0 on transition to enable). 
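find_best_unit() above moves from the old cpus_empty()/cpu_isset() macros to the cpumask API, and the scan now iterates only online CPUs with hotplug held off. The core of the converted loop, condensed into a standalone helper:

#include <linux/cpu.h>		/* get_online_cpus() */
#include <linux/cpumask.h>
#include <linux/sched.h>	/* current, tsk_cpus_allowed() */

/* Count how many online CPUs are in the current task's affinity mask. */
static int affinity_cpu_count(void)
{
	int cpu, nset = 0;

	get_online_cpus();	/* block CPU hotplug while iterating */
	for_each_online_cpu(cpu)
		if (cpumask_test_cpu(cpu, tsk_cpus_allowed(current)))
			nset++;
	put_online_cpus();
	return nset;
}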
*/ @@ -2041,14 +2046,16 @@ static int ipath_close(struct inode *in, struct file *fp) struct ipath_filedata *fd; struct ipath_portdata *pd; struct ipath_devdata *dd; + unsigned long flags; unsigned port; + struct pid *pid; ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n", (long)in->i_rdev, fp->private_data); mutex_lock(&ipath_mutex); - fd = (struct ipath_filedata *) fp->private_data; + fd = fp->private_data; fp->private_data = NULL; pd = fd->pd; if (!pd) { @@ -2074,14 +2081,13 @@ static int ipath_close(struct inode *in, struct file *fp) mutex_unlock(&ipath_mutex); goto bail; } + /* early; no interrupt users after this */ + spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); port = pd->port_port; - - if (pd->port_hdrqfull) { - ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors " - "during run\n", pd->port_comm, pid_nr(pd->port_pid), - pd->port_hdrqfull); - pd->port_hdrqfull = 0; - } + dd->ipath_pd[port] = NULL; + pid = pd->port_pid; + pd->port_pid = NULL; + spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); if (pd->port_rcvwait_to || pd->port_piowait_to || pd->port_rcvnowait || pd->port_pionowait) { @@ -2138,13 +2144,11 @@ static int ipath_close(struct inode *in, struct file *fp) unlock_expected_tids(pd); ipath_stats.sps_ports--; ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", - pd->port_comm, pid_nr(pd->port_pid), + pd->port_comm, pid_nr(pid), dd->ipath_unit, port); } - put_pid(pd->port_pid); - pd->port_pid = NULL; - dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */ + put_pid(pid); mutex_unlock(&ipath_mutex); ipath_free_pddata(dd, pd); /* after releasing the mutex */ @@ -2455,7 +2459,7 @@ static int init_cdev(int minor, char *name, const struct file_operations *fops, goto err_cdev; } - device = device_create(ipath_class, NULL, dev, name); + device = device_create(ipath_class, NULL, dev, NULL, name); if (IS_ERR(device)) { ret = PTR_ERR(device); diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 23faba9d21e..e0c404bdc4a 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -31,13 +31,13 @@ * SOFTWARE. 
*/ -#include <linux/version.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/mount.h> #include <linux/pagemap.h> #include <linux/init.h> #include <linux/namei.h> +#include <linux/slab.h> #include "ipath_kernel.h" @@ -46,7 +46,7 @@ static struct super_block *ipath_super; static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, - int mode, const struct file_operations *fops, + umode_t mode, const struct file_operations *fops, void *data) { int error; @@ -57,13 +57,11 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, goto bail; } + inode->i_ino = get_next_ino(); inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_private = data; - if ((mode & S_IFMT) == S_IFDIR) { + if (S_ISDIR(mode)) { inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); inc_nlink(dir); @@ -78,7 +76,7 @@ bail: return error; } -static int create_file(const char *name, mode_t mode, +static int create_file(const char *name, umode_t mode, struct dentry *parent, struct dentry **dentry, const struct file_operations *fops, void *data) { @@ -87,11 +85,11 @@ static int create_file(const char *name, mode_t mode, *dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(dentry)) + if (!IS_ERR(*dentry)) error = ipathfs_mknod(parent->d_inode, *dentry, mode, fops, data); else - error = PTR_ERR(dentry); + error = PTR_ERR(*dentry); mutex_unlock(&parent->d_inode->i_mutex); return error; @@ -106,6 +104,7 @@ static ssize_t atomic_stats_read(struct file *file, char __user *buf, static const struct file_operations atomic_stats_ops = { .read = atomic_stats_read, + .llseek = default_llseek, }; static ssize_t atomic_counters_read(struct file *file, char __user *buf, @@ -114,7 +113,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf, struct infinipath_counters counters; struct ipath_devdata *dd; - dd = file->f_path.dentry->d_inode->i_private; + dd = file_inode(file)->i_private; dd->ipath_f_read_counters(dd, &counters); return simple_read_from_buffer(buf, count, ppos, &counters, @@ -123,6 +122,7 @@ static ssize_t atomic_counters_read(struct file *file, char __user *buf, static const struct file_operations atomic_counters_ops = { .read = atomic_counters_read, + .llseek = default_llseek, }; static ssize_t flash_read(struct file *file, char __user *buf, @@ -154,7 +154,7 @@ static ssize_t flash_read(struct file *file, char __user *buf, goto bail; } - dd = file->f_path.dentry->d_inode->i_private; + dd = file_inode(file)->i_private; if (ipath_eeprom_read(dd, pos, tmp, count)) { ipath_dev_err(dd, "failed to read from flash\n"); ret = -ENXIO; @@ -207,7 +207,7 @@ static ssize_t flash_write(struct file *file, const char __user *buf, goto bail_tmp; } - dd = file->f_path.dentry->d_inode->i_private; + dd = file_inode(file)->i_private; if (ipath_eeprom_write(dd, pos, tmp, count)) { ret = -ENXIO; ipath_dev_err(dd, "failed to write to flash\n"); @@ -227,6 +227,7 @@ bail: static const struct file_operations flash_ops = { .read = flash_read, .write = flash_write, + .llseek = default_llseek, }; static int create_device_files(struct super_block *sb, @@ -276,18 +277,14 @@ static int remove_file(struct dentry *parent, char *name) goto bail; } - spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked(tmp); + dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); - 
spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, tmp); - } else { + } else spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); - } ret = 0; bail: @@ -350,10 +347,8 @@ static int ipathfs_fill_super(struct super_block *sb, void *data, list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) { spin_unlock_irqrestore(&ipath_devs_lock, flags); ret = create_device_files(sb, dd); - if (ret) { - deactivate_super(sb); + if (ret) goto bail; - } spin_lock_irqsave(&ipath_devs_lock, flags); } @@ -363,13 +358,13 @@ bail: return ret; } -static int ipathfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ipathfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - int ret = get_sb_single(fs_type, flags, data, - ipathfs_fill_super, mnt); - if (ret >= 0) - ipath_super = mnt->mnt_sb; + struct dentry *ret; + ret = mount_single(fs_type, flags, data, ipathfs_fill_super); + if (!IS_ERR(ret)) + ipath_super = ret->d_sb; return ret; } @@ -412,9 +407,10 @@ bail: static struct file_system_type ipathfs_fs_type = { .owner = THIS_MODULE, .name = "ipathfs", - .get_sb = ipathfs_get_sb, + .mount = ipathfs_mount, .kill_sb = ipathfs_kill_super, }; +MODULE_ALIAS_FS("ipathfs"); int __init ipath_init_ipathfs(void) { diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 02831ad070b..7cc305488a3 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c @@ -381,7 +381,7 @@ static const ipath_err_t infinipath_hwe_htclnkbbyte1crcerr = #define IPATH_GPIO_SCL \ (1ULL << (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) -/* keep the code below somewhat more readonable; not used elsewhere */ +/* keep the code below somewhat more readable; not used elsewhere */ #define _IPATH_HTLINK0_CRCBITS (infinipath_hwe_htclnkabyte0crcerr | \ infinipath_hwe_htclnkabyte1crcerr) #define _IPATH_HTLINK1_CRCBITS (infinipath_hwe_htclnkbbyte0crcerr | \ @@ -596,8 +596,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, ipath_format_hwerrors(hwerrs, ipath_6110_hwerror_msgs, - sizeof(ipath_6110_hwerror_msgs) / - sizeof(ipath_6110_hwerror_msgs[0]), + ARRAY_SIZE(ipath_6110_hwerror_msgs), msg, msgl); if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS)) @@ -809,7 +808,7 @@ static int ipath_setup_ht_reset(struct ipath_devdata *dd) * errors. We only bother to do this at load time, because it's OK if * it happened before we were loaded (first time after boot/reset), * but any time after that, it's fatal anyway. Also need to not check - * for for upper byte errors if we are in 8 bit mode, so figure out + * for upper byte errors if we are in 8 bit mode, so figure out * our width. For now, at least, also complain if it's 8 bit. 
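A few hunks back, ipath_fs.c completes the VFS API conversion from .get_sb/get_sb_single() to .mount/mount_single(): the mount operation now returns the root dentry (or an ERR_PTR) instead of filling in a vfsmount, and MODULE_ALIAS_FS() lets the module autoload by filesystem name. A skeleton of the converted shape, with a hypothetical filesystem name and a stub fill_super:

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/errno.h>

static int examplefs_fill_super(struct super_block *sb, void *data, int silent)
{
	return -ENOSYS;		/* stub; a real fs would populate sb here */
}

static struct dentry *examplefs_mount(struct file_system_type *fs_type,
				      int flags, const char *dev_name,
				      void *data)
{
	/* mount_single(): all mounts of this type share one superblock. */
	return mount_single(fs_type, flags, data, examplefs_fill_super);
}

static struct file_system_type examplefs_type = {
	.owner   = THIS_MODULE,
	.name    = "examplefs",
	.mount   = examplefs_mount,
	.kill_sb = kill_litter_super,
};
MODULE_ALIAS_FS("examplefs");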
*/ static void slave_or_pri_blk(struct ipath_devdata *dd, struct pci_dev *pdev, @@ -1474,7 +1473,7 @@ static void ipath_ht_quiet_serdes(struct ipath_devdata *dd) /** * ipath_pe_put_tid - write a TID in chip * @dd: the infinipath device - * @tidptr: pointer to the expected TID (in chip) to udpate + * @tidptr: pointer to the expected TID (in chip) to update * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing * diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c deleted file mode 100644 index 421cc2af891..00000000000 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ /dev/null @@ -1,1801 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -/* - * This file contains all of the code that is specific to the - * InfiniPath PCIe chip. - */ - -#include <linux/interrupt.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <rdma/ib_verbs.h> - -#include "ipath_kernel.h" -#include "ipath_registers.h" - -static void ipath_setup_pe_setextled(struct ipath_devdata *, u64, u64); - -/* - * This file contains all the chip-specific register information and - * access functions for the QLogic InfiniPath PCI-Express chip. - * - * This lists the InfiniPath registers, in the actual chip layout. - * This structure should never be directly accessed. 
- */ -struct _infinipath_do_not_use_kernel_regs { - unsigned long long Revision; - unsigned long long Control; - unsigned long long PageAlign; - unsigned long long PortCnt; - unsigned long long DebugPortSelect; - unsigned long long Reserved0; - unsigned long long SendRegBase; - unsigned long long UserRegBase; - unsigned long long CounterRegBase; - unsigned long long Scratch; - unsigned long long Reserved1; - unsigned long long Reserved2; - unsigned long long IntBlocked; - unsigned long long IntMask; - unsigned long long IntStatus; - unsigned long long IntClear; - unsigned long long ErrorMask; - unsigned long long ErrorStatus; - unsigned long long ErrorClear; - unsigned long long HwErrMask; - unsigned long long HwErrStatus; - unsigned long long HwErrClear; - unsigned long long HwDiagCtrl; - unsigned long long MDIO; - unsigned long long IBCStatus; - unsigned long long IBCCtrl; - unsigned long long ExtStatus; - unsigned long long ExtCtrl; - unsigned long long GPIOOut; - unsigned long long GPIOMask; - unsigned long long GPIOStatus; - unsigned long long GPIOClear; - unsigned long long RcvCtrl; - unsigned long long RcvBTHQP; - unsigned long long RcvHdrSize; - unsigned long long RcvHdrCnt; - unsigned long long RcvHdrEntSize; - unsigned long long RcvTIDBase; - unsigned long long RcvTIDCnt; - unsigned long long RcvEgrBase; - unsigned long long RcvEgrCnt; - unsigned long long RcvBufBase; - unsigned long long RcvBufSize; - unsigned long long RxIntMemBase; - unsigned long long RxIntMemSize; - unsigned long long RcvPartitionKey; - unsigned long long Reserved3; - unsigned long long RcvPktLEDCnt; - unsigned long long Reserved4[8]; - unsigned long long SendCtrl; - unsigned long long SendPIOBufBase; - unsigned long long SendPIOSize; - unsigned long long SendPIOBufCnt; - unsigned long long SendPIOAvailAddr; - unsigned long long TxIntMemBase; - unsigned long long TxIntMemSize; - unsigned long long Reserved5; - unsigned long long PCIeRBufTestReg0; - unsigned long long PCIeRBufTestReg1; - unsigned long long Reserved51[6]; - unsigned long long SendBufferError; - unsigned long long SendBufferErrorCONT1; - unsigned long long Reserved6SBE[6]; - unsigned long long RcvHdrAddr0; - unsigned long long RcvHdrAddr1; - unsigned long long RcvHdrAddr2; - unsigned long long RcvHdrAddr3; - unsigned long long RcvHdrAddr4; - unsigned long long Reserved7RHA[11]; - unsigned long long RcvHdrTailAddr0; - unsigned long long RcvHdrTailAddr1; - unsigned long long RcvHdrTailAddr2; - unsigned long long RcvHdrTailAddr3; - unsigned long long RcvHdrTailAddr4; - unsigned long long Reserved8RHTA[11]; - unsigned long long Reserved9SW[8]; - unsigned long long SerdesConfig0; - unsigned long long SerdesConfig1; - unsigned long long SerdesStatus; - unsigned long long XGXSConfig; - unsigned long long IBPLLCfg; - unsigned long long Reserved10SW2[3]; - unsigned long long PCIEQ0SerdesConfig0; - unsigned long long PCIEQ0SerdesConfig1; - unsigned long long PCIEQ0SerdesStatus; - unsigned long long Reserved11; - unsigned long long PCIEQ1SerdesConfig0; - unsigned long long PCIEQ1SerdesConfig1; - unsigned long long PCIEQ1SerdesStatus; - unsigned long long Reserved12; -}; - -struct _infinipath_do_not_use_counters { - __u64 LBIntCnt; - __u64 LBFlowStallCnt; - __u64 Reserved1; - __u64 TxUnsupVLErrCnt; - __u64 TxDataPktCnt; - __u64 TxFlowPktCnt; - __u64 TxDwordCnt; - __u64 TxLenErrCnt; - __u64 TxMaxMinLenErrCnt; - __u64 TxUnderrunCnt; - __u64 TxFlowStallCnt; - __u64 TxDroppedPktCnt; - __u64 RxDroppedPktCnt; - __u64 RxDataPktCnt; - __u64 RxFlowPktCnt; - __u64 
RxDwordCnt; - __u64 RxLenErrCnt; - __u64 RxMaxMinLenErrCnt; - __u64 RxICRCErrCnt; - __u64 RxVCRCErrCnt; - __u64 RxFlowCtrlErrCnt; - __u64 RxBadFormatCnt; - __u64 RxLinkProblemCnt; - __u64 RxEBPCnt; - __u64 RxLPCRCErrCnt; - __u64 RxBufOvflCnt; - __u64 RxTIDFullErrCnt; - __u64 RxTIDValidErrCnt; - __u64 RxPKeyMismatchCnt; - __u64 RxP0HdrEgrOvflCnt; - __u64 RxP1HdrEgrOvflCnt; - __u64 RxP2HdrEgrOvflCnt; - __u64 RxP3HdrEgrOvflCnt; - __u64 RxP4HdrEgrOvflCnt; - __u64 RxP5HdrEgrOvflCnt; - __u64 RxP6HdrEgrOvflCnt; - __u64 RxP7HdrEgrOvflCnt; - __u64 RxP8HdrEgrOvflCnt; - __u64 Reserved6; - __u64 Reserved7; - __u64 IBStatusChangeCnt; - __u64 IBLinkErrRecoveryCnt; - __u64 IBLinkDownedCnt; - __u64 IBSymbolErrCnt; -}; - -#define IPATH_KREG_OFFSET(field) (offsetof( \ - struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) -#define IPATH_CREG_OFFSET(field) (offsetof( \ - struct _infinipath_do_not_use_counters, field) / sizeof(u64)) - -static const struct ipath_kregs ipath_pe_kregs = { - .kr_control = IPATH_KREG_OFFSET(Control), - .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase), - .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect), - .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear), - .kr_errormask = IPATH_KREG_OFFSET(ErrorMask), - .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus), - .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl), - .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus), - .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear), - .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask), - .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut), - .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus), - .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl), - .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear), - .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask), - .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus), - .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl), - .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus), - .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked), - .kr_intclear = IPATH_KREG_OFFSET(IntClear), - .kr_intmask = IPATH_KREG_OFFSET(IntMask), - .kr_intstatus = IPATH_KREG_OFFSET(IntStatus), - .kr_mdio = IPATH_KREG_OFFSET(MDIO), - .kr_pagealign = IPATH_KREG_OFFSET(PageAlign), - .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey), - .kr_portcnt = IPATH_KREG_OFFSET(PortCnt), - .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP), - .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase), - .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize), - .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl), - .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase), - .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt), - .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt), - .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize), - .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize), - .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase), - .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize), - .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase), - .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt), - .kr_revision = IPATH_KREG_OFFSET(Revision), - .kr_scratch = IPATH_KREG_OFFSET(Scratch), - .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError), - .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl), - .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendPIOAvailAddr), - .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendPIOBufBase), - .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendPIOBufCnt), - .kr_sendpiosize = IPATH_KREG_OFFSET(SendPIOSize), - .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase), - .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase), - .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize), - .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase), - 
.kr_serdesconfig0 = IPATH_KREG_OFFSET(SerdesConfig0), - .kr_serdesconfig1 = IPATH_KREG_OFFSET(SerdesConfig1), - .kr_serdesstatus = IPATH_KREG_OFFSET(SerdesStatus), - .kr_xgxsconfig = IPATH_KREG_OFFSET(XGXSConfig), - .kr_ibpllcfg = IPATH_KREG_OFFSET(IBPLLCfg), - - /* - * These should not be used directly via ipath_write_kreg64(), - * use them with ipath_write_kreg64_port(), - */ - .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), - .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), - - /* The rcvpktled register controls one of the debug port signals, so - * a packet activity LED can be connected to it. */ - .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt), - .kr_pcierbuftestreg0 = IPATH_KREG_OFFSET(PCIeRBufTestReg0), - .kr_pcierbuftestreg1 = IPATH_KREG_OFFSET(PCIeRBufTestReg1), - .kr_pcieq0serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig0), - .kr_pcieq0serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ0SerdesConfig1), - .kr_pcieq0serdesstatus = IPATH_KREG_OFFSET(PCIEQ0SerdesStatus), - .kr_pcieq1serdesconfig0 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig0), - .kr_pcieq1serdesconfig1 = IPATH_KREG_OFFSET(PCIEQ1SerdesConfig1), - .kr_pcieq1serdesstatus = IPATH_KREG_OFFSET(PCIEQ1SerdesStatus) -}; - -static const struct ipath_cregs ipath_pe_cregs = { - .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt), - .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt), - .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt), - .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt), - .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt), - .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt), - .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt), - .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt), - .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt), - .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt), - .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt), - .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt), - .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt), - .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt), - .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt), - .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt), - .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt), - .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt), - .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt), - .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt), - .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt), - .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt), - .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt), - .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt), - .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt), - .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt), - .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt), - .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt), - .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt), - .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt), - .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt), - .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt), - .cr_ibsymbolerrcnt = IPATH_CREG_OFFSET(IBSymbolErrCnt) -}; - -/* kr_control bits */ -#define INFINIPATH_C_RESET 1U - -/* kr_intstatus, kr_intclear, kr_intmask bits */ -#define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1) -#define INFINIPATH_I_RCVURG_SHIFT 0 -#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1) -#define INFINIPATH_I_RCVAVAIL_SHIFT 12 - -/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ -#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL -#define INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT 0 -#define 
INFINIPATH_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL -#define INFINIPATH_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL -#define INFINIPATH_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL -#define INFINIPATH_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL -#define INFINIPATH_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL -#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL -#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL -#define INFINIPATH_HWE_PCIE1PLLFAILED 0x0400000000000000ULL -#define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL -#define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL - -#define IBA6120_IBCS_LINKTRAININGSTATE_MASK 0xf -#define IBA6120_IBCS_LINKSTATE_SHIFT 4 - -/* kr_extstatus bits */ -#define INFINIPATH_EXTS_FREQSEL 0x2 -#define INFINIPATH_EXTS_SERDESSEL 0x4 -#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 -#define INFINIPATH_EXTS_MEMBIST_FOUND 0x0000000000008000 - -/* kr_xgxsconfig bits */ -#define INFINIPATH_XGXS_RESET 0x5ULL - -#define _IPATH_GPIO_SDA_NUM 1 -#define _IPATH_GPIO_SCL_NUM 0 - -#define IPATH_GPIO_SDA (1ULL << \ - (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) -#define IPATH_GPIO_SCL (1ULL << \ - (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) - -#define INFINIPATH_RT_BUFSIZE_MASK 0xe0000000ULL -#define INFINIPATH_RT_BUFSIZE_SHIFTVAL(tid) \ - ((((tid) & INFINIPATH_RT_BUFSIZE_MASK) >> 29) + 11 - 1) -#define INFINIPATH_RT_BUFSIZE(tid) (1 << INFINIPATH_RT_BUFSIZE_SHIFTVAL(tid)) -#define INFINIPATH_RT_IS_VALID(tid) \ - (((tid) & INFINIPATH_RT_BUFSIZE_MASK) && \ - ((((tid) & INFINIPATH_RT_BUFSIZE_MASK) != INFINIPATH_RT_BUFSIZE_MASK))) -#define INFINIPATH_RT_ADDR_MASK 0x1FFFFFFFULL /* 29 bits valid */ -#define INFINIPATH_RT_ADDR_SHIFT 10 - -#define INFINIPATH_R_INTRAVAIL_SHIFT 16 -#define INFINIPATH_R_TAILUPD_SHIFT 31 - -/* 6120 specific hardware errors... */ -static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { - INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"), - INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"), - /* - * In practice, it's unlikely wthat we'll see PCIe PLL, or bus - * parity or memory parity error failures, because most likely we - * won't be able to talk to the core of the chip. Nonetheless, we - * might see them, if they are in parts of the PCIe core that aren't - * essential. - */ - INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"), - INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"), - INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"), - INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"), - INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"), - INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"), - INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), -}; - -#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) -#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \ - << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) - -static void ipath_pe_put_tid_2(struct ipath_devdata *, u64 __iomem *, - u32, unsigned long); - -/* - * On platforms using this chip, and not having ordered WC stores, we - * can get TXE parity errors due to speculative reads to the PIO buffers, - * and this, due to a chip bug can result in (many) false parity error - * reports. So it's a debug print on those, and an info print on systems - * where the speculative reads don't occur. 
- */ -static void ipath_pe_txe_recover(struct ipath_devdata *dd) -{ - if (ipath_unordered_wc()) - ipath_dbg("Recovering from TXE PIO parity error\n"); - else { - ++ipath_stats.sps_txeparity; - dev_info(&dd->pcidev->dev, - "Recovering from TXE PIO parity error\n"); - } -} - - -/** - * ipath_pe_handle_hwerrors - display hardware errors. - * @dd: the infinipath device - * @msg: the output buffer - * @msgl: the size of the output buffer - * - * Use same msg buffer as regular errors to avoid excessive stack - * use. Most hardware errors are catastrophic, but for right now, - * we'll print them and continue. We reuse the same message buffer as - * ipath_handle_errors() to avoid excessive stack usage. - */ -static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, - size_t msgl) -{ - ipath_err_t hwerrs; - u32 bits, ctrl; - int isfatal = 0; - char bitsmsg[64]; - int log_idx; - - hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); - if (!hwerrs) { - /* - * better than printing cofusing messages - * This seems to be related to clearing the crc error, or - * the pll error during init. - */ - ipath_cdbg(VERBOSE, "Called but no hardware errors set\n"); - return; - } else if (hwerrs == ~0ULL) { - ipath_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); - return; - } - ipath_stats.sps_hwerrs++; - - /* Always clear the error status register, except MEMBISTFAIL, - * regardless of whether we continue or stop using the chip. - * We want that set so we know it failed, even across driver reload. - * We'll still ignore it in the hwerrmask. We do this partly for - * diagnostics, but also for support */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - hwerrs&~INFINIPATH_HWE_MEMBISTFAILED); - - hwerrs &= dd->ipath_hwerrmask; - - /* We log some errors to EEPROM, check if we have any of those. */ - for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) - if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log) - ipath_inc_eeprom_err(dd, log_idx, 1); - - /* - * make sure we get this much out, unless told to be quiet, - * or it's occurred within the last 5 seconds - */ - if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY | - RXE_EAGER_PARITY)) || - (ipath_debug & __IPATH_VERBDBG)) - dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); - dd->ipath_lasthwerror |= hwerrs; - - if (hwerrs & ~dd->ipath_hwe_bitsextant) - ipath_dev_err(dd, "hwerror interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (hwerrs & ~dd->ipath_hwe_bitsextant)); - - ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); - if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { - /* - * parity errors in send memory are recoverable, - * just cancel the send (if indicated in * sendbuffererror), - * count the occurrence, unfreeze (if no other handled - * hardware error bits are set), and continue. They can - * occur if a processor speculative read is done to the PIO - * buffer while we are sending a packet, for example. 
- */ - if (hwerrs & TXE_PIO_PARITY) { - ipath_pe_txe_recover(dd); - hwerrs &= ~TXE_PIO_PARITY; - } - if (!hwerrs) { - static u32 freeze_cnt; - - freeze_cnt++; - ipath_dbg("Clearing freezemode on ignored or recovered " - "hardware error (%u)\n", freeze_cnt); - ipath_clear_freeze(dd); - } - } - - *msg = '\0'; - - if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { - strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", - msgl); - /* ignore from now on, so disable until driver reloaded */ - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED; - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - } - - ipath_format_hwerrors(hwerrs, - ipath_6120_hwerror_msgs, - sizeof(ipath_6120_hwerror_msgs)/ - sizeof(ipath_6120_hwerror_msgs[0]), - msg, msgl); - - if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK - << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_PCIEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, - "[PCIe Mem Parity Errs %x] ", bits); - strlcat(msg, bitsmsg, msgl); - } - -#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \ - INFINIPATH_HWE_COREPLL_RFSLIP ) - - if (hwerrs & _IPATH_PLL_FAIL) { - snprintf(bitsmsg, sizeof bitsmsg, - "[PLL failed (%llx), InfiniPath hardware unusable]", - (unsigned long long) hwerrs & _IPATH_PLL_FAIL); - strlcat(msg, bitsmsg, msgl); - /* ignore from now on, so disable until driver reloaded */ - dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - } - - if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) { - /* - * If it occurs, it is left masked since the external - * interface is unused - */ - dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED; - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - } - - if (hwerrs) { - /* - * if any set that we aren't ignoring; only - * make the complaint once, in case it's stuck - * or recurring, and we get here multiple - * times. - */ - ipath_dev_err(dd, "%s hardware error\n", msg); - if (dd->ipath_flags & IPATH_INITTED) { - ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); - ipath_setup_pe_setextled(dd, - INFINIPATH_IBCS_L_STATE_DOWN, - INFINIPATH_IBCS_LT_STATE_DISABLED); - ipath_dev_err(dd, "Fatal Hardware Error (freeze " - "mode), no longer usable, SN %.16s\n", - dd->ipath_serial); - isfatal = 1; - } - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - /* mark as having had error */ - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - /* - * mark as not usable, at a minimum until driver - * is reloaded, probably until reboot, since no - * other reset is possible. - */ - dd->ipath_flags &= ~IPATH_INITTED; - } else - *msg = 0; /* recovered from all of them */ - - if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg && msg) { - /* - * for /sys status file ; if no trailing brace is copied, - * we'll know it was truncated. 
- */
-		snprintf(dd->ipath_freezemsg, dd->ipath_freezelen,
-			 "{%s}", msg);
-}
-
-/**
- * ipath_pe_boardname - fill in the board name
- * @dd: the infinipath device
- * @name: the output buffer
- * @namelen: the size of the output buffer
- *
- * info is based on the board revision register
- */
-static int ipath_pe_boardname(struct ipath_devdata *dd, char *name,
-			      size_t namelen)
-{
-	char *n = NULL;
-	u8 boardrev = dd->ipath_boardrev;
-	int ret;
-
-	switch (boardrev) {
-	case 0:
-		n = "InfiniPath_Emulation";
-		break;
-	case 1:
-		n = "InfiniPath_QLE7140-Bringup";
-		break;
-	case 2:
-		n = "InfiniPath_QLE7140";
-		break;
-	case 3:
-		n = "InfiniPath_QMI7140";
-		break;
-	case 4:
-		n = "InfiniPath_QEM7140";
-		break;
-	case 5:
-		n = "InfiniPath_QMH7140";
-		break;
-	case 6:
-		n = "InfiniPath_QLE7142";
-		break;
-	default:
-		ipath_dev_err(dd,
-			      "Don't yet know about board with ID %u\n",
-			      boardrev);
-		snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u",
-			 boardrev);
-		break;
-	}
-	if (n)
-		snprintf(name, namelen, "%s", n);
-
-	if (dd->ipath_majrev != 4 || !dd->ipath_minrev ||
-	    dd->ipath_minrev > 2) {
-		ipath_dev_err(dd, "Unsupported InfiniPath hardware revision %u.%u!\n",
-			      dd->ipath_majrev, dd->ipath_minrev);
-		ret = 1;
-	} else {
-		ret = 0;
-		if (dd->ipath_minrev >= 2)
-			dd->ipath_f_put_tid = ipath_pe_put_tid_2;
-	}
-
-	/*
-	 * Set here, not in ipath_init_*_funcs, because we have to do
-	 * it after we can read chip registers.
-	 */
-	dd->ipath_ureg_align =
-		ipath_read_kreg32(dd, dd->ipath_kregs->kr_pagealign);
-
-	return ret;
-}
-
-/**
- * ipath_pe_init_hwerrors - enable hardware errors
- * @dd: the infinipath device
- *
- * Now that we have finished initializing everything that might reasonably
- * cause a hardware error, and cleared those error bits as they occur,
- * we can enable hardware errors in the mask (potentially enabling
- * freeze mode), and enable hardware errors as errors (along with
- * everything else) in errormask.
- */
-static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
-{
-	ipath_err_t val;
-	u64 extsval;
-
-	extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-
-	if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
-		ipath_dev_err(dd, "MemBIST did not complete!\n");
-	if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND)
-		ipath_dbg("MemBIST corrected\n");
-
-	val = ~0ULL;	/* barring bugs, all hwerrors become interrupts */
-
-	if (!dd->ipath_boardrev)	/* no PLL for Emulator */
-		val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
-
-	if (dd->ipath_minrev < 2) {
-		/* workaround bug 9460 in internal interface bus parity
-		 * checking.  Fixed (HW bug 9490) in Rev2.
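
ipath_pe_init_hwerrors() above builds the error mask subtractively: start from all-ones, so any new or unanticipated error source interrupts by default, then carve out the sources known to be absent or broken on a given board or revision. The shape of the pattern, with hypothetical bit names standing in for the INFINIPATH_HWE_* ones:

	typedef unsigned long long hwerr_t;

	#define HWE_SERDES_PLL_FAILED	(1ULL << 60)	/* hypothetical bits */
	#define HWE_BUS_PARITY_RADM	(1ULL << 32)

	static hwerr_t build_hwerrmask(unsigned boardrev, unsigned minrev)
	{
		hwerr_t mask = ~0ULL;	/* everything traps by default */

		if (!boardrev)		/* emulator has no SerDes PLL */
			mask &= ~HWE_SERDES_PLL_FAILED;
		if (minrev < 2)		/* parity checker broken pre-Rev2 */
			mask &= ~HWE_BUS_PARITY_RADM;
		return mask;
	}
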
- */ - val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM; - } - dd->ipath_hwerrmask = val; -} - -/** - * ipath_pe_bringup_serdes - bring up the serdes - * @dd: the infinipath device - */ -static int ipath_pe_bringup_serdes(struct ipath_devdata *dd) -{ - u64 val, config1, prev_val; - int ret = 0; - - ipath_dbg("Trying to bringup serdes\n"); - - if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) & - INFINIPATH_HWE_SERDESPLLFAILED) { - ipath_dbg("At start, serdes PLL failed bit set " - "in hwerrstatus, clearing and continuing\n"); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - INFINIPATH_HWE_SERDESPLLFAILED); - } - - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); - config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1); - - ipath_cdbg(VERBOSE, "SerDes status config0=%llx config1=%llx, " - "xgxsconfig %llx\n", (unsigned long long) val, - (unsigned long long) config1, (unsigned long long) - ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); - - /* - * Force reset on, also set rxdetect enable. Must do before reading - * serdesstatus at least for simulation, or some of the bits in - * serdes status will come back as undefined and cause simulation - * failures - */ - val |= INFINIPATH_SERDC0_RESET_PLL | INFINIPATH_SERDC0_RXDETECT_EN - | INFINIPATH_SERDC0_L1PWR_DN; - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); - /* be sure chip saw it */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - udelay(5); /* need pll reset set at least for a bit */ - /* - * after PLL is reset, set the per-lane Resets and TxIdle and - * clear the PLL reset and rxdetect (to get falling edge). - * Leave L1PWR bits set (permanently) - */ - val &= ~(INFINIPATH_SERDC0_RXDETECT_EN | INFINIPATH_SERDC0_RESET_PLL - | INFINIPATH_SERDC0_L1PWR_DN); - val |= INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE; - ipath_cdbg(VERBOSE, "Clearing pll reset and setting lane resets " - "and txidle (%llx)\n", (unsigned long long) val); - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); - /* be sure chip saw it */ - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - /* need PLL reset clear for at least 11 usec before lane - * resets cleared; give it a few more to be sure */ - udelay(15); - val &= ~(INFINIPATH_SERDC0_RESET_MASK | INFINIPATH_SERDC0_TXIDLE); - - ipath_cdbg(VERBOSE, "Clearing lane resets and txidle " - "(writing %llx)\n", (unsigned long long) val); - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val); - /* be sure chip saw it */ - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - prev_val = val; - if (val & INFINIPATH_XGXS_RESET) - val &= ~INFINIPATH_XGXS_RESET; - if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) & - INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) { - /* need to compensate for Tx inversion in partner */ - val &= ~(INFINIPATH_XGXS_RX_POL_MASK << - INFINIPATH_XGXS_RX_POL_SHIFT); - val |= dd->ipath_rx_pol_inv << - INFINIPATH_XGXS_RX_POL_SHIFT; - } - if (val != prev_val) - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); - - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0); - - /* clear current and de-emphasis bits */ - config1 &= ~0x0ffffffff00ULL; - /* set current to 20ma */ - config1 |= 0x00000000000ULL; - /* set de-emphasis to -5.68dB */ - config1 |= 0x0cccc000000ULL; - ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig1, config1); - - ipath_cdbg(VERBOSE, "done: SerDes status config0=%llx " - "config1=%llx, sstatus=%llx 
xgxs=%llx\n",
-		   (unsigned long long) val, (unsigned long long) config1,
-		   (unsigned long long)
-		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesstatus),
-		   (unsigned long long)
-		   ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig));
-
-	return ret;
-}
-
-/**
- * ipath_pe_quiet_serdes - set serdes to txidle
- * @dd: the infinipath device
- * Called when driver is being unloaded
- */
-static void ipath_pe_quiet_serdes(struct ipath_devdata *dd)
-{
-	u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
-
-	val |= INFINIPATH_SERDC0_TXIDLE;
-	ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
-		  (unsigned long long) val);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
-}
-
-static int ipath_pe_intconfig(struct ipath_devdata *dd)
-{
-	u32 chiprev;
-
-	/*
-	 * If the chip supports added error indication via GPIO pins,
-	 * enable interrupts on those bits so the interrupt routine
-	 * can count the events.  Also set a flag so the interrupt
-	 * routine knows they are expected.
-	 */
-	chiprev = dd->ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT;
-	if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) {
-		/* Rev2+ reports extra errors via internal GPIO pins */
-		dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
-		dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-				 dd->ipath_gpio_mask);
-	}
-	return 0;
-}
-
-/**
- * ipath_setup_pe_setextled - set the state of the two external LEDs
- * @dd: the infinipath device
- * @lst: the L state
- * @ltst: the LT state
- *
- * These LEDs indicate the physical and logical state of the IB link.
- * For this chip (at least with recommended board pinouts), LED1
- * is Yellow (logical state) and LED2 is Green (physical state).
- *
- * Note:  We try to match the Mellanox HCA LED behavior as best
- * we can.  Green indicates physical link state is OK (something is
- * plugged in, and we can train).
- * Amber indicates the link is logically up (ACTIVE).
- * Mellanox further blinks the amber LED to indicate data packet
- * activity, but we have no hardware support for that, so it would
- * require waking up every 10-20 msecs and checking the counters
- * on the chip, and then turning the LED off if appropriate.  That's
- * visible overhead, so not something we will do.
- */
-static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
-				     u64 ltst)
-{
-	u64 extctl;
-	unsigned long flags = 0;
-
-	/* the diags use the LED to indicate diag info, so we leave
-	 * the external LED alone when the diags are running */
-	if (ipath_diag_inuse)
-		return;
-
-	/* Allow override of LED display, e.g. for locating the system in a rack */
-	if (dd->ipath_led_override) {
-		ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
-			? INFINIPATH_IBCS_LT_STATE_LINKUP
-			: INFINIPATH_IBCS_LT_STATE_DISABLED;
-		lst = (dd->ipath_led_override & IPATH_LED_LOG)
-			? INFINIPATH_IBCS_L_STATE_ACTIVE
-			: INFINIPATH_IBCS_L_STATE_DOWN;
-	}
-
-	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
-	extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
-				       INFINIPATH_EXTC_LED2PRIPORT_ON);
-
-	if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP)
-		extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON;
-	if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE)
-		extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
-	dd->ipath_extctrl = extctl;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
-	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
-}
-
-/**
- * ipath_setup_pe_cleanup - clean up any per-chip chip-specific stuff
- * @dd: the infinipath device
- *
- * This is called during driver unload.
- * We do the pci_disable_msi here, not in generic code, because it
- * isn't used for the HT chips.  If we do end up needing pci_enable_msi
- * at some point in the future for HT, we'll move the call back
- * into the main init_one code.
- */
-static void ipath_setup_pe_cleanup(struct ipath_devdata *dd)
-{
-	dd->ipath_msi_lo = 0;	/* just in case unload fails */
-	pci_disable_msi(dd->pcidev);
-}
-
-static void ipath_6120_pcie_params(struct ipath_devdata *dd)
-{
-	u16 linkstat, speed;
-	int pos;
-
-	pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP);
-	if (!pos) {
-		ipath_dev_err(dd, "Can't find PCI Express capability!\n");
-		goto bail;
-	}
-
-	pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA,
-			     &linkstat);
-	/*
-	 * speed is in bits 0-3, link width in bits 4-8;
-	 * no defines for them in headers
-	 */
-	speed = linkstat & 0xf;
-	linkstat >>= 4;
-	linkstat &= 0x1f;
-	dd->ipath_lbus_width = linkstat;
-
-	switch (speed) {
-	case 1:
-		dd->ipath_lbus_speed = 2500;	/* Gen1, 2.5GHz */
-		break;
-	case 2:
-		dd->ipath_lbus_speed = 5000;	/* Gen2, 5GHz */
-		break;
-	default:	/* not defined, assume gen1 */
-		dd->ipath_lbus_speed = 2500;
-		break;
-	}
-
-	if (linkstat < 8)
-		ipath_dev_err(dd,
-			      "PCIe width %u (x8 HCA), performance reduced\n",
-			      linkstat);
-	else
-		ipath_cdbg(VERBOSE, "PCIe speed %u width %u (x8 HCA)\n",
-			   dd->ipath_lbus_speed, linkstat);
-
-	if (speed != 1)
-		ipath_dev_err(dd,
-			      "PCIe linkspeed %u is incorrect; "
-			      "should be 1 (2500)!\n", speed);
-bail:
-	/* fill in string, even on errors */
-	snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info),
-		 "PCIe,%uMHz,x%u\n",
-		 dd->ipath_lbus_speed,
-		 dd->ipath_lbus_width);
-
-	return;
-}
-
-/**
- * ipath_setup_pe_config - setup PCIe config related stuff
- * @dd: the infinipath device
- * @pdev: the PCI device
- *
- * The pci_enable_msi() call will fail on systems with MSI quirks
- * such as those with AMD8131, even if the device of interest is not
- * attached to that device (in the 2.6.13 - 2.6.15 kernels, at least;
- * fixed late in 2.6.16).
- * All that can be done is to edit the kernel source to remove the quirk
- * check until that is fixed.
- * We do not need to call enable_msi() for our HyperTransport chip,
- * even though it uses MSI, and we want to avoid the quirk warning, so
- * we call enable_msi only for PCIe.  If we do end up needing
- * pci_enable_msi at some point in the future for HT, we'll move the
- * call back into the main init_one code.
- * We save the msi lo and hi values, so we can restore them after
- * chip reset (the kernel PCI infrastructure doesn't yet handle that
- * correctly).
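
The open-coded shifts in ipath_6120_pcie_params() above exist because the kernel headers of the day had no field macros for Link Status (today one would use PCI_EXP_LNKSTA_CLS and PCI_EXP_LNKSTA_NLW). The same decode as a tiny standalone helper, kept bit-compatible with the driver's masks; the function name is hypothetical:

	/*
	 * Decode a PCIe Link Status value the way the driver above does:
	 * current link speed in bits 3:0, negotiated width from bit 4 up
	 * (the driver masks five width bits).
	 */
	static void decode_lnksta(u16 linkstat, unsigned *speed_mbps,
				  unsigned *width)
	{
		unsigned speed = linkstat & 0xf;

		*width = (linkstat >> 4) & 0x1f;
		*speed_mbps = (speed == 2) ? 5000 : 2500; /* 2 = Gen2, else assume Gen1 */
	}
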
- */ -static int ipath_setup_pe_config(struct ipath_devdata *dd, - struct pci_dev *pdev) -{ - int pos, ret; - - dd->ipath_msi_lo = 0; /* used as a flag during reset processing */ - ret = pci_enable_msi(dd->pcidev); - if (ret) - ipath_dev_err(dd, "pci_enable_msi failed: %d, " - "interrupts may not work\n", ret); - /* continue even if it fails, we may still be OK... */ - dd->ipath_irq = pdev->irq; - - if ((pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) { - u16 control; - pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO, - &dd->ipath_msi_lo); - pci_read_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI, - &dd->ipath_msi_hi); - pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, - &control); - /* now save the data (vector) info */ - pci_read_config_word(dd->pcidev, - pos + ((control & PCI_MSI_FLAGS_64BIT) - ? 12 : 8), - &dd->ipath_msi_data); - ipath_cdbg(VERBOSE, "Read msi data 0x%x from config offset " - "0x%x, control=0x%x\n", dd->ipath_msi_data, - pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8), - control); - /* we save the cachelinesize also, although it doesn't - * really matter */ - pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, - &dd->ipath_pci_cacheline); - } else - ipath_dev_err(dd, "Can't find MSI capability, " - "can't save MSI settings for reset\n"); - - ipath_6120_pcie_params(dd); - - dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; - dd->ipath_link_speed_supported = IPATH_IB_SDR; - dd->ipath_link_width_enabled = IB_WIDTH_4X; - dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported; - /* these can't change for this chip, so set once */ - dd->ipath_link_width_active = dd->ipath_link_width_enabled; - dd->ipath_link_speed_active = dd->ipath_link_speed_enabled; - return 0; -} - -static void ipath_init_pe_variables(struct ipath_devdata *dd) -{ - /* - * setup the register offsets, since they are different for each - * chip - */ - dd->ipath_kregs = &ipath_pe_kregs; - dd->ipath_cregs = &ipath_pe_cregs; - - /* - * bits for selecting i2c direction and values, - * used for I2C serial flash - */ - dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; - dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; - dd->ipath_gpio_sda = IPATH_GPIO_SDA; - dd->ipath_gpio_scl = IPATH_GPIO_SCL; - - /* - * Fill in data for field-values that change in newer chips. - * We dynamically specify only the mask for LINKTRAININGSTATE - * and only the shift for LINKSTATE, as they are the only ones - * that change. Also precalculate the 3 link states of interest - * and the combined mask. - */ - dd->ibcs_ls_shift = IBA6120_IBCS_LINKSTATE_SHIFT; - dd->ibcs_lts_mask = IBA6120_IBCS_LINKTRAININGSTATE_MASK; - dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK << - dd->ibcs_ls_shift) | dd->ibcs_lts_mask; - dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP << - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | - (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift); - dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP << - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | - (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift); - dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP << - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | - (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift); - - /* - * Fill in data for ibcc field-values that change in newer chips. - * We dynamically specify only the mask for LINKINITCMD - * and only the shift for LINKCMD and MAXPKTLEN, as they are - * the only ones that change. 
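
The payoff of precalculating ibcs_mask together with the ib_init/ib_arm/ib_active patterns above is that every later link-state query collapses to a single mask-and-compare. A hypothetical helper built on those fields (not itself part of the driver):

	/*
	 * True iff IBCStatus reports the link ACTIVE; dd->ibcs_mask covers
	 * both the LinkState and LinkTrainingState fields, so one compare
	 * against the precomputed pattern is sufficient.
	 */
	static inline int ib_link_is_active(struct ipath_devdata *dd, u64 ibcs)
	{
		return (ibcs & dd->ibcs_mask) == dd->ib_active;
	}
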
- */
-	dd->ibcc_lic_mask = INFINIPATH_IBCC_LINKINITCMD_MASK;
-	dd->ibcc_lc_shift = INFINIPATH_IBCC_LINKCMD_SHIFT;
-	dd->ibcc_mpl_shift = INFINIPATH_IBCC_MAXPKTLEN_SHIFT;
-
-	/* Fill in shifts for RcvCtrl. */
-	dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT;
-	dd->ipath_r_intravail_shift = INFINIPATH_R_INTRAVAIL_SHIFT;
-	dd->ipath_r_tailupd_shift = INFINIPATH_R_TAILUPD_SHIFT;
-	dd->ipath_r_portcfg_shift = 0;	/* Not on IBA6120 */
-
-	/* variables for sanity checking interrupts and errors */
-	dd->ipath_hwe_bitsextant =
-		(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-		 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
-		(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-		 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
-		(INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
-		 INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
-		INFINIPATH_HWE_PCIE1PLLFAILED |
-		INFINIPATH_HWE_PCIE0PLLFAILED |
-		INFINIPATH_HWE_PCIEPOISONEDTLP |
-		INFINIPATH_HWE_PCIECPLTIMEOUT |
-		INFINIPATH_HWE_PCIEBUSPARITYXTLH |
-		INFINIPATH_HWE_PCIEBUSPARITYXADM |
-		INFINIPATH_HWE_PCIEBUSPARITYRADM |
-		INFINIPATH_HWE_MEMBISTFAILED |
-		INFINIPATH_HWE_COREPLL_FBSLIP |
-		INFINIPATH_HWE_COREPLL_RFSLIP |
-		INFINIPATH_HWE_SERDESPLLFAILED |
-		INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
-		INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
-	dd->ipath_i_bitsextant =
-		(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
-		(INFINIPATH_I_RCVAVAIL_MASK <<
-		 INFINIPATH_I_RCVAVAIL_SHIFT) |
-		INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
-		INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
-	dd->ipath_e_bitsextant =
-		INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
-		INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
-		INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
-		INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR |
-		INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP |
-		INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION |
-		INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
-		INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN |
-		INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK |
-		INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SMAXPKTLEN |
-		INFINIPATH_E_SUNDERRUN | INFINIPATH_E_SPKTLEN |
-		INFINIPATH_E_SDROPPEDSMPPKT | INFINIPATH_E_SDROPPEDDATAPKT |
-		INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM |
-		INFINIPATH_E_SUNSUPVL | INFINIPATH_E_IBSTATUSCHANGED |
-		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
-		INFINIPATH_E_HARDWARE;
-
-	dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
-	dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
-	dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT;
-	dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT;
-
-	/*
-	 * EEPROM error log 0 is TXE Parity errors, 1 is RXE Parity,
-	 * 2 is some miscellaneous errors, 3 is reserved for future use.
-	 */
-	dd->ipath_eep_st_masks[0].hwerrs_to_log =
-		INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
-		INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
-
-	/* Ignore errors in PIO/PBC on systems with unordered write-combining */
-	if (ipath_unordered_wc())
-		dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;
-
-	dd->ipath_eep_st_masks[1].hwerrs_to_log =
-		INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
-		INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
-
-	dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET;
-	dd->delay_mult = 2;	/* SDR, 4X, can't change */
-}
-
-/* setup the MSI stuff again after a reset.  I'd like to just call
- * pci_enable_msi() and request_irq() again, but when I do that, the
- * MSI enable bit doesn't get set in the command word, and we switch
- * to a different interrupt vector, which is confusing, so I instead
- * just do it all inline.
 Perhaps we can somehow tie this
- * into the PCIe hotplug support at some point.
- * Note: because I'm doing it all here, I don't call pci_disable_msi()
- * or free_irq() at the start of ipath_setup_pe_reset().
- */
-static int ipath_reinit_msi(struct ipath_devdata *dd)
-{
-	int pos;
-	u16 control;
-	int ret;
-
-	if (!dd->ipath_msi_lo) {
-		dev_info(&dd->pcidev->dev, "Can't restore MSI config, "
-			 "initial setup failed?\n");
-		ret = 0;
-		goto bail;
-	}
-
-	if (!(pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI))) {
-		ipath_dev_err(dd, "Can't find MSI capability, "
-			      "can't restore MSI settings\n");
-		ret = 0;
-		goto bail;
-	}
-	ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n",
-		   dd->ipath_msi_lo, pos + PCI_MSI_ADDRESS_LO);
-	pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO,
-			       dd->ipath_msi_lo);
-	ipath_cdbg(VERBOSE, "Writing msi_hi 0x%x to config offset 0x%x\n",
-		   dd->ipath_msi_hi, pos + PCI_MSI_ADDRESS_HI);
-	pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI,
-			       dd->ipath_msi_hi);
-	pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control);
-	if (!(control & PCI_MSI_FLAGS_ENABLE)) {
-		ipath_cdbg(VERBOSE, "MSI control at off %x was %x, "
-			   "setting MSI enable (%x)\n", pos + PCI_MSI_FLAGS,
-			   control, control | PCI_MSI_FLAGS_ENABLE);
-		control |= PCI_MSI_FLAGS_ENABLE;
-		pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS,
-				      control);
-	}
-	/* now rewrite the data (vector) info */
-	pci_write_config_word(dd->pcidev, pos +
-			      ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
-			      dd->ipath_msi_data);
-	/* we restore the cachelinesize also, although it doesn't really
-	 * matter */
-	pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE,
-			      dd->ipath_pci_cacheline);
-	/* and now set the pci master bit again */
-	pci_set_master(dd->pcidev);
-	ret = 1;
-
-bail:
-	return ret;
-}
-
-/* This routine sleeps, so it can only be called from user context, not
- * from interrupt context.  If we need interrupt context, we can split
- * it into two routines.
- */
-static int ipath_setup_pe_reset(struct ipath_devdata *dd)
-{
-	u64 val;
-	int i;
-	int ret;
-	u16 cmdval;
-
-	pci_read_config_word(dd->pcidev, PCI_COMMAND, &cmdval);
-
-	/* Use ERROR so it shows up in logs, etc. */
-	ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit);
-	/* keep chip from being accessed in a few places */
-	dd->ipath_flags &= ~(IPATH_INITTED | IPATH_PRESENT);
-	val = dd->ipath_control | INFINIPATH_C_RESET;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val);
-	mb();
-
-	for (i = 1; i <= 5; i++) {
-		int r;
-		/* allow MBIST, etc. to complete; longer on each retry.
-		 * We sometimes get machine checks from bus timeout if no
-		 * response, so for now, make it *really* long.
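
The retry loop that follows is the only reliable "is the chip back?" test after a reset: restore config space, then poll the Revision register (which reads as all-ones while the device is absent) with a growing settle delay. Distilled to its shape, with a hypothetical check callback:

	/*
	 * Sleep with a growing delay between attempts until check() says
	 * the device answered sanely; mirrors the post-reset loop below.
	 */
	static int settle_and_check(struct ipath_devdata *dd,
				    int (*check)(struct ipath_devdata *))
	{
		int i;

		for (i = 1; i <= 5; i++) {
			msleep(1000 + (1 + i) * 2000);	/* longer each retry */
			if (check(dd))
				return 1;	/* device is back */
		}
		return 0;			/* gave up */
	}
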
- */
-		msleep(1000 + (1 + i) * 2000);
-		if ((r =
-		     pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
-					    dd->ipath_pcibar0)))
-			ipath_dev_err(dd, "rewrite of BAR0 failed: %d\n",
-				      r);
-		if ((r =
-		     pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
-					    dd->ipath_pcibar1)))
-			ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n",
-				      r);
-		/* now re-enable memory access */
-		pci_write_config_word(dd->pcidev, PCI_COMMAND, cmdval);
-		if ((r = pci_enable_device(dd->pcidev)))
-			ipath_dev_err(dd, "pci_enable_device failed after "
-				      "reset: %d\n", r);
-		/*
-		 * whether it fully enabled or not, mark as present,
-		 * again (but not INITTED)
-		 */
-		dd->ipath_flags |= IPATH_PRESENT;
-		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision);
-		if (val == dd->ipath_revision) {
-			ipath_cdbg(VERBOSE, "Got matching revision "
-				   "register %llx on try %d\n",
-				   (unsigned long long) val, i);
-			ret = ipath_reinit_msi(dd);
-			goto bail;
-		}
-		/* Probably getting -1 back */
-		ipath_dbg("Didn't get expected revision register, "
-			  "got %llx, try %d\n", (unsigned long long) val,
-			  i + 1);
-	}
-	ret = 0; /* failed */
-
-bail:
-	if (ret)
-		ipath_6120_pcie_params(dd);
-	return ret;
-}
-
-/**
- * ipath_pe_put_tid - write a TID in chip
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
- * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for special locking etc.
- * It's used for both the full cleanup on exit, as well as the normal
- * setup and teardown.
- */
-static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
-			     u32 type, unsigned long pa)
-{
-	u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
-	unsigned long flags = 0; /* keep gcc quiet */
-	int tidx;
-	spinlock_t *tidlockp;
-
-	if (!dd->ipath_kregbase)
-		return;
-
-	if (pa != dd->ipath_tidinvalid) {
-		if (pa & ((1U << 11) - 1)) {
-			dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
-				 "not 2KB aligned!\n", pa);
-			return;
-		}
-		pa >>= 11;
-		/* paranoia check */
-		if (pa & ~INFINIPATH_RT_ADDR_MASK)
-			ipath_dev_err(dd,
-				      "BUG: Physical page address 0x%lx "
-				      "has bits set in 31-29\n", pa);
-
-		if (type == RCVHQ_RCV_TYPE_EAGER)
-			pa |= dd->ipath_tidtemplate;
-		else /* for now, always full 4KB page */
-			pa |= 2 << 29;
-	}
-
-	/*
-	 * Workaround chip bug 9437 by writing the scratch register
-	 * before and after the TID, and with an io write barrier.
-	 * We use a spinlock around the writes, so they can't intermix
-	 * with other TID (eager or expected) writes (the chip bug
-	 * is triggered by back to back TID writes).  Unfortunately, this
-	 * call can be done from interrupt level for the port 0 eager TIDs,
-	 * so we have to use irqsave locks.
-	 */
-	/*
-	 * Assumes tidptr always > ipath_egrtidbase
-	 * if type == RCVHQ_RCV_TYPE_EAGER.
-	 */
-	tidx = tidptr - dd->ipath_egrtidbase;
-
-	tidlockp = (type == RCVHQ_RCV_TYPE_EAGER && tidx < dd->ipath_rcvegrcnt)
-		? &dd->ipath_kernel_tid_lock : &dd->ipath_user_tid_lock;
-	spin_lock_irqsave(tidlockp, flags);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xfeeddeaf);
-	writel(pa, tidp32);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_scratch, 0xdeadbeef);
-	mmiowb();
-	spin_unlock_irqrestore(tidlockp, flags);
-}
-
-/**
- * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
- * @dd: the infinipath device
- * @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
- * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
- *
- * This exists as a separate routine to allow for selection of the
- * appropriate "flavor".  The static calls in cleanup just use the
- * revision-agnostic form, as they are not performance critical.
- */
-static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
-			       u32 type, unsigned long pa)
-{
-	u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
-	u32 tidx;
-
-	if (!dd->ipath_kregbase)
-		return;
-
-	if (pa != dd->ipath_tidinvalid) {
-		if (pa & ((1U << 11) - 1)) {
-			dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
-				 "not 2KB aligned!\n", pa);
-			return;
-		}
-		pa >>= 11;
-		/* paranoia check */
-		if (pa & ~INFINIPATH_RT_ADDR_MASK)
-			ipath_dev_err(dd,
-				      "BUG: Physical page address 0x%lx "
-				      "has bits set in 31-29\n", pa);
-
-		if (type == RCVHQ_RCV_TYPE_EAGER)
-			pa |= dd->ipath_tidtemplate;
-		else /* for now, always full 4KB page */
-			pa |= 2 << 29;
-	}
-	tidx = tidptr - dd->ipath_egrtidbase;
-	writel(pa, tidp32);
-	mmiowb();
-}
-
-
-/**
- * ipath_pe_clear_tids - clear all TID entries for a port, expected and eager
- * @dd: the infinipath device
- * @port: the port
- *
- * Clear all TID entries for a port, expected and eager.
- * Used from ipath_close().  On this chip, TIDs are only 32 bits,
- * not 64, but they are still on 64 bit boundaries, so tidbase
- * is declared as u64 * for the pointer math, even though we write 32 bits.
- */
-static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
-{
-	u64 __iomem *tidbase;
-	unsigned long tidinv;
-	int i;
-
-	if (!dd->ipath_kregbase)
-		return;
-
-	ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port);
-
-	tidinv = dd->ipath_tidinvalid;
-	tidbase = (u64 __iomem *)
-		((char __iomem *)(dd->ipath_kregbase) +
-		 dd->ipath_rcvtidbase +
-		 port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
-
-	for (i = 0; i < dd->ipath_rcvtidcnt; i++)
-		dd->ipath_f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
-				    tidinv);
-
-	tidbase = (u64 __iomem *)
-		((char __iomem *)(dd->ipath_kregbase) +
-		 dd->ipath_rcvegrbase +
-		 port * dd->ipath_rcvegrcnt * sizeof(*tidbase));
-
-	for (i = 0; i < dd->ipath_rcvegrcnt; i++)
-		dd->ipath_f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
-				    tidinv);
-}
-
-/**
- * ipath_pe_tidtemplate - setup constants for TID updates
- * @dd: the infinipath device
- *
- * We setup stuff that we use a lot, to avoid calculating each time
- */
-static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
-{
-	u32 egrsize = dd->ipath_rcvegrbufsize;
-
-	/* For now, we always allocate 4KB buffers (at init) so we can
-	 * receive max size packets.  We may want a module parameter to
-	 * specify 2KB or 4KB, and/or make it per port instead of per device,
-	 * for those who want to reduce memory footprint.  Note that
-	 * ipath_rcvhdrentsize must be large enough to hold the largest
-	 * IB header (currently 96 bytes) that we expect to handle (plus of
-	 * course the 2 dwords of RHF).
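
Both put_tid variants and the template setup that follows use the same TID encoding: the 2KB-aligned physical address shifted down by 11 bits, plus a size selector in bits 30:29 (1 = 2KB, 2 = 4KB). As an isolated sketch (hypothetical function, same bit layout as the code above):

	/* Encode a TID word: addr>>11 in the low bits, 2-bit size at bit 29. */
	static u32 encode_tid(unsigned long pa, int is_2k)
	{
		u32 tid = pa >> 11;		/* drop the 2KB alignment bits */

		tid |= (is_2k ? 1U : 2U) << 29;	/* size template: 1=2KB, 2=4KB */
		return tid;
	}
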
- */ - if (egrsize == 2048) - dd->ipath_tidtemplate = 1U << 29; - else if (egrsize == 4096) - dd->ipath_tidtemplate = 2U << 29; - else { - egrsize = 4096; - dev_info(&dd->pcidev->dev, "BUG: unsupported egrbufsize " - "%u, using %u\n", dd->ipath_rcvegrbufsize, - egrsize); - dd->ipath_tidtemplate = 2U << 29; - } - dd->ipath_tidinvalid = 0; -} - -static int ipath_pe_early_init(struct ipath_devdata *dd) -{ - dd->ipath_flags |= IPATH_4BYTE_TID; - if (ipath_unordered_wc()) - dd->ipath_flags |= IPATH_PIO_FLUSH_WC; - - /* - * For openfabrics, we need to be able to handle an IB header of - * 24 dwords. HT chip has arbitrary sized receive buffers, so we - * made them the same size as the PIO buffers. This chip does not - * handle arbitrary size buffers, so we need the header large enough - * to handle largest IB header, but still have room for a 2KB MTU - * standard IB packet. - */ - dd->ipath_rcvhdrentsize = 24; - dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; - dd->ipath_rhf_offset = 0; - dd->ipath_egrtidbase = (u64 __iomem *) - ((char __iomem *) dd->ipath_kregbase + dd->ipath_rcvegrbase); - - dd->ipath_rcvegrbufsize = ipath_mtu4096 ? 4096 : 2048; - /* - * the min() check here is currently a nop, but it may not always - * be, depending on just how we do ipath_rcvegrbufsize - */ - dd->ipath_ibmaxlen = min(ipath_mtu4096 ? dd->ipath_piosize4k : - dd->ipath_piosize2k, - dd->ipath_rcvegrbufsize + - (dd->ipath_rcvhdrentsize << 2)); - dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen; - - /* - * We can request a receive interrupt for 1 or - * more packets from current offset. For now, we set this - * up for a single packet. - */ - dd->ipath_rhdrhead_intr_off = 1ULL<<32; - - ipath_get_eeprom_info(dd); - - return 0; -} - -int __attribute__((weak)) ipath_unordered_wc(void) -{ - return 0; -} - -/** - * ipath_init_pe_get_base_info - set chip-specific flags for user code - * @pd: the infinipath port - * @kbase: ipath_base_info pointer - * - * We set the PCIE flag because the lower bandwidth on PCIe vs - * HyperTransport can affect some user packet algorithms. 
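
The __attribute__((weak)) on ipath_unordered_wc() just above is what lets this generic file supply a default: the weak definition returning 0 loses at link time to any strong, architecture-specific definition. The mechanism in miniature, with a hypothetical symbol name:

	/* Weak fallback: write-combining is ordered unless an arch overrides. */
	int __attribute__((weak)) arch_unordered_wc(void)
	{
		return 0;
	}

	/*
	 * An architecture file needs only a plain (strong) definition:
	 *
	 *	int arch_unordered_wc(void) { return 1; }
	 *
	 * and the linker will prefer it over the weak default.
	 */
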
- */ -static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase) -{ - struct ipath_base_info *kinfo = kbase; - struct ipath_devdata *dd; - - if (ipath_unordered_wc()) { - kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER; - ipath_cdbg(PROC, "Intel processor, forcing WC order\n"); - } - else - ipath_cdbg(PROC, "Not Intel processor, WC ordered\n"); - - if (pd == NULL) - goto done; - - dd = pd->port_dd; - -done: - kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE | - IPATH_RUNTIME_FORCE_PIOAVAIL | IPATH_RUNTIME_PIO_REGSWAPPED; - return 0; -} - -static void ipath_pe_free_irq(struct ipath_devdata *dd) -{ - free_irq(dd->ipath_irq, dd); - dd->ipath_irq = 0; -} - - -static struct ipath_message_header * -ipath_pe_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr) -{ - return (struct ipath_message_header *) - &rhf_addr[sizeof(u64) / sizeof(u32)]; -} - -static void ipath_pe_config_ports(struct ipath_devdata *dd, ushort cfgports) -{ - dd->ipath_portcnt = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); - dd->ipath_p0_rcvegrcnt = - ipath_read_kreg32(dd, dd->ipath_kregs->kr_rcvegrcnt); -} - -static void ipath_pe_read_counters(struct ipath_devdata *dd, - struct infinipath_counters *cntrs) -{ - cntrs->LBIntCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBIntCnt)); - cntrs->LBFlowStallCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(LBFlowStallCnt)); - cntrs->TxSDmaDescCnt = 0; - cntrs->TxUnsupVLErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnsupVLErrCnt)); - cntrs->TxDataPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDataPktCnt)); - cntrs->TxFlowPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowPktCnt)); - cntrs->TxDwordCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDwordCnt)); - cntrs->TxLenErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxLenErrCnt)); - cntrs->TxMaxMinLenErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxMaxMinLenErrCnt)); - cntrs->TxUnderrunCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxUnderrunCnt)); - cntrs->TxFlowStallCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxFlowStallCnt)); - cntrs->TxDroppedPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(TxDroppedPktCnt)); - cntrs->RxDroppedPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDroppedPktCnt)); - cntrs->RxDataPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDataPktCnt)); - cntrs->RxFlowPktCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowPktCnt)); - cntrs->RxDwordCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxDwordCnt)); - cntrs->RxLenErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLenErrCnt)); - cntrs->RxMaxMinLenErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxMaxMinLenErrCnt)); - cntrs->RxICRCErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxICRCErrCnt)); - cntrs->RxVCRCErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxVCRCErrCnt)); - cntrs->RxFlowCtrlErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxFlowCtrlErrCnt)); - cntrs->RxBadFormatCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBadFormatCnt)); - cntrs->RxLinkProblemCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLinkProblemCnt)); - cntrs->RxEBPCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxEBPCnt)); - cntrs->RxLPCRCErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxLPCRCErrCnt)); - cntrs->RxBufOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxBufOvflCnt)); - cntrs->RxTIDFullErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDFullErrCnt)); - cntrs->RxTIDValidErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxTIDValidErrCnt)); - cntrs->RxPKeyMismatchCnt = - ipath_snap_cntr(dd, 
IPATH_CREG_OFFSET(RxPKeyMismatchCnt)); - cntrs->RxP0HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt)); - cntrs->RxP1HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP1HdrEgrOvflCnt)); - cntrs->RxP2HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP2HdrEgrOvflCnt)); - cntrs->RxP3HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP3HdrEgrOvflCnt)); - cntrs->RxP4HdrEgrOvflCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(RxP4HdrEgrOvflCnt)); - cntrs->RxP5HdrEgrOvflCnt = 0; - cntrs->RxP6HdrEgrOvflCnt = 0; - cntrs->RxP7HdrEgrOvflCnt = 0; - cntrs->RxP8HdrEgrOvflCnt = 0; - cntrs->RxP9HdrEgrOvflCnt = 0; - cntrs->RxP10HdrEgrOvflCnt = 0; - cntrs->RxP11HdrEgrOvflCnt = 0; - cntrs->RxP12HdrEgrOvflCnt = 0; - cntrs->RxP13HdrEgrOvflCnt = 0; - cntrs->RxP14HdrEgrOvflCnt = 0; - cntrs->RxP15HdrEgrOvflCnt = 0; - cntrs->RxP16HdrEgrOvflCnt = 0; - cntrs->IBStatusChangeCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBStatusChangeCnt)); - cntrs->IBLinkErrRecoveryCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt)); - cntrs->IBLinkDownedCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBLinkDownedCnt)); - cntrs->IBSymbolErrCnt = - ipath_snap_cntr(dd, IPATH_CREG_OFFSET(IBSymbolErrCnt)); - cntrs->RxVL15DroppedPktCnt = 0; - cntrs->RxOtherLocalPhyErrCnt = 0; - cntrs->PcieRetryBufDiagQwordCnt = 0; - cntrs->ExcessBufferOvflCnt = dd->ipath_overrun_thresh_errs; - cntrs->LocalLinkIntegrityErrCnt = dd->ipath_lli_errs; - cntrs->RxVlErrCnt = 0; - cntrs->RxDlidFltrCnt = 0; -} - - -/* no interrupt fallback for these chips */ -static int ipath_pe_nointr_fallback(struct ipath_devdata *dd) -{ - return 0; -} - - -/* - * reset the XGXS (between serdes and IBC). Slightly less intrusive - * than resetting the IBC or external link state, and useful in some - * cases to cause some retraining. To do this right, we reset IBC - * as well. 
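
A note on the long run of counter reads above: IPATH_CREG_OFFSET() turns a field of a chip-layout struct into a 64-bit-word index into the counter page, so the struct doubles as the register map. The pattern in isolation, with hypothetical names:

	#include <stddef.h>

	/* Chip-layout mirror; one field per hardware counter, in order. */
	struct hw_counters {
		unsigned long long LBIntCnt;
		unsigned long long LBFlowStallCnt;
		/* ... */
	};

	/* 64-bit-word index of a counter within the chip's counter page. */
	#define CREG_IDX(field) \
		(offsetof(struct hw_counters, field) / sizeof(unsigned long long))
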
- */ -static void ipath_pe_xgxs_reset(struct ipath_devdata *dd) -{ - u64 val, prev_val; - - prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - val = prev_val | INFINIPATH_XGXS_RESET; - prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control & ~INFINIPATH_C_LINKENABLE); - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val); - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control); -} - - -static int ipath_pe_get_ib_cfg(struct ipath_devdata *dd, int which) -{ - int ret; - - switch (which) { - case IPATH_IB_CFG_LWID: - ret = dd->ipath_link_width_active; - break; - case IPATH_IB_CFG_SPD: - ret = dd->ipath_link_speed_active; - break; - case IPATH_IB_CFG_LWID_ENB: - ret = dd->ipath_link_width_enabled; - break; - case IPATH_IB_CFG_SPD_ENB: - ret = dd->ipath_link_speed_enabled; - break; - default: - ret = -ENOTSUPP; - break; - } - return ret; -} - - -/* we assume range checking is already done, if needed */ -static int ipath_pe_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val) -{ - int ret = 0; - - if (which == IPATH_IB_CFG_LWID_ENB) - dd->ipath_link_width_enabled = val; - else if (which == IPATH_IB_CFG_SPD_ENB) - dd->ipath_link_speed_enabled = val; - else - ret = -ENOTSUPP; - return ret; -} - -static void ipath_pe_config_jint(struct ipath_devdata *dd, u16 a, u16 b) -{ -} - - -static int ipath_pe_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) -{ - ipath_setup_pe_setextled(dd, ipath_ib_linkstate(dd, ibcs), - ipath_ib_linktrstate(dd, ibcs)); - return 0; -} - - -/** - * ipath_init_iba6120_funcs - set up the chip-specific function pointers - * @dd: the infinipath device - * - * This is global, and is called directly at init to set up the - * chip-specific function pointers for later use. 
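
The init function that follows is the chip-abstraction seam of this driver: the shared core never calls 6120 code directly, only the dd->ipath_f_* pointers that the per-chip init fills in once at probe time. Reduced to its essentials (hypothetical ops struct; the real driver keeps the pointers directly on ipath_devdata):

	/* Per-chip operations; the shared core calls only through these. */
	struct chip_ops {
		int  (*bringup_serdes)(struct ipath_devdata *dd);
		void (*quiet_serdes)(struct ipath_devdata *dd);
		void (*clear_tids)(struct ipath_devdata *dd, unsigned port);
	};

	static void init_pe_chip_ops(struct chip_ops *ops)
	{
		ops->bringup_serdes = ipath_pe_bringup_serdes;
		ops->quiet_serdes   = ipath_pe_quiet_serdes;
		ops->clear_tids     = ipath_pe_clear_tids;
	}
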
- */ -void ipath_init_iba6120_funcs(struct ipath_devdata *dd) -{ - dd->ipath_f_intrsetup = ipath_pe_intconfig; - dd->ipath_f_bus = ipath_setup_pe_config; - dd->ipath_f_reset = ipath_setup_pe_reset; - dd->ipath_f_get_boardname = ipath_pe_boardname; - dd->ipath_f_init_hwerrors = ipath_pe_init_hwerrors; - dd->ipath_f_early_init = ipath_pe_early_init; - dd->ipath_f_handle_hwerrors = ipath_pe_handle_hwerrors; - dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes; - dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes; - dd->ipath_f_clear_tids = ipath_pe_clear_tids; - /* - * _f_put_tid may get changed after we read the chip revision, - * but we start with the safe version for all revs - */ - dd->ipath_f_put_tid = ipath_pe_put_tid; - dd->ipath_f_cleanup = ipath_setup_pe_cleanup; - dd->ipath_f_setextled = ipath_setup_pe_setextled; - dd->ipath_f_get_base_info = ipath_pe_get_base_info; - dd->ipath_f_free_irq = ipath_pe_free_irq; - dd->ipath_f_tidtemplate = ipath_pe_tidtemplate; - dd->ipath_f_intr_fallback = ipath_pe_nointr_fallback; - dd->ipath_f_xgxs_reset = ipath_pe_xgxs_reset; - dd->ipath_f_get_msgheader = ipath_pe_get_msgheader; - dd->ipath_f_config_ports = ipath_pe_config_ports; - dd->ipath_f_read_counters = ipath_pe_read_counters; - dd->ipath_f_get_ib_cfg = ipath_pe_get_ib_cfg; - dd->ipath_f_set_ib_cfg = ipath_pe_set_ib_cfg; - dd->ipath_f_config_jint = ipath_pe_config_jint; - dd->ipath_f_ib_updown = ipath_pe_ib_updown; - - - /* initialize chip-specific variables */ - ipath_init_pe_variables(dd); -} - diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c deleted file mode 100644 index fb70712ac85..00000000000 --- a/drivers/infiniband/hw/ipath/ipath_iba7220.c +++ /dev/null @@ -1,2558 +0,0 @@ -/* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. - * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */
-/*
- * This file contains all of the code that is specific to the
- * InfiniPath 7220 chip (except that specific to the SerDes)
- */
-
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <rdma/ib_verbs.h>
-
-#include "ipath_kernel.h"
-#include "ipath_registers.h"
-#include "ipath_7220.h"
-
-static void ipath_setup_7220_setextled(struct ipath_devdata *, u64, u64);
-
-static unsigned ipath_compat_ddr_negotiate = 1;
-
-module_param_named(compat_ddr_negotiate, ipath_compat_ddr_negotiate, uint,
-		   S_IWUSR | S_IRUGO);
-MODULE_PARM_DESC(compat_ddr_negotiate,
-		 "Attempt pre-IBTA 1.2 DDR speed negotiation");
-
-static unsigned ipath_sdma_fetch_arb = 1;
-module_param_named(fetch_arb, ipath_sdma_fetch_arb, uint, S_IRUGO);
-MODULE_PARM_DESC(fetch_arb, "IBA7220: change SDMA descriptor arbitration");
-
-/*
- * This file contains almost all the chip-specific register information and
- * access functions for the QLogic InfiniPath 7220 PCI-Express chip, with the
- * exception of SerDes support, which is in ipath_sd7220.c.
- *
- * This lists the InfiniPath registers, in the actual chip layout.
- * This structure should never be directly accessed.
- */
-struct _infinipath_do_not_use_kernel_regs {
-	unsigned long long Revision;
-	unsigned long long Control;
-	unsigned long long PageAlign;
-	unsigned long long PortCnt;
-	unsigned long long DebugPortSelect;
-	unsigned long long DebugSigsIntSel; /* was Reserved0 */
-	unsigned long long SendRegBase;
-	unsigned long long UserRegBase;
-	unsigned long long CounterRegBase;
-	unsigned long long Scratch;
-	unsigned long long EEPROMAddrCmd; /* was Reserved1 */
-	unsigned long long EEPROMData; /* was Reserved2 */
-	unsigned long long IntBlocked;
-	unsigned long long IntMask;
-	unsigned long long IntStatus;
-	unsigned long long IntClear;
-	unsigned long long ErrorMask;
-	unsigned long long ErrorStatus;
-	unsigned long long ErrorClear;
-	unsigned long long HwErrMask;
-	unsigned long long HwErrStatus;
-	unsigned long long HwErrClear;
-	unsigned long long HwDiagCtrl;
-	unsigned long long MDIO;
-	unsigned long long IBCStatus;
-	unsigned long long IBCCtrl;
-	unsigned long long ExtStatus;
-	unsigned long long ExtCtrl;
-	unsigned long long GPIOOut;
-	unsigned long long GPIOMask;
-	unsigned long long GPIOStatus;
-	unsigned long long GPIOClear;
-	unsigned long long RcvCtrl;
-	unsigned long long RcvBTHQP;
-	unsigned long long RcvHdrSize;
-	unsigned long long RcvHdrCnt;
-	unsigned long long RcvHdrEntSize;
-	unsigned long long RcvTIDBase;
-	unsigned long long RcvTIDCnt;
-	unsigned long long RcvEgrBase;
-	unsigned long long RcvEgrCnt;
-	unsigned long long RcvBufBase;
-	unsigned long long RcvBufSize;
-	unsigned long long RxIntMemBase;
-	unsigned long long RxIntMemSize;
-	unsigned long long RcvPartitionKey;
-	unsigned long long RcvQPMulticastPort;
-	unsigned long long RcvPktLEDCnt;
-	unsigned long long IBCDDRCtrl;
-	unsigned long long HRTBT_GUID;
-	unsigned long long IB_SDTEST_IF_TX;
-	unsigned long long IB_SDTEST_IF_RX;
-	unsigned long long IBCDDRCtrl2;
-	unsigned long long IBCDDRStatus;
-	unsigned long long JIntReload;
-	unsigned long long IBNCModeCtrl;
-	unsigned long long SendCtrl;
-	unsigned long long SendBufBase;
-	unsigned long long SendBufSize;
-	unsigned long long SendBufCnt;
-	unsigned long long SendAvailAddr;
-	unsigned long long TxIntMemBase;
-	unsigned long long TxIntMemSize;
-	unsigned long long SendDmaBase;
-	unsigned long long SendDmaLenGen;
-	unsigned long long SendDmaTail;
-	unsigned long long
SendDmaHead; - unsigned long long SendDmaHeadAddr; - unsigned long long SendDmaBufMask0; - unsigned long long SendDmaBufMask1; - unsigned long long SendDmaBufMask2; - unsigned long long SendDmaStatus; - unsigned long long SendBufferError; - unsigned long long SendBufferErrorCONT1; - unsigned long long SendBufErr2; /* was Reserved6SBE[0/6] */ - unsigned long long Reserved6L[2]; - unsigned long long AvailUpdCount; - unsigned long long RcvHdrAddr0; - unsigned long long RcvHdrAddrs[16]; /* Why enumerate? */ - unsigned long long Reserved7hdtl; /* Align next to 300 */ - unsigned long long RcvHdrTailAddr0; /* 300, like others */ - unsigned long long RcvHdrTailAddrs[16]; - unsigned long long Reserved9SW[7]; /* was [8]; we have 17 ports */ - unsigned long long IbsdEpbAccCtl; /* IB Serdes EPB access control */ - unsigned long long IbsdEpbTransReg; /* IB Serdes EPB Transaction */ - unsigned long long Reserved10sds; /* was SerdesStatus on */ - unsigned long long XGXSConfig; - unsigned long long IBSerDesCtrl; /* Was IBPLLCfg on Monty */ - unsigned long long EEPCtlStat; /* for "boot" EEPROM/FLASH */ - unsigned long long EEPAddrCmd; - unsigned long long EEPData; - unsigned long long PcieEpbAccCtl; - unsigned long long PcieEpbTransCtl; - unsigned long long EfuseCtl; /* E-Fuse control */ - unsigned long long EfuseData[4]; - unsigned long long ProcMon; - /* this chip moves following two from previous 200, 208 */ - unsigned long long PCIeRBufTestReg0; - unsigned long long PCIeRBufTestReg1; - /* added for this chip */ - unsigned long long PCIeRBufTestReg2; - unsigned long long PCIeRBufTestReg3; - /* added for this chip, debug only */ - unsigned long long SPC_JTAG_ACCESS_REG; - unsigned long long LAControlReg; - unsigned long long GPIODebugSelReg; - unsigned long long DebugPortValueReg; - /* added for this chip, DMA */ - unsigned long long SendDmaBufUsed[3]; - unsigned long long SendDmaReqTagUsed; - /* - * added for this chip, EFUSE: note that these program 64-bit - * words 2 and 3 */ - unsigned long long efuse_pgm_data[2]; - unsigned long long Reserved11LAalign[10]; /* Skip 4B0..4F8 */ - /* we have 30 regs for DDS and RXEQ in IB SERDES */ - unsigned long long SerDesDDSRXEQ[30]; - unsigned long long Reserved12LAalign[2]; /* Skip 5F0, 5F8 */ - /* added for LA debug support */ - unsigned long long LAMemory[32]; -}; - -struct _infinipath_do_not_use_counters { - __u64 LBIntCnt; - __u64 LBFlowStallCnt; - __u64 TxSDmaDescCnt; /* was Reserved1 */ - __u64 TxUnsupVLErrCnt; - __u64 TxDataPktCnt; - __u64 TxFlowPktCnt; - __u64 TxDwordCnt; - __u64 TxLenErrCnt; - __u64 TxMaxMinLenErrCnt; - __u64 TxUnderrunCnt; - __u64 TxFlowStallCnt; - __u64 TxDroppedPktCnt; - __u64 RxDroppedPktCnt; - __u64 RxDataPktCnt; - __u64 RxFlowPktCnt; - __u64 RxDwordCnt; - __u64 RxLenErrCnt; - __u64 RxMaxMinLenErrCnt; - __u64 RxICRCErrCnt; - __u64 RxVCRCErrCnt; - __u64 RxFlowCtrlErrCnt; - __u64 RxBadFormatCnt; - __u64 RxLinkProblemCnt; - __u64 RxEBPCnt; - __u64 RxLPCRCErrCnt; - __u64 RxBufOvflCnt; - __u64 RxTIDFullErrCnt; - __u64 RxTIDValidErrCnt; - __u64 RxPKeyMismatchCnt; - __u64 RxP0HdrEgrOvflCnt; - __u64 RxP1HdrEgrOvflCnt; - __u64 RxP2HdrEgrOvflCnt; - __u64 RxP3HdrEgrOvflCnt; - __u64 RxP4HdrEgrOvflCnt; - __u64 RxP5HdrEgrOvflCnt; - __u64 RxP6HdrEgrOvflCnt; - __u64 RxP7HdrEgrOvflCnt; - __u64 RxP8HdrEgrOvflCnt; - __u64 RxP9HdrEgrOvflCnt; /* was Reserved6 */ - __u64 RxP10HdrEgrOvflCnt; /* was Reserved7 */ - __u64 RxP11HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP12HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP13HdrEgrOvflCnt; /* new for IBA7220 
*/ - __u64 RxP14HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP15HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 RxP16HdrEgrOvflCnt; /* new for IBA7220 */ - __u64 IBStatusChangeCnt; - __u64 IBLinkErrRecoveryCnt; - __u64 IBLinkDownedCnt; - __u64 IBSymbolErrCnt; - /* The following are new for IBA7220 */ - __u64 RxVL15DroppedPktCnt; - __u64 RxOtherLocalPhyErrCnt; - __u64 PcieRetryBufDiagQwordCnt; - __u64 ExcessBufferOvflCnt; - __u64 LocalLinkIntegrityErrCnt; - __u64 RxVlErrCnt; - __u64 RxDlidFltrCnt; - __u64 Reserved8[7]; - __u64 PSStat; - __u64 PSStart; - __u64 PSInterval; - __u64 PSRcvDataCount; - __u64 PSRcvPktsCount; - __u64 PSXmitDataCount; - __u64 PSXmitPktsCount; - __u64 PSXmitWaitCount; -}; - -#define IPATH_KREG_OFFSET(field) (offsetof( \ - struct _infinipath_do_not_use_kernel_regs, field) / sizeof(u64)) -#define IPATH_CREG_OFFSET(field) (offsetof( \ - struct _infinipath_do_not_use_counters, field) / sizeof(u64)) - -static const struct ipath_kregs ipath_7220_kregs = { - .kr_control = IPATH_KREG_OFFSET(Control), - .kr_counterregbase = IPATH_KREG_OFFSET(CounterRegBase), - .kr_debugportselect = IPATH_KREG_OFFSET(DebugPortSelect), - .kr_errorclear = IPATH_KREG_OFFSET(ErrorClear), - .kr_errormask = IPATH_KREG_OFFSET(ErrorMask), - .kr_errorstatus = IPATH_KREG_OFFSET(ErrorStatus), - .kr_extctrl = IPATH_KREG_OFFSET(ExtCtrl), - .kr_extstatus = IPATH_KREG_OFFSET(ExtStatus), - .kr_gpio_clear = IPATH_KREG_OFFSET(GPIOClear), - .kr_gpio_mask = IPATH_KREG_OFFSET(GPIOMask), - .kr_gpio_out = IPATH_KREG_OFFSET(GPIOOut), - .kr_gpio_status = IPATH_KREG_OFFSET(GPIOStatus), - .kr_hwdiagctrl = IPATH_KREG_OFFSET(HwDiagCtrl), - .kr_hwerrclear = IPATH_KREG_OFFSET(HwErrClear), - .kr_hwerrmask = IPATH_KREG_OFFSET(HwErrMask), - .kr_hwerrstatus = IPATH_KREG_OFFSET(HwErrStatus), - .kr_ibcctrl = IPATH_KREG_OFFSET(IBCCtrl), - .kr_ibcstatus = IPATH_KREG_OFFSET(IBCStatus), - .kr_intblocked = IPATH_KREG_OFFSET(IntBlocked), - .kr_intclear = IPATH_KREG_OFFSET(IntClear), - .kr_intmask = IPATH_KREG_OFFSET(IntMask), - .kr_intstatus = IPATH_KREG_OFFSET(IntStatus), - .kr_mdio = IPATH_KREG_OFFSET(MDIO), - .kr_pagealign = IPATH_KREG_OFFSET(PageAlign), - .kr_partitionkey = IPATH_KREG_OFFSET(RcvPartitionKey), - .kr_portcnt = IPATH_KREG_OFFSET(PortCnt), - .kr_rcvbthqp = IPATH_KREG_OFFSET(RcvBTHQP), - .kr_rcvbufbase = IPATH_KREG_OFFSET(RcvBufBase), - .kr_rcvbufsize = IPATH_KREG_OFFSET(RcvBufSize), - .kr_rcvctrl = IPATH_KREG_OFFSET(RcvCtrl), - .kr_rcvegrbase = IPATH_KREG_OFFSET(RcvEgrBase), - .kr_rcvegrcnt = IPATH_KREG_OFFSET(RcvEgrCnt), - .kr_rcvhdrcnt = IPATH_KREG_OFFSET(RcvHdrCnt), - .kr_rcvhdrentsize = IPATH_KREG_OFFSET(RcvHdrEntSize), - .kr_rcvhdrsize = IPATH_KREG_OFFSET(RcvHdrSize), - .kr_rcvintmembase = IPATH_KREG_OFFSET(RxIntMemBase), - .kr_rcvintmemsize = IPATH_KREG_OFFSET(RxIntMemSize), - .kr_rcvtidbase = IPATH_KREG_OFFSET(RcvTIDBase), - .kr_rcvtidcnt = IPATH_KREG_OFFSET(RcvTIDCnt), - .kr_revision = IPATH_KREG_OFFSET(Revision), - .kr_scratch = IPATH_KREG_OFFSET(Scratch), - .kr_sendbuffererror = IPATH_KREG_OFFSET(SendBufferError), - .kr_sendctrl = IPATH_KREG_OFFSET(SendCtrl), - .kr_sendpioavailaddr = IPATH_KREG_OFFSET(SendAvailAddr), - .kr_sendpiobufbase = IPATH_KREG_OFFSET(SendBufBase), - .kr_sendpiobufcnt = IPATH_KREG_OFFSET(SendBufCnt), - .kr_sendpiosize = IPATH_KREG_OFFSET(SendBufSize), - .kr_sendregbase = IPATH_KREG_OFFSET(SendRegBase), - .kr_txintmembase = IPATH_KREG_OFFSET(TxIntMemBase), - .kr_txintmemsize = IPATH_KREG_OFFSET(TxIntMemSize), - .kr_userregbase = IPATH_KREG_OFFSET(UserRegBase), - - .kr_xgxsconfig = 
IPATH_KREG_OFFSET(XGXSConfig), - - /* send dma related regs */ - .kr_senddmabase = IPATH_KREG_OFFSET(SendDmaBase), - .kr_senddmalengen = IPATH_KREG_OFFSET(SendDmaLenGen), - .kr_senddmatail = IPATH_KREG_OFFSET(SendDmaTail), - .kr_senddmahead = IPATH_KREG_OFFSET(SendDmaHead), - .kr_senddmaheadaddr = IPATH_KREG_OFFSET(SendDmaHeadAddr), - .kr_senddmabufmask0 = IPATH_KREG_OFFSET(SendDmaBufMask0), - .kr_senddmabufmask1 = IPATH_KREG_OFFSET(SendDmaBufMask1), - .kr_senddmabufmask2 = IPATH_KREG_OFFSET(SendDmaBufMask2), - .kr_senddmastatus = IPATH_KREG_OFFSET(SendDmaStatus), - - /* SerDes related regs */ - .kr_ibserdesctrl = IPATH_KREG_OFFSET(IBSerDesCtrl), - .kr_ib_epbacc = IPATH_KREG_OFFSET(IbsdEpbAccCtl), - .kr_ib_epbtrans = IPATH_KREG_OFFSET(IbsdEpbTransReg), - .kr_pcie_epbacc = IPATH_KREG_OFFSET(PcieEpbAccCtl), - .kr_pcie_epbtrans = IPATH_KREG_OFFSET(PcieEpbTransCtl), - .kr_ib_ddsrxeq = IPATH_KREG_OFFSET(SerDesDDSRXEQ), - - /* - * These should not be used directly via ipath_read_kreg64(), - * use them with ipath_read_kreg64_port() - */ - .kr_rcvhdraddr = IPATH_KREG_OFFSET(RcvHdrAddr0), - .kr_rcvhdrtailaddr = IPATH_KREG_OFFSET(RcvHdrTailAddr0), - - /* - * The rcvpktled register controls one of the debug port signals, so - * a packet activity LED can be connected to it. - */ - .kr_rcvpktledcnt = IPATH_KREG_OFFSET(RcvPktLEDCnt), - .kr_pcierbuftestreg0 = IPATH_KREG_OFFSET(PCIeRBufTestReg0), - .kr_pcierbuftestreg1 = IPATH_KREG_OFFSET(PCIeRBufTestReg1), - - .kr_hrtbt_guid = IPATH_KREG_OFFSET(HRTBT_GUID), - .kr_ibcddrctrl = IPATH_KREG_OFFSET(IBCDDRCtrl), - .kr_ibcddrstatus = IPATH_KREG_OFFSET(IBCDDRStatus), - .kr_jintreload = IPATH_KREG_OFFSET(JIntReload) -}; - -static const struct ipath_cregs ipath_7220_cregs = { - .cr_badformatcnt = IPATH_CREG_OFFSET(RxBadFormatCnt), - .cr_erricrccnt = IPATH_CREG_OFFSET(RxICRCErrCnt), - .cr_errlinkcnt = IPATH_CREG_OFFSET(RxLinkProblemCnt), - .cr_errlpcrccnt = IPATH_CREG_OFFSET(RxLPCRCErrCnt), - .cr_errpkey = IPATH_CREG_OFFSET(RxPKeyMismatchCnt), - .cr_errrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowCtrlErrCnt), - .cr_err_rlencnt = IPATH_CREG_OFFSET(RxLenErrCnt), - .cr_errslencnt = IPATH_CREG_OFFSET(TxLenErrCnt), - .cr_errtidfull = IPATH_CREG_OFFSET(RxTIDFullErrCnt), - .cr_errtidvalid = IPATH_CREG_OFFSET(RxTIDValidErrCnt), - .cr_errvcrccnt = IPATH_CREG_OFFSET(RxVCRCErrCnt), - .cr_ibstatuschange = IPATH_CREG_OFFSET(IBStatusChangeCnt), - .cr_intcnt = IPATH_CREG_OFFSET(LBIntCnt), - .cr_invalidrlencnt = IPATH_CREG_OFFSET(RxMaxMinLenErrCnt), - .cr_invalidslencnt = IPATH_CREG_OFFSET(TxMaxMinLenErrCnt), - .cr_lbflowstallcnt = IPATH_CREG_OFFSET(LBFlowStallCnt), - .cr_pktrcvcnt = IPATH_CREG_OFFSET(RxDataPktCnt), - .cr_pktrcvflowctrlcnt = IPATH_CREG_OFFSET(RxFlowPktCnt), - .cr_pktsendcnt = IPATH_CREG_OFFSET(TxDataPktCnt), - .cr_pktsendflowcnt = IPATH_CREG_OFFSET(TxFlowPktCnt), - .cr_portovflcnt = IPATH_CREG_OFFSET(RxP0HdrEgrOvflCnt), - .cr_rcvebpcnt = IPATH_CREG_OFFSET(RxEBPCnt), - .cr_rcvovflcnt = IPATH_CREG_OFFSET(RxBufOvflCnt), - .cr_senddropped = IPATH_CREG_OFFSET(TxDroppedPktCnt), - .cr_sendstallcnt = IPATH_CREG_OFFSET(TxFlowStallCnt), - .cr_sendunderruncnt = IPATH_CREG_OFFSET(TxUnderrunCnt), - .cr_wordrcvcnt = IPATH_CREG_OFFSET(RxDwordCnt), - .cr_wordsendcnt = IPATH_CREG_OFFSET(TxDwordCnt), - .cr_unsupvlcnt = IPATH_CREG_OFFSET(TxUnsupVLErrCnt), - .cr_rxdroppktcnt = IPATH_CREG_OFFSET(RxDroppedPktCnt), - .cr_iblinkerrrecovcnt = IPATH_CREG_OFFSET(IBLinkErrRecoveryCnt), - .cr_iblinkdowncnt = IPATH_CREG_OFFSET(IBLinkDownedCnt), - .cr_ibsymbolerrcnt = 
IPATH_CREG_OFFSET(IBSymbolErrCnt), - .cr_vl15droppedpktcnt = IPATH_CREG_OFFSET(RxVL15DroppedPktCnt), - .cr_rxotherlocalphyerrcnt = - IPATH_CREG_OFFSET(RxOtherLocalPhyErrCnt), - .cr_excessbufferovflcnt = IPATH_CREG_OFFSET(ExcessBufferOvflCnt), - .cr_locallinkintegrityerrcnt = - IPATH_CREG_OFFSET(LocalLinkIntegrityErrCnt), - .cr_rxvlerrcnt = IPATH_CREG_OFFSET(RxVlErrCnt), - .cr_rxdlidfltrcnt = IPATH_CREG_OFFSET(RxDlidFltrCnt), - .cr_psstat = IPATH_CREG_OFFSET(PSStat), - .cr_psstart = IPATH_CREG_OFFSET(PSStart), - .cr_psinterval = IPATH_CREG_OFFSET(PSInterval), - .cr_psrcvdatacount = IPATH_CREG_OFFSET(PSRcvDataCount), - .cr_psrcvpktscount = IPATH_CREG_OFFSET(PSRcvPktsCount), - .cr_psxmitdatacount = IPATH_CREG_OFFSET(PSXmitDataCount), - .cr_psxmitpktscount = IPATH_CREG_OFFSET(PSXmitPktsCount), - .cr_psxmitwaitcount = IPATH_CREG_OFFSET(PSXmitWaitCount), -}; - -/* kr_control bits */ -#define INFINIPATH_C_RESET (1U<<7) - -/* kr_intstatus, kr_intclear, kr_intmask bits */ -#define INFINIPATH_I_RCVURG_MASK ((1ULL<<17)-1) -#define INFINIPATH_I_RCVURG_SHIFT 32 -#define INFINIPATH_I_RCVAVAIL_MASK ((1ULL<<17)-1) -#define INFINIPATH_I_RCVAVAIL_SHIFT 0 -#define INFINIPATH_I_SERDESTRIMDONE (1ULL<<27) - -/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ -#define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK 0x00000000000000ffULL -#define INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT 0 -#define INFINIPATH_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL -#define INFINIPATH_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL -#define INFINIPATH_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL -#define INFINIPATH_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL -#define INFINIPATH_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL -#define INFINIPATH_HWE_COREPLL_FBSLIP 0x0080000000000000ULL -#define INFINIPATH_HWE_COREPLL_RFSLIP 0x0100000000000000ULL -#define INFINIPATH_HWE_PCIE1PLLFAILED 0x0400000000000000ULL -#define INFINIPATH_HWE_PCIE0PLLFAILED 0x0800000000000000ULL -#define INFINIPATH_HWE_SERDESPLLFAILED 0x1000000000000000ULL -/* specific to this chip */ -#define INFINIPATH_HWE_PCIECPLDATAQUEUEERR 0x0000000000000040ULL -#define INFINIPATH_HWE_PCIECPLHDRQUEUEERR 0x0000000000000080ULL -#define INFINIPATH_HWE_SDMAMEMREADERR 0x0000000010000000ULL -#define INFINIPATH_HWE_CLK_UC_PLLNOTLOCKED 0x2000000000000000ULL -#define INFINIPATH_HWE_PCIESERDESQ0PCLKNOTDETECT 0x0100000000000000ULL -#define INFINIPATH_HWE_PCIESERDESQ1PCLKNOTDETECT 0x0200000000000000ULL -#define INFINIPATH_HWE_PCIESERDESQ2PCLKNOTDETECT 0x0400000000000000ULL -#define INFINIPATH_HWE_PCIESERDESQ3PCLKNOTDETECT 0x0800000000000000ULL -#define INFINIPATH_HWE_DDSRXEQMEMORYPARITYERR 0x0000008000000000ULL -#define INFINIPATH_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL -#define INFINIPATH_HWE_PCIE_UC_OCT0MEMORYPARITYERR 0x0000001000000000ULL -#define INFINIPATH_HWE_PCIE_UC_OCT1MEMORYPARITYERR 0x0000002000000000ULL - -#define IBA7220_IBCS_LINKTRAININGSTATE_MASK 0x1F -#define IBA7220_IBCS_LINKSTATE_SHIFT 5 -#define IBA7220_IBCS_LINKSPEED_SHIFT 8 -#define IBA7220_IBCS_LINKWIDTH_SHIFT 9 - -#define IBA7220_IBCC_LINKINITCMD_MASK 0x7ULL -#define IBA7220_IBCC_LINKCMD_SHIFT 19 -#define IBA7220_IBCC_MAXPKTLEN_SHIFT 21 - -/* kr_ibcddrctrl bits */ -#define IBA7220_IBC_DLIDLMC_MASK 0xFFFFFFFFUL -#define IBA7220_IBC_DLIDLMC_SHIFT 32 -#define IBA7220_IBC_HRTBT_MASK 3 -#define IBA7220_IBC_HRTBT_SHIFT 16 -#define IBA7220_IBC_HRTBT_ENB 0x10000UL -#define IBA7220_IBC_LANE_REV_SUPPORTED (1<<8) -#define IBA7220_IBC_LREV_MASK 1 -#define IBA7220_IBC_LREV_SHIFT 8 -#define IBA7220_IBC_RXPOL_MASK 1 -#define 
IBA7220_IBC_RXPOL_SHIFT 7 -#define IBA7220_IBC_WIDTH_SHIFT 5 -#define IBA7220_IBC_WIDTH_MASK 0x3 -#define IBA7220_IBC_WIDTH_1X_ONLY (0<<IBA7220_IBC_WIDTH_SHIFT) -#define IBA7220_IBC_WIDTH_4X_ONLY (1<<IBA7220_IBC_WIDTH_SHIFT) -#define IBA7220_IBC_WIDTH_AUTONEG (2<<IBA7220_IBC_WIDTH_SHIFT) -#define IBA7220_IBC_SPEED_AUTONEG (1<<1) -#define IBA7220_IBC_SPEED_SDR (1<<2) -#define IBA7220_IBC_SPEED_DDR (1<<3) -#define IBA7220_IBC_SPEED_AUTONEG_MASK (0x7<<1) -#define IBA7220_IBC_IBTA_1_2_MASK (1) - -/* kr_ibcddrstatus */ -/* link latency shift is 0, don't bother defining */ -#define IBA7220_DDRSTAT_LINKLAT_MASK 0x3ffffff - -/* kr_extstatus bits */ -#define INFINIPATH_EXTS_FREQSEL 0x2 -#define INFINIPATH_EXTS_SERDESSEL 0x4 -#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 -#define INFINIPATH_EXTS_MEMBIST_DISABLED 0x0000000000008000 - -/* kr_xgxsconfig bits */ -#define INFINIPATH_XGXS_RESET 0x5ULL -#define INFINIPATH_XGXS_FC_SAFE (1ULL<<63) - -/* kr_rcvpktledcnt */ -#define IBA7220_LEDBLINK_ON_SHIFT 32 /* 4ns period on after packet */ -#define IBA7220_LEDBLINK_OFF_SHIFT 0 /* 4ns period off before next on */ - -#define _IPATH_GPIO_SDA_NUM 1 -#define _IPATH_GPIO_SCL_NUM 0 - -#define IPATH_GPIO_SDA (1ULL << \ - (_IPATH_GPIO_SDA_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) -#define IPATH_GPIO_SCL (1ULL << \ - (_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT)) - -#define IBA7220_R_INTRAVAIL_SHIFT 17 -#define IBA7220_R_TAILUPD_SHIFT 35 -#define IBA7220_R_PORTCFG_SHIFT 36 - -#define INFINIPATH_JINT_PACKETSHIFT 16 -#define INFINIPATH_JINT_DEFAULT_IDLE_TICKS 0 -#define INFINIPATH_JINT_DEFAULT_MAX_PACKETS 0 - -#define IBA7220_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */ - -/* - * the size bits give us 2^N, in KB units. 0 marks as invalid, - * and 7 is reserved. We currently use only 2KB and 4KB - */ -#define IBA7220_TID_SZ_SHIFT 37 /* shift to 3bit size selector */ -#define IBA7220_TID_SZ_2K (1UL<<IBA7220_TID_SZ_SHIFT) /* 2KB */ -#define IBA7220_TID_SZ_4K (2UL<<IBA7220_TID_SZ_SHIFT) /* 4KB */ -#define IBA7220_TID_PA_SHIFT 11U /* TID addr in chip stored w/o low bits */ - -#define IPATH_AUTONEG_TRIES 5 /* sequential retries to negotiate DDR */ - -static char int_type[16] = "auto"; -module_param_string(interrupt_type, int_type, sizeof(int_type), 0444); -MODULE_PARM_DESC(int_type, " interrupt_type=auto|force_msi|force_intx\n"); - -/* packet rate matching delay; chip has support */ -static u8 rate_to_delay[2][2] = { - /* 1x, 4x */ - { 8, 2 }, /* SDR */ - { 4, 1 } /* DDR */ -}; - -/* 7220 specific hardware errors... */ -static const struct ipath_hwerror_msgs ipath_7220_hwerror_msgs[] = { - INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"), - INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"), - /* - * In practice, it's unlikely that we'll see PCIe PLL, or bus - * parity or memory parity error failures, because most likely we - * won't be able to talk to the core of the chip. Nonetheless, we - * might see them, if they are in parts of the PCIe core that aren't - * essential.
- */ - INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"), - INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"), - INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"), - INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"), - INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"), - INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"), - INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), - INFINIPATH_HWE_MSG(PCIECPLDATAQUEUEERR, "PCIe cpl header queue"), - INFINIPATH_HWE_MSG(PCIECPLHDRQUEUEERR, "PCIe cpl data queue"), - INFINIPATH_HWE_MSG(SDMAMEMREADERR, "Send DMA memory read"), - INFINIPATH_HWE_MSG(CLK_UC_PLLNOTLOCKED, "uC PLL clock not locked"), - INFINIPATH_HWE_MSG(PCIESERDESQ0PCLKNOTDETECT, - "PCIe serdes Q0 no clock"), - INFINIPATH_HWE_MSG(PCIESERDESQ1PCLKNOTDETECT, - "PCIe serdes Q1 no clock"), - INFINIPATH_HWE_MSG(PCIESERDESQ2PCLKNOTDETECT, - "PCIe serdes Q2 no clock"), - INFINIPATH_HWE_MSG(PCIESERDESQ3PCLKNOTDETECT, - "PCIe serdes Q3 no clock"), - INFINIPATH_HWE_MSG(DDSRXEQMEMORYPARITYERR, - "DDS RXEQ memory parity"), - INFINIPATH_HWE_MSG(IB_UC_MEMORYPARITYERR, "IB uC memory parity"), - INFINIPATH_HWE_MSG(PCIE_UC_OCT0MEMORYPARITYERR, - "PCIe uC oct0 memory parity"), - INFINIPATH_HWE_MSG(PCIE_UC_OCT1MEMORYPARITYERR, - "PCIe uC oct1 memory parity"), -}; - -static void autoneg_work(struct work_struct *); - -/* - * the offset is different for different configured port numbers, since - * port0 is fixed in size, but others can vary. Make it a function to - * make the issue more obvious. -*/ -static inline u32 port_egrtid_idx(struct ipath_devdata *dd, unsigned port) -{ - return port ? dd->ipath_p0_rcvegrcnt + - (port-1) * dd->ipath_rcvegrcnt : 0; -} - -static void ipath_7220_txe_recover(struct ipath_devdata *dd) -{ - ++ipath_stats.sps_txeparity; - - dev_info(&dd->pcidev->dev, - "Recovering from TXE PIO parity error\n"); - ipath_disarm_senderrbufs(dd); -} - - -/** - * ipath_7220_handle_hwerrors - display hardware errors. - * @dd: the infinipath device - * @msg: the output buffer - * @msgl: the size of the output buffer - * - * Use same msg buffer as regular errors to avoid excessive stack - * use. Most hardware errors are catastrophic, but for right now, - * we'll print them and continue. We reuse the same message buffer as - * ipath_handle_errors() to avoid excessive stack usage. - */ -static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg, - size_t msgl) -{ - ipath_err_t hwerrs; - u32 bits, ctrl; - int isfatal = 0; - char bitsmsg[64]; - int log_idx; - - hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); - if (!hwerrs) { - /* - * better than printing cofusing messages - * This seems to be related to clearing the crc error, or - * the pll error during init. - */ - ipath_cdbg(VERBOSE, "Called but no hardware errors set\n"); - goto bail; - } else if (hwerrs == ~0ULL) { - ipath_dev_err(dd, "Read of hardware error status failed " - "(all bits set); ignoring\n"); - goto bail; - } - ipath_stats.sps_hwerrs++; - - /* - * Always clear the error status register, except MEMBISTFAIL, - * regardless of whether we continue or stop using the chip. - * We want that set so we know it failed, even across driver reload. - * We'll still ignore it in the hwerrmask. We do this partly for - * diagnostics, but also for support. - */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - hwerrs&~INFINIPATH_HWE_MEMBISTFAILED); - - hwerrs &= dd->ipath_hwerrmask; - - /* We log some errors to EEPROM, check if we have any of those. 
*/ - for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) - if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log) - ipath_inc_eeprom_err(dd, log_idx, 1); - /* - * Make sure we get this much out, unless told to be quiet, - * or it's occurred within the last 5 seconds. - */ - if ((hwerrs & ~(dd->ipath_lasthwerror | - ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) || - (ipath_debug & __IPATH_VERBDBG)) - dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " - "(cleared)\n", (unsigned long long) hwerrs); - dd->ipath_lasthwerror |= hwerrs; - - if (hwerrs & ~dd->ipath_hwe_bitsextant) - ipath_dev_err(dd, "hwerror interrupt with unknown errors " - "%llx set\n", (unsigned long long) - (hwerrs & ~dd->ipath_hwe_bitsextant)); - - if (hwerrs & INFINIPATH_HWE_IB_UC_MEMORYPARITYERR) - ipath_sd7220_clr_ibpar(dd); - - ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); - if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { - /* - * Parity errors in send memory are recoverable by h/w - * just do housekeeping, exit freeze mode and continue. - */ - if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { - ipath_7220_txe_recover(dd); - hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | - INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) - << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); - } - if (hwerrs) { - /* - * If any set that we aren't ignoring only make the - * complaint once, in case it's stuck or recurring, - * and we get here multiple times - * Force link down, so switch knows, and - * LEDs are turned off. - */ - if (dd->ipath_flags & IPATH_INITTED) { - ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); - ipath_setup_7220_setextled(dd, - INFINIPATH_IBCS_L_STATE_DOWN, - INFINIPATH_IBCS_LT_STATE_DISABLED); - ipath_dev_err(dd, "Fatal Hardware Error " - "(freeze mode), no longer" - " usable, SN %.16s\n", - dd->ipath_serial); - isfatal = 1; - } - /* - * Mark as having had an error for driver, and also - * for /sys and status word mapped to user programs. - * This marks unit as not usable, until reset. 
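The dd->ipath_lasthwerror bookkeeping above gives report-once semantics: a message is printed only when an error bit appears that has not been accumulated before. A small standalone sketch of that masking (the sample register reads are hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Report-once masking as in ipath_7220_handle_hwerrors(): a message is
 * emitted only for error bits not already accumulated in lasthwerror. */
int main(void)
{
	uint64_t lasthwerror = 0;
	uint64_t samples[] = { 0x5, 0x5, 0x7 };	/* hypothetical hwerr reads */
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		uint64_t hwerrs = samples[i];

		if (hwerrs & ~lasthwerror)	/* any bit we haven't seen? */
			printf("Hardware error: hwerr=0x%llx (cleared)\n",
			       (unsigned long long)hwerrs);
		lasthwerror |= hwerrs;
	}
	/* prints for 0x5 (all new) and 0x7 (bit 0x2 new), not for the repeat */
	return 0;
}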
- */ - *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - dd->ipath_flags &= ~IPATH_INITTED; - } else { - ipath_dbg("Clearing freezemode on ignored or " - "recovered hardware error\n"); - ipath_clear_freeze(dd); - } - } - - *msg = '\0'; - - if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) { - strlcat(msg, "[Memory BIST test failed, " - "InfiniPath hardware unusable]", msgl); - /* ignore from now on, so disable until driver reloaded */ - *dd->ipath_statusp |= IPATH_STATUS_HWERROR; - dd->ipath_hwerrmask &= ~INFINIPATH_HWE_MEMBISTFAILED; - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - } - - ipath_format_hwerrors(hwerrs, - ipath_7220_hwerror_msgs, - ARRAY_SIZE(ipath_7220_hwerror_msgs), - msg, msgl); - - if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK - << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) { - bits = (u32) ((hwerrs >> - INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) & - INFINIPATH_HWE_PCIEMEMPARITYERR_MASK); - snprintf(bitsmsg, sizeof bitsmsg, - "[PCIe Mem Parity Errs %x] ", bits); - strlcat(msg, bitsmsg, msgl); - } - -#define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP | \ - INFINIPATH_HWE_COREPLL_RFSLIP) - - if (hwerrs & _IPATH_PLL_FAIL) { - snprintf(bitsmsg, sizeof bitsmsg, - "[PLL failed (%llx), InfiniPath hardware unusable]", - (unsigned long long) hwerrs & _IPATH_PLL_FAIL); - strlcat(msg, bitsmsg, msgl); - /* ignore from now on, so disable until driver reloaded */ - dd->ipath_hwerrmask &= ~(hwerrs & _IPATH_PLL_FAIL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - } - - if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED) { - /* - * If it occurs, it is left masked since the external - * interface is unused. - */ - dd->ipath_hwerrmask &= ~INFINIPATH_HWE_SERDESPLLFAILED; - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); - } - - ipath_dev_err(dd, "%s hardware error\n", msg); - /* - * For /sys status file. If no trailing } is copied, we'll - * know it was truncated. - */ - if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) - snprintf(dd->ipath_freezemsg, dd->ipath_freezelen, - "{%s}", msg); -bail:; -} - -/** - * ipath_7220_boardname - fill in the board name - * @dd: the infinipath device - * @name: the output buffer - * @namelen: the size of the output buffer - * - * info is based on the board revision register - */ -static int ipath_7220_boardname(struct ipath_devdata *dd, char *name, - size_t namelen) -{ - char *n = NULL; - u8 boardrev = dd->ipath_boardrev; - int ret; - - if (boardrev == 15) { - /* - * Emulator sometimes comes up all-ones, rather than zero.
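The "{%s}" format used for ipath_freezemsg above is a cheap truncation detector: if a reader of the /sys status file does not see the trailing '}', snprintf() ran out of room. A tiny standalone demonstration (buffer deliberately undersized):

#include <stdio.h>

/* Wrap the message in { }: if the trailing '}' is missing from the
 * destination buffer, snprintf() must have truncated the message. */
int main(void)
{
	char small[16];	/* deliberately too small */

	snprintf(small, sizeof(small), "{%s}",
		 "a rather long hardware error message");
	printf("%s\n", small);	/* no closing '}', so it was truncated */
	return 0;
}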
- */ - boardrev = 0; - dd->ipath_boardrev = boardrev; - } - switch (boardrev) { - case 0: - n = "InfiniPath_7220_Emulation"; - break; - case 1: - n = "InfiniPath_QLE7240"; - break; - case 2: - n = "InfiniPath_QLE7280"; - break; - case 3: - n = "InfiniPath_QLE7242"; - break; - case 4: - n = "InfiniPath_QEM7240"; - break; - case 5: - n = "InfiniPath_QMI7240"; - break; - case 6: - n = "InfiniPath_QMI7264"; - break; - case 7: - n = "InfiniPath_QMH7240"; - break; - case 8: - n = "InfiniPath_QME7240"; - break; - case 9: - n = "InfiniPath_QLE7250"; - break; - case 10: - n = "InfiniPath_QLE7290"; - break; - case 11: - n = "InfiniPath_QEM7250"; - break; - case 12: - n = "InfiniPath_QLE-Bringup"; - break; - default: - ipath_dev_err(dd, - "Don't yet know about board with ID %u\n", - boardrev); - snprintf(name, namelen, "Unknown_InfiniPath_PCIe_%u", - boardrev); - break; - } - if (n) - snprintf(name, namelen, "%s", n); - - if (dd->ipath_majrev != 5 || !dd->ipath_minrev || - dd->ipath_minrev > 2) { - ipath_dev_err(dd, "Unsupported InfiniPath hardware " - "revision %u.%u!\n", - dd->ipath_majrev, dd->ipath_minrev); - ret = 1; - } else if (dd->ipath_minrev == 1 && - !(dd->ipath_flags & IPATH_INITTED)) { - /* Rev1 chips are prototype. Complain at init, but allow use */ - ipath_dev_err(dd, "Unsupported hardware " - "revision %u.%u, Contact support@qlogic.com\n", - dd->ipath_majrev, dd->ipath_minrev); - ret = 0; - } else - ret = 0; - - /* - * Set here not in ipath_init_*_funcs because we have to do - * it after we can read chip registers. - */ - dd->ipath_ureg_align = 0x10000; /* 64KB alignment */ - - return ret; -} - -/** - * ipath_7220_init_hwerrors - enable hardware errors - * @dd: the infinipath device - * - * now that we have finished initializing everything that might reasonably - * cause a hardware error, and cleared those error bits as they occur, - * we can enable hardware errors in the mask (potentially enabling - * freeze mode), and enable hardware errors as errors (along with - * everything else) in errormask - */ -static void ipath_7220_init_hwerrors(struct ipath_devdata *dd) -{ - ipath_err_t val; - u64 extsval; - - extsval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus); - - if (!(extsval & (INFINIPATH_EXTS_MEMBIST_ENDTEST | - INFINIPATH_EXTS_MEMBIST_DISABLED))) - ipath_dev_err(dd, "MemBIST did not complete!\n"); - if (extsval & INFINIPATH_EXTS_MEMBIST_DISABLED) - dev_info(&dd->pcidev->dev, "MemBIST is disabled.\n"); - - val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */ - - if (!dd->ipath_boardrev) /* no PLL for Emulator */ - val &= ~INFINIPATH_HWE_SERDESPLLFAILED; - - if (dd->ipath_minrev == 1) - val &= ~(1ULL << 42); /* TXE LaunchFIFO Parity rev1 issue */ - - val &= ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR; - dd->ipath_hwerrmask = val; - - /* - * special trigger "error" is for debugging purposes. It - * works around a processor/chipset problem. The error - * interrupt allows us to count occurrences, but we don't - * want to pay the overhead for normal use. Emulation only - */ - if (!dd->ipath_boardrev) - dd->ipath_maskederrs = INFINIPATH_E_SENDSPECIALTRIGGER; -} - -/* - * All detailed interaction with the SerDes has been moved to ipath_sd7220.c - * - * The portion of IBA7220-specific bringup_serdes() that actually deals with - * registers and memory within the SerDes itself is ipath_sd7220_init().
- */ - -/** - * ipath_7220_bringup_serdes - bring up the serdes - * @dd: the infinipath device - */ -static int ipath_7220_bringup_serdes(struct ipath_devdata *dd) -{ - int ret = 0; - u64 val, prev_val, guid; - int was_reset; /* Note whether uC was reset */ - - ipath_dbg("Trying to bringup serdes\n"); - - if (ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus) & - INFINIPATH_HWE_SERDESPLLFAILED) { - ipath_dbg("At start, serdes PLL failed bit set " - "in hwerrstatus, clearing and continuing\n"); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - INFINIPATH_HWE_SERDESPLLFAILED); - } - - if (!dd->ipath_ibcddrctrl) { - /* not on re-init after reset */ - dd->ipath_ibcddrctrl = - ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcddrctrl); - - if (dd->ipath_link_speed_enabled == - (IPATH_IB_SDR | IPATH_IB_DDR)) - dd->ipath_ibcddrctrl |= - IBA7220_IBC_SPEED_AUTONEG_MASK | - IBA7220_IBC_IBTA_1_2_MASK; - else - dd->ipath_ibcddrctrl |= - dd->ipath_link_speed_enabled == IPATH_IB_DDR - ? IBA7220_IBC_SPEED_DDR : - IBA7220_IBC_SPEED_SDR; - if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X | - IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X)) - dd->ipath_ibcddrctrl |= IBA7220_IBC_WIDTH_AUTONEG; - else - dd->ipath_ibcddrctrl |= - dd->ipath_link_width_enabled == IB_WIDTH_4X - ? IBA7220_IBC_WIDTH_4X_ONLY : - IBA7220_IBC_WIDTH_1X_ONLY; - - /* always enable these on driver reload, not sticky */ - dd->ipath_ibcddrctrl |= - IBA7220_IBC_RXPOL_MASK << IBA7220_IBC_RXPOL_SHIFT; - dd->ipath_ibcddrctrl |= - IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT; - /* - * automatic lane reversal detection for receive - * doesn't work correctly in rev 1, so disable it - * on that rev, otherwise enable (disabling not - * sticky across reload for >rev1) - */ - if (dd->ipath_minrev == 1) - dd->ipath_ibcddrctrl &= - ~IBA7220_IBC_LANE_REV_SUPPORTED; - else - dd->ipath_ibcddrctrl |= - IBA7220_IBC_LANE_REV_SUPPORTED; - } - - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl, - dd->ipath_ibcddrctrl); - - ipath_write_kreg(dd, IPATH_KREG_OFFSET(IBNCModeCtrl), 0Ull); - - /* IBA7220 has SERDES MPU reset in D0 of what _was_ IBPLLCfg */ - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl); - /* remember if uC was in Reset or not, for dactrim */ - was_reset = (val & 1); - ipath_cdbg(VERBOSE, "IBReset %s xgxsconfig %llx\n", - was_reset ? "Asserted" : "Negated", (unsigned long long) - ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig)); - - if (dd->ipath_boardrev) { - /* - * Hardware is not emulator, and may have been reset. Init it. 
- * Below will release reset, but needs to know if chip was - * originally in reset, to only trim DACs on first time - * after chip reset or powercycle (not driver reload) - */ - ret = ipath_sd7220_init(dd, was_reset); - } - - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - prev_val = val; - val |= INFINIPATH_XGXS_FC_SAFE; - if (val != prev_val) { - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); - } - if (val & INFINIPATH_XGXS_RESET) - val &= ~INFINIPATH_XGXS_RESET; - if (val != prev_val) - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); - - ipath_cdbg(VERBOSE, "done: xgxs=%llx from %llx\n", - (unsigned long long) - ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig), - prev_val); - - guid = be64_to_cpu(dd->ipath_guid); - - if (!guid) { - /* have to have something, so use likely unique tsc */ - guid = get_cycles(); - ipath_dbg("No GUID for heartbeat, faking %llx\n", - (unsigned long long)guid); - } else - ipath_cdbg(VERBOSE, "Wrote %llX to HRTBT_GUID\n", guid); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hrtbt_guid, guid); - return ret; -} - -static void ipath_7220_config_jint(struct ipath_devdata *dd, - u16 idle_ticks, u16 max_packets) -{ - - /* - * We can request a receive interrupt for 1 or more packets - * from current offset. - */ - if (idle_ticks == 0 || max_packets == 0) - /* interrupt after one packet if no mitigation */ - dd->ipath_rhdrhead_intr_off = - 1ULL << IBA7220_HDRHEAD_PKTINT_SHIFT; - else - /* Turn off RcvHdrHead interrupts if using mitigation */ - dd->ipath_rhdrhead_intr_off = 0ULL; - - /* refresh kernel RcvHdrHead registers... */ - ipath_write_ureg(dd, ur_rcvhdrhead, - dd->ipath_rhdrhead_intr_off | - dd->ipath_pd[0]->port_head, 0); - - dd->ipath_jint_max_packets = max_packets; - dd->ipath_jint_idle_ticks = idle_ticks; - ipath_write_kreg(dd, dd->ipath_kregs->kr_jintreload, - ((u64) max_packets << INFINIPATH_JINT_PACKETSHIFT) | - idle_ticks); -} - -/** - * ipath_7220_quiet_serdes - set serdes to txidle - * @dd: the infinipath device - * Called when driver is being unloaded - */ -static void ipath_7220_quiet_serdes(struct ipath_devdata *dd) -{ - u64 val; - dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG; - wake_up(&dd->ipath_autoneg_wait); - cancel_delayed_work(&dd->ipath_autoneg_work); - flush_scheduled_work(); - ipath_shutdown_relock_poll(dd); - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - val |= INFINIPATH_XGXS_RESET; - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); -} - -static int ipath_7220_intconfig(struct ipath_devdata *dd) -{ - ipath_7220_config_jint(dd, dd->ipath_jint_idle_ticks, - dd->ipath_jint_max_packets); - return 0; -} - -/** - * ipath_setup_7220_setextled - set the state of the two external LEDs - * @dd: the infinipath device - * @lst: the L state - * @ltst: the LT state - * - * These LEDs indicate the physical and logical state of IB link. - * For this chip (at least with recommended board pinouts), LED1 - * is Yellow (logical state) and LED2 is Green (physical state), - * - * Note: We try to match the Mellanox HCA LED behavior as best - * we can. Green indicates physical link state is OK (something is - * plugged in, and we can train). - * Amber indicates the link is logically up (ACTIVE). 
- * Mellanox further blinks the amber LED to indicate data packet - * activity, but we have no hardware support for that, so it would - * require waking up every 10-20 msecs and checking the counters - * on the chip, and then turning the LED off if appropriate. That's - * visible overhead, so not something we will do. - * - */ -static void ipath_setup_7220_setextled(struct ipath_devdata *dd, u64 lst, - u64 ltst) -{ - u64 extctl, ledblink = 0; - unsigned long flags = 0; - - /* the diags use the LED to indicate diag info, so we leave - * the external LED alone when the diags are running */ - if (ipath_diag_inuse) - return; - - /* Allow override of LED display for, e.g. Locating system in rack */ - if (dd->ipath_led_override) { - ltst = (dd->ipath_led_override & IPATH_LED_PHYS) - ? INFINIPATH_IBCS_LT_STATE_LINKUP - : INFINIPATH_IBCS_LT_STATE_DISABLED; - lst = (dd->ipath_led_override & IPATH_LED_LOG) - ? INFINIPATH_IBCS_L_STATE_ACTIVE - : INFINIPATH_IBCS_L_STATE_DOWN; - } - - spin_lock_irqsave(&dd->ipath_gpio_lock, flags); - extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON | - INFINIPATH_EXTC_LED2PRIPORT_ON); - if (ltst == INFINIPATH_IBCS_LT_STATE_LINKUP) { - extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON; - /* - * counts are in chip clock (4ns) periods. - * This is 1/16 sec (66.6ms) on, - * 3/16 sec (187.5 ms) off, with packets rcvd - */ - ledblink = ((66600*1000UL/4) << IBA7220_LEDBLINK_ON_SHIFT) - | ((187500*1000UL/4) << IBA7220_LEDBLINK_OFF_SHIFT); - } - if (lst == INFINIPATH_IBCS_L_STATE_ACTIVE) - extctl |= INFINIPATH_EXTC_LED2PRIPORT_ON; - dd->ipath_extctrl = extctl; - ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl); - spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags); - - if (ledblink) /* blink the LED on packet receive */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvpktledcnt, - ledblink); -} - -/* - * Similar to pci_intx(pdev, 1), except that we make sure - * msi is off... - */ -static void ipath_enable_intx(struct pci_dev *pdev) -{ - u16 cw, new; - int pos; - - /* first, turn on INTx */ - pci_read_config_word(pdev, PCI_COMMAND, &cw); - new = cw & ~PCI_COMMAND_INTX_DISABLE; - if (new != cw) - pci_write_config_word(pdev, PCI_COMMAND, new); - - /* then turn off MSI */ - pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); - if (pos) { - pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw); - new = cw & ~PCI_MSI_FLAGS_ENABLE; - if (new != cw) - pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new); - } -} - -static int ipath_msi_enabled(struct pci_dev *pdev) -{ - int pos, ret = 0; - - pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); - if (pos) { - u16 cw; - - pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw); - ret = !!(cw & PCI_MSI_FLAGS_ENABLE); - } - return ret; -} - -/* - * disable msi interrupt if enabled, and clear the flag. - * flag is used primarily for the fallback to INTx, but - * is also used in reinit after reset as a flag. - */ -static void ipath_7220_nomsi(struct ipath_devdata *dd) -{ - dd->ipath_msi_lo = 0; - - if (ipath_msi_enabled(dd->pcidev)) { - /* - * free, but don't zero; later kernels require - * it be freed before disable_msi, so the intx - * setup has to request it again. - */ - if (dd->ipath_irq) - free_irq(dd->ipath_irq, dd); - pci_disable_msi(dd->pcidev); - } -} - -/* - * ipath_setup_7220_cleanup - clean up any per-chip chip-specific stuff - * @dd: the infinipath device - * - * Nothing but msi interrupt cleanup for now. - * - * This is called during driver unload. 
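The RcvPktLEDCnt value computed in ipath_setup_7220_setextled() is plain tick arithmetic: durations are expressed in 4 ns chip-clock periods and the on/off counts are packed into one 64-bit register. A standalone sketch that reproduces the numbers from the comment above (shift names copied from the defines earlier in this file):

#include <stdint.h>
#include <stdio.h>

#define IBA7220_LEDBLINK_ON_SHIFT	32
#define IBA7220_LEDBLINK_OFF_SHIFT	0

int main(void)
{
	/* RcvPktLEDCnt counts in 4 ns chip-clock periods: ~66.6 ms on,
	 * ~187.5 ms off after each received packet. */
	uint64_t on  = 66600ULL * 1000 / 4;	/* 66.6 ms in 4 ns ticks */
	uint64_t off = 187500ULL * 1000 / 4;	/* 187.5 ms in 4 ns ticks */
	uint64_t ledblink = (on << IBA7220_LEDBLINK_ON_SHIFT) |
			    (off << IBA7220_LEDBLINK_OFF_SHIFT);

	printf("on=%llu ticks (%.1f ms), off=%llu ticks (%.1f ms)\n",
	       (unsigned long long)on, on * 4 / 1e6,
	       (unsigned long long)off, off * 4 / 1e6);
	printf("RcvPktLEDCnt=0x%016llx\n", (unsigned long long)ledblink);
	return 0;
}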
- */ -static void ipath_setup_7220_cleanup(struct ipath_devdata *dd) -{ - ipath_7220_nomsi(dd); -} - - -static void ipath_7220_pcie_params(struct ipath_devdata *dd, u32 boardrev) -{ - u16 linkstat, minwidth, speed; - int pos; - - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_EXP); - if (!pos) { - ipath_dev_err(dd, "Can't find PCI Express capability!\n"); - goto bail; - } - - pci_read_config_word(dd->pcidev, pos + PCI_EXP_LNKSTA, - &linkstat); - /* - * speed is bits 0-3, linkwidth is bits 4-9 - * no defines for them in headers - */ - speed = linkstat & 0xf; - linkstat >>= 4; - linkstat &= 0x1f; - dd->ipath_lbus_width = linkstat; - switch (boardrev) { - case 0: - case 2: - case 10: - case 12: - minwidth = 16; /* x16 capable boards */ - break; - default: - minwidth = 8; /* x8 capable boards */ - break; - } - - switch (speed) { - case 1: - dd->ipath_lbus_speed = 2500; /* Gen1, 2.5GHz */ - break; - case 2: - dd->ipath_lbus_speed = 5000; /* Gen2, 5GHz */ - break; - default: /* not defined, assume gen1 */ - dd->ipath_lbus_speed = 2500; - break; - } - - if (linkstat < minwidth) - ipath_dev_err(dd, - "PCIe width %u (x%u HCA), performance " - "reduced\n", linkstat, minwidth); - else - ipath_cdbg(VERBOSE, "PCIe speed %u width %u (x%u HCA)\n", - dd->ipath_lbus_speed, linkstat, minwidth); - - if (speed != 1) - ipath_dev_err(dd, - "PCIe linkspeed %u is incorrect; " - "should be 1 (2500)!\n", speed); - -bail: - /* fill in string, even on errors */ - snprintf(dd->ipath_lbus_info, sizeof(dd->ipath_lbus_info), - "PCIe,%uMHz,x%u\n", - dd->ipath_lbus_speed, - dd->ipath_lbus_width); - return; -} - - -/** - * ipath_setup_7220_config - setup PCIe config related stuff - * @dd: the infinipath device - * @pdev: the PCI device - * - * The pci_enable_msi() call will fail on systems with MSI quirks - * such as those with AMD8131, even if the device of interest is not - * attached to that device (in the 2.6.13 - 2.6.15 kernels, at least, fixed - * late in 2.6.16). - * All that can be done is to edit the kernel source to remove the quirk - * check until that is fixed. - * We do not need to call enable_msi() for our HyperTransport chip, - * even though it uses MSI, and we want to avoid the quirk warning, - * so we call enable_msi only for PCIe. If we do end up needing - * pci_enable_msi at some point in the future for HT, we'll move the - * call back into the main init_one code. - * We save the msi lo and hi values, so we can restore them after - * chip reset (the kernel PCI infrastructure doesn't yet handle that - * correctly).
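ipath_7220_pcie_params() above decodes the PCIe Link Status register by hand: current link speed in the low four bits, negotiated width in bits 4-9. A standalone sketch of the same decode (the sample register value is made up):

#include <stdint.h>
#include <stdio.h>

/* Decode a PCIe Link Status register value the way
 * ipath_7220_pcie_params() does: speed in the low 4 bits,
 * negotiated width in bits 4..9. */
int main(void)
{
	uint16_t linkstat = 0x0081;	/* hypothetical: x8 link at 2.5 GT/s */
	unsigned speed = linkstat & 0xf;
	unsigned width = (linkstat >> 4) & 0x1f;

	printf("width x%u, speed %s\n", width,
	       speed == 1 ? "2500 MHz (Gen1)" :
	       speed == 2 ? "5000 MHz (Gen2)" : "unknown");
	return 0;
}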
- */ -static int ipath_setup_7220_config(struct ipath_devdata *dd, - struct pci_dev *pdev) -{ - int pos, ret = -1; - u32 boardrev; - - dd->ipath_msi_lo = 0; /* used as a flag during reset processing */ - - pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); - if (!strcmp(int_type, "force_msi") || !strcmp(int_type, "auto")) - ret = pci_enable_msi(pdev); - if (ret) { - if (!strcmp(int_type, "force_msi")) { - ipath_dev_err(dd, "pci_enable_msi failed: %d, " - "force_msi is on, so not continuing.\n", - ret); - return ret; - } - - ipath_enable_intx(pdev); - if (!strcmp(int_type, "auto")) - ipath_dev_err(dd, "pci_enable_msi failed: %d, " - "falling back to INTx\n", ret); - } else if (pos) { - u16 control; - pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, - &dd->ipath_msi_lo); - pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI, - &dd->ipath_msi_hi); - pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, - &control); - /* now save the data (vector) info */ - pci_read_config_word(pdev, - pos + ((control & PCI_MSI_FLAGS_64BIT) - ? PCI_MSI_DATA_64 : - PCI_MSI_DATA_32), - &dd->ipath_msi_data); - } else - ipath_dev_err(dd, "Can't find MSI capability, " - "can't save MSI settings for reset\n"); - - dd->ipath_irq = pdev->irq; - - /* - * We save the cachelinesize also, although it doesn't - * really matter. - */ - pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, - &dd->ipath_pci_cacheline); - - /* - * this function called early, ipath_boardrev not set yet. Can't - * use ipath_read_kreg64() yet, too early in init, so use readq() - */ - boardrev = (readq(&dd->ipath_kregbase[dd->ipath_kregs->kr_revision]) - >> INFINIPATH_R_BOARDID_SHIFT) & INFINIPATH_R_BOARDID_MASK; - - ipath_7220_pcie_params(dd, boardrev); - - dd->ipath_flags |= IPATH_NODMA_RTAIL | IPATH_HAS_SEND_DMA | - IPATH_HAS_PBC_CNT | IPATH_HAS_THRESH_UPDATE; - dd->ipath_pioupd_thresh = 4U; /* set default update threshold */ - return 0; -} - -static void ipath_init_7220_variables(struct ipath_devdata *dd) -{ - /* - * setup the register offsets, since they are different for each - * chip - */ - dd->ipath_kregs = &ipath_7220_kregs; - dd->ipath_cregs = &ipath_7220_cregs; - - /* - * bits for selecting i2c direction and values, - * used for I2C serial flash - */ - dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM; - dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM; - dd->ipath_gpio_sda = IPATH_GPIO_SDA; - dd->ipath_gpio_scl = IPATH_GPIO_SCL; - - /* - * Fill in data for field-values that change in IBA7220. - * We dynamically specify only the mask for LINKTRAININGSTATE - * and only the shift for LINKSTATE, as they are the only ones - * that change. Also precalculate the 3 link states of interest - * and the combined mask. - */ - dd->ibcs_ls_shift = IBA7220_IBCS_LINKSTATE_SHIFT; - dd->ibcs_lts_mask = IBA7220_IBCS_LINKTRAININGSTATE_MASK; - dd->ibcs_mask = (INFINIPATH_IBCS_LINKSTATE_MASK << - dd->ibcs_ls_shift) | dd->ibcs_lts_mask; - dd->ib_init = (INFINIPATH_IBCS_LT_STATE_LINKUP << - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | - (INFINIPATH_IBCS_L_STATE_INIT << dd->ibcs_ls_shift); - dd->ib_arm = (INFINIPATH_IBCS_LT_STATE_LINKUP << - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | - (INFINIPATH_IBCS_L_STATE_ARM << dd->ibcs_ls_shift); - dd->ib_active = (INFINIPATH_IBCS_LT_STATE_LINKUP << - INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | - (INFINIPATH_IBCS_L_STATE_ACTIVE << dd->ibcs_ls_shift); - - /* - * Fill in data for ibcc field-values that change in IBA7220. 
- * We dynamically specify only the mask for LINKINITCMD - * and only the shift for LINKCMD and MAXPKTLEN, as they are - * the only ones that change. - */ - dd->ibcc_lic_mask = IBA7220_IBCC_LINKINITCMD_MASK; - dd->ibcc_lc_shift = IBA7220_IBCC_LINKCMD_SHIFT; - dd->ibcc_mpl_shift = IBA7220_IBCC_MAXPKTLEN_SHIFT; - - /* Fill in shifts for RcvCtrl. */ - dd->ipath_r_portenable_shift = INFINIPATH_R_PORTENABLE_SHIFT; - dd->ipath_r_intravail_shift = IBA7220_R_INTRAVAIL_SHIFT; - dd->ipath_r_tailupd_shift = IBA7220_R_TAILUPD_SHIFT; - dd->ipath_r_portcfg_shift = IBA7220_R_PORTCFG_SHIFT; - - /* variables for sanity checking interrupt and errors */ - dd->ipath_hwe_bitsextant = - (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) | - (INFINIPATH_HWE_TXEMEMPARITYERR_MASK << - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) | - (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK << - INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) | - INFINIPATH_HWE_PCIE1PLLFAILED | - INFINIPATH_HWE_PCIE0PLLFAILED | - INFINIPATH_HWE_PCIEPOISONEDTLP | - INFINIPATH_HWE_PCIECPLTIMEOUT | - INFINIPATH_HWE_PCIEBUSPARITYXTLH | - INFINIPATH_HWE_PCIEBUSPARITYXADM | - INFINIPATH_HWE_PCIEBUSPARITYRADM | - INFINIPATH_HWE_MEMBISTFAILED | - INFINIPATH_HWE_COREPLL_FBSLIP | - INFINIPATH_HWE_COREPLL_RFSLIP | - INFINIPATH_HWE_SERDESPLLFAILED | - INFINIPATH_HWE_IBCBUSTOSPCPARITYERR | - INFINIPATH_HWE_IBCBUSFRSPCPARITYERR | - INFINIPATH_HWE_PCIECPLDATAQUEUEERR | - INFINIPATH_HWE_PCIECPLHDRQUEUEERR | - INFINIPATH_HWE_SDMAMEMREADERR | - INFINIPATH_HWE_CLK_UC_PLLNOTLOCKED | - INFINIPATH_HWE_PCIESERDESQ0PCLKNOTDETECT | - INFINIPATH_HWE_PCIESERDESQ1PCLKNOTDETECT | - INFINIPATH_HWE_PCIESERDESQ2PCLKNOTDETECT | - INFINIPATH_HWE_PCIESERDESQ3PCLKNOTDETECT | - INFINIPATH_HWE_DDSRXEQMEMORYPARITYERR | - INFINIPATH_HWE_IB_UC_MEMORYPARITYERR | - INFINIPATH_HWE_PCIE_UC_OCT0MEMORYPARITYERR | - INFINIPATH_HWE_PCIE_UC_OCT1MEMORYPARITYERR; - dd->ipath_i_bitsextant = - INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED | - (INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) | - (INFINIPATH_I_RCVAVAIL_MASK << - INFINIPATH_I_RCVAVAIL_SHIFT) | - INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT | - INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO | - INFINIPATH_I_JINT | INFINIPATH_I_SERDESTRIMDONE; - dd->ipath_e_bitsextant = - INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC | - INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN | - INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN | - INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RUNEXPCHAR | - INFINIPATH_E_RUNSUPVL | INFINIPATH_E_REBP | - INFINIPATH_E_RIBFLOW | INFINIPATH_E_RBADVERSION | - INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | - INFINIPATH_E_RBADTID | INFINIPATH_E_RHDRLEN | - INFINIPATH_E_RHDR | INFINIPATH_E_RIBLOSTLINK | - INFINIPATH_E_SENDSPECIALTRIGGER | - INFINIPATH_E_SDMADISABLED | INFINIPATH_E_SMINPKTLEN | - INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNDERRUN | - INFINIPATH_E_SPKTLEN | INFINIPATH_E_SDROPPEDSMPPKT | - INFINIPATH_E_SDROPPEDDATAPKT | - INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | - INFINIPATH_E_SUNSUPVL | INFINIPATH_E_SENDBUFMISUSE | - INFINIPATH_E_SDMAGENMISMATCH | INFINIPATH_E_SDMAOUTOFBOUND | - INFINIPATH_E_SDMATAILOUTOFBOUND | INFINIPATH_E_SDMABASE | - INFINIPATH_E_SDMA1STDESC | INFINIPATH_E_SDMARPYTAG | - INFINIPATH_E_SDMADWEN | INFINIPATH_E_SDMAMISSINGDW | - INFINIPATH_E_SDMAUNEXPDATA | - INFINIPATH_E_IBSTATUSCHANGED | INFINIPATH_E_INVALIDADDR | - INFINIPATH_E_RESET | INFINIPATH_E_HARDWARE | - INFINIPATH_E_SDMADESCADDRMISALIGN | - INFINIPATH_E_INVALIDEEPCMD; - - 
dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; - dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; - dd->ipath_i_rcvavail_shift = INFINIPATH_I_RCVAVAIL_SHIFT; - dd->ipath_i_rcvurg_shift = INFINIPATH_I_RCVURG_SHIFT; - dd->ipath_flags |= IPATH_INTREG_64 | IPATH_HAS_MULT_IB_SPEED - | IPATH_HAS_LINK_LATENCY; - - /* - * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. - * 2 is Some Misc, 3 is reserved for future. - */ - dd->ipath_eep_st_masks[0].hwerrs_to_log = - INFINIPATH_HWE_TXEMEMPARITYERR_MASK << - INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT; - - dd->ipath_eep_st_masks[1].hwerrs_to_log = - INFINIPATH_HWE_RXEMEMPARITYERR_MASK << - INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; - - dd->ipath_eep_st_masks[2].errs_to_log = INFINIPATH_E_RESET; - - ipath_linkrecovery = 0; - - init_waitqueue_head(&dd->ipath_autoneg_wait); - INIT_DELAYED_WORK(&dd->ipath_autoneg_work, autoneg_work); - - dd->ipath_link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; - dd->ipath_link_speed_supported = IPATH_IB_SDR | IPATH_IB_DDR; - - dd->ipath_link_width_enabled = dd->ipath_link_width_supported; - dd->ipath_link_speed_enabled = dd->ipath_link_speed_supported; - /* - * set the initial values to reasonable default, will be set - * for real when link is up. - */ - dd->ipath_link_width_active = IB_WIDTH_4X; - dd->ipath_link_speed_active = IPATH_IB_SDR; - dd->delay_mult = rate_to_delay[0][1]; -} - - -/* - * Setup the MSI stuff again after a reset. I'd like to just call - * pci_enable_msi() and request_irq() again, but when I do that, - * the MSI enable bit doesn't get set in the command word, and - * we switch to a different interrupt vector, which is confusing, - * so I instead just do it all inline. Perhaps we can somehow tie this - * into the PCIe hotplug support at some point. - * Note, because I'm doing it all here, I don't call pci_disable_msi() - * or free_irq() at the start of ipath_setup_7220_reset(). - */ -static int ipath_reinit_msi(struct ipath_devdata *dd) -{ - int ret = 0; - - int pos; - u16 control; - if (!dd->ipath_msi_lo) /* Using intX, or init problem */ - goto bail; - - pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); - if (!pos) { - ipath_dev_err(dd, "Can't find MSI capability, " - "can't restore MSI settings\n"); - goto bail; - } - ipath_cdbg(VERBOSE, "Writing msi_lo 0x%x to config offset 0x%x\n", - dd->ipath_msi_lo, pos + PCI_MSI_ADDRESS_LO); - pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO, - dd->ipath_msi_lo); - ipath_cdbg(VERBOSE, "Writing msi_hi 0x%x to config offset 0x%x\n", - dd->ipath_msi_hi, pos + PCI_MSI_ADDRESS_HI); - pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI, - dd->ipath_msi_hi); - pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control); - if (!(control & PCI_MSI_FLAGS_ENABLE)) { - ipath_cdbg(VERBOSE, "MSI control at off %x was %x, " - "setting MSI enable (%x)\n", pos + PCI_MSI_FLAGS, - control, control | PCI_MSI_FLAGS_ENABLE); - control |= PCI_MSI_FLAGS_ENABLE; - pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, - control); - } - /* now rewrite the data (vector) info */ - pci_write_config_word(dd->pcidev, pos + - ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8), - dd->ipath_msi_data); - ret = 1; - -bail: - if (!ret) { - ipath_dbg("Using INTx, MSI disabled or not configured\n"); - ipath_enable_intx(dd->pcidev); - ret = 1; - } - /* - * We restore the cachelinesize also, although it doesn't really - * matter.
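The bare "? 12 : 8" near the end of ipath_reinit_msi() is the offset of the MSI data (vector) word, which moves depending on whether the capability uses a 64-bit message address. A sketch with the <linux/pci_regs.h> names spelled out:

#include <stdint.h>
#include <stdio.h>

/* Offsets within the MSI capability, as in <linux/pci_regs.h>. */
#define PCI_MSI_FLAGS		2	/* message control word */
#define PCI_MSI_FLAGS_64BIT	0x0080	/* device uses 64-bit address */
#define PCI_MSI_DATA_32		8	/* data offset, 32-bit address */
#define PCI_MSI_DATA_64		12	/* data offset, 64-bit address */

/* The '? 12 : 8' in ipath_reinit_msi() is exactly this choice. */
static unsigned msi_data_offset(uint16_t control)
{
	return (control & PCI_MSI_FLAGS_64BIT) ? PCI_MSI_DATA_64
					       : PCI_MSI_DATA_32;
}

int main(void)
{
	printf("32-bit MSI: data at +%u\n", msi_data_offset(0x0000));
	printf("64-bit MSI: data at +%u\n", msi_data_offset(0x0080));
	return 0;
}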
- */ - pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, - dd->ipath_pci_cacheline); - /* and now set the pci master bit again */ - pci_set_master(dd->pcidev); - - return ret; -} - -/* - * This routine sleeps, so it can only be called from user context, not - * from interrupt context. If we need interrupt context, we can split - * it into two routines. - */ -static int ipath_setup_7220_reset(struct ipath_devdata *dd) -{ - u64 val; - int i; - int ret; - u16 cmdval; - - pci_read_config_word(dd->pcidev, PCI_COMMAND, &cmdval); - - /* Use dev_err so it shows up in logs, etc. */ - ipath_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->ipath_unit); - - /* keep chip from being accessed in a few places */ - dd->ipath_flags &= ~(IPATH_INITTED | IPATH_PRESENT); - val = dd->ipath_control | INFINIPATH_C_RESET; - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, val); - mb(); - - for (i = 1; i <= 5; i++) { - int r; - - /* - * Allow MBIST, etc. to complete; longer on each retry. - * We sometimes get machine checks from bus timeout if no - * response, so for now, make it *really* long. - */ - msleep(1000 + (1 + i) * 2000); - r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, - dd->ipath_pcibar0); - if (r) - ipath_dev_err(dd, "rewrite of BAR0 failed: %d\n", r); - r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, - dd->ipath_pcibar1); - if (r) - ipath_dev_err(dd, "rewrite of BAR1 failed: %d\n", r); - /* now re-enable memory access */ - pci_write_config_word(dd->pcidev, PCI_COMMAND, cmdval); - r = pci_enable_device(dd->pcidev); - if (r) - ipath_dev_err(dd, "pci_enable_device failed after " - "reset: %d\n", r); - /* - * whether it fully enabled or not, mark as present, - * again (but not INITTED) - */ - dd->ipath_flags |= IPATH_PRESENT; - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_revision); - if (val == dd->ipath_revision) { - ipath_cdbg(VERBOSE, "Got matching revision " - "register %llx on try %d\n", - (unsigned long long) val, i); - ret = ipath_reinit_msi(dd); - goto bail; - } - /* Probably getting -1 back */ - ipath_dbg("Didn't get expected revision register, " - "got %llx, try %d\n", (unsigned long long) val, - i + 1); - } - ret = 0; /* failed */ - -bail: - if (ret) - ipath_7220_pcie_params(dd, dd->ipath_boardrev); - - return ret; -} - -/** - * ipath_7220_put_tid - write a TID to the chip - * @dd: the infinipath device - * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: 0 for eager, 1 for expected - * @pa: physical address of in-memory buffer; ipath_tidinvalid if freeing - * - * This exists as a separate routine to allow for selection of the - * appropriate "flavor". The static calls in cleanup just use the - * revision-agnostic form, as they are not performance critical.
- */ -static void ipath_7220_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr, - u32 type, unsigned long pa) -{ - if (pa != dd->ipath_tidinvalid) { - u64 chippa = pa >> IBA7220_TID_PA_SHIFT; - - /* paranoia checks */ - if (pa != (chippa << IBA7220_TID_PA_SHIFT)) { - dev_info(&dd->pcidev->dev, "BUG: physaddr %lx " - "not 2KB aligned!\n", pa); - return; - } - if (pa >= (1UL << IBA7220_TID_SZ_SHIFT)) { - ipath_dev_err(dd, - "BUG: Physical page address 0x%lx " - "larger than supported\n", pa); - return; - } - - if (type == RCVHQ_RCV_TYPE_EAGER) - chippa |= dd->ipath_tidtemplate; - else /* for now, always full 4KB page */ - chippa |= IBA7220_TID_SZ_4K; - writeq(chippa, tidptr); - } else - writeq(pa, tidptr); - mmiowb(); -} - -/** - * ipath_7220_clear_tids - clear all TID entries for a port, expected and eager - * @dd: the infinipath device - * @port: the port - * - * clear all TID entries for a port, expected and eager. - * Used from ipath_close(). On this chip, TIDs are only 32 bits, - * not 64, but they are still on 64 bit boundaries, so tidbase - * is declared as u64 * for the pointer math, even though we write 32 bits - */ -static void ipath_7220_clear_tids(struct ipath_devdata *dd, unsigned port) -{ - u64 __iomem *tidbase; - unsigned long tidinv; - int i; - - if (!dd->ipath_kregbase) - return; - - ipath_cdbg(VERBOSE, "Invalidate TIDs for port %u\n", port); - - tidinv = dd->ipath_tidinvalid; - tidbase = (u64 __iomem *) - ((char __iomem *)(dd->ipath_kregbase) + - dd->ipath_rcvtidbase + - port * dd->ipath_rcvtidcnt * sizeof(*tidbase)); - - for (i = 0; i < dd->ipath_rcvtidcnt; i++) - ipath_7220_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, - tidinv); - - tidbase = (u64 __iomem *) - ((char __iomem *)(dd->ipath_kregbase) + - dd->ipath_rcvegrbase + port_egrtid_idx(dd, port) - * sizeof(*tidbase)); - - for (i = port ? dd->ipath_rcvegrcnt : dd->ipath_p0_rcvegrcnt; i; i--) - ipath_7220_put_tid(dd, &tidbase[i-1], RCVHQ_RCV_TYPE_EAGER, - tidinv); -} - -/** - * ipath_7220_tidtemplate - setup constants for TID updates - * @dd: the infinipath device - * - * We setup stuff that we use a lot, to avoid calculating each time - */ -static void ipath_7220_tidtemplate(struct ipath_devdata *dd) -{ - /* For now, we always allocate 4KB buffers (at init) so we can - * receive max size packets. We may want a module parameter to - * specify 2KB or 4KB and/or make it per port instead of per device - * for those who want to reduce memory footprint. Note that the - * ipath_rcvhdrentsize size must be large enough to hold the largest - * IB header (currently 96 bytes) that we expect to handle (plus of - * course the 2 dwords of RHF). - */ - if (dd->ipath_rcvegrbufsize == 2048) - dd->ipath_tidtemplate = IBA7220_TID_SZ_2K; - else if (dd->ipath_rcvegrbufsize == 4096) - dd->ipath_tidtemplate = IBA7220_TID_SZ_4K; - else { - dev_info(&dd->pcidev->dev, "BUG: unsupported egrbufsize " - "%u, using %u\n", dd->ipath_rcvegrbufsize, - 4096); - dd->ipath_tidtemplate = IBA7220_TID_SZ_4K; - } - dd->ipath_tidinvalid = 0; -} - -static int ipath_7220_early_init(struct ipath_devdata *dd) -{ - u32 i, s; - - if (strcmp(int_type, "auto") && - strcmp(int_type, "force_msi") && - strcmp(int_type, "force_intx")) { - ipath_dev_err(dd, "Invalid interrupt_type: '%s', expecting " - "auto, force_msi or force_intx\n", int_type); - return -EINVAL; - } - - /* - * Control[4] has been added to change the arbitration within - * the SDMA engine between favoring data fetches over descriptor - * fetches.
ipath_sdma_fetch_arb==0 gives data fetches priority. - */ - if (ipath_sdma_fetch_arb && (dd->ipath_minrev > 1)) - dd->ipath_control |= 1<<4; - - dd->ipath_flags |= IPATH_4BYTE_TID; - - /* - * For openfabrics, we need to be able to handle an IB header of - * 24 dwords. HT chip has arbitrary sized receive buffers, so we - * made them the same size as the PIO buffers. This chip does not - * handle arbitrary size buffers, so we need the header large enough - * to handle largest IB header, but still have room for a 2KB MTU - * standard IB packet. - */ - dd->ipath_rcvhdrentsize = 24; - dd->ipath_rcvhdrsize = IPATH_DFLT_RCVHDRSIZE; - dd->ipath_rhf_offset = - dd->ipath_rcvhdrentsize - sizeof(u64) / sizeof(u32); - - dd->ipath_rcvegrbufsize = ipath_mtu4096 ? 4096 : 2048; - /* - * the min() check here is currently a nop, but it may not always - * be, depending on just how we do ipath_rcvegrbufsize - */ - dd->ipath_ibmaxlen = min(ipath_mtu4096 ? dd->ipath_piosize4k : - dd->ipath_piosize2k, - dd->ipath_rcvegrbufsize + - (dd->ipath_rcvhdrentsize << 2)); - dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen; - - ipath_7220_config_jint(dd, INFINIPATH_JINT_DEFAULT_IDLE_TICKS, - INFINIPATH_JINT_DEFAULT_MAX_PACKETS); - - if (dd->ipath_boardrev) /* no eeprom on emulator */ - ipath_get_eeprom_info(dd); - - /* start of code to check and print procmon */ - s = ipath_read_kreg32(dd, IPATH_KREG_OFFSET(ProcMon)); - s &= ~(1U<<31); /* clear done bit */ - s |= 1U<<14; /* clear counter (write 1 to clear) */ - ipath_write_kreg(dd, IPATH_KREG_OFFSET(ProcMon), s); - /* make sure clear_counter low long enough before start */ - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); - - s &= ~(1U<<14); /* allow counter to count (before starting) */ - ipath_write_kreg(dd, IPATH_KREG_OFFSET(ProcMon), s); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); - s = ipath_read_kreg32(dd, IPATH_KREG_OFFSET(ProcMon)); - - s |= 1U<<15; /* start the counter */ - s &= ~(1U<<31); /* clear done bit */ - s &= ~0x7ffU; /* clear frequency bits */ - s |= 0xe29; /* set frequency bits, in case cleared */ - ipath_write_kreg(dd, IPATH_KREG_OFFSET(ProcMon), s); - - s = 0; - for (i = 500; i > 0 && !(s&(1ULL<<31)); i--) { - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); - s = ipath_read_kreg32(dd, IPATH_KREG_OFFSET(ProcMon)); - } - if (!(s&(1U<<31))) - ipath_dev_err(dd, "ProcMon register not valid: 0x%x\n", s); - else - ipath_dbg("ProcMon=0x%x, count=0x%x\n", s, (s>>16)&0x1ff); - - return 0; -} - -/** - * ipath_7220_get_base_info - set chip-specific flags for user code - * @pd: the infinipath port - * @kbase: ipath_base_info pointer - * - * We set the PCIE flag because the lower bandwidth on PCIe vs - * HyperTransport can affect some user packet algorithms.
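ipath_7220_put_tid() above packs a physical buffer address (low 11 bits dropped) and a 3-bit size selector into a single 64-bit TID word. A standalone sketch of that encoding, using the defines from earlier in this file (the sample address is hypothetical):

#include <stdint.h>
#include <stdio.h>

#define IBA7220_TID_SZ_SHIFT	37	/* 3-bit buffer-size selector */
#define IBA7220_TID_SZ_2K	(1ULL << IBA7220_TID_SZ_SHIFT)
#define IBA7220_TID_SZ_4K	(2ULL << IBA7220_TID_SZ_SHIFT)
#define IBA7220_TID_PA_SHIFT	11	/* chip stores addr w/o low bits */

/* Build the 64-bit TID word ipath_7220_put_tid() writes: the physical
 * address with its low 11 bits dropped, plus the size selector. */
static uint64_t make_tid(uint64_t pa, int is4k)
{
	uint64_t chippa = pa >> IBA7220_TID_PA_SHIFT;

	return chippa | (is4k ? IBA7220_TID_SZ_4K : IBA7220_TID_SZ_2K);
}

int main(void)
{
	uint64_t pa = 0x12345000ULL;	/* hypothetical 2KB-aligned buffer */

	printf("TID word: 0x%016llx\n",
	       (unsigned long long)make_tid(pa, 1));
	return 0;
}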
- */ -static int ipath_7220_get_base_info(struct ipath_portdata *pd, void *kbase) -{ - struct ipath_base_info *kinfo = kbase; - - kinfo->spi_runtime_flags |= - IPATH_RUNTIME_PCIE | IPATH_RUNTIME_NODMA_RTAIL | - IPATH_RUNTIME_SDMA; - - return 0; -} - -static void ipath_7220_free_irq(struct ipath_devdata *dd) -{ - free_irq(dd->ipath_irq, dd); - dd->ipath_irq = 0; -} - -static struct ipath_message_header * -ipath_7220_get_msgheader(struct ipath_devdata *dd, __le32 *rhf_addr) -{ - u32 offset = ipath_hdrget_offset(rhf_addr); - - return (struct ipath_message_header *) - (rhf_addr - dd->ipath_rhf_offset + offset); -} - -static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports) -{ - u32 nchipports; - - nchipports = ipath_read_kreg32(dd, dd->ipath_kregs->kr_portcnt); - if (!cfgports) { - int ncpus = num_online_cpus(); - - if (ncpus <= 4) - dd->ipath_portcnt = 5; - else if (ncpus <= 8) - dd->ipath_portcnt = 9; - if (dd->ipath_portcnt) - ipath_dbg("Auto-configured for %u ports, %d cpus " - "online\n", dd->ipath_portcnt, ncpus); - } else if (cfgports <= nchipports) - dd->ipath_portcnt = cfgports; - if (!dd->ipath_portcnt) /* none of the above, set to max */ - dd->ipath_portcnt = nchipports; - /* - * chip can be configured for 5, 9, or 17 ports, and choice - * affects number of eager TIDs per port (1K, 2K, 4K). - */ - if (dd->ipath_portcnt > 9) - dd->ipath_rcvctrl |= 2ULL << IBA7220_R_PORTCFG_SHIFT; - else if (dd->ipath_portcnt > 5) - dd->ipath_rcvctrl |= 1ULL << IBA7220_R_PORTCFG_SHIFT; - /* else configure for default 5 receive ports */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, - dd->ipath_rcvctrl); - dd->ipath_p0_rcvegrcnt = 2048; /* always */ - if (dd->ipath_flags & IPATH_HAS_SEND_DMA) - dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */ -} - - -static int ipath_7220_get_ib_cfg(struct ipath_devdata *dd, int which) -{ - int lsb, ret = 0; - u64 maskr; /* right-justified mask */ - - switch (which) { - case IPATH_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */ - lsb = IBA7220_IBC_HRTBT_SHIFT; - maskr = IBA7220_IBC_HRTBT_MASK; - break; - - case IPATH_IB_CFG_LWID_ENB: /* Get allowed Link-width */ - ret = dd->ipath_link_width_enabled; - goto done; - - case IPATH_IB_CFG_LWID: /* Get currently active Link-width */ - ret = dd->ipath_link_width_active; - goto done; - - case IPATH_IB_CFG_SPD_ENB: /* Get allowed Link speeds */ - ret = dd->ipath_link_speed_enabled; - goto done; - - case IPATH_IB_CFG_SPD: /* Get current Link spd */ - ret = dd->ipath_link_speed_active; - goto done; - - case IPATH_IB_CFG_RXPOL_ENB: /* Get Auto-RX-polarity enable */ - lsb = IBA7220_IBC_RXPOL_SHIFT; - maskr = IBA7220_IBC_RXPOL_MASK; - break; - - case IPATH_IB_CFG_LREV_ENB: /* Get Auto-Lane-reversal enable */ - lsb = IBA7220_IBC_LREV_SHIFT; - maskr = IBA7220_IBC_LREV_MASK; - break; - - case IPATH_IB_CFG_LINKLATENCY: - ret = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcddrstatus) - & IBA7220_DDRSTAT_LINKLAT_MASK; - goto done; - - default: - ret = -ENOTSUPP; - goto done; - } - ret = (int)((dd->ipath_ibcddrctrl >> lsb) & maskr); -done: - return ret; -} - -static int ipath_7220_set_ib_cfg(struct ipath_devdata *dd, int which, u32 val) -{ - int lsb, ret = 0, setforce = 0; - u64 maskr; /* right-justified mask */ - - switch (which) { - case IPATH_IB_CFG_LIDLMC: - /* - * Set LID and LMC. 
Combined to avoid possible hazard - * caller puts LMC in 16MSbits, DLID in 16LSbits of val - */ - lsb = IBA7220_IBC_DLIDLMC_SHIFT; - maskr = IBA7220_IBC_DLIDLMC_MASK; - break; - - case IPATH_IB_CFG_HRTBT: /* set Heartbeat off/enable/auto */ - if (val & IPATH_IB_HRTBT_ON && - (dd->ipath_flags & IPATH_NO_HRTBT)) - goto bail; - lsb = IBA7220_IBC_HRTBT_SHIFT; - maskr = IBA7220_IBC_HRTBT_MASK; - break; - - case IPATH_IB_CFG_LWID_ENB: /* set allowed Link-width */ - /* - * As with speed, only write the actual register if - * the link is currently down, otherwise takes effect - * on next link change. - */ - dd->ipath_link_width_enabled = val; - if ((dd->ipath_flags & (IPATH_LINKDOWN|IPATH_LINKINIT)) != - IPATH_LINKDOWN) - goto bail; - /* - * We set the IPATH_IB_FORCE_NOTIFY bit so updown - * will get called because we want to update - * link_width_active, and the change may not take - * effect for some time (if we are in POLL), so this - * flag will force the updown routine to be called - * on the next ibstatuschange down interrupt, even - * if it's not a down->up transition. - */ - val--; /* convert from IB to chip */ - maskr = IBA7220_IBC_WIDTH_MASK; - lsb = IBA7220_IBC_WIDTH_SHIFT; - setforce = 1; - dd->ipath_flags |= IPATH_IB_FORCE_NOTIFY; - break; - - case IPATH_IB_CFG_SPD_ENB: /* set allowed Link speeds */ - /* - * If we turn off IB1.2, need to preset SerDes defaults, - * but not right now. Set a flag for the next time - * we command the link down. As with width, only write the - * actual register if the link is currently down, otherwise - * takes effect on next link change. Since setting is being - * explicitly requested (via MAD or sysfs), clear autoneg - * failure status if speed autoneg is enabled. - */ - dd->ipath_link_speed_enabled = val; - if (dd->ipath_ibcddrctrl & IBA7220_IBC_IBTA_1_2_MASK && - !(val & (val - 1))) - dd->ipath_presets_needed = 1; - if ((dd->ipath_flags & (IPATH_LINKDOWN|IPATH_LINKINIT)) != - IPATH_LINKDOWN) - goto bail; - /* - * We set the IPATH_IB_FORCE_NOTIFY bit so updown - * will get called because we want to update - * link_speed_active, and the change may not take - * effect for some time (if we are in POLL), so this - * flag will force the updown routine to be called - * on the next ibstatuschange down interrupt, even - * if it's not a down->up transition. When setting - * speed autoneg, clear AUTONEG_FAILED. - */ - if (val == (IPATH_IB_SDR | IPATH_IB_DDR)) { - val = IBA7220_IBC_SPEED_AUTONEG_MASK | - IBA7220_IBC_IBTA_1_2_MASK; - dd->ipath_flags &= ~IPATH_IB_AUTONEG_FAILED; - } else - val = val == IPATH_IB_DDR ?
IBA7220_IBC_SPEED_DDR - : IBA7220_IBC_SPEED_SDR; - maskr = IBA7220_IBC_SPEED_AUTONEG_MASK | - IBA7220_IBC_IBTA_1_2_MASK; - lsb = 0; /* speed bits are low bits */ - setforce = 1; - break; - - case IPATH_IB_CFG_RXPOL_ENB: /* set Auto-RX-polarity enable */ - lsb = IBA7220_IBC_RXPOL_SHIFT; - maskr = IBA7220_IBC_RXPOL_MASK; - break; - - case IPATH_IB_CFG_LREV_ENB: /* set Auto-Lane-reversal enable */ - lsb = IBA7220_IBC_LREV_SHIFT; - maskr = IBA7220_IBC_LREV_MASK; - break; - - default: - ret = -ENOTSUPP; - goto bail; - } - dd->ipath_ibcddrctrl &= ~(maskr << lsb); - dd->ipath_ibcddrctrl |= (((u64) val & maskr) << lsb); - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl, - dd->ipath_ibcddrctrl); - if (setforce) - dd->ipath_flags |= IPATH_IB_FORCE_NOTIFY; -bail: - return ret; -} - -static void ipath_7220_read_counters(struct ipath_devdata *dd, - struct infinipath_counters *cntrs) -{ - u64 *counters = (u64 *) cntrs; - int i; - - for (i = 0; i < sizeof(*cntrs) / sizeof(u64); i++) - counters[i] = ipath_snap_cntr(dd, i); -} - -/* if we are using MSI, try to fallback to INTx */ -static int ipath_7220_intr_fallback(struct ipath_devdata *dd) -{ - if (dd->ipath_msi_lo) { - dev_info(&dd->pcidev->dev, "MSI interrupt not detected," - " trying INTx interrupts\n"); - ipath_7220_nomsi(dd); - ipath_enable_intx(dd->pcidev); - /* - * some newer kernels require free_irq before disable_msi, - * and irq can be changed during disable and intx enable - * and we need to therefore use the pcidev->irq value, - * not our saved MSI value. - */ - dd->ipath_irq = dd->pcidev->irq; - if (request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED, - IPATH_DRV_NAME, dd)) - ipath_dev_err(dd, - "Could not re-request_irq for INTx\n"); - return 1; - } - return 0; -} - -/* - * reset the XGXS (between serdes and IBC). Slightly less intrusive - * than resetting the IBC or external link state, and useful in some - * cases to cause some retraining. To do this right, we reset IBC - * as well. 
- */ -static void ipath_7220_xgxs_reset(struct ipath_devdata *dd) -{ - u64 val, prev_val; - - prev_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); - val = prev_val | INFINIPATH_XGXS_RESET; - prev_val &= ~INFINIPATH_XGXS_RESET; /* be sure */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control & ~INFINIPATH_C_LINKENABLE); - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); - ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, prev_val); - ipath_write_kreg(dd, dd->ipath_kregs->kr_control, - dd->ipath_control); -} - - -/* Still needs cleanup, too much hardwired stuff */ -static void autoneg_send(struct ipath_devdata *dd, - u32 *hdr, u32 dcnt, u32 *data) -{ - int i; - u64 cnt; - u32 __iomem *piobuf; - u32 pnum; - - i = 0; - cnt = 7 + dcnt + 1; /* 7 dword header, dword data, icrc */ - while (!(piobuf = ipath_getpiobuf(dd, cnt, &pnum))) { - if (i++ > 15) { - ipath_dbg("Couldn't get pio buffer for send\n"); - return; - } - udelay(2); - } - if (dd->ipath_flags&IPATH_HAS_PBC_CNT) - cnt |= 0x80000000UL<<32; /* mark as VL15 */ - writeq(cnt, piobuf); - ipath_flush_wc(); - __iowrite32_copy(piobuf + 2, hdr, 7); - __iowrite32_copy(piobuf + 9, data, dcnt); - ipath_flush_wc(); -} - -/* - * _start packet gets sent twice at start, _done gets sent twice at end - */ -static void ipath_autoneg_send(struct ipath_devdata *dd, int which) -{ - static u32 swapped; - u32 dw, i, hcnt, dcnt, *data; - static u32 hdr[7] = { 0xf002ffff, 0x48ffff, 0x6400abba }; - static u32 madpayload_start[0x40] = { - 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0, - 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x1, 0x1388, 0x15e, 0x1, /* rest 0's */ - }; - static u32 madpayload_done[0x40] = { - 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0, - 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x40000001, 0x1388, 0x15e, /* rest 0's */ - }; - dcnt = ARRAY_SIZE(madpayload_start); - hcnt = ARRAY_SIZE(hdr); - if (!swapped) { - /* for maintainability, do it at runtime */ - for (i = 0; i < hcnt; i++) { - dw = (__force u32) cpu_to_be32(hdr[i]); - hdr[i] = dw; - } - for (i = 0; i < dcnt; i++) { - dw = (__force u32) cpu_to_be32(madpayload_start[i]); - madpayload_start[i] = dw; - dw = (__force u32) cpu_to_be32(madpayload_done[i]); - madpayload_done[i] = dw; - } - swapped = 1; - } - - data = which ? madpayload_done : madpayload_start; - ipath_cdbg(PKT, "Sending %s special MADs\n", which?"done":"start"); - - autoneg_send(dd, hdr, dcnt, data); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - udelay(2); - autoneg_send(dd, hdr, dcnt, data); - ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); - udelay(2); -} - - - -/* - * Do the absolute minimum to cause an IB speed change, and make it - * ready, but don't actually trigger the change. The caller will - * do that when ready (if link is in Polling training state, it will - * happen immediately, otherwise when link next goes down) - * - * This routine should only be used as part of the DDR autonegotiation - * code for devices that are not compliant with IB 1.2 (or code that - * fixes things up for same). - * - * When link has gone down, and autoneg enabled, or autoneg has - * failed and we give up until next time we set both speeds, and - * then we want IBTA enabled as well as "use max enabled speed".
- */ -static void set_speed_fast(struct ipath_devdata *dd, u32 speed) -{ - dd->ipath_ibcddrctrl &= ~(IBA7220_IBC_SPEED_AUTONEG_MASK | - IBA7220_IBC_IBTA_1_2_MASK | - (IBA7220_IBC_WIDTH_MASK << IBA7220_IBC_WIDTH_SHIFT)); - - if (speed == (IPATH_IB_SDR | IPATH_IB_DDR)) - dd->ipath_ibcddrctrl |= IBA7220_IBC_SPEED_AUTONEG_MASK | - IBA7220_IBC_IBTA_1_2_MASK; - else - dd->ipath_ibcddrctrl |= speed == IPATH_IB_DDR ? - IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR; - - /* - * Convert from IB-style 1 = 1x, 2 = 4x, 3 = auto - * to chip-centric 0 = 1x, 1 = 4x, 2 = auto - */ - dd->ipath_ibcddrctrl |= (u64)(dd->ipath_link_width_enabled - 1) << - IBA7220_IBC_WIDTH_SHIFT; - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcddrctrl, - dd->ipath_ibcddrctrl); - ipath_cdbg(VERBOSE, "setup for IB speed (%x) done\n", speed); -} - - -/* - * this routine is only used when we are not talking to another - * IB 1.2-compliant device that we think can do DDR. - * (This includes all existing switch chips as of Oct 2007.) - * 1.2-compliant devices go directly to DDR prior to reaching INIT - */ -static void try_auto_neg(struct ipath_devdata *dd) -{ - /* - * required for older non-IB1.2 DDR switches. Newer - * non-IB-compliant switches don't need it, but so far, - * aren't bothered by it either. "Magic constant" - */ - ipath_write_kreg(dd, IPATH_KREG_OFFSET(IBNCModeCtrl), - 0x3b9dc07); - dd->ipath_flags |= IPATH_IB_AUTONEG_INPROG; - ipath_autoneg_send(dd, 0); - set_speed_fast(dd, IPATH_IB_DDR); - ipath_toggle_rclkrls(dd); - /* 2 msec is minimum length of a poll cycle */ - schedule_delayed_work(&dd->ipath_autoneg_work, - msecs_to_jiffies(2)); -} - - -static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs) -{ - int ret = 0; - u32 ltstate = ipath_ib_linkstate(dd, ibcs); - - dd->ipath_link_width_active = - ((ibcs >> IBA7220_IBCS_LINKWIDTH_SHIFT) & 1) ? - IB_WIDTH_4X : IB_WIDTH_1X; - dd->ipath_link_speed_active = - ((ibcs >> IBA7220_IBCS_LINKSPEED_SHIFT) & 1) ? 
- IPATH_IB_DDR : IPATH_IB_SDR; - - if (!ibup) { - /* - * when link goes down we don't want aeq running, so it - * won't interfere with IBC training, etc., and we need - * to go back to the static SerDes preset values - */ - if (dd->ipath_x1_fix_tries && - ltstate <= INFINIPATH_IBCS_LT_STATE_SLEEPQUIET && - ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) - dd->ipath_x1_fix_tries = 0; - if (!(dd->ipath_flags & (IPATH_IB_AUTONEG_FAILED | - IPATH_IB_AUTONEG_INPROG))) - set_speed_fast(dd, dd->ipath_link_speed_enabled); - if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)) { - ipath_cdbg(VERBOSE, "Setting RXEQ defaults\n"); - ipath_sd7220_presets(dd); - } - /* this might be better in ipath_sd7220_presets() */ - ipath_set_relock_poll(dd, ibup); - } else { - if (ipath_compat_ddr_negotiate && - !(dd->ipath_flags & (IPATH_IB_AUTONEG_FAILED | - IPATH_IB_AUTONEG_INPROG)) && - dd->ipath_link_speed_active == IPATH_IB_SDR && - (dd->ipath_link_speed_enabled & - (IPATH_IB_DDR | IPATH_IB_SDR)) == - (IPATH_IB_DDR | IPATH_IB_SDR) && - dd->ipath_autoneg_tries < IPATH_AUTONEG_TRIES) { - /* we are SDR, and DDR auto-negotiation enabled */ - ++dd->ipath_autoneg_tries; - ipath_dbg("DDR negotiation try, %u/%u\n", - dd->ipath_autoneg_tries, - IPATH_AUTONEG_TRIES); - try_auto_neg(dd); - ret = 1; /* no other IB status change processing */ - } else if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) - && dd->ipath_link_speed_active == IPATH_IB_SDR) { - ipath_autoneg_send(dd, 1); - set_speed_fast(dd, IPATH_IB_DDR); - udelay(2); - ipath_toggle_rclkrls(dd); - ret = 1; /* no other IB status change processing */ - } else { - if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) && - (dd->ipath_link_speed_active & IPATH_IB_DDR)) { - ipath_dbg("Got to INIT with DDR autoneg\n"); - dd->ipath_flags &= ~(IPATH_IB_AUTONEG_INPROG - | IPATH_IB_AUTONEG_FAILED); - dd->ipath_autoneg_tries = 0; - /* re-enable SDR, for next link down */ - set_speed_fast(dd, - dd->ipath_link_speed_enabled); - wake_up(&dd->ipath_autoneg_wait); - } else if (dd->ipath_flags & IPATH_IB_AUTONEG_FAILED) { - /* - * clear autoneg failure flag, and do setup - * so we'll try next time link goes down and - * back to INIT (possibly connected to different - * device). - */ - ipath_dbg("INIT %sDR after autoneg failure\n", - (dd->ipath_link_speed_active & - IPATH_IB_DDR) ? "D" : "S"); - dd->ipath_flags &= ~IPATH_IB_AUTONEG_FAILED; - dd->ipath_ibcddrctrl |= - IBA7220_IBC_IBTA_1_2_MASK; - ipath_write_kreg(dd, - IPATH_KREG_OFFSET(IBNCModeCtrl), 0); - } - } - /* - * if we are in 1X, and are in autoneg width, it - * could be due to an xgxs problem, so if we haven't - * already tried, try twice to get to 4X; if we - * tried, and couldn't, report it, since it will - * probably not be what is desired.
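Editor's note: the link-up branch above only starts a DDR negotiation attempt when several conditions line up: compatibility negotiation is enabled, no attempt is in progress or has already failed, the link trained at SDR while both SDR and DDR are enabled, and the retry budget is not exhausted. A condensed sketch of that gating test, with simplified stand-ins for the driver's flag and speed fields:

#include <stdbool.h>

#define SPEED_SDR	1u
#define SPEED_DDR	2u
#define MAX_TRIES	5u	/* stand-in for IPATH_AUTONEG_TRIES */

static bool should_try_ddr_autoneg(unsigned int active, unsigned int enabled,
				   bool in_prog, bool failed,
				   unsigned int tries)
{
	return !in_prog && !failed &&
	       active == SPEED_SDR &&
	       (enabled & (SPEED_SDR | SPEED_DDR)) ==
			  (SPEED_SDR | SPEED_DDR) &&
	       tries < MAX_TRIES;
}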
- */ - if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X | - IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X) - && dd->ipath_link_width_active == IB_WIDTH_1X - && dd->ipath_x1_fix_tries < 3) { - if (++dd->ipath_x1_fix_tries == 3) - dev_info(&dd->pcidev->dev, - "IB link is in 1X mode\n"); - else { - ipath_cdbg(VERBOSE, "IB 1X in " - "auto-width, try %u to be " - "sure it's really 1X; " - "ltstate %u\n", - dd->ipath_x1_fix_tries, - ltstate); - dd->ipath_f_xgxs_reset(dd); - ret = 1; /* skip other processing */ - } - } - - if (!ret) { - dd->delay_mult = rate_to_delay - [(ibcs >> IBA7220_IBCS_LINKSPEED_SHIFT) & 1] - [(ibcs >> IBA7220_IBCS_LINKWIDTH_SHIFT) & 1]; - - ipath_set_relock_poll(dd, ibup); - } - } - - if (!ret) - ipath_setup_7220_setextled(dd, ipath_ib_linkstate(dd, ibcs), - ltstate); - return ret; -} - - -/* - * Handle the empirically determined mechanism for auto-negotiation - * of DDR speed with switches. - */ -static void autoneg_work(struct work_struct *work) -{ - struct ipath_devdata *dd; - u64 startms; - u32 lastlts, i; - - dd = container_of(work, struct ipath_devdata, - ipath_autoneg_work.work); - - startms = jiffies_to_msecs(jiffies); - - /* - * busy wait for this first part, it should be at most a - * few hundred usec, since we scheduled ourselves for 2msec. - */ - for (i = 0; i < 25; i++) { - lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat); - if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) { - ipath_set_linkstate(dd, IPATH_IB_LINKDOWN_DISABLE); - break; - } - udelay(100); - } - - if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)) - goto done; /* we got there early or told to stop */ - - /* we expect this to timeout */ - if (wait_event_timeout(dd->ipath_autoneg_wait, - !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG), - msecs_to_jiffies(90))) - goto done; - - ipath_toggle_rclkrls(dd); - - /* we expect this to timeout */ - if (wait_event_timeout(dd->ipath_autoneg_wait, - !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG), - msecs_to_jiffies(1700))) - goto done; - - set_speed_fast(dd, IPATH_IB_SDR); - ipath_toggle_rclkrls(dd); - - /* - * wait up to 250 msec for link to train and get to INIT at DDR; - * this should terminate early - */ - wait_event_timeout(dd->ipath_autoneg_wait, - !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG), - msecs_to_jiffies(250)); -done: - if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { - ipath_dbg("Did not get to DDR INIT (%x) after %Lu msecs\n", - ipath_ib_state(dd, dd->ipath_lastibcstat), - jiffies_to_msecs(jiffies)-startms); - dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG; - if (dd->ipath_autoneg_tries == IPATH_AUTONEG_TRIES) { - dd->ipath_flags |= IPATH_IB_AUTONEG_FAILED; - ipath_dbg("Giving up on DDR until next IB " - "link Down\n"); - dd->ipath_autoneg_tries = 0; - } - set_speed_fast(dd, dd->ipath_link_speed_enabled); - } -} - - -/** - * ipath_init_iba7220_funcs - set up the chip-specific function pointers - * @dd: the infinipath device - * - * This is global, and is called directly at init to set up the - * chip-specific function pointers for later use. 
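Editor's note: autoneg_work() above opens with a short bounded busy-wait, polling the link training state every 100 usec for at most 25 iterations before falling back to the sleeping wait_event_timeout() calls, since the work item scheduled itself only 2 msec out. A sketch of that poll-then-sleep shape, with poll_quiet() as a hypothetical stand-in for the register read and usleep() for udelay():

#include <stdbool.h>
#include <unistd.h>

static bool poll_quiet(void)	/* hypothetical: true once training is quiet */
{
	return false;		/* stub so the sketch compiles */
}

static bool busy_wait_quiet(void)
{
	int i;

	for (i = 0; i < 25; i++) {	/* at most ~2.5 msec of spinning */
		if (poll_quiet())
			return true;
		usleep(100);
	}
	return false;	/* caller falls back to sleeping waits */
}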
- */ -void ipath_init_iba7220_funcs(struct ipath_devdata *dd) -{ - dd->ipath_f_intrsetup = ipath_7220_intconfig; - dd->ipath_f_bus = ipath_setup_7220_config; - dd->ipath_f_reset = ipath_setup_7220_reset; - dd->ipath_f_get_boardname = ipath_7220_boardname; - dd->ipath_f_init_hwerrors = ipath_7220_init_hwerrors; - dd->ipath_f_early_init = ipath_7220_early_init; - dd->ipath_f_handle_hwerrors = ipath_7220_handle_hwerrors; - dd->ipath_f_quiet_serdes = ipath_7220_quiet_serdes; - dd->ipath_f_bringup_serdes = ipath_7220_bringup_serdes; - dd->ipath_f_clear_tids = ipath_7220_clear_tids; - dd->ipath_f_put_tid = ipath_7220_put_tid; - dd->ipath_f_cleanup = ipath_setup_7220_cleanup; - dd->ipath_f_setextled = ipath_setup_7220_setextled; - dd->ipath_f_get_base_info = ipath_7220_get_base_info; - dd->ipath_f_free_irq = ipath_7220_free_irq; - dd->ipath_f_tidtemplate = ipath_7220_tidtemplate; - dd->ipath_f_intr_fallback = ipath_7220_intr_fallback; - dd->ipath_f_xgxs_reset = ipath_7220_xgxs_reset; - dd->ipath_f_get_ib_cfg = ipath_7220_get_ib_cfg; - dd->ipath_f_set_ib_cfg = ipath_7220_set_ib_cfg; - dd->ipath_f_config_jint = ipath_7220_config_jint; - dd->ipath_f_config_ports = ipath_7220_config_ports; - dd->ipath_f_read_counters = ipath_7220_read_counters; - dd->ipath_f_get_msgheader = ipath_7220_get_msgheader; - dd->ipath_f_ib_updown = ipath_7220_ib_updown; - - /* initialize chip-specific variables */ - ipath_init_7220_variables(dd); -} diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 3e5baa43fc8..be2a60e142b 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c @@ -33,6 +33,9 @@ #include <linux/pci.h> #include <linux/netdevice.h> +#include <linux/moduleparam.h> +#include <linux/slab.h> +#include <linux/stat.h> #include <linux/vmalloc.h> #include "ipath_kernel.h" @@ -229,6 +232,7 @@ static int init_chip_first(struct ipath_devdata *dd) spin_lock_init(&dd->ipath_kernel_tid_lock); spin_lock_init(&dd->ipath_user_tid_lock); spin_lock_init(&dd->ipath_sendctrl_lock); + spin_lock_init(&dd->ipath_uctxt_lock); spin_lock_init(&dd->ipath_sdma_lock); spin_lock_init(&dd->ipath_gpio_lock); spin_lock_init(&dd->ipath_eep_st_lock); @@ -333,7 +337,7 @@ done: * @dd: the infinipath device * * sanity check at least some of the values after reset, and - * ensure no receive or transmit (explictly, in case reset + * ensure no receive or transmit (explicitly, in case reset * failed */ static int init_chip_reset(struct ipath_devdata *dd) @@ -440,7 +444,7 @@ static void init_shadow_tids(struct ipath_devdata *dd) struct page **pages; dma_addr_t *addrs; - pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * + pages = vzalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt * sizeof(struct page *)); if (!pages) { ipath_dev_err(dd, "failed to allocate shadow page * " @@ -454,14 +458,11 @@ static void init_shadow_tids(struct ipath_devdata *dd) if (!addrs) { ipath_dev_err(dd, "failed to allocate shadow dma handle " "array, no expected sends!\n"); - vfree(dd->ipath_pageshadow); + vfree(pages); dd->ipath_pageshadow = NULL; return; } - memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt * - sizeof(struct page *)); - dd->ipath_pageshadow = pages; dd->ipath_physshadow = addrs; } @@ -718,16 +719,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) goto done; /* - * we ignore most issues after reporting them, but have to specially - * handle hardware-disabled chips. 
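Editor's note: the init_shadow_tids() hunk above fixes a subtle error-path bug in addition to the vzalloc() conversion: when the second allocation fails, the code must free the local pages pointer, because dd->ipath_pageshadow has not been assigned yet, so the old vfree(dd->ipath_pageshadow) freed the wrong pointer and leaked pages. A user-space sketch of the corrected shape, with calloc()/free() standing in for vzalloc()/vfree():

#include <stdlib.h>

struct dev {
	void **pageshadow;
	unsigned long *physshadow;
};

static void init_shadow(struct dev *dd, size_t n)
{
	void **pages = calloc(n, sizeof(*pages));	/* zeroed, like vzalloc */
	unsigned long *addrs;

	if (!pages)
		return;
	addrs = calloc(n, sizeof(*addrs));
	if (!addrs) {
		free(pages);	/* free what was just allocated... */
		return;		/* ...not the still-unassigned field */
	}
	dd->pageshadow = pages;
	dd->physshadow = addrs;
}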
- */ - if (ret == 2) { - /* unique error, known to ipath_init_one */ - ret = -EPERM; - goto done; - } - - /* * We could bump this to allow for full rcvegrcnt + rcvtidcnt, * but then it no longer nicely fits power of two, and since * we now use routines that backend onto __get_free_pages, the diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 26900b3b7a4..01ba792791a 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -33,6 +33,7 @@ #include <linux/pci.h> #include <linux/delay.h> +#include <linux/sched.h> #include "ipath_kernel.h" #include "ipath_verbs.h" @@ -69,7 +70,7 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd) if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { int i; if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) && - dd->ipath_lastcancel > jiffies) { + time_after(dd->ipath_lastcancel, jiffies)) { __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG, "SendbufErrs %lx %lx", sbuf[0], sbuf[1]); @@ -208,8 +209,7 @@ void ipath_format_hwerrors(u64 hwerrs, { int i; const int glen = - sizeof(ipath_generic_hwerror_msgs) / - sizeof(ipath_generic_hwerror_msgs[0]); + ARRAY_SIZE(ipath_generic_hwerror_msgs); for (i=0; i<glen; i++) { if (hwerrs & ipath_generic_hwerror_msgs[i].mask) { @@ -356,9 +356,10 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd, dd->ipath_cregs->cr_iblinkerrrecovcnt); if (linkrecov != dd->ipath_lastlinkrecov) { ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n", - ibcs, ib_linkstate(dd, ibcs), + (unsigned long long) ibcs, + ib_linkstate(dd, ibcs), ipath_ibcstatus_str[ltstate], - linkrecov); + (unsigned long long) linkrecov); /* and no more until active again */ dd->ipath_lastlinkrecov = 0; ipath_set_linkstate(dd, IPATH_IB_LINKDOWN); @@ -754,7 +755,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) /* likely due to cancel; so suppress message unless verbose */ if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) && - dd->ipath_lastcancel > jiffies) { + time_after(dd->ipath_lastcancel, jiffies)) { /* armlaunch takes precedence; it often causes both. 
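Editor's note: both ipath_intr.c hunks above replace a raw 'dd->ipath_lastcancel > jiffies' comparison with time_after(), which stays correct when the jiffies counter wraps by doing the subtraction in signed arithmetic. A self-contained illustration of the difference, mirroring the core of the macro's definition in <linux/jiffies.h> (typecheck() omitted):

#include <assert.h>

#define time_after(a, b)	((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long now = (unsigned long)-8;	/* jiffies just before wrap */
	unsigned long deadline = now + 16;	/* wraps around to 8 */

	assert(!(deadline > now));		/* raw '>' claims the deadline passed */
	assert(time_after(deadline, now));	/* wrap-safe: still in the future */
	return 0;
}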
*/ ipath_cdbg(VERBOSE, "Suppressed %s error (%llx) after sendbuf cancel\n", @@ -1118,9 +1119,11 @@ irqreturn_t ipath_intr(int irq, void *data) if (unlikely(istat & ~dd->ipath_i_bitsextant)) ipath_dev_err(dd, "interrupt with unknown interrupts %Lx set\n", + (unsigned long long) istat & ~dd->ipath_i_bitsextant); else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */ - ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", istat); + ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", + (unsigned long long) istat); if (istat & INFINIPATH_I_ERROR) { ipath_stats.sps_errints++; @@ -1128,7 +1131,8 @@ irqreturn_t ipath_intr(int irq, void *data) dd->ipath_kregs->kr_errorstatus); if (!estat) dev_info(&dd->pcidev->dev, "error interrupt (%Lx), " - "but no error bits set!\n", istat); + "but no error bits set!\n", + (unsigned long long) istat); else if (estat == -1LL) /* * should we try clearing all, or hope next read diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 0bd8bcb184a..6559af60bff 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -355,6 +355,19 @@ struct ipath_devdata { /* errors masked because they occur too fast */ ipath_err_t ipath_maskederrs; u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */ + /* these 5 fields are used to establish deltas for IB Symbol + * errors and linkrecovery errors. They can be reported on + * some chips during link negotiation prior to INIT, and with + * DDR when faking DDR negotiations with non-IBTA switches. + * The chip counters are adjusted at driver unload if there is + * a non-zero delta. + */ + u64 ibdeltainprog; + u64 ibsymdelta; + u64 ibsymsnap; + u64 iblnkerrdelta; + u64 iblnkerrsnap; + /* time in jiffies at which to re-enable maskederrs */ unsigned long ipath_unmasktime; /* count of egrfull errors, combined for all ports */ @@ -464,6 +477,8 @@ struct ipath_devdata { spinlock_t ipath_kernel_tid_lock; spinlock_t ipath_user_tid_lock; spinlock_t ipath_sendctrl_lock; + /* around ipath_pd and (user ports) port_cnt use (intr vs free) */ + spinlock_t ipath_uctxt_lock; /* * IPATH_STATUS_*, @@ -1015,8 +1030,6 @@ void ipath_free_data(struct ipath_portdata *dd); u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *); void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, unsigned len, int avail); -void ipath_init_iba7220_funcs(struct ipath_devdata *); -void ipath_init_iba6120_funcs(struct ipath_devdata *); void ipath_init_iba6110_funcs(struct ipath_devdata *); void ipath_get_eeprom_info(struct ipath_devdata *); int ipath_update_eeprom_log(struct ipath_devdata *dd); @@ -1271,7 +1284,7 @@ struct device_driver; extern const char ib_ipath_version[]; -extern struct attribute_group *ipath_driver_attr_groups[]; +extern const struct attribute_group *ipath_driver_attr_groups[]; int ipath_device_create_group(struct device *, struct ipath_devdata *); void ipath_device_remove_group(struct device *, struct ipath_devdata *); diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c index 8f32b17a5ee..c0e933fec21 100644 --- a/drivers/infiniband/hw/ipath/ipath_keys.c +++ b/drivers/infiniband/hw/ipath/ipath_keys.c @@ -132,6 +132,7 @@ int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge, * (see ipath_get_dma_mr and ipath_dma.c). 
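Editor's note: the comment on the new ibsymsnap/ibsymdelta fields above describes a snapshot/delta scheme: around operations known to generate spurious symbol and link-recovery errors (link negotiation before INIT, faked DDR negotiation), the counter is snapshotted, and the difference is folded into a running delta that is later adjusted out of the chip counters. A rough sketch of that bookkeeping, under the assumption that read_symbol_errors() stands in for the relevant chip counter read:

#include <stdint.h>

static uint64_t read_symbol_errors(void)
{
	return 0;	/* stub for the sketch; really a chip register */
}

struct ib_err_adjust {
	uint64_t snap;	/* counter value when the noisy window opened */
	uint64_t delta;	/* accumulated errors to adjust out later */
};

static void noisy_window_begin(struct ib_err_adjust *a)
{
	a->snap = read_symbol_errors();
}

static void noisy_window_end(struct ib_err_adjust *a)
{
	a->delta += read_symbol_errors() - a->snap;
}

static uint64_t reported_symbol_errors(const struct ib_err_adjust *a)
{
	return read_symbol_errors() - a->delta;
}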
*/ if (sge->lkey == 0) { + /* always a kernel port, no locking needed */ struct ipath_pd *pd = to_ipd(qp->ibqp.pd); if (pd->user) { @@ -211,6 +212,7 @@ int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss, * (see ipath_get_dma_mr and ipath_dma.c). */ if (rkey == 0) { + /* always a kernel port, no locking needed */ struct ipath_pd *pd = to_ipd(qp->ibqp.pd); if (pd->user) { diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index be4fc9ada8e..43f2d0424d4 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -32,15 +32,16 @@ */ #include <rdma/ib_smi.h> +#include <rdma/ib_pma.h> #include "ipath_kernel.h" #include "ipath_verbs.h" #include "ipath_common.h" -#define IB_SMP_UNSUP_VERSION __constant_htons(0x0004) -#define IB_SMP_UNSUP_METHOD __constant_htons(0x0008) -#define IB_SMP_UNSUP_METH_ATTR __constant_htons(0x000C) -#define IB_SMP_INVALID_FIELD __constant_htons(0x001C) +#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004) +#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008) +#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C) +#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C) static int reply(struct ib_smp *smp) { @@ -60,7 +61,7 @@ static int recv_subn_get_nodedescription(struct ib_smp *smp, if (smp->attr_mod) smp->status |= IB_SMP_INVALID_FIELD; - strncpy(smp->data, ibdev->node_desc, sizeof(smp->data)); + memcpy(smp->data, ibdev->node_desc, sizeof(smp->data)); return reply(smp); } @@ -348,6 +349,7 @@ bail: */ static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys) { + /* always a kernel port, no locking needed */ struct ipath_portdata *pd = dd->ipath_pd[0]; memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys)); @@ -730,6 +732,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) int i; int changed = 0; + /* always a kernel port, no locking needed */ pd = dd->ipath_pd[0]; for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { @@ -787,151 +790,18 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp, return recv_subn_get_pkeytable(smp, ibdev); } -#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001) -#define IB_PMA_PORT_SAMPLES_CONTROL __constant_htons(0x0010) -#define IB_PMA_PORT_SAMPLES_RESULT __constant_htons(0x0011) -#define IB_PMA_PORT_COUNTERS __constant_htons(0x0012) -#define IB_PMA_PORT_COUNTERS_EXT __constant_htons(0x001D) -#define IB_PMA_PORT_SAMPLES_RESULT_EXT __constant_htons(0x001E) - -struct ib_perf { - u8 base_version; - u8 mgmt_class; - u8 class_version; - u8 method; - __be16 status; - __be16 unused; - __be64 tid; - __be16 attr_id; - __be16 resv; - __be32 attr_mod; - u8 reserved[40]; - u8 data[192]; -} __attribute__ ((packed)); - -struct ib_pma_classportinfo { - u8 base_version; - u8 class_version; - __be16 cap_mask; - u8 reserved[3]; - u8 resp_time_value; /* only lower 5 bits */ - union ib_gid redirect_gid; - __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */ - __be16 redirect_lid; - __be16 redirect_pkey; - __be32 redirect_qp; /* only lower 24 bits */ - __be32 redirect_qkey; - union ib_gid trap_gid; - __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */ - __be16 trap_lid; - __be16 trap_pkey; - __be32 trap_hl_qp; /* 8, 24 bits respectively */ - __be32 trap_qkey; -} __attribute__ ((packed)); - -struct ib_pma_portsamplescontrol { - u8 opcode; - u8 port_select; - u8 tick; - u8 counter_width; /* only lower 3 bits */ - __be32 counter_mask0_9; /* 2, 10 * 3, bits */ - __be16 counter_mask10_14; /* 1, 5 * 3, bits */ - u8 sample_mechanisms; - u8 sample_status; /* only 
lower 2 bits */ - __be64 option_mask; - __be64 vendor_mask; - __be32 sample_start; - __be32 sample_interval; - __be16 tag; - __be16 counter_select[15]; -} __attribute__ ((packed)); - -struct ib_pma_portsamplesresult { - __be16 tag; - __be16 sample_status; /* only lower 2 bits */ - __be32 counter[15]; -} __attribute__ ((packed)); - -struct ib_pma_portsamplesresult_ext { - __be16 tag; - __be16 sample_status; /* only lower 2 bits */ - __be32 extended_width; /* only upper 2 bits */ - __be64 counter[15]; -} __attribute__ ((packed)); - -struct ib_pma_portcounters { - u8 reserved; - u8 port_select; - __be16 counter_select; - __be16 symbol_error_counter; - u8 link_error_recovery_counter; - u8 link_downed_counter; - __be16 port_rcv_errors; - __be16 port_rcv_remphys_errors; - __be16 port_rcv_switch_relay_errors; - __be16 port_xmit_discards; - u8 port_xmit_constraint_errors; - u8 port_rcv_constraint_errors; - u8 reserved1; - u8 lli_ebor_errors; /* 4, 4, bits */ - __be16 reserved2; - __be16 vl15_dropped; - __be32 port_xmit_data; - __be32 port_rcv_data; - __be32 port_xmit_packets; - __be32 port_rcv_packets; -} __attribute__ ((packed)); - -#define IB_PMA_SEL_SYMBOL_ERROR __constant_htons(0x0001) -#define IB_PMA_SEL_LINK_ERROR_RECOVERY __constant_htons(0x0002) -#define IB_PMA_SEL_LINK_DOWNED __constant_htons(0x0004) -#define IB_PMA_SEL_PORT_RCV_ERRORS __constant_htons(0x0008) -#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS __constant_htons(0x0010) -#define IB_PMA_SEL_PORT_XMIT_DISCARDS __constant_htons(0x0040) -#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS __constant_htons(0x0200) -#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS __constant_htons(0x0400) -#define IB_PMA_SEL_PORT_VL15_DROPPED __constant_htons(0x0800) -#define IB_PMA_SEL_PORT_XMIT_DATA __constant_htons(0x1000) -#define IB_PMA_SEL_PORT_RCV_DATA __constant_htons(0x2000) -#define IB_PMA_SEL_PORT_XMIT_PACKETS __constant_htons(0x4000) -#define IB_PMA_SEL_PORT_RCV_PACKETS __constant_htons(0x8000) - -struct ib_pma_portcounters_ext { - u8 reserved; - u8 port_select; - __be16 counter_select; - __be32 reserved1; - __be64 port_xmit_data; - __be64 port_rcv_data; - __be64 port_xmit_packets; - __be64 port_rcv_packets; - __be64 port_unicast_xmit_packets; - __be64 port_unicast_rcv_packets; - __be64 port_multicast_xmit_packets; - __be64 port_multicast_rcv_packets; -} __attribute__ ((packed)); - -#define IB_PMA_SELX_PORT_XMIT_DATA __constant_htons(0x0001) -#define IB_PMA_SELX_PORT_RCV_DATA __constant_htons(0x0002) -#define IB_PMA_SELX_PORT_XMIT_PACKETS __constant_htons(0x0004) -#define IB_PMA_SELX_PORT_RCV_PACKETS __constant_htons(0x0008) -#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS __constant_htons(0x0010) -#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS __constant_htons(0x0020) -#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS __constant_htons(0x0040) -#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS __constant_htons(0x0080) - -static int recv_pma_get_classportinfo(struct ib_perf *pmp) +static int recv_pma_get_classportinfo(struct ib_pma_mad *pmp) { - struct ib_pma_classportinfo *p = - (struct ib_pma_classportinfo *)pmp->data; + struct ib_class_port_info *p = + (struct ib_class_port_info *)pmp->data; memset(pmp->data, 0, sizeof(pmp->data)); - if (pmp->attr_mod != 0) - pmp->status |= IB_SMP_INVALID_FIELD; + if (pmp->mad_hdr.attr_mod != 0) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; /* Indicate AllPortSelect is valid (only one port anyway) */ - p->cap_mask = __constant_cpu_to_be16(1 << 8); + p->capability_mask = cpu_to_be16(1 << 8); p->base_version = 1; p->class_version = 1; /* @@ 
-949,14 +819,13 @@ static int recv_pma_get_classportinfo(struct ib_perf *pmp) * We support 5 counters which only count the mandatory quantities. */ #define COUNTER_MASK(q, n) (q << ((9 - n) * 3)) -#define COUNTER_MASK0_9 \ - __constant_cpu_to_be32(COUNTER_MASK(1, 0) | \ - COUNTER_MASK(1, 1) | \ - COUNTER_MASK(1, 2) | \ - COUNTER_MASK(1, 3) | \ - COUNTER_MASK(1, 4)) - -static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp, +#define COUNTER_MASK0_9 cpu_to_be32(COUNTER_MASK(1, 0) | \ + COUNTER_MASK(1, 1) | \ + COUNTER_MASK(1, 2) | \ + COUNTER_MASK(1, 3) | \ + COUNTER_MASK(1, 4)) + +static int recv_pma_get_portsamplescontrol(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portsamplescontrol *p = @@ -969,9 +838,9 @@ static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp, memset(pmp->data, 0, sizeof(pmp->data)); p->port_select = port_select; - if (pmp->attr_mod != 0 || + if (pmp->mad_hdr.attr_mod != 0 || (port_select != port && port_select != 0xFF)) - pmp->status |= IB_SMP_INVALID_FIELD; + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; /* * Ticks are 10x the link transfer period which for 2.5Gbs is 4 * nsec. 0 == 4 nsec., 1 == 8 nsec., ..., 255 == 1020 nsec. Sample @@ -1005,7 +874,7 @@ static int recv_pma_get_portsamplescontrol(struct ib_perf *pmp, return reply((struct ib_smp *) pmp); } -static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp, +static int recv_pma_set_portsamplescontrol(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portsamplescontrol *p = @@ -1016,9 +885,9 @@ static int recv_pma_set_portsamplescontrol(struct ib_perf *pmp, u8 status; int ret; - if (pmp->attr_mod != 0 || + if (pmp->mad_hdr.attr_mod != 0 || (p->port_select != port && p->port_select != 0xFF)) { - pmp->status |= IB_SMP_INVALID_FIELD; + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; ret = reply((struct ib_smp *) pmp); goto bail; } @@ -1092,7 +961,7 @@ static u64 get_counter(struct ipath_ibdev *dev, return ret; } -static int recv_pma_get_portsamplesresult(struct ib_perf *pmp, +static int recv_pma_get_portsamplesresult(struct ib_pma_mad *pmp, struct ib_device *ibdev) { struct ib_pma_portsamplesresult *p = @@ -1117,7 +986,7 @@ static int recv_pma_get_portsamplesresult(struct ib_perf *pmp, return reply((struct ib_smp *) pmp); } -static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp, +static int recv_pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp, struct ib_device *ibdev) { struct ib_pma_portsamplesresult_ext *p = @@ -1135,7 +1004,7 @@ static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp, status = dev->pma_sample_status; p->sample_status = cpu_to_be16(status); /* 64 bits */ - p->extended_width = __constant_cpu_to_be32(0x80000000); + p->extended_width = cpu_to_be32(0x80000000); for (i = 0; i < ARRAY_SIZE(dev->pma_counter_select); i++) p->counter[i] = (status != IB_PMA_SAMPLE_STATUS_DONE) ? 
0 : cpu_to_be64( @@ -1144,7 +1013,7 @@ static int recv_pma_get_portsamplesresult_ext(struct ib_perf *pmp, return reply((struct ib_smp *) pmp); } -static int recv_pma_get_portcounters(struct ib_perf *pmp, +static int recv_pma_get_portcounters(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) @@ -1178,12 +1047,12 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, memset(pmp->data, 0, sizeof(pmp->data)); p->port_select = port_select; - if (pmp->attr_mod != 0 || + if (pmp->mad_hdr.attr_mod != 0 || (port_select != port && port_select != 0xFF)) - pmp->status |= IB_SMP_INVALID_FIELD; + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; if (cntrs.symbol_error_counter > 0xFFFFUL) - p->symbol_error_counter = __constant_cpu_to_be16(0xFFFF); + p->symbol_error_counter = cpu_to_be16(0xFFFF); else p->symbol_error_counter = cpu_to_be16((u16)cntrs.symbol_error_counter); @@ -1197,17 +1066,17 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, else p->link_downed_counter = (u8)cntrs.link_downed_counter; if (cntrs.port_rcv_errors > 0xFFFFUL) - p->port_rcv_errors = __constant_cpu_to_be16(0xFFFF); + p->port_rcv_errors = cpu_to_be16(0xFFFF); else p->port_rcv_errors = cpu_to_be16((u16) cntrs.port_rcv_errors); if (cntrs.port_rcv_remphys_errors > 0xFFFFUL) - p->port_rcv_remphys_errors = __constant_cpu_to_be16(0xFFFF); + p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF); else p->port_rcv_remphys_errors = cpu_to_be16((u16)cntrs.port_rcv_remphys_errors); if (cntrs.port_xmit_discards > 0xFFFFUL) - p->port_xmit_discards = __constant_cpu_to_be16(0xFFFF); + p->port_xmit_discards = cpu_to_be16(0xFFFF); else p->port_xmit_discards = cpu_to_be16((u16)cntrs.port_xmit_discards); @@ -1215,27 +1084,27 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, cntrs.local_link_integrity_errors = 0xFUL; if (cntrs.excessive_buffer_overrun_errors > 0xFUL) cntrs.excessive_buffer_overrun_errors = 0xFUL; - p->lli_ebor_errors = (cntrs.local_link_integrity_errors << 4) | + p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) | cntrs.excessive_buffer_overrun_errors; if (cntrs.vl15_dropped > 0xFFFFUL) - p->vl15_dropped = __constant_cpu_to_be16(0xFFFF); + p->vl15_dropped = cpu_to_be16(0xFFFF); else p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped); if (cntrs.port_xmit_data > 0xFFFFFFFFUL) - p->port_xmit_data = __constant_cpu_to_be32(0xFFFFFFFF); + p->port_xmit_data = cpu_to_be32(0xFFFFFFFF); else p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data); if (cntrs.port_rcv_data > 0xFFFFFFFFUL) - p->port_rcv_data = __constant_cpu_to_be32(0xFFFFFFFF); + p->port_rcv_data = cpu_to_be32(0xFFFFFFFF); else p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data); if (cntrs.port_xmit_packets > 0xFFFFFFFFUL) - p->port_xmit_packets = __constant_cpu_to_be32(0xFFFFFFFF); + p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF); else p->port_xmit_packets = cpu_to_be32((u32)cntrs.port_xmit_packets); if (cntrs.port_rcv_packets > 0xFFFFFFFFUL) - p->port_rcv_packets = __constant_cpu_to_be32(0xFFFFFFFF); + p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF); else p->port_rcv_packets = cpu_to_be32((u32) cntrs.port_rcv_packets); @@ -1243,7 +1112,7 @@ static int recv_pma_get_portcounters(struct ib_perf *pmp, return reply((struct ib_smp *) pmp); } -static int recv_pma_get_portcounters_ext(struct ib_perf *pmp, +static int recv_pma_get_portcounters_ext(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portcounters_ext *p = @@ -1264,9 +1133,9 @@ static 
int recv_pma_get_portcounters_ext(struct ib_perf *pmp, memset(pmp->data, 0, sizeof(pmp->data)); p->port_select = port_select; - if (pmp->attr_mod != 0 || + if (pmp->mad_hdr.attr_mod != 0 || (port_select != port && port_select != 0xFF)) - pmp->status |= IB_SMP_INVALID_FIELD; + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; p->port_xmit_data = cpu_to_be64(swords); p->port_rcv_data = cpu_to_be64(rwords); @@ -1280,7 +1149,7 @@ static int recv_pma_get_portcounters_ext(struct ib_perf *pmp, return reply((struct ib_smp *) pmp); } -static int recv_pma_set_portcounters(struct ib_perf *pmp, +static int recv_pma_set_portcounters(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) @@ -1343,7 +1212,7 @@ static int recv_pma_set_portcounters(struct ib_perf *pmp, return recv_pma_get_portcounters(pmp, ibdev, port); } -static int recv_pma_set_portcounters_ext(struct ib_perf *pmp, +static int recv_pma_set_portcounters_ext(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) @@ -1517,19 +1386,19 @@ static int process_perf(struct ib_device *ibdev, u8 port_num, struct ib_mad *in_mad, struct ib_mad *out_mad) { - struct ib_perf *pmp = (struct ib_perf *)out_mad; + struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad; int ret; *out_mad = *in_mad; - if (pmp->class_version != 1) { - pmp->status |= IB_SMP_UNSUP_VERSION; + if (pmp->mad_hdr.class_version != 1) { + pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION; ret = reply((struct ib_smp *) pmp); goto bail; } - switch (pmp->method) { + switch (pmp->mad_hdr.method) { case IB_MGMT_METHOD_GET: - switch (pmp->attr_id) { + switch (pmp->mad_hdr.attr_id) { case IB_PMA_CLASS_PORT_INFO: ret = recv_pma_get_classportinfo(pmp); goto bail; @@ -1553,13 +1422,13 @@ static int process_perf(struct ib_device *ibdev, u8 port_num, port_num); goto bail; default: - pmp->status |= IB_SMP_UNSUP_METH_ATTR; + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_smp *) pmp); goto bail; } case IB_MGMT_METHOD_SET: - switch (pmp->attr_id) { + switch (pmp->mad_hdr.attr_id) { case IB_PMA_PORT_SAMPLES_CONTROL: ret = recv_pma_set_portsamplescontrol(pmp, ibdev, port_num); @@ -1573,7 +1442,7 @@ static int process_perf(struct ib_device *ibdev, u8 port_num, port_num); goto bail; default: - pmp->status |= IB_SMP_UNSUP_METH_ATTR; + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; ret = reply((struct ib_smp *) pmp); goto bail; } @@ -1587,7 +1456,7 @@ static int process_perf(struct ib_device *ibdev, u8 port_num, ret = IB_MAD_RESULT_SUCCESS; goto bail; default: - pmp->status |= IB_SMP_UNSUP_METHOD; + pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD; ret = reply((struct ib_smp *) pmp); } diff --git a/drivers/infiniband/hw/ipath/ipath_mmap.c b/drivers/infiniband/hw/ipath/ipath_mmap.c index fa830e22002..e7327422940 100644 --- a/drivers/infiniband/hw/ipath/ipath_mmap.c +++ b/drivers/infiniband/hw/ipath/ipath_mmap.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <linux/vmalloc.h> +#include <linux/slab.h> #include <linux/mm.h> #include <linux/errno.h> #include <asm/pgtable.h> @@ -74,7 +75,7 @@ static void ipath_vma_close(struct vm_area_struct *vma) kref_put(&ip->ref, ipath_release_mmap_info); } -static struct vm_operations_struct ipath_vm_ops = { +static const struct vm_operations_struct ipath_vm_ops = { .open = ipath_vma_open, .close = ipath_vma_close, }; diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index 
9d343b7c2f3..5e61e9bff69 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -31,6 +31,8 @@ * SOFTWARE. */ +#include <linux/slab.h> + #include <rdma/ib_umem.h> #include <rdma/ib_pack.h> #include <rdma/ib_smi.h> @@ -186,8 +188,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct ipath_mr *mr; struct ib_umem *umem; - struct ib_umem_chunk *chunk; - int n, m, i; + int n, m, entry; + struct scatterlist *sg; struct ib_mr *ret; if (length == 0) { @@ -200,10 +202,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(umem)) return (void *) umem; - n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - n += chunk->nents; - + n = umem->nmap; mr = alloc_mr(n, &to_idev(pd->device)->lk_table); if (!mr) { ret = ERR_PTR(-ENOMEM); @@ -222,22 +221,20 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, m = 0; n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) { - for (i = 0; i < chunk->nents; i++) { - void *vaddr; - - vaddr = page_address(sg_page(&chunk->page_list[i])); - if (!vaddr) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - mr->mr.map[m]->segs[n].vaddr = vaddr; - mr->mr.map[m]->segs[n].length = umem->page_size; - n++; - if (n == IPATH_SEGSZ) { - m++; - n = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + void *vaddr; + + vaddr = page_address(sg_page(sg)); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; + mr->mr.map[m]->segs[n].length = umem->page_size; + n++; + if (n == IPATH_SEGSZ) { + m++; + n = 0; } } ret = &mr->ibmr; diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c index 4715911101e..face87602dc 100644 --- a/drivers/infiniband/hw/ipath/ipath_qp.c +++ b/drivers/infiniband/hw/ipath/ipath_qp.c @@ -32,6 +32,8 @@ */ #include <linux/err.h> +#include <linux/sched.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include "ipath_verbs.h" @@ -461,7 +463,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, new_state = attr_mask & IB_QP_STATE ? 
attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, - attr_mask)) + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) goto inval; if (attr_mask & IB_QP_AV) { @@ -745,6 +747,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, struct ipath_swqe *swq = NULL; struct ipath_ibdev *dev; size_t sz; + size_t sg_list_sz; struct ib_qp *ret; if (init_attr->create_flags) { @@ -789,19 +792,31 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, goto bail; } sz = sizeof(*qp); + sg_list_sz = 0; if (init_attr->srq) { struct ipath_srq *srq = to_isrq(init_attr->srq); - sz += sizeof(*qp->r_sg_list) * - srq->rq.max_sge; - } else - sz += sizeof(*qp->r_sg_list) * - init_attr->cap.max_recv_sge; - qp = kmalloc(sz, GFP_KERNEL); + if (srq->rq.max_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (srq->rq.max_sge - 1); + } else if (init_attr->cap.max_recv_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (init_attr->cap.max_recv_sge - 1); + qp = kmalloc(sz + sg_list_sz, GFP_KERNEL); if (!qp) { ret = ERR_PTR(-ENOMEM); goto bail_swq; } + if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD || + init_attr->qp_type == IB_QPT_SMI || + init_attr->qp_type == IB_QPT_GSI)) { + qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL); + if (!qp->r_ud_sg_list) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } + } else + qp->r_ud_sg_list = NULL; if (init_attr->srq) { sz = 0; qp->r_rq.size = 0; @@ -818,7 +833,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, qp->r_rq.size * sz); if (!qp->r_rq.wq) { ret = ERR_PTR(-ENOMEM); - goto bail_qp; + goto bail_sg_list; } } @@ -848,7 +863,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd, if (err) { ret = ERR_PTR(err); vfree(qp->r_rq.wq); - goto bail_qp; + goto bail_sg_list; } qp->ip = NULL; qp->s_tx = NULL; @@ -925,6 +940,8 @@ bail_ip: vfree(qp->r_rq.wq); ipath_free_qp(&dev->qp_table, qp); free_qpn(&dev->qp_table, qp->ibqp.qp_num); +bail_sg_list: + kfree(qp->r_ud_sg_list); bail_qp: kfree(qp); bail_swq: @@ -989,6 +1006,7 @@ int ipath_destroy_qp(struct ib_qp *ibqp) kref_put(&qp->ip->ref, ipath_release_mmap_info); else vfree(qp->r_rq.wq); + kfree(qp->r_ud_sg_list); vfree(qp->s_wq); kfree(qp); return 0; diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 97710522624..79b3dbc9717 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -573,9 +573,8 @@ int ipath_make_rc_req(struct ipath_qp *qp) ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len); qp->s_state = OP(RDMA_READ_REQUEST); hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); - bth2 = qp->s_psn++ & IPATH_PSN_MASK; - if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; + bth2 = qp->s_psn & IPATH_PSN_MASK; + qp->s_psn = wqe->lpsn + 1; ss = NULL; len = 0; qp->s_cur++; @@ -675,7 +674,8 @@ static void send_rc_ack(struct ipath_qp *qp) hdr.lrh[0] = cpu_to_be16(lrh0); hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(dd->ipath_lid); + hdr.lrh[3] = cpu_to_be16(dd->ipath_lid | + qp->remote_ah_attr.src_path_bits); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK); @@ -1744,7 +1744,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* Signal completion event if the solicited bit is set. 
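Editor's note: the solicited-event test just below shows an endian idiom that the __constant_cpu_to_be32 -> cpu_to_be32 cleanups preserve: instead of byte-swapping the received big-endian BTH word on every packet, the constant mask is swapped (at compile time, for a constant argument) and the test is done in wire order. A small sketch, with htonl() as the user-space analogue of cpu_to_be32():

#include <stdbool.h>
#include <stdint.h>
#include <arpa/inet.h>

#define BTH_SE_BIT	(1u << 23)	/* solicited-event bit, host order */

static bool solicited(uint32_t bth0_wire)	/* BTH word as received */
{
	return (bth0_wire & htonl(BTH_SE_BIT)) != 0;
}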
*/ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & - __constant_cpu_to_be32(1 << 23)) != 0); + cpu_to_be32(1 << 23)) != 0); break; case OP(RDMA_WRITE_FIRST): diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index af051f75766..1f95bbaf760 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include <linux/sched.h> #include <linux/spinlock.h> #include "ipath_verbs.h" @@ -156,7 +157,7 @@ bail: /** * ipath_get_rwqe - copy the next RWQE into the QP's RWQE * @qp: the QP - * @wr_id_only: update wr_id only, not SGEs + * @wr_id_only: update qp->r_wr_id only, not qp->r_sge * * Return 0 if no RWQE is available, otherwise return 1. * @@ -173,8 +174,6 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) u32 tail; int ret; - qp->r_sge.sg_list = qp->r_sg_list; - if (qp->ibqp.srq) { srq = to_isrq(qp->ibqp.srq); handler = srq->ibsrq.event_handler; @@ -206,8 +205,10 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only) wqe = get_rwqe_ptr(rq, tail); if (++tail >= rq->size) tail = 0; - } while (!wr_id_only && !ipath_init_sge(qp, wqe, &qp->r_len, - &qp->r_sge)); + if (wr_id_only) + break; + qp->r_sge.sg_list = qp->r_sg_list; + } while (!ipath_init_sge(qp, wqe, &qp->r_len, &qp->r_sge)); qp->r_wr_id = wqe->wr_id; wq->tail = tail; @@ -618,7 +619,8 @@ void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp, qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); + qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid | + qp->remote_ah_attr.src_path_bits); bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22)); diff --git a/drivers/infiniband/hw/ipath/ipath_sd7220_img.c b/drivers/infiniband/hw/ipath/ipath_sd7220_img.c deleted file mode 100644 index 5ef59da9270..00000000000 --- a/drivers/infiniband/hw/ipath/ipath_sd7220_img.c +++ /dev/null @@ -1,1082 +0,0 @@ -/* - * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -/* - * This file contains the memory image from the vendor, to be copied into - * the IB SERDES of the IBA7220 during initialization. - * The file also includes the two functions which use this image. - */ -#include <linux/pci.h> -#include <linux/delay.h> - -#include "ipath_kernel.h" -#include "ipath_registers.h" -#include "ipath_7220.h" - -static unsigned char ipath_sd7220_ib_img[] = { -/*0000*/0x02, 0x0A, 0x29, 0x02, 0x0A, 0x87, 0xE5, 0xE6, - 0x30, 0xE6, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F, -/*0010*/0x00, 0xE5, 0xE2, 0x30, 0xE4, 0x04, 0x7E, 0x01, - 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x5F, 0x60, 0x08, -/*0020*/0x53, 0xF9, 0xF7, 0xE4, 0xF5, 0xFE, 0x80, 0x08, - 0x7F, 0x0A, 0x12, 0x17, 0x31, 0x12, 0x0E, 0xA2, -/*0030*/0x75, 0xFC, 0x08, 0xE4, 0xF5, 0xFD, 0xE5, 0xE7, - 0x20, 0xE7, 0x03, 0x43, 0xF9, 0x08, 0x22, 0x00, -/*0040*/0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x75, - 0x51, 0x01, 0xE4, 0xF5, 0x52, 0xF5, 0x53, 0xF5, -/*0050*/0x52, 0xF5, 0x7E, 0x7F, 0x04, 0x02, 0x04, 0x38, - 0xC2, 0x36, 0x05, 0x52, 0xE5, 0x52, 0xD3, 0x94, -/*0060*/0x0C, 0x40, 0x05, 0x75, 0x52, 0x01, 0xD2, 0x36, - 0x90, 0x07, 0x0C, 0x74, 0x07, 0xF0, 0xA3, 0x74, -/*0070*/0xFF, 0xF0, 0xE4, 0xF5, 0x0C, 0xA3, 0xF0, 0x90, - 0x07, 0x14, 0xF0, 0xA3, 0xF0, 0x75, 0x0B, 0x20, -/*0080*/0xF5, 0x09, 0xE4, 0xF5, 0x08, 0xE5, 0x08, 0xD3, - 0x94, 0x30, 0x40, 0x03, 0x02, 0x04, 0x04, 0x12, -/*0090*/0x00, 0x06, 0x15, 0x0B, 0xE5, 0x08, 0x70, 0x04, - 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x09, -/*00A0*/0x70, 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, - 0xEE, 0x5F, 0x60, 0x05, 0x12, 0x18, 0x71, 0xD2, -/*00B0*/0x35, 0x53, 0xE1, 0xF7, 0xE5, 0x08, 0x45, 0x09, - 0xFF, 0xE5, 0x0B, 0x25, 0xE0, 0x25, 0xE0, 0x24, -/*00C0*/0x83, 0xF5, 0x82, 0xE4, 0x34, 0x07, 0xF5, 0x83, - 0xEF, 0xF0, 0x85, 0xE2, 0x20, 0xE5, 0x52, 0xD3, -/*00D0*/0x94, 0x01, 0x40, 0x0D, 0x12, 0x19, 0xF3, 0xE0, - 0x54, 0xA0, 0x64, 0x40, 0x70, 0x03, 0x02, 0x03, -/*00E0*/0xFB, 0x53, 0xF9, 0xF8, 0x90, 0x94, 0x70, 0xE4, - 0xF0, 0xE0, 0xF5, 0x10, 0xAF, 0x09, 0x12, 0x1E, -/*00F0*/0xB3, 0xAF, 0x08, 0xEF, 0x44, 0x08, 0xF5, 0x82, - 0x75, 0x83, 0x80, 0xE0, 0xF5, 0x29, 0xEF, 0x44, -/*0100*/0x07, 0x12, 0x1A, 0x3C, 0xF5, 0x22, 0x54, 0x40, - 0xD3, 0x94, 0x00, 0x40, 0x1E, 0xE5, 0x29, 0x54, -/*0110*/0xF0, 0x70, 0x21, 0x12, 0x19, 0xF3, 0xE0, 0x44, - 0x80, 0xF0, 0xE5, 0x22, 0x54, 0x30, 0x65, 0x08, -/*0120*/0x70, 0x09, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xBF, - 0xF0, 0x80, 0x09, 0x12, 0x19, 0xF3, 0x74, 0x40, -/*0130*/0xF0, 0x02, 0x03, 0xFB, 0x12, 0x1A, 0x12, 0x75, - 0x83, 0xAE, 0x74, 0xFF, 0xF0, 0xAF, 0x08, 0x7E, -/*0140*/0x00, 0xEF, 0x44, 0x07, 0xF5, 0x82, 0xE0, 0xFD, - 0xE5, 0x0B, 0x25, 0xE0, 0x25, 0xE0, 0x24, 0x81, -/*0150*/0xF5, 0x82, 0xE4, 0x34, 0x07, 0xF5, 0x83, 0xED, - 0xF0, 0x90, 0x07, 0x0E, 0xE0, 0x04, 0xF0, 0xEF, -/*0160*/0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0x98, 0xE0, - 0xF5, 0x28, 0x12, 0x1A, 0x23, 0x40, 0x0C, 0x12, -/*0170*/0x19, 0xF3, 0xE0, 0x44, 0x01, 0x12, 0x1A, 0x32, - 0x02, 0x03, 0xF6, 0xAF, 0x08, 0x7E, 0x00, 0x74, -/*0180*/0x80, 0xCD, 0xEF, 0xCD, 0x8D, 0x82, 0xF5, 0x83, - 0xE0, 0x30, 0xE0, 0x0A, 0x12, 0x19, 0xF3, 0xE0, -/*0190*/0x44, 0x20, 0xF0, 0x02, 0x03, 0xFB, 0x12, 0x19, - 0xF3, 0xE0, 0x54, 0xDF, 0xF0, 0xEE, 0x44, 0xAE, -/*01A0*/0x12, 0x1A, 0x43, 0x30, 0xE4, 0x03, 0x02, 0x03, - 0xFB, 0x74, 0x9E, 0x12, 0x1A, 0x05, 
0x20, 0xE0, -/*01B0*/0x03, 0x02, 0x03, 0xFB, 0x8F, 0x82, 0x8E, 0x83, - 0xE0, 0x20, 0xE0, 0x03, 0x02, 0x03, 0xFB, 0x12, -/*01C0*/0x19, 0xF3, 0xE0, 0x44, 0x10, 0xF0, 0xE5, 0xE3, - 0x20, 0xE7, 0x08, 0xE5, 0x08, 0x12, 0x1A, 0x3A, -/*01D0*/0x44, 0x04, 0xF0, 0xAF, 0x08, 0x7E, 0x00, 0xEF, - 0x12, 0x1A, 0x3A, 0x20, 0xE2, 0x34, 0x12, 0x19, -/*01E0*/0xF3, 0xE0, 0x44, 0x08, 0xF0, 0xE5, 0xE4, 0x30, - 0xE6, 0x04, 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00, -/*01F0*/0xE5, 0x7E, 0xC3, 0x94, 0x04, 0x50, 0x04, 0x7C, - 0x01, 0x80, 0x02, 0x7C, 0x00, 0xEC, 0x4D, 0x60, -/*0200*/0x05, 0xC2, 0x35, 0x02, 0x03, 0xFB, 0xEE, 0x44, - 0xD2, 0x12, 0x1A, 0x43, 0x44, 0x40, 0xF0, 0x02, -/*0210*/0x03, 0xFB, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xF7, - 0xF0, 0x12, 0x1A, 0x12, 0x75, 0x83, 0xD2, 0xE0, -/*0220*/0x54, 0xBF, 0xF0, 0x90, 0x07, 0x14, 0xE0, 0x04, - 0xF0, 0xE5, 0x7E, 0x70, 0x03, 0x75, 0x7E, 0x01, -/*0230*/0xAF, 0x08, 0x7E, 0x00, 0x12, 0x1A, 0x23, 0x40, - 0x12, 0x12, 0x19, 0xF3, 0xE0, 0x44, 0x01, 0x12, -/*0240*/0x19, 0xF2, 0xE0, 0x54, 0x02, 0x12, 0x1A, 0x32, - 0x02, 0x03, 0xFB, 0x12, 0x19, 0xF3, 0xE0, 0x44, -/*0250*/0x02, 0x12, 0x19, 0xF2, 0xE0, 0x54, 0xFE, 0xF0, - 0xC2, 0x35, 0xEE, 0x44, 0x8A, 0x8F, 0x82, 0xF5, -/*0260*/0x83, 0xE0, 0xF5, 0x17, 0x54, 0x8F, 0x44, 0x40, - 0xF0, 0x74, 0x90, 0xFC, 0xE5, 0x08, 0x44, 0x07, -/*0270*/0xFD, 0xF5, 0x82, 0x8C, 0x83, 0xE0, 0x54, 0x3F, - 0x90, 0x07, 0x02, 0xF0, 0xE0, 0x54, 0xC0, 0x8D, -/*0280*/0x82, 0x8C, 0x83, 0xF0, 0x74, 0x92, 0x12, 0x1A, - 0x05, 0x90, 0x07, 0x03, 0x12, 0x1A, 0x19, 0x74, -/*0290*/0x82, 0x12, 0x1A, 0x05, 0x90, 0x07, 0x04, 0x12, - 0x1A, 0x19, 0x74, 0xB4, 0x12, 0x1A, 0x05, 0x90, -/*02A0*/0x07, 0x05, 0x12, 0x1A, 0x19, 0x74, 0x94, 0xFE, - 0xE5, 0x08, 0x44, 0x06, 0x12, 0x1A, 0x0A, 0xF5, -/*02B0*/0x10, 0x30, 0xE0, 0x04, 0xD2, 0x37, 0x80, 0x02, - 0xC2, 0x37, 0xE5, 0x10, 0x54, 0x7F, 0x8F, 0x82, -/*02C0*/0x8E, 0x83, 0xF0, 0x30, 0x44, 0x30, 0x12, 0x1A, - 0x03, 0x54, 0x80, 0xD3, 0x94, 0x00, 0x40, 0x04, -/*02D0*/0xD2, 0x39, 0x80, 0x02, 0xC2, 0x39, 0x8F, 0x82, - 0x8E, 0x83, 0xE0, 0x44, 0x80, 0xF0, 0x12, 0x1A, -/*02E0*/0x03, 0x54, 0x40, 0xD3, 0x94, 0x00, 0x40, 0x04, - 0xD2, 0x3A, 0x80, 0x02, 0xC2, 0x3A, 0x8F, 0x82, -/*02F0*/0x8E, 0x83, 0xE0, 0x44, 0x40, 0xF0, 0x74, 0x92, - 0xFE, 0xE5, 0x08, 0x44, 0x06, 0x12, 0x1A, 0x0A, -/*0300*/0x30, 0xE7, 0x04, 0xD2, 0x38, 0x80, 0x02, 0xC2, - 0x38, 0x8F, 0x82, 0x8E, 0x83, 0xE0, 0x54, 0x7F, -/*0310*/0xF0, 0x12, 0x1E, 0x46, 0xE4, 0xF5, 0x0A, 0x20, - 0x03, 0x02, 0x80, 0x03, 0x30, 0x43, 0x03, 0x12, -/*0320*/0x19, 0x95, 0x20, 0x02, 0x02, 0x80, 0x03, 0x30, - 0x42, 0x03, 0x12, 0x0C, 0x8F, 0x30, 0x30, 0x06, -/*0330*/0x12, 0x19, 0x95, 0x12, 0x0C, 0x8F, 0x12, 0x0D, - 0x47, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xFB, 0xF0, -/*0340*/0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40, 0x46, 0x43, - 0xE1, 0x08, 0x12, 0x19, 0xF3, 0xE0, 0x44, 0x04, -/*0350*/0xF0, 0xE5, 0xE4, 0x20, 0xE7, 0x2A, 0x12, 0x1A, - 0x12, 0x75, 0x83, 0xD2, 0xE0, 0x54, 0x08, 0xD3, -/*0360*/0x94, 0x00, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02, - 0x7F, 0x00, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40, -/*0370*/0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF, - 0x5E, 0x60, 0x05, 0x12, 0x1D, 0xD7, 0x80, 0x17, -/*0380*/0x12, 0x1A, 0x12, 0x75, 0x83, 0xD2, 0xE0, 0x44, - 0x08, 0xF0, 0x02, 0x03, 0xFB, 0x12, 0x1A, 0x12, -/*0390*/0x75, 0x83, 0xD2, 0xE0, 0x54, 0xF7, 0xF0, 0x12, - 0x1E, 0x46, 0x7F, 0x08, 0x12, 0x17, 0x31, 0x74, -/*03A0*/0x8E, 0xFE, 0x12, 0x1A, 0x12, 0x8E, 0x83, 0xE0, - 0xF5, 0x10, 0x54, 0xFE, 0xF0, 0xE5, 0x10, 0x44, -/*03B0*/0x01, 0xFF, 0xE5, 0x08, 0xFD, 0xED, 0x44, 0x07, - 0xF5, 0x82, 0xEF, 0xF0, 0xE5, 0x10, 0x54, 0xFE, 
-/*03C0*/0xFF, 0xED, 0x44, 0x07, 0xF5, 0x82, 0xEF, 0x12, - 0x1A, 0x11, 0x75, 0x83, 0x86, 0xE0, 0x44, 0x10, -/*03D0*/0x12, 0x1A, 0x11, 0xE0, 0x44, 0x10, 0xF0, 0x12, - 0x19, 0xF3, 0xE0, 0x54, 0xFD, 0x44, 0x01, 0xFF, -/*03E0*/0x12, 0x19, 0xF3, 0xEF, 0x12, 0x1A, 0x32, 0x30, - 0x32, 0x0C, 0xE5, 0x08, 0x44, 0x08, 0xF5, 0x82, -/*03F0*/0x75, 0x83, 0x82, 0x74, 0x05, 0xF0, 0xAF, 0x0B, - 0x12, 0x18, 0xD7, 0x74, 0x10, 0x25, 0x08, 0xF5, -/*0400*/0x08, 0x02, 0x00, 0x85, 0x05, 0x09, 0xE5, 0x09, - 0xD3, 0x94, 0x07, 0x50, 0x03, 0x02, 0x00, 0x82, -/*0410*/0xE5, 0x7E, 0xD3, 0x94, 0x00, 0x40, 0x04, 0x7F, - 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x7E, 0xC3, -/*0420*/0x94, 0xFA, 0x50, 0x04, 0x7E, 0x01, 0x80, 0x02, - 0x7E, 0x00, 0xEE, 0x5F, 0x60, 0x02, 0x05, 0x7E, -/*0430*/0x30, 0x35, 0x0B, 0x43, 0xE1, 0x01, 0x7F, 0x09, - 0x12, 0x17, 0x31, 0x02, 0x00, 0x58, 0x53, 0xE1, -/*0440*/0xFE, 0x02, 0x00, 0x58, 0x8E, 0x6A, 0x8F, 0x6B, - 0x8C, 0x6C, 0x8D, 0x6D, 0x75, 0x6E, 0x01, 0x75, -/*0450*/0x6F, 0x01, 0x75, 0x70, 0x01, 0xE4, 0xF5, 0x73, - 0xF5, 0x74, 0xF5, 0x75, 0x90, 0x07, 0x2F, 0xF0, -/*0460*/0xF5, 0x3C, 0xF5, 0x3E, 0xF5, 0x46, 0xF5, 0x47, - 0xF5, 0x3D, 0xF5, 0x3F, 0xF5, 0x6F, 0xE5, 0x6F, -/*0470*/0x70, 0x0F, 0xE5, 0x6B, 0x45, 0x6A, 0x12, 0x07, - 0x2A, 0x75, 0x83, 0x80, 0x74, 0x3A, 0xF0, 0x80, -/*0480*/0x09, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x80, 0x74, - 0x1A, 0xF0, 0xE4, 0xF5, 0x6E, 0xC3, 0x74, 0x3F, -/*0490*/0x95, 0x6E, 0xFF, 0x12, 0x08, 0x65, 0x75, 0x83, - 0x82, 0xEF, 0xF0, 0x12, 0x1A, 0x4D, 0x12, 0x08, -/*04A0*/0xC6, 0xE5, 0x33, 0xF0, 0x12, 0x08, 0xFA, 0x12, - 0x08, 0xB1, 0x40, 0xE1, 0xE5, 0x6F, 0x70, 0x0B, -/*04B0*/0x12, 0x07, 0x2A, 0x75, 0x83, 0x80, 0x74, 0x36, - 0xF0, 0x80, 0x09, 0x12, 0x07, 0x2A, 0x75, 0x83, -/*04C0*/0x80, 0x74, 0x16, 0xF0, 0x75, 0x6E, 0x01, 0x12, - 0x07, 0x2A, 0x75, 0x83, 0xB4, 0xE5, 0x6E, 0xF0, -/*04D0*/0x12, 0x1A, 0x4D, 0x74, 0x3F, 0x25, 0x6E, 0xF5, - 0x82, 0xE4, 0x34, 0x00, 0xF5, 0x83, 0xE5, 0x33, -/*04E0*/0xF0, 0x74, 0xBF, 0x25, 0x6E, 0xF5, 0x82, 0xE4, - 0x34, 0x00, 0x12, 0x08, 0xB1, 0x40, 0xD8, 0xE4, -/*04F0*/0xF5, 0x70, 0xF5, 0x46, 0xF5, 0x47, 0xF5, 0x6E, - 0x12, 0x08, 0xFA, 0xF5, 0x83, 0xE0, 0xFE, 0x12, -/*0500*/0x08, 0xC6, 0xE0, 0x7C, 0x00, 0x24, 0x00, 0xFF, - 0xEC, 0x3E, 0xFE, 0xAD, 0x3B, 0xD3, 0xEF, 0x9D, -/*0510*/0xEE, 0x9C, 0x50, 0x04, 0x7B, 0x01, 0x80, 0x02, - 0x7B, 0x00, 0xE5, 0x70, 0x70, 0x04, 0x7A, 0x01, -/*0520*/0x80, 0x02, 0x7A, 0x00, 0xEB, 0x5A, 0x60, 0x06, - 0x85, 0x6E, 0x46, 0x75, 0x70, 0x01, 0xD3, 0xEF, -/*0530*/0x9D, 0xEE, 0x9C, 0x50, 0x04, 0x7F, 0x01, 0x80, - 0x02, 0x7F, 0x00, 0xE5, 0x70, 0xB4, 0x01, 0x04, -/*0540*/0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF, 0x5E, - 0x60, 0x03, 0x85, 0x6E, 0x47, 0x05, 0x6E, 0xE5, -/*0550*/0x6E, 0x64, 0x7F, 0x70, 0xA3, 0xE5, 0x46, 0x60, - 0x05, 0xE5, 0x47, 0xB4, 0x7E, 0x03, 0x85, 0x46, -/*0560*/0x47, 0xE5, 0x6F, 0x70, 0x08, 0x85, 0x46, 0x76, - 0x85, 0x47, 0x77, 0x80, 0x0E, 0xC3, 0x74, 0x7F, -/*0570*/0x95, 0x46, 0xF5, 0x78, 0xC3, 0x74, 0x7F, 0x95, - 0x47, 0xF5, 0x79, 0xE5, 0x6F, 0x70, 0x37, 0xE5, -/*0580*/0x46, 0x65, 0x47, 0x70, 0x0C, 0x75, 0x73, 0x01, - 0x75, 0x74, 0x01, 0xF5, 0x3C, 0xF5, 0x3D, 0x80, -/*0590*/0x35, 0xE4, 0xF5, 0x4E, 0xC3, 0xE5, 0x47, 0x95, - 0x46, 0xF5, 0x3C, 0xC3, 0x13, 0xF5, 0x71, 0x25, -/*05A0*/0x46, 0xF5, 0x72, 0xC3, 0x94, 0x3F, 0x40, 0x05, - 0xE4, 0xF5, 0x3D, 0x80, 0x40, 0xC3, 0x74, 0x3F, -/*05B0*/0x95, 0x72, 0xF5, 0x3D, 0x80, 0x37, 0xE5, 0x46, - 0x65, 0x47, 0x70, 0x0F, 0x75, 0x73, 0x01, 0x75, -/*05C0*/0x75, 0x01, 0xF5, 0x3E, 0xF5, 0x3F, 0x75, 0x4E, - 0x01, 0x80, 0x22, 0xE4, 0xF5, 0x4E, 0xC3, 0xE5, -/*05D0*/0x47, 0x95, 
[~6.5 KB of the ipath_sd7220_ib_img firmware hex image, offsets 0x05D8 through 0x1FFF, elided here; every line of the byte array is removed by this hunk] -}; - -int ipath_sd7220_ib_load(struct ipath_devdata *dd) -{ - return ipath_sd7220_prog_ld(dd, IB_7220_SERDES, ipath_sd7220_ib_img, - sizeof(ipath_sd7220_ib_img), 0); -} - -int ipath_sd7220_ib_vfy(struct ipath_devdata *dd) -{ - return ipath_sd7220_prog_vfy(dd, IB_7220_SERDES, ipath_sd7220_ib_img, - sizeof(ipath_sd7220_ib_img), 0); -} diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c index eaba03273e4..17a517766ad 100644 --- a/drivers/infiniband/hw/ipath/ipath_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_sdma.c @@ -31,6 +31,7 @@ */ #include <linux/spinlock.h> +#include <linux/gfp.h> #include "ipath_kernel.h" #include "ipath_verbs.h" @@ -246,7 +247,7 @@ static void sdma_abort_task(unsigned long opaque) /* ipath_sdma_abort() is done, waiting for interrupt */ if (status == IPATH_SDMA_ABORT_DISARMED) { - if (jiffies < dd->ipath_sdma_abort_intr_timeout) + if (time_before(jiffies, dd->ipath_sdma_abort_intr_timeout)) goto resched_noprint; /* give up, intr got lost somewhere */ ipath_dbg("give up waiting for SDMADISABLED intr\n"); @@ -340,7 +341,7 @@ resched: * JAG - this is bad to just have default be a loop without * state change */ - if (jiffies > dd->ipath_sdma_abort_jiffies) { + if (time_after(jiffies, dd->ipath_sdma_abort_jiffies)) { ipath_dbg("looping with status 0x%08lx\n", dd->ipath_sdma_status);
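
The two hunks above swap raw jiffies comparisons for time_before()/time_after(). These <linux/jiffies.h> macros compare through signed subtraction, so they stay correct when the jiffies counter wraps around, which a plain "<" or ">" does not. A minimal sketch of the idiom, assuming a hypothetical my_deadline value computed earlier as jiffies plus some timeout:

	#include <linux/types.h>
	#include <linux/jiffies.h>

	/* True once my_deadline has passed; unlike "jiffies > my_deadline",
	 * this stays correct across a wraparound of the jiffies counter. */
	static bool example_expired(unsigned long my_deadline)
	{
		return time_after(jiffies, my_deadline);
	}
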
dd->ipath_sdma_abort_jiffies = jiffies + 5 * HZ; @@ -698,10 +699,8 @@ retry: addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr, tx->map_len, DMA_TO_DEVICE); - if (dma_mapping_error(addr)) { - ret = -EIO; - goto unlock; - } + if (dma_mapping_error(&dd->pcidev->dev, addr)) + goto ioerr; dwoffset = tx->map_len >> 2; make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0); @@ -741,6 +740,8 @@ retry: dw = (len + 3) >> 2; addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, addr)) + goto unmap; make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset); /* SDmaUseLargeBuf has to be set in every descriptor */ if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF) @@ -781,10 +782,10 @@ retry: descqp = &dd->ipath_sdma_descq[dd->ipath_sdma_descq_cnt].qw[0]; descqp -= 2; /* SDmaLastDesc */ - descqp[0] |= __constant_cpu_to_le64(1ULL << 11); + descqp[0] |= cpu_to_le64(1ULL << 11); if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_INTREQ) { /* SDmaIntReq */ - descqp[0] |= __constant_cpu_to_le64(1ULL << 15); + descqp[0] |= cpu_to_le64(1ULL << 15); } /* Commit writes to memory and advance the tail on the chip */ @@ -798,7 +799,18 @@ retry: list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist); if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15) vl15_watchdog_enq(dd); - + goto unlock; + +unmap: + while (tail != dd->ipath_sdma_descq_tail) { + if (!tail) + tail = dd->ipath_sdma_descq_cnt - 1; + else + tail--; + unmap_desc(dd, tail); + } +ioerr: + ret = -EIO; unlock: spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); fail: diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c index e3d80ca84c1..26271984b71 100644 --- a/drivers/infiniband/hw/ipath/ipath_srq.c +++ b/drivers/infiniband/hw/ipath/ipath_srq.c @@ -32,6 +32,7 @@ */ #include <linux/err.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include "ipath_verbs.h" @@ -106,6 +107,11 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd, u32 sz; struct ib_srq *ret; + if (srq_init_attr->srq_type != IB_SRQT_BASIC) { + ret = ERR_PTR(-ENOSYS); + goto done; + } + if (srq_init_attr->attr.max_wr == 0) { ret = ERR_PTR(-EINVAL); goto done; diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index c8e3d65f0de..f63e143e329 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c @@ -112,6 +112,14 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) dd->ipath_lastrpkts = val; } val64 = dd->ipath_rpkts; + } else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) { + if (dd->ibdeltainprog) + val64 -= val64 - dd->ibsymsnap; + val64 -= dd->ibsymdelta; + } else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) { + if (dd->ibdeltainprog) + val64 -= val64 - dd->iblnkerrsnap; + val64 -= dd->iblnkerrdelta; } else val64 = (u64) val; diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index a6c8efbdc0c..75558f33f1c 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c @@ -32,6 +32,7 @@ */ #include <linux/ctype.h> +#include <linux/stat.h> #include "ipath_kernel.h" #include "ipath_verbs.h" @@ -557,6 +558,7 @@ static ssize_t store_reset(struct device *dev, dev_info(dev,"Unit %d is disabled, can't reset\n", dd->ipath_unit); ret = -EINVAL; + goto bail; } ret = ipath_reset_device(dd->ipath_unit); bail: @@ -1069,7 +1071,7 @@ static ssize_t show_tempsense(struct device *dev, return ret; 
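
Several hunks in this range (ipath_sdma.c above, ipath_user_sdma.c below) move dma_mapping_error() to its two-argument form: the struct device is needed so the check can be dispatched through that device's dma_map_ops, and the sdma queueing path now also unwinds any descriptors mapped before the failing one. A minimal sketch of the map-then-check idiom, with hypothetical names:

	#include <linux/errno.h>
	#include <linux/dma-mapping.h>

	/* Map one buffer for device-bound DMA. On failure nothing was
	 * mapped, so the caller must not unmap the returned cookie. */
	static int example_map_buf(struct device *dev, void *buf, size_t len,
				   dma_addr_t *daddr)
	{
		dma_addr_t addr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

		if (dma_mapping_error(dev, addr))
			return -EIO;
		*daddr = addr;
		return 0;
	}
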
} -struct attribute_group *ipath_driver_attr_groups[] = { +const struct attribute_group *ipath_driver_attr_groups[] = { &driver_attr_group, NULL, }; diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c index 82cc588b8bf..22e60998f1a 100644 --- a/drivers/infiniband/hw/ipath/ipath_uc.c +++ b/drivers/infiniband/hw/ipath/ipath_uc.c @@ -419,7 +419,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & - __constant_cpu_to_be32(1 << 23)) != 0); + cpu_to_be32(1 << 23)) != 0); break; case OP(RDMA_WRITE_FIRST): diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c index 36aa242c487..e8a2a915251 100644 --- a/drivers/infiniband/hw/ipath/ipath_ud.c +++ b/drivers/infiniband/hw/ipath/ipath_ud.c @@ -31,6 +31,7 @@ * SOFTWARE. */ +#include <linux/sched.h> #include <rdma/ib_smi.h> #include "ipath_verbs.h" @@ -70,8 +71,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) goto done; } - rsge.sg_list = NULL; - /* * Check that the qkey matches (except for QP0, see 9.6.1.4.1). * Qkeys with the high order bit set mean use the @@ -87,7 +86,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) } /* - * A GRH is expected to preceed the data even if not + * A GRH is expected to precede the data even if not * present on the wire. */ length = swqe->length; @@ -115,21 +114,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) rq = &qp->r_rq; } - if (rq->max_sge > 1) { - /* - * XXX We could use GFP_KERNEL if ipath_do_send() - * was always called from the tasklet instead of - * from ipath_post_send(). - */ - rsge.sg_list = kmalloc((rq->max_sge - 1) * - sizeof(struct ipath_sge), - GFP_ATOMIC); - if (!rsge.sg_list) { - dev->n_pkt_drops++; - goto drop; - } - } - /* * Get the next work request entry to find where to put the data. * Note that it is safe to drop the lock after changing rq->tail @@ -147,6 +131,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) goto drop; } wqe = get_rwqe_ptr(rq, tail); + rsge.sg_list = qp->r_ud_sg_list; if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) { spin_unlock_irqrestore(&rq->lock, flags); dev->n_pkt_drops++; @@ -242,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe) ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, swqe->wr.send_flags & IB_SEND_SOLICITED); drop: - kfree(rsge.sg_list); if (atomic_dec_and_test(&qp->refcount)) wake_up(&qp->wait); done:; @@ -267,6 +251,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) u16 lrh0; u16 lid; int ret = 0; + int next_cur; spin_lock_irqsave(&qp->s_lock, flags); @@ -290,8 +275,9 @@ int ipath_make_ud_req(struct ipath_qp *qp) goto bail; wqe = get_swqe_ptr(qp, qp->s_cur); - if (++qp->s_cur >= qp->s_size) - qp->s_cur = 0; + next_cur = qp->s_cur + 1; + if (next_cur >= qp->s_size) + next_cur = 0; /* Construct the header. 
*/ ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; @@ -315,6 +301,7 @@ int ipath_make_ud_req(struct ipath_qp *qp) qp->s_flags |= IPATH_S_WAIT_DMA; goto bail; } + qp->s_cur = next_cur; spin_unlock_irqrestore(&qp->s_lock, flags); ipath_ud_loopback(qp, wqe); spin_lock_irqsave(&qp->s_lock, flags); @@ -323,6 +310,7 @@ } } + qp->s_cur = next_cur; extra_bytes = -wqe->length & 3; nwords = (wqe->length + extra_bytes) >> 2; @@ -383,7 +371,7 @@ */ ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && ah_attr->dlid != IPATH_PERMISSIVE_LID ? - __constant_cpu_to_be32(IPATH_MULTICAST_QPN) : + cpu_to_be32(IPATH_MULTICAST_QPN) : cpu_to_be32(wqe->wr.wr.ud.remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK); /* @@ -527,7 +515,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, } /* - * A GRH is expected to preceed the data even if not + * A GRH is expected to precede the data even if not * present on the wire. */ wc.byte_len = tlen + sizeof(struct ib_grh); @@ -586,7 +574,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, /* Signal completion event if the solicited bit is set. */ ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & - __constant_cpu_to_be32(1 << 23)) != 0); + cpu_to_be32(1 << 23)) != 0); bail:; } diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c index 0190edc8044..dc66c450691 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_pages.c +++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c @@ -33,6 +33,8 @@ #include <linux/mm.h> #include <linux/device.h> +#include <linux/slab.h> +#include <linux/sched.h> #include "ipath_kernel.h" @@ -51,15 +53,14 @@ static void __ipath_release_user_pages(struct page **p, size_t num_pages, } /* call with current->mm->mmap_sem held */ -static int __get_user_pages(unsigned long start_page, size_t num_pages, - struct page **p, struct vm_area_struct **vma) +static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages, + struct page **p, struct vm_area_struct **vma) { unsigned long lock_limit; size_t got; int ret; - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> - PAGE_SHIFT; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; if (num_pages > lock_limit) { ret = -ENOMEM; @@ -78,7 +79,7 @@ static int __get_user_pages(unsigned long start_page, size_t num_pages, goto bail_release; } - current->mm->locked_vm += num_pages; + current->mm->pinned_vm += num_pages; ret = 0; goto bail; @@ -164,7 +165,7 @@ int ipath_get_user_pages(unsigned long start_page, size_t num_pages, down_write(&current->mm->mmap_sem); - ret = __get_user_pages(start_page, num_pages, p, NULL); + ret = __ipath_get_user_pages(start_page, num_pages, p, NULL); up_write(&current->mm->mmap_sem); @@ -177,7 +178,7 @@ void ipath_release_user_pages(struct page **p, size_t num_pages) __ipath_release_user_pages(p, num_pages, 1); - current->mm->locked_vm -= num_pages; + current->mm->pinned_vm -= num_pages; up_write(&current->mm->mmap_sem); } @@ -194,7 +195,7 @@ static void user_pages_account(struct work_struct *_work) container_of(_work, struct ipath_user_pages_work, work); down_write(&work->mm->mmap_sem); - work->mm->locked_vm -= work->num_pages; + work->mm->pinned_vm -= work->num_pages; up_write(&work->mm->mmap_sem); mmput(work->mm); kfree(work); @@ -209,20 +210,20 @@ void ipath_release_user_pages_on_close(struct page **p, size_t num_pages) mm = get_task_mm(current); if (!mm) -
goto bail; + return; work = kmalloc(sizeof(*work), GFP_KERNEL); if (!work) goto bail_mm; - goto bail; - INIT_WORK(&work->work, user_pages_account); work->mm = mm; work->num_pages = num_pages; + queue_work(ib_wq, &work->work); + return; + bail_mm: mmput(mm); -bail: return; } diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.c b/drivers/infiniband/hw/ipath/ipath_user_sdma.c index 86e016916cd..cc04b7ba348 100644 --- a/drivers/infiniband/hw/ipath/ipath_user_sdma.c +++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.c @@ -33,6 +33,7 @@ #include <linux/types.h> #include <linux/device.h> #include <linux/dmapool.h> +#include <linux/sched.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/highmem.h> @@ -206,7 +207,7 @@ static int ipath_user_sdma_coalesce(const struct ipath_devdata *dd, dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_unmap; } @@ -235,7 +236,7 @@ static int ipath_user_sdma_num_pages(const struct iovec *iov) return 1 + ((epage - spage) >> PAGE_SHIFT); } -/* truncate length to page boundry */ +/* truncate length to page boundary */ static int ipath_user_sdma_page_length(unsigned long addr, unsigned long len) { const unsigned long offset = addr & ~PAGE_MASK; @@ -279,9 +280,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd, int j; int ret; - ret = get_user_pages(current, current->mm, addr, - npages, 0, 1, pages, NULL); - + ret = get_user_pages_fast(addr, npages, 0, pages); if (ret != npages) { int i; @@ -301,7 +300,7 @@ static int ipath_user_sdma_pin_pages(const struct ipath_devdata *dd, pages[j], 0, flen, DMA_TO_DEVICE); unsigned long fofs = addr & ~PAGE_MASK; - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto done; } @@ -508,7 +507,7 @@ static int ipath_user_sdma_queue_pkts(const struct ipath_devdata *dd, if (page) { dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len, DMA_TO_DEVICE); - if (dma_mapping_error(dma_addr)) { + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { ret = -ENOMEM; goto free_pbc; } @@ -667,13 +666,13 @@ static inline __le64 ipath_sdma_make_desc0(struct ipath_devdata *dd, static inline __le64 ipath_sdma_make_first_desc0(__le64 descq) { - return descq | __constant_cpu_to_le64(1ULL << 12); + return descq | cpu_to_le64(1ULL << 12); } static inline __le64 ipath_sdma_make_last_desc0(__le64 descq) { /* last */ /* dma head */ - return descq | __constant_cpu_to_le64(1ULL << 11 | 1ULL << 13); + return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13); } static inline __le64 ipath_sdma_make_desc1(u64 addr) @@ -763,7 +762,7 @@ static int ipath_user_sdma_push_pkts(struct ipath_devdata *dd, if (ofs >= IPATH_SMALLBUF_DWORDS) { for (i = 0; i < pkt->naddr; i++) { dd->ipath_sdma_descq[dtail].qw[0] |= - __constant_cpu_to_le64(1ULL << 14); + cpu_to_le64(1ULL << 14); if (++dtail == dd->ipath_sdma_descq_cnt) dtail = 0; } @@ -810,10 +809,7 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd, while (dim) { const int mxp = 8; - down_write(&current->mm->mmap_sem); ret = ipath_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp); - up_write(&current->mm->mmap_sem); - if (ret <= 0) goto done_unlock; else { diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 55c71882882..44ea9390417 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -34,6 +34,8 @@
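
In ipath_user_sdma.c above, pinning switches from get_user_pages() under a caller-held mmap_sem to get_user_pages_fast(), which takes no mmap_sem at all; that is why ipath_user_sdma_writev() drops its down_write()/up_write() pair. The companion ipath_user_pages.c hunks read the MEMLOCK ceiling through the rlimit() helper instead of poking current->signal->rlim directly. A minimal sketch of pinning with the four-argument gup_fast of this era, hypothetical caller:

	#include <linux/errno.h>
	#include <linux/mm.h>

	/* Pin npages of user memory at uaddr for reading (write flag 0),
	 * releasing any partial pin so the caller sees all or nothing. */
	static int example_pin_pages(unsigned long uaddr, int npages,
				     struct page **pages)
	{
		int got = get_user_pages_fast(uaddr, npages, 0, pages);

		if (got == npages)
			return 0;
		while (got > 0)
			put_page(pages[--got]);
		return -ENOMEM;
	}
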
#include <rdma/ib_mad.h> #include <rdma/ib_user_verbs.h> #include <linux/io.h> +#include <linux/slab.h> +#include <linux/module.h> #include <linux/utsname.h> #include <linux/rculist.h> @@ -340,9 +342,16 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) int acc; int ret; unsigned long flags; + struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; spin_lock_irqsave(&qp->s_lock, flags); + if (qp->ibqp.qp_type != IB_QPT_SMI && + !(dd->ipath_flags & IPATH_LINKACTIVE)) { + ret = -ENETDOWN; + goto bail; + } + /* Check that state is OK to post send. */ if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) goto bail_inval; @@ -611,7 +620,7 @@ void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, goto bail; } - opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f; dev->opstats[opcode].n_bytes += tlen; dev->opstats[opcode].n_packets++; @@ -1021,7 +1030,7 @@ static void sdma_complete(void *cookie, int status) struct ipath_verbs_txreq *tx = cookie; struct ipath_qp *qp = tx->qp; struct ipath_ibdev *dev = to_idev(qp->ibqp.device); - unsigned int flags; + unsigned long flags; enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; @@ -1051,7 +1060,7 @@ static void sdma_complete(void *cookie, int status) static void decrement_dma_busy(struct ipath_qp *qp) { - unsigned int flags; + unsigned long flags; if (atomic_dec_and_test(&qp->s_dma_busy)) { spin_lock_irqsave(&qp->s_lock, flags); @@ -1221,7 +1230,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, unsigned flush_wc; u32 control; int ret; - unsigned int flags; + unsigned long flags; piobuf = ipath_getpiobuf(dd, plen, NULL); if (unlikely(piobuf == NULL)) { @@ -1578,7 +1587,7 @@ static int ipath_query_port(struct ib_device *ibdev, u64 ibcstat; memset(props, 0, sizeof(*props)); - props->lid = lid ? lid : __constant_be16_to_cpu(IB_LID_PERMISSIVE); + props->lid = lid ? 
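
The sdma_complete(), decrement_dma_busy(), and ipath_verbs_send_pio() hunks above fix the type of the saved-IRQ-state word: spin_lock_irqsave() stores the architecture's full interrupt state into its second argument, which must be unsigned long; an unsigned int truncates it on 64-bit machines and feeds a corrupted value back to spin_unlock_irqrestore(). The idiom, as a minimal sketch with hypothetical names:

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(example_lock);
	static unsigned int example_count;

	static void example_bump(void)
	{
		unsigned long flags;	/* must be unsigned long, never int */

		spin_lock_irqsave(&example_lock, flags);
		example_count++;
		spin_unlock_irqrestore(&example_lock, flags);
	}
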
lid : be16_to_cpu(IB_LID_PERMISSIVE); props->lmc = dd->ipath_lmc; props->sm_lid = dev->sm_lid; props->sm_sl = dev->sm_sl; @@ -1845,7 +1854,7 @@ unsigned ipath_get_npkeys(struct ipath_devdata *dd) } /** - * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table + * ipath_get_pkey - return the indexed PKEY from the port PKEY table * @dd: the infinipath device * @index: the PKEY index */ @@ -1853,6 +1862,7 @@ unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index) { unsigned ret; + /* always a kernel port, no locking needed */ if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) ret = 0; else @@ -2173,11 +2183,12 @@ int ipath_register_ib_device(struct ipath_devdata *dd) snprintf(dev->node_desc, sizeof(dev->node_desc), IPATH_IDSTR " %s", init_utsname()->nodename); - ret = ib_register_device(dev); + ret = ib_register_device(dev, NULL); if (ret) goto err_reg; - if (ipath_verbs_register_sysfs(dev)) + ret = ipath_verbs_register_sysfs(dev); + if (ret) goto err_class; enable_timer(dd); @@ -2317,15 +2328,15 @@ static int ipath_verbs_register_sysfs(struct ib_device *dev) int i; int ret; - for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) - if (device_create_file(&dev->dev, - ipath_class_attributes[i])) { - ret = 1; + for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) { + ret = device_create_file(&dev->dev, + ipath_class_attributes[i]); + if (ret) goto bail; - } - - ret = 0; - + } + return 0; bail: + for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) + device_remove_file(&dev->dev, ipath_class_attributes[i]); return ret; } diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h index 9d12ae8a778..ae6cff4abff 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.h +++ b/drivers/infiniband/hw/ipath/ipath_verbs.h @@ -86,11 +86,11 @@ #define IB_PMA_SAMPLE_STATUS_RUNNING 0x02 /* Mandatory IB performance counter select values. 
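A note on the ipath_verbs_register_sysfs() rewrite above: it now propagates the real device_create_file() return code and removes the attribute files again on failure, instead of reporting a bare flag. A minimal sketch of that create-then-unwind idiom, assuming a hypothetical attrs[]/n pair (not the driver's names):

#include <linux/device.h>

static int register_attrs(struct device *dev,
			  struct device_attribute **attrs, int n)
{
	int i, ret;

	for (i = 0; i < n; ++i) {
		ret = device_create_file(dev, attrs[i]);
		if (ret)
			goto unwind;
	}
	return 0;

unwind:
	while (--i >= 0)	/* remove only what was created */
		device_remove_file(dev, attrs[i]);
	return ret;
}

Unwinding only the files actually created is the strict form; removing the whole array on failure, as the hunk does, should also be tolerated, since removing a never-created attribute file is effectively a no-op.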
*/ -#define IB_PMA_PORT_XMIT_DATA __constant_htons(0x0001) -#define IB_PMA_PORT_RCV_DATA __constant_htons(0x0002) -#define IB_PMA_PORT_XMIT_PKTS __constant_htons(0x0003) -#define IB_PMA_PORT_RCV_PKTS __constant_htons(0x0004) -#define IB_PMA_PORT_XMIT_WAIT __constant_htons(0x0005) +#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001) +#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002) +#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003) +#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004) +#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005) struct ib_reth { __be64 vaddr; @@ -431,6 +431,7 @@ struct ipath_qp { u32 s_lsn; /* limit sequence number (credit) */ struct ipath_swqe *s_wq; /* send work queue */ struct ipath_swqe *s_wqe; + struct ipath_sge *r_ud_sg_list; struct ipath_rq r_rq; /* receive work queue */ struct ipath_sge r_sg_list[0]; /* verified SGEs */ }; diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c index d73e3223287..6216ea92385 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c @@ -32,6 +32,8 @@ */ #include <linux/rculist.h> +#include <linux/sched.h> +#include <linux/slab.h> #include "ipath_verbs.h" diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig index 4175a4bd0c7..fc01deac1d3 100644 --- a/drivers/infiniband/hw/mlx4/Kconfig +++ b/drivers/infiniband/hw/mlx4/Kconfig @@ -1,5 +1,7 @@ config MLX4_INFINIBAND tristate "Mellanox ConnectX HCA support" + depends on NETDEVICES && ETHERNET && PCI && INET + select NET_VENDOR_MELLANOX select MLX4_CORE ---help--- This driver provides low-level InfiniBand support for diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile index 70f09c7826d..f4213b3a8fe 100644 --- a/drivers/infiniband/hw/mlx4/Makefile +++ b/drivers/infiniband/hw/mlx4/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o -mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o +mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o cm.o alias_GUID.o sysfs.o diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index c75ac9463e2..2d8c3397774 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -30,64 +30,141 @@ * SOFTWARE. 
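The define changes just above (and the __constant_cpu_to_le64 ones earlier in ipath_user_sdma.c) rely on the generic byte-order macros folding to compile-time constants whenever their argument is constant, which makes the __constant_* spellings redundant. A small illustration under that assumption; the array name is ours:

#include <linux/types.h>
#include <asm/byteorder.h>

/* cpu_to_be16() on a literal is a constant expression, so it is
 * usable in static initializers (and case labels) just like the
 * old __constant_htons() form. */
static const __be16 pma_selectors[] = {
	cpu_to_be16(0x0001),	/* PortXmitData */
	cpu_to_be16(0x0002),	/* PortRcvData  */
};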
*/ +#include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> + +#include <linux/slab.h> +#include <linux/inet.h> +#include <linux/string.h> + #include "mlx4_ib.h" -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct mlx4_ib_ah *ah) { struct mlx4_dev *dev = to_mdev(pd->device)->dev; - struct mlx4_ib_ah *ah; - ah = kmalloc(sizeof *ah, GFP_ATOMIC); - if (!ah) - return ERR_PTR(-ENOMEM); - - memset(&ah->av, 0, sizeof ah->av); - - ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); - ah->av.g_slid = ah_attr->src_path_bits; - ah->av.dlid = cpu_to_be16(ah_attr->dlid); - if (ah_attr->static_rate) { - ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; - while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && - !(1 << ah->av.stat_rate & dev->caps.stat_rate_support)) - --ah->av.stat_rate; - } - ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.ib.g_slid = ah_attr->src_path_bits; if (ah_attr->ah_flags & IB_AH_GRH) { - ah->av.g_slid |= 0x80; - ah->av.gid_index = ah_attr->grh.sgid_index; - ah->av.hop_limit = ah_attr->grh.hop_limit; - ah->av.sl_tclass_flowlabel |= + ah->av.ib.g_slid |= 0x80; + ah->av.ib.gid_index = ah_attr->grh.sgid_index; + ah->av.ib.hop_limit = ah_attr->grh.hop_limit; + ah->av.ib.sl_tclass_flowlabel |= cpu_to_be32((ah_attr->grh.traffic_class << 20) | ah_attr->grh.flow_label); - memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16); + memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16); } + ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid); + if (ah_attr->static_rate) { + ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) + --ah->av.ib.stat_rate; + } + ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + return &ah->ibah; } +static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct mlx4_ib_ah *ah) +{ + struct mlx4_ib_dev *ibdev = to_mdev(pd->device); + struct mlx4_dev *dev = ibdev->dev; + int is_mcast = 0; + struct in6_addr in6; + u16 vlan_tag; + + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); + if (rdma_is_multicast_addr(&in6)) { + is_mcast = 1; + rdma_get_mcast_mac(&in6, ah->av.eth.mac); + } else { + memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN); + } + vlan_tag = ah_attr->vlan_id; + if (vlan_tag < 0x1000) + vlan_tag |= (ah_attr->sl & 7) << 13; + ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); + ah->av.eth.gid_index = ah_attr->grh.sgid_index; + ah->av.eth.vlan = cpu_to_be16(vlan_tag); + if (ah_attr->static_rate) { + ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; + while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && + !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support)) + --ah->av.eth.stat_rate; + } + + /* + * HW requires multicast LID so we just choose one. 
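In create_iboe_ah() above, a VLAN id below 0x1000 is merged with the service level before being stored in the address vector: the SL lands in the top three bits, i.e. it doubles as the 802.1Q PCP priority field. A sketch of the assumed tag layout; the helper name is ours:

#include <linux/types.h>

/* 802.1Q TCI layout: bits 15..13 PCP (priority), bit 12 DEI,
 * bits 11..0 VID. */
static inline u16 vlan_tci_pack(u16 vid, u8 sl)
{
	return (vid & 0x0fff) | ((u16)(sl & 0x7) << 13);
}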
+ */ + if (is_mcast) + ah->av.ib.dlid = cpu_to_be16(0xc000); + + memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); + ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29); + + return &ah->ibah; +} + +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct mlx4_ib_ah *ah; + struct ib_ah *ret; + + ah = kzalloc(sizeof *ah, GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) { + if (!(ah_attr->ah_flags & IB_AH_GRH)) { + ret = ERR_PTR(-EINVAL); + } else { + /* + * TBD: need to handle the case when we get + * called in an atomic context and there we + * might sleep. We don't expect this + * currently since we're working with link + * local addresses which we can translate + * without going to sleep. + */ + ret = create_iboe_ah(pd, ah_attr, ah); + } + + if (IS_ERR(ret)) + kfree(ah); + + return ret; + } else + return create_ib_ah(pd, ah_attr, ah); /* never fails */ +} + int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct mlx4_ib_ah *ah = to_mah(ibah); + enum rdma_link_layer ll; memset(ah_attr, 0, sizeof *ah_attr); - ah_attr->dlid = be16_to_cpu(ah->av.dlid); - ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; - ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24; - if (ah->av.stat_rate) - ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET; - ah_attr->src_path_bits = ah->av.g_slid & 0x7F; + ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24; + ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num); + ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0; + if (ah->av.ib.stat_rate) + ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET; + ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F; if (mlx4_ib_ah_grh_present(ah)) { ah_attr->ah_flags = IB_AH_GRH; ah_attr->grh.traffic_class = - be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20; + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20; ah_attr->grh.flow_label = - be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff; - ah_attr->grh.hop_limit = ah->av.hop_limit; - ah_attr->grh.sgid_index = ah->av.gid_index; - memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16); + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff; + ah_attr->grh.hop_limit = ah->av.ib.hop_limit; + ah_attr->grh.sgid_index = ah->av.ib.gid_index; + memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16); } return 0; diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c new file mode 100644 index 00000000000..0eb141c4141 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -0,0 +1,688 @@ +/* + * Copyright (c) 2012 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + /***********************************************************/ +/* This file supports the handling of the Alias GUID feature. */ +/***********************************************************/ +#include <rdma/ib_mad.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_cache.h> +#include <rdma/ib_sa.h> +#include <rdma/ib_pack.h> +#include <linux/mlx4/cmd.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <rdma/ib_user_verbs.h> +#include <linux/delay.h> +#include "mlx4_ib.h" + +/* +The driver keeps the current state of all GUIDs, as they are in the HW. +Whenever we receive an SMP MAD GUIDInfo record, the data will be cached. +*/ + +struct mlx4_alias_guid_work_context { + u8 port; + struct mlx4_ib_dev *dev ; + struct ib_sa_query *sa_query; + struct completion done; + int query_id; + struct list_head list; + int block_num; +}; + +struct mlx4_next_alias_guid_work { + u8 port; + u8 block_num; + struct mlx4_sriov_alias_guid_info_rec_det rec_det; +}; + + +void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num, + u8 port_num, u8 *p_data) +{ + int i; + u64 guid_indexes; + int slave_id; + int port_index = port_num - 1; + + if (!mlx4_is_master(dev->dev)) + return; + + guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid. + ports_guid[port_num - 1]. + all_rec_per_port[block_num].guid_indexes); + pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes); + + for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { + /* The location of the specific index starts from bit number 4 + * until bit number 11 */ + if (test_bit(i + 4, (unsigned long *)&guid_indexes)) { + slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; + if (slave_id >= dev->dev->num_slaves) { + pr_debug("The last slave: %d\n", slave_id); + return; + } + + /* cache the guid: */ + memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id], + &p_data[i * GUID_REC_SIZE], + GUID_REC_SIZE); + } else + pr_debug("Guid number: %d in block: %d" + " was not updated\n", i, block_num); + } +} + +static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index) +{ + if (index >= NUM_ALIAS_GUID_PER_PORT) { + pr_err("%s: ERROR: asked for index:%d\n", __func__, index); + return (__force __be64) -1; + } + return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index]; +} + + +ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index) +{ + return IB_SA_COMP_MASK(4 + index); +} + +/* + * Whenever a new GUID is set/unset (GUID table change), create an event and + * notify the relevant slave (the master should also be notified). + * If the GUID value does not match what we have in the cache, the slave will + * not be updated; in that case it waits for smp_snoop or the port management + * event to call the function and update the slave.
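The guid_indexes bookkeeping above assumes the SA GUIDInfo comp-mask layout: the first component bits cover the LID and block-number fields, so GUID slot i of a record corresponds to mask bit 4 + i (exactly what IB_SA_COMP_MASK(4 + index) produces), and slot i of block b belongs to slave b * 8 + i. A sketch under those assumptions; the macro and helper names are ours:

#include <linux/types.h>

#define GUIDS_PER_REC	8	/* NUM_ALIAS_GUID_IN_REC in the driver */

static inline int guid_comp_mask_bit(int slot)
{
	return 4 + slot;	/* slots 0..7 map to bits 4..11 */
}

static inline int guid_slot_to_slave(int block, int slot)
{
	return block * GUIDS_PER_REC + slot;
}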
+ * block_number - the index of the block (16 blocks available) + * port_number - 1 or 2 + */ +void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, + int block_num, u8 port_num, + u8 *p_data) +{ + int i; + u64 guid_indexes; + int slave_id; + enum slave_port_state new_state; + enum slave_port_state prev_state; + __be64 tmp_cur_ag, form_cache_ag; + enum slave_port_gen_event gen_event; + + if (!mlx4_is_master(dev->dev)) + return; + + guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid. + ports_guid[port_num - 1]. + all_rec_per_port[block_num].guid_indexes); + pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes); + + /*calculate the slaves and notify them*/ + for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { + /* the location of the specific index runs from bits 4..11 */ + if (!(test_bit(i + 4, (unsigned long *)&guid_indexes))) + continue; + + slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; + if (slave_id >= dev->dev->num_vfs + 1) + return; + tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; + form_cache_ag = get_cached_alias_guid(dev, port_num, + (NUM_ALIAS_GUID_IN_REC * block_num) + i); + /* + * Check if guid is not the same as in the cache, + * If it is different, wait for the snoop_smp or the port mgmt + * change event to update the slave on its port state change + */ + if (tmp_cur_ag != form_cache_ag) + continue; + mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num); + + /*2 cases: Valid GUID, and Invalid Guid*/ + + if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/ + prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num); + new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num, + MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID, + &gen_event); + pr_debug("slave: %d, port: %d prev_port_state: %d," + " new_port_state: %d, gen_event: %d\n", + slave_id, port_num, prev_state, new_state, gen_event); + if (gen_event == SLAVE_PORT_GEN_EVENT_UP) { + pr_debug("sending PORT_UP event to slave: %d, port: %d\n", + slave_id, port_num); + mlx4_gen_port_state_change_eqe(dev->dev, slave_id, + port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE); + } + } else { /* request to invalidate GUID */ + set_and_calc_slave_port_state(dev->dev, slave_id, port_num, + MLX4_PORT_STATE_IB_EVENT_GID_INVALID, + &gen_event); + pr_debug("sending PORT DOWN event to slave: %d, port: %d\n", + slave_id, port_num); + mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num, + MLX4_PORT_CHANGE_SUBTYPE_DOWN); + } + } +} + +static void aliasguid_query_handler(int status, + struct ib_sa_guidinfo_rec *guid_rec, + void *context) +{ + struct mlx4_ib_dev *dev; + struct mlx4_alias_guid_work_context *cb_ctx = context; + u8 port_index ; + int i; + struct mlx4_sriov_alias_guid_info_rec_det *rec; + unsigned long flags, flags1; + + if (!context) + return; + + dev = cb_ctx->dev; + port_index = cb_ctx->port - 1; + rec = &dev->sriov.alias_guid.ports_guid[port_index]. + all_rec_per_port[cb_ctx->block_num]; + + if (status) { + rec->status = MLX4_GUID_INFO_STATUS_IDLE; + pr_debug("(port: %d) failed: status = %d\n", + cb_ctx->port, status); + goto out; + } + + if (guid_rec->block_num != cb_ctx->block_num) { + pr_err("block num mismatch: %d != %d\n", + cb_ctx->block_num, guid_rec->block_num); + goto out; + } + + pr_debug("lid/port: %d/%d, block_num: %d\n", + be16_to_cpu(guid_rec->lid), cb_ctx->port, + guid_rec->block_num); + + rec = &dev->sriov.alias_guid.ports_guid[port_index]. 
+ all_rec_per_port[guid_rec->block_num]; + + rec->status = MLX4_GUID_INFO_STATUS_SET; + rec->method = MLX4_GUID_INFO_RECORD_SET; + + for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) { + __be64 tmp_cur_ag; + tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE]; + /* Check whether the SM assigned this record. If it didn't, + * and it was not a sysadmin request, ask the SM to provide + * a new GUID (instead of the driver's requested value). + */ + if (tmp_cur_ag == MLX4_NOT_SET_GUID) { + mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in " + "block_num: %d was declined by SM, " + "ownership by %d (0 = driver, 1=sysAdmin," + " 2=None)\n", __func__, i, + guid_rec->block_num, rec->ownership); + if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) { + /* driver-assigned record: ask the SM for a new GUID */ + *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] = + MLX4_NOT_SET_GUID; + + /* Mark the record as not assigned, and let it + * be sent again on the next scheduled work. */ + rec->status = MLX4_GUID_INFO_STATUS_IDLE; + rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i); + } + } else { + /* properly assigned record. */ + /* Save the GUID we just got from the SM in admin_guid + * so that it is persistent; in subsequent requests the + * process will ask the SM for the same GUID. */ + if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN && + tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) { + /* the sysadmin assignment failed. */ + mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set" + " admin guid after SysAdmin " + "configuration. " + "Record num %d in block_num:%d " + "was declined by SM, " + "new val(0x%llx) was kept\n", + __func__, i, + guid_rec->block_num, + be64_to_cpu(*(__be64 *) & + rec->all_recs[i * GUID_REC_SIZE])); + } else { + memcpy(&rec->all_recs[i * GUID_REC_SIZE], + &guid_rec->guid_info_list[i * GUID_REC_SIZE], + GUID_REC_SIZE); + } + } + } + /* + This function is called here to cover the case where the + SM doesn't send an SMP, so the driver notifies the slave + from the SA response. + */ + mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num, + cb_ctx->port, + guid_rec->guid_info_list); +out: + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + if (!dev->sriov.is_going_down) + queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq, + &dev->sriov.alias_guid.ports_guid[port_index]. + alias_guid_work, 0); + if (cb_ctx->sa_query) { + list_del(&cb_ctx->list); + kfree(cb_ctx); + } else + complete(&cb_ctx->done); + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + +static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index) +{ + int i; + u64 cur_admin_val; + ib_sa_comp_mask comp_mask = 0; + + dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status + = MLX4_GUID_INFO_STATUS_IDLE; + dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method + = MLX4_GUID_INFO_RECORD_SET; + + /* calculate the comp_mask for that record. */ + for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { + cur_admin_val = + *(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1]. + all_rec_per_port[index].all_recs[GUID_REC_SIZE * i]; + /* + Check the admin value: if it is marked for deletion (~00LL), or + it is the first GUID of the first record (the HW GUID), or + the record is not owned by the sysadmin and the SM doesn't + need to assign GUIDs, then don't put it up for assignment.
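aliasguid_query_handler() above applies a small ownership policy whenever the SM declines (zeroes) a GUID: only driver-assigned records are cleared, marked idle, and re-queried on the next work run; sysadmin-assigned and unassigned records keep their current value, with a warning for the failed sysadmin case. A condensed sketch of that decision; the enum and helper are illustrative, not the driver's:

enum guid_owner { OWNER_DRIVER, OWNER_SYSADMIN, OWNER_NONE };

/* nonzero: clear the slot and ask the SM again on the next pass */
static int retry_declined_guid(enum guid_owner owner)
{
	return owner == OWNER_DRIVER;
}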
+ */ + if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val || + (!index && !i) || + MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid. + ports_guid[port - 1].all_rec_per_port[index].ownership) + continue; + comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i); + } + dev->sriov.alias_guid.ports_guid[port - 1]. + all_rec_per_port[index].guid_indexes = comp_mask; +} + +static int set_guid_rec(struct ib_device *ibdev, + u8 port, int index, + struct mlx4_sriov_alias_guid_info_rec_det *rec_det) +{ + int err; + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct ib_sa_guidinfo_rec guid_info_rec; + ib_sa_comp_mask comp_mask; + struct ib_port_attr attr; + struct mlx4_alias_guid_work_context *callback_context; + unsigned long resched_delay, flags, flags1; + struct list_head *head = + &dev->sriov.alias_guid.ports_guid[port - 1].cb_list; + + err = __mlx4_ib_query_port(ibdev, port, &attr, 1); + if (err) { + pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n", + err, port); + return err; + } + /*check the port was configured by the sm, otherwise no need to send */ + if (attr.state != IB_PORT_ACTIVE) { + pr_debug("port %d not active...rescheduling\n", port); + resched_delay = 5 * HZ; + err = -EAGAIN; + goto new_schedule; + } + + callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL); + if (!callback_context) { + err = -ENOMEM; + resched_delay = HZ * 5; + goto new_schedule; + } + callback_context->port = port; + callback_context->dev = dev; + callback_context->block_num = index; + + memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec)); + + guid_info_rec.lid = cpu_to_be16(attr.lid); + guid_info_rec.block_num = index; + + memcpy(guid_info_rec.guid_info_list, rec_det->all_recs, + GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC); + comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM | + rec_det->guid_indexes; + + init_completion(&callback_context->done); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + list_add_tail(&callback_context->list, head); + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + + callback_context->query_id = + ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client, + ibdev, port, &guid_info_rec, + comp_mask, rec_det->method, 1000, + GFP_KERNEL, aliasguid_query_handler, + callback_context, + &callback_context->sa_query); + if (callback_context->query_id < 0) { + pr_debug("ib_sa_guid_info_rec_query failed, query_id: " + "%d. 
will reschedule to the next 1 sec.\n", + callback_context->query_id); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + list_del(&callback_context->list); + kfree(callback_context); + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + resched_delay = 1 * HZ; + err = -EAGAIN; + goto new_schedule; + } + err = 0; + goto out; + +new_schedule: + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + invalidate_guid_record(dev, port, index); + if (!dev->sriov.is_going_down) { + queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq, + &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work, + resched_delay); + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); + +out: + return err; +} + +void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port) +{ + int i; + unsigned long flags, flags1; + + pr_debug("port %d\n", port); + + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++) + invalidate_guid_record(dev, port, i); + + if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) { + /* + make sure no work waits in the queue, if the work is already + queued(not on the timer) the cancel will fail. That is not a problem + because we just want the work started. + */ + cancel_delayed_work(&dev->sriov.alias_guid. + ports_guid[port - 1].alias_guid_work); + queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq, + &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work, + 0); + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + +/* The function returns the next record that was + * not configured (or failed to be configured) */ +static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port, + struct mlx4_next_alias_guid_work *rec) +{ + int j; + unsigned long flags; + + for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags); + if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status == + MLX4_GUID_INFO_STATUS_IDLE) { + memcpy(&rec->rec_det, + &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j], + sizeof (struct mlx4_sriov_alias_guid_info_rec_det)); + rec->port = port; + rec->block_num = j; + dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status = + MLX4_GUID_INFO_STATUS_PENDING; + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); + return 0; + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); + } + return -ENOENT; +} + +static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port, + int rec_index, + struct mlx4_sriov_alias_guid_info_rec_det *rec_det) +{ + dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes = + rec_det->guid_indexes; + memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs, + rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE); + dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status = + rec_det->status; +} + +static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port) +{ + int j; + struct mlx4_sriov_alias_guid_info_rec_det rec_det ; + + for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) { + 
memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE); + rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) | + IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 | + IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 | + IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 | + IB_SA_GUIDINFO_REC_GID7; + rec_det.status = MLX4_GUID_INFO_STATUS_IDLE; + set_administratively_guid_record(dev, port, j, &rec_det); + } +} + +static void alias_guid_work(struct work_struct *work) +{ + struct delayed_work *delay = to_delayed_work(work); + int ret = 0; + struct mlx4_next_alias_guid_work *rec; + struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port = + container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det, + alias_guid_work); + struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent; + struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid, + struct mlx4_ib_sriov, + alias_guid); + struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov); + + rec = kzalloc(sizeof *rec, GFP_KERNEL); + if (!rec) { + pr_err("alias_guid_work: No Memory\n"); + return; + } + + pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1); + ret = get_next_record_to_update(dev, sriov_alias_port->port, rec); + if (ret) { + pr_debug("No more records to update.\n"); + goto out; + } + + set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num, + &rec->rec_det); + +out: + kfree(rec); +} + + +void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port) +{ + unsigned long flags, flags1; + + if (!mlx4_is_master(dev->dev)) + return; + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); + if (!dev->sriov.is_going_down) { + queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq, + &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0); + } + spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + +void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) +{ + int i; + struct mlx4_ib_sriov *sriov = &dev->sriov; + struct mlx4_alias_guid_work_context *cb_ctx; + struct mlx4_sriov_alias_guid_port_rec_det *det; + struct ib_sa_query *sa_query; + unsigned long flags; + + for (i = 0 ; i < dev->num_ports; i++) { + cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work); + det = &sriov->alias_guid.ports_guid[i]; + spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); + while (!list_empty(&det->cb_list)) { + cb_ctx = list_entry(det->cb_list.next, + struct mlx4_alias_guid_work_context, + list); + sa_query = cb_ctx->sa_query; + cb_ctx->sa_query = NULL; + list_del(&cb_ctx->list); + spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); + ib_sa_cancel_query(cb_ctx->query_id, sa_query); + wait_for_completion(&cb_ctx->done); + kfree(cb_ctx); + spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); + } + spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags); + } + for (i = 0 ; i < dev->num_ports; i++) { + flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); + destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); + } + ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); + kfree(dev->sriov.alias_guid.sa_client); +} + +int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev) +{ + char alias_wq_name[15]; + int ret = 0; + int i, j, k; + union ib_gid gid; + + if (!mlx4_is_master(dev->dev)) + return 0; + dev->sriov.alias_guid.sa_client = + 
kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL); + if (!dev->sriov.alias_guid.sa_client) + return -ENOMEM; + + ib_sa_register_client(dev->sriov.alias_guid.sa_client); + + spin_lock_init(&dev->sriov.alias_guid.ag_work_lock); + + for (i = 1; i <= dev->num_ports; ++i) { + if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) { + ret = -EFAULT; + goto err_unregister; + } + } + + for (i = 0 ; i < dev->num_ports; i++) { + memset(&dev->sriov.alias_guid.ports_guid[i], 0, + sizeof (struct mlx4_sriov_alias_guid_port_rec_det)); + /*Check if the SM doesn't need to assign the GUIDs*/ + for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { + if (mlx4_ib_sm_guid_assign) { + dev->sriov.alias_guid.ports_guid[i]. + all_rec_per_port[j]. + ownership = MLX4_GUID_DRIVER_ASSIGN; + continue; + } + dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j]. + ownership = MLX4_GUID_NONE_ASSIGN; + /*mark each val as it was deleted, + till the sysAdmin will give it valid val*/ + for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) { + *(__be64 *)&dev->sriov.alias_guid.ports_guid[i]. + all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] = + cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL); + } + } + INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list); + /*prepare the records, set them to be allocated by sm*/ + for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) + invalidate_guid_record(dev, i + 1, j); + + dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid; + dev->sriov.alias_guid.ports_guid[i].port = i; + if (mlx4_ib_sm_guid_assign) + set_all_slaves_guids(dev, i); + + snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i); + dev->sriov.alias_guid.ports_guid[i].wq = + create_singlethread_workqueue(alias_wq_name); + if (!dev->sriov.alias_guid.ports_guid[i].wq) { + ret = -ENOMEM; + goto err_thread; + } + INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work, + alias_guid_work); + } + return 0; + +err_thread: + for (--i; i >= 0; i--) { + destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq); + dev->sriov.alias_guid.ports_guid[i].wq = NULL; + } + +err_unregister: + ib_sa_unregister_client(dev->sriov.alias_guid.sa_client); + kfree(dev->sriov.alias_guid.sa_client); + dev->sriov.alias_guid.sa_client = NULL; + pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret); + return ret; +} diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c new file mode 100644 index 00000000000..56a593e0ae5 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -0,0 +1,478 @@ +/* + * Copyright (c) 2012 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
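The new cm.c introduced here paravirtualizes connection management for SR-IOV: a communication id chosen by a slave (sl_cm_id) is not unique across slaves, so the master allocates a host-unique pv_cm_id for each, keeping the association in an rb-tree keyed by (sl_cm_id, slave_id) for the outbound direction and in an idr keyed by pv_cm_id for the inbound one. A sketch of the mapping this implies; field names mirror the driver's id_map_entry, the struct name is ours:

#include <linux/types.h>

struct cm_id_map {
	u32 sl_cm_id;	/* id chosen inside the slave            */
	u32 pv_cm_id;	/* unique id actually sent on the wire   */
	int slave_id;	/* which function the mapping belongs to */
};

/* outbound MAD: rewrite sl_cm_id -> pv_cm_id (rb-tree lookup)
 * inbound MAD:  rewrite pv_cm_id -> sl_cm_id (idr_find), then
 *               route the MAD to slave_id. */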
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/ib_mad.h> + +#include <linux/mlx4/cmd.h> +#include <linux/rbtree.h> +#include <linux/idr.h> +#include <rdma/ib_cm.h> + +#include "mlx4_ib.h" + +#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ) + +struct id_map_entry { + struct rb_node node; + + u32 sl_cm_id; + u32 pv_cm_id; + int slave_id; + int scheduled_delete; + struct mlx4_ib_dev *dev; + + struct list_head list; + struct delayed_work timeout; +}; + +struct cm_generic_msg { + struct ib_mad_hdr hdr; + + __be32 local_comm_id; + __be32 remote_comm_id; +}; + +struct cm_sidr_generic_msg { + struct ib_mad_hdr hdr; + __be32 request_id; +}; + +struct cm_req_msg { + unsigned char unused[0x60]; + union ib_gid primary_path_sgid; +}; + + +static void set_local_comm_id(struct ib_mad *mad, u32 cm_id) +{ + if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + msg->request_id = cpu_to_be32(cm_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + pr_err("trying to set local_comm_id in SIDR_REP\n"); + return; + } else { + struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; + msg->local_comm_id = cpu_to_be32(cm_id); + } +} + +static u32 get_local_comm_id(struct ib_mad *mad) +{ + if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + return be32_to_cpu(msg->request_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + pr_err("trying to get local_comm_id from SIDR_REP\n"); + return -1; + } else { + struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; + return be32_to_cpu(msg->local_comm_id); + } +} + +static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id) +{ + if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + msg->request_id = cpu_to_be32(cm_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + pr_err("trying to set remote_comm_id in SIDR_REQ\n"); + return; + } else { + struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; + msg->remote_comm_id = cpu_to_be32(cm_id); + } +} + +static u32 get_remote_comm_id(struct ib_mad *mad) +{ + if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + struct cm_sidr_generic_msg *msg = + (struct cm_sidr_generic_msg *)mad; + return be32_to_cpu(msg->request_id); + } else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + pr_err("trying to get remote_comm_id from SIDR_REQ\n"); + return -1; + } else { + struct cm_generic_msg *msg = (struct cm_generic_msg *)mad; + return be32_to_cpu(msg->remote_comm_id); + } +} + +static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad) +{ + struct cm_req_msg *msg = (struct cm_req_msg *)mad; + + return msg->primary_path_sgid; +} + +/* The caller must hold the id map lock */ +static struct id_map_entry * +id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id) +{ + struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map; + struct rb_node *node = sl_id_map->rb_node; + + while
(node) { + struct id_map_entry *id_map_entry = + rb_entry(node, struct id_map_entry, node); + + if (id_map_entry->sl_cm_id > sl_cm_id) + node = node->rb_left; + else if (id_map_entry->sl_cm_id < sl_cm_id) + node = node->rb_right; + else if (id_map_entry->slave_id > slave_id) + node = node->rb_left; + else if (id_map_entry->slave_id < slave_id) + node = node->rb_right; + else + return id_map_entry; + } + return NULL; +} + +static void id_map_ent_timeout(struct work_struct *work) +{ + struct delayed_work *delay = to_delayed_work(work); + struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout); + struct id_map_entry *db_ent, *found_ent; + struct mlx4_ib_dev *dev = ent->dev; + struct mlx4_ib_sriov *sriov = &dev->sriov; + struct rb_root *sl_id_map = &sriov->sl_id_map; + int pv_id = (int) ent->pv_cm_id; + + spin_lock(&sriov->id_map_lock); + db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id); + if (!db_ent) + goto out; + found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id); + if (found_ent && found_ent == ent) + rb_erase(&found_ent->node, sl_id_map); + idr_remove(&sriov->pv_id_table, pv_id); + +out: + list_del(&ent->list); + spin_unlock(&sriov->id_map_lock); + kfree(ent); +} + +static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id) +{ + struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; + struct rb_root *sl_id_map = &sriov->sl_id_map; + struct id_map_entry *ent, *found_ent; + + spin_lock(&sriov->id_map_lock); + ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id); + if (!ent) + goto out; + found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id); + if (found_ent && found_ent == ent) + rb_erase(&found_ent->node, sl_id_map); + idr_remove(&sriov->pv_id_table, pv_cm_id); +out: + spin_unlock(&sriov->id_map_lock); +} + +static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new) +{ + struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map; + struct rb_node **link = &sl_id_map->rb_node, *parent = NULL; + struct id_map_entry *ent; + int slave_id = new->slave_id; + int sl_cm_id = new->sl_cm_id; + + ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id); + if (ent) { + pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n", + sl_cm_id); + + rb_replace_node(&ent->node, &new->node, sl_id_map); + return; + } + + /* Go to the bottom of the tree */ + while (*link) { + parent = *link; + ent = rb_entry(parent, struct id_map_entry, node); + + if (ent->sl_cm_id > sl_cm_id || (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id)) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + rb_link_node(&new->node, parent, link); + rb_insert_color(&new->node, sl_id_map); +} + +static struct id_map_entry * +id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id) +{ + int ret; + struct id_map_entry *ent; + struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; + + ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL); + if (!ent) { + mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n"); + return ERR_PTR(-ENOMEM); + } + + ent->sl_cm_id = sl_cm_id; + ent->slave_id = slave_id; + ent->scheduled_delete = 0; + ent->dev = to_mdev(ibdev); + INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout); + + idr_preload(GFP_KERNEL); + spin_lock(&to_mdev(ibdev)->sriov.id_map_lock); + + ret = idr_alloc_cyclic(&sriov->pv_id_table, ent, 0, 0, GFP_NOWAIT); + if (ret >= 0) { + ent->pv_cm_id = (u32)ret; + sl_id_map_add(ibdev, ent); + list_add_tail(&ent->list, 
&sriov->cm_list); + } + + spin_unlock(&sriov->id_map_lock); + idr_preload_end(); + + if (ret >= 0) + return ent; + + /*error flow*/ + kfree(ent); + mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret); + return ERR_PTR(-ENOMEM); +} + +static struct id_map_entry * +id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id) +{ + struct id_map_entry *ent; + struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; + + spin_lock(&sriov->id_map_lock); + if (*pv_cm_id == -1) { + ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id); + if (ent) + *pv_cm_id = (int) ent->pv_cm_id; + } else + ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id); + spin_unlock(&sriov->id_map_lock); + + return ent; +} + +static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) +{ + struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; + unsigned long flags; + + spin_lock(&sriov->id_map_lock); + spin_lock_irqsave(&sriov->going_down_lock, flags); + /*make sure that there is no schedule inside the scheduled work.*/ + if (!sriov->is_going_down) { + id->scheduled_delete = 1; + schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); + } + spin_unlock_irqrestore(&sriov->going_down_lock, flags); + spin_unlock(&sriov->id_map_lock); +} + +int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id, + struct ib_mad *mad) +{ + struct id_map_entry *id; + u32 sl_cm_id; + int pv_cm_id = -1; + + if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || + mad->mad_hdr.attr_id == CM_REP_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + sl_cm_id = get_local_comm_id(mad); + id = id_map_alloc(ibdev, slave_id, sl_cm_id); + if (IS_ERR(id)) { + mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n", + __func__, slave_id, sl_cm_id); + return PTR_ERR(id); + } + } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) { + return 0; + } else { + sl_cm_id = get_local_comm_id(mad); + id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id); + } + + if (!id) { + pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n", + slave_id, sl_cm_id); + return -EINVAL; + } + + set_local_comm_id(mad, id->pv_cm_id); + + if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) + schedule_delayed(ibdev, id); + else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) + id_map_find_del(ibdev, pv_cm_id); + + return 0; +} + +int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, + struct ib_mad *mad) +{ + u32 pv_cm_id; + struct id_map_entry *id; + + if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + union ib_gid gid; + + if (!slave) + return 0; + + gid = gid_from_req_msg(ibdev, mad); + *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id); + if (*slave < 0) { + mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n", + gid.global.interface_id); + return -ENOENT; + } + return 0; + } + + pv_cm_id = get_remote_comm_id(mad); + id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1); + + if (!id) { + pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id); + return -ENOENT; + } + + if (slave) + *slave = id->slave_id; + set_remote_comm_id(mad, id->sl_cm_id); + + if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) + schedule_delayed(ibdev, id); + else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID || + mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) { + id_map_find_del(ibdev, (int) pv_cm_id); + } + + return 0; +} + +void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev 
*dev) +{ + spin_lock_init(&dev->sriov.id_map_lock); + INIT_LIST_HEAD(&dev->sriov.cm_list); + dev->sriov.sl_id_map = RB_ROOT; + idr_init(&dev->sriov.pv_id_table); +} + +/* slave = -1 ==> all slaves */ +/* TBD -- call paravirt clean for single slave. Need for slave RESET event */ +void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) +{ + struct mlx4_ib_sriov *sriov = &dev->sriov; + struct rb_root *sl_id_map = &sriov->sl_id_map; + struct list_head lh; + struct rb_node *nd; + int need_flush = 1; + struct id_map_entry *map, *tmp_map; + /* cancel all delayed work queue entries */ + INIT_LIST_HEAD(&lh); + spin_lock(&sriov->id_map_lock); + list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) { + if (slave < 0 || slave == map->slave_id) { + if (map->scheduled_delete) + need_flush &= !!cancel_delayed_work(&map->timeout); + } + } + + spin_unlock(&sriov->id_map_lock); + + if (!need_flush) + flush_scheduled_work(); /* make sure all timers were flushed */ + + /* now, remove all leftover entries from databases*/ + spin_lock(&sriov->id_map_lock); + if (slave < 0) { + while (rb_first(sl_id_map)) { + struct id_map_entry *ent = + rb_entry(rb_first(sl_id_map), + struct id_map_entry, node); + + rb_erase(&ent->node, sl_id_map); + idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id); + } + list_splice_init(&dev->sriov.cm_list, &lh); + } else { + /* first, move nodes belonging to slave to db remove list */ + nd = rb_first(sl_id_map); + while (nd) { + struct id_map_entry *ent = + rb_entry(nd, struct id_map_entry, node); + nd = rb_next(nd); + if (ent->slave_id == slave) + list_move_tail(&ent->list, &lh); + } + /* remove those nodes from databases */ + list_for_each_entry_safe(map, tmp_map, &lh, list) { + rb_erase(&map->node, sl_id_map); + idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id); + } + + /* add remaining nodes from cm_list */ + list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) { + if (slave == map->slave_id) + list_move_tail(&map->list, &lh); + } + } + + spin_unlock(&sriov->id_map_lock); + + /* free any map entries left behind due to cancel_delayed_work above */ + list_for_each_entry_safe(map, tmp_map, &lh, list) { + list_del(&map->list); + kfree(map); + } +} diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 299f20832ab..1066eec854a 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -32,6 +33,8 @@ #include <linux/mlx4/cq.h> #include <linux/mlx4/qp.h> +#include <linux/mlx4/srq.h> +#include <linux/slab.h> #include "mlx4_ib.h" #include "user.h" @@ -48,7 +51,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type) struct ib_cq *ibcq; if (type != MLX4_EVENT_TYPE_CQ_ERROR) { - printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " + pr_warn("Unexpected event type %d " "on CQ %06x\n", type, cq->cqn); return; } @@ -64,7 +67,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type) static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n) { - return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe)); + return mlx4_buf_offset(&buf->buf, n * buf->entry_size); } static void *get_cqe(struct mlx4_ib_cq *cq, int n) @@ -75,8 +78,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n) static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n) { struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe); + struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe); - return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ + return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; } @@ -97,18 +101,19 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf * { int err; - err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe), - PAGE_SIZE * 2, &buf->buf); + err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, + PAGE_SIZE * 2, &buf->buf, GFP_KERNEL); if (err) goto out; + buf->entry_size = dev->dev->caps.cqe_size; err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift, &buf->mtt); if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf); + err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL); if (err) goto err_mtt; @@ -118,8 +123,7 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &buf->mtt); err_buf: - mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe), - &buf->buf); + mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf); out: return err; @@ -127,7 +131,7 @@ out: static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe) { - mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf); + mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf); } static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context, @@ -135,8 +139,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont u64 buf_addr, int cqe) { int err; + int cqe_size = dev->dev->caps.cqe_size; - *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe), + *umem = ib_umem_get(context, buf_addr, cqe * cqe_size, IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(*umem)) return PTR_ERR(*umem); @@ -204,7 +209,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector uar = &to_mucontext(context)->uar; } else { - err = mlx4_db_alloc(dev->dev, &cq->db, 1); + err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL); if (err) goto err_cq; @@ -220,8 +225,11 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector uar = &dev->priv_uar; } + if (dev->eq_table) + vector = dev->eq_table[vector % ibdev->num_comp_vectors]; + err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq, 0); + cq->db.dma, &cq->mcq, vector, 0, 0); if (err) goto err_dbmap; @@ -316,7 +324,7 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq 
*cq) u32 i; i = cq->mcq.cons_index; - while (get_sw_cqe(cq, i & cq->ibcq.cqe)) + while (get_sw_cqe(cq, i)) ++i; return i - cq->mcq.cons_index; @@ -324,16 +332,25 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq) static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) { - struct mlx4_cqe *cqe; + struct mlx4_cqe *cqe, *new_cqe; int i; + int cqe_size = cq->buf.entry_size; + int cqe_inc = cqe_size == 64 ? 1 : 0; i = cq->mcq.cons_index; cqe = get_cqe(cq, i & cq->ibcq.cqe); + cqe += cqe_inc; + while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { - memcpy(get_cqe_from_buf(&cq->resize_buf->buf, - (i + 1) & cq->resize_buf->cqe), - get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe)); + new_cqe = get_cqe_from_buf(&cq->resize_buf->buf, + (i + 1) & cq->resize_buf->cqe); + memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size); + new_cqe += cqe_inc; + + new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) | + (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0); cqe = get_cqe(cq, ++i & cq->ibcq.cqe); + cqe += cqe_inc; } ++cq->mcq.cons_index; } @@ -342,12 +359,13 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibcq->device); struct mlx4_ib_cq *cq = to_mcq(ibcq); + struct mlx4_mtt mtt; int outst_cqe; int err; mutex_lock(&cq->resize_mutex); - if (entries < 1 || entries > dev->dev->caps.max_cqes) { + if (entries < 1) { err = -EINVAL; goto out; } @@ -358,12 +376,17 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) goto out; } + if (entries > dev->dev->caps.max_cqes) { + err = -EINVAL; + goto out; + } + if (ibcq->uobject) { err = mlx4_alloc_resize_umem(dev, cq, entries, udata); if (err) goto out; } else { - /* Can't be smaller then the number of outstanding CQEs */ + /* Can't be smaller than the number of outstanding CQEs */ outst_cqe = mlx4_ib_get_outstanding_cqes(cq); if (entries < outst_cqe + 1) { err = 0; @@ -375,10 +398,13 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) goto out; } + mtt = cq->buf.mtt; + err = mlx4_cq_resize(dev->dev, &cq->mcq, entries, &cq->resize_buf->buf.mtt); if (err) goto err_buf; + mlx4_mtt_cleanup(dev->dev, &mtt); if (ibcq->uobject) { cq->buf = cq->resize_buf->buf; cq->ibcq.cqe = cq->resize_buf->cqe; @@ -389,10 +415,14 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) cq->resize_buf = NULL; cq->resize_umem = NULL; } else { + struct mlx4_ib_cq_buf tmp_buf; + int tmp_cqe = 0; + spin_lock_irq(&cq->lock); if (cq->resize_buf) { mlx4_ib_cq_resize_copy_cqes(cq); - mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); + tmp_buf = cq->buf; + tmp_cqe = cq->ibcq.cqe; cq->buf = cq->resize_buf->buf; cq->ibcq.cqe = cq->resize_buf->cqe; @@ -400,11 +430,15 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) cq->resize_buf = NULL; } spin_unlock_irq(&cq->lock); + + if (tmp_cqe) + mlx4_ib_free_cq_buf(dev, &tmp_buf, tmp_cqe); } goto out; err_buf: + mlx4_mtt_cleanup(dev->dev, &cq->resize_buf->buf.mtt); if (!ibcq->uobject) mlx4_ib_free_cq_buf(dev, &cq->resize_buf->buf, cq->resize_buf->cqe); @@ -419,6 +453,7 @@ err_buf: out: mutex_unlock(&cq->resize_mutex); + return err; } @@ -447,7 +482,7 @@ static void dump_cqe(void *cqe) { __be32 *buf = cqe; - printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", + pr_debug("CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", be32_to_cpu(buf[0]), 
be32_to_cpu(buf[1]), be32_to_cpu(buf[2]), be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]), be32_to_cpu(buf[6]), be32_to_cpu(buf[7])); @@ -457,7 +492,7 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe, struct ib_wc *wc) { if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) { - printk(KERN_DEBUG "local QP operation err " + pr_debug("local QP operation err " "(QPN %06x, WQE index %x, vendor syndrome %02x, " "opcode = %02x)\n", be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index), @@ -514,20 +549,48 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe, wc->vendor_err = cqe->vendor_err_syndrome; } -static int mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum) +static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) { - return ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4 | - MLX4_CQE_IPOIB_STATUS_IPV4F | - MLX4_CQE_IPOIB_STATUS_IPV4OPT | - MLX4_CQE_IPOIB_STATUS_IPV6 | - MLX4_CQE_IPOIB_STATUS_IPOK)) == - cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4 | - MLX4_CQE_IPOIB_STATUS_IPOK)) && - (status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_UDP | - MLX4_CQE_IPOIB_STATUS_TCP)) && + return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPV4F | + MLX4_CQE_STATUS_IPV4OPT | + MLX4_CQE_STATUS_IPV6 | + MLX4_CQE_STATUS_IPOK)) == + cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPOK)) && + (status & cpu_to_be16(MLX4_CQE_STATUS_UDP | + MLX4_CQE_STATUS_TCP)) && checksum == cpu_to_be16(0xffff); } +static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, + unsigned tail, struct mlx4_cqe *cqe, int is_eth) +{ + struct mlx4_ib_proxy_sqp_hdr *hdr; + + ib_dma_sync_single_for_cpu(qp->ibqp.device, + qp->sqp_proxy_rcv[tail].map, + sizeof (struct mlx4_ib_proxy_sqp_hdr), + DMA_FROM_DEVICE); + hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr); + wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index); + wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF; + wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0; + wc->dlid_path_bits = 0; + + if (is_eth) { + wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid); + memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4); + memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2); + wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); + } else { + wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); + wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); + } + + return 0; +} + static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_ib_qp **cur_qp, struct ib_wc *wc) @@ -536,16 +599,22 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_qp *mqp; struct mlx4_ib_wq *wq; struct mlx4_ib_srq *srq; + struct mlx4_srq *msrq = NULL; int is_send; int is_error; + int is_eth; u32 g_mlpath_rqpn; u16 wqe_ctr; + unsigned tail = 0; repoll: cqe = next_cqe_sw(cq); if (!cqe) return -EAGAIN; + if (cq->buf.entry_size == 64) + cqe++; + ++cq->mcq.cons_index; /* @@ -560,7 +629,7 @@ repoll: if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && is_send)) { - printk(KERN_WARNING "Completion for NOP opcode detected!\n"); + pr_warn("Completion for NOP opcode detected!\n"); return -EINVAL; } @@ -581,17 +650,17 @@ repoll: } if (!*cur_qp || - (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) { + (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) { /* * We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. 
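mlx4_ib_poll_one() above, like get_sw_cqe() and the resize copy earlier in this file, steps the CQE pointer forward by one 32-byte mlx4_cqe when cq->buf.entry_size is 64, since on 64-byte-CQE hardware the fields the driver decodes sit in the second half of each entry. A sketch of that stride adjustment, assuming the driver's entry_size convention; the helper name is ours:

#include <linux/mlx4/cq.h>

static inline struct mlx4_cqe *valid_cqe_half(struct mlx4_cqe *cqe,
					      int entry_size)
{
	/* cqe + 1 advances by sizeof(struct mlx4_cqe), i.e. 32 bytes */
	return entry_size == 64 ? cqe + 1 : cqe;
}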
*/ mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, - be32_to_cpu(cqe->my_qpn)); + be32_to_cpu(cqe->vlan_my_qpn)); if (unlikely(!mqp)) { - printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n", - cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff); + pr_warn("CQ %06x with entry for unknown QPN %06x\n", + cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); return -EINVAL; } @@ -600,6 +669,20 @@ repoll: wc->qp = &(*cur_qp)->ibqp; + if (wc->qp->qp_type == IB_QPT_XRC_TGT) { + u32 srq_num; + g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); + srq_num = g_mlpath_rqpn & 0xffffff; + /* SRQ is also in the radix tree */ + msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev, + srq_num); + if (unlikely(!msrq)) { + pr_warn("CQ %06x with entry for unknown SRQN %06x\n", + cq->mcq.cqn, srq_num); + return -EINVAL; + } + } + if (is_send) { wq = &(*cur_qp)->sq; if (!(*cur_qp)->sq_signal_bits) { @@ -613,9 +696,15 @@ repoll: wqe_ctr = be16_to_cpu(cqe->wqe_index); wc->wr_id = srq->wrid[wqe_ctr]; mlx4_ib_free_srq_wqe(srq, wqe_ctr); + } else if (msrq) { + srq = to_mibsrq(msrq); + wqe_ctr = be16_to_cpu(cqe->wqe_index); + wc->wr_id = srq->wrid[wqe_ctr]; + mlx4_ib_free_srq_wqe(srq, wqe_ctr); } else { wq = &(*cur_qp)->rq; - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + tail = wq->tail & (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[tail]; ++wq->tail; } @@ -637,6 +726,7 @@ repoll: case MLX4_OPCODE_SEND_IMM: wc->wc_flags |= IB_WC_WITH_IMM; case MLX4_OPCODE_SEND: + case MLX4_OPCODE_SEND_INVAL: wc->opcode = IB_WC_SEND; break; case MLX4_OPCODE_RDMA_READ: @@ -651,12 +741,26 @@ repoll: wc->opcode = IB_WC_FETCH_ADD; wc->byte_len = 8; break; + case MLX4_OPCODE_MASKED_ATOMIC_CS: + wc->opcode = IB_WC_MASKED_COMP_SWAP; + wc->byte_len = 8; + break; + case MLX4_OPCODE_MASKED_ATOMIC_FA: + wc->opcode = IB_WC_MASKED_FETCH_ADD; + wc->byte_len = 8; + break; case MLX4_OPCODE_BIND_MW: wc->opcode = IB_WC_BIND_MW; break; case MLX4_OPCODE_LSO: wc->opcode = IB_WC_LSO; break; + case MLX4_OPCODE_FMR: + wc->opcode = IB_WC_FAST_REG_MR; + break; + case MLX4_OPCODE_LOCAL_INVAL: + wc->opcode = IB_WC_LOCAL_INV; + break; } } else { wc->byte_len = be32_to_cpu(cqe->byte_cnt); @@ -667,6 +771,11 @@ repoll: wc->wc_flags = IB_WC_WITH_IMM; wc->ex.imm_data = cqe->immed_rss_invalid; break; + case MLX4_RECV_OPCODE_SEND_INVAL: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid); + break; case MLX4_RECV_OPCODE_SEND: wc->opcode = IB_WC_RECV; wc->wc_flags = 0; @@ -678,15 +787,40 @@ repoll: break; } + is_eth = (rdma_port_get_link_layer(wc->qp->device, + (*cur_qp)->port) == + IB_LINK_LAYER_ETHERNET); + if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) { + if ((*cur_qp)->mlx4_ib_qp_type & + (MLX4_IB_QPT_PROXY_SMI_OWNER | + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) + return use_tunnel_data(*cur_qp, cq, wc, tail, + cqe, is_eth); + } + wc->slid = be16_to_cpu(cqe->rlid); - wc->sl = cqe->sl >> 4; g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); wc->src_qp = g_mlpath_rqpn & 0xffffff; wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; - wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->ipoib_status, - cqe->checksum); + wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status, + cqe->checksum) ? 
IB_WC_IP_CSUM_OK : 0; + if (is_eth) { + wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; + if (be32_to_cpu(cqe->vlan_my_qpn) & + MLX4_CQE_VLAN_PRESENT_MASK) { + wc->vlan_id = be16_to_cpu(cqe->sl_vid) & + MLX4_CQE_VID_MASK; + } else { + wc->vlan_id = 0xffff; + } + memcpy(wc->smac, cqe->smac, ETH_ALEN); + wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); + } else { + wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; + wc->vlan_id = 0xffff; + } } return 0; @@ -708,8 +842,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) break; } - if (npolled) - mlx4_cq_set_ci(&cq->mcq); + mlx4_cq_set_ci(&cq->mcq); spin_unlock_irqrestore(&cq->lock, flags); @@ -736,6 +869,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) int nfreed = 0; struct mlx4_cqe *cqe, *dest; u8 owner_bit; + int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0; /* * First we need to find the current producer index, so we @@ -754,12 +888,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) */ while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); - if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) { + cqe += cqe_inc; + + if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) { if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); ++nfreed; } else if (nfreed) { dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); + dest += cqe_inc; + owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK; memcpy(dest, cqe, sizeof *cqe); dest->owner_sr_opcode = owner_bit | diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 8aee4233b38..c5174098636 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, struct mlx4_db *db) { struct mlx4_ib_user_db_page *page; - struct ib_umem_chunk *chunk; int err = 0; mutex_lock(&context->db_page_mutex); @@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, list_add(&page->list, &context->db_page_list); found: - chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); - db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); db->u.user_page = page; ++page->refcnt; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index cdca3a511e1..287ad0564ac 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -32,8 +32,13 @@ #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> +#include <rdma/ib_sa.h> +#include <rdma/ib_cache.h> +#include <linux/random.h> #include <linux/mlx4/cmd.h> +#include <linux/gfp.h> +#include <rdma/ib_pma.h> #include "mlx4_ib.h" @@ -42,7 +47,62 @@ enum { MLX4_IB_VENDOR_CLASS2 = 0xa }; -int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, +#define MLX4_TUN_SEND_WRID_SHIFT 34 +#define MLX4_TUN_QPN_SHIFT 32 +#define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT) +#define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT) + +#define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1) +#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3) + + /* Port mgmt change event handling */ + +#define GET_BLK_PTR_FROM_EQE(eqe) 
be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr) +#define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask) +#define NUM_IDX_IN_PKEY_TBL_BLK 32 +#define GUID_TBL_ENTRY_SIZE 8 /* size in bytes */ +#define GUID_TBL_BLK_NUM_ENTRIES 8 +#define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES) + +struct mlx4_mad_rcv_buf { + struct ib_grh grh; + u8 payload[256]; +} __packed; + +struct mlx4_mad_snd_buf { + u8 payload[256]; +} __packed; + +struct mlx4_tunnel_mad { + struct ib_grh grh; + struct mlx4_ib_tunnel_header hdr; + struct ib_mad mad; +} __packed; + +struct mlx4_rcv_tunnel_mad { + struct mlx4_rcv_tunnel_hdr hdr; + struct ib_grh grh; + struct ib_mad mad; +} __packed; + +static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num); +static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num); +static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, + int block, u32 change_bitmap); + +__be64 mlx4_ib_gen_node_guid(void) +{ +#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40)) + return cpu_to_be64(NODE_GUID_HI | prandom_u32()); +} + +__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx) +{ + return cpu_to_be64(atomic_inc_return(&ctx->tid)) | + cpu_to_be64(0xff00000000000000LL); +} + +int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad) { @@ -69,10 +129,13 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, * Key check traps can't be generated unless we have in_wc to * tell us where to send the trap. */ - if (ignore_mkey || !in_wc) + if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc) op_modifier |= 0x1; - if (ignore_bkey || !in_wc) + if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc) op_modifier |= 0x2; + if (mlx4_is_mfunc(dev->dev) && + (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc)) + op_modifier |= 0x8; if (in_wc) { struct { @@ -105,9 +168,10 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, in_modifier |= in_wc->slid << 16; } - err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, - in_modifier, op_modifier, - MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C); + err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier, + mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, + (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED); if (!err) memcpy(response_mad, outmailbox->buf, 256); @@ -122,6 +186,7 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) { struct ib_ah *new_ah; struct ib_ah_attr ah_attr; + unsigned long flags; if (!dev->send_agent[port_num - 1][0]) return; @@ -136,48 +201,134 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) if (IS_ERR(new_ah)) return; - spin_lock(&dev->sm_lock); + spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) ib_destroy_ah(dev->sm_ah[port_num - 1]); dev->sm_ah[port_num - 1] = new_ah; - spin_unlock(&dev->sm_lock); + spin_unlock_irqrestore(&dev->sm_lock, flags); } /* - * Snoop SM MADs for port info and P_Key table sets, so we can - * synthesize LID change and P_Key change events. + * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can + * synthesize LID change, Client-Rereg, GID change, and P_Key change events. 
*/ -static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad) +static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, + u16 prev_lid) { - struct ib_event event; + struct ib_port_info *pinfo; + u16 lid; + __be16 *base; + u32 bn, pkey_change_bitmap; + int i; + + struct mlx4_ib_dev *dev = to_mdev(ibdev); if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && - mad->mad_hdr.method == IB_MGMT_METHOD_SET) { - if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { - struct ib_port_info *pinfo = - (struct ib_port_info *) ((struct ib_smp *) mad)->data; + mad->mad_hdr.method == IB_MGMT_METHOD_SET) + switch (mad->mad_hdr.attr_id) { + case IB_SMP_ATTR_PORT_INFO: + pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; + lid = be16_to_cpu(pinfo->lid); - update_sm_ah(to_mdev(ibdev), port_num, + update_sm_ah(dev, port_num, be16_to_cpu(pinfo->sm_lid), pinfo->neighbormtu_mastersmsl & 0xf); - event.device = ibdev; - event.element.port_num = port_num; - if (pinfo->clientrereg_resv_subnetto & 0x80) - event.event = IB_EVENT_CLIENT_REREGISTER; - else - event.event = IB_EVENT_LID_CHANGE; + handle_client_rereg_event(dev, port_num); + + if (prev_lid != lid) + handle_lid_change_event(dev, port_num); + break; + + case IB_SMP_ATTR_PKEY_TABLE: + if (!mlx4_is_mfunc(dev->dev)) { + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_PKEY_CHANGE); + break; + } + + /* at this point, we are running in the master. + * Slaves do not receive SMPs. + */ + bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF; + base = (__be16 *) &(((struct ib_smp *)mad)->data[0]); + pkey_change_bitmap = 0; + for (i = 0; i < 32; i++) { + pr_debug("PKEY[%d] = x%x\n", + i + bn*32, be16_to_cpu(base[i])); + if (be16_to_cpu(base[i]) != + dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) { + pkey_change_bitmap |= (1 << i); + dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] = + be16_to_cpu(base[i]); + } + } + pr_debug("PKEY Change event: port=%d, " + "block=0x%x, change_bitmap=0x%x\n", + port_num, bn, pkey_change_bitmap); - ib_dispatch_event(&event); + if (pkey_change_bitmap) { + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_PKEY_CHANGE); + if (!dev->sriov.is_going_down) + __propagate_pkey_ev(dev, port_num, bn, + pkey_change_bitmap); + } + break; + + case IB_SMP_ATTR_GUID_INFO: + /* paravirtualized master's guid is guid 0 -- does not change */ + if (!mlx4_is_master(dev->dev)) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_GID_CHANGE); + /*if master, notify relevant slaves*/ + if (mlx4_is_master(dev->dev) && + !dev->sriov.is_going_down) { + bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod); + mlx4_ib_update_cache_on_guid_change(dev, bn, port_num, + (u8 *)(&((struct ib_smp *)mad)->data)); + mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num, + (u8 *)(&((struct ib_smp *)mad)->data)); + } + break; + + default: + break; } +} + +static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, + int block, u32 change_bitmap) +{ + int i, ix, slave, err; + int have_event = 0; - if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { - event.device = ibdev; - event.event = IB_EVENT_PKEY_CHANGE; - event.element.port_num = port_num; - ib_dispatch_event(&event); + for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) { + if (slave == mlx4_master_func_num(dev->dev)) + continue; + if (!mlx4_is_slave_active(dev->dev, slave)) + continue; + + have_event = 0; + for (i = 0; i < 32; i++) { + if (!(change_bitmap & (1 
<< i))) + continue; + for (ix = 0; + ix < dev->dev->caps.pkey_table_len[port_num]; ix++) { + if (dev->pkeys.virt2phys_pkey[slave][port_num - 1] + [ix] == i + 32 * block) { + err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num); + pr_debug("propagate_pkey_ev: slave %d," + " port %d, ix %d (%d)\n", + slave, port_num, ix, err); + have_event = 1; + break; + } + } + if (have_event) + break; } } } @@ -185,13 +336,15 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad) static void node_desc_override(struct ib_device *dev, struct ib_mad *mad) { + unsigned long flags; + if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { - spin_lock(&to_mdev(dev)->sm_lock); + spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags); memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); - spin_unlock(&to_mdev(dev)->sm_lock); + spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags); } } @@ -201,35 +354,391 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma struct ib_mad_send_buf *send_buf; struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; int ret; + unsigned long flags; if (agent) { send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); + if (IS_ERR(send_buf)) + return; /* * We rely here on the fact that MLX QPs don't use the * address handle after the send is posted (this is * wrong following the IB spec strictly, but we know * it's OK for our devices). */ - spin_lock(&dev->sm_lock); + spin_lock_irqsave(&dev->sm_lock, flags); memcpy(send_buf->mad, mad, sizeof *mad); if ((send_buf->ah = dev->sm_ah[port_num - 1])) ret = ib_post_send_mad(send_buf, NULL); else ret = -EINVAL; - spin_unlock(&dev->sm_lock); + spin_unlock_irqrestore(&dev->sm_lock, flags); if (ret) ib_free_send_mad(send_buf); } } -int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, +static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave, + struct ib_sa_mad *sa_mad) +{ + int ret = 0; + + /* dispatch to different sa handlers */ + switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) { + case IB_SA_ATTR_MC_MEMBER_REC: + ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad); + break; + default: + break; + } + return ret; +} + +int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + int i; + + for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + if (dev->sriov.demux[port - 1].guid_cache[i] == guid) + return i; + } + return -1; +} + + +static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave, + u8 port, u16 pkey, u16 *ix) +{ + int i, ret; + u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF; + u16 slot_pkey; + + if (slave == mlx4_master_func_num(dev->dev)) + return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix); + + unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1; + + for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) { + if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix) + continue; + + pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i]; + + ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey); + if (ret) + continue; + if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) { + if (slot_pkey & 0x8000) { + *ix = (u16) pkey_ix; + return 0; + } else { + /* take first partial pkey index found */ + 
if (partial_ix == 0xFF) + partial_ix = pkey_ix; + } + } + } + + if (partial_ix < 0xFF) { + *ix = (u16) partial_ix; + return 0; + } + + return -EINVAL; +} + +int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, + enum ib_qp_type dest_qpt, struct ib_wc *wc, + struct ib_grh *grh, struct ib_mad *mad) +{ + struct ib_sge list; + struct ib_send_wr wr, *bad_wr; + struct mlx4_ib_demux_pv_ctx *tun_ctx; + struct mlx4_ib_demux_pv_qp *tun_qp; + struct mlx4_rcv_tunnel_mad *tun_mad; + struct ib_ah_attr attr; + struct ib_ah *ah; + struct ib_qp *src_qp = NULL; + unsigned tun_tx_ix = 0; + int dqpn; + int ret = 0; + u16 tun_pkey_ix; + u16 cached_pkey; + u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; + + if (dest_qpt > IB_QPT_GSI) + return -EINVAL; + + tun_ctx = dev->sriov.demux[port-1].tun[slave]; + + /* check if proxy qp created */ + if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE) + return -EAGAIN; + + if (!dest_qpt) + tun_qp = &tun_ctx->qp[0]; + else + tun_qp = &tun_ctx->qp[1]; + + /* compute P_Key index to put in tunnel header for slave */ + if (dest_qpt) { + u16 pkey_ix; + ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey); + if (ret) + return -EINVAL; + + ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix); + if (ret) + return -EINVAL; + tun_pkey_ix = pkey_ix; + } else + tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; + + dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1; + + /* get tunnel tx data buf for slave */ + src_qp = tun_qp->qp; + + /* create ah. Just need an empty one with the port num for the post send. + * The driver will set the force loopback bit in post_send */ + memset(&attr, 0, sizeof attr); + attr.port_num = port; + if (is_eth) { + memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16); + attr.ah_flags = IB_AH_GRH; + } + ah = ib_create_ah(tun_ctx->pd, &attr); + if (IS_ERR(ah)) + return -ENOMEM; + + /* allocate tunnel tx buf after pass failure returns */ + spin_lock(&tun_qp->tx_lock); + if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >= + (MLX4_NUM_TUNNEL_BUFS - 1)) + ret = -EAGAIN; + else + tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1); + spin_unlock(&tun_qp->tx_lock); + if (ret) + goto out; + + tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr); + if (tun_qp->tx_ring[tun_tx_ix].ah) + ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah); + tun_qp->tx_ring[tun_tx_ix].ah = ah; + ib_dma_sync_single_for_cpu(&dev->ib_dev, + tun_qp->tx_ring[tun_tx_ix].buf.map, + sizeof (struct mlx4_rcv_tunnel_mad), + DMA_TO_DEVICE); + + /* copy over to tunnel buffer */ + if (grh) + memcpy(&tun_mad->grh, grh, sizeof *grh); + memcpy(&tun_mad->mad, mad, sizeof *mad); + + /* adjust tunnel data */ + tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix); + tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF); + tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0; + + if (is_eth) { + u16 vlan = 0; + if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan, + NULL)) { + /* VST mode */ + if (vlan != wc->vlan_id) + /* Packet vlan is not the VST-assigned vlan. + * Drop the packet. + */ + goto out; + else + /* Remove the vlan tag before forwarding + * the packet to the VF. 
+ */ + vlan = 0xffff; + } else { + vlan = wc->vlan_id; + } + + tun_mad->hdr.sl_vid = cpu_to_be16(vlan); + memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4); + memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2); + } else { + tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12); + tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid); + } + + ib_dma_sync_single_for_device(&dev->ib_dev, + tun_qp->tx_ring[tun_tx_ix].buf.map, + sizeof (struct mlx4_rcv_tunnel_mad), + DMA_TO_DEVICE); + + list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map; + list.length = sizeof (struct mlx4_rcv_tunnel_mad); + list.lkey = tun_ctx->mr->lkey; + + wr.wr.ud.ah = ah; + wr.wr.ud.port_num = port; + wr.wr.ud.remote_qkey = IB_QP_SET_QKEY; + wr.wr.ud.remote_qpn = dqpn; + wr.next = NULL; + wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt); + wr.sg_list = &list; + wr.num_sge = 1; + wr.opcode = IB_WR_SEND; + wr.send_flags = IB_SEND_SIGNALED; + + ret = ib_post_send(src_qp, &wr, &bad_wr); +out: + if (ret) + ib_destroy_ah(ah); + return ret; +} + +static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, + struct ib_wc *wc, struct ib_grh *grh, + struct ib_mad *mad) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + int err; + int slave; + u8 *slave_id; + int is_eth = 0; + + if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) + is_eth = 0; + else + is_eth = 1; + + if (is_eth) { + if (!(wc->wc_flags & IB_WC_GRH)) { + mlx4_ib_warn(ibdev, "RoCE grh not present.\n"); + return -EINVAL; + } + if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) { + mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n"); + return -EINVAL; + } + if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) { + mlx4_ib_warn(ibdev, "failed matching grh\n"); + return -ENOENT; + } + if (slave >= dev->dev->caps.sqp_demux) { + mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n", + slave, dev->dev->caps.sqp_demux); + return -ENOENT; + } + + if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad)) + return 0; + + err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); + if (err) + pr_debug("failed sending to slave %d via tunnel qp (%d)\n", + slave, err); + return 0; + } + + /* Initially assume that this mad is for us */ + slave = mlx4_master_func_num(dev->dev); + + /* See if the slave id is encoded in a response mad */ + if (mad->mad_hdr.method & 0x80) { + slave_id = (u8 *) &mad->mad_hdr.tid; + slave = *slave_id; + if (slave != 255) /*255 indicates the dom0*/ + *slave_id = 0; /* remap tid */ + } + + /* If a grh is present, we demux according to it */ + if (wc->wc_flags & IB_WC_GRH) { + slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id); + if (slave < 0) { + mlx4_ib_warn(ibdev, "failed matching grh\n"); + return -ENOENT; + } + } + /* Class-specific handling */ + switch (mad->mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + /* 255 indicates the dom0 */ + if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) { + if (!mlx4_vf_smi_enabled(dev->dev, slave, port)) + return -EPERM; + /* for a VF. drop unsolicited MADs */ + if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) { + mlx4_ib_warn(ibdev, "demux QP0. 
rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n", + slave, mad->mad_hdr.mgmt_class, + mad->mad_hdr.method); + return -EINVAL; + } + } + break; + case IB_MGMT_CLASS_SUBN_ADM: + if (mlx4_ib_demux_sa_handler(ibdev, port, slave, + (struct ib_sa_mad *) mad)) + return 0; + break; + case IB_MGMT_CLASS_CM: + if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad)) + return 0; + break; + case IB_MGMT_CLASS_DEVICE_MGMT: + if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP) + return 0; + break; + default: + /* Drop unsupported classes for slaves in tunnel mode */ + if (slave != mlx4_master_func_num(dev->dev)) { + pr_debug("dropping unsupported ingress mad from class:%d " + "for slave:%d\n", mad->mad_hdr.mgmt_class, slave); + return 0; + } + } + /*make sure that no slave==255 was not handled yet.*/ + if (slave >= dev->dev->caps.sqp_demux) { + mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n", + slave, dev->dev->caps.sqp_demux); + return -ENOENT; + } + + err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); + if (err) + pr_debug("failed sending to slave %d via tunnel qp (%d)\n", + slave, err); + return 0; +} + +static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad) { - u16 slid; + u16 slid, prev_lid = 0; int err; + struct ib_port_attr pattr; + + if (in_wc && in_wc->qp->qp_num) { + pr_debug("received MAD: slid:%d sqpn:%d " + "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n", + in_wc->slid, in_wc->src_qp, + in_wc->dlid_path_bits, + in_wc->qp->qp_num, + in_wc->wc_flags, + in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method, + be16_to_cpu(in_mad->mad_hdr.attr_id)); + if (in_wc->wc_flags & IB_WC_GRH) { + pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n", + be64_to_cpu(in_grh->sgid.global.subnet_prefix), + be64_to_cpu(in_grh->sgid.global.interface_id)); + pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n", + be64_to_cpu(in_grh->dgid.global.subnet_prefix), + be64_to_cpu(in_grh->dgid.global.interface_id)); + } + } slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); @@ -246,12 +755,9 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS; /* - * Don't process SMInfo queries or vendor-specific - * MADs -- the SMA can't handle them. + * Don't process SMInfo queries -- the SMA can't handle them. */ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || - ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == - IB_SMP_ATTR_VENDOR_MASK)) + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) return IB_MAD_RESULT_SUCCESS; } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || @@ -263,16 +769,27 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, } else return IB_MAD_RESULT_SUCCESS; + if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && + in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + !ib_query_port(ibdev, port_num, &pattr)) + prev_lid = pattr.lid; + err = mlx4_MAD_IFC(to_mdev(ibdev), - mad_flags & IB_MAD_IGNORE_MKEY, - mad_flags & IB_MAD_IGNORE_BKEY, + (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) | + (mad_flags & IB_MAD_IGNORE_BKEY ? 
MLX4_MAD_IFC_IGNORE_BKEY : 0) | + MLX4_MAD_IFC_NET_VIEW, port_num, in_wc, in_grh, in_mad, out_mad); if (err) return IB_MAD_RESULT_FAILURE; if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad); - node_desc_override(ibdev, out_mad); + if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) + smp_snoop(ibdev, port_num, in_mad, prev_lid); + /* slaves get node desc from FW */ + if (!mlx4_is_slave(to_mdev(ibdev)->dev)) + node_desc_override(ibdev, out_mad); } /* set return bit in status of directed route responses */ @@ -286,9 +803,77 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } +static void edit_counter(struct mlx4_counter *cnt, + struct ib_pma_portcounters *pma_cnt) +{ + pma_cnt->port_xmit_data = cpu_to_be32((be64_to_cpu(cnt->tx_bytes)>>2)); + pma_cnt->port_rcv_data = cpu_to_be32((be64_to_cpu(cnt->rx_bytes)>>2)); + pma_cnt->port_xmit_packets = cpu_to_be32(be64_to_cpu(cnt->tx_frames)); + pma_cnt->port_rcv_packets = cpu_to_be32(be64_to_cpu(cnt->rx_frames)); +} + +static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_ib_dev *dev = to_mdev(ibdev); + int err; + u32 inmod = dev->counters[port_num - 1] & 0xffff; + u8 mode; + + if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) + return -EINVAL; + + mailbox = mlx4_alloc_cmd_mailbox(dev->dev); + if (IS_ERR(mailbox)) + return IB_MAD_RESULT_FAILURE; + + err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0, + MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_WRAPPED); + if (err) + err = IB_MAD_RESULT_FAILURE; + else { + memset(out_mad->data, 0, sizeof out_mad->data); + mode = ((struct mlx4_counter *)mailbox->buf)->counter_mode; + switch (mode & 0xf) { + case 0: + edit_counter(mailbox->buf, + (void *)(out_mad->data + 40)); + err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; + break; + default: + err = IB_MAD_RESULT_FAILURE; + } + } + + mlx4_free_cmd_mailbox(dev->dev, mailbox); + + return err; +} + +int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + switch (rdma_port_get_link_layer(ibdev, port_num)) { + case IB_LINK_LAYER_INFINIBAND: + return ib_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); + case IB_LINK_LAYER_ETHERNET: + return iboe_process_mad(ibdev, mad_flags, port_num, in_wc, + in_grh, in_mad, out_mad); + default: + return -EINVAL; + } +} + static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { + if (mad_send_wc->send_buf->context[0]) + ib_destroy_ah(mad_send_wc->send_buf->context[0]); ib_free_send_mad(mad_send_wc->send_buf); } @@ -297,24 +882,30 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev) struct ib_mad_agent *agent; int p, q; int ret; + enum rdma_link_layer ll; - for (p = 0; p < dev->dev->caps.num_ports; ++p) + for (p = 0; p < dev->num_ports; ++p) { + ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1); for (q = 0; q <= 1; ++q) { - agent = ib_register_mad_agent(&dev->ib_dev, p + 1, - q ? IB_QPT_GSI : IB_QPT_SMI, - NULL, 0, send_handler, - NULL, NULL); - if (IS_ERR(agent)) { - ret = PTR_ERR(agent); - goto err; - } - dev->send_agent[p][q] = agent; + if (ll == IB_LINK_LAYER_INFINIBAND) { + agent = ib_register_mad_agent(&dev->ib_dev, p + 1, + q ? 
IB_QPT_GSI : IB_QPT_SMI, + NULL, 0, send_handler, + NULL, NULL); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + goto err; + } + dev->send_agent[p][q] = agent; + } else + dev->send_agent[p][q] = NULL; } + } return 0; err: - for (p = 0; p < dev->dev->caps.num_ports; ++p) + for (p = 0; p < dev->num_ports; ++p) for (q = 0; q <= 1; ++q) if (dev->send_agent[p][q]) ib_unregister_mad_agent(dev->send_agent[p][q]); @@ -327,14 +918,1246 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) struct ib_mad_agent *agent; int p, q; - for (p = 0; p < dev->dev->caps.num_ports; ++p) { + for (p = 0; p < dev->num_ports; ++p) { for (q = 0; q <= 1; ++q) { agent = dev->send_agent[p][q]; - dev->send_agent[p][q] = NULL; - ib_unregister_mad_agent(agent); + if (agent) { + dev->send_agent[p][q] = NULL; + ib_unregister_mad_agent(agent); + } } if (dev->sm_ah[p]) ib_destroy_ah(dev->sm_ah[p]); } } + +static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num) +{ + mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE); + + if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) + mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num, + MLX4_EQ_PORT_INFO_LID_CHANGE_MASK); +} + +static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num) +{ + /* re-configure the alias-guid and mcg's */ + if (mlx4_is_master(dev->dev)) { + mlx4_ib_invalidate_all_guid_record(dev, port_num); + + if (!dev->sriov.is_going_down) { + mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0); + mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num, + MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK); + } + } + mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER); +} + +static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, + struct mlx4_eqe *eqe) +{ + __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe), + GET_MASK_FROM_EQE(eqe)); +} + +static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num, + u32 guid_tbl_blk_num, u32 change_bitmap) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + u16 i; + + if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev)) + return; + + in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) { + mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n"); + goto out; + } + + guid_tbl_blk_num *= 4; + + for (i = 0; i < 4; i++) { + if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff))) + continue; + memset(in_mad, 0, sizeof *in_mad); + memset(out_mad, 0, sizeof *out_mad); + + in_mad->base_version = 1; + in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + in_mad->class_version = 1; + in_mad->method = IB_MGMT_METHOD_GET; + in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; + in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i); + + if (mlx4_MAD_IFC(dev, + MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW, + port_num, NULL, NULL, in_mad, out_mad)) { + mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n"); + goto out; + } + + mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i, + port_num, + (u8 *)(&((struct ib_smp *)out_mad)->data)); + mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i, + port_num, + (u8 *)(&((struct ib_smp *)out_mad)->data)); + } + +out: + kfree(in_mad); + kfree(out_mad); + return; +} + +void handle_port_mgmt_change_event(struct work_struct *work) +{ + struct ib_event_work *ew = container_of(work, struct ib_event_work, work); + struct mlx4_ib_dev *dev = ew->ib_dev; + struct mlx4_eqe *eqe = &(ew->ib_eqe); + u8 
port = eqe->event.port_mgmt_change.port; + u32 changed_attr; + u32 tbl_block; + u32 change_bitmap; + + switch (eqe->subtype) { + case MLX4_DEV_PMC_SUBTYPE_PORT_INFO: + changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr); + + /* Update the SM ah - This should be done before handling + the other changed attributes so that MADs can be sent to the SM */ + if (changed_attr & MSTR_SM_CHANGE_MASK) { + u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid); + u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf; + update_sm_ah(dev, port, lid, sl); + } + + /* Check if it is a lid change event */ + if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK) + handle_lid_change_event(dev, port); + + /* Generate GUID changed event */ + if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { + mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + /*if master, notify all slaves*/ + if (mlx4_is_master(dev->dev)) + mlx4_gen_slaves_port_mgt_ev(dev->dev, port, + MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK); + } + + if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK) + handle_client_rereg_event(dev, port); + break; + + case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE: + mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE); + if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) + propagate_pkey_ev(dev, port, eqe); + break; + case MLX4_DEV_PMC_SUBTYPE_GUID_INFO: + /* paravirtualized master's guid is guid 0 -- does not change */ + if (!mlx4_is_master(dev->dev)) + mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); + /*if master, notify relevant slaves*/ + else if (!dev->sriov.is_going_down) { + tbl_block = GET_BLK_PTR_FROM_EQE(eqe); + change_bitmap = GET_MASK_FROM_EQE(eqe); + handle_slaves_guid_change(dev, port, tbl_block, change_bitmap); + } + break; + default: + pr_warn("Unsupported subtype 0x%x for " + "Port Management Change event\n", eqe->subtype); + } + + kfree(ew); +} + +void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, + enum ib_event_type type) +{ + struct ib_event event; + + event.device = &dev->ib_dev; + event.element.port_num = port_num; + event.event = type; + + ib_dispatch_event(&event); +} + +static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg) +{ + unsigned long flags; + struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context; + struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE) + queue_work(ctx->wq, &ctx->work); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + +static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, + struct mlx4_ib_demux_pv_qp *tun_qp, + int index) +{ + struct ib_sge sg_list; + struct ib_recv_wr recv_wr, *bad_recv_wr; + int size; + + size = (tun_qp->qp->qp_type == IB_QPT_UD) ? 
+ sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf); + + sg_list.addr = tun_qp->ring[index].map; + sg_list.length = size; + sg_list.lkey = ctx->mr->lkey; + + recv_wr.next = NULL; + recv_wr.sg_list = &sg_list; + recv_wr.num_sge = 1; + recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV | + MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt); + ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map, + size, DMA_FROM_DEVICE); + return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr); +} + +static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port, + int slave, struct ib_sa_mad *sa_mad) +{ + int ret = 0; + + /* dispatch to different sa handlers */ + switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) { + case IB_SA_ATTR_MC_MEMBER_REC: + ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad); + break; + default: + break; + } + return ret; +} + +static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) +{ + int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave; + + return (qpn >= proxy_start && qpn <= proxy_start + 1); +} + + +int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, + enum ib_qp_type dest_qpt, u16 pkey_index, + u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr, + u8 *s_mac, struct ib_mad *mad) +{ + struct ib_sge list; + struct ib_send_wr wr, *bad_wr; + struct mlx4_ib_demux_pv_ctx *sqp_ctx; + struct mlx4_ib_demux_pv_qp *sqp; + struct mlx4_mad_snd_buf *sqp_mad; + struct ib_ah *ah; + struct ib_qp *send_qp = NULL; + unsigned wire_tx_ix = 0; + int ret = 0; + u16 wire_pkey_ix; + int src_qpnum; + u8 sgid_index; + + + sqp_ctx = dev->sriov.sqps[port-1]; + + /* check if proxy qp created */ + if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE) + return -EAGAIN; + + if (dest_qpt == IB_QPT_SMI) { + src_qpnum = 0; + sqp = &sqp_ctx->qp[0]; + wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; + } else { + src_qpnum = 1; + sqp = &sqp_ctx->qp[1]; + wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index]; + } + + send_qp = sqp->qp; + + /* create ah */ + sgid_index = attr->grh.sgid_index; + attr->grh.sgid_index = 0; + ah = ib_create_ah(sqp_ctx->pd, attr); + if (IS_ERR(ah)) + return -ENOMEM; + attr->grh.sgid_index = sgid_index; + to_mah(ah)->av.ib.gid_index = sgid_index; + /* get rid of force-loopback bit */ + to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF); + spin_lock(&sqp->tx_lock); + if (sqp->tx_ix_head - sqp->tx_ix_tail >= + (MLX4_NUM_TUNNEL_BUFS - 1)) + ret = -EAGAIN; + else + wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1); + spin_unlock(&sqp->tx_lock); + if (ret) + goto out; + + sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); + if (sqp->tx_ring[wire_tx_ix].ah) + ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah); + sqp->tx_ring[wire_tx_ix].ah = ah; + ib_dma_sync_single_for_cpu(&dev->ib_dev, + sqp->tx_ring[wire_tx_ix].buf.map, + sizeof (struct mlx4_mad_snd_buf), + DMA_TO_DEVICE); + + memcpy(&sqp_mad->payload, mad, sizeof *mad); + + ib_dma_sync_single_for_device(&dev->ib_dev, + sqp->tx_ring[wire_tx_ix].buf.map, + sizeof (struct mlx4_mad_snd_buf), + DMA_TO_DEVICE); + + list.addr = sqp->tx_ring[wire_tx_ix].buf.map; + list.length = sizeof (struct mlx4_mad_snd_buf); + list.lkey = sqp_ctx->mr->lkey; + + wr.wr.ud.ah = ah; + wr.wr.ud.port_num = port; + wr.wr.ud.pkey_index = wire_pkey_ix; + wr.wr.ud.remote_qkey = qkey; + wr.wr.ud.remote_qpn = remote_qpn; + wr.next = NULL; + wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum); + wr.sg_list = &list; + 
wr.num_sge = 1; + wr.opcode = IB_WR_SEND; + wr.send_flags = IB_SEND_SIGNALED; + if (s_mac) + memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6); + + + ret = ib_post_send(send_qp, &wr, &bad_wr); +out: + if (ret) + ib_destroy_ah(ah); + return ret; +} + +static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port) +{ + if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) + return slave; + return mlx4_get_base_gid_ix(dev->dev, slave, port); +} + +static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port, + struct ib_ah_attr *ah_attr) +{ + if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND) + ah_attr->grh.sgid_index = slave; + else + ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port); +} + +static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc) +{ + struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); + struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)]; + int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1); + struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr; + struct mlx4_ib_ah ah; + struct ib_ah_attr ah_attr; + u8 *slave_id; + int slave; + int port; + + /* Get slave that sent this packet */ + if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || + wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX || + (wc->src_qp & 0x1) != ctx->port - 1 || + wc->src_qp & 0x4) { + mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp); + return; + } + slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8; + if (slave != ctx->slave) { + mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: " + "belongs to another slave\n", wc->src_qp); + return; + } + + /* Map transaction ID */ + ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map, + sizeof (struct mlx4_tunnel_mad), + DMA_FROM_DEVICE); + switch (tunnel->mad.mad_hdr.method) { + case IB_MGMT_METHOD_SET: + case IB_MGMT_METHOD_GET: + case IB_MGMT_METHOD_REPORT: + case IB_SA_METHOD_GET_TABLE: + case IB_SA_METHOD_DELETE: + case IB_SA_METHOD_GET_MULTI: + case IB_SA_METHOD_GET_TRACE_TBL: + slave_id = (u8 *) &tunnel->mad.mad_hdr.tid; + if (*slave_id) { + mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d " + "class:%d slave:%d\n", *slave_id, + tunnel->mad.mad_hdr.mgmt_class, slave); + return; + } else + *slave_id = slave; + default: + /* nothing */; + } + + /* Class-specific handling */ + switch (tunnel->mad.mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + if (slave != mlx4_master_func_num(dev->dev) && + !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port)) + return; + break; + case IB_MGMT_CLASS_SUBN_ADM: + if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave, + (struct ib_sa_mad *) &tunnel->mad)) + return; + break; + case IB_MGMT_CLASS_CM: + if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave, + (struct ib_mad *) &tunnel->mad)) + return; + break; + case IB_MGMT_CLASS_DEVICE_MGMT: + if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET && + tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET) + return; + break; + default: + /* Drop unsupported classes for slaves in tunnel mode */ + if (slave != mlx4_master_func_num(dev->dev)) { + mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d " + "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave); + return; + } + } + + /* We are using standard ib_core services to send the mad, so generate a + * stadard 
address handle by decoding the tunnelled mlx4_ah fields */ + memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av)); + ah.ibah.device = ctx->ib_dev; + mlx4_ib_query_ah(&ah.ibah, &ah_attr); + if (ah_attr.ah_flags & IB_AH_GRH) + fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr); + + port = mlx4_slave_convert_port(dev->dev, slave, ah_attr.port_num); + if (port < 0) + return; + ah_attr.port_num = port; + memcpy(ah_attr.dmac, tunnel->hdr.mac, 6); + ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan); + /* if slave have default vlan use it */ + mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave, + &ah_attr.vlan_id, &ah_attr.sl); + + mlx4_ib_send_to_wire(dev, slave, ctx->port, + is_proxy_qp0(dev, wc->src_qp, slave) ? + IB_QPT_SMI : IB_QPT_GSI, + be16_to_cpu(tunnel->hdr.pkey_index), + be32_to_cpu(tunnel->hdr.remote_qpn), + be32_to_cpu(tunnel->hdr.qkey), + &ah_attr, wc->smac, &tunnel->mad); +} + +static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, + enum ib_qp_type qp_type, int is_tun) +{ + int i; + struct mlx4_ib_demux_pv_qp *tun_qp; + int rx_buf_size, tx_buf_size; + + if (qp_type > IB_QPT_GSI) + return -EINVAL; + + tun_qp = &ctx->qp[qp_type]; + + tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS, + GFP_KERNEL); + if (!tun_qp->ring) + return -ENOMEM; + + tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS, + sizeof (struct mlx4_ib_tun_tx_buf), + GFP_KERNEL); + if (!tun_qp->tx_ring) { + kfree(tun_qp->ring); + tun_qp->ring = NULL; + return -ENOMEM; + } + + if (is_tun) { + rx_buf_size = sizeof (struct mlx4_tunnel_mad); + tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad); + } else { + rx_buf_size = sizeof (struct mlx4_mad_rcv_buf); + tx_buf_size = sizeof (struct mlx4_mad_snd_buf); + } + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL); + if (!tun_qp->ring[i].addr) + goto err; + tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev, + tun_qp->ring[i].addr, + rx_buf_size, + DMA_FROM_DEVICE); + } + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + tun_qp->tx_ring[i].buf.addr = + kmalloc(tx_buf_size, GFP_KERNEL); + if (!tun_qp->tx_ring[i].buf.addr) + goto tx_err; + tun_qp->tx_ring[i].buf.map = + ib_dma_map_single(ctx->ib_dev, + tun_qp->tx_ring[i].buf.addr, + tx_buf_size, + DMA_TO_DEVICE); + tun_qp->tx_ring[i].ah = NULL; + } + spin_lock_init(&tun_qp->tx_lock); + tun_qp->tx_ix_head = 0; + tun_qp->tx_ix_tail = 0; + tun_qp->proxy_qpt = qp_type; + + return 0; + +tx_err: + while (i > 0) { + --i; + ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, + tx_buf_size, DMA_TO_DEVICE); + kfree(tun_qp->tx_ring[i].buf.addr); + } + kfree(tun_qp->tx_ring); + tun_qp->tx_ring = NULL; + i = MLX4_NUM_TUNNEL_BUFS; +err: + while (i > 0) { + --i; + ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map, + rx_buf_size, DMA_FROM_DEVICE); + kfree(tun_qp->ring[i].addr); + } + kfree(tun_qp->ring); + tun_qp->ring = NULL; + return -ENOMEM; +} + +static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, + enum ib_qp_type qp_type, int is_tun) +{ + int i; + struct mlx4_ib_demux_pv_qp *tun_qp; + int rx_buf_size, tx_buf_size; + + if (qp_type > IB_QPT_GSI) + return; + + tun_qp = &ctx->qp[qp_type]; + if (is_tun) { + rx_buf_size = sizeof (struct mlx4_tunnel_mad); + tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad); + } else { + rx_buf_size = sizeof (struct mlx4_mad_rcv_buf); + tx_buf_size = sizeof (struct mlx4_mad_snd_buf); + } + + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + ib_dma_unmap_single(ctx->ib_dev, 
tun_qp->ring[i].map, + rx_buf_size, DMA_FROM_DEVICE); + kfree(tun_qp->ring[i].addr); + } + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, + tx_buf_size, DMA_TO_DEVICE); + kfree(tun_qp->tx_ring[i].buf.addr); + if (tun_qp->tx_ring[i].ah) + ib_destroy_ah(tun_qp->tx_ring[i].ah); + } + kfree(tun_qp->tx_ring); + kfree(tun_qp->ring); +} + +static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) +{ + struct mlx4_ib_demux_pv_ctx *ctx; + struct mlx4_ib_demux_pv_qp *tun_qp; + struct ib_wc wc; + int ret; + ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work); + ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); + + while (ib_poll_cq(ctx->cq, 1, &wc) == 1) { + tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)]; + if (wc.status == IB_WC_SUCCESS) { + switch (wc.opcode) { + case IB_WC_RECV: + mlx4_ib_multiplex_mad(ctx, &wc); + ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, + wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)); + if (ret) + pr_err("Failed reposting tunnel " + "buf:%lld\n", wc.wr_id); + break; + case IB_WC_SEND: + pr_debug("received tunnel send completion:" + "wrid=0x%llx, status=0x%x\n", + wc.wr_id, wc.status); + ib_destroy_ah(tun_qp->tx_ring[wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah + = NULL; + spin_lock(&tun_qp->tx_lock); + tun_qp->tx_ix_tail++; + spin_unlock(&tun_qp->tx_lock); + + break; + default: + break; + } + } else { + pr_debug("mlx4_ib: completion error in tunnel: %d." + " status = %d, wrid = 0x%llx\n", + ctx->slave, wc.status, wc.wr_id); + if (!MLX4_TUN_IS_RECV(wc.wr_id)) { + ib_destroy_ah(tun_qp->tx_ring[wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah + = NULL; + spin_lock(&tun_qp->tx_lock); + tun_qp->tx_ix_tail++; + spin_unlock(&tun_qp->tx_lock); + } + } + } +} + +static void pv_qp_event_handler(struct ib_event *event, void *qp_context) +{ + struct mlx4_ib_demux_pv_ctx *sqp = qp_context; + + /* It's worse than that! He's dead, Jim! 
*/ + pr_err("Fatal error (%d) on a MAD QP on port %d\n", + event->event, sqp->port); +} + +static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, + enum ib_qp_type qp_type, int create_tun) +{ + int i, ret; + struct mlx4_ib_demux_pv_qp *tun_qp; + struct mlx4_ib_qp_tunnel_init_attr qp_init_attr; + struct ib_qp_attr attr; + int qp_attr_mask_INIT; + + if (qp_type > IB_QPT_GSI) + return -EINVAL; + + tun_qp = &ctx->qp[qp_type]; + + memset(&qp_init_attr, 0, sizeof qp_init_attr); + qp_init_attr.init_attr.send_cq = ctx->cq; + qp_init_attr.init_attr.recv_cq = ctx->cq; + qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS; + qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS; + qp_init_attr.init_attr.cap.max_send_sge = 1; + qp_init_attr.init_attr.cap.max_recv_sge = 1; + if (create_tun) { + qp_init_attr.init_attr.qp_type = IB_QPT_UD; + qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP; + qp_init_attr.port = ctx->port; + qp_init_attr.slave = ctx->slave; + qp_init_attr.proxy_qp_type = qp_type; + qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | + IB_QP_QKEY | IB_QP_PORT; + } else { + qp_init_attr.init_attr.qp_type = qp_type; + qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP; + qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY; + } + qp_init_attr.init_attr.port_num = ctx->port; + qp_init_attr.init_attr.qp_context = ctx; + qp_init_attr.init_attr.event_handler = pv_qp_event_handler; + tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr); + if (IS_ERR(tun_qp->qp)) { + ret = PTR_ERR(tun_qp->qp); + tun_qp->qp = NULL; + pr_err("Couldn't create %s QP (%d)\n", + create_tun ? "tunnel" : "special", ret); + return ret; + } + + memset(&attr, 0, sizeof attr); + attr.qp_state = IB_QPS_INIT; + ret = 0; + if (create_tun) + ret = find_slave_port_pkey_ix(to_mdev(ctx->ib_dev), ctx->slave, + ctx->port, IB_DEFAULT_PKEY_FULL, + &attr.pkey_index); + if (ret || !create_tun) + attr.pkey_index = + to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0]; + attr.qkey = IB_QP1_QKEY; + attr.port_num = ctx->port; + ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT); + if (ret) { + pr_err("Couldn't change %s qp state to INIT (%d)\n", + create_tun ? "tunnel" : "special", ret); + goto err_qp; + } + attr.qp_state = IB_QPS_RTR; + ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE); + if (ret) { + pr_err("Couldn't change %s qp state to RTR (%d)\n", + create_tun ? "tunnel" : "special", ret); + goto err_qp; + } + attr.qp_state = IB_QPS_RTS; + attr.sq_psn = 0; + ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) { + pr_err("Couldn't change %s qp state to RTS (%d)\n", + create_tun ? 
"tunnel" : "special", ret); + goto err_qp; + } + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i); + if (ret) { + pr_err(" mlx4_ib_post_pv_buf error" + " (err = %d, i = %d)\n", ret, i); + goto err_qp; + } + } + return 0; + +err_qp: + ib_destroy_qp(tun_qp->qp); + tun_qp->qp = NULL; + return ret; +} + +/* + * IB MAD completion callback for real SQPs + */ +static void mlx4_ib_sqp_comp_worker(struct work_struct *work) +{ + struct mlx4_ib_demux_pv_ctx *ctx; + struct mlx4_ib_demux_pv_qp *sqp; + struct ib_wc wc; + struct ib_grh *grh; + struct ib_mad *mad; + + ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work); + ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); + + while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) { + sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)]; + if (wc.status == IB_WC_SUCCESS) { + switch (wc.opcode) { + case IB_WC_SEND: + ib_destroy_ah(sqp->tx_ring[wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah + = NULL; + spin_lock(&sqp->tx_lock); + sqp->tx_ix_tail++; + spin_unlock(&sqp->tx_lock); + break; + case IB_WC_RECV: + mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *) + (sqp->ring[wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload); + grh = &(((struct mlx4_mad_rcv_buf *) + (sqp->ring[wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh); + mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad); + if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1))) + pr_err("Failed reposting SQP " + "buf:%lld\n", wc.wr_id); + break; + default: + BUG_ON(1); + break; + } + } else { + pr_debug("mlx4_ib: completion error in tunnel: %d." + " status = %d, wrid = 0x%llx\n", + ctx->slave, wc.status, wc.wr_id); + if (!MLX4_TUN_IS_RECV(wc.wr_id)) { + ib_destroy_ah(sqp->tx_ring[wc.wr_id & + (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah + = NULL; + spin_lock(&sqp->tx_lock); + sqp->tx_ix_tail++; + spin_unlock(&sqp->tx_lock); + } + } + } +} + +static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port, + struct mlx4_ib_demux_pv_ctx **ret_ctx) +{ + struct mlx4_ib_demux_pv_ctx *ctx; + + *ret_ctx = NULL; + ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL); + if (!ctx) { + pr_err("failed allocating pv resource context " + "for port %d, slave %d\n", port, slave); + return -ENOMEM; + } + + ctx->ib_dev = &dev->ib_dev; + ctx->port = port; + ctx->slave = slave; + *ret_ctx = ctx; + return 0; +} + +static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port) +{ + if (dev->sriov.demux[port - 1].tun[slave]) { + kfree(dev->sriov.demux[port - 1].tun[slave]); + dev->sriov.demux[port - 1].tun[slave] = NULL; + } +} + +static int create_pv_resources(struct ib_device *ibdev, int slave, int port, + int create_tun, struct mlx4_ib_demux_pv_ctx *ctx) +{ + int ret, cq_size; + + if (ctx->state != DEMUX_PV_STATE_DOWN) + return -EEXIST; + + ctx->state = DEMUX_PV_STATE_STARTING; + /* have QP0 only if link layer is IB */ + if (rdma_port_get_link_layer(ibdev, ctx->port) == + IB_LINK_LAYER_INFINIBAND) + ctx->has_smi = 1; + + if (ctx->has_smi) { + ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun); + if (ret) { + pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret); + goto err_out; + } + } + + ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun); + if (ret) { + pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret); + goto err_out_qp0; + } + + cq_size = 2 * MLX4_NUM_TUNNEL_BUFS; + if (ctx->has_smi) + cq_size *= 2; + 
+ ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler, + NULL, ctx, cq_size, 0); + if (IS_ERR(ctx->cq)) { + ret = PTR_ERR(ctx->cq); + pr_err("Couldn't create tunnel CQ (%d)\n", ret); + goto err_buf; + } + + ctx->pd = ib_alloc_pd(ctx->ib_dev); + if (IS_ERR(ctx->pd)) { + ret = PTR_ERR(ctx->pd); + pr_err("Couldn't create tunnel PD (%d)\n", ret); + goto err_cq; + } + + ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(ctx->mr)) { + ret = PTR_ERR(ctx->mr); + pr_err("Couldn't get tunnel DMA MR (%d)\n", ret); + goto err_pd; + } + + if (ctx->has_smi) { + ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun); + if (ret) { + pr_err("Couldn't create %s QP0 (%d)\n", + create_tun ? "tunnel for" : "", ret); + goto err_mr; + } + } + + ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun); + if (ret) { + pr_err("Couldn't create %s QP1 (%d)\n", + create_tun ? "tunnel for" : "", ret); + goto err_qp0; + } + + if (create_tun) + INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker); + else + INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker); + + ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq; + + ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); + if (ret) { + pr_err("Couldn't arm tunnel cq (%d)\n", ret); + goto err_wq; + } + ctx->state = DEMUX_PV_STATE_ACTIVE; + return 0; + +err_wq: + ctx->wq = NULL; + ib_destroy_qp(ctx->qp[1].qp); + ctx->qp[1].qp = NULL; + + +err_qp0: + if (ctx->has_smi) + ib_destroy_qp(ctx->qp[0].qp); + ctx->qp[0].qp = NULL; + +err_mr: + ib_dereg_mr(ctx->mr); + ctx->mr = NULL; + +err_pd: + ib_dealloc_pd(ctx->pd); + ctx->pd = NULL; + +err_cq: + ib_destroy_cq(ctx->cq); + ctx->cq = NULL; + +err_buf: + mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun); + +err_out_qp0: + if (ctx->has_smi) + mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun); +err_out: + ctx->state = DEMUX_PV_STATE_DOWN; + return ret; +} + +static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port, + struct mlx4_ib_demux_pv_ctx *ctx, int flush) +{ + if (!ctx) + return; + if (ctx->state > DEMUX_PV_STATE_DOWN) { + ctx->state = DEMUX_PV_STATE_DOWNING; + if (flush) + flush_workqueue(ctx->wq); + if (ctx->has_smi) { + ib_destroy_qp(ctx->qp[0].qp); + ctx->qp[0].qp = NULL; + mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1); + } + ib_destroy_qp(ctx->qp[1].qp); + ctx->qp[1].qp = NULL; + mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1); + ib_dereg_mr(ctx->mr); + ctx->mr = NULL; + ib_dealloc_pd(ctx->pd); + ctx->pd = NULL; + ib_destroy_cq(ctx->cq); + ctx->cq = NULL; + ctx->state = DEMUX_PV_STATE_DOWN; + } +} + +static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave, + int port, int do_init) +{ + int ret = 0; + + if (!do_init) { + clean_vf_mcast(&dev->sriov.demux[port - 1], slave); + /* for master, destroy real sqp resources */ + if (slave == mlx4_master_func_num(dev->dev)) + destroy_pv_resources(dev, slave, port, + dev->sriov.sqps[port - 1], 1); + /* destroy the tunnel qp resources */ + destroy_pv_resources(dev, slave, port, + dev->sriov.demux[port - 1].tun[slave], 1); + return 0; + } + + /* create the tunnel qp resources */ + ret = create_pv_resources(&dev->ib_dev, slave, port, 1, + dev->sriov.demux[port - 1].tun[slave]); + + /* for master, create the real sqp resources */ + if (!ret && slave == mlx4_master_func_num(dev->dev)) + ret = create_pv_resources(&dev->ib_dev, slave, port, 0, + dev->sriov.sqps[port - 1]); + return ret; +} + +void mlx4_ib_tunnels_update_work(struct work_struct *work) +{ + struct mlx4_ib_demux_work *dmxw; + + dmxw = container_of(work, struct mlx4_ib_demux_work, work); + 
mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port, + dmxw->do_init); + kfree(dmxw); + return; +} + +static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, + struct mlx4_ib_demux_ctx *ctx, + int port) +{ + char name[12]; + int ret = 0; + int i; + + ctx->tun = kcalloc(dev->dev->caps.sqp_demux, + sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL); + if (!ctx->tun) + return -ENOMEM; + + ctx->dev = dev; + ctx->port = port; + ctx->ib_dev = &dev->ib_dev; + + for (i = 0; + i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1)); + i++) { + struct mlx4_active_ports actv_ports = + mlx4_get_active_ports(dev->dev, i); + + if (!test_bit(port - 1, actv_ports.ports)) + continue; + + ret = alloc_pv_object(dev, i, port, &ctx->tun[i]); + if (ret) { + ret = -ENOMEM; + goto err_mcg; + } + } + + ret = mlx4_ib_mcg_port_init(ctx); + if (ret) { + pr_err("Failed initializing mcg para-virt (%d)\n", ret); + goto err_mcg; + } + + snprintf(name, sizeof name, "mlx4_ibt%d", port); + ctx->wq = create_singlethread_workqueue(name); + if (!ctx->wq) { + pr_err("Failed to create tunnelling WQ for port %d\n", port); + ret = -ENOMEM; + goto err_wq; + } + + snprintf(name, sizeof name, "mlx4_ibud%d", port); + ctx->ud_wq = create_singlethread_workqueue(name); + if (!ctx->ud_wq) { + pr_err("Failed to create up/down WQ for port %d\n", port); + ret = -ENOMEM; + goto err_udwq; + } + + return 0; + +err_udwq: + destroy_workqueue(ctx->wq); + ctx->wq = NULL; + +err_wq: + mlx4_ib_mcg_port_cleanup(ctx, 1); +err_mcg: + for (i = 0; i < dev->dev->caps.sqp_demux; i++) + free_pv_object(dev, i, port); + kfree(ctx->tun); + ctx->tun = NULL; + return ret; +} + +static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx) +{ + if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) { + sqp_ctx->state = DEMUX_PV_STATE_DOWNING; + flush_workqueue(sqp_ctx->wq); + if (sqp_ctx->has_smi) { + ib_destroy_qp(sqp_ctx->qp[0].qp); + sqp_ctx->qp[0].qp = NULL; + mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0); + } + ib_destroy_qp(sqp_ctx->qp[1].qp); + sqp_ctx->qp[1].qp = NULL; + mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0); + ib_dereg_mr(sqp_ctx->mr); + sqp_ctx->mr = NULL; + ib_dealloc_pd(sqp_ctx->pd); + sqp_ctx->pd = NULL; + ib_destroy_cq(sqp_ctx->cq); + sqp_ctx->cq = NULL; + sqp_ctx->state = DEMUX_PV_STATE_DOWN; + } +} + +static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx) +{ + int i; + if (ctx) { + struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); + mlx4_ib_mcg_port_cleanup(ctx, 1); + for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + if (!ctx->tun[i]) + continue; + if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN) + ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING; + } + flush_workqueue(ctx->wq); + for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0); + free_pv_object(dev, i, ctx->port); + } + kfree(ctx->tun); + destroy_workqueue(ctx->ud_wq); + destroy_workqueue(ctx->wq); + } +} + +static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init) +{ + int i; + + if (!mlx4_is_master(dev->dev)) + return; + /* initialize or tear down tunnel QPs for the master */ + for (i = 0; i < dev->dev->caps.num_ports; i++) + mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init); + return; +} + +int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) +{ + int i = 0; + int err; + + if (!mlx4_is_mfunc(dev->dev)) + return 0; + + dev->sriov.is_going_down = 0; + spin_lock_init(&dev->sriov.going_down_lock); + mlx4_ib_cm_paravirt_init(dev); + + mlx4_ib_warn(&dev->ib_dev, 
"multi-function enabled\n"); + + if (mlx4_is_slave(dev->dev)) { + mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n"); + return 0; + } + + for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + if (i == mlx4_master_func_num(dev->dev)) + mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid); + else + mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid()); + } + + err = mlx4_ib_init_alias_guid_service(dev); + if (err) { + mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n"); + goto paravirt_err; + } + err = mlx4_ib_device_register_sysfs(dev); + if (err) { + mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n"); + goto sysfs_err; + } + + mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n", + dev->dev->caps.sqp_demux); + for (i = 0; i < dev->num_ports; i++) { + union ib_gid gid; + err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1); + if (err) + goto demux_err; + dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; + err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, + &dev->sriov.sqps[i]); + if (err) + goto demux_err; + err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1); + if (err) + goto free_pv; + } + mlx4_ib_master_tunnels(dev, 1); + return 0; + +free_pv: + free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1); +demux_err: + while (--i >= 0) { + free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1); + mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); + } + mlx4_ib_device_unregister_sysfs(dev); + +sysfs_err: + mlx4_ib_destroy_alias_guid_service(dev); + +paravirt_err: + mlx4_ib_cm_paravirt_clean(dev, -1); + + return err; +} + +void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev) +{ + int i; + unsigned long flags; + + if (!mlx4_is_mfunc(dev->dev)) + return; + + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + dev->sriov.is_going_down = 1; + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); + if (mlx4_is_master(dev->dev)) { + for (i = 0; i < dev->num_ports; i++) { + flush_workqueue(dev->sriov.demux[i].ud_wq); + mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]); + kfree(dev->sriov.sqps[i]); + dev->sriov.sqps[i] = NULL; + mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); + } + + mlx4_ib_cm_paravirt_clean(dev, -1); + mlx4_ib_destroy_alias_guid_service(dev); + mlx4_ib_device_unregister_sysfs(dev); + } +} diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index bcf50648fa1..0f7027e7db1 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -32,30 +33,57 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/errno.h> +#include <linux/netdevice.h> +#include <linux/inetdevice.h> +#include <linux/rtnetlink.h> +#include <linux/if_vlan.h> +#include <net/ipv6.h> +#include <net/addrconf.h> #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_addr.h> #include <linux/mlx4/driver.h> #include <linux/mlx4/cmd.h> +#include <linux/mlx4/qp.h> #include "mlx4_ib.h" #include "user.h" -#define DRV_NAME "mlx4_ib" -#define DRV_VERSION "1.0" -#define DRV_RELDATE "April 4, 2008" +#define DRV_NAME MLX4_IB_DRV_NAME +#define DRV_VERSION "2.2-1" +#define DRV_RELDATE "Feb 2014" + +#define MLX4_IB_FLOW_MAX_PRIO 0xFFF +#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); +int mlx4_ib_sm_guid_assign = 1; +module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444); +MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)"); + static const char mlx4_ib_version[] = DRV_NAME ": Mellanox ConnectX InfiniBand driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; +struct update_gid_work { + struct work_struct work; + union ib_gid gids[128]; + struct mlx4_ib_dev *dev; + int port; +}; + +static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init); + +static struct workqueue_struct *wq; + static void init_query_mad(struct ib_smp *mad) { mad->base_version = 1; @@ -64,6 +92,33 @@ static void init_query_mad(struct ib_smp *mad) mad->method = IB_MGMT_METHOD_GET; } +static union ib_gid zgid; + +static int check_flow_steering_support(struct mlx4_dev *dev) +{ + int eth_num_ports = 0; + int ib_num_ports = 0; + + int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED; + + if (dmfs) { + int i; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) + eth_num_ports++; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + ib_num_ports++; + dmfs &= (!ib_num_ports || + (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) && + (!eth_num_ports || + (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)); + if (ib_num_ports && mlx4_is_mfunc(dev)) { + pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n"); + dmfs = 0; + } + } + return dmfs; +} + static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props) { @@ -80,7 +135,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, + 1, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -102,40 +158,60 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; - if (dev->dev->caps.max_gso_sz) + if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH) props->device_cap_flags |= IB_DEVICE_UD_TSO; + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) + props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; + if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && + (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && + (dev->dev->caps.bmme_flags & 
MLX4_BMME_FLAG_FAST_REG_WR)) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) + props->device_cap_flags |= IB_DEVICE_XRC; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW) + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW; + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B) + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; + else + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A; + if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) + props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; + } props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; - props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); + props->vendor_part_id = dev->dev->pdev->device; props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); props->max_mr_size = ~0ull; props->page_size_cap = dev->dev->caps.page_size_cap; - props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps; - props->max_qp_wr = dev->dev->caps.max_wqes; + props->max_qp = dev->dev->quotas.qp; + props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; props->max_sge = min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); - props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs; + props->max_cq = dev->dev->quotas.cq; props->max_cqe = dev->dev->caps.max_cqes; - props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws; + props->max_mr = dev->dev->quotas.mpt; props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds; props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma; props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma; props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; - props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; + props->max_srq = dev->dev->quotas.srq; props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1; props->max_srq_sge = dev->dev->caps.max_srq_sge; + props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES; props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? IB_ATOMIC_HCA : IB_ATOMIC_NONE; + props->masked_atomic_cap = props->atomic_cap; props->max_pkeys = dev->dev->caps.pkey_table_len[1]; props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; - props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1; + props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; out: kfree(in_mad); @@ -144,11 +220,22 @@ out: return err; } -static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props) +static enum rdma_link_layer +mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) +{ + struct mlx4_dev *dev = to_mdev(device)->dev; + + return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ? 
+ IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; +} + +static int ib_link_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; + int ext_active_speed; + int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -156,16 +243,19 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, if (!in_mad || !out_mad) goto out; - memset(props, 0, sizeof *props); - init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) + mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; + + err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, + in_mad, out_mad); if (err) goto out; + props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); @@ -173,7 +263,10 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, props->state = out_mad->data[32] & 0xf; props->phys_state = out_mad->data[33] >> 4; props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); - props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; + if (netw_view) + props->gid_tbl_len = out_mad->data[50]; + else + props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); @@ -186,19 +279,132 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; + /* Check if extended speeds (EDR/FDR/...) are supported */ + if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { + ext_active_speed = out_mad->data[62] >> 4; + + switch (ext_active_speed) { + case 1: + props->active_speed = IB_SPEED_FDR; + break; + case 2: + props->active_speed = IB_SPEED_EDR; + break; + } + } + + /* If reported active speed is QDR, check if is FDR-10 */ + if (props->active_speed == IB_SPEED_QDR) { + init_query_mad(in_mad); + in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, + NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + /* Checking LinkSpeedActive for FDR-10 */ + if (out_mad->data[15] & 0x1) + props->active_speed = IB_SPEED_FDR10; + } + + /* Avoid wrong speed value returned by FW if the IB link is down. */ + if (props->state == IB_PORT_DOWN) + props->active_speed = IB_SPEED_SDR; + out: kfree(in_mad); kfree(out_mad); + return err; +} +static u8 state_to_phys_state(enum ib_port_state state) +{ + return state == IB_PORT_ACTIVE ? 
5 : 3; +} + +static int eth_link_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, int netw_view) +{ + + struct mlx4_ib_dev *mdev = to_mdev(ibdev); + struct mlx4_ib_iboe *iboe = &mdev->iboe; + struct net_device *ndev; + enum ib_mtu tmp; + struct mlx4_cmd_mailbox *mailbox; + int err = 0; + + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + goto out; + + props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? + IB_WIDTH_4X : IB_WIDTH_1X; + props->active_speed = IB_SPEED_QDR; + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; + props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; + props->max_msg_sz = mdev->dev->caps.max_msg_sz; + props->pkey_tbl_len = 1; + props->max_mtu = IB_MTU_4096; + props->max_vl_num = 2; + props->state = IB_PORT_DOWN; + props->phys_state = state_to_phys_state(props->state); + props->active_mtu = IB_MTU_256; + spin_lock(&iboe->lock); + ndev = iboe->netdevs[port - 1]; + if (!ndev) + goto out_unlock; + + tmp = iboe_get_mtu(ndev->mtu); + props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256; + + props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + props->phys_state = state_to_phys_state(props->state); +out_unlock: + spin_unlock(&iboe->lock); +out: + mlx4_free_cmd_mailbox(mdev->dev, mailbox); return err; } -static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid) +int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, int netw_view) +{ + int err; + + memset(props, 0, sizeof *props); + + err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? 
+ ib_link_query_port(ibdev, port, props, netw_view) : + eth_link_query_port(ibdev, port, props, netw_view); + + return err; +} + +static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + /* returns host view */ + return __mlx4_ib_query_port(ibdev, port, props, 0); +} + +int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; + struct mlx4_ib_dev *dev = to_mdev(ibdev); + int clear = 0; + int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); @@ -209,33 +415,68 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; in_mad->attr_mod = cpu_to_be32(port); - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (mlx4_is_mfunc(dev->dev) && netw_view) + mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; + + err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(gid->raw, out_mad->data + 8, 8); + if (mlx4_is_mfunc(dev->dev) && !netw_view) { + if (index) { + /* For any index > 0, return the null guid */ + err = 0; + clear = 1; + goto out; + } + } + init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; in_mad->attr_mod = cpu_to_be32(index / 8); - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, + NULL, NULL, in_mad, out_mad); if (err) goto out; memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); out: + if (clear) + memset(gid->raw + 8, 0, 8); kfree(in_mad); kfree(out_mad); return err; } -static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) +static int iboe_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + + *gid = dev->iboe.gid_table[port - 1][index]; + + return 0; +} + +static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) + return __mlx4_ib_query_gid(ibdev, port, index, gid, 0); + else + return iboe_query_gid(ibdev, port, index, gid); +} + +int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey, int netw_view) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; + int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -247,7 +488,11 @@ static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; in_mad->attr_mod = cpu_to_be32(index / 32); - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) + mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; + + err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, + in_mad, out_mad); if (err) goto out; @@ -259,23 +504,49 @@ out: return err; } +static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0); +} + static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, struct ib_device_modify *props) { + struct mlx4_cmd_mailbox *mailbox; + unsigned long flags; + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) return -EOPNOTSUPP; - if (mask & 
IB_DEVICE_MODIFY_NODE_DESC) { - spin_lock(&to_mdev(ibdev)->sm_lock); - memcpy(ibdev->node_desc, props->node_desc, 64); - spin_unlock(&to_mdev(ibdev)->sm_lock); - } + if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) + return 0; + + if (mlx4_is_slave(to_mdev(ibdev)->dev)) + return -EOPNOTSUPP; + + spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); + memcpy(ibdev->node_desc, props->node_desc, 64); + spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); + + /* + * If possible, pass node desc to FW, so it can generate + * a 144 trap. If cmd fails, just ignore. + */ + mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev); + if (IS_ERR(mailbox)) + return 0; + + memcpy(mailbox->buf, props->node_desc, 64); + mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, + MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); return 0; } -static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, - u32 cap_mask) +static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, + u32 cap_mask) { struct mlx4_cmd_mailbox *mailbox; int err; @@ -284,8 +555,6 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - memset(mailbox->buf, 0, 256); - if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { *(u8 *) mailbox->buf = !!reset_qkey_viols << 6; ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask); @@ -295,7 +564,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, } err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B); + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev->dev, mailbox); return err; @@ -304,11 +573,20 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { + struct mlx4_ib_dev *mdev = to_mdev(ibdev); + u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; struct ib_port_attr attr; u32 cap_mask; int err; - mutex_lock(&to_mdev(ibdev)->cap_mask_mutex); + /* return OK if this is RoCE. CM calls ib_modify_port() regardless + * of whether port link layer is ETH or IB. For ETH ports, qkey + * violations and port capabilities are not meaningful. 
+ */ + if (is_eth) + return 0; + + mutex_lock(&mdev->cap_mask_mutex); err = mlx4_ib_query_port(ibdev, port, &attr); if (err) @@ -317,9 +595,9 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & ~props->clr_port_cap_mask; - err = mlx4_SET_PORT(to_mdev(ibdev), port, - !!(mask & IB_PORT_RESET_QKEY_CNTR), - cap_mask); + err = mlx4_ib_SET_PORT(mdev, port, + !!(mask & IB_PORT_RESET_QKEY_CNTR), + cap_mask); out: mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); @@ -331,12 +609,24 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, { struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_ucontext *context; + struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3; struct mlx4_ib_alloc_ucontext_resp resp; int err; - resp.qp_tab_size = dev->dev->caps.num_qps; - resp.bf_reg_size = dev->dev->caps.bf_reg_size; - resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + if (!dev->ib_active) + return ERR_PTR(-EAGAIN); + + if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { + resp_v3.qp_tab_size = dev->dev->caps.num_qps; + resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size; + resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + } else { + resp.dev_caps = dev->dev->caps.userspace_caps; + resp.qp_tab_size = dev->dev->caps.num_qps; + resp.bf_reg_size = dev->dev->caps.bf_reg_size; + resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; + resp.cqe_size = dev->dev->caps.cqe_size; + } context = kmalloc(sizeof *context, GFP_KERNEL); if (!context) @@ -351,7 +641,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, INIT_LIST_HEAD(&context->db_page_list); mutex_init(&context->db_page_mutex); - err = ib_copy_to_udata(udata, &resp, sizeof resp); + if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) + err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)); + else + err = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (err) { mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); kfree(context); @@ -386,8 +680,7 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) { - /* FIXME want pgprot_writecombine() for BlueFlame pages */ - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); if (io_remap_pfn_range(vma, vma->vm_start, to_mucontext(context)->uar.pfn + @@ -435,24 +728,564 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd) return 0; } +static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx4_ib_xrcd *xrcd; + int err; + + if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); + + xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); + if (!xrcd) + return ERR_PTR(-ENOMEM); + + err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn); + if (err) + goto err1; + + xrcd->pd = ib_alloc_pd(ibdev); + if (IS_ERR(xrcd->pd)) { + err = PTR_ERR(xrcd->pd); + goto err2; + } + + xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0); + if (IS_ERR(xrcd->cq)) { + err = PTR_ERR(xrcd->cq); + goto err3; + } + + return &xrcd->ibxrcd; + +err3: + ib_dealloc_pd(xrcd->pd); +err2: + mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn); +err1: + kfree(xrcd); + return ERR_PTR(err); +} + +static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + 
ib_destroy_cq(to_mxrcd(xrcd)->cq); + ib_dealloc_pd(to_mxrcd(xrcd)->pd); + mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); + kfree(xrcd); + + return 0; +} + +static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) +{ + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_gid_entry *ge; + + ge = kzalloc(sizeof *ge, GFP_KERNEL); + if (!ge) + return -ENOMEM; + + ge->gid = *gid; + if (mlx4_ib_add_mc(mdev, mqp, gid)) { + ge->port = mqp->port; + ge->added = 1; + } + + mutex_lock(&mqp->mutex); + list_add_tail(&ge->list, &mqp->gid_list); + mutex_unlock(&mqp->mutex); + + return 0; +} + +int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + union ib_gid *gid) +{ + struct net_device *ndev; + int ret = 0; + + if (!mqp->port) + return 0; + + spin_lock(&mdev->iboe.lock); + ndev = mdev->iboe.netdevs[mqp->port - 1]; + if (ndev) + dev_hold(ndev); + spin_unlock(&mdev->iboe.lock); + + if (ndev) { + ret = 1; + dev_put(ndev); + } + + return ret; +} + +struct mlx4_ib_steering { + struct list_head list; + u64 reg_id; + union ib_gid gid; +}; + +static int parse_flow_attr(struct mlx4_dev *dev, + u32 qp_num, + union ib_flow_spec *ib_spec, + struct _rule_hw *mlx4_spec) +{ + enum mlx4_net_trans_rule_id type; + + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + type = MLX4_NET_TRANS_RULE_ID_ETH; + memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac, + ETH_ALEN); + memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac, + ETH_ALEN); + mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag; + mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag; + break; + case IB_FLOW_SPEC_IB: + type = MLX4_NET_TRANS_RULE_ID_IB; + mlx4_spec->ib.l3_qpn = + cpu_to_be32(qp_num); + mlx4_spec->ib.qpn_mask = + cpu_to_be32(MLX4_IB_FLOW_QPN_MASK); + break; + + + case IB_FLOW_SPEC_IPV4: + type = MLX4_NET_TRANS_RULE_ID_IPV4; + mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip; + mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip; + mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip; + mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip; + break; + + case IB_FLOW_SPEC_TCP: + case IB_FLOW_SPEC_UDP: + type = ib_spec->type == IB_FLOW_SPEC_TCP ? 
+ MLX4_NET_TRANS_RULE_ID_TCP : + MLX4_NET_TRANS_RULE_ID_UDP; + mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port; + mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port; + mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port; + mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port; + break; + + default: + return -EINVAL; + } + if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 || + mlx4_hw_rule_sz(dev, type) < 0) + return -EINVAL; + mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type)); + mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2; + return mlx4_hw_rule_sz(dev, type); +} + +struct default_rules { + __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS]; + __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS]; + __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS]; + __u8 link_layer; +}; +static const struct default_rules default_table[] = { + { + .mandatory_fields = {IB_FLOW_SPEC_IPV4}, + .mandatory_not_fields = {IB_FLOW_SPEC_ETH}, + .rules_create_list = {IB_FLOW_SPEC_IB}, + .link_layer = IB_LINK_LAYER_INFINIBAND + } +}; + +static int __mlx4_ib_default_rules_match(struct ib_qp *qp, + struct ib_flow_attr *flow_attr) +{ + int i, j, k; + void *ib_flow; + const struct default_rules *pdefault_rules = default_table; + u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port); + + for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++, + pdefault_rules++) { + __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS]; + memset(&field_types, 0, sizeof(field_types)); + + if (link_layer != pdefault_rules->link_layer) + continue; + + ib_flow = flow_attr + 1; + /* we assume the specs are sorted */ + for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS && + j < flow_attr->num_of_specs; k++) { + union ib_flow_spec *current_flow = + (union ib_flow_spec *)ib_flow; + + /* same layer but different type */ + if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) == + (pdefault_rules->mandatory_fields[k] & + IB_FLOW_SPEC_LAYER_MASK)) && + (current_flow->type != + pdefault_rules->mandatory_fields[k])) + goto out; + + /* same layer, try match next one */ + if (current_flow->type == + pdefault_rules->mandatory_fields[k]) { + j++; + ib_flow += + ((union ib_flow_spec *)ib_flow)->size; + } + } + + ib_flow = flow_attr + 1; + for (j = 0; j < flow_attr->num_of_specs; + j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size) + for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++) + /* same layer and same type */ + if (((union ib_flow_spec *)ib_flow)->type == + pdefault_rules->mandatory_not_fields[k]) + goto out; + + return i; + } +out: + return -1; +} + +static int __mlx4_ib_create_default_rules( + struct mlx4_ib_dev *mdev, + struct ib_qp *qp, + const struct default_rules *pdefault_rules, + struct _rule_hw *mlx4_spec) { + int size = 0; + int i; + + for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/ + sizeof(pdefault_rules->rules_create_list[0]); i++) { + int ret; + union ib_flow_spec ib_spec; + switch (pdefault_rules->rules_create_list[i]) { + case 0: + /* no rule */ + continue; + case IB_FLOW_SPEC_IB: + ib_spec.type = IB_FLOW_SPEC_IB; + ib_spec.size = sizeof(struct ib_flow_spec_ib); + + break; + default: + /* invalid rule */ + return -EINVAL; + } + /* We must put empty rule, qpn is being ignored */ + ret = parse_flow_attr(mdev->dev, 0, &ib_spec, + mlx4_spec); + if (ret < 0) { + pr_info("invalid parsing\n"); + return -EINVAL; + } + + mlx4_spec = (void *)mlx4_spec + ret; + size += ret; + } + return size; +} + +static int __mlx4_ib_create_flow(struct 
ib_qp *qp, struct ib_flow_attr *flow_attr, + int domain, + enum mlx4_net_trans_promisc_mode flow_type, + u64 *reg_id) +{ + int ret, i; + int size = 0; + void *ib_flow; + struct mlx4_ib_dev *mdev = to_mdev(qp->device); + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_net_trans_rule_hw_ctrl *ctrl; + int default_flow; + + static const u16 __mlx4_domain[] = { + [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS, + [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL, + [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS, + [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC, + }; + + if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) { + pr_err("Invalid priority value %d\n", flow_attr->priority); + return -EINVAL; + } + + if (domain >= IB_FLOW_DOMAIN_NUM) { + pr_err("Invalid domain value %d\n", domain); + return -EINVAL; + } + + if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0) + return -EINVAL; + + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + ctrl = mailbox->buf; + + ctrl->prio = cpu_to_be16(__mlx4_domain[domain] | + flow_attr->priority); + ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type); + ctrl->port = flow_attr->port; + ctrl->qpn = cpu_to_be32(qp->qp_num); + + ib_flow = flow_attr + 1; + size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); + /* Add default flows */ + default_flow = __mlx4_ib_default_rules_match(qp, flow_attr); + if (default_flow >= 0) { + ret = __mlx4_ib_create_default_rules( + mdev, qp, default_table + default_flow, + mailbox->buf + size); + if (ret < 0) { + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return -EINVAL; + } + size += ret; + } + for (i = 0; i < flow_attr->num_of_specs; i++) { + ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow, + mailbox->buf + size); + if (ret < 0) { + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return -EINVAL; + } + ib_flow += ((union ib_flow_spec *) ib_flow)->size; + size += ret; + } + + ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0, + MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (ret == -ENOMEM) + pr_err("mcg table is full. Failed to register network rule.\n"); + else if (ret == -ENXIO) + pr_err("Device managed flow steering is disabled. Failed to register network rule.\n"); + else if (ret) + pr_err("Invalid argument. Failed to register network rule.\n"); + + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return ret; +} + +static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id) +{ + int err; + err = mlx4_cmd(dev, reg_id, 0, 0, + MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + pr_err("Failed to detach network rule. 
registration id = 0x%llx\n", + reg_id); + return err; +} + +static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain) +{ + int err = 0, i = 0; + struct mlx4_ib_flow *mflow; + enum mlx4_net_trans_promisc_mode type[2]; + + memset(type, 0, sizeof(type)); + + mflow = kzalloc(sizeof(*mflow), GFP_KERNEL); + if (!mflow) { + err = -ENOMEM; + goto err_free; + } + + switch (flow_attr->type) { + case IB_FLOW_ATTR_NORMAL: + type[0] = MLX4_FS_REGULAR; + break; + + case IB_FLOW_ATTR_ALL_DEFAULT: + type[0] = MLX4_FS_ALL_DEFAULT; + break; + + case IB_FLOW_ATTR_MC_DEFAULT: + type[0] = MLX4_FS_MC_DEFAULT; + break; + + case IB_FLOW_ATTR_SNIFFER: + type[0] = MLX4_FS_UC_SNIFFER; + type[1] = MLX4_FS_MC_SNIFFER; + break; + + default: + err = -EINVAL; + goto err_free; + } + + while (i < ARRAY_SIZE(type) && type[i]) { + err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i], + &mflow->reg_id[i]); + if (err) + goto err_free; + i++; + } + + return &mflow->ibflow; + +err_free: + kfree(mflow); + return ERR_PTR(err); +} + +static int mlx4_ib_destroy_flow(struct ib_flow *flow_id) +{ + int err, ret = 0; + int i = 0; + struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device); + struct mlx4_ib_flow *mflow = to_mflow(flow_id); + + while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i]) { + err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i]); + if (err) + ret = err; + i++; + } + + kfree(mflow); + return ret; +} + static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return mlx4_multicast_attach(to_mdev(ibqp->device)->dev, - &to_mqp(ibqp)->mqp, gid->raw, - !!(to_mqp(ibqp)->flags & - MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)); + int err; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + u64 reg_id; + struct mlx4_ib_steering *ib_steering = NULL; + enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ? + MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6; + + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL); + if (!ib_steering) + return -ENOMEM; + } + + err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, + !!(mqp->flags & + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), + prot, ®_id); + if (err) + goto err_malloc; + + err = add_gid_entry(ibqp, gid); + if (err) + goto err_add; + + if (ib_steering) { + memcpy(ib_steering->gid.raw, gid->raw, 16); + ib_steering->reg_id = reg_id; + mutex_lock(&mqp->mutex); + list_add(&ib_steering->list, &mqp->steering_rules); + mutex_unlock(&mqp->mutex); + } + return 0; + +err_add: + mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + prot, reg_id); +err_malloc: + kfree(ib_steering); + + return err; +} + +static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw) +{ + struct mlx4_ib_gid_entry *ge; + struct mlx4_ib_gid_entry *tmp; + struct mlx4_ib_gid_entry *ret = NULL; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + if (!memcmp(raw, ge->gid.raw, 16)) { + ret = ge; + break; + } + } + + return ret; } static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return mlx4_multicast_detach(to_mdev(ibqp->device)->dev, - &to_mqp(ibqp)->mqp, gid->raw); + int err; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); + struct mlx4_ib_qp *mqp = to_mqp(ibqp); + struct net_device *ndev; + struct mlx4_ib_gid_entry *ge; + u64 reg_id = 0; + enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ? 
+ MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6; + + if (mdev->dev->caps.steering_mode == + MLX4_STEERING_MODE_DEVICE_MANAGED) { + struct mlx4_ib_steering *ib_steering; + + mutex_lock(&mqp->mutex); + list_for_each_entry(ib_steering, &mqp->steering_rules, list) { + if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) { + list_del(&ib_steering->list); + break; + } + } + mutex_unlock(&mqp->mutex); + if (&ib_steering->list == &mqp->steering_rules) { + pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n"); + return -EINVAL; + } + reg_id = ib_steering->reg_id; + kfree(ib_steering); + } + + err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, + prot, reg_id); + if (err) + return err; + + mutex_lock(&mqp->mutex); + ge = find_gid_entry(mqp, gid->raw); + if (ge) { + spin_lock(&mdev->iboe.lock); + ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL; + if (ndev) + dev_hold(ndev); + spin_unlock(&mdev->iboe.lock); + if (ndev) + dev_put(ndev); + list_del(&ge->list); + kfree(ge); + } else + pr_warn("could not find mgid entry\n"); + + mutex_unlock(&mqp->mutex); + + return 0; } static int init_node_data(struct mlx4_ib_dev *dev) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; + int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -462,8 +1295,10 @@ static int init_node_data(struct mlx4_ib_dev *dev) init_query_mad(in_mad); in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; + if (mlx4_is_master(dev->dev)) + mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; - err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -471,7 +1306,7 @@ static int init_node_data(struct mlx4_ib_dev *dev) in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; - err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); if (err) goto out; @@ -531,31 +1366,620 @@ static struct device_attribute *mlx4_class_attributes[] = { &dev_attr_board_id }; -static void *mlx4_ib_add(struct mlx4_dev *dev) +static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, + struct net_device *dev) { - static int mlx4_ib_version_printed; + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + if (vlan_id < 0x1000) { + eui[3] = vlan_id >> 8; + eui[4] = vlan_id & 0xff; + } else { + eui[3] = 0xff; + eui[4] = 0xfe; + } + eui[0] ^= 2; +} + +static void update_gids_task(struct work_struct *work) +{ + struct update_gid_work *gw = container_of(work, struct update_gid_work, work); + struct mlx4_cmd_mailbox *mailbox; + union ib_gid *gids; + int err; + struct mlx4_dev *dev = gw->dev->dev; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox)); + return; + } + + gids = mailbox->buf; + memcpy(gids, gw->gids, sizeof gw->gids); + + err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + pr_warn("set port command failed\n"); + else + mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); + + mlx4_free_cmd_mailbox(dev, mailbox); + kfree(gw); +} + +static void reset_gids_task(struct work_struct *work) +{ + struct update_gid_work *gw = + container_of(work, struct update_gid_work, work); + struct mlx4_cmd_mailbox *mailbox; + union ib_gid *gids; + int err; + struct mlx4_dev *dev = gw->dev->dev; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if 
(IS_ERR(mailbox)) { + pr_warn("reset gid table failed\n"); + goto free; + } + + gids = mailbox->buf; + memcpy(gids, gw->gids, sizeof(gw->gids)); + + if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, gw->port) == + IB_LINK_LAYER_ETHERNET) { + err = mlx4_cmd(dev, mailbox->dma, + MLX4_SET_PORT_GID_TABLE << 8 | gw->port, + 1, MLX4_CMD_SET_PORT, + MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + pr_warn(KERN_WARNING + "set port %d command failed\n", gw->port); + } + + mlx4_free_cmd_mailbox(dev, mailbox); +free: + kfree(gw); +} + +static int update_gid_table(struct mlx4_ib_dev *dev, int port, + union ib_gid *gid, int clear, + int default_gid) +{ + struct update_gid_work *work; + int i; + int need_update = 0; + int free = -1; + int found = -1; + int max_gids; + + if (default_gid) { + free = 0; + } else { + max_gids = dev->dev->caps.gid_table_len[port]; + for (i = 1; i < max_gids; ++i) { + if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid, + sizeof(*gid))) + found = i; + + if (clear) { + if (found >= 0) { + need_update = 1; + dev->iboe.gid_table[port - 1][found] = + zgid; + break; + } + } else { + if (found >= 0) + break; + + if (free < 0 && + !memcmp(&dev->iboe.gid_table[port - 1][i], + &zgid, sizeof(*gid))) + free = i; + } + } + } + + if (found == -1 && !clear && free >= 0) { + dev->iboe.gid_table[port - 1][free] = *gid; + need_update = 1; + } + + if (!need_update) + return 0; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return -ENOMEM; + + memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids)); + INIT_WORK(&work->work, update_gids_task); + work->port = port; + work->dev = dev; + queue_work(wq, &work->work); + + return 0; +} + +static void mlx4_make_default_gid(struct net_device *dev, union ib_gid *gid) +{ + gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + mlx4_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev); +} + + +static int reset_gid_table(struct mlx4_ib_dev *dev, u8 port) +{ + struct update_gid_work *work; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return -ENOMEM; + + memset(dev->iboe.gid_table[port - 1], 0, sizeof(work->gids)); + memset(work->gids, 0, sizeof(work->gids)); + INIT_WORK(&work->work, reset_gids_task); + work->dev = dev; + work->port = port; + queue_work(wq, &work->work); + return 0; +} + +static int mlx4_ib_addr_event(int event, struct net_device *event_netdev, + struct mlx4_ib_dev *ibdev, union ib_gid *gid) +{ + struct mlx4_ib_iboe *iboe; + int port = 0; + struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ? + rdma_vlan_dev_real_dev(event_netdev) : + event_netdev; + union ib_gid default_gid; + + mlx4_make_default_gid(real_dev, &default_gid); + + if (!memcmp(gid, &default_gid, sizeof(*gid))) + return 0; + + if (event != NETDEV_DOWN && event != NETDEV_UP) + return 0; + + if ((real_dev != event_netdev) && + (event == NETDEV_DOWN) && + rdma_link_local_addr((struct in6_addr *)gid)) + return 0; + + iboe = &ibdev->iboe; + spin_lock(&iboe->lock); + + for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) + if ((netif_is_bond_master(real_dev) && + (real_dev == iboe->masters[port - 1])) || + (!netif_is_bond_master(real_dev) && + (real_dev == iboe->netdevs[port - 1]))) + update_gid_table(ibdev, port, gid, + event == NETDEV_DOWN, 0); + + spin_unlock(&iboe->lock); + return 0; + +} + +static u8 mlx4_ib_get_dev_port(struct net_device *dev, + struct mlx4_ib_dev *ibdev) +{ + u8 port = 0; + struct mlx4_ib_iboe *iboe; + struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ? 
+ rdma_vlan_dev_real_dev(dev) : dev; + + iboe = &ibdev->iboe; + + for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) + if ((netif_is_bond_master(real_dev) && + (real_dev == iboe->masters[port - 1])) || + (!netif_is_bond_master(real_dev) && + (real_dev == iboe->netdevs[port - 1]))) + break; + + if ((port == 0) || (port > ibdev->dev->caps.num_ports)) + return 0; + else + return port; +} + +static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct mlx4_ib_dev *ibdev; + struct in_ifaddr *ifa = ptr; + union ib_gid gid; + struct net_device *event_netdev = ifa->ifa_dev->dev; + + ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); + + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet); + + mlx4_ib_addr_event(event, event_netdev, ibdev, &gid); + return NOTIFY_DONE; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct mlx4_ib_dev *ibdev; + struct inet6_ifaddr *ifa = ptr; + union ib_gid *gid = (union ib_gid *)&ifa->addr; + struct net_device *event_netdev = ifa->idev->dev; + + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6); + + mlx4_ib_addr_event(event, event_netdev, ibdev, gid); + return NOTIFY_DONE; +} +#endif + +#define MLX4_IB_INVALID_MAC ((u64)-1) +static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, + struct net_device *dev, + int port) +{ + u64 new_smac = 0; + u64 release_mac = MLX4_IB_INVALID_MAC; + struct mlx4_ib_qp *qp; + + read_lock(&dev_base_lock); + new_smac = mlx4_mac_to_u64(dev->dev_addr); + read_unlock(&dev_base_lock); + + mutex_lock(&ibdev->qp1_proxy_lock[port - 1]); + qp = ibdev->qp1_proxy[port - 1]; + if (qp) { + int new_smac_index; + u64 old_smac = qp->pri.smac; + struct mlx4_update_qp_params update_params; + + if (new_smac == old_smac) + goto unlock; + + new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac); + + if (new_smac_index < 0) + goto unlock; + + update_params.smac_index = new_smac_index; + if (mlx4_update_qp(ibdev->dev, &qp->mqp, MLX4_UPDATE_QP_SMAC, + &update_params)) { + release_mac = new_smac; + goto unlock; + } + + qp->pri.smac = new_smac; + qp->pri.smac_index = new_smac_index; + + release_mac = old_smac; + } + +unlock: + mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]); + if (release_mac != MLX4_IB_INVALID_MAC) + mlx4_unregister_mac(ibdev->dev, port, release_mac); +} + +static void mlx4_ib_get_dev_addr(struct net_device *dev, + struct mlx4_ib_dev *ibdev, u8 port) +{ + struct in_device *in_dev; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *in6_dev; + union ib_gid *pgid; + struct inet6_ifaddr *ifp; +#endif + union ib_gid gid; + + + if ((port == 0) || (port > ibdev->dev->caps.num_ports)) + return; + + /* IPv4 gids */ + in_dev = in_dev_get(dev); + if (in_dev) { + for_ifa(in_dev) { + /*ifa->ifa_address;*/ + ipv6_addr_set_v4mapped(ifa->ifa_address, + (struct in6_addr *)&gid); + update_gid_table(ibdev, port, &gid, 0, 0); + } + endfor_ifa(in_dev); + in_dev_put(in_dev); + } +#if IS_ENABLED(CONFIG_IPV6) + /* IPv6 gids */ + in6_dev = in6_dev_get(dev); + if (in6_dev) { + read_lock_bh(&in6_dev->lock); + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { + pgid = (union ib_gid *)&ifp->addr; + update_gid_table(ibdev, port, pgid, 0, 0); + } + read_unlock_bh(&in6_dev->lock); + in6_dev_put(in6_dev); + } +#endif +} + +static void mlx4_ib_set_default_gid(struct mlx4_ib_dev *ibdev, + struct net_device *dev, u8 port) +{ + union ib_gid gid; + mlx4_make_default_gid(dev, &gid); + 
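/*
 * For reference: mlx4_make_default_gid() above composes the default
 * RoCE GID from the IPv6 link-local prefix fe80::/64 plus the modified
 * EUI-64 that mlx4_addrconf_ifid_eui48() derives from the netdev MAC.
 * The MAC is split 3+3 around ff:fe (or around the VLAN ID when one is
 * set), and the universal/local bit of the first octet is flipped
 * (eui[0] ^= 2). Worked through on an illustrative MAC:
 *
 *   MAC 00:25:90:ab:cd:ef, no VLAN (vlan_id = 0xffff)
 *     -> interface ID 02:25:90:ff:fe:ab:cd:ef
 *     -> GID fe80:0000:0000:0000:0225:90ff:feab:cdef
 *
 *   Same MAC on VLAN 10 (0x000a)
 *     -> interface ID 02:25:90:00:0a:ab:cd:ef
 */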
update_gid_table(ibdev, port, &gid, 0, 1); +} + +static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) +{ + struct net_device *dev; + struct mlx4_ib_iboe *iboe = &ibdev->iboe; + int i; + + for (i = 1; i <= ibdev->num_ports; ++i) + if (reset_gid_table(ibdev, i)) + return -1; + + read_lock(&dev_base_lock); + spin_lock(&iboe->lock); + + for_each_netdev(&init_net, dev) { + u8 port = mlx4_ib_get_dev_port(dev, ibdev); + if (port) + mlx4_ib_get_dev_addr(dev, ibdev, port); + } + + spin_unlock(&iboe->lock); + read_unlock(&dev_base_lock); + + return 0; +} + +static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, + struct net_device *dev, + unsigned long event) +{ + struct mlx4_ib_iboe *iboe; + int update_qps_port = -1; + int port; + + iboe = &ibdev->iboe; + + spin_lock(&iboe->lock); + mlx4_foreach_ib_transport_port(port, ibdev->dev) { + enum ib_port_state port_state = IB_PORT_NOP; + struct net_device *old_master = iboe->masters[port - 1]; + struct net_device *curr_netdev; + struct net_device *curr_master; + + iboe->netdevs[port - 1] = + mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); + if (iboe->netdevs[port - 1]) + mlx4_ib_set_default_gid(ibdev, + iboe->netdevs[port - 1], port); + curr_netdev = iboe->netdevs[port - 1]; + + if (iboe->netdevs[port - 1] && + netif_is_bond_slave(iboe->netdevs[port - 1])) { + iboe->masters[port - 1] = netdev_master_upper_dev_get( + iboe->netdevs[port - 1]); + } else { + iboe->masters[port - 1] = NULL; + } + curr_master = iboe->masters[port - 1]; + + if (dev == iboe->netdevs[port - 1] && + (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER || + event == NETDEV_UP || event == NETDEV_CHANGE)) + update_qps_port = port; + + if (curr_netdev) { + port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + mlx4_ib_set_default_gid(ibdev, curr_netdev, port); + } else { + reset_gid_table(ibdev, port); + } + /* if using bonding/team and a slave port is down, we don't want the bond IP + * based gids in the table since flows that select port by gid may get + * the down port. + */ + if (curr_master && (port_state == IB_PORT_DOWN)) { + reset_gid_table(ibdev, port); + mlx4_ib_set_default_gid(ibdev, curr_netdev, port); + } + /* if bonding is used it is possible that we add it to masters + * only after IP address is assigned to the net bonding + * interface. 
+ */ + if (curr_master && (old_master != curr_master)) { + reset_gid_table(ibdev, port); + mlx4_ib_set_default_gid(ibdev, curr_netdev, port); + mlx4_ib_get_dev_addr(curr_master, ibdev, port); + } + + if (!curr_master && (old_master != curr_master)) { + reset_gid_table(ibdev, port); + mlx4_ib_set_default_gid(ibdev, curr_netdev, port); + mlx4_ib_get_dev_addr(curr_netdev, ibdev, port); + } + } + + spin_unlock(&iboe->lock); + + if (update_qps_port > 0) + mlx4_ib_update_qps(ibdev, dev, update_qps_port); +} + +static int mlx4_ib_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mlx4_ib_dev *ibdev; + + if (!net_eq(dev_net(dev), &init_net)) + return NOTIFY_DONE; + + ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); + mlx4_ib_scan_netdevs(ibdev, dev, event); + + return NOTIFY_DONE; +} + +static void init_pkeys(struct mlx4_ib_dev *ibdev) +{ + int port; + int slave; int i; + if (mlx4_is_master(ibdev->dev)) { + for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) { + for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { + for (i = 0; + i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; + ++i) { + ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] = + /* master has the identity virt2phys pkey mapping */ + (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i : + ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1; + mlx4_sync_pkey_table(ibdev->dev, slave, port, i, + ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]); + } + } + } + /* initialize pkey cache */ + for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { + for (i = 0; + i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; + ++i) + ibdev->pkeys.phys_pkey_cache[port-1][i] = + (i) ? 0 : 0xFFFF; + } + } +} + +static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) +{ + char name[80]; + int eq_per_port = 0; + int added_eqs = 0; + int total_eqs = 0; + int i, j, eq; + + /* Legacy mode or comp_pool is not large enough */ + if (dev->caps.comp_pool == 0 || + dev->caps.num_ports > dev->caps.comp_pool) + return; + + eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/ + dev->caps.num_ports); + + /* Init eq table */ + added_eqs = 0; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + added_eqs += eq_per_port; + + total_eqs = dev->caps.num_comp_vectors + added_eqs; + + ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL); + if (!ibdev->eq_table) + return; - if (!mlx4_ib_version_printed) { - printk(KERN_INFO "%s", mlx4_ib_version); - ++mlx4_ib_version_printed; + ibdev->eq_added = added_eqs; + + eq = 0; + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { + for (j = 0; j < eq_per_port; j++) { + snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", + i, j, dev->pdev->bus->name); + /* Set IRQ for specific name (per ring) */ + if (mlx4_assign_eq(dev, name, NULL, + &ibdev->eq_table[eq])) { + /* Use legacy (same as mlx4_en driver) */ + pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq); + ibdev->eq_table[eq] = + (eq % dev->caps.num_comp_vectors); + } + eq++; + } } + /* Fill the reset of the vector with legacy EQ */ + for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++) + ibdev->eq_table[eq++] = i; + + /* Advertise the new number of EQs to clients */ + ibdev->ib_dev.num_comp_vectors = total_eqs; +} + +static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) +{ + int i; + + /* no additional eqs were added */ + if (!ibdev->eq_table) + return; + + /* Reset the advertised EQ number */ + 
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; + + /* Free only the added eqs */ + for (i = 0; i < ibdev->eq_added; i++) { + /* Don't free legacy eqs if used */ + if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors) + continue; + mlx4_release_eq(dev, ibdev->eq_table[i]); + } + + kfree(ibdev->eq_table); +} + +static void *mlx4_ib_add(struct mlx4_dev *dev) +{ + struct mlx4_ib_dev *ibdev; + int num_ports = 0; + int i, j; + int err; + struct mlx4_ib_iboe *iboe; + int ib_num_ports = 0; + + pr_info_once("%s", mlx4_ib_version); + + num_ports = 0; + mlx4_foreach_ib_transport_port(i, dev) + num_ports++; + + /* No point in registering a device with no ports... */ + if (num_ports == 0) + return NULL; + ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); if (!ibdev) { dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); return NULL; } + iboe = &ibdev->iboe; + if (mlx4_pd_alloc(dev, &ibdev->priv_pdn)) goto err_dealloc; if (mlx4_uar_alloc(dev, &ibdev->priv_uar)) goto err_pd; - ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT, + PAGE_SIZE); if (!ibdev->uar_map) goto err_uar; MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); @@ -565,11 +1989,17 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); ibdev->ib_dev.owner = THIS_MODULE; ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; - ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports; - ibdev->ib_dev.num_comp_vectors = 1; + ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; + ibdev->num_ports = num_ports; + ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; + ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dma_device = &dev->pdev->dev; - ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; + if (dev->caps.userspace_caps) + ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; + else + ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION; + ibdev->ib_dev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -591,10 +2021,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | + (1ull << IB_USER_VERBS_CMD_OPEN_QP); ibdev->ib_dev.query_device = mlx4_ib_query_device; ibdev->ib_dev.query_port = mlx4_ib_query_port; + ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; ibdev->ib_dev.query_gid = mlx4_ib_query_gid; ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; ibdev->ib_dev.modify_device = mlx4_ib_modify_device; @@ -627,38 +2060,208 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; + ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; + ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; + ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; ibdev->ib_dev.process_mad = mlx4_ib_process_mad; - ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; - ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; - ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; - 
ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + if (!mlx4_is_slave(ibdev->dev)) { + ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; + ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; + ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; + ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; + } + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || + dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { + ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw; + ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw; + ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw; + + ibdev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + } + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { + ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; + ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; + ibdev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | + (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + } + + if (check_flow_steering_support(dev)) { + ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; + ibdev->ib_dev.create_flow = mlx4_ib_create_flow; + ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; + + ibdev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + } + + mlx4_ib_alloc_eqs(dev, ibdev); + + spin_lock_init(&iboe->lock); if (init_node_data(ibdev)) goto err_map; + for (i = 0; i < ibdev->num_ports; ++i) { + mutex_init(&ibdev->qp1_proxy_lock[i]); + if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == + IB_LINK_LAYER_ETHERNET) { + err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]); + if (err) + ibdev->counters[i] = -1; + } else { + ibdev->counters[i] = -1; + } + } + + mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) + ib_num_ports++; + spin_lock_init(&ibdev->sm_lock); mutex_init(&ibdev->cap_mask_mutex); - if (ib_register_device(&ibdev->ib_dev)) - goto err_map; + if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED && + ib_num_ports) { + ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; + err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, + MLX4_IB_UC_STEER_QPN_ALIGN, + &ibdev->steer_qpn_base); + if (err) + goto err_counter; + + ibdev->ib_uc_qpns_bitmap = + kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) * + sizeof(long), + GFP_KERNEL); + if (!ibdev->ib_uc_qpns_bitmap) { + dev_err(&dev->pdev->dev, "bit map alloc failed\n"); + goto err_steer_qp_release; + } + + bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count); + + err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( + dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_base + + ibdev->steer_qpn_count - 1); + if (err) + goto err_steer_free_bitmap; + } + + if (ib_register_device(&ibdev->ib_dev, NULL)) + goto err_steer_free_bitmap; if (mlx4_ib_mad_init(ibdev)) goto err_reg; - for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) { + if (mlx4_ib_init_sriov(ibdev)) + goto err_mad; + + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) { + if (!iboe->nb.notifier_call) { + iboe->nb.notifier_call = mlx4_ib_netdev_event; + err = register_netdevice_notifier(&iboe->nb); + if (err) { + iboe->nb.notifier_call = NULL; + goto err_notif; + } + } + if (!iboe->nb_inet.notifier_call) { + iboe->nb_inet.notifier_call = mlx4_ib_inet_event; + err = register_inetaddr_notifier(&iboe->nb_inet); + if (err) { + iboe->nb_inet.notifier_call = NULL; + goto err_notif; + } + } +#if IS_ENABLED(CONFIG_IPV6) + if (!iboe->nb_inet6.notifier_call) { + iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event; + err = register_inet6addr_notifier(&iboe->nb_inet6); + if 
(err) { + iboe->nb_inet6.notifier_call = NULL; + goto err_notif; + } + } +#endif + for (i = 1 ; i <= ibdev->num_ports ; ++i) + reset_gid_table(ibdev, i); + rtnl_lock(); + mlx4_ib_scan_netdevs(ibdev, NULL, 0); + rtnl_unlock(); + mlx4_ib_init_gid_table(ibdev); + } + + for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { if (device_create_file(&ibdev->ib_dev.dev, - mlx4_class_attributes[i])) - goto err_reg; + mlx4_class_attributes[j])) + goto err_notif; } + ibdev->ib_active = true; + + if (mlx4_is_mfunc(ibdev->dev)) + init_pkeys(ibdev); + + /* create paravirt contexts for any VFs which are active */ + if (mlx4_is_master(ibdev->dev)) { + for (j = 0; j < MLX4_MFUNC_MAX; j++) { + if (j == mlx4_master_func_num(ibdev->dev)) + continue; + if (mlx4_is_slave_active(ibdev->dev, j)) + do_slave_init(ibdev, j, 1); + } + } return ibdev; +err_notif: + if (ibdev->iboe.nb.notifier_call) { + if (unregister_netdevice_notifier(&ibdev->iboe.nb)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb.notifier_call = NULL; + } + if (ibdev->iboe.nb_inet.notifier_call) { + if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb_inet.notifier_call = NULL; + } +#if IS_ENABLED(CONFIG_IPV6) + if (ibdev->iboe.nb_inet6.notifier_call) { + if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb_inet6.notifier_call = NULL; + } +#endif + flush_workqueue(wq); + + mlx4_ib_close_sriov(ibdev); + +err_mad: + mlx4_ib_mad_cleanup(ibdev); + err_reg: ib_unregister_device(&ibdev->ib_dev); +err_steer_free_bitmap: + kfree(ibdev->ib_uc_qpns_bitmap); + +err_steer_qp_release: + if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_count); +err_counter: + for (; i; --i) + if (ibdev->counters[i - 1] != -1) + mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]); + err_map: iounmap(ibdev->uar_map); @@ -674,64 +2277,279 @@ err_dealloc: return NULL; } +int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn) +{ + int offset; + + WARN_ON(!dev->ib_uc_qpns_bitmap); + + offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap, + dev->steer_qpn_count, + get_count_order(count)); + if (offset < 0) + return offset; + + *qpn = dev->steer_qpn_base + offset; + return 0; +} + +void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count) +{ + if (!qpn || + dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED) + return; + + BUG_ON(qpn < dev->steer_qpn_base); + + bitmap_release_region(dev->ib_uc_qpns_bitmap, + qpn - dev->steer_qpn_base, + get_count_order(count)); +} + +int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + int is_attach) +{ + int err; + size_t flow_size; + struct ib_flow_attr *flow = NULL; + struct ib_flow_spec_ib *ib_spec; + + if (is_attach) { + flow_size = sizeof(struct ib_flow_attr) + + sizeof(struct ib_flow_spec_ib); + flow = kzalloc(flow_size, GFP_KERNEL); + if (!flow) + return -ENOMEM; + flow->port = mqp->port; + flow->num_of_specs = 1; + flow->size = flow_size; + ib_spec = (struct ib_flow_spec_ib *)(flow + 1); + ib_spec->type = IB_FLOW_SPEC_IB; + ib_spec->size = sizeof(struct ib_flow_spec_ib); + /* Add an empty rule for IB L2 */ + memset(&ib_spec->mask, 0, sizeof(ib_spec->mask)); + + err = __mlx4_ib_create_flow(&mqp->ibqp, flow, + IB_FLOW_DOMAIN_NIC, + MLX4_FS_REGULAR, + &mqp->reg_id); + } else { + err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); + } + 
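/* flow is only allocated on the attach path; kfree(NULL) is a no-op, so both paths can share this exit */ +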
kfree(flow); + return err; +} + static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) { struct mlx4_ib_dev *ibdev = ibdev_ptr; int p; - for (p = 1; p <= dev->caps.num_ports; ++p) - mlx4_CLOSE_PORT(dev, p); - + mlx4_ib_close_sriov(ibdev); mlx4_ib_mad_cleanup(ibdev); ib_unregister_device(&ibdev->ib_dev); + if (ibdev->iboe.nb.notifier_call) { + if (unregister_netdevice_notifier(&ibdev->iboe.nb)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb.notifier_call = NULL; + } + + if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) { + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_count); + kfree(ibdev->ib_uc_qpns_bitmap); + } + + if (ibdev->iboe.nb_inet.notifier_call) { + if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb_inet.notifier_call = NULL; + } +#if IS_ENABLED(CONFIG_IPV6) + if (ibdev->iboe.nb_inet6.notifier_call) { + if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6)) + pr_warn("failure unregistering notifier\n"); + ibdev->iboe.nb_inet6.notifier_call = NULL; + } +#endif + iounmap(ibdev->uar_map); + for (p = 0; p < ibdev->num_ports; ++p) + if (ibdev->counters[p] != -1) + mlx4_counter_free(ibdev->dev, ibdev->counters[p]); + mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) + mlx4_CLOSE_PORT(dev, p); + + mlx4_ib_free_eqs(dev, ibdev); + + mlx4_uar_free(dev, &ibdev->priv_uar); + mlx4_pd_free(dev, ibdev->priv_pdn); + ib_dealloc_device(&ibdev->ib_dev); +} + +static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) +{ + struct mlx4_ib_demux_work **dm = NULL; + struct mlx4_dev *dev = ibdev->dev; + int i; + unsigned long flags; + struct mlx4_active_ports actv_ports; + unsigned int ports; + unsigned int first_port; + + if (!mlx4_is_master(dev)) + return; + + actv_ports = mlx4_get_active_ports(dev, slave); + ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports); + first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports); + + dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC); + if (!dm) { + pr_err("failed to allocate memory for tunneling qp update\n"); + goto out; + } + + for (i = 0; i < ports; i++) { + dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); + if (!dm[i]) { + pr_err("failed to allocate memory for tunneling qp update work struct\n"); + for (i = 0; i < ports; i++) { + if (dm[i]) + kfree(dm[i]); + } + goto out; + } + } + /* initialize or tear down tunnel QPs for the slave */ + for (i = 0; i < ports; i++) { + INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); + dm[i]->port = first_port + i + 1; + dm[i]->slave = slave; + dm[i]->do_init = do_init; + dm[i]->dev = ibdev; + spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); + if (!ibdev->sriov.is_going_down) + queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); + spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); + } +out: + kfree(dm); + return; +} + static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, - enum mlx4_dev_event event, int port) + enum mlx4_dev_event event, unsigned long param) { struct ib_event ibev; + struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); + struct mlx4_eqe *eqe = NULL; + struct ib_event_work *ew; + int p = 0; + + if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) + eqe = (struct mlx4_eqe *)param; + else + p = (int) param; switch (event) { case MLX4_DEV_EVENT_PORT_UP: + if (p > ibdev->num_ports) + return; + if (mlx4_is_master(dev) && + rdma_port_get_link_layer(&ibdev->ib_dev, p) == + 
IB_LINK_LAYER_INFINIBAND) { + mlx4_ib_invalidate_all_guid_record(ibdev, p); + } ibev.event = IB_EVENT_PORT_ACTIVE; break; case MLX4_DEV_EVENT_PORT_DOWN: + if (p > ibdev->num_ports) + return; ibev.event = IB_EVENT_PORT_ERR; break; case MLX4_DEV_EVENT_CATASTROPHIC_ERROR: + ibdev->ib_active = false; ibev.event = IB_EVENT_DEVICE_FATAL; break; + case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: + ew = kmalloc(sizeof *ew, GFP_ATOMIC); + if (!ew) { + pr_err("failed to allocate memory for events work\n"); + break; + } + + INIT_WORK(&ew->work, handle_port_mgmt_change_event); + memcpy(&ew->ib_eqe, eqe, sizeof *eqe); + ew->ib_dev = ibdev; + /* need to queue only for port owner, which uses GEN_EQE */ + if (mlx4_is_master(dev)) + queue_work(wq, &ew->work); + else + handle_port_mgmt_change_event(&ew->work); + return; + + case MLX4_DEV_EVENT_SLAVE_INIT: + /* here, p is the slave id */ + do_slave_init(ibdev, p, 1); + return; + + case MLX4_DEV_EVENT_SLAVE_SHUTDOWN: + /* here, p is the slave id */ + do_slave_init(ibdev, p, 0); + return; + default: return; } ibev.device = ibdev_ptr; - ibev.element.port_num = port; + ibev.element.port_num = (u8) p; ib_dispatch_event(&ibev); } static struct mlx4_interface mlx4_ib_interface = { - .add = mlx4_ib_add, - .remove = mlx4_ib_remove, - .event = mlx4_ib_event + .add = mlx4_ib_add, + .remove = mlx4_ib_remove, + .event = mlx4_ib_event, + .protocol = MLX4_PROT_IB_IPV6 }; static int __init mlx4_ib_init(void) { - return mlx4_register_interface(&mlx4_ib_interface); + int err; + + wq = create_singlethread_workqueue("mlx4_ib"); + if (!wq) + return -ENOMEM; + + err = mlx4_ib_mcg_init(); + if (err) + goto clean_wq; + + err = mlx4_register_interface(&mlx4_ib_interface); + if (err) + goto clean_mcg; + + return 0; + +clean_mcg: + mlx4_ib_mcg_destroy(); + +clean_wq: + destroy_workqueue(wq); + return err; } static void __exit mlx4_ib_cleanup(void) { mlx4_unregister_interface(&mlx4_ib_interface); + mlx4_ib_mcg_destroy(); + destroy_workqueue(wq); } module_init(mlx4_ib_init); diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c new file mode 100644 index 00000000000..ed327e6c8fd --- /dev/null +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -0,0 +1,1257 @@ +/* + * Copyright (c) 2012 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/ib_mad.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_cache.h> +#include <rdma/ib_sa.h> + +#include <linux/mlx4/cmd.h> +#include <linux/rbtree.h> +#include <linux/delay.h> + +#include "mlx4_ib.h" + +#define MAX_VFS 80 +#define MAX_PEND_REQS_PER_FUNC 4 +#define MAD_TIMEOUT_MS 2000 + +#define mcg_warn(fmt, arg...) pr_warn("MCG WARNING: " fmt, ##arg) +#define mcg_error(fmt, arg...) pr_err(fmt, ##arg) +#define mcg_warn_group(group, format, arg...) \ + pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\ + (group)->name, group->demux->port, ## arg) + +#define mcg_error_group(group, format, arg...) \ + pr_err(" %16s: " format, (group)->name, ## arg) + + +static union ib_gid mgid0; + +static struct workqueue_struct *clean_wq; + +enum mcast_state { + MCAST_NOT_MEMBER = 0, + MCAST_MEMBER, +}; + +enum mcast_group_state { + MCAST_IDLE, + MCAST_JOIN_SENT, + MCAST_LEAVE_SENT, + MCAST_RESP_READY +}; + +struct mcast_member { + enum mcast_state state; + uint8_t join_state; + int num_pend_reqs; + struct list_head pending; +}; + +struct ib_sa_mcmember_data { + union ib_gid mgid; + union ib_gid port_gid; + __be32 qkey; + __be16 mlid; + u8 mtusel_mtu; + u8 tclass; + __be16 pkey; + u8 ratesel_rate; + u8 lifetmsel_lifetm; + __be32 sl_flowlabel_hoplimit; + u8 scope_join_state; + u8 proxy_join; + u8 reserved[2]; +}; + +struct mcast_group { + struct ib_sa_mcmember_data rec; + struct rb_node node; + struct list_head mgid0_list; + struct mlx4_ib_demux_ctx *demux; + struct mcast_member func[MAX_VFS]; + struct mutex lock; + struct work_struct work; + struct list_head pending_list; + int members[3]; + enum mcast_group_state state; + enum mcast_group_state prev_state; + struct ib_sa_mad response_sa_mad; + __be64 last_req_tid; + + char name[33]; /* MGID string */ + struct device_attribute dentry; + + /* refcount is the reference count for the following: + 1. Each queued request + 2. Each invocation of the worker thread + 3. 
Membership of the port at the SA + */ + atomic_t refcount; + + /* delayed work to clean pending SM request */ + struct delayed_work timeout_work; + struct list_head cleanup_list; +}; + +struct mcast_req { + int func; + struct ib_sa_mad sa_mad; + struct list_head group_list; + struct list_head func_list; + struct mcast_group *group; + int clean; +}; + + +#define safe_atomic_dec(ref) \ + do {\ + if (atomic_dec_and_test(ref)) \ + mcg_warn_group(group, "did not expect to reach zero\n"); \ + } while (0) + +static const char *get_state_string(enum mcast_group_state state) +{ + switch (state) { + case MCAST_IDLE: + return "MCAST_IDLE"; + case MCAST_JOIN_SENT: + return "MCAST_JOIN_SENT"; + case MCAST_LEAVE_SENT: + return "MCAST_LEAVE_SENT"; + case MCAST_RESP_READY: + return "MCAST_RESP_READY"; + } + return "Invalid State"; +} + +static struct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx, + union ib_gid *mgid) +{ + struct rb_node *node = ctx->mcg_table.rb_node; + struct mcast_group *group; + int ret; + + while (node) { + group = rb_entry(node, struct mcast_group, node); + ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid); + if (!ret) + return group; + + if (ret < 0) + node = node->rb_left; + else + node = node->rb_right; + } + return NULL; +} + +static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx, + struct mcast_group *group) +{ + struct rb_node **link = &ctx->mcg_table.rb_node; + struct rb_node *parent = NULL; + struct mcast_group *cur_group; + int ret; + + while (*link) { + parent = *link; + cur_group = rb_entry(parent, struct mcast_group, node); + + ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw, + sizeof group->rec.mgid); + if (ret < 0) + link = &(*link)->rb_left; + else if (ret > 0) + link = &(*link)->rb_right; + else + return cur_group; + } + rb_link_node(&group->node, parent, link); + rb_insert_color(&group->node, &ctx->mcg_table); + return NULL; +} + +static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad) +{ + struct mlx4_ib_dev *dev = ctx->dev; + struct ib_ah_attr ah_attr; + + spin_lock(&dev->sm_lock); + if (!dev->sm_ah[ctx->port - 1]) { + /* port is not yet Active, sm_ah not ready */ + spin_unlock(&dev->sm_lock); + return -EAGAIN; + } + mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); + spin_unlock(&dev->sm_lock); + return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), + ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY, + &ah_attr, NULL, mad); +} + +static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx, + struct ib_mad *mad) +{ + struct mlx4_ib_dev *dev = ctx->dev; + struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1]; + struct ib_wc wc; + struct ib_ah_attr ah_attr; + + /* Our agent might not yet be registered when mads start to arrive */ + if (!agent) + return -EAGAIN; + + ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr); + + if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index)) + return -EINVAL; + wc.sl = 0; + wc.dlid_path_bits = 0; + wc.port_num = ctx->port; + wc.slid = ah_attr.dlid; /* opensm lid */ + wc.src_qp = 1; + return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad); +} + +static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad) +{ + struct ib_sa_mad mad; + struct ib_sa_mcmember_data *sa_mad_data = (struct ib_sa_mcmember_data *)&mad.data; + int ret; + + /* we rely on a mad request as arrived from a VF */ + memcpy(&mad, sa_mad, sizeof mad); + + /* fix port GID to be the real one (slave 0) */ 
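+ /* the SA only knows the physical port GID (cached at index 0); the response is matched back to the requesting VF via the TID assigned below */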
+ sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0]; + + /* assign our own TID */ + mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux); + group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */ + + ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad); + /* set timeout handler */ + if (!ret) { + /* calls mlx4_ib_mcg_timeout_handler */ + queue_delayed_work(group->demux->mcg_wq, &group->timeout_work, + msecs_to_jiffies(MAD_TIMEOUT_MS)); + } + + return ret; +} + +static int send_leave_to_wire(struct mcast_group *group, u8 join_state) +{ + struct ib_sa_mad mad; + struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data; + int ret; + + memset(&mad, 0, sizeof mad); + mad.mad_hdr.base_version = 1; + mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + mad.mad_hdr.class_version = 2; + mad.mad_hdr.method = IB_SA_METHOD_DELETE; + mad.mad_hdr.status = cpu_to_be16(0); + mad.mad_hdr.class_specific = cpu_to_be16(0); + mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux); + group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */ + mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); + mad.mad_hdr.attr_mod = cpu_to_be32(0); + mad.sa_hdr.sm_key = 0x0; + mad.sa_hdr.attr_offset = cpu_to_be16(7); + mad.sa_hdr.comp_mask = IB_SA_MCMEMBER_REC_MGID | + IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE; + + *sa_data = group->rec; + sa_data->scope_join_state = join_state; + + ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad); + if (ret) + group->state = MCAST_IDLE; + + /* set timeout handler */ + if (!ret) { + /* calls mlx4_ib_mcg_timeout_handler */ + queue_delayed_work(group->demux->mcg_wq, &group->timeout_work, + msecs_to_jiffies(MAD_TIMEOUT_MS)); + } + + return ret; +} + +static int send_reply_to_slave(int slave, struct mcast_group *group, + struct ib_sa_mad *req_sa_mad, u16 status) +{ + struct ib_sa_mad mad; + struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data; + struct ib_sa_mcmember_data *req_sa_data = (struct ib_sa_mcmember_data *)&req_sa_mad->data; + int ret; + + memset(&mad, 0, sizeof mad); + mad.mad_hdr.base_version = 1; + mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + mad.mad_hdr.class_version = 2; + mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + mad.mad_hdr.status = cpu_to_be16(status); + mad.mad_hdr.class_specific = cpu_to_be16(0); + mad.mad_hdr.tid = req_sa_mad->mad_hdr.tid; + *(u8 *)&mad.mad_hdr.tid = 0; /* resetting tid to 0 */ + mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); + mad.mad_hdr.attr_mod = cpu_to_be32(0); + mad.sa_hdr.sm_key = req_sa_mad->sa_hdr.sm_key; + mad.sa_hdr.attr_offset = cpu_to_be16(7); + mad.sa_hdr.comp_mask = 0; /* ignored on responses, see IBTA spec */ + + *sa_data = group->rec; + + /* reconstruct VF's requested join_state and port_gid */ + sa_data->scope_join_state &= 0xf0; + sa_data->scope_join_state |= (group->func[slave].join_state & 0x0f); + memcpy(&sa_data->port_gid, &req_sa_data->port_gid, sizeof req_sa_data->port_gid); + + ret = send_mad_to_slave(slave, group->demux, (struct ib_mad *)&mad); + return ret; +} + +static int check_selector(ib_sa_comp_mask comp_mask, + ib_sa_comp_mask selector_mask, + ib_sa_comp_mask value_mask, + u8 src_value, u8 dst_value) +{ + int err; + u8 selector = dst_value >> 6; + dst_value &= 0x3f; + src_value &= 0x3f; + + if (!(comp_mask & selector_mask) || !(comp_mask & value_mask)) + return 0; + + switch (selector) { + case IB_SA_GT: + err = (src_value <= dst_value); + break; + 
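/* the tests are inverted: err is nonzero exactly when the requested selector is violated, e.g. for IB_SA_GT the request fails unless src > dst */ +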
case IB_SA_LT: + err = (src_value >= dst_value); + break; + case IB_SA_EQ: + err = (src_value != dst_value); + break; + default: + err = 0; + break; + } + + return err; +} + +static u16 cmp_rec(struct ib_sa_mcmember_data *src, + struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask) +{ + /* src is group record, dst is request record */ + /* MGID must already match */ + /* Port_GID is always replaced with our own Port_GID, so it always matches */ + +#define MAD_STATUS_REQ_INVALID 0x0200 + if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey) + return MAD_STATUS_REQ_INVALID; + if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid) + return MAD_STATUS_REQ_INVALID; + if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR, + IB_SA_MCMEMBER_REC_MTU, + src->mtusel_mtu, dst->mtusel_mtu)) + return MAD_STATUS_REQ_INVALID; + if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS && + src->tclass != dst->tclass) + return MAD_STATUS_REQ_INVALID; + if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey) + return MAD_STATUS_REQ_INVALID; + if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR, + IB_SA_MCMEMBER_REC_RATE, + src->ratesel_rate, dst->ratesel_rate)) + return MAD_STATUS_REQ_INVALID; + if (check_selector(comp_mask, + IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR, + IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME, + src->lifetmsel_lifetm, dst->lifetmsel_lifetm)) + return MAD_STATUS_REQ_INVALID; + if (comp_mask & IB_SA_MCMEMBER_REC_SL && + (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0xf0000000) != + (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0xf0000000)) + return MAD_STATUS_REQ_INVALID; + if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL && + (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x0fffff00) != + (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x0fffff00)) + return MAD_STATUS_REQ_INVALID; + if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT && + (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x000000ff) != + (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x000000ff)) + return MAD_STATUS_REQ_INVALID; + if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE && + (src->scope_join_state & 0xf0) != + (dst->scope_join_state & 0xf0)) + return MAD_STATUS_REQ_INVALID; + + /* join_state checked separately, proxy_join ignored */ + + return 0; +} + +/* release group, return 1 if this was the last release and the group is destroyed; + * the timeout work is cancelled synchronously */ +static int release_group(struct mcast_group *group, int from_timeout_handler) +{ + struct mlx4_ib_demux_ctx *ctx = group->demux; + int nzgroup; + + mutex_lock(&ctx->mcg_table_lock); + mutex_lock(&group->lock); + if (atomic_dec_and_test(&group->refcount)) { + if (!from_timeout_handler) { + if (group->state != MCAST_IDLE && + !cancel_delayed_work(&group->timeout_work)) { + atomic_inc(&group->refcount); + mutex_unlock(&group->lock); + mutex_unlock(&ctx->mcg_table_lock); + return 0; + } + } + + nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0); + if (nzgroup) + del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr); + if (!list_empty(&group->pending_list)) + mcg_warn_group(group, "releasing a group with a non-empty pending list\n"); + if (nzgroup) + rb_erase(&group->node, &ctx->mcg_table); + list_del_init(&group->mgid0_list); + mutex_unlock(&group->lock); + mutex_unlock(&ctx->mcg_table_lock); + kfree(group); + return 1; + } else { + mutex_unlock(&group->lock); + mutex_unlock(&ctx->mcg_table_lock); + } + return 0; +} + +static void adjust_membership(struct mcast_group *group, u8 join_state, int inc) +{ + int i; + + for (i = 0; i < 3; i++, join_state
>>= 1) + if (join_state & 0x1) + group->members[i] += inc; +} + +static u8 get_leave_state(struct mcast_group *group) +{ + u8 leave_state = 0; + int i; + + for (i = 0; i < 3; i++) + if (!group->members[i]) + leave_state |= (1 << i); + + return leave_state & (group->rec.scope_join_state & 7); +} + +static int join_group(struct mcast_group *group, int slave, u8 join_mask) +{ + int ret = 0; + u8 join_state; + + /* remove bits that slave is already member of, and adjust */ + join_state = join_mask & (~group->func[slave].join_state); + adjust_membership(group, join_state, 1); + group->func[slave].join_state |= join_state; + if (group->func[slave].state != MCAST_MEMBER && join_state) { + group->func[slave].state = MCAST_MEMBER; + ret = 1; + } + return ret; +} + +static int leave_group(struct mcast_group *group, int slave, u8 leave_state) +{ + int ret = 0; + + adjust_membership(group, leave_state, -1); + group->func[slave].join_state &= ~leave_state; + if (!group->func[slave].join_state) { + group->func[slave].state = MCAST_NOT_MEMBER; + ret = 1; + } + return ret; +} + +static int check_leave(struct mcast_group *group, int slave, u8 leave_mask) +{ + if (group->func[slave].state != MCAST_MEMBER) + return MAD_STATUS_REQ_INVALID; + + /* make sure we're not deleting unset bits */ + if (~group->func[slave].join_state & leave_mask) + return MAD_STATUS_REQ_INVALID; + + if (!leave_mask) + return MAD_STATUS_REQ_INVALID; + + return 0; +} + +static void mlx4_ib_mcg_timeout_handler(struct work_struct *work) +{ + struct delayed_work *delay = to_delayed_work(work); + struct mcast_group *group; + struct mcast_req *req = NULL; + + group = container_of(delay, typeof(*group), timeout_work); + + mutex_lock(&group->lock); + if (group->state == MCAST_JOIN_SENT) { + if (!list_empty(&group->pending_list)) { + req = list_first_entry(&group->pending_list, struct mcast_req, group_list); + list_del(&req->group_list); + list_del(&req->func_list); + --group->func[req->func].num_pend_reqs; + mutex_unlock(&group->lock); + kfree(req); + if (memcmp(&group->rec.mgid, &mgid0, sizeof mgid0)) { + if (release_group(group, 1)) + return; + } else { + kfree(group); + return; + } + mutex_lock(&group->lock); + } else + mcg_warn_group(group, "DRIVER BUG\n"); + } else if (group->state == MCAST_LEAVE_SENT) { + if (group->rec.scope_join_state & 7) + group->rec.scope_join_state &= 0xf8; + group->state = MCAST_IDLE; + mutex_unlock(&group->lock); + if (release_group(group, 1)) + return; + mutex_lock(&group->lock); + } else + mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state)); + group->state = MCAST_IDLE; + atomic_inc(&group->refcount); + if (!queue_work(group->demux->mcg_wq, &group->work)) + safe_atomic_dec(&group->refcount); + + mutex_unlock(&group->lock); +} + +static int handle_leave_req(struct mcast_group *group, u8 leave_mask, + struct mcast_req *req) +{ + u16 status; + + if (req->clean) + leave_mask = group->func[req->func].join_state; + + status = check_leave(group, req->func, leave_mask); + if (!status) + leave_group(group, req->func, leave_mask); + + if (!req->clean) + send_reply_to_slave(req->func, group, &req->sa_mad, status); + --group->func[req->func].num_pend_reqs; + list_del(&req->group_list); + list_del(&req->func_list); + kfree(req); + return 1; +} + +static int handle_join_req(struct mcast_group *group, u8 join_mask, + struct mcast_req *req) +{ + u8 group_join_state = group->rec.scope_join_state & 7; + int ref = 0; + u16 status; + struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data 
*)req->sa_mad.data; + + if (join_mask == (group_join_state & join_mask)) { + /* port's membership need not change */ + status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask); + if (!status) + join_group(group, req->func, join_mask); + + --group->func[req->func].num_pend_reqs; + send_reply_to_slave(req->func, group, &req->sa_mad, status); + list_del(&req->group_list); + list_del(&req->func_list); + kfree(req); + ++ref; + } else { + /* port's membership needs to be updated */ + group->prev_state = group->state; + if (send_join_to_wire(group, &req->sa_mad)) { + --group->func[req->func].num_pend_reqs; + list_del(&req->group_list); + list_del(&req->func_list); + kfree(req); + ref = 1; + group->state = group->prev_state; + } else + group->state = MCAST_JOIN_SENT; + } + + return ref; +} + +static void mlx4_ib_mcg_work_handler(struct work_struct *work) +{ + struct mcast_group *group; + struct mcast_req *req = NULL; + struct ib_sa_mcmember_data *sa_data; + u8 req_join_state; + int rc = 1; /* release_count - this is for the scheduled work */ + u16 status; + u8 method; + + group = container_of(work, typeof(*group), work); + + mutex_lock(&group->lock); + + /* First, let's see if a response from the SM is waiting regarding this group. + * If so, we need to update the group's REC. If this is a bad response, we + * may need to send a bad response to a VF waiting for it. If a VF is waiting + * and this is a good response, the VF will be answered later in this function. */ + if (group->state == MCAST_RESP_READY) { + /* cancels mlx4_ib_mcg_timeout_handler */ + cancel_delayed_work(&group->timeout_work); + status = be16_to_cpu(group->response_sa_mad.mad_hdr.status); + method = group->response_sa_mad.mad_hdr.method; + if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) { + mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n", + be64_to_cpu(group->response_sa_mad.mad_hdr.tid), + be64_to_cpu(group->last_req_tid)); + group->state = group->prev_state; + goto process_requests; + } + if (status) { + if (!list_empty(&group->pending_list)) + req = list_first_entry(&group->pending_list, + struct mcast_req, group_list); + if (method == IB_MGMT_METHOD_GET_RESP) { + if (req) { + send_reply_to_slave(req->func, group, &req->sa_mad, status); + --group->func[req->func].num_pend_reqs; + list_del(&req->group_list); + list_del(&req->func_list); + kfree(req); + ++rc; + } else + mcg_warn_group(group, "no request for failed join\n"); + } else if (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing) + ++rc; + } else { + u8 resp_join_state; + u8 cur_join_state; + + resp_join_state = ((struct ib_sa_mcmember_data *) + group->response_sa_mad.data)->scope_join_state & 7; + cur_join_state = group->rec.scope_join_state & 7; + + if (method == IB_MGMT_METHOD_GET_RESP) { + /* successful join */ + if (!cur_join_state && resp_join_state) + --rc; + } else if (!resp_join_state) + ++rc; + memcpy(&group->rec, group->response_sa_mad.data, sizeof group->rec); + } + group->state = MCAST_IDLE; + } + +process_requests: + /* We should now go over pending join/leave requests, as long as we are idle. */ + while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) { + req = list_first_entry(&group->pending_list, struct mcast_req, + group_list); + sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; + req_join_state = sa_data->scope_join_state & 0x7; + + /* For a leave request, we will immediately answer the VF, and + * update our internal counters. The actual leave will be sent
The actual leave will be sent + * to SM later, if at all needed. We dequeue the request now. */ + if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE) + rc += handle_leave_req(group, req_join_state, req); + else + rc += handle_join_req(group, req_join_state, req); + } + + /* Handle leaves */ + if (group->state == MCAST_IDLE) { + req_join_state = get_leave_state(group); + if (req_join_state) { + group->rec.scope_join_state &= ~req_join_state; + group->prev_state = group->state; + if (send_leave_to_wire(group, req_join_state)) { + group->state = group->prev_state; + ++rc; + } else + group->state = MCAST_LEAVE_SENT; + } + } + + if (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) + goto process_requests; + mutex_unlock(&group->lock); + + while (rc--) + release_group(group, 0); +} + +static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx *ctx, + __be64 tid, + union ib_gid *new_mgid) +{ + struct mcast_group *group = NULL, *cur_group; + struct mcast_req *req; + struct list_head *pos; + struct list_head *n; + + mutex_lock(&ctx->mcg_table_lock); + list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) { + group = list_entry(pos, struct mcast_group, mgid0_list); + mutex_lock(&group->lock); + if (group->last_req_tid == tid) { + if (memcmp(new_mgid, &mgid0, sizeof mgid0)) { + group->rec.mgid = *new_mgid; + sprintf(group->name, "%016llx%016llx", + be64_to_cpu(group->rec.mgid.global.subnet_prefix), + be64_to_cpu(group->rec.mgid.global.interface_id)); + list_del_init(&group->mgid0_list); + cur_group = mcast_insert(ctx, group); + if (cur_group) { + /* A race between our code and SM. Silently cleaning the new one */ + req = list_first_entry(&group->pending_list, + struct mcast_req, group_list); + --group->func[req->func].num_pend_reqs; + list_del(&req->group_list); + list_del(&req->func_list); + kfree(req); + mutex_unlock(&group->lock); + mutex_unlock(&ctx->mcg_table_lock); + release_group(group, 0); + return NULL; + } + + atomic_inc(&group->refcount); + add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr); + mutex_unlock(&group->lock); + mutex_unlock(&ctx->mcg_table_lock); + return group; + } else { + struct mcast_req *tmp1, *tmp2; + + list_del(&group->mgid0_list); + if (!list_empty(&group->pending_list) && group->state != MCAST_IDLE) + cancel_delayed_work_sync(&group->timeout_work); + + list_for_each_entry_safe(tmp1, tmp2, &group->pending_list, group_list) { + list_del(&tmp1->group_list); + kfree(tmp1); + } + mutex_unlock(&group->lock); + mutex_unlock(&ctx->mcg_table_lock); + kfree(group); + return NULL; + } + } + mutex_unlock(&group->lock); + } + mutex_unlock(&ctx->mcg_table_lock); + + return NULL; +} + +static ssize_t sysfs_show_group(struct device *dev, + struct device_attribute *attr, char *buf); + +static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx, + union ib_gid *mgid, int create, + gfp_t gfp_mask) +{ + struct mcast_group *group, *cur_group; + int is_mgid0; + int i; + + is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0); + if (!is_mgid0) { + group = mcast_find(ctx, mgid); + if (group) + goto found; + } + + if (!create) + return ERR_PTR(-ENOENT); + + group = kzalloc(sizeof *group, gfp_mask); + if (!group) + return ERR_PTR(-ENOMEM); + + group->demux = ctx; + group->rec.mgid = *mgid; + INIT_LIST_HEAD(&group->pending_list); + INIT_LIST_HEAD(&group->mgid0_list); + for (i = 0; i < MAX_VFS; ++i) + INIT_LIST_HEAD(&group->func[i].pending); + INIT_WORK(&group->work, mlx4_ib_mcg_work_handler); + INIT_DELAYED_WORK(&group->timeout_work, 
mlx4_ib_mcg_timeout_handler); + mutex_init(&group->lock); + sprintf(group->name, "%016llx%016llx", + be64_to_cpu(group->rec.mgid.global.subnet_prefix), + be64_to_cpu(group->rec.mgid.global.interface_id)); + sysfs_attr_init(&group->dentry.attr); + group->dentry.show = sysfs_show_group; + group->dentry.store = NULL; + group->dentry.attr.name = group->name; + group->dentry.attr.mode = 0400; + group->state = MCAST_IDLE; + + if (is_mgid0) { + list_add(&group->mgid0_list, &ctx->mcg_mgid0_list); + goto found; + } + + cur_group = mcast_insert(ctx, group); + if (cur_group) { + mcg_warn("group just showed up %s - confused\n", cur_group->name); + kfree(group); + return ERR_PTR(-EINVAL); + } + + add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr); + +found: + atomic_inc(&group->refcount); + return group; +} + +static void queue_req(struct mcast_req *req) +{ + struct mcast_group *group = req->group; + + atomic_inc(&group->refcount); /* for the request */ + atomic_inc(&group->refcount); /* for scheduling the work */ + list_add_tail(&req->group_list, &group->pending_list); + list_add_tail(&req->func_list, &group->func[req->func].pending); + /* calls mlx4_ib_mcg_work_handler */ + if (!queue_work(group->demux->mcg_wq, &group->work)) + safe_atomic_dec(&group->refcount); +} + +int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave, + struct ib_sa_mad *mad) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)mad->data; + struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1]; + struct mcast_group *group; + + switch (mad->mad_hdr.method) { + case IB_MGMT_METHOD_GET_RESP: + case IB_SA_METHOD_DELETE_RESP: + mutex_lock(&ctx->mcg_table_lock); + group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL); + mutex_unlock(&ctx->mcg_table_lock); + if (IS_ERR(group)) { + if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) { + __be64 tid = mad->mad_hdr.tid; + *(u8 *)(&tid) = (u8)slave; /* in group we kept the modified TID */ + group = search_relocate_mgid0_group(ctx, tid, &rec->mgid); + } else + group = NULL; + } + + if (!group) + return 1; + + mutex_lock(&group->lock); + group->response_sa_mad = *mad; + group->prev_state = group->state; + group->state = MCAST_RESP_READY; + /* calls mlx4_ib_mcg_work_handler */ + atomic_inc(&group->refcount); + if (!queue_work(ctx->mcg_wq, &group->work)) + safe_atomic_dec(&group->refcount); + mutex_unlock(&group->lock); + release_group(group, 0); + return 1; /* consumed */ + case IB_MGMT_METHOD_SET: + case IB_SA_METHOD_GET_TABLE: + case IB_SA_METHOD_GET_TABLE_RESP: + case IB_SA_METHOD_DELETE: + return 0; /* not consumed, pass-through to guest over tunnel */ + default: + mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n", + port, mad->mad_hdr.method); + return 1; /* consumed */ + } +} + +int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, + int slave, struct ib_sa_mad *sa_mad) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)sa_mad->data; + struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1]; + struct mcast_group *group; + struct mcast_req *req; + int may_create = 0; + + if (ctx->flushing) + return -EAGAIN; + + switch (sa_mad->mad_hdr.method) { + case IB_MGMT_METHOD_SET: + may_create = 1; + case IB_SA_METHOD_DELETE: + req = kzalloc(sizeof *req, GFP_KERNEL); + if (!req) + return -ENOMEM; + + req->func = slave; + req->sa_mad = *sa_mad; + + mutex_lock(&ctx->mcg_table_lock); + group = 
acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL); + mutex_unlock(&ctx->mcg_table_lock); + if (IS_ERR(group)) { + kfree(req); + return PTR_ERR(group); + } + mutex_lock(&group->lock); + if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) { + mutex_unlock(&group->lock); + mcg_warn_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n", + port, slave, MAX_PEND_REQS_PER_FUNC); + release_group(group, 0); + kfree(req); + return -ENOMEM; + } + ++group->func[slave].num_pend_reqs; + req->group = group; + queue_req(req); + mutex_unlock(&group->lock); + release_group(group, 0); + return 1; /* consumed */ + case IB_SA_METHOD_GET_TABLE: + case IB_MGMT_METHOD_GET_RESP: + case IB_SA_METHOD_GET_TABLE_RESP: + case IB_SA_METHOD_DELETE_RESP: + return 0; /* not consumed, pass-through */ + default: + mcg_warn("In multiplex, port %d, func %d: unexpected MCMember method: 0x%x, dropping\n", + port, slave, sa_mad->mad_hdr.method); + return 1; /* consumed */ + } +} + +static ssize_t sysfs_show_group(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mcast_group *group = + container_of(attr, struct mcast_group, dentry); + struct mcast_req *req = NULL; + char pending_str[40]; + char state_str[40]; + ssize_t len = 0; + int f; + + if (group->state == MCAST_IDLE) + sprintf(state_str, "%s", get_state_string(group->state)); + else + sprintf(state_str, "%s(TID=0x%llx)", + get_state_string(group->state), + be64_to_cpu(group->last_req_tid)); + if (list_empty(&group->pending_list)) { + sprintf(pending_str, "No"); + } else { + req = list_first_entry(&group->pending_list, struct mcast_req, group_list); + sprintf(pending_str, "Yes(TID=0x%llx)", + be64_to_cpu(req->sa_mad.mad_hdr.tid)); + } + len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ", + group->rec.scope_join_state & 0xf, + group->members[2], group->members[1], group->members[0], + atomic_read(&group->refcount), + pending_str, + state_str); + for (f = 0; f < MAX_VFS; ++f) + if (group->func[f].state == MCAST_MEMBER) + len += sprintf(buf + len, "%d[%1x] ", + f, group->func[f].join_state); + + len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x " + "%4x %4x %2x %2x)\n", + be16_to_cpu(group->rec.pkey), + be32_to_cpu(group->rec.qkey), + (group->rec.mtusel_mtu & 0xc0) >> 6, + group->rec.mtusel_mtu & 0x3f, + group->rec.tclass, + (group->rec.ratesel_rate & 0xc0) >> 6, + group->rec.ratesel_rate & 0x3f, + (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28, + (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8, + be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff, + group->rec.proxy_join); + + return len; +} + +int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx) +{ + char name[20]; + + atomic_set(&ctx->tid, 0); + sprintf(name, "mlx4_ib_mcg%d", ctx->port); + ctx->mcg_wq = create_singlethread_workqueue(name); + if (!ctx->mcg_wq) + return -ENOMEM; + + mutex_init(&ctx->mcg_table_lock); + ctx->mcg_table = RB_ROOT; + INIT_LIST_HEAD(&ctx->mcg_mgid0_list); + ctx->flushing = 0; + + return 0; +} + +static void force_clean_group(struct mcast_group *group) +{ + struct mcast_req *req, *tmp + ; + list_for_each_entry_safe(req, tmp, &group->pending_list, group_list) { + list_del(&req->group_list); + kfree(req); + } + del_sysfs_port_mcg_attr(group->demux->dev, group->demux->port, &group->dentry.attr); + rb_erase(&group->node, &group->demux->mcg_table); + kfree(group); +} + +static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq) +{ + int i; + 
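/* cleanup protocol: queue leave requests for every VF, wait up to MAD_TIMEOUT_MS plus a grace period for the group table to drain, then force-clean whatever remains */ +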
struct rb_node *p; + struct mcast_group *group; + unsigned long end; + int count; + + for (i = 0; i < MAX_VFS; ++i) + clean_vf_mcast(ctx, i); + + end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000); + do { + count = 0; + mutex_lock(&ctx->mcg_table_lock); + for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) + ++count; + mutex_unlock(&ctx->mcg_table_lock); + if (!count) + break; + + msleep(1); + } while (time_after(end, jiffies)); + + flush_workqueue(ctx->mcg_wq); + if (destroy_wq) + destroy_workqueue(ctx->mcg_wq); + + mutex_lock(&ctx->mcg_table_lock); + while ((p = rb_first(&ctx->mcg_table)) != NULL) { + group = rb_entry(p, struct mcast_group, node); + if (atomic_read(&group->refcount)) + mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group); + + force_clean_group(group); + } + mutex_unlock(&ctx->mcg_table_lock); +} + +struct clean_work { + struct work_struct work; + struct mlx4_ib_demux_ctx *ctx; + int destroy_wq; +}; + +static void mcg_clean_task(struct work_struct *work) +{ + struct clean_work *cw = container_of(work, struct clean_work, work); + + _mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq); + cw->ctx->flushing = 0; + kfree(cw); +} + +void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq) +{ + struct clean_work *work; + + if (ctx->flushing) + return; + + ctx->flushing = 1; + + if (destroy_wq) { + _mlx4_ib_mcg_port_cleanup(ctx, destroy_wq); + ctx->flushing = 0; + return; + } + + work = kmalloc(sizeof *work, GFP_KERNEL); + if (!work) { + ctx->flushing = 0; + mcg_warn("failed allocating work for cleanup\n"); + return; + } + + work->ctx = ctx; + work->destroy_wq = destroy_wq; + INIT_WORK(&work->work, mcg_clean_task); + queue_work(clean_wq, &work->work); +} + +static void build_leave_mad(struct mcast_req *req) +{ + struct ib_sa_mad *mad = &req->sa_mad; + + mad->mad_hdr.method = IB_SA_METHOD_DELETE; +} + + +static void clear_pending_reqs(struct mcast_group *group, int vf) +{ + struct mcast_req *req, *tmp, *group_first = NULL; + int clear; + int pend = 0; + + if (!list_empty(&group->pending_list)) + group_first = list_first_entry(&group->pending_list, struct mcast_req, group_list); + + list_for_each_entry_safe(req, tmp, &group->func[vf].pending, func_list) { + clear = 1; + if (group_first == req && + (group->state == MCAST_JOIN_SENT || + group->state == MCAST_LEAVE_SENT)) { + clear = cancel_delayed_work(&group->timeout_work); + pend = !clear; + group->state = MCAST_IDLE; + } + if (clear) { + --group->func[vf].num_pend_reqs; + list_del(&req->group_list); + list_del(&req->func_list); + kfree(req); + atomic_dec(&group->refcount); + } + } + + if (!pend && (!list_empty(&group->func[vf].pending) || group->func[vf].num_pend_reqs)) { + mcg_warn_group(group, "DRIVER BUG: list_empty %d, num_pend_reqs %d\n", + list_empty(&group->func[vf].pending), group->func[vf].num_pend_reqs); + } +} + +static int push_deleteing_req(struct mcast_group *group, int slave) +{ + struct mcast_req *req; + struct mcast_req *pend_req; + + if (!group->func[slave].join_state) + return 0; + + req = kzalloc(sizeof *req, GFP_KERNEL); + if (!req) { + mcg_warn_group(group, "failed allocation - may leave stale groups\n"); + return -ENOMEM; + } + + if (!list_empty(&group->func[slave].pending)) { + pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list); + if (pend_req->clean) { + kfree(req); + return 0; + } + } + + req->clean = 1; + req->func = slave; + req->group = group; + ++group->func[slave].num_pend_reqs; + 
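/* req->clean makes handle_leave_req() drop every join state the VF still holds, regardless of what the synthesized MAD carries */ +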
build_leave_mad(req); + queue_req(req); + return 0; +} + +void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave) +{ + struct mcast_group *group; + struct rb_node *p; + + mutex_lock(&ctx->mcg_table_lock); + for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) { + group = rb_entry(p, struct mcast_group, node); + mutex_lock(&group->lock); + if (atomic_read(&group->refcount)) { + /* clear pending requests of this VF */ + clear_pending_reqs(group, slave); + push_deleteing_req(group, slave); + } + mutex_unlock(&group->lock); + } + mutex_unlock(&ctx->mcg_table_lock); +} + + +int mlx4_ib_mcg_init(void) +{ + clean_wq = create_singlethread_workqueue("mlx4_ib_mcg"); + if (!clean_wq) + return -ENOMEM; + + return 0; +} + +void mlx4_ib_mcg_destroy(void) +{ + destroy_workqueue(clean_wq); +} diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index c4cf5b69eef..369da3ca5d6 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -36,13 +37,39 @@ #include <linux/compiler.h> #include <linux/list.h> #include <linux/mutex.h> +#include <linux/idr.h> #include <rdma/ib_verbs.h> #include <rdma/ib_umem.h> +#include <rdma/ib_mad.h> +#include <rdma/ib_sa.h> #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> +#define MLX4_IB_DRV_NAME "mlx4_ib" + +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__ + +#define mlx4_ib_warn(ibdev, format, arg...) \ + dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg) + +enum { + MLX4_IB_SQ_MIN_WQE_SHIFT = 6, + MLX4_IB_MAX_HEADROOM = 2048 +}; + +#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1) +#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT)) + +/*module param to indicate if SM assigns the alias_GUID*/ +extern int mlx4_ib_sm_guid_assign; + +#define MLX4_IB_UC_STEER_QPN_ALIGN 1 +#define MLX4_IB_UC_MAX_NUM_QPS 256 struct mlx4_ib_ucontext { struct ib_ucontext ibucontext; struct mlx4_uar uar; @@ -55,9 +82,17 @@ struct mlx4_ib_pd { u32 pdn; }; +struct mlx4_ib_xrcd { + struct ib_xrcd ibxrcd; + u32 xrcdn; + struct ib_pd *pd; + struct ib_cq *cq; +}; + struct mlx4_ib_cq_buf { struct mlx4_buf buf; struct mlx4_mtt mtt; + int entry_size; }; struct mlx4_ib_cq_resize { @@ -83,11 +118,28 @@ struct mlx4_ib_mr { struct ib_umem *umem; }; +struct mlx4_ib_mw { + struct ib_mw ibmw; + struct mlx4_mw mmw; +}; + +struct mlx4_ib_fast_reg_page_list { + struct ib_fast_reg_page_list ibfrpl; + __be64 *mapped_page_list; + dma_addr_t map; +}; + struct mlx4_ib_fmr { struct ib_fmr ibfmr; struct mlx4_fmr mfmr; }; +struct mlx4_ib_flow { + struct ib_flow ibflow; + /* translating DMFS verbs sniffer rule to FW API requires two reg IDs */ + u64 reg_id[2]; +}; + struct mlx4_ib_wq { u64 *wrid; spinlock_t lock; @@ -101,8 +153,109 @@ struct mlx4_ib_wq { }; enum mlx4_ib_qp_flags { - MLX4_IB_QP_LSO = 1 << 0, - MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, + MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP, + MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO, + MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, + 
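/* the SR-IOV flags sit in the top bits, above the IB_QP_CREATE_* values reused by the entries above, so the two ranges cannot collide */ +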
MLX4_IB_SRIOV_SQP = 1 << 31, +}; + +struct mlx4_ib_gid_entry { + struct list_head list; + union ib_gid gid; + int added; + u8 port; +}; + +enum mlx4_ib_qp_type { + /* + * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries + * here (and in that order) since the MAD layer uses them as + * indices into a 2-entry table. + */ + MLX4_IB_QPT_SMI = IB_QPT_SMI, + MLX4_IB_QPT_GSI = IB_QPT_GSI, + + MLX4_IB_QPT_RC = IB_QPT_RC, + MLX4_IB_QPT_UC = IB_QPT_UC, + MLX4_IB_QPT_UD = IB_QPT_UD, + MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6, + MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE, + MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET, + MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI, + MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT, + + MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16, + MLX4_IB_QPT_PROXY_SMI = 1 << 17, + MLX4_IB_QPT_PROXY_GSI = 1 << 18, + MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19, + MLX4_IB_QPT_TUN_SMI = 1 << 20, + MLX4_IB_QPT_TUN_GSI = 1 << 21, +}; + +#define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \ + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \ + MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI) + +enum mlx4_ib_mad_ifc_flags { + MLX4_MAD_IFC_IGNORE_MKEY = 1, + MLX4_MAD_IFC_IGNORE_BKEY = 2, + MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY | + MLX4_MAD_IFC_IGNORE_BKEY), + MLX4_MAD_IFC_NET_VIEW = 4, +}; + +enum { + MLX4_NUM_TUNNEL_BUFS = 256, +}; + +struct mlx4_ib_tunnel_header { + struct mlx4_av av; + __be32 remote_qpn; + __be32 qkey; + __be16 vlan; + u8 mac[6]; + __be16 pkey_index; + u8 reserved[6]; +}; + +struct mlx4_ib_buf { + void *addr; + dma_addr_t map; +}; + +struct mlx4_rcv_tunnel_hdr { + __be32 flags_src_qp; /* flags[6:5] is defined for VLANs: + * 0x0 - no vlan was in the packet + * 0x01 - C-VLAN was in the packet */ + u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */ + u8 reserved; + __be16 pkey_index; + __be16 sl_vid; + __be16 slid_mac_47_32; + __be32 mac_31_0; +}; + +struct mlx4_ib_proxy_sqp_hdr { + struct ib_grh grh; + struct mlx4_rcv_tunnel_hdr tun; +} __packed; + +struct mlx4_roce_smac_vlan_info { + u64 smac; + int smac_index; + int smac_port; + u64 candidate_smac; + int candidate_smac_index; + int candidate_smac_port; + u16 vid; + int vlan_index; + int vlan_port; + u16 candidate_vid; + int candidate_vlan_index; + int candidate_vlan_port; + int update_vid; }; struct mlx4_ib_qp { @@ -120,10 +273,12 @@ struct mlx4_ib_qp { int sq_spare_wqes; struct mlx4_ib_wq sq; + enum mlx4_ib_qp_type mlx4_ib_qp_type; struct ib_umem *umem; struct mlx4_mtt mtt; int buf_size; struct mutex mutex; + u16 xrcdn; u32 flags; u8 port; u8 alt_port; @@ -131,6 +286,13 @@ struct mlx4_ib_qp { u8 resp_depth; u8 sq_no_prefetch; u8 state; + int mlx_type; + struct list_head gid_list; + struct list_head steering_rules; + struct mlx4_ib_buf *sqp_proxy_rcv; + struct mlx4_roce_smac_vlan_info pri; + struct mlx4_roce_smac_vlan_info alt; + u64 reg_id; }; struct mlx4_ib_srq { @@ -150,12 +312,191 @@ struct mlx4_ib_srq { struct mlx4_ib_ah { struct ib_ah ibah; - struct mlx4_av av; + union mlx4_ext_av av; +}; + +/****************************************/ +/* alias guid support */ +/****************************************/ +#define NUM_PORT_ALIAS_GUID 2 +#define NUM_ALIAS_GUID_IN_REC 8 +#define NUM_ALIAS_GUID_REC_IN_PORT 16 +#define GUID_REC_SIZE 8 +#define NUM_ALIAS_GUID_PER_PORT 128 +#define MLX4_NOT_SET_GUID (0x00LL) +#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL)) + +enum mlx4_guid_alias_rec_status { + MLX4_GUID_INFO_STATUS_IDLE, + MLX4_GUID_INFO_STATUS_SET, + MLX4_GUID_INFO_STATUS_PENDING, +}; + 
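+/* Editorial sketch, not part of the original patch: the constants above + * imply that a port's NUM_ALIAS_GUID_PER_PORT (128) alias GUIDs are spread + * over NUM_ALIAS_GUID_REC_IN_PORT (16) SA GuidInfo records holding + * NUM_ALIAS_GUID_IN_REC (8) GUIDs each. A hypothetical helper mapping a + * per-port GUID index to its record and slot would be: + */ +static inline void alias_guid_locate(unsigned int guid_index, + unsigned int *rec_num, unsigned int *slot) +{ + *rec_num = guid_index / NUM_ALIAS_GUID_IN_REC; /* SA record 0..15 */ + *slot = guid_index % NUM_ALIAS_GUID_IN_REC; /* entry 0..7 in the record */ +} +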
+enum mlx4_guid_alias_rec_ownership { + MLX4_GUID_DRIVER_ASSIGN, + MLX4_GUID_SYSADMIN_ASSIGN, + MLX4_GUID_NONE_ASSIGN, /* init state of each record */ +}; + +enum mlx4_guid_alias_rec_method { + MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET, + MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE, +}; + +struct mlx4_sriov_alias_guid_info_rec_det { + u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC]; + ib_sa_comp_mask guid_indexes; /* indicates which of the 8 GUID entries in the record are valid */ + enum mlx4_guid_alias_rec_status status; /* the administrative status of the record */ + u8 method; /* set or delete */ + enum mlx4_guid_alias_rec_ownership ownership; /* indicates who assigned this alias_guid record */ +}; + +struct mlx4_sriov_alias_guid_port_rec_det { + struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT]; + struct workqueue_struct *wq; + struct delayed_work alias_guid_work; + u8 port; + struct mlx4_sriov_alias_guid *parent; + struct list_head cb_list; +}; + +struct mlx4_sriov_alias_guid { + struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS]; + spinlock_t ag_work_lock; + struct ib_sa_client *sa_client; +}; + +struct mlx4_ib_demux_work { + struct work_struct work; + struct mlx4_ib_dev *dev; + int slave; + int do_init; + u8 port; + +}; + +struct mlx4_ib_tun_tx_buf { + struct mlx4_ib_buf buf; + struct ib_ah *ah; +}; + +struct mlx4_ib_demux_pv_qp { + struct ib_qp *qp; + enum ib_qp_type proxy_qpt; + struct mlx4_ib_buf *ring; + struct mlx4_ib_tun_tx_buf *tx_ring; + spinlock_t tx_lock; + unsigned tx_ix_head; + unsigned tx_ix_tail; +}; + +enum mlx4_ib_demux_pv_state { + DEMUX_PV_STATE_DOWN, + DEMUX_PV_STATE_STARTING, + DEMUX_PV_STATE_ACTIVE, + DEMUX_PV_STATE_DOWNING, +}; + +struct mlx4_ib_demux_pv_ctx { + int port; + int slave; + enum mlx4_ib_demux_pv_state state; + int has_smi; + struct ib_device *ib_dev; + struct ib_cq *cq; + struct ib_pd *pd; + struct ib_mr *mr; + struct work_struct work; + struct workqueue_struct *wq; + struct mlx4_ib_demux_pv_qp qp[2]; +}; + +struct mlx4_ib_demux_ctx { + struct ib_device *ib_dev; + int port; + struct workqueue_struct *wq; + struct workqueue_struct *ud_wq; + spinlock_t ud_lock; + __be64 subnet_prefix; + __be64 guid_cache[128]; + struct mlx4_ib_dev *dev; + /* the following lock protects both mcg_table and mcg_mgid0_list */ + struct mutex mcg_table_lock; + struct rb_root mcg_table; + struct list_head mcg_mgid0_list; + struct workqueue_struct *mcg_wq; + struct mlx4_ib_demux_pv_ctx **tun; + atomic_t tid; + int flushing; /* flushing the work queue */ +}; + +struct mlx4_ib_sriov { + struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS]; + struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS]; + /* this spinlock must be taken with the irq-safe variants, because + * it may be acquired from interrupt context. */ + spinlock_t going_down_lock; + int is_going_down; + + struct mlx4_sriov_alias_guid alias_guid; + + /* CM paravirtualization fields */ + struct list_head cm_list; + spinlock_t id_map_lock; + struct rb_root sl_id_map; + struct idr pv_id_table; +}; + +struct mlx4_ib_iboe { + spinlock_t lock; + struct net_device *netdevs[MLX4_MAX_PORTS]; + struct net_device *masters[MLX4_MAX_PORTS]; + struct notifier_block nb; + struct notifier_block nb_inet; + struct notifier_block nb_inet6; + union ib_gid gid_table[MLX4_MAX_PORTS][128]; +}; + +struct pkey_mgt { + u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + struct list_head pkey_port_list[MLX4_MFUNC_MAX]; + struct 
kobject *device_parent[MLX4_MFUNC_MAX]; +}; + +struct mlx4_ib_iov_sysfs_attr { + void *ctx; + struct kobject *kobj; + unsigned long data; + u32 entry_num; + char name[15]; + struct device_attribute dentry; + struct device *dev; +}; + +struct mlx4_ib_iov_sysfs_attr_ar { + struct mlx4_ib_iov_sysfs_attr dentries[3 * NUM_ALIAS_GUID_PER_PORT + 1]; +}; + +struct mlx4_ib_iov_port { + char name[100]; + u8 num; + struct mlx4_ib_dev *dev; + struct list_head list; + struct mlx4_ib_iov_sysfs_attr_ar *dentr_ar; + struct ib_port_attr attr; + struct kobject *cur_port; + struct kobject *admin_alias_parent; + struct kobject *gids_parent; + struct kobject *pkeys_parent; + struct kobject *mcgs_parent; + struct mlx4_ib_iov_sysfs_attr mcg_dentry; }; struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; + int num_ports; void __iomem *uar_map; struct mlx4_uar priv_uar; @@ -165,8 +506,39 @@ struct mlx4_ib_dev { struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; struct ib_ah *sm_ah[MLX4_MAX_PORTS]; spinlock_t sm_lock; + struct mlx4_ib_sriov sriov; struct mutex cap_mask_mutex; + bool ib_active; + struct mlx4_ib_iboe iboe; + int counters[MLX4_MAX_PORTS]; + int *eq_table; + int eq_added; + struct kobject *iov_parent; + struct kobject *ports_parent; + struct kobject *dev_ports_parent[MLX4_MFUNC_MAX]; + struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS]; + struct pkey_mgt pkeys; + unsigned long *ib_uc_qpns_bitmap; + int steer_qpn_count; + int steer_qpn_base; + int steering_support; + struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS]; + /* lock when destroying qp1_proxy and getting netdev events */ + struct mutex qp1_proxy_lock[MLX4_MAX_PORTS]; +}; + +struct ib_event_work { + struct work_struct work; + struct mlx4_ib_dev *ib_dev; + struct mlx4_eqe ib_eqe; +}; + +struct mlx4_ib_qp_tunnel_init_attr { + struct ib_qp_init_attr init_attr; + int slave; + enum ib_qp_type proxy_qp_type; + u8 port; }; static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) @@ -184,6 +556,11 @@ static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd) return container_of(ibpd, struct mlx4_ib_pd, ibpd); } +static inline struct mlx4_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd) +{ + return container_of(ibxrcd, struct mlx4_ib_xrcd, ibxrcd); +} + static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq) { return container_of(ibcq, struct mlx4_ib_cq, ibcq); @@ -199,10 +576,26 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx4_ib_mr, ibmr); } +static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct mlx4_ib_mw, ibmw); +} + +static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) +{ + return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); +} + static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) { return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr); } + +static inline struct mlx4_ib_flow *to_mflow(struct ib_flow *ibflow) +{ + return container_of(ibflow, struct mlx4_ib_flow, ibflow); +} + static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp) { return container_of(ibqp, struct mlx4_ib_qp, ibqp); @@ -228,6 +621,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) return container_of(ibah, struct mlx4_ib_ah, ibah); } +int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev); +void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev); + int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, struct mlx4_db *db); void mlx4_ib_db_unmap_user(struct 
mlx4_ib_ucontext *context, struct mlx4_db *db); @@ -239,6 +635,15 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); +int mlx4_ib_dealloc_mw(struct ib_mw *mw); +struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len); +struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len); +void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); @@ -279,7 +684,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); -int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, +int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad); int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, @@ -294,10 +699,94 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages, u64 iova); int mlx4_ib_unmap_fmr(struct list_head *fmr_list); int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); +int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props, int netw_view); +int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey, int netw_view); + +int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid, int netw_view); -static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) +static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) { - return !!(ah->av.g_slid & 0x80); + u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3; + + if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET) + return true; + + return !!(ah->av.ib.g_slid & 0x80); } +int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx); +void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq); +void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave); +int mlx4_ib_mcg_init(void); +void mlx4_ib_mcg_destroy(void); + +int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid); + +int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave, + struct ib_sa_mad *sa_mad); +int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave, + struct ib_sa_mad *mad); + +int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + union ib_gid *gid); + +void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, + enum ib_event_type type); + +void mlx4_ib_tunnels_update_work(struct work_struct *work); + +int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, + enum ib_qp_type qpt, struct ib_wc *wc, + struct ib_grh *grh, struct ib_mad *mad); + +int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, + enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, + u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, + struct ib_mad *mad); + +__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx); + +int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int 
*slave, + struct ib_mad *mad); + +int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id, + struct ib_mad *mad); + +void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev); +void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id); + +/* alias guid support */ +void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port); +int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev); +void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev); +void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port); + +void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, + int block_num, + u8 port_num, u8 *p_data); + +void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, + int block_num, u8 port_num, + u8 *p_data); + +int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, + struct attribute *attr); +void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, + struct attribute *attr); +ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index); + +int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ; + +void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device); + +__be64 mlx4_ib_gen_node_guid(void); + +int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); +void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); +int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, + int is_attach); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 68e92485fc7..cb2a8727f3f 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -30,6 +31,8 @@ * SOFTWARE. */ +#include <linux/slab.h> + #include "mlx4_ib.h" static u32 convert_access(int acc) @@ -38,9 +41,19 @@ static u32 convert_access(int acc) (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) | + (acc & IB_ACCESS_MW_BIND ? 
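/* IB_ACCESS_MW_BIND is new here: it maps to the HCA permission that
 * allows memory windows to be bound to this region */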
MLX4_PERM_BIND_MW : 0) | MLX4_PERM_LOCAL_READ; } +static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type) +{ + switch (type) { + case IB_MW_TYPE_1: return MLX4_MW_TYPE_1; + case IB_MW_TYPE_2: return MLX4_MW_TYPE_2; + default: return -1; + } +} + struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx4_ib_mr *mr; @@ -65,7 +78,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) return &mr->ibmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_free: kfree(mr); @@ -77,11 +90,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem) { u64 *pages; - struct ib_umem_chunk *chunk; - int i, j, k; + int i, k, entry; int n; int len; int err = 0; + struct scatterlist *sg; pages = (u64 *) __get_free_page(GFP_KERNEL); if (!pages) @@ -89,26 +102,25 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, i = n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift; - for (k = 0; k < len; ++k) { - pages[i++] = sg_dma_address(&chunk->page_list[j]) + - umem->page_size * k; - /* - * Be friendly to mlx4_write_mtt() and - * pass it chunks of appropriate size. - */ - if (i == PAGE_SIZE / sizeof (u64)) { - err = mlx4_write_mtt(dev->dev, mtt, n, - i, pages); - if (err) - goto out; - n += i; - i = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> mtt->page_shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(sg) + + umem->page_size * k; + /* + * Be friendly to mlx4_write_mtt() and + * pass it chunks of appropriate size. + */ + if (i == PAGE_SIZE / sizeof (u64)) { + err = mlx4_write_mtt(dev->dev, mtt, n, + i, pages); + if (err) + goto out; + n += i; + i = 0; } } + } if (i) err = mlx4_write_mtt(dev->dev, mtt, n, i, pages); @@ -160,7 +172,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mr->ibmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_umem: ib_umem_release(mr->umem); @@ -174,8 +186,11 @@ err_free: int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); + int ret; - mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); + ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); + if (ret) + return ret; if (mr->umem) ib_umem_release(mr->umem); kfree(mr); @@ -183,6 +198,149 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr) return 0; } +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_mw *mw; + int err; + + mw = kmalloc(sizeof(*mw), GFP_KERNEL); + if (!mw) + return ERR_PTR(-ENOMEM); + + err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, + to_mlx4_type(type), &mw->mmw); + if (err) + goto err_free; + + err = mlx4_mw_enable(dev->dev, &mw->mmw); + if (err) + goto err_mw; + + mw->ibmw.rkey = mw->mmw.key; + + return &mw->ibmw; + +err_mw: + mlx4_mw_free(dev->dev, &mw->mmw); + +err_free: + kfree(mw); + + return ERR_PTR(err); +} + +int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + struct ib_send_wr wr; + struct ib_send_wr *bad_wr; + int ret; + + memset(&wr, 0, sizeof(wr)); + wr.opcode = IB_WR_BIND_MW; + wr.wr_id = mw_bind->wr_id; + wr.send_flags = mw_bind->send_flags; + wr.wr.bind_mw.mw = mw; + wr.wr.bind_mw.bind_info = mw_bind->bind_info; + 
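	/* ib_inc_rkey() bumps the low 8-bit key portion of the rkey;
	 * mw->rkey only adopts this new value below, once the bind WR
	 * has been posted successfully */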
wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey); + + ret = mlx4_ib_post_send(qp, &wr, &bad_wr); + if (!ret) + mw->rkey = wr.wr.bind_mw.rkey; + + return ret; +} + +int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) +{ + struct mlx4_ib_mw *mw = to_mmw(ibmw); + + mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw); + kfree(mw); + + return 0; +} + +struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_mr *mr; + int err; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0, + max_page_list_len, 0, &mr->mmr); + if (err) + goto err_free; + + err = mlx4_mr_enable(dev->dev, &mr->mmr); + if (err) + goto err_mr; + + mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; + mr->umem = NULL; + + return &mr->ibmr; + +err_mr: + (void) mlx4_mr_free(dev->dev, &mr->mmr); + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len) +{ + struct mlx4_ib_dev *dev = to_mdev(ibdev); + struct mlx4_ib_fast_reg_page_list *mfrpl; + int size = page_list_len * sizeof (u64); + + if (page_list_len > MLX4_MAX_FAST_REG_PAGES) + return ERR_PTR(-EINVAL); + + mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL); + if (!mfrpl) + return ERR_PTR(-ENOMEM); + + mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); + if (!mfrpl->ibfrpl.page_list) + goto err_free; + + mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev, + size, &mfrpl->map, + GFP_KERNEL); + if (!mfrpl->mapped_page_list) + goto err_free; + + WARN_ON(mfrpl->map & 0x3f); + + return &mfrpl->ibfrpl; + +err_free: + kfree(mfrpl->ibfrpl.page_list); + kfree(mfrpl); + return ERR_PTR(-ENOMEM); +} + +void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) +{ + struct mlx4_ib_dev *dev = to_mdev(page_list->device); + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); + int size = page_list->max_page_list_len * sizeof (u64); + + dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list, + mfrpl->map); + kfree(mfrpl->ibfrpl.page_list); + kfree(mfrpl); +} + struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, struct ib_fmr_attr *fmr_attr) { @@ -209,7 +367,7 @@ struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, return &fmr->ibfmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); err_free: kfree(fmr); @@ -256,7 +414,7 @@ int mlx4_ib_unmap_fmr(struct list_head *fmr_list) err = mlx4_SYNC_TPT(mdev); if (err) - printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when " + pr_warn("SYNC_TPT error %d when " "unmapping FMRs\n", err); return 0; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 89eb6cbe592..67780452f0c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -31,9 +32,13 @@ */ #include <linux/log2.h> +#include <linux/slab.h> +#include <linux/netdevice.h> #include <rdma/ib_cache.h> #include <rdma/ib_pack.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_mad.h> #include <linux/mlx4/qp.h> @@ -46,14 +51,24 @@ enum { enum { MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, - MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f + MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, + MLX4_IB_LINK_TYPE_IB = 0, + MLX4_IB_LINK_TYPE_ETH = 1 }; enum { /* - * Largest possible UD header: send with GRH and immediate data. + * Largest possible UD header: send with GRH and immediate + * data plus 18 bytes for an Ethernet header with VLAN/802.1Q + * tag. (LRH would only use 8 bytes, so Ethernet is the + * biggest case) */ - MLX4_IB_UD_HEADER_SIZE = 72 + MLX4_IB_UD_HEADER_SIZE = 82, + MLX4_IB_LSO_HEADER_SPARE = 128, +}; + +enum { + MLX4_IB_IBOE_ETHERTYPE = 0x8915 }; struct mlx4_ib_sqp { @@ -66,18 +81,45 @@ struct mlx4_ib_sqp { }; enum { - MLX4_IB_MIN_SQ_STRIDE = 6 + MLX4_IB_MIN_SQ_STRIDE = 6, + MLX4_IB_CACHE_LINE_SIZE = 64, +}; + +enum { + MLX4_RAW_QP_MTU = 7, + MLX4_RAW_QP_MSGMAX = 31, }; +#ifndef ETH_ALEN +#define ETH_ALEN 6 +#endif +static inline u64 mlx4_mac_to_u64(u8 *addr) +{ + u64 mac = 0; + int i; + + for (i = 0; i < ETH_ALEN; i++) { + mac <<= 8; + mac |= addr[i]; + } + return mac; +} + static const __be32 mlx4_ib_opcode[] = { - [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND), - [IB_WR_LSO] = __constant_cpu_to_be32(MLX4_OPCODE_LSO), - [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM), - [IB_WR_RDMA_WRITE] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), - [IB_WR_RDMA_WRITE_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), - [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ), - [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), - [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), + [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), + [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), + [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM), + [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), + [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), + [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ), + [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), + [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), + [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), + [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), + [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), + [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), + [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), + [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) @@ -85,16 +127,62 @@ static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) return container_of(mqp, struct mlx4_ib_sqp, qp); } +static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + if (!mlx4_is_master(dev->dev)) + return 0; + + return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn && + qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn + + 8 * MLX4_MFUNC_MAX; +} + static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { - return qp->mqp.qpn >= dev->dev->caps.sqp_start && - qp->mqp.qpn <= dev->dev->caps.sqp_start + 3; + int proxy_sqp = 0; + int real_sqp = 0; + int i; + /* PPF or Native -- 
real SQP */ + real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) && + qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn && + qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3); + if (real_sqp) + return 1; + /* VF or PF -- proxy SQP */ + if (mlx4_is_mfunc(dev->dev)) { + for (i = 0; i < dev->dev->caps.num_ports; i++) { + if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] || + qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) { + proxy_sqp = 1; + break; + } + } + } + return proxy_sqp; } +/* used for INIT/CLOSE port logic */ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { - return qp->mqp.qpn >= dev->dev->caps.sqp_start && - qp->mqp.qpn <= dev->dev->caps.sqp_start + 1; + int proxy_qp0 = 0; + int real_qp0 = 0; + int i; + /* PPF or Native -- real QP0 */ + real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) && + qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn && + qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1); + if (real_qp0) + return 1; + /* VF or PF -- proxy QP0 */ + if (mlx4_is_mfunc(dev->dev)) { + for (i = 0; i < dev->dev->caps.num_ports; i++) { + if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) { + proxy_qp0 = 1; + break; + } + } + } + return proxy_qp0; } static void *get_wqe(struct mlx4_ib_qp *qp, int offset) @@ -236,7 +324,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) event.event = IB_EVENT_QP_ACCESS_ERR; break; default: - printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " + pr_warn("Unexpected event type %d " "on QP %06x\n", type, qp->qpn); return; } @@ -245,7 +333,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) } } -static int send_wqe_overhead(enum ib_qp_type type, u32 flags) +static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) { /* * UD WQEs must have a datagram segment. @@ -254,19 +342,29 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags) * header and space for the ICRC). */ switch (type) { - case IB_QPT_UD: + case MLX4_IB_QPT_UD: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_datagram_seg) + - ((flags & MLX4_IB_QP_LSO) ? 64 : 0); - case IB_QPT_UC: + ((flags & MLX4_IB_QP_LSO) ? 
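		/* LSO WQEs carry the packet headers inline in the send
		 * WQE, hence the extra 128-byte spare */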
MLX4_IB_LSO_HEADER_SPARE : 0); + case MLX4_IB_QPT_PROXY_SMI_OWNER: + case MLX4_IB_QPT_PROXY_SMI: + case MLX4_IB_QPT_PROXY_GSI: + return sizeof (struct mlx4_wqe_ctrl_seg) + + sizeof (struct mlx4_wqe_datagram_seg) + 64; + case MLX4_IB_QPT_TUN_SMI_OWNER: + case MLX4_IB_QPT_TUN_GSI: + return sizeof (struct mlx4_wqe_ctrl_seg) + + sizeof (struct mlx4_wqe_datagram_seg); + + case MLX4_IB_QPT_UC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_raddr_seg); - case IB_QPT_RC: + case MLX4_IB_QPT_RC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_atomic_seg) + sizeof (struct mlx4_wqe_raddr_seg); - case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX4_IB_QPT_SMI: + case MLX4_IB_QPT_GSI: return sizeof (struct mlx4_wqe_ctrl_seg) + ALIGN(MLX4_IB_UD_HEADER_SIZE + DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, @@ -282,15 +380,14 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - int is_user, int has_srq, struct mlx4_ib_qp *qp) + int is_user, int has_rq, struct mlx4_ib_qp *qp) { /* Sanity check RQ size before proceeding */ - if (cap->max_recv_wr > dev->dev->caps.max_wqes || - cap->max_recv_sge > dev->dev->caps.max_rq_sg) + if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE || + cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)) return -EINVAL; - if (has_srq) { - /* QPs attached to an SRQ should have no RQ */ + if (!has_rq) { if (cap->max_recv_wr) return -EINVAL; @@ -305,20 +402,29 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); } - cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt; - cap->max_recv_sge = qp->rq.max_gs; + /* leave userspace return values as they were, so as not to break ABI */ + if (is_user) { + cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt; + cap->max_recv_sge = qp->rq.max_gs; + } else { + cap->max_recv_wr = qp->rq.max_post = + min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt); + cap->max_recv_sge = min(qp->rq.max_gs, + min(dev->dev->caps.max_sq_sg, + dev->dev->caps.max_rq_sg)); + } return 0; } static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - enum ib_qp_type type, struct mlx4_ib_qp *qp) + enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp) { int s; /* Sanity check SQ size before proceeding */ - if (cap->max_send_wr > dev->dev->caps.max_wqes || - cap->max_send_sge > dev->dev->caps.max_sq_sg || + if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) || + cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) || cap->max_inline_data + send_wqe_overhead(type, qp->flags) + sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) return -EINVAL; @@ -327,7 +433,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, * For MLX transport we need 2 extra S/G entries: * one for the header and one for the checksum at the end */ - if ((type == IB_QPT_SMI || type == IB_QPT_GSI) && + if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI || + type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) && cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) return -EINVAL; @@ -348,7 +455,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, * anymore, so we do this only if selective signaling is off. 
* * Further, on 32-bit platforms, we can't use vmap() to make - * the QP buffer virtually contigious. Thus we have to use + * the QP buffer virtually contiguous. Thus we have to use * constant-sized WRs to make sure a WR is always fully within * a single page-sized chunk. * @@ -371,7 +478,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, */ if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && qp->sq_signal_bits && BITS_PER_LONG == 64 && - type != IB_QPT_SMI && type != IB_QPT_GSI) + type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI && + !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | + MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) qp->sq.wqe_shift = ilog2(64); else qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); @@ -443,21 +552,157 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev, return 0; } +static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) +{ + int i; + + qp->sqp_proxy_rcv = + kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt, + GFP_KERNEL); + if (!qp->sqp_proxy_rcv) + return -ENOMEM; + for (i = 0; i < qp->rq.wqe_cnt; i++) { + qp->sqp_proxy_rcv[i].addr = + kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr), + GFP_KERNEL); + if (!qp->sqp_proxy_rcv[i].addr) + goto err; + qp->sqp_proxy_rcv[i].map = + ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr, + sizeof (struct mlx4_ib_proxy_sqp_hdr), + DMA_FROM_DEVICE); + } + return 0; + +err: + while (i > 0) { + --i; + ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map, + sizeof (struct mlx4_ib_proxy_sqp_hdr), + DMA_FROM_DEVICE); + kfree(qp->sqp_proxy_rcv[i].addr); + } + kfree(qp->sqp_proxy_rcv); + qp->sqp_proxy_rcv = NULL; + return -ENOMEM; +} + +static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) +{ + int i; + + for (i = 0; i < qp->rq.wqe_cnt; i++) { + ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map, + sizeof (struct mlx4_ib_proxy_sqp_hdr), + DMA_FROM_DEVICE); + kfree(qp->sqp_proxy_rcv[i].addr); + } + kfree(qp->sqp_proxy_rcv); +} + +static int qp_has_rq(struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) + return 0; + + return !attr->srq; +} + +static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn) +{ + int i; + for (i = 0; i < dev->caps.num_ports; i++) { + if (qpn == dev->caps.qp0_proxy[i]) + return !!dev->caps.qp0_qkey[i]; + } + return 0; +} + static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) + struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp, + gfp_t gfp) { + int qpn; int err; + struct mlx4_ib_sqp *sqp; + struct mlx4_ib_qp *qp; + enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; + + /* When tunneling special qps, we use a plain UD qp */ + if (sqpn) { + if (mlx4_is_mfunc(dev->dev) && + (!mlx4_is_master(dev->dev) || + !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) { + if (init_attr->qp_type == IB_QPT_GSI) + qp_type = MLX4_IB_QPT_PROXY_GSI; + else { + if (mlx4_is_master(dev->dev) || + qp0_enabled_vf(dev->dev, sqpn)) + qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; + else + qp_type = MLX4_IB_QPT_PROXY_SMI; + } + } + qpn = sqpn; + /* add extra sg entry for tunneling */ + init_attr->cap.max_recv_sge++; + } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) { + struct mlx4_ib_qp_tunnel_init_attr *tnl_init = + container_of(init_attr, + struct mlx4_ib_qp_tunnel_init_attr, init_attr); + if ((tnl_init->proxy_qp_type != IB_QPT_SMI && + 
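		    /* only the two special QP types can be tunneled, and
		     * only the master (PPF) creates tunnel QPs */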
tnl_init->proxy_qp_type != IB_QPT_GSI) || + !mlx4_is_master(dev->dev)) + return -EINVAL; + if (tnl_init->proxy_qp_type == IB_QPT_GSI) + qp_type = MLX4_IB_QPT_TUN_GSI; + else if (tnl_init->slave == mlx4_master_func_num(dev->dev) || + mlx4_vf_smi_enabled(dev->dev, tnl_init->slave, + tnl_init->port)) + qp_type = MLX4_IB_QPT_TUN_SMI_OWNER; + else + qp_type = MLX4_IB_QPT_TUN_SMI; + /* we are definitely in the PPF here, since we are creating + * tunnel QPs. base_tunnel_sqpn is therefore valid. */ + qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave + + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1; + sqpn = qpn; + } + + if (!*caller_qp) { + if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || + (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | + MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { + sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp); + if (!sqp) + return -ENOMEM; + qp = &sqp->qp; + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; + } else { + qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp); + if (!qp) + return -ENOMEM; + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; + } + } else + qp = *caller_qp; + + qp->mlx4_ib_qp_type = qp_type; mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); + INIT_LIST_HEAD(&qp->gid_list); + INIT_LIST_HEAD(&qp->steering_rules); qp->state = IB_QPS_RESET; if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp); + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp); if (err) goto err; @@ -491,7 +736,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_mtt; - if (!init_attr->srq) { + if (qp_has_rq(init_attr)) { err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), ucmd.db_addr, &qp->db); if (err) @@ -506,19 +751,27 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; - err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); + if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { + if (dev->steering_support == + MLX4_STEERING_MODE_DEVICE_MANAGED) + qp->flags |= MLX4_IB_QP_NETIF; + else + goto err; + } + + err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); if (err) goto err; - if (!init_attr->srq) { - err = mlx4_db_alloc(dev->dev, &qp->db, 0); + if (qp_has_rq(init_attr)) { + err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp); if (err) goto err; *qp->db.db = 0; } - if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { + if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) { err = -ENOMEM; goto err_db; } @@ -528,22 +781,47 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); + err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp); if (err) goto err_mtt; - qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL); - qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL); - + qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp); + qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp); if (!qp->sq.wrid || !qp->rq.wrid) { err = -ENOMEM; goto err_wrid; } } - err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp); + if (sqpn) { + if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | + 
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { + if (alloc_proxy_bufs(pd->device, qp)) { + err = -ENOMEM; + goto err_wrid; + } + } + } else { + /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE + * BlueFlame setup flow wrongly causes VLAN insertion. */ + if (init_attr->qp_type == IB_QPT_RAW_PACKET) + err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn); + else + if (qp->flags & MLX4_IB_QP_NETIF) + err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn); + else + err = mlx4_qp_reserve_range(dev->dev, 1, 1, + &qpn); + if (err) + goto err_proxy; + } + + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp); if (err) - goto err_wrid; + goto err_qpn; + + if (init_attr->qp_type == IB_QPT_XRC_TGT) + qp->mqp.qpn |= (1 << 23); /* * Hardware wants QPN written in big-endian order (after @@ -553,14 +831,24 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); qp->mqp.event = mlx4_ib_qp_event; - + if (!*caller_qp) + *caller_qp = qp; return 0; +err_qpn: + if (!sqpn) { + if (qp->flags & MLX4_IB_QP_NETIF) + mlx4_ib_steer_qp_free(dev, qpn, 1); + else + mlx4_qp_release_range(dev->dev, qpn, 1); + } +err_proxy: + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) + free_proxy_bufs(pd->device, qp); err_wrid: if (pd->uobject) { - if (!init_attr->srq) - mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), - &qp->db); + if (qp_has_rq(init_attr)) + mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { kfree(qp->sq.wrid); kfree(qp->rq.wrid); @@ -576,10 +864,12 @@ err_buf: mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); err_db: - if (!pd->uobject && !init_attr->srq) + if (!pd->uobject && qp_has_rq(init_attr)) mlx4_db_free(dev->dev, &qp->db); err: + if (!*caller_qp) + kfree(qp); return err; } @@ -598,10 +888,12 @@ static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state) } static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) + __acquires(&send_cq->lock) __acquires(&recv_cq->lock) { - if (send_cq == recv_cq) + if (send_cq == recv_cq) { spin_lock_irq(&send_cq->lock); - else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { + __acquire(&recv_cq->lock); + } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_lock_irq(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else { @@ -611,10 +903,12 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv } static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) + __releases(&send_cq->lock) __releases(&recv_cq->lock) { - if (send_cq == recv_cq) + if (send_cq == recv_cq) { + __release(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); - else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { + } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_unlock(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); } else { @@ -623,19 +917,76 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re } } +static void del_gid_entries(struct mlx4_ib_qp *qp) +{ + struct mlx4_ib_gid_entry *ge, *tmp; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + list_del(&ge->list); + kfree(ge); + } +} + +static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp) +{ + if (qp->ibqp.qp_type == IB_QPT_XRC_TGT) + return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd); + else + return to_mpd(qp->ibqp.pd); +} + +static void get_cqs(struct mlx4_ib_qp *qp, + struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq) +{ + switch (qp->ibqp.qp_type) { + case IB_QPT_XRC_TGT: + *send_cq = 
to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq); + *recv_cq = *send_cq; + break; + case IB_QPT_XRC_INI: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = *send_cq; + break; + default: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = to_mcq(qp->ibqp.recv_cq); + break; + } +} + static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, int is_user) { struct mlx4_ib_cq *send_cq, *recv_cq; - if (qp->state != IB_QPS_RESET) + if (qp->state != IB_QPS_RESET) { if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) - printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n", + pr_warn("modify QP %06x to RESET failed.\n", qp->mqp.qpn); + if (qp->pri.smac) { + mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); + qp->pri.smac = 0; + } + if (qp->alt.smac) { + mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); + qp->alt.smac = 0; + } + if (qp->pri.vid < 0x1000) { + mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); + qp->pri.vid = 0xFFFF; + qp->pri.candidate_vid = 0xFFFF; + qp->pri.update_vid = 0; + } + if (qp->alt.vid < 0x1000) { + mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); + qp->alt.vid = 0xFFFF; + qp->alt.candidate_vid = 0xFFFF; + qp->alt.update_vid = 0; + } + } - send_cq = to_mcq(qp->ibqp.send_cq); - recv_cq = to_mcq(qp->ibqp.recv_cq); + get_cqs(qp, &send_cq, &recv_cq); mlx4_ib_lock_cqs(send_cq, recv_cq); @@ -651,59 +1002,117 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_ib_unlock_cqs(send_cq, recv_cq); mlx4_qp_free(dev->dev, &qp->mqp); + + if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) { + if (qp->flags & MLX4_IB_QP_NETIF) + mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1); + else + mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); + } + mlx4_mtt_cleanup(dev->dev, &qp->mtt); if (is_user) { - if (!qp->ibqp.srq) + if (qp->rq.wqe_cnt) mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), &qp->db); ib_umem_release(qp->umem); } else { kfree(qp->sq.wrid); kfree(qp->rq.wrid); + if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) + free_proxy_bufs(&dev->ib_dev, qp); mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); - if (!qp->ibqp.srq) + if (qp->rq.wqe_cnt) mlx4_db_free(dev->dev, &qp->db); } + + del_gid_entries(qp); +} + +static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) +{ + /* Native or PPF */ + if (!mlx4_is_mfunc(dev->dev) || + (mlx4_is_master(dev->dev) && + attr->create_flags & MLX4_IB_SRIOV_SQP)) { + return dev->dev->phys_caps.base_sqpn + + (attr->qp_type == IB_QPT_SMI ? 0 : 2) + + attr->port_num - 1; + } + /* PF or VF -- creating proxies */ + if (attr->qp_type == IB_QPT_SMI) + return dev->dev->caps.qp0_proxy[attr->port_num - 1]; + else + return dev->dev->caps.qp1_proxy[attr->port_num - 1]; } struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { - struct mlx4_ib_dev *dev = to_mdev(pd->device); - struct mlx4_ib_sqp *sqp; - struct mlx4_ib_qp *qp; + struct mlx4_ib_qp *qp = NULL; int err; + u16 xrcdn = 0; + gfp_t gfp; + gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ? + GFP_NOIO : GFP_KERNEL; /* - * We only support LSO and multicast loopback blocking, and - * only for kernel UD QPs. + * We only support LSO, vendor flag1, and multicast loopback blocking, + * and only for kernel UD QPs. 
*/ - if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) + if (init_attr->create_flags & ~(MLX4_IB_QP_LSO | + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | + MLX4_IB_SRIOV_TUNNEL_QP | + MLX4_IB_SRIOV_SQP | + MLX4_IB_QP_NETIF | + MLX4_IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); + if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { + if (init_attr->qp_type != IB_QPT_UD) + return ERR_PTR(-EINVAL); + } + if (init_attr->create_flags && - (pd->uobject || init_attr->qp_type != IB_QPT_UD)) + (udata || + ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) && + init_attr->qp_type != IB_QPT_UD) || + ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && + init_attr->qp_type > IB_QPT_GSI))) return ERR_PTR(-EINVAL); switch (init_attr->qp_type) { + case IB_QPT_XRC_TGT: + pd = to_mxrcd(init_attr->xrcd)->pd; + xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; + init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq; + /* fall through */ + case IB_QPT_XRC_INI: + if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); + init_attr->recv_cq = init_attr->send_cq; + /* fall through */ case IB_QPT_RC: case IB_QPT_UC: - case IB_QPT_UD: - { - qp = kzalloc(sizeof *qp, GFP_KERNEL); + case IB_QPT_RAW_PACKET: + qp = kzalloc(sizeof *qp, gfp); if (!qp) return ERR_PTR(-ENOMEM); - - err = create_qp_common(dev, pd, init_attr, udata, 0, qp); - if (err) { - kfree(qp); + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; + /* fall through */ + case IB_QPT_UD: + { + err = create_qp_common(to_mdev(pd->device), pd, init_attr, + udata, 0, &qp, gfp); + if (err) return ERR_PTR(err); - } qp->ibqp.qp_num = qp->mqp.qpn; + qp->xrcdn = xrcdn; break; } @@ -711,24 +1120,14 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, case IB_QPT_GSI: { /* Userspace is not allowed to create special QPs: */ - if (pd->uobject) + if (udata) return ERR_PTR(-EINVAL); - sqp = kzalloc(sizeof *sqp, GFP_KERNEL); - if (!sqp) - return ERR_PTR(-ENOMEM); - - qp = &sqp->qp; - - err = create_qp_common(dev, pd, init_attr, udata, - dev->dev->caps.sqp_start + - (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + - init_attr->port_num - 1, - qp); - if (err) { - kfree(sqp); + err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, + get_sqp_num(to_mdev(pd->device), init_attr), + &qp, gfp); + if (err) return ERR_PTR(err); - } qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 
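		/* the well-known special QP numbers: 0 for SMI, 1 for GSI */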
0 : 1; @@ -747,11 +1146,19 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(qp->device); struct mlx4_ib_qp *mqp = to_mqp(qp); + struct mlx4_ib_pd *pd; if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); - destroy_qp_common(dev, mqp, !!qp->pd->uobject); + if (dev->qp1_proxy[mqp->port - 1] == mqp) { + mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]); + dev->qp1_proxy[mqp->port - 1] = NULL; + mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); + } + + pd = get_pd(mqp); + destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); if (is_sqp(dev, mqp)) kfree(to_msqp(mqp)); @@ -761,15 +1168,27 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) return 0; } -static int to_mlx4_st(enum ib_qp_type type) +static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type) { switch (type) { - case IB_QPT_RC: return MLX4_QP_ST_RC; - case IB_QPT_UC: return MLX4_QP_ST_UC; - case IB_QPT_UD: return MLX4_QP_ST_UD; - case IB_QPT_SMI: - case IB_QPT_GSI: return MLX4_QP_ST_MLX; - default: return -1; + case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC; + case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC; + case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD; + case MLX4_IB_QPT_XRC_INI: + case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; + case MLX4_IB_QPT_SMI: + case MLX4_IB_QPT_GSI: + case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX; + + case MLX4_IB_QPT_PROXY_SMI_OWNER: + case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ? + MLX4_QP_ST_MLX : -1); + case MLX4_IB_QPT_PROXY_SMI: + case MLX4_IB_QPT_TUN_SMI: + case MLX4_IB_QPT_PROXY_GSI: + case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ? + MLX4_QP_ST_UD : -1); + default: return -1; } } @@ -819,9 +1238,17 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6); } -static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, - struct mlx4_qp_path *path, u8 port) +static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, + u64 smac, u16 vlan_tag, struct mlx4_qp_path *path, + struct mlx4_roce_smac_vlan_info *smac_info, u8 port) { + int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET; + int vidx; + int smac_index; + int err; + + path->grh_mylmc = ah->src_path_bits & 0x7f; path->rlid = cpu_to_be16(ah->dlid); if (ah->static_rate) { @@ -831,11 +1258,10 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, --path->static_rate; } else path->static_rate = 0; - path->counter_index = 0xff; if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) { - printk(KERN_ERR "sgid_index (%u) too large. max is %d\n", + pr_err("sgid_index (%u) too large. max is %d\n", ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1); return -1; } @@ -849,9 +1275,146 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, memcpy(path->rgid, ah->grh.dgid.raw, 16); } - path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | - ((port - 1) << 6) | ((ah->sl & 0xf) << 2); + if (is_eth) { + if (!(ah->ah_flags & IB_AH_GRH)) + return -1; + + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 7) << 3); + + path->feup |= MLX4_FEUP_FORCE_ETH_UP; + if (vlan_tag < 0x1000) { + if (smac_info->vid < 0x1000) { + /* both valid vlan ids */ + if (smac_info->vid != vlan_tag) { + /* different VIDs. 
unreg old and reg new */ + err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx); + if (err) + return err; + smac_info->candidate_vid = vlan_tag; + smac_info->candidate_vlan_index = vidx; + smac_info->candidate_vlan_port = port; + smac_info->update_vid = 1; + path->vlan_index = vidx; + } else { + path->vlan_index = smac_info->vlan_index; + } + } else { + /* no current vlan tag in qp */ + err = mlx4_register_vlan(dev->dev, port, vlan_tag, &vidx); + if (err) + return err; + smac_info->candidate_vid = vlan_tag; + smac_info->candidate_vlan_index = vidx; + smac_info->candidate_vlan_port = port; + smac_info->update_vid = 1; + path->vlan_index = vidx; + } + path->feup |= MLX4_FVL_FORCE_ETH_VLAN; + path->fl = 1 << 6; + } else { + /* have current vlan tag. unregister it at modify-qp success */ + if (smac_info->vid < 0x1000) { + smac_info->candidate_vid = 0xFFFF; + smac_info->update_vid = 1; + } + } + + /* get smac_index for RoCE use. + * If no smac was yet assigned, register one. + * If one was already assigned, but the new mac differs, + * unregister the old one and register the new one. + */ + if (!smac_info->smac || smac_info->smac != smac) { + /* register candidate now, unreg if needed, after success */ + smac_index = mlx4_register_mac(dev->dev, port, smac); + if (smac_index >= 0) { + smac_info->candidate_smac_index = smac_index; + smac_info->candidate_smac = smac; + smac_info->candidate_smac_port = port; + } else { + return -EINVAL; + } + } else { + smac_index = smac_info->smac_index; + } + + memcpy(path->dmac, ah->dmac, 6); + path->ackto = MLX4_IB_LINK_TYPE_ETH; + /* put MAC table smac index for IBoE */ + path->grh_mylmc = (u8) (smac_index) | 0x80; + } else { + path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | + ((port - 1) << 6) | ((ah->sl & 0xf) << 2); + } + + return 0; +} + +static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp, + enum ib_qp_attr_mask qp_attr_mask, + struct mlx4_ib_qp *mqp, + struct mlx4_qp_path *path, u8 port) +{ + return _mlx4_set_path(dev, &qp->ah_attr, + mlx4_mac_to_u64((u8 *)qp->smac), + (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff, + path, &mqp->pri, port); +} + +static int mlx4_set_alt_path(struct mlx4_ib_dev *dev, + const struct ib_qp_attr *qp, + enum ib_qp_attr_mask qp_attr_mask, + struct mlx4_ib_qp *mqp, + struct mlx4_qp_path *path, u8 port) +{ + return _mlx4_set_path(dev, &qp->alt_ah_attr, + mlx4_mac_to_u64((u8 *)qp->alt_smac), + (qp_attr_mask & IB_QP_ALT_VID) ? 
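			       /* 0xffff means no VLAN; only values below
				* 0x1000 are treated as valid VIDs */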
+ qp->alt_vlan_id : 0xffff, + path, &mqp->alt, port); +} + +static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + struct mlx4_ib_gid_entry *ge, *tmp; + + list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) { + if (!ge->added && mlx4_ib_add_mc(dev, qp, &ge->gid)) { + ge->added = 1; + ge->port = qp->port; + } + } +} + +static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac, + struct mlx4_qp_context *context) +{ + struct net_device *ndev; + u64 u64_mac; + int smac_index; + + ndev = dev->iboe.netdevs[qp->port - 1]; + if (ndev) { + smac = ndev->dev_addr; + u64_mac = mlx4_mac_to_u64(smac); + } else { + u64_mac = dev->dev->caps.def_mac[qp->port]; + } + + context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6); + if (!qp->pri.smac) { + smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac); + if (smac_index >= 0) { + qp->pri.candidate_smac_index = smac_index; + qp->pri.candidate_smac = u64_mac; + qp->pri.candidate_smac_port = qp->port; + context->pri_path.grh_mylmc = 0x80 | (u8) smac_index; + } else { + return -ENOENT; + } + } return 0; } @@ -861,9 +1424,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, { struct mlx4_ib_dev *dev = to_mdev(ibqp->device); struct mlx4_ib_qp *qp = to_mqp(ibqp); + struct mlx4_ib_pd *pd; + struct mlx4_ib_cq *send_cq, *recv_cq; struct mlx4_qp_context *context; enum mlx4_qp_optpar optpar = 0; int sqd_event; + int steer_qp = 0; int err = -EINVAL; context = kzalloc(sizeof *context, GFP_KERNEL); @@ -871,8 +1437,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, return -ENOMEM; context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | - (to_mlx4_st(ibqp->qp_type) << 16)); - context->flags |= cpu_to_be32(1 << 8); /* DE? 
*/ + (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16)); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); @@ -893,15 +1458,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; + else if (ibqp->qp_type == IB_QPT_RAW_PACKET) + context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX; else if (ibqp->qp_type == IB_QPT_UD) { if (qp->flags & MLX4_IB_QP_LSO) context->mtu_msgmax = (IB_MTU_4096 << 5) | ilog2(dev->dev->caps.max_gso_sz); else - context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; + context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; } else if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { - printk(KERN_ERR "path MTU (%u) is invalid\n", + pr_err("path MTU (%u) is invalid\n", attr->path_mtu); goto out; } @@ -917,8 +1484,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; context->sq_size_stride |= qp->sq.wqe_shift - 4; - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { context->sq_size_stride |= !!qp->sq_no_prefetch << 7; + context->xrcd = cpu_to_be32((u32) qp->xrcdn); + if (ibqp->qp_type == IB_QPT_RAW_PACKET) + context->param3 |= cpu_to_be32(1 << 30); + } if (qp->ibqp.uobject) context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); @@ -936,14 +1507,31 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } } + if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { + if (dev->counters[qp->port - 1] != -1) { + context->pri_path.counter_index = + dev->counters[qp->port - 1]; + optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX; + } else + context->pri_path.counter_index = 0xff; + + if (qp->flags & MLX4_IB_QP_NETIF) { + mlx4_ib_steer_qp_reg(dev, qp, 1); + steer_qp = 1; + } + } + if (attr_mask & IB_QP_PKEY_INDEX) { + if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) + context->pri_path.disable_pkey_check = 0x40; context->pri_path.pkey_index = attr->pkey_index; optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; } if (attr_mask & IB_QP_AV) { - if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, - attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) + if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path, + attr_mask & IB_QP_PORT ? 
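				  /* build the primary path against the new
				   * port when the port changes in the same
				   * modify operation */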
+ attr->port_num : qp->port)) goto out; optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | @@ -951,7 +1539,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_TIMEOUT) { - context->pri_path.ackto = attr->timeout << 3; + context->pri_path.ackto |= attr->timeout << 3; optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; } @@ -964,8 +1552,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, dev->dev->caps.pkey_table_len[attr->alt_port_num]) goto out; - if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path, - attr->alt_port_num)) + if (mlx4_set_alt_path(dev, attr, attr_mask, qp, + &context->alt_path, + attr->alt_port_num)) goto out; context->alt_path.pkey_index = attr->alt_pkey_index; @@ -973,8 +1562,16 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; } - context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); - context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); + pd = get_pd(qp); + get_cqs(qp, &send_cq, &recv_cq); + context->pd = cpu_to_be32(pd->pdn); + context->cqn_send = cpu_to_be32(send_cq->mcq.cqn); + context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn); + context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); + + /* Set "fast registration enabled" for all kernel QPs */ + if (!qp->ibqp.uobject) + context->params1 |= cpu_to_be32(1 << 11); if (attr_mask & IB_QP_RNR_RETRY) { context->params1 |= cpu_to_be32(attr->rnr_retry << 13); @@ -996,8 +1593,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_SQ_PSN) context->next_send_psn = cpu_to_be32(attr->sq_psn); - context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn); - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { if (attr->max_dest_rd_atomic) context->params2 |= @@ -1020,36 +1615,92 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_RQ_PSN) context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); - context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn); - + /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */ if (attr_mask & IB_QP_QKEY) { - context->qkey = cpu_to_be32(attr->qkey); + if (qp->mlx4_ib_qp_type & + (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) + context->qkey = cpu_to_be32(IB_QP_SET_QKEY); + else { + if (mlx4_is_mfunc(dev->dev) && + !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) && + (attr->qkey & MLX4_RESERVED_QKEY_MASK) == + MLX4_RESERVED_QKEY_BASE) { + pr_err("Cannot use reserved QKEY" + " 0x%x (range 0xffff0000..0xffffffff" + " is reserved)\n", attr->qkey); + err = -EINVAL; + goto out; + } + context->qkey = cpu_to_be32(attr->qkey); + } optpar |= MLX4_QP_OPTPAR_Q_KEY; } if (ibqp->srq) context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); - if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR && (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || - ibqp->qp_type == IB_QPT_UD)) { + ibqp->qp_type == IB_QPT_UD || + ibqp->qp_type == IB_QPT_RAW_PACKET)) { context->pri_path.sched_queue = (qp->port - 1) << 6; - if (is_qp0(dev, qp)) + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || + qp->mlx4_ib_qp_type & + (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) { context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; - else + if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI) + context->pri_path.fl = 0x80; + } else { + if (qp->mlx4_ib_qp_type & 
MLX4_IB_QPT_ANY_SRIOV) + context->pri_path.fl = 0x80; context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; + } + if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) == + IB_LINK_LAYER_ETHERNET) { + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI || + qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) + context->pri_path.feup = 1 << 7; /* don't fsm */ + /* handle smac_index */ + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD || + qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI || + qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) { + err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context); + if (err) + return -EINVAL; + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) + dev->qp1_proxy[qp->port - 1] = qp; + } + } + } + + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) + context->pri_path.ackto = (context->pri_path.ackto & 0xf8) | + MLX4_IB_LINK_TYPE_ETH; + + if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) { + int is_eth = rdma_port_get_link_layer( + &dev->ib_dev, qp->port) == + IB_LINK_LAYER_ETHERNET; + if (is_eth) { + context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH; + optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH; + } } + if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) sqd_event = 1; else sqd_event = 0; + if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->rlkey |= (1 << 4); + /* * Before passing a kernel QP to the HW, make sure that the * ownership bits of the send queue are set and the SQ @@ -1082,8 +1733,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, qp->atomic_rd_en = attr->qp_access_flags; if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) qp->resp_depth = attr->max_dest_rd_atomic; - if (attr_mask & IB_QP_PORT) + if (attr_mask & IB_QP_PORT) { qp->port = attr->port_num; + update_mcg_macs(dev, qp); + } if (attr_mask & IB_QP_ALT_PATH) qp->alt_port = attr->alt_port_num; @@ -1097,7 +1750,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (is_qp0(dev, qp)) { if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR) if (mlx4_INIT_PORT(dev->dev, qp->port)) - printk(KERN_WARNING "INIT_PORT failed for port %d\n", + pr_warn("INIT_PORT failed for port %d\n", qp->port); if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && @@ -1109,23 +1762,113 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, * If we moved a kernel QP to RESET, clean up all old CQ * entries and reinitialize the QP. */ - if (new_state == IB_QPS_RESET && !ibqp->uobject) { - mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn, - ibqp->srq ? to_msrq(ibqp->srq): NULL); - if (ibqp->send_cq != ibqp->recv_cq) - mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL); + if (new_state == IB_QPS_RESET) { + if (!ibqp->uobject) { + mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, + ibqp->srq ? 
to_msrq(ibqp->srq) : NULL); + if (send_cq != recv_cq) + mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + + qp->rq.head = 0; + qp->rq.tail = 0; + qp->sq.head = 0; + qp->sq.tail = 0; + qp->sq_next_wqe = 0; + if (qp->rq.wqe_cnt) + *qp->db.db = 0; + + if (qp->flags & MLX4_IB_QP_NETIF) + mlx4_ib_steer_qp_reg(dev, qp, 0); + } + if (qp->pri.smac) { + mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); + qp->pri.smac = 0; + } + if (qp->alt.smac) { + mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); + qp->alt.smac = 0; + } + if (qp->pri.vid < 0x1000) { + mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, qp->pri.vid); + qp->pri.vid = 0xFFFF; + qp->pri.candidate_vid = 0xFFFF; + qp->pri.update_vid = 0; + } - qp->rq.head = 0; - qp->rq.tail = 0; - qp->sq.head = 0; - qp->sq.tail = 0; - qp->sq_next_wqe = 0; - if (!ibqp->srq) - *qp->db.db = 0; + if (qp->alt.vid < 0x1000) { + mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, qp->alt.vid); + qp->alt.vid = 0xFFFF; + qp->alt.candidate_vid = 0xFFFF; + qp->alt.update_vid = 0; + } } - out: + if (err && steer_qp) + mlx4_ib_steer_qp_reg(dev, qp, 0); kfree(context); + if (qp->pri.candidate_smac) { + if (err) { + mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac); + } else { + if (qp->pri.smac) + mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac); + qp->pri.smac = qp->pri.candidate_smac; + qp->pri.smac_index = qp->pri.candidate_smac_index; + qp->pri.smac_port = qp->pri.candidate_smac_port; + } + qp->pri.candidate_smac = 0; + qp->pri.candidate_smac_index = 0; + qp->pri.candidate_smac_port = 0; + } + if (qp->alt.candidate_smac) { + if (err) { + mlx4_unregister_mac(dev->dev, qp->alt.candidate_smac_port, qp->alt.candidate_smac); + } else { + if (qp->alt.smac) + mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac); + qp->alt.smac = qp->alt.candidate_smac; + qp->alt.smac_index = qp->alt.candidate_smac_index; + qp->alt.smac_port = qp->alt.candidate_smac_port; + } + qp->alt.candidate_smac = 0; + qp->alt.candidate_smac_index = 0; + qp->alt.candidate_smac_port = 0; + } + + if (qp->pri.update_vid) { + if (err) { + if (qp->pri.candidate_vid < 0x1000) + mlx4_unregister_vlan(dev->dev, qp->pri.candidate_vlan_port, + qp->pri.candidate_vid); + } else { + if (qp->pri.vid < 0x1000) + mlx4_unregister_vlan(dev->dev, qp->pri.vlan_port, + qp->pri.vid); + qp->pri.vid = qp->pri.candidate_vid; + qp->pri.vlan_port = qp->pri.candidate_vlan_port; + qp->pri.vlan_index = qp->pri.candidate_vlan_index; + } + qp->pri.candidate_vid = 0xFFFF; + qp->pri.update_vid = 0; + } + + if (qp->alt.update_vid) { + if (err) { + if (qp->alt.candidate_vid < 0x1000) + mlx4_unregister_vlan(dev->dev, qp->alt.candidate_vlan_port, + qp->alt.candidate_vid); + } else { + if (qp->alt.vid < 0x1000) + mlx4_unregister_vlan(dev->dev, qp->alt.vlan_port, + qp->alt.vid); + qp->alt.vid = qp->alt.candidate_vid; + qp->alt.vlan_port = qp->alt.candidate_vlan_port; + qp->alt.vlan_index = qp->alt.candidate_vlan_index; + } + qp->alt.candidate_vid = 0xFFFF; + qp->alt.update_vid = 0; + } + return err; } @@ -1136,33 +1879,69 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, struct mlx4_ib_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; int err = -EINVAL; - + int ll; mutex_lock(&qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? 
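/*
 * Minimal standalone model (an assumption) of the candidate-SMAC
 * commit/rollback pattern above: a staged MAC registration either
 * replaces the active one on success or is unwound on error, so the
 * device never ends up holding both registrations or neither. The same
 * shape applies to the candidate VLAN handling that follows.
 */
#include <stdint.h>

struct mac_slot {
	uint64_t active;	/* currently registered MAC, 0 if none */
	uint64_t candidate;	/* staged MAC, 0 if nothing pending */
};

static void commit_or_rollback_mac(struct mac_slot *s, int err,
				   void (*unregister_mac)(uint64_t mac))
{
	if (!s->candidate)
		return;
	if (err) {
		unregister_mac(s->candidate);	/* roll back the staged MAC */
	} else {
		if (s->active)
			unregister_mac(s->active); /* drop the old MAC */
		s->active = s->candidate;	/* promote the candidate */
	}
	s->candidate = 0;
}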
attr->qp_state : cur_state; - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + ll = IB_LINK_LAYER_UNSPECIFIED; + } else { + int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + ll = rdma_port_get_link_layer(&dev->ib_dev, port); + } + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, + attr_mask, ll)) { + pr_debug("qpn 0x%x: invalid attribute mask specified " + "for transition %d to %d. qp_type %d," + " attr_mask 0x%x\n", + ibqp->qp_num, cur_state, new_state, + ibqp->qp_type, attr_mask); goto out; + } if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { + (attr->port_num == 0 || attr->port_num > dev->num_ports)) { + pr_debug("qpn 0x%x: invalid port number (%d) specified " + "for transition %d to %d. qp_type %d\n", + ibqp->qp_num, attr->port_num, cur_state, + new_state, ibqp->qp_type); goto out; } + if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) && + (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) != + IB_LINK_LAYER_ETHERNET)) + goto out; + if (attr_mask & IB_QP_PKEY_INDEX) { int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) + if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) { + pr_debug("qpn 0x%x: invalid pkey index (%d) specified " + "for transition %d to %d. qp_type %d\n", + ibqp->qp_num, attr->pkey_index, cur_state, + new_state, ibqp->qp_type); goto out; + } } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { + pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. " + "Transition %d to %d. qp_type %d\n", + ibqp->qp_num, attr->max_rd_atomic, cur_state, + new_state, ibqp->qp_type); goto out; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { + pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. " + "Transition %d to %d. 
qp_type %d\n", + ibqp->qp_num, attr->max_dest_rd_atomic, cur_state, + new_state, ibqp->qp_type); goto out; } @@ -1178,47 +1957,230 @@ out: return err; } +static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) +{ + int i; + for (i = 0; i < dev->caps.num_ports; i++) { + if (qpn == dev->caps.qp0_proxy[i] || + qpn == dev->caps.qp0_tunnel[i]) { + *qkey = dev->caps.qp0_qkey[i]; + return 0; + } + } + return -EINVAL; +} + +static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, + struct ib_send_wr *wr, + void *wqe, unsigned *mlx_seg_len) +{ + struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); + struct ib_device *ib_dev = &mdev->ib_dev; + struct mlx4_wqe_mlx_seg *mlx = wqe; + struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; + struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + u16 pkey; + u32 qkey; + int send_size; + int header_size; + int spc; + int i; + + if (wr->opcode != IB_WR_SEND) + return -EINVAL; + + send_size = 0; + + for (i = 0; i < wr->num_sge; ++i) + send_size += wr->sg_list[i].length; + + /* for proxy-qp0 sends, need to add in size of tunnel header */ + /* for tunnel-qp0 sends, tunnel header is already in s/g list */ + if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) + send_size += sizeof (struct mlx4_ib_tunnel_header); + + ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header); + + if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { + sqp->ud_header.lrh.service_level = + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + sqp->ud_header.lrh.destination_lid = + cpu_to_be16(ah->av.ib.g_slid & 0x7f); + sqp->ud_header.lrh.source_lid = + cpu_to_be16(ah->av.ib.g_slid & 0x7f); + } + + mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); + + /* force loopback */ + mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR); + mlx->rlid = sqp->ud_header.lrh.destination_lid; + + sqp->ud_header.lrh.virtual_lane = 0; + sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); + sqp->ud_header.bth.pkey = cpu_to_be16(pkey); + if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + else + sqp->ud_header.bth.destination_qpn = + cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); + + sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); + if (mlx4_is_master(mdev->dev)) { + if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + return -EINVAL; + } else { + if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + return -EINVAL; + } + sqp->ud_header.deth.qkey = cpu_to_be32(qkey); + sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); + + sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; + sqp->ud_header.immediate_present = 0; + + header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); + + /* + * Inline data segments may not cross a 64 byte boundary. If + * our UD header is bigger than the space available up to the + * next 64 byte boundary in the WQE, use two inline data + * segments to hold the UD header. 
+ */ + spc = MLX4_INLINE_ALIGN - + ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); + if (header_size <= spc) { + inl->byte_count = cpu_to_be32(1 << 31 | header_size); + memcpy(inl + 1, sqp->header_buf, header_size); + i = 1; + } else { + inl->byte_count = cpu_to_be32(1 << 31 | spc); + memcpy(inl + 1, sqp->header_buf, spc); + + inl = (void *) (inl + 1) + spc; + memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); + /* + * Need a barrier here to make sure all the data is + * visible before the byte_count field is set. + * Otherwise the HCA prefetcher could grab the 64-byte + * chunk with this inline segment and get a valid (!= + * 0xffffffff) byte count but stale data, and end up + * generating a packet with bad headers. + * + * The first inline segment's byte_count field doesn't + * need a barrier, because it comes after a + * control/MLX segment and therefore is at an offset + * of 16 mod 64. + */ + wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); + i = 2; + } + + *mlx_seg_len = + ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); + return 0; +} + static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) { - struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev; + struct ib_device *ib_dev = sqp->qp.ibqp.device; struct mlx4_wqe_mlx_seg *mlx = wqe; + struct mlx4_wqe_ctrl_seg *ctrl = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + union ib_gid sgid; u16 pkey; int send_size; int header_size; int spc; int i; + int err = 0; + u16 vlan = 0xffff; + bool is_eth; + bool is_vlan = false; + bool is_grh; send_size = 0; for (i = 0; i < wr->num_sge; ++i) send_size += wr->sg_list[i].length; - ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header); + is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; + is_grh = mlx4_ib_ah_grh_present(ah); + if (is_eth) { + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { + /* When multi-function is enabled, the ib_core gid + * indexes don't necessarily match the hw ones, so + * we must use our own cache */ + err = mlx4_get_roce_gid_from_slave(to_mdev(ib_dev)->dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid.raw[0]); + if (err) + return err; + } else { + err = ib_get_cached_gid(ib_dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid); + if (err) + return err; + } + + if (ah->av.eth.vlan != cpu_to_be16(0xffff)) { + vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; + is_vlan = 1; + } + } + ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header); + + if (!is_eth) { + sqp->ud_header.lrh.service_level = + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + sqp->ud_header.lrh.destination_lid = ah->av.ib.dlid; + sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f); + } - sqp->ud_header.lrh.service_level = - be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; - sqp->ud_header.lrh.destination_lid = ah->av.dlid; - sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f); - if (mlx4_ib_ah_grh_present(ah)) { + if (is_grh) { sqp->ud_header.grh.traffic_class = - (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff; + (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff; sqp->ud_header.grh.flow_label = - ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); - sqp->ud_header.grh.hop_limit = ah->av.hop_limit; - ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24, - 
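/*
 * Userspace model of the 64-byte inline split above, a sketch under the
 * assumption that MLX4_INLINE_ALIGN is 64: a UD header that does not fit
 * in the space left before the next 64-byte boundary is carried in two
 * inline segments, and in the real driver the second segment's
 * byte_count is published only after a wmb() so the HCA prefetcher
 * cannot observe a valid count paired with stale data.
 */
#include <stdint.h>
#include <string.h>

#define INLINE_ALIGN 64u

struct inline_seg {
	uint32_t byte_count;	/* bit 31 marks the segment as inline */
	uint8_t data[];
};

static int split_inline_header(void *wqe, const uint8_t *hdr, uint32_t len)
{
	struct inline_seg *inl = wqe;
	uint32_t spc = INLINE_ALIGN -
		((uintptr_t)(inl + 1) & (INLINE_ALIGN - 1));

	if (len <= spc) {
		memcpy(inl + 1, hdr, len);
		inl->byte_count = (1u << 31) | len;
		return 1;			/* one segment used */
	}
	memcpy(inl + 1, hdr, spc);
	inl->byte_count = (1u << 31) | spc;
	inl = (struct inline_seg *)((uint8_t *)(inl + 1) + spc);
	memcpy(inl + 1, hdr + spc, len - spc);
	/* the driver issues wmb() here before making byte_count valid */
	inl->byte_count = (1u << 31) | (len - spc);
	return 2;				/* split across two segments */
}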
ah->av.gid_index, &sqp->ud_header.grh.source_gid); + ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); + sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; + if (is_eth) + memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); + else { + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { + /* When multi-function is enabled, the ib_core gid + * indexes don't necessarily match the hw ones, so + * we must use our own cache */ + sqp->ud_header.grh.source_gid.global.subnet_prefix = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + subnet_prefix; + sqp->ud_header.grh.source_gid.global.interface_id = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + guid_cache[ah->av.ib.gid_index]; + } else + ib_get_cached_gid(ib_dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, + &sqp->ud_header.grh.source_gid); + } memcpy(sqp->ud_header.grh.destination_gid.raw, - ah->av.dgid, 16); + ah->av.ib.dgid, 16); } mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | - (sqp->ud_header.lrh.destination_lid == - IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | - (sqp->ud_header.lrh.service_level << 8)); - mlx->rlid = sqp->ud_header.lrh.destination_lid; + + if (!is_eth) { + mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | + (sqp->ud_header.lrh.service_level << 8)); + if (ah->av.ib.port_pd & cpu_to_be32(0x80000000)) + mlx->flags |= cpu_to_be32(0x1); /* force loopback */ + mlx->rlid = sqp->ud_header.lrh.destination_lid; + } switch (wr->opcode) { case IB_WR_SEND: @@ -1234,9 +2196,39 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, return -EINVAL; } - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; - if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) - sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; + if (is_eth) { + u8 *smac; + struct in6_addr in6; + + u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; + + mlx->sched_prio = cpu_to_be16(pcp); + + memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); + /* FIXME: cache smac value? */ + memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2); + memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4); + memcpy(&in6, sgid.raw, sizeof(in6)); + + if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) + smac = to_mdev(sqp->qp.ibqp.device)-> + iboe.netdevs[sqp->qp.port - 1]->dev_addr; + else /* use the src mac of the tunnel */ + smac = ah->av.eth.s_mac; + memcpy(sqp->ud_header.eth.smac_h, smac, 6); + if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) + mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); + if (!is_vlan) { + sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + } else { + sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); + sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); + } + } else { + sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 
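/*
 * Sketch of the 802.1Q tag assembly used in the is_vlan branch above
 * (illustrative, not driver code): a 12-bit VLAN ID is combined with the
 * 3-bit priority derived from the SL, which the code has already shifted
 * into bits 15:13 before the cpu_to_be16 conversion.
 */
#include <stdint.h>

static uint16_t vlan_tci(uint16_t vid, uint8_t pcp)
{
	return (uint16_t)((vid & 0x0fff) | ((uint16_t)(pcp & 0x7) << 13));
}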
15 : 0; + if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) + sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; + } sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); @@ -1252,16 +2244,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); if (0) { - printk(KERN_ERR "built UD header of size %d:\n", header_size); + pr_err("built UD header of size %d:\n", header_size); for (i = 0; i < header_size / 4; ++i) { if (i % 8 == 0) - printk(" [%02x] ", i * 4); - printk(" %08x", - be32_to_cpu(((__be32 *) sqp->header_buf)[i])); + pr_err(" [%02x] ", i * 4); + pr_cont(" %08x", + be32_to_cpu(((__be32 *) sqp->header_buf)[i])); if ((i + 1) % 8 == 0) - printk("\n"); + pr_cont("\n"); } - printk("\n"); + pr_err("\n"); } /* @@ -1322,6 +2314,63 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq return cur + nreq >= wq->max_post; } +static __be32 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) | + (acc & IB_ACCESS_REMOTE_READ ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | + cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); +} + +static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) +{ + struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); + int i; + + for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i) + mfrpl->mapped_page_list[i] = + cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] | + MLX4_MTT_FLAG_PRESENT); + + fseg->flags = convert_access(wr->wr.fast_reg.access_flags); + fseg->mem_key = cpu_to_be32(wr->wr.fast_reg.rkey); + fseg->buf_list = cpu_to_be64(mfrpl->map); + fseg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); + fseg->reg_len = cpu_to_be64(wr->wr.fast_reg.length); + fseg->offset = 0; /* XXX -- is this just for ZBVA? 
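/*
 * Sketch of the fast-register page-list fill in set_fmr_seg above,
 * assuming MLX4_MTT_FLAG_PRESENT is bit 0 of each translation entry:
 * every DMA page address is tagged present before the HCA walks the
 * list, so the hardware can distinguish populated entries.
 */
#include <stdint.h>

static void fill_fmr_page_list(uint64_t *mapped, const uint64_t *pages,
			       int n)
{
	int i;

	for (i = 0; i < n; i++)
		mapped[i] = pages[i] | 1ULL; /* assumed MLX4_MTT_FLAG_PRESENT */
}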
*/ + fseg->page_size = cpu_to_be32(wr->wr.fast_reg.page_shift); + fseg->reserved[0] = 0; + fseg->reserved[1] = 0; +} + +static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr) +{ + bseg->flags1 = + convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) & + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ | + MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE | + MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC); + bseg->flags2 = 0; + if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2) + bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2); + if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED) + bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED); + bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey); + bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey); + bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr); + bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length); +} + +static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) +{ + memset(iseg, 0, sizeof(*iseg)); + iseg->mem_key = cpu_to_be32(rkey); +} + static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, u64 remote_addr, u32 rkey) { @@ -1335,6 +2384,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { + aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); } else { aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); aseg->compare = 0; @@ -1342,12 +2394,85 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * } +static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, + struct ib_send_wr *wr) +{ + aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); + aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); + aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); + aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); +} + static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, struct ib_send_wr *wr) { memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; + memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); +} + +static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, + struct mlx4_wqe_datagram_seg *dseg, + struct ib_send_wr *wr, + enum mlx4_ib_qp_type qpt) +{ + union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av; + struct mlx4_av sqp_av = {0}; + int port = *((u8 *) &av->ib.port_pd) & 0x3; + + /* force loopback */ + sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000); + sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */ + sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel & + cpu_to_be32(0xf0000000); + + memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av)); + if (qpt == MLX4_IB_QPT_PROXY_GSI) + dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); + else + dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]); + /* Use QKEY from the QP context, which is set by master */ + dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); +} + +static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) +{ + struct mlx4_wqe_inline_seg *inl = wqe; + struct mlx4_ib_tunnel_header hdr; + struct 
mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + int spc; + int i; + + memcpy(&hdr.av, &ah->av, sizeof hdr.av); + hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index); + hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + memcpy(hdr.mac, ah->av.eth.mac, 6); + hdr.vlan = ah->av.eth.vlan; + + spc = MLX4_INLINE_ALIGN - + ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); + if (sizeof (hdr) <= spc) { + memcpy(inl + 1, &hdr, sizeof (hdr)); + wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr)); + i = 1; + } else { + memcpy(inl + 1, &hdr, spc); + wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | spc); + + inl = (void *) (inl + 1) + spc; + memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc); + wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc)); + i = 2; + } + + *mlx_seg_len = + ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16); } static void set_mlx_icrc_seg(void *dseg) @@ -1395,17 +2520,14 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg) dseg->addr = cpu_to_be64(sg->addr); } -static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr, - struct mlx4_ib_qp *qp, unsigned *lso_seg_len) +static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr, + struct mlx4_ib_qp *qp, unsigned *lso_seg_len, + __be32 *lso_hdr_sz, __be32 *blh) { unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16); - /* - * This is a temporary limitation and will be removed in - * a forthcoming FW release: - */ - if (unlikely(halign > 64)) - return -EINVAL; + if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE)) + *blh = cpu_to_be32(1 << 6); if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) && wr->num_sge > qp->sq.max_gs - (halign >> 4))) @@ -1413,16 +2535,34 @@ static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr, memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen); - /* make sure LSO header is written before overwriting stamping */ - wmb(); - - wqe->mss_hdr_size = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 | - wr->wr.ud.hlen); - + *lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 | + wr->wr.ud.hlen); *lso_seg_len = halign; return 0; } +static __be32 send_ieth(struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return wr->ex.imm_data; + + case IB_WR_SEND_WITH_INV: + return cpu_to_be32(wr->ex.invalidate_rkey); + + default: + return 0; + } +} + +static void add_zero_len_inline(void *wqe) +{ + struct mlx4_wqe_inline_seg *inl = wqe; + memset(wqe, 0, 16); + inl->byte_count = cpu_to_be32(1 << 31); +} + int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1437,6 +2577,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int uninitialized_var(stamp); int uninitialized_var(size); unsigned uninitialized_var(seglen); + __be32 dummy; + __be32 *lso_wqe; + __be32 uninitialized_var(lso_hdr_sz); + __be32 blh; int i; spin_lock_irqsave(&qp->sq.lock, flags); @@ -1444,6 +2588,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, ind = qp->sq_next_wqe; for (nreq = 0; wr; ++nreq, wr = wr->next) { + lso_wqe = &dummy; + blh = 0; + if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { err = -ENOMEM; *bad_wr = wr; @@ -1469,21 +2616,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) | qp->sq_signal_bits; - if (wr->opcode == IB_WR_SEND_WITH_IMM || - wr->opcode == 
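/*
 * Sketch of the LSO sizing rule in build_lso_seg above, under the
 * assumptions that MLX4_IB_CACHE_LINE_SIZE is 64 and the LSO segment
 * header is the 4-byte mss_hdr_size word: the header region is padded to
 * a 16-byte multiple, and instead of rejecting headers past a cache line
 * (the old "temporary limitation"), the patch sets a "big LSO header"
 * hint bit that is later OR'ed into owner_opcode.
 */
#include <stdint.h>

#define LSO_SEG_HDR_SKETCH	4u	/* assumed size of mss_hdr_size */
#define CACHE_LINE_SKETCH	64u

static unsigned lso_halign(unsigned hlen, uint32_t *blh)
{
	unsigned halign = (LSO_SEG_HDR_SKETCH + hlen + 15u) & ~15u;

	if (halign > CACHE_LINE_SKETCH)
		*blh = 1u << 6;		/* big-LSO-header hint */
	return halign;
}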
IB_WR_RDMA_WRITE_WITH_IMM) - ctrl->imm = wr->ex.imm_data; - else - ctrl->imm = 0; + ctrl->imm = send_ieth(wr); wqe += sizeof *ctrl; size = sizeof *ctrl / 16; - switch (ibqp->qp_type) { - case IB_QPT_RC: - case IB_QPT_UC: + switch (qp->mlx4_ib_qp_type) { + case MLX4_IB_QPT_RC: + case MLX4_IB_QPT_UC: switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: set_raddr_seg(wqe, wr->wr.atomic.remote_addr, wr->wr.atomic.rkey); wqe += sizeof (struct mlx4_wqe_raddr_seg); @@ -1496,6 +2640,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + set_raddr_seg(wqe, wr->wr.atomic.remote_addr, + wr->wr.atomic.rkey); + wqe += sizeof (struct mlx4_wqe_raddr_seg); + + set_masked_atomic_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_masked_atomic_seg); + + size += (sizeof (struct mlx4_wqe_raddr_seg) + + sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16; + + break; + case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: @@ -1505,30 +2662,103 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_raddr_seg) / 16; break; + case IB_WR_LOCAL_INV: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); + set_local_inv_seg(wqe, wr->ex.invalidate_rkey); + wqe += sizeof (struct mlx4_wqe_local_inval_seg); + size += sizeof (struct mlx4_wqe_local_inval_seg) / 16; + break; + + case IB_WR_FAST_REG_MR: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); + set_fmr_seg(wqe, wr); + wqe += sizeof (struct mlx4_wqe_fmr_seg); + size += sizeof (struct mlx4_wqe_fmr_seg) / 16; + break; + + case IB_WR_BIND_MW: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); + set_bind_seg(wqe, wr); + wqe += sizeof(struct mlx4_wqe_bind_seg); + size += sizeof(struct mlx4_wqe_bind_seg) / 16; + break; default: /* No extra segments required for sends */ break; } break; - case IB_QPT_UD: + case MLX4_IB_QPT_TUN_SMI_OWNER: + err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); + if (unlikely(err)) { + *bad_wr = wr; + goto out; + } + wqe += seglen; + size += seglen / 16; + break; + case MLX4_IB_QPT_TUN_SMI: + case MLX4_IB_QPT_TUN_GSI: + /* this is a UD qp used in MAD responses to slaves. */ + set_datagram_seg(wqe, wr); + /* set the forced-loopback bit in the data seg av */ + *(__be32 *) wqe |= cpu_to_be32(0x80000000); + wqe += sizeof (struct mlx4_wqe_datagram_seg); + size += sizeof (struct mlx4_wqe_datagram_seg) / 16; + break; + case MLX4_IB_QPT_UD: set_datagram_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; if (wr->opcode == IB_WR_LSO) { - err = build_lso_seg(wqe, wr, qp, &seglen); + err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh); if (unlikely(err)) { *bad_wr = wr; goto out; } + lso_wqe = (__be32 *) wqe; wqe += seglen; size += seglen / 16; } break; - case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX4_IB_QPT_PROXY_SMI_OWNER: + err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); + if (unlikely(err)) { + *bad_wr = wr; + goto out; + } + wqe += seglen; + size += seglen / 16; + /* to start tunnel header on a cache-line boundary */ + add_zero_len_inline(wqe); + wqe += 16; + size++; + build_tunnel_header(wr, wqe, &seglen); + wqe += seglen; + size += seglen / 16; + break; + case MLX4_IB_QPT_PROXY_SMI: + case MLX4_IB_QPT_PROXY_GSI: + /* If we are tunneling special qps, this is a UD qp. 
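/*
 * Host-side model of the masked atomic semantics carried by the
 * IB_WR_MASKED_ATOMIC_CMP_AND_SWP segment built above (a sketch of the
 * IB extended-atomics definition, not of the HCA microcode): the compare
 * applies only under compare_mask, and only the bits selected by
 * swap_mask are replaced on a match.
 */
#include <stdint.h>

static uint64_t masked_cmp_swap(uint64_t *target, uint64_t compare,
				uint64_t compare_mask, uint64_t swap,
				uint64_t swap_mask)
{
	uint64_t old = *target;

	if ((old & compare_mask) == (compare & compare_mask))
		*target = (old & ~swap_mask) | (swap & swap_mask);
	return old;	/* the original value is returned to the poster */
}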
+ * In this case we first add a UD segment targeting + * the tunnel qp, and then add a header with address + * information */ + set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, + qp->mlx4_ib_qp_type); + wqe += sizeof (struct mlx4_wqe_datagram_seg); + size += sizeof (struct mlx4_wqe_datagram_seg) / 16; + build_tunnel_header(wr, wqe, &seglen); + wqe += seglen; + size += seglen / 16; + break; + + case MLX4_IB_QPT_SMI: + case MLX4_IB_QPT_GSI: err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; @@ -1554,8 +2784,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16); /* Add one more inline data segment for ICRC for MLX sends */ - if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI)) { + if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || + qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI || + qp->mlx4_ib_qp_type & + (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) { set_mlx_icrc_seg(dseg + 1); size += sizeof (struct mlx4_wqe_data_seg) / 16; } @@ -1563,6 +2795,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, for (i = wr->num_sge - 1; i >= 0; --i, --dseg) set_data_seg(dseg, wr->sg_list + i); + /* + * Possibly overwrite stamping in cacheline with LSO + * segment only after making sure all data segments + * are written. + */ + wmb(); + *lso_wqe = lso_hdr_sz; + ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? MLX4_WQE_CTRL_FENCE : 0) | size; @@ -1574,12 +2814,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, wmb(); if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { + *bad_wr = wr; err = -EINVAL; goto out; } ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | - (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0); + (ind & qp->sq.wqe_cnt ? 
cpu_to_be32(1 << 31) : 0) | blh; stamp = ind + qp->sq_spare_wqes; ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift); @@ -1597,7 +2838,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, stamp_send_wqe(qp, stamp, size * 16); ind = pad_wraparound(qp, ind); } - } out: @@ -1639,14 +2879,16 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int err = 0; int nreq; int ind; + int max_gs; int i; + max_gs = qp->rq.max_gs; spin_lock_irqsave(&qp->rq.lock, flags); ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) { + if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { err = -ENOMEM; *bad_wr = wr; goto out; @@ -1660,10 +2902,25 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, scat = get_recv_wqe(qp, ind); + if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { + ib_dma_sync_single_for_device(ibqp->device, + qp->sqp_proxy_rcv[ind].map, + sizeof (struct mlx4_ib_proxy_sqp_hdr), + DMA_FROM_DEVICE); + scat->byte_count = + cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr)); + /* use dma lkey from upper layer entry */ + scat->lkey = cpu_to_be32(wr->sg_list->lkey); + scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map); + scat++; + max_gs--; + } + for (i = 0; i < wr->num_sge; ++i) __set_data_seg(scat + i, wr->sg_list + i); - if (i < qp->rq.max_gs) { + if (i < max_gs) { scat[i].byte_count = 0; scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); scat[i].addr = 0; @@ -1731,17 +2988,27 @@ static int to_ib_qp_access_flags(int mlx4_flags) return ib_flags; } -static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr, +static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, struct mlx4_qp_path *path) { + struct mlx4_dev *dev = ibdev->dev; + int is_eth; + memset(ib_ah_attr, 0, sizeof *ib_ah_attr); ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1; if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) return; + is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) == + IB_LINK_LAYER_ETHERNET; + if (is_eth) + ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) | + ((path->sched_queue & 4) << 1); + else + ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; + ib_ah_attr->dlid = be16_to_cpu(path->rlid); - ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf; ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f; ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0; @@ -1794,8 +3061,8 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr to_ib_qp_access_flags(be32_to_cpu(context.params2)); if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { - to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path); - to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path); + to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path); + to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path); qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f; qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } @@ -1848,6 +3115,13 @@ done: if (qp->flags & MLX4_IB_QP_LSO) qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; + if (qp->flags & MLX4_IB_QP_NETIF) + qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP; + + qp_init_attr->sq_sig_type = + qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ? 
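/*
 * Sketch of the service-level decode added to to_ib_ah_attr above
 * (illustrative only): on RoCE ports sched_queue carries the SL as bits
 * 5:3 plus bit 2 acting as the high SL bit, whereas on IB ports the SL
 * is simply bits 5:2.
 */
#include <stdint.h>

static uint8_t sl_from_sched_queue(uint8_t sched_queue, int is_eth)
{
	if (is_eth)
		return (uint8_t)(((sched_queue >> 3) & 0x7) |
				 ((sched_queue & 0x4) << 1));
	return (sched_queue >> 2) & 0xf;
}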
+ IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + out: mutex_unlock(&qp->mutex); return err; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 12d6bc6f800..62d9285300a 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,6 +33,7 @@ #include <linux/mlx4/qp.h> #include <linux/mlx4/srq.h> +#include <linux/slab.h> #include "mlx4_ib.h" #include "user.h" @@ -57,7 +59,7 @@ static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type) event.event = IB_EVENT_SRQ_ERR; break; default: - printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " + pr_warn("Unexpected event type %d " "on SRQ %06x\n", type, srq->srqn); return; } @@ -73,6 +75,9 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct mlx4_ib_dev *dev = to_mdev(pd->device); struct mlx4_ib_srq *srq; struct mlx4_wqe_srq_next_seg *next; + struct mlx4_wqe_data_seg *scatter; + u32 cqn; + u16 xrcdn; int desc_size; int buf_size; int err; @@ -129,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_mtt; } else { - err = mlx4_db_alloc(dev->dev, &srq->db, 0); + err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL); if (err) goto err_srq; *srq->db.db = 0; - if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf, + GFP_KERNEL)) { err = -ENOMEM; goto err_db; } @@ -148,6 +154,11 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, next = get_wqe(srq, i); next->next_wqe_index = cpu_to_be16((i + 1) & (srq->msrq.max - 1)); + + for (scatter = (void *) (next + 1); + (void *) scatter < (void *) next + desc_size; + ++scatter) + scatter->lkey = cpu_to_be32(MLX4_INVALID_LKEY); } err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift, @@ -155,7 +166,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, if (err) goto err_buf; - err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf); + err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL); if (err) goto err_mtt; @@ -166,12 +177,18 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, } } - err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt, + cqn = (init_attr->srq_type == IB_SRQT_XRC) ? + to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0; + xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ? + to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn : + (u16) dev->dev->caps.reserved_xrcds; + err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt, srq->db.dma, &srq->msrq); if (err) goto err_wrid; srq->msrq.event = mlx4_ib_srq_event; + srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; if (pd->uobject) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c new file mode 100644 index 00000000000..cb4c66e723b --- /dev/null +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -0,0 +1,906 @@ +/* + * Copyright (c) 2012 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
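/*
 * Toy model (an assumption) of the SRQ initialization above: the free
 * list is a ring of next_wqe_index links, and the patch additionally
 * seeds every scatter entry with an invalid lkey so the HCA stops at
 * unused entries instead of reading stale keys.
 */
#include <stdint.h>

#define INVALID_LKEY_SKETCH 0x100u /* stand-in for MLX4_INVALID_LKEY */

static void init_srq_ring(uint16_t *next_wqe_index, uint32_t *scatter_lkey,
			  unsigned max_wqes, unsigned sges_per_wqe)
{
	unsigned i, s;

	for (i = 0; i < max_wqes; i++) {
		/* chain each WQE to the next; max_wqes is a power of two */
		next_wqe_index[i] = (uint16_t)((i + 1) & (max_wqes - 1));
		for (s = 0; s < sges_per_wqe; s++)
			scatter_lkey[i * sges_per_wqe + s] =
				INVALID_LKEY_SKETCH;
	}
}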
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/*#include "core_priv.h"*/ +#include "mlx4_ib.h" +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/stat.h> + +#include <rdma/ib_mad.h> +/*show_admin_alias_guid returns the administratively assigned value of that GUID. + * Values returned in buf parameter string: + * 0 - requests opensm to assign a value. + * ffffffffffffffff - delete this entry. + * other - value assigned by administrator. + */ +static ssize_t show_admin_alias_guid(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int record_num;/*0-15*/ + int guid_index_in_rec; /*0 - 7*/ + struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = + container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); + struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; + struct mlx4_ib_dev *mdev = port->dev; + + record_num = mlx4_ib_iov_dentry->entry_num / 8 ; + guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ; + + return sprintf(buf, "%llx\n", + be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid. + ports_guid[port->num - 1]. + all_rec_per_port[record_num]. + all_recs[8 * guid_index_in_rec])); +} + +/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID. + * Values in buf parameter string: + * 0 - requests opensm to assign a value. + * 0xffffffffffffffff - delete this entry. + * other - guid value assigned by the administrator. + */ +static ssize_t store_admin_alias_guid(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int record_num;/*0-15*/ + int guid_index_in_rec; /*0 - 7*/ + struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = + container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); + struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; + struct mlx4_ib_dev *mdev = port->dev; + u64 sysadmin_ag_val; + + record_num = mlx4_ib_iov_dentry->entry_num / 8; + guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8; + if (0 == record_num && 0 == guid_index_in_rec) { + pr_err("GUID 0 block 0 is RO\n"); + return count; + } + sscanf(buf, "%llx", &sysadmin_ag_val); + *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1]. + all_rec_per_port[record_num]. 
+ all_recs[GUID_REC_SIZE * guid_index_in_rec] = + cpu_to_be64(sysadmin_ag_val); + + /* Change the state to be pending for update */ + mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status + = MLX4_GUID_INFO_STATUS_IDLE ; + + mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method + = MLX4_GUID_INFO_RECORD_SET; + + switch (sysadmin_ag_val) { + case MLX4_GUID_FOR_DELETE_VAL: + mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method + = MLX4_GUID_INFO_RECORD_DELETE; + mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership + = MLX4_GUID_SYSADMIN_ASSIGN; + break; + /* The sysadmin requests the SM to re-assign */ + case MLX4_NOT_SET_GUID: + mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership + = MLX4_GUID_DRIVER_ASSIGN; + break; + /* The sysadmin requests a specific value.*/ + default: + mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership + = MLX4_GUID_SYSADMIN_ASSIGN; + break; + } + + /* set the record index */ + mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes + = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec); + + mlx4_ib_init_alias_guid_work(mdev, port->num - 1); + + return count; +} + +static ssize_t show_port_gid(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = + container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); + struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; + struct mlx4_ib_dev *mdev = port->dev; + union ib_gid gid; + ssize_t ret; + + ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num, + mlx4_ib_iov_dentry->entry_num, &gid, 1); + if (ret) + return ret; + ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", + be16_to_cpu(((__be16 *) gid.raw)[0]), + be16_to_cpu(((__be16 *) gid.raw)[1]), + be16_to_cpu(((__be16 *) gid.raw)[2]), + be16_to_cpu(((__be16 *) gid.raw)[3]), + be16_to_cpu(((__be16 *) gid.raw)[4]), + be16_to_cpu(((__be16 *) gid.raw)[5]), + be16_to_cpu(((__be16 *) gid.raw)[6]), + be16_to_cpu(((__be16 *) gid.raw)[7])); + return ret; +} + +static ssize_t show_phys_port_pkey(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry = + container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry); + struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx; + struct mlx4_ib_dev *mdev = port->dev; + u16 pkey; + ssize_t ret; + + ret = __mlx4_ib_query_pkey(&mdev->ib_dev, port->num, + mlx4_ib_iov_dentry->entry_num, &pkey, 1); + if (ret) + return ret; + + return sprintf(buf, "0x%04x\n", pkey); +} + +#define DENTRY_REMOVE(_dentry) \ +do { \ + sysfs_remove_file((_dentry)->kobj, &(_dentry)->dentry.attr); \ +} while (0); + +static int create_sysfs_entry(void *_ctx, struct mlx4_ib_iov_sysfs_attr *_dentry, + char *_name, struct kobject *_kobj, + ssize_t (*show)(struct device *dev, + struct device_attribute *attr, + char *buf), + ssize_t (*store)(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) + ) +{ + int ret = 0; + struct mlx4_ib_iov_sysfs_attr *vdentry = _dentry; + + vdentry->ctx = _ctx; + vdentry->dentry.show = show; + vdentry->dentry.store = store; + sysfs_attr_init(&vdentry->dentry.attr); + vdentry->dentry.attr.name = vdentry->name; + vdentry->dentry.attr.mode = 0; + vdentry->kobj = _kobj; + snprintf(vdentry->name, 15, "%s", _name); + + if 
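/*
 * Userspace-side sketch of the admin_guids contract documented above:
 * writing 0 asks opensm to assign a value, the all-ones GUID requests
 * deletion, and anything else is a sysadmin-assigned GUID. The enum and
 * helper below are illustrative, mirroring MLX4_NOT_SET_GUID and
 * MLX4_GUID_FOR_DELETE_VAL by assumption.
 */
#include <stdint.h>

enum admin_guid_action {
	GUID_SM_ASSIGN,		/* 0: let opensm pick the GUID */
	GUID_DELETE,		/* 0xffffffffffffffff: remove the entry */
	GUID_ADMIN_ASSIGN,	/* anything else: fixed by the admin */
};

static enum admin_guid_action classify_admin_guid(uint64_t v)
{
	if (v == 0)
		return GUID_SM_ASSIGN;
	if (v == UINT64_MAX)
		return GUID_DELETE;
	return GUID_ADMIN_ASSIGN;
}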
(vdentry->dentry.store) + vdentry->dentry.attr.mode |= S_IWUSR; + + if (vdentry->dentry.show) + vdentry->dentry.attr.mode |= S_IRUGO; + + ret = sysfs_create_file(vdentry->kobj, &vdentry->dentry.attr); + if (ret) { + pr_err("failed to create %s\n", vdentry->dentry.attr.name); + vdentry->ctx = NULL; + return ret; + } + + return ret; +} + +int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, + struct attribute *attr) +{ + struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1]; + int ret; + + ret = sysfs_create_file(port->mcgs_parent, attr); + if (ret) + pr_err("failed to create %s\n", attr->name); + + return ret; +} + +void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, + struct attribute *attr) +{ + struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1]; + + sysfs_remove_file(port->mcgs_parent, attr); +} + +static int add_port_entries(struct mlx4_ib_dev *device, int port_num) +{ + int i; + char buff[10]; + struct mlx4_ib_iov_port *port = NULL; + int ret = 0 ; + struct ib_port_attr attr; + + /* get the physical gid and pkey table sizes.*/ + ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1); + if (ret) + goto err; + + port = &device->iov_ports[port_num - 1]; + port->dev = device; + port->num = port_num; + /* Directory structure: + * iov - + * port num - + * admin_guids + * gids (operational) + * mcg_table + */ + port->dentr_ar = kzalloc(sizeof (struct mlx4_ib_iov_sysfs_attr_ar), + GFP_KERNEL); + if (!port->dentr_ar) { + ret = -ENOMEM; + goto err; + } + sprintf(buff, "%d", port_num); + port->cur_port = kobject_create_and_add(buff, + kobject_get(device->ports_parent)); + if (!port->cur_port) { + ret = -ENOMEM; + goto kobj_create_err; + } + /* admin GUIDs */ + port->admin_alias_parent = kobject_create_and_add("admin_guids", + kobject_get(port->cur_port)); + if (!port->admin_alias_parent) { + ret = -ENOMEM; + goto err_admin_guids; + } + for (i = 0 ; i < attr.gid_tbl_len; i++) { + sprintf(buff, "%d", i); + port->dentr_ar->dentries[i].entry_num = i; + ret = create_sysfs_entry(port, &port->dentr_ar->dentries[i], + buff, port->admin_alias_parent, + show_admin_alias_guid, store_admin_alias_guid); + if (ret) + goto err_admin_alias_parent; + } + + /* gids subdirectory (operational gids) */ + port->gids_parent = kobject_create_and_add("gids", + kobject_get(port->cur_port)); + if (!port->gids_parent) { + ret = -ENOMEM; + goto err_gids; + } + + for (i = 0 ; i < attr.gid_tbl_len; i++) { + sprintf(buff, "%d", i); + port->dentr_ar->dentries[attr.gid_tbl_len + i].entry_num = i; + ret = create_sysfs_entry(port, + &port->dentr_ar->dentries[attr.gid_tbl_len + i], + buff, + port->gids_parent, show_port_gid, NULL); + if (ret) + goto err_gids_parent; + } + + /* physical port pkey table */ + port->pkeys_parent = + kobject_create_and_add("pkeys", kobject_get(port->cur_port)); + if (!port->pkeys_parent) { + ret = -ENOMEM; + goto err_pkeys; + } + + for (i = 0 ; i < attr.pkey_tbl_len; i++) { + sprintf(buff, "%d", i); + port->dentr_ar->dentries[2 * attr.gid_tbl_len + i].entry_num = i; + ret = create_sysfs_entry(port, + &port->dentr_ar->dentries[2 * attr.gid_tbl_len + i], + buff, port->pkeys_parent, + show_phys_port_pkey, NULL); + if (ret) + goto err_pkeys_parent; + } + + /* MCGs table */ + port->mcgs_parent = + kobject_create_and_add("mcgs", kobject_get(port->cur_port)); + if (!port->mcgs_parent) { + ret = -ENOMEM; + goto err_mcgs; + } + return 0; + +err_mcgs: + kobject_put(port->cur_port); + +err_pkeys_parent: + kobject_put(port->pkeys_parent); + 
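/*
 * Toy refcount model (an assumption) of the unwind discipline in
 * add_port_entries above: each kobject_create_and_add(name,
 * kobject_get(parent)) takes an extra reference on the parent, so every
 * error label must drop both the child and the matching parent
 * reference, which is why cur_port is put repeatedly on the later
 * labels.
 */
struct ref {
	int count;
};

static void ref_get(struct ref *r) { r->count++; }
static void ref_put(struct ref *r) { r->count--; }

static void create_child(struct ref *parent, struct ref *child)
{
	ref_get(parent);	/* models kobject_get(parent) at creation */
	child->count = 1;
}

static void destroy_child(struct ref *parent, struct ref *child)
{
	ref_put(child);		/* drop the child itself */
	ref_put(parent);	/* and the reference creation took */
}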
+err_pkeys: + kobject_put(port->cur_port); + +err_gids_parent: + kobject_put(port->gids_parent); + +err_gids: + kobject_put(port->cur_port); + +err_admin_alias_parent: + kobject_put(port->admin_alias_parent); + +err_admin_guids: + kobject_put(port->cur_port); + kobject_put(port->cur_port); /* once more for create_and_add buff */ + +kobj_create_err: + kobject_put(device->ports_parent); + kfree(port->dentr_ar); + +err: + pr_err("add_port_entries FAILED: for port:%d, error: %d\n", + port_num, ret); + return ret; +} + +static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) +{ + char base_name[9]; + + /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ + strlcpy(name, pci_name(dev->dev->pdev), max); + strncpy(base_name, name, 8); /*till xxxx:yy:*/ + base_name[8] = '\0'; + /* with no ARI only 3 last bits are used so when the fn is higher than 8 + * need to add it to the dev num, so count in the last number will be + * modulo 8 */ + sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8)); +} + +struct mlx4_port { + struct kobject kobj; + struct mlx4_ib_dev *dev; + struct attribute_group pkey_group; + struct attribute_group gid_group; + struct device_attribute enable_smi_admin; + struct device_attribute smi_enabled; + int slave; + u8 port_num; +}; + + +static void mlx4_port_release(struct kobject *kobj) +{ + struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj); + struct attribute *a; + int i; + + for (i = 0; (a = p->pkey_group.attrs[i]); ++i) + kfree(a); + kfree(p->pkey_group.attrs); + for (i = 0; (a = p->gid_group.attrs[i]); ++i) + kfree(a); + kfree(p->gid_group.attrs); + kfree(p); +} + +struct port_attribute { + struct attribute attr; + ssize_t (*show)(struct mlx4_port *, struct port_attribute *, char *buf); + ssize_t (*store)(struct mlx4_port *, struct port_attribute *, + const char *buf, size_t count); +}; + +static ssize_t port_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct port_attribute *port_attr = + container_of(attr, struct port_attribute, attr); + struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj); + + if (!port_attr->show) + return -EIO; + return port_attr->show(p, port_attr, buf); +} + +static ssize_t port_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t size) +{ + struct port_attribute *port_attr = + container_of(attr, struct port_attribute, attr); + struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj); + + if (!port_attr->store) + return -EIO; + return port_attr->store(p, port_attr, buf, size); +} + +static const struct sysfs_ops port_sysfs_ops = { + .show = port_attr_show, + .store = port_attr_store, +}; + +static struct kobj_type port_type = { + .release = mlx4_port_release, + .sysfs_ops = &port_sysfs_ops, +}; + +struct port_table_attribute { + struct port_attribute attr; + char name[8]; + int index; +}; + +static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr, + char *buf) +{ + struct port_table_attribute *tab_attr = + container_of(attr, struct port_table_attribute, attr); + ssize_t ret = -ENODEV; + + if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >= + (p->dev->dev->caps.pkey_table_len[p->port_num])) + ret = sprintf(buf, "none\n"); + else + ret = sprintf(buf, "%d\n", + p->dev->pkeys.virt2phys_pkey[p->slave] + [p->port_num - 1][tab_attr->index]); + return ret; +} + +static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr, + const char *buf, size_t count) +{ + struct 
port_table_attribute *tab_attr = + container_of(attr, struct port_table_attribute, attr); + int idx; + int err; + + /* do not allow remapping Dom0 virtual pkey table */ + if (p->slave == mlx4_master_func_num(p->dev->dev)) + return -EINVAL; + + if (!strncasecmp(buf, "no", 2)) + idx = p->dev->dev->phys_caps.pkey_phys_table_len[p->port_num] - 1; + else if (sscanf(buf, "%i", &idx) != 1 || + idx >= p->dev->dev->caps.pkey_table_len[p->port_num] || + idx < 0) + return -EINVAL; + + p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1] + [tab_attr->index] = idx; + mlx4_sync_pkey_table(p->dev->dev, p->slave, p->port_num, + tab_attr->index, idx); + err = mlx4_gen_pkey_eqe(p->dev->dev, p->slave, p->port_num); + if (err) { + pr_err("mlx4_gen_pkey_eqe failed for slave %d," + " port %d, index %d\n", p->slave, p->port_num, idx); + return err; + } + return count; +} + +static ssize_t show_port_gid_idx(struct mlx4_port *p, + struct port_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", p->slave); +} + +static struct attribute ** +alloc_group_attrs(ssize_t (*show)(struct mlx4_port *, + struct port_attribute *, char *buf), + ssize_t (*store)(struct mlx4_port *, struct port_attribute *, + const char *buf, size_t count), + int len) +{ + struct attribute **tab_attr; + struct port_table_attribute *element; + int i; + + tab_attr = kcalloc(1 + len, sizeof (struct attribute *), GFP_KERNEL); + if (!tab_attr) + return NULL; + + for (i = 0; i < len; i++) { + element = kzalloc(sizeof (struct port_table_attribute), + GFP_KERNEL); + if (!element) + goto err; + if (snprintf(element->name, sizeof (element->name), + "%d", i) >= sizeof (element->name)) { + kfree(element); + goto err; + } + sysfs_attr_init(&element->attr.attr); + element->attr.attr.name = element->name; + if (store) { + element->attr.attr.mode = S_IWUSR | S_IRUGO; + element->attr.store = store; + } else + element->attr.attr.mode = S_IRUGO; + + element->attr.show = show; + element->index = i; + tab_attr[i] = &element->attr.attr; + } + return tab_attr; + +err: + while (--i >= 0) + kfree(tab_attr[i]); + kfree(tab_attr); + return NULL; +} + +static ssize_t sysfs_show_smi_enabled(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, smi_enabled); + ssize_t len = 0; + + if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num)) + len = sprintf(buf, "%d\n", 1); + else + len = sprintf(buf, "%d\n", 0); + + return len; +} + +static ssize_t sysfs_show_enable_smi_admin(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, enable_smi_admin); + ssize_t len = 0; + + if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num)) + len = sprintf(buf, "%d\n", 1); + else + len = sprintf(buf, "%d\n", 0); + + return len; +} + +static ssize_t sysfs_store_enable_smi_admin(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct mlx4_port *p = + container_of(attr, struct mlx4_port, enable_smi_admin); + int enable; + + if (sscanf(buf, "%i", &enable) != 1 || + enable < 0 || enable > 1) + return -EINVAL; + + if (mlx4_vf_set_enable_smi_admin(p->dev->dev, p->slave, p->port_num, enable)) + return -EINVAL; + return count; +} + +static int add_vf_smi_entries(struct mlx4_port *p) +{ + int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) == + IB_LINK_LAYER_ETHERNET; + int ret; + + /* do not display entries if eth transport, or if master */ + if (is_eth || p->slave 
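/*
 * Plain-C sketch of the alloc_group_attrs pattern above: the attribute
 * array is allocated with one extra slot that calloc leaves NULL, which
 * is the terminator sysfs expects for an attribute group; here ordinary
 * strings stand in for the attribute objects.
 */
#include <stdio.h>
#include <stdlib.h>

static char **alloc_index_names(int len)
{
	char **tab = calloc(len + 1, sizeof(*tab)); /* last slot stays NULL */
	int i;

	if (!tab)
		return NULL;
	for (i = 0; i < len; i++) {
		tab[i] = malloc(8);
		if (!tab[i])
			goto err;
		snprintf(tab[i], 8, "%d", i);
	}
	return tab;

err:
	while (--i >= 0)
		free(tab[i]);
	free(tab);
	return NULL;
}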
== mlx4_master_func_num(p->dev->dev)) + return 0; + + sysfs_attr_init(&p->smi_enabled.attr); + p->smi_enabled.show = sysfs_show_smi_enabled; + p->smi_enabled.store = NULL; + p->smi_enabled.attr.name = "smi_enabled"; + p->smi_enabled.attr.mode = 0444; + ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr); + if (ret) { + pr_err("failed to create smi_enabled\n"); + return ret; + } + + sysfs_attr_init(&p->enable_smi_admin.attr); + p->enable_smi_admin.show = sysfs_show_enable_smi_admin; + p->enable_smi_admin.store = sysfs_store_enable_smi_admin; + p->enable_smi_admin.attr.name = "enable_smi_admin"; + p->enable_smi_admin.attr.mode = 0644; + ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr); + if (ret) { + pr_err("failed to create enable_smi_admin\n"); + sysfs_remove_file(&p->kobj, &p->smi_enabled.attr); + return ret; + } + return 0; +} + +static void remove_vf_smi_entries(struct mlx4_port *p) +{ + int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) == + IB_LINK_LAYER_ETHERNET; + + if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev)) + return; + + sysfs_remove_file(&p->kobj, &p->smi_enabled.attr); + sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr); +} + +static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave) +{ + struct mlx4_port *p; + int i; + int ret; + + p = kzalloc(sizeof *p, GFP_KERNEL); + if (!p) + return -ENOMEM; + + p->dev = dev; + p->port_num = port_num; + p->slave = slave; + + ret = kobject_init_and_add(&p->kobj, &port_type, + kobject_get(dev->dev_ports_parent[slave]), + "%d", port_num); + if (ret) + goto err_alloc; + + p->pkey_group.name = "pkey_idx"; + p->pkey_group.attrs = + alloc_group_attrs(show_port_pkey, store_port_pkey, + dev->dev->caps.pkey_table_len[port_num]); + if (!p->pkey_group.attrs) { + ret = -ENOMEM; + goto err_alloc; + } + + ret = sysfs_create_group(&p->kobj, &p->pkey_group); + if (ret) + goto err_free_pkey; + + p->gid_group.name = "gid_idx"; + p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1); + if (!p->gid_group.attrs) { + ret = -ENOMEM; + goto err_free_pkey; + } + + ret = sysfs_create_group(&p->kobj, &p->gid_group); + if (ret) + goto err_free_gid; + + ret = add_vf_smi_entries(p); + if (ret) + goto err_free_gid; + + list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]); + return 0; + +err_free_gid: + kfree(p->gid_group.attrs[0]); + kfree(p->gid_group.attrs); + +err_free_pkey: + for (i = 0; i < dev->dev->caps.pkey_table_len[port_num]; ++i) + kfree(p->pkey_group.attrs[i]); + kfree(p->pkey_group.attrs); + +err_alloc: + kobject_put(dev->dev_ports_parent[slave]); + kfree(p); + return ret; +} + +static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave) +{ + char name[32]; + int err; + int port; + struct kobject *p, *t; + struct mlx4_port *mport; + struct mlx4_active_ports actv_ports; + + get_name(dev, name, slave, sizeof name); + + dev->pkeys.device_parent[slave] = + kobject_create_and_add(name, kobject_get(dev->iov_parent)); + + if (!dev->pkeys.device_parent[slave]) { + err = -ENOMEM; + goto fail_dev; + } + + INIT_LIST_HEAD(&dev->pkeys.pkey_port_list[slave]); + + dev->dev_ports_parent[slave] = + kobject_create_and_add("ports", + kobject_get(dev->pkeys.device_parent[slave])); + + if (!dev->dev_ports_parent[slave]) { + err = -ENOMEM; + goto err_ports; + } + + actv_ports = mlx4_get_active_ports(dev->dev, slave); + + for (port = 1; port <= dev->dev->caps.num_ports; ++port) { + if (!test_bit(port - 1, actv_ports.ports)) + continue; + err = add_port(dev, port, slave); + if (err) 
+ goto err_add; + } + return 0; + +err_add: + list_for_each_entry_safe(p, t, + &dev->pkeys.pkey_port_list[slave], + entry) { + list_del(&p->entry); + mport = container_of(p, struct mlx4_port, kobj); + sysfs_remove_group(p, &mport->pkey_group); + sysfs_remove_group(p, &mport->gid_group); + remove_vf_smi_entries(mport); + kobject_put(p); + } + kobject_put(dev->dev_ports_parent[slave]); + +err_ports: + kobject_put(dev->pkeys.device_parent[slave]); + /* extra put for the device_parent create_and_add */ + kobject_put(dev->pkeys.device_parent[slave]); + +fail_dev: + kobject_put(dev->iov_parent); + return err; +} + +static int register_pkey_tree(struct mlx4_ib_dev *device) +{ + int i; + + if (!mlx4_is_master(device->dev)) + return 0; + + for (i = 0; i <= device->dev->num_vfs; ++i) + register_one_pkey_tree(device, i); + + return 0; +} + +static void unregister_pkey_tree(struct mlx4_ib_dev *device) +{ + int slave; + struct kobject *p, *t; + struct mlx4_port *port; + + if (!mlx4_is_master(device->dev)) + return; + + for (slave = device->dev->num_vfs; slave >= 0; --slave) { + list_for_each_entry_safe(p, t, + &device->pkeys.pkey_port_list[slave], + entry) { + list_del(&p->entry); + port = container_of(p, struct mlx4_port, kobj); + sysfs_remove_group(p, &port->pkey_group); + sysfs_remove_group(p, &port->gid_group); + remove_vf_smi_entries(port); + kobject_put(p); + kobject_put(device->dev_ports_parent[slave]); + } + kobject_put(device->dev_ports_parent[slave]); + kobject_put(device->pkeys.device_parent[slave]); + kobject_put(device->pkeys.device_parent[slave]); + kobject_put(device->iov_parent); + } +} + +int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) +{ + int i; + int ret = 0; + + if (!mlx4_is_master(dev->dev)) + return 0; + + dev->iov_parent = + kobject_create_and_add("iov", + kobject_get(dev->ib_dev.ports_parent->parent)); + if (!dev->iov_parent) { + ret = -ENOMEM; + goto err; + } + dev->ports_parent = + kobject_create_and_add("ports", + kobject_get(dev->iov_parent)); + if (!dev->ports_parent) { + ret = -ENOMEM; + goto err_ports; + } + + for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) { + ret = add_port_entries(dev, i); + if (ret) + goto err_add_entries; + } + + ret = register_pkey_tree(dev); + if (ret) + goto err_add_entries; + return 0; + +err_add_entries: + kobject_put(dev->ports_parent); + +err_ports: + kobject_put(dev->iov_parent); +err: + kobject_put(dev->ib_dev.ports_parent->parent); + pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret); + return ret; +} + +static void unregister_alias_guid_tree(struct mlx4_ib_dev *device) +{ + struct mlx4_ib_iov_port *p; + int i; + + if (!mlx4_is_master(device->dev)) + return; + + for (i = 0; i < device->dev->caps.num_ports; i++) { + p = &device->iov_ports[i]; + kobject_put(p->admin_alias_parent); + kobject_put(p->gids_parent); + kobject_put(p->pkeys_parent); + kobject_put(p->mcgs_parent); + kobject_put(p->cur_port); + kobject_put(p->cur_port); + kobject_put(p->cur_port); + kobject_put(p->cur_port); + kobject_put(p->cur_port); + kobject_put(p->dev->ports_parent); + kfree(p->dentr_ar); + } +} + +void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device) +{ + unregister_alias_guid_tree(device); + unregister_pkey_tree(device); + kobject_put(device->ports_parent); + kobject_put(device->iov_parent); + kobject_put(device->iov_parent); + kobject_put(device->ib_dev.ports_parent->parent); +} diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h index e2d11be4525..07e6769ef43 100644 --- 
a/drivers/infiniband/hw/mlx4/user.h +++ b/drivers/infiniband/hw/mlx4/user.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -39,7 +40,9 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define MLX4_IB_UVERBS_ABI_VERSION 3 + +#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3 +#define MLX4_IB_UVERBS_ABI_VERSION 4 /* * Make sure that all structs defined in this file remain laid out so @@ -49,10 +52,18 @@ * instead. */ +struct mlx4_ib_alloc_ucontext_resp_v3 { + __u32 qp_tab_size; + __u16 bf_reg_size; + __u16 bf_regs_per_page; +}; + struct mlx4_ib_alloc_ucontext_resp { + __u32 dev_caps; __u32 qp_tab_size; __u16 bf_reg_size; __u16 bf_regs_per_page; + __u32 cqe_size; }; struct mlx4_ib_alloc_pd_resp { diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig new file mode 100644 index 00000000000..10df386c634 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/Kconfig @@ -0,0 +1,10 @@ +config MLX5_INFINIBAND + tristate "Mellanox Connect-IB HCA support" + depends on NETDEVICES && ETHERNET && PCI + select NET_VENDOR_MELLANOX + select MLX5_CORE + ---help--- + This driver provides low-level InfiniBand support for + Mellanox Connect-IB PCI Express host channel adapters (HCAs). + This is required to use InfiniBand protocols such as + IP-over-IB or SRP with these devices. diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile new file mode 100644 index 00000000000..4ea0135af48 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o + +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c new file mode 100644 index 00000000000..39ab0caefdf --- /dev/null +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
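The ABI bump in user.h above splits the ucontext response into two layouts: kernels at MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION (3) return the short struct, while version 4 prepends dev_caps and appends cqe_size. A userspace provider therefore has to size the response by the ABI version the kernel reports; a minimal sketch of that choice, with hypothetical struct and helper names (not part of libmlx4):

#include <stddef.h>
#include <stdint.h>

#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3

struct resp_v3 {                /* mirrors mlx4_ib_alloc_ucontext_resp_v3 */
        uint32_t qp_tab_size;
        uint16_t bf_reg_size;
        uint16_t bf_regs_per_page;
};

struct resp_v4 {                /* mirrors mlx4_ib_alloc_ucontext_resp */
        uint32_t dev_caps;
        uint32_t qp_tab_size;
        uint16_t bf_reg_size;
        uint16_t bf_regs_per_page;
        uint32_t cqe_size;
};

/* Hypothetical helper: pick the response size by the kernel's ABI. */
static size_t ucontext_resp_size(int abi_ver)
{
        return abi_ver <= MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION ?
                sizeof(struct resp_v3) : sizeof(struct resp_v4);
}

Reading the new fields against an old kernel would pick up garbage, which is exactly why the version constant for the old layout is kept alongside the bumped one.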
+ */ + +#include "mlx5_ib.h" + +struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, + struct mlx5_ib_ah *ah) +{ + if (ah_attr->ah_flags & IB_AH_GRH) { + memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16); + ah->av.grh_gid_fl = cpu_to_be32(ah_attr->grh.flow_label | + (1 << 30) | + ah_attr->grh.sgid_index << 20); + ah->av.hop_limit = ah_attr->grh.hop_limit; + ah->av.tclass = ah_attr->grh.traffic_class; + } + + ah->av.rlid = cpu_to_be16(ah_attr->dlid); + ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f; + ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf); + + return &ah->ibah; +} + +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + struct mlx5_ib_ah *ah; + + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + return create_ib_ah(ah_attr, ah); /* never fails */ +} + +int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + struct mlx5_ib_ah *ah = to_mah(ibah); + u32 tmp; + + memset(ah_attr, 0, sizeof(*ah_attr)); + + tmp = be32_to_cpu(ah->av.grh_gid_fl); + if (tmp & (1 << 30)) { + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.sgid_index = (tmp >> 20) & 0xff; + ah_attr->grh.flow_label = tmp & 0xfffff; + memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16); + ah_attr->grh.hop_limit = ah->av.hop_limit; + ah_attr->grh.traffic_class = ah->av.tclass; + } + ah_attr->dlid = be16_to_cpu(ah->av.rlid); + ah_attr->static_rate = ah->av.stat_rate_sl >> 4; + ah_attr->sl = ah->av.stat_rate_sl & 0xf; + + return 0; +} + +int mlx5_ib_destroy_ah(struct ib_ah *ah) +{ + kfree(to_mah(ah)); + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c new file mode 100644 index 00000000000..8ae4f896cb4 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -0,0 +1,1187 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
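The address-vector packing in ah.c above folds the GRH-valid flag, SGID index and flow label into one big-endian word: bit 30 marks GRH present, bits 27:20 carry sgid_index, bits 19:0 carry flow_label, and mlx5_ib_query_ah() unpacks the same layout. A standalone round-trip check of that bit layout, with hypothetical helper names:

#include <stdint.h>
#include <assert.h>

static uint32_t pack_grh_gid_fl(uint8_t sgid_index, uint32_t flow_label)
{
        return flow_label | (1u << 30) | ((uint32_t)sgid_index << 20);
}

static void unpack_grh_gid_fl(uint32_t v, uint8_t *sgid_index,
                              uint32_t *flow_label)
{
        *sgid_index = (v >> 20) & 0xff;
        *flow_label = v & 0xfffff;      /* flow label is 20 bits wide */
}

int main(void)
{
        uint8_t idx;
        uint32_t fl;
        uint32_t v = pack_grh_gid_fl(3, 0x12345);

        assert(v & (1u << 30));         /* GRH marked present */
        unpack_grh_gid_fl(v, &idx, &fl);
        assert(idx == 3 && fl == 0x12345);
        return 0;
}

The 20-bit flow label and the shifted index occupy disjoint bit ranges, so the OR in create_ib_ah() cannot clobber either field.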
+ */ + +#include <linux/kref.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h> +#include "mlx5_ib.h" +#include "user.h" + +static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) +{ + struct ib_cq *ibcq = &to_mibcq(cq)->ibcq; + + ibcq->comp_handler(ibcq, ibcq->cq_context); +} + +static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type) +{ + struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq); + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct ib_cq *ibcq = &cq->ibcq; + struct ib_event event; + + if (type != MLX5_EVENT_TYPE_CQ_ERROR) { + mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n", + type, mcq->cqn); + return; + } + + if (ibcq->event_handler) { + event.device = &dev->ib_dev; + event.event = IB_EVENT_CQ_ERR; + event.element.cq = ibcq; + ibcq->event_handler(&event, ibcq->cq_context); + } +} + +static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size) +{ + return mlx5_buf_offset(&buf->buf, n * size); +} + +static void *get_cqe(struct mlx5_ib_cq *cq, int n) +{ + return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz); +} + +static u8 sw_ownership_bit(int n, int nent) +{ + return (n & nent) ? 1 : 0; +} + +static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) +{ + void *cqe = get_cqe(cq, n & cq->ibcq.cqe); + struct mlx5_cqe64 *cqe64; + + cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; + + if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) && + !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { + return cqe; + } else { + return NULL; + } +} + +static void *next_cqe_sw(struct mlx5_ib_cq *cq) +{ + return get_sw_cqe(cq, cq->mcq.cons_index); +} + +static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx) +{ + switch (wq->wr_data[idx]) { + case MLX5_IB_WR_UMR: + return 0; + + case IB_WR_LOCAL_INV: + return IB_WC_LOCAL_INV; + + case IB_WR_FAST_REG_MR: + return IB_WC_FAST_REG_MR; + + default: + pr_warn("unknown completion status\n"); + return 0; + } +} + +static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe, + struct mlx5_ib_wq *wq, int idx) +{ + wc->wc_flags = 0; + switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) { + case MLX5_OPCODE_RDMA_WRITE_IMM: + wc->wc_flags |= IB_WC_WITH_IMM; + case MLX5_OPCODE_RDMA_WRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case MLX5_OPCODE_SEND_IMM: + wc->wc_flags |= IB_WC_WITH_IMM; + case MLX5_OPCODE_SEND: + case MLX5_OPCODE_SEND_INVAL: + wc->opcode = IB_WC_SEND; + break; + case MLX5_OPCODE_RDMA_READ: + wc->opcode = IB_WC_RDMA_READ; + wc->byte_len = be32_to_cpu(cqe->byte_cnt); + break; + case MLX5_OPCODE_ATOMIC_CS: + wc->opcode = IB_WC_COMP_SWAP; + wc->byte_len = 8; + break; + case MLX5_OPCODE_ATOMIC_FA: + wc->opcode = IB_WC_FETCH_ADD; + wc->byte_len = 8; + break; + case MLX5_OPCODE_ATOMIC_MASKED_CS: + wc->opcode = IB_WC_MASKED_COMP_SWAP; + wc->byte_len = 8; + break; + case MLX5_OPCODE_ATOMIC_MASKED_FA: + wc->opcode = IB_WC_MASKED_FETCH_ADD; + wc->byte_len = 8; + break; + case MLX5_OPCODE_BIND_MW: + wc->opcode = IB_WC_BIND_MW; + break; + case MLX5_OPCODE_UMR: + wc->opcode = get_umr_comp(wq, idx); + break; + } +} + +enum { + MLX5_GRH_IN_BUFFER = 1, + MLX5_GRH_IN_CQE = 2, +}; + +static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, + struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); + struct mlx5_ib_srq *srq; + struct mlx5_ib_wq *wq; + u16 wqe_ctr; + u8 g; + + if (qp->ibqp.srq || qp->ibqp.xrcd) { + struct mlx5_core_srq *msrq = NULL; + + if (qp->ibqp.xrcd) { + msrq = 
mlx5_core_get_srq(&dev->mdev, + be32_to_cpu(cqe->srqn)); + srq = to_mibsrq(msrq); + } else { + srq = to_msrq(qp->ibqp.srq); + } + if (srq) { + wqe_ctr = be16_to_cpu(cqe->wqe_counter); + wc->wr_id = srq->wrid[wqe_ctr]; + mlx5_ib_free_srq_wqe(srq, wqe_ctr); + if (msrq && atomic_dec_and_test(&msrq->refcount)) + complete(&msrq->free); + } + } else { + wq = &qp->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + wc->byte_len = be32_to_cpu(cqe->byte_cnt); + + switch (cqe->op_own >> 4) { + case MLX5_CQE_RESP_WR_IMM: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->imm_inval_pkey; + break; + case MLX5_CQE_RESP_SEND: + wc->opcode = IB_WC_RECV; + wc->wc_flags = 0; + break; + case MLX5_CQE_RESP_SEND_IMM: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = cqe->imm_inval_pkey; + break; + case MLX5_CQE_RESP_SEND_INV: + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_INVALIDATE; + wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey); + break; + } + wc->slid = be16_to_cpu(cqe->slid); + wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf; + wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff; + wc->dlid_path_bits = cqe->ml_path; + g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; + wc->wc_flags |= g ? IB_WC_GRH : 0; + wc->pkey_index = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff; +} + +static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) +{ + __be32 *p = (__be32 *)cqe; + int i; + + mlx5_ib_warn(dev, "dump error cqe\n"); + for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4) + pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]), + be32_to_cpu(p[1]), be32_to_cpu(p[2]), + be32_to_cpu(p[3])); +} + +static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, + struct mlx5_err_cqe *cqe, + struct ib_wc *wc) +{ + int dump = 1; + + switch (cqe->syndrome) { + case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR: + wc->status = IB_WC_LOC_LEN_ERR; + break; + case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR: + wc->status = IB_WC_LOC_QP_OP_ERR; + break; + case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR: + wc->status = IB_WC_LOC_PROT_ERR; + break; + case MLX5_CQE_SYNDROME_WR_FLUSH_ERR: + dump = 0; + wc->status = IB_WC_WR_FLUSH_ERR; + break; + case MLX5_CQE_SYNDROME_MW_BIND_ERR: + wc->status = IB_WC_MW_BIND_ERR; + break; + case MLX5_CQE_SYNDROME_BAD_RESP_ERR: + wc->status = IB_WC_BAD_RESP_ERR; + break; + case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: + wc->status = IB_WC_REM_INV_REQ_ERR; + break; + case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR: + wc->status = IB_WC_REM_ACCESS_ERR; + break; + case MLX5_CQE_SYNDROME_REMOTE_OP_ERR: + wc->status = IB_WC_REM_OP_ERR; + break; + case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: + wc->status = IB_WC_RETRY_EXC_ERR; + dump = 0; + break; + case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR: + wc->status = IB_WC_RNR_RETRY_EXC_ERR; + dump = 0; + break; + case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR: + wc->status = IB_WC_REM_ABORT_ERR; + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + + wc->vendor_err = cqe->vendor_err_synd; + if (dump) + dump_cqe(dev, cqe); +} + +static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx) +{ + /* TBD: waiting decision + */ + return 0; +} + +static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx) +{ + struct mlx5_wqe_data_seg *dpseg; + void *addr; + + dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_raddr_seg) 
+ + sizeof(struct mlx5_wqe_atomic_seg); + addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr); + return addr; +} + +static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, + uint16_t idx) +{ + void *addr; + int byte_count; + int i; + + if (!is_atomic_response(qp, idx)) + return; + + byte_count = be32_to_cpu(cqe64->byte_cnt); + addr = mlx5_get_atomic_laddr(qp, idx); + + if (byte_count == 4) { + *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr)); + } else { + for (i = 0; i < byte_count; i += 8) { + *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr)); + addr += 8; + } + } + + return; +} + +static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, + u16 tail, u16 head) +{ + int idx; + + do { + idx = tail & (qp->sq.wqe_cnt - 1); + handle_atomic(qp, cqe64, idx); + if (idx == head) + break; + + tail = qp->sq.w_list[idx].next; + } while (1); + tail = qp->sq.w_list[idx].next; + qp->sq.last_poll = tail; +} + +static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) +{ + mlx5_buf_free(&dev->mdev, &buf->buf); +} + +static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, + struct ib_sig_err *item) +{ + u16 syndrome = be16_to_cpu(cqe->syndrome); + +#define GUARD_ERR (1 << 13) +#define APPTAG_ERR (1 << 12) +#define REFTAG_ERR (1 << 11) + + if (syndrome & GUARD_ERR) { + item->err_type = IB_SIG_BAD_GUARD; + item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16; + item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16; + } else + if (syndrome & REFTAG_ERR) { + item->err_type = IB_SIG_BAD_REFTAG; + item->expected = be32_to_cpu(cqe->expected_reftag); + item->actual = be32_to_cpu(cqe->actual_reftag); + } else + if (syndrome & APPTAG_ERR) { + item->err_type = IB_SIG_BAD_APPTAG; + item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff; + item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff; + } else { + pr_err("Got signature completion error with bad syndrome %04x\n", + syndrome); + } + + item->sig_err_offset = be64_to_cpu(cqe->err_offset); + item->key = be32_to_cpu(cqe->mkey); +} + +static int mlx5_poll_one(struct mlx5_ib_cq *cq, + struct mlx5_ib_qp **cur_qp, + struct ib_wc *wc) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_err_cqe *err_cqe; + struct mlx5_cqe64 *cqe64; + struct mlx5_core_qp *mqp; + struct mlx5_ib_wq *wq; + struct mlx5_sig_err_cqe *sig_err_cqe; + struct mlx5_core_mr *mmr; + struct mlx5_ib_mr *mr; + uint8_t opcode; + uint32_t qpn; + u16 wqe_ctr; + void *cqe; + int idx; + +repoll: + cqe = next_cqe_sw(cq); + if (!cqe) + return -EAGAIN; + + cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; + + ++cq->mcq.cons_index; + + /* Make sure we read CQ entry contents after we've checked the + * ownership bit. + */ + rmb(); + + opcode = cqe64->op_own >> 4; + if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { + if (likely(cq->resize_buf)) { + free_cq_buf(dev, &cq->buf); + cq->buf = *cq->resize_buf; + kfree(cq->resize_buf); + cq->resize_buf = NULL; + goto repoll; + } else { + mlx5_ib_warn(dev, "unexpected resize cqe\n"); + } + } + + qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; + if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { + /* We do not have to take the QP table lock here, + * because CQs will be locked while QPs are removed + * from the table. 
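The polling path above leans on the CQE ownership convention set up by sw_ownership_bit() and get_sw_cqe(): the owner bit of a valid entry must match the parity of the consumer index's lap around the ring (n & nent), and only after that check does the rmb() permit reading the entry body. A minimal model of the ownership test, assuming a power-of-two ring of nent entries and ignoring the separate invalid-opcode check:

#include <stdint.h>
#include <stdbool.h>

#define OWNER_MASK 0x1          /* stands in for MLX5_CQE_OWNER_MASK */

static bool cqe_is_sw_owned(uint8_t op_own, uint32_t ci, uint32_t nent)
{
        /* lap parity: the owner bit flips every pass around the ring */
        uint8_t lap = (ci & nent) ? 1 : 0;

        return (op_own & OWNER_MASK) == lap;
}

Because hardware writes entries with the opposite parity each lap, software never needs to zero consumed entries; it just advances cons_index and rechecks parity.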
+ */ + mqp = __mlx5_qp_lookup(&dev->mdev, qpn); + if (unlikely(!mqp)) { + mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n", + cq->mcq.cqn, qpn); + return -EINVAL; + } + + *cur_qp = to_mibqp(mqp); + } + + wc->qp = &(*cur_qp)->ibqp; + switch (opcode) { + case MLX5_CQE_REQ: + wq = &(*cur_qp)->sq; + wqe_ctr = be16_to_cpu(cqe64->wqe_counter); + idx = wqe_ctr & (wq->wqe_cnt - 1); + handle_good_req(wc, cqe64, wq, idx); + handle_atomics(*cur_qp, cqe64, wq->last_poll, idx); + wc->wr_id = wq->wrid[idx]; + wq->tail = wq->wqe_head[idx] + 1; + wc->status = IB_WC_SUCCESS; + break; + case MLX5_CQE_RESP_WR_IMM: + case MLX5_CQE_RESP_SEND: + case MLX5_CQE_RESP_SEND_IMM: + case MLX5_CQE_RESP_SEND_INV: + handle_responder(wc, cqe64, *cur_qp); + wc->status = IB_WC_SUCCESS; + break; + case MLX5_CQE_RESIZE_CQ: + break; + case MLX5_CQE_REQ_ERR: + case MLX5_CQE_RESP_ERR: + err_cqe = (struct mlx5_err_cqe *)cqe64; + mlx5_handle_error_cqe(dev, err_cqe, wc); + mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n", + opcode == MLX5_CQE_REQ_ERR ? + "Requestor" : "Responder", cq->mcq.cqn); + mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n", + err_cqe->syndrome, err_cqe->vendor_err_synd); + if (opcode == MLX5_CQE_REQ_ERR) { + wq = &(*cur_qp)->sq; + wqe_ctr = be16_to_cpu(cqe64->wqe_counter); + idx = wqe_ctr & (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[idx]; + wq->tail = wq->wqe_head[idx] + 1; + } else { + struct mlx5_ib_srq *srq; + + if ((*cur_qp)->ibqp.srq) { + srq = to_msrq((*cur_qp)->ibqp.srq); + wqe_ctr = be16_to_cpu(cqe64->wqe_counter); + wc->wr_id = srq->wrid[wqe_ctr]; + mlx5_ib_free_srq_wqe(srq, wqe_ctr); + } else { + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + } + break; + case MLX5_CQE_SIG_ERR: + sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; + + read_lock(&dev->mdev.priv.mr_table.lock); + mmr = __mlx5_mr_lookup(&dev->mdev, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + if (unlikely(!mmr)) { + read_unlock(&dev->mdev.priv.mr_table.lock); + mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", + cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); + return -EINVAL; + } + + mr = to_mibmr(mmr); + get_sig_err_item(sig_err_cqe, &mr->sig->err_item); + mr->sig->sig_err_exists = true; + mr->sig->sigerr_count++; + + mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", + cq->mcq.cqn, mr->sig->err_item.key, + mr->sig->err_item.err_type, + mr->sig->err_item.sig_err_offset, + mr->sig->err_item.expected, + mr->sig->err_item.actual); + + read_unlock(&dev->mdev.priv.mr_table.lock); + goto repoll; + } + + return 0; +} + +int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct mlx5_ib_cq *cq = to_mcq(ibcq); + struct mlx5_ib_qp *cur_qp = NULL; + unsigned long flags; + int npolled; + int err = 0; + + spin_lock_irqsave(&cq->lock, flags); + + for (npolled = 0; npolled < num_entries; npolled++) { + err = mlx5_poll_one(cq, &cur_qp, wc + npolled); + if (err) + break; + } + + if (npolled) + mlx5_cq_set_ci(&cq->mcq); + + spin_unlock_irqrestore(&cq->lock, flags); + + if (err == 0 || err == -EAGAIN) + return npolled; + else + return err; +} + +int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + mlx5_cq_arm(&to_mcq(ibcq)->mcq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? 
+ MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT, + to_mdev(ibcq->device)->mdev.priv.uuari.uars[0].map, + MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev.priv.cq_uar_lock)); + + return 0; +} + +static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, + int nent, int cqe_size) +{ + int err; + + err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size, + PAGE_SIZE * 2, &buf->buf); + if (err) + return err; + + buf->cqe_size = cqe_size; + buf->nent = nent; + + return 0; +} + +static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, + struct ib_ucontext *context, struct mlx5_ib_cq *cq, + int entries, struct mlx5_create_cq_mbox_in **cqb, + int *cqe_size, int *index, int *inlen) +{ + struct mlx5_ib_create_cq ucmd; + size_t ucmdlen; + int page_shift; + int npages; + int ncont; + int err; + + ucmdlen = + (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < + sizeof(ucmd)) ? (sizeof(ucmd) - + sizeof(ucmd.reserved)) : sizeof(ucmd); + + if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) + return -EFAULT; + + if (ucmdlen == sizeof(ucmd) && + ucmd.reserved != 0) + return -EINVAL; + + if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128) + return -EINVAL; + + *cqe_size = ucmd.cqe_size; + + cq->buf.umem = ib_umem_get(context, ucmd.buf_addr, + entries * ucmd.cqe_size, + IB_ACCESS_LOCAL_WRITE, 1); + if (IS_ERR(cq->buf.umem)) { + err = PTR_ERR(cq->buf.umem); + return err; + } + + err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr, + &cq->db); + if (err) + goto err_umem; + + mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift, + &ncont, NULL); + mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", + ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); + + *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont; + *cqb = mlx5_vzalloc(*inlen); + if (!*cqb) { + err = -ENOMEM; + goto err_db; + } + mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0); + (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + + *index = to_mucontext(context)->uuari.uars[0].index; + + return 0; + +err_db: + mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + +err_umem: + ib_umem_release(cq->buf.umem); + return err; +} + +static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context) +{ + mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); + ib_umem_release(cq->buf.umem); +} + +static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf) +{ + int i; + void *cqe; + struct mlx5_cqe64 *cqe64; + + for (i = 0; i < buf->nent; i++) { + cqe = get_cqe_from_buf(buf, i, buf->cqe_size); + cqe64 = buf->cqe_size == 64 ? 
cqe : cqe + 64; + cqe64->op_own = MLX5_CQE_INVALID << 4; + } +} + +static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, + int entries, int cqe_size, + struct mlx5_create_cq_mbox_in **cqb, + int *index, int *inlen) +{ + int err; + + err = mlx5_db_alloc(&dev->mdev, &cq->db); + if (err) + return err; + + cq->mcq.set_ci_db = cq->db.db; + cq->mcq.arm_db = cq->db.db + 1; + *cq->mcq.set_ci_db = 0; + *cq->mcq.arm_db = 0; + cq->mcq.cqe_sz = cqe_size; + + err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size); + if (err) + goto err_db; + + init_cq_buf(cq, &cq->buf); + + *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages; + *cqb = mlx5_vzalloc(*inlen); + if (!*cqb) { + err = -ENOMEM; + goto err_buf; + } + mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas); + + (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; + *index = dev->mdev.priv.uuari.uars[0].index; + + return 0; + +err_buf: + free_cq_buf(dev, &cq->buf); + +err_db: + mlx5_db_free(&dev->mdev, &cq->db); + return err; +} + +static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) +{ + free_cq_buf(dev, &cq->buf); + mlx5_db_free(&dev->mdev, &cq->db); +} + +struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx5_create_cq_mbox_in *cqb = NULL; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_cq *cq; + int uninitialized_var(index); + int uninitialized_var(inlen); + int cqe_size; + int irqn; + int eqn; + int err; + + if (entries < 0) + return ERR_PTR(-EINVAL); + + entries = roundup_pow_of_two(entries + 1); + if (entries > dev->mdev.caps.max_cqes) + return ERR_PTR(-EINVAL); + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return ERR_PTR(-ENOMEM); + + cq->ibcq.cqe = entries - 1; + mutex_init(&cq->resize_mutex); + spin_lock_init(&cq->lock); + cq->resize_buf = NULL; + cq->resize_umem = NULL; + + if (context) { + err = create_cq_user(dev, udata, context, cq, entries, + &cqb, &cqe_size, &index, &inlen); + if (err) + goto err_create; + } else { + /* for now choose 64 bytes till we have a proper interface */ + cqe_size = 64; + err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb, + &index, &inlen); + if (err) + goto err_create; + } + + cq->cqe_size = cqe_size; + cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; + cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index); + err = mlx5_vector2eqn(dev, vector, &eqn, &irqn); + if (err) + goto err_cqb; + + cqb->ctx.c_eqn = cpu_to_be16(eqn); + cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma); + + err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen); + if (err) + goto err_cqb; + + mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); + cq->mcq.irqn = irqn; + cq->mcq.comp = mlx5_ib_cq_comp; + cq->mcq.event = mlx5_ib_cq_event; + + if (context) + if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { + err = -EFAULT; + goto err_cmd; + } + + + mlx5_vfree(cqb); + return &cq->ibcq; + +err_cmd: + mlx5_core_destroy_cq(&dev->mdev, &cq->mcq); + +err_cqb: + mlx5_vfree(cqb); + if (context) + destroy_cq_user(cq, context); + else + destroy_cq_kernel(dev, cq); + +err_create: + kfree(cq); + + return ERR_PTR(err); +} + + +int mlx5_ib_destroy_cq(struct ib_cq *cq) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->device); + struct mlx5_ib_cq *mcq = to_mcq(cq); + struct ib_ucontext *context = NULL; + + if (cq->uobject) + context = cq->uobject->context; + + mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq); + if (context) + 
destroy_cq_user(mcq, context); + else + destroy_cq_kernel(dev, mcq); + + kfree(mcq); + + return 0; +} + +static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn) +{ + return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff); +} + +void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq) +{ + struct mlx5_cqe64 *cqe64, *dest64; + void *cqe, *dest; + u32 prod_index; + int nfreed = 0; + u8 owner_bit; + + if (!cq) + return; + + /* First we need to find the current producer index, so we + * know where to start cleaning from. It doesn't matter if HW + * adds new entries after this loop -- the QP we're worried + * about is already in RESET, so the new entries won't come + * from our QP and therefore don't need to be checked. + */ + for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++) + if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe) + break; + + /* Now sweep backwards through the CQ, removing CQ entries + * that match our QP by copying older entries on top of them. + */ + while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { + cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); + cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; + if (is_equal_rsn(cqe64, rsn)) { + if (srq && (ntohl(cqe64->srqn) & 0xffffff)) + mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter)); + ++nfreed; + } else if (nfreed) { + dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); + dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64; + owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK; + memcpy(dest, cqe, cq->mcq.cqe_sz); + dest64->op_own = owner_bit | + (dest64->op_own & ~MLX5_CQE_OWNER_MASK); + } + } + + if (nfreed) { + cq->mcq.cons_index += nfreed; + /* Make sure update of buffer contents is done before + * updating consumer index. 
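The sweep above runs backwards from the discovered producer index, counting entries that match the QP/SRQ number and sliding each survivor up by the running count, then advances cons_index by the total freed. An illustrative model of the compaction on a plain uint32_t ring (owner-bit preservation and SRQ WQE release omitted; the low 24 bits stand in for the CQE's QPN/SRQN field):

#include <stdint.h>

/* Drop entries matching rsn from ring[ci..prod); return count freed. */
static int clean_ring(uint32_t *ring, uint32_t ci, uint32_t prod,
                      uint32_t mask, uint32_t rsn)
{
        int nfreed = 0;
        uint32_t i = prod;

        while ((int)--i - (int)ci >= 0) {
                if ((ring[i & mask] & 0xffffff) == rsn)
                        ++nfreed;
                else if (nfreed)
                        /* copy the survivor over the newest freed slot */
                        ring[(i + nfreed) & mask] = ring[i & mask];
        }
        return nfreed;
}

Copying survivors toward the producer side is what lets the function free entries without ever moving the hardware's producer position.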
+ */ + wmb(); + mlx5_cq_set_ci(&cq->mcq); + } +} + +void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) +{ + if (!cq) + return; + + spin_lock_irq(&cq->lock); + __mlx5_ib_cq_clean(cq, qpn, srq); + spin_unlock_irq(&cq->lock); +} + +int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) +{ + struct mlx5_modify_cq_mbox_in *in; + struct mlx5_ib_dev *dev = to_mdev(cq->device); + struct mlx5_ib_cq *mcq = to_mcq(cq); + int err; + u32 fsel; + + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER)) + return -ENOSYS; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + in->cqn = cpu_to_be32(mcq->mcq.cqn); + fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); + in->ctx.cq_period = cpu_to_be16(cq_period); + in->ctx.cq_max_count = cpu_to_be16(cq_count); + in->field_select = cpu_to_be32(fsel); + err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in)); + kfree(in); + + if (err) + mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn); + + return err; +} + +static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, + int entries, struct ib_udata *udata, int *npas, + int *page_shift, int *cqe_size) +{ + struct mlx5_ib_resize_cq ucmd; + struct ib_umem *umem; + int err; + int npages; + struct ib_ucontext *context = cq->buf.umem->context; + + err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + if (err) + return err; + + if (ucmd.reserved0 || ucmd.reserved1) + return -EINVAL; + + umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size, + IB_ACCESS_LOCAL_WRITE, 1); + if (IS_ERR(umem)) { + err = PTR_ERR(umem); + return err; + } + + mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift, + npas, NULL); + + cq->resize_umem = umem; + *cqe_size = ucmd.cqe_size; + + return 0; +} + +static void un_resize_user(struct mlx5_ib_cq *cq) +{ + ib_umem_release(cq->resize_umem); +} + +static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, + int entries, int cqe_size) +{ + int err; + + cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL); + if (!cq->resize_buf) + return -ENOMEM; + + err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size); + if (err) + goto ex; + + init_cq_buf(cq, cq->resize_buf); + + return 0; + +ex: + kfree(cq->resize_buf); + return err; +} + +static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq) +{ + free_cq_buf(dev, cq->resize_buf); + cq->resize_buf = NULL; +} + +static int copy_resize_cqes(struct mlx5_ib_cq *cq) +{ + struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); + struct mlx5_cqe64 *scqe64; + struct mlx5_cqe64 *dcqe64; + void *start_cqe; + void *scqe; + void *dcqe; + int ssize; + int dsize; + int i; + u8 sw_own; + + ssize = cq->buf.cqe_size; + dsize = cq->resize_buf->cqe_size; + if (ssize != dsize) { + mlx5_ib_warn(dev, "resize from different cqe size is not supported\n"); + return -EINVAL; + } + + i = cq->mcq.cons_index; + scqe = get_sw_cqe(cq, i); + scqe64 = ssize == 64 ? scqe : scqe + 64; + start_cqe = scqe; + if (!scqe) { + mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); + return -EINVAL; + } + + while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) { + dcqe = get_cqe_from_buf(cq->resize_buf, + (i + 1) & (cq->resize_buf->nent), + dsize); + dcqe64 = dsize == 64 ? dcqe : dcqe + 64; + sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent); + memcpy(dcqe, scqe, dsize); + dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own; + + ++i; + scqe = get_sw_cqe(cq, i); + scqe64 = ssize == 64 ? 
scqe : scqe + 64; + if (!scqe) { + mlx5_ib_warn(dev, "expected cqe in sw ownership\n"); + return -EINVAL; + } + + if (scqe == start_cqe) { + pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", + cq->mcq.cqn); + return -ENOMEM; + } + } + ++cq->mcq.cons_index; + return 0; +} + +int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibcq->device); + struct mlx5_ib_cq *cq = to_mcq(ibcq); + struct mlx5_modify_cq_mbox_in *in; + int err; + int npas; + int page_shift; + int inlen; + int uninitialized_var(cqe_size); + unsigned long flags; + + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) { + pr_info("Firmware does not support resize CQ\n"); + return -ENOSYS; + } + + if (entries < 1) + return -EINVAL; + + entries = roundup_pow_of_two(entries + 1); + if (entries > dev->mdev.caps.max_cqes + 1) + return -EINVAL; + + if (entries == ibcq->cqe + 1) + return 0; + + mutex_lock(&cq->resize_mutex); + if (udata) { + err = resize_user(dev, cq, entries, udata, &npas, &page_shift, + &cqe_size); + } else { + cqe_size = 64; + err = resize_kernel(dev, cq, entries, cqe_size); + if (!err) { + npas = cq->resize_buf->buf.npages; + page_shift = cq->resize_buf->buf.page_shift; + } + } + + if (err) + goto ex; + + inlen = sizeof(*in) + npas * sizeof(in->pas[0]); + in = mlx5_vzalloc(inlen); + if (!in) { + err = -ENOMEM; + goto ex_resize; + } + + if (udata) + mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, + in->pas, 0); + else + mlx5_fill_page_array(&cq->resize_buf->buf, in->pas); + + in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE | + MLX5_MODIFY_CQ_MASK_PG_OFFSET | + MLX5_MODIFY_CQ_MASK_PG_SIZE); + in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; + in->ctx.page_offset = 0; + in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24); + in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE); + in->cqn = cpu_to_be32(cq->mcq.cqn); + + err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen); + if (err) + goto ex_alloc; + + if (udata) { + cq->ibcq.cqe = entries - 1; + ib_umem_release(cq->buf.umem); + cq->buf.umem = cq->resize_umem; + cq->resize_umem = NULL; + } else { + struct mlx5_ib_cq_buf tbuf; + int resized = 0; + + spin_lock_irqsave(&cq->lock, flags); + if (cq->resize_buf) { + err = copy_resize_cqes(cq); + if (!err) { + tbuf = cq->buf; + cq->buf = *cq->resize_buf; + kfree(cq->resize_buf); + cq->resize_buf = NULL; + resized = 1; + } + } + cq->ibcq.cqe = entries - 1; + spin_unlock_irqrestore(&cq->lock, flags); + if (resized) + free_cq_buf(dev, &tbuf); + } + mutex_unlock(&cq->resize_mutex); + + mlx5_vfree(in); + return 0; + +ex_alloc: + mlx5_vfree(in); + +ex_resize: + if (udata) + un_resize_user(cq); + else + un_resize_kernel(dev, cq); +ex: + mutex_unlock(&cq->resize_mutex); + return err; +} + +int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq) +{ + struct mlx5_ib_cq *cq; + + if (!ibcq) + return 128; + + cq = to_mcq(ibcq); + return cq->cqe_size; +} diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c new file mode 100644 index 00000000000..ece028fc47d --- /dev/null +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
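Both the create path earlier and the resize path above size the ring as roundup_pow_of_two(entries + 1) and report ibcq.cqe as one less than the ring size; the extra slot is presumably what keeps a completely full ring distinguishable from an empty one. Worked numbers under that convention, with a portable stand-in for roundup_pow_of_two:

#include <assert.h>

static unsigned int ring_size(unsigned int entries)
{
        unsigned int n = entries + 1;

        /* round up to the next power of two (32-bit fold) */
        n--;
        n |= n >> 1; n |= n >> 2; n |= n >> 4;
        n |= n >> 8; n |= n >> 16;
        return n + 1;
}

int main(void)
{
        assert(ring_size(255) == 256);  /* reported ibcq.cqe = 255 */
        assert(ring_size(256) == 512);  /* 256 requested -> 512 ring */
        return 0;
}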
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kref.h> +#include <linux/slab.h> +#include <rdma/ib_umem.h> + +#include "mlx5_ib.h" + +struct mlx5_ib_user_db_page { + struct list_head list; + struct ib_umem *umem; + unsigned long user_virt; + int refcnt; +}; + +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, + struct mlx5_db *db) +{ + struct mlx5_ib_user_db_page *page; + int err = 0; + + mutex_lock(&context->db_page_mutex); + + list_for_each_entry(page, &context->db_page_list, list) + if (page->user_virt == (virt & PAGE_MASK)) + goto found; + + page = kmalloc(sizeof(*page), GFP_KERNEL); + if (!page) { + err = -ENOMEM; + goto out; + } + + page->user_virt = (virt & PAGE_MASK); + page->refcnt = 0; + page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, + PAGE_SIZE, 0, 0); + if (IS_ERR(page->umem)) { + err = PTR_ERR(page->umem); + kfree(page); + goto out; + } + + list_add(&page->list, &context->db_page_list); + +found: + db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); + db->u.user_page = page; + ++page->refcnt; + +out: + mutex_unlock(&context->db_page_mutex); + + return err; +} + +void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db) +{ + mutex_lock(&context->db_page_mutex); + + if (!--db->u.user_page->refcnt) { + list_del(&db->u.user_page->list); + ib_umem_release(db->u.user_page->umem); + kfree(db->u.user_page); + } + + mutex_unlock(&context->db_page_mutex); +} diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c new file mode 100644 index 00000000000..5c8938be0e0 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
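mlx5_ib_db_map_user() above pins each distinct user page once and refcounts it, so every doorbell record falling in the same page shares a single ib_umem, and db->dma becomes the page's DMA address plus the in-page offset (virt & ~PAGE_MASK). A simplified model of the lookup-or-create step, with hypothetical types and user-space allocation standing in for the umem pinning (the real code holds db_page_mutex around all of this):

#include <stdint.h>
#include <stdlib.h>

#define PAGE_SZ   4096UL
#define PAGE_MSK  (~(PAGE_SZ - 1))

struct db_page {
        struct db_page *next;
        uintptr_t user_virt;    /* page-aligned user address */
        int refcnt;
};

/* Find or create the tracking entry for the page containing virt. */
static struct db_page *db_page_get(struct db_page **head, uintptr_t virt)
{
        struct db_page *p;

        for (p = *head; p; p = p->next)
                if (p->user_virt == (virt & PAGE_MSK))
                        goto found;

        p = calloc(1, sizeof(*p));
        if (!p)
                return NULL;
        p->user_virt = virt & PAGE_MSK;
        p->next = *head;
        *head = p;
found:
        p->refcnt++;
        return p;
}

The unmap side mirrors this: drop the refcount and release the page only when it reaches zero, as mlx5_ib_db_unmap_user() does above.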
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/cmd.h> +#include <rdma/ib_mad.h> +#include <rdma/ib_smi.h> +#include "mlx5_ib.h" + +enum { + MLX5_IB_VENDOR_CLASS1 = 0x9, + MLX5_IB_VENDOR_CLASS2 = 0xa +}; + +int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + void *in_mad, void *response_mad) +{ + u8 op_modifier = 0; + + /* Key check traps can't be generated unless we have in_wc to + * tell us where to send the trap. + */ + if (ignore_mkey || !in_wc) + op_modifier |= 0x1; + if (ignore_bkey || !in_wc) + op_modifier |= 0x2; + + return mlx5_core_mad_ifc(&dev->mdev, in_mad, response_mad, op_modifier, port); +} + +int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + u16 slid; + int err; + + slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) + return IB_MAD_RESULT_SUCCESS; + + /* Don't process SMInfo queries -- the SMA can't handle them. 
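mlx5_MAD_IFC() above folds the two "ignore key check" requests into the firmware command's opcode modifier: bit 0 skips the M_Key check, bit 1 the B_Key check, and both are forced on whenever there is no ib_wc to tell the firmware where to route a key-violation trap. A standalone sketch of that bit packing:

#include <stdbool.h>
#include <stdint.h>

static uint8_t madifc_op_modifier(bool ignore_mkey, bool ignore_bkey,
                                  bool have_wc)
{
        uint8_t op = 0;

        if (ignore_mkey || !have_wc)
                op |= 0x1;      /* skip M_Key check */
        if (ignore_bkey || !have_wc)
                op |= 0x2;      /* skip B_Key check */
        return op;
}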
+ */ + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) + return IB_MAD_RESULT_SUCCESS; + } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || + in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 || + in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) { + if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && + in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) + return IB_MAD_RESULT_SUCCESS; + } else { + return IB_MAD_RESULT_SUCCESS; + } + + err = mlx5_MAD_IFC(to_mdev(ibdev), + mad_flags & IB_MAD_IGNORE_MKEY, + mad_flags & IB_MAD_IGNORE_BKEY, + port_num, in_wc, in_grh, in_mad, out_mad); + if (err) + return IB_MAD_RESULT_FAILURE; + + /* set return bit in status of directed route responses */ + if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); + + if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) + /* no response for trap repress */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + u16 packet_error; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + + packet_error = be16_to_cpu(out_mad->status); + + dev->mdev.caps.ext_port_cap[port - 1] = (!err && !packet_error) ? + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c new file mode 100644 index 00000000000..364d4b6937f --- /dev/null +++ b/drivers/infiniband/hw/mlx5/main.c @@ -0,0 +1,1543 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
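mlx5_query_ext_port_caps() above only latches the extended-port-info capability when both the command itself and the MAD status word report success; any failure clears the cached flag rather than leaving it stale. That decision in isolation, with a stand-in constant for MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO:

#include <stdint.h>

#define EXT_PORT_INFO_CAP 0x1

static uint32_t ext_port_cap(int cmd_err, uint16_t mad_status)
{
        return (!cmd_err && !mad_status) ? EXT_PORT_INFO_CAP : 0;
}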
+ */ + +#include <asm-generic/kmap_types.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/io-mapping.h> +#include <linux/sched.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_umem.h> +#include "user.h" +#include "mlx5_ib.h" + +#define DRIVER_NAME "mlx5_ib" +#define DRIVER_VERSION "2.2-1" +#define DRIVER_RELDATE "Feb 2014" + +MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRIVER_VERSION); + +static int prof_sel = 2; +module_param_named(prof_sel, prof_sel, int, 0444); +MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); + +static char mlx5_version[] = + DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v" + DRIVER_VERSION " (" DRIVER_RELDATE ")\n"; + +static struct mlx5_profile profile[] = { + [0] = { + .mask = 0, + }, + [1] = { + .mask = MLX5_PROF_MASK_QP_SIZE, + .log_max_qp = 12, + }, + [2] = { + .mask = MLX5_PROF_MASK_QP_SIZE | + MLX5_PROF_MASK_MR_CACHE, + .log_max_qp = 17, + .mr_cache[0] = { + .size = 500, + .limit = 250 + }, + .mr_cache[1] = { + .size = 500, + .limit = 250 + }, + .mr_cache[2] = { + .size = 500, + .limit = 250 + }, + .mr_cache[3] = { + .size = 500, + .limit = 250 + }, + .mr_cache[4] = { + .size = 500, + .limit = 250 + }, + .mr_cache[5] = { + .size = 500, + .limit = 250 + }, + .mr_cache[6] = { + .size = 500, + .limit = 250 + }, + .mr_cache[7] = { + .size = 500, + .limit = 250 + }, + .mr_cache[8] = { + .size = 500, + .limit = 250 + }, + .mr_cache[9] = { + .size = 500, + .limit = 250 + }, + .mr_cache[10] = { + .size = 500, + .limit = 250 + }, + .mr_cache[11] = { + .size = 500, + .limit = 250 + }, + .mr_cache[12] = { + .size = 64, + .limit = 32 + }, + .mr_cache[13] = { + .size = 32, + .limit = 16 + }, + .mr_cache[14] = { + .size = 16, + .limit = 8 + }, + .mr_cache[15] = { + .size = 8, + .limit = 4 + }, + }, +}; + +int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn) +{ + struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + struct mlx5_eq *eq, *n; + int err = -ENOENT; + + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { + if (eq->index == vector) { + *eqn = eq->eqn; + *irqn = eq->irqn; + err = 0; + break; + } + } + spin_unlock(&table->lock); + + return err; +} + +static int alloc_comp_eqs(struct mlx5_ib_dev *dev) +{ + struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + char name[MLX5_MAX_EQ_NAME]; + struct mlx5_eq *eq, *n; + int ncomp_vec; + int nent; + int err; + int i; + + INIT_LIST_HEAD(&dev->eqs_list); + ncomp_vec = table->num_comp_vectors; + nent = MLX5_COMP_EQ_SIZE; + for (i = 0; i < ncomp_vec; i++) { + eq = kzalloc(sizeof(*eq), GFP_KERNEL); + if (!eq) { + err = -ENOMEM; + goto clean; + } + + snprintf(name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i); + err = mlx5_create_map_eq(&dev->mdev, eq, + i + MLX5_EQ_VEC_COMP_BASE, nent, 0, + name, &dev->mdev.priv.uuari.uars[0]); + if (err) { + kfree(eq); + goto clean; + } + mlx5_ib_dbg(dev, "allocated completion EQN %d\n", eq->eqn); + eq->index = i; + spin_lock(&table->lock); + list_add_tail(&eq->list, &dev->eqs_list); + spin_unlock(&table->lock); + } + + dev->num_comp_vectors = ncomp_vec; + return 0; + +clean: + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { + list_del(&eq->list); + spin_unlock(&table->lock); + if (mlx5_destroy_unmap_eq(&dev->mdev, 
eq)) + mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); + kfree(eq); + spin_lock(&table->lock); + } + spin_unlock(&table->lock); + return err; +} + +static void free_comp_eqs(struct mlx5_ib_dev *dev) +{ + struct mlx5_eq_table *table = &dev->mdev.priv.eq_table; + struct mlx5_eq *eq, *n; + + spin_lock(&table->lock); + list_for_each_entry_safe(eq, n, &dev->eqs_list, list) { + list_del(&eq->list); + spin_unlock(&table->lock); + if (mlx5_destroy_unmap_eq(&dev->mdev, eq)) + mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn); + kfree(eq); + spin_lock(&table->lock); + } + spin_unlock(&table->lock); +} + +static int mlx5_ib_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + int max_rq_sg; + int max_sq_sg; + u64 flags; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memset(props, 0, sizeof(*props)); + + props->fw_ver = ((u64)fw_rev_maj(&dev->mdev) << 32) | + (fw_rev_min(&dev->mdev) << 16) | + fw_rev_sub(&dev->mdev); + props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | + IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN; + flags = dev->mdev.caps.flags; + if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR) + props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; + if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR) + props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; + if (flags & MLX5_DEV_CAP_FLAG_APM) + props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; + props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; + if (flags & MLX5_DEV_CAP_FLAG_XRC) + props->device_cap_flags |= IB_DEVICE_XRC; + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) { + props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; + /* At this stage no support for signature handover */ + props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | + IB_PROT_T10DIF_TYPE_2 | + IB_PROT_T10DIF_TYPE_3; + props->sig_guard_cap = IB_GUARD_T10DIF_CRC | + IB_GUARD_T10DIF_CSUM; + } + if (flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST) + props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + + props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & + 0xffffff; + props->vendor_part_id = be16_to_cpup((__be16 *)(out_mad->data + 30)); + props->hw_ver = be32_to_cpup((__be32 *)(out_mad->data + 32)); + memcpy(&props->sys_image_guid, out_mad->data + 4, 8); + + props->max_mr_size = ~0ull; + props->page_size_cap = dev->mdev.caps.min_page_sz; + props->max_qp = 1 << dev->mdev.caps.log_max_qp; + props->max_qp_wr = dev->mdev.caps.max_wqes; + max_rq_sg = dev->mdev.caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); + max_sq_sg = (dev->mdev.caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) / + sizeof(struct mlx5_wqe_data_seg); + props->max_sge = min(max_rq_sg, max_sq_sg); + props->max_cq = 1 << dev->mdev.caps.log_max_cq; + props->max_cqe = dev->mdev.caps.max_cqes - 1; + props->max_mr = 1 << dev->mdev.caps.log_max_mkey; + props->max_pd = 1 << dev->mdev.caps.log_max_pd; + props->max_qp_rd_atom = dev->mdev.caps.max_ra_req_qp; + props->max_qp_init_rd_atom = dev->mdev.caps.max_ra_res_qp; + props->max_res_rd_atom = props->max_qp_rd_atom * 
props->max_qp; + props->max_srq = 1 << dev->mdev.caps.log_max_srq; + props->max_srq_wr = dev->mdev.caps.max_srq_wqes - 1; + props->max_srq_sge = max_rq_sg - 1; + props->max_fast_reg_page_list_len = (unsigned int)-1; + props->local_ca_ack_delay = dev->mdev.caps.local_ca_ack_delay; + props->atomic_cap = IB_ATOMIC_NONE; + props->masked_atomic_cap = IB_ATOMIC_NONE; + props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28)); + props->max_mcast_grp = 1 << dev->mdev.caps.log_max_mcg; + props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; + props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ + +out: + kfree(in_mad); + kfree(out_mad); + + return err; +} + +int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int ext_active_speed; + int err = -ENOMEM; + + if (port < 1 || port > dev->mdev.caps.num_ports) { + mlx5_ib_warn(dev, "invalid port number %d\n", port); + return -EINVAL; + } + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + memset(props, 0, sizeof(*props)); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) { + mlx5_ib_warn(dev, "err %d\n", err); + goto out; + } + + + props->lid = be16_to_cpup((__be16 *)(out_mad->data + 16)); + props->lmc = out_mad->data[34] & 0x7; + props->sm_lid = be16_to_cpup((__be16 *)(out_mad->data + 18)); + props->sm_sl = out_mad->data[36] & 0xf; + props->state = out_mad->data[32] & 0xf; + props->phys_state = out_mad->data[33] >> 4; + props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); + props->gid_tbl_len = out_mad->data[50]; + props->max_msg_sz = 1 << to_mdev(ibdev)->mdev.caps.log_max_msg; + props->pkey_tbl_len = to_mdev(ibdev)->mdev.caps.port[port - 1].pkey_table_len; + props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); + props->active_width = out_mad->data[31] & 0xf; + props->active_speed = out_mad->data[35] >> 4; + props->max_mtu = out_mad->data[41] & 0xf; + props->active_mtu = out_mad->data[36] >> 4; + props->subnet_timeout = out_mad->data[51] & 0x1f; + props->max_vl_num = out_mad->data[37] >> 4; + props->init_type_reply = out_mad->data[41] >> 4; + + /* Check if extended speeds (EDR/FDR/...) 
are supported */ + if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { + ext_active_speed = out_mad->data[62] >> 4; + + switch (ext_active_speed) { + case 1: + props->active_speed = 16; /* FDR */ + break; + case 2: + props->active_speed = 32; /* EDR */ + break; + } + } + + /* If reported active speed is QDR, check if it is FDR-10 */ + if (props->active_speed == 4) { + if (dev->mdev.caps.ext_port_cap[port - 1] & + MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { + init_query_mad(in_mad); + in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(dev, 1, 1, port, + NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + /* Checking LinkSpeedActive for FDR-10 */ + if (out_mad->data[15] & 0x1) + props->active_speed = 8; + } + } + +out: + kfree(in_mad); + kfree(out_mad); + + return err; +} + +static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(gid->raw, out_mad->data + 8, 8); + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; + in_mad->attr_mod = cpu_to_be32(index / 8); + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; + in_mad->attr_mod = cpu_to_be32(index / 32); + + err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + *pkey = be16_to_cpu(((__be16 *)out_mad->data)[index % 32]); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +struct mlx5_reg_node_desc { + u8 desc[64]; +}; + +static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, + struct ib_device_modify *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_reg_node_desc in; + struct mlx5_reg_node_desc out; + int err; + + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) + return -EOPNOTSUPP; + + if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) + return 0; + + /* + * If possible, pass node desc to FW, so it can generate + * a 144 trap. If cmd fails, just ignore. 
+ */ + memcpy(&in, props->node_desc, 64); + err = mlx5_core_access_reg(&dev->mdev, &in, sizeof(in), &out, + sizeof(out), MLX5_REG_NODE_DESC, 0, 1); + if (err) + return err; + + memcpy(ibdev->node_desc, props->node_desc, 64); + + return err; +} + +static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, + struct ib_port_modify *props) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct ib_port_attr attr; + u32 tmp; + int err; + + mutex_lock(&dev->cap_mask_mutex); + + err = mlx5_ib_query_port(ibdev, port, &attr); + if (err) + goto out; + + tmp = (attr.port_cap_flags | props->set_port_cap_mask) & + ~props->clr_port_cap_mask; + + err = mlx5_set_port_caps(&dev->mdev, port, tmp); + +out: + mutex_unlock(&dev->cap_mask_mutex); + return err; +} + +static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_alloc_ucontext_req_v2 req; + struct mlx5_ib_alloc_ucontext_resp resp; + struct mlx5_ib_ucontext *context; + struct mlx5_uuar_info *uuari; + struct mlx5_uar *uars; + int gross_uuars; + int num_uars; + int ver; + int uuarn; + int err; + int i; + int reqlen; + + if (!dev->ib_active) + return ERR_PTR(-EAGAIN); + + memset(&req, 0, sizeof(req)); + reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); + if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) + ver = 0; + else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2)) + ver = 2; + else + return ERR_PTR(-EINVAL); + + err = ib_copy_from_udata(&req, udata, reqlen); + if (err) + return ERR_PTR(err); + + if (req.flags || req.reserved) + return ERR_PTR(-EINVAL); + + if (req.total_num_uuars > MLX5_MAX_UUARS) + return ERR_PTR(-ENOMEM); + + if (req.total_num_uuars == 0) + return ERR_PTR(-EINVAL); + + req.total_num_uuars = ALIGN(req.total_num_uuars, + MLX5_NON_FP_BF_REGS_PER_PAGE); + if (req.num_low_latency_uuars > req.total_num_uuars - 1) + return ERR_PTR(-EINVAL); + + num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; + gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; + resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp; + resp.bf_reg_size = dev->mdev.caps.bf_reg_size; + resp.cache_line_size = L1_CACHE_BYTES; + resp.max_sq_desc_sz = dev->mdev.caps.max_sq_desc_sz; + resp.max_rq_desc_sz = dev->mdev.caps.max_rq_desc_sz; + resp.max_send_wqebb = dev->mdev.caps.max_wqes; + resp.max_recv_wr = dev->mdev.caps.max_wqes; + resp.max_srq_recv_wr = dev->mdev.caps.max_srq_wqes; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + uuari = &context->uuari; + mutex_init(&uuari->lock); + uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL); + if (!uars) { + err = -ENOMEM; + goto out_ctx; + } + + uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars), + sizeof(*uuari->bitmap), + GFP_KERNEL); + if (!uuari->bitmap) { + err = -ENOMEM; + goto out_uar_ctx; + } + /* + * clear all fast path uuars + */ + for (i = 0; i < gross_uuars; i++) { + uuarn = i & 3; + if (uuarn == 2 || uuarn == 3) + set_bit(i, uuari->bitmap); + } + + uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL); + if (!uuari->count) { + err = -ENOMEM; + goto out_bitmap; + } + + for (i = 0; i < num_uars; i++) { + err = mlx5_cmd_alloc_uar(&dev->mdev, &uars[i].index); + if (err) + goto out_count; + } + + INIT_LIST_HEAD(&context->db_page_list); + mutex_init(&context->db_page_mutex); + + resp.tot_uuars = req.total_num_uuars; + resp.num_ports = dev->mdev.caps.num_ports; + err = ib_copy_to_udata(udata, &resp, + 
sizeof(resp) - sizeof(resp.reserved)); + if (err) + goto out_uars; + + uuari->ver = ver; + uuari->num_low_latency_uuars = req.num_low_latency_uuars; + uuari->uars = uars; + uuari->num_uars = num_uars; + return &context->ibucontext; + +out_uars: + for (i--; i >= 0; i--) + mlx5_cmd_free_uar(&dev->mdev, uars[i].index); +out_count: + kfree(uuari->count); + +out_bitmap: + kfree(uuari->bitmap); + +out_uar_ctx: + kfree(uars); + +out_ctx: + kfree(context); + return ERR_PTR(err); +} + +static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +{ + struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); + struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); + struct mlx5_uuar_info *uuari = &context->uuari; + int i; + + for (i = 0; i < uuari->num_uars; i++) { + if (mlx5_cmd_free_uar(&dev->mdev, uuari->uars[i].index)) + mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index); + } + + kfree(uuari->count); + kfree(uuari->bitmap); + kfree(uuari->uars); + kfree(context); + + return 0; +} + +static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index) +{ + return (pci_resource_start(dev->mdev.pdev, 0) >> PAGE_SHIFT) + index; +} + +static int get_command(unsigned long offset) +{ + return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK; +} + +static int get_arg(unsigned long offset) +{ + return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1); +} + +static int get_index(unsigned long offset) +{ + return get_arg(offset); +} + +static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) +{ + struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); + struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); + struct mlx5_uuar_info *uuari = &context->uuari; + unsigned long command; + unsigned long idx; + phys_addr_t pfn; + + command = get_command(vma->vm_pgoff); + switch (command) { + case MLX5_IB_MMAP_REGULAR_PAGE: + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + idx = get_index(vma->vm_pgoff); + if (idx >= uuari->num_uars) + return -EINVAL; + + pfn = uar_index2pfn(dev, uuari->uars[idx].index); + mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx, + (unsigned long long)pfn); + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n", + vma->vm_start, + (unsigned long long)pfn << PAGE_SHIFT); + break; + + case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: + return -ENOSYS; + + default: + return -EINVAL; + } + + return 0; +} + +static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn) +{ + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_mkey_seg *seg; + struct mlx5_core_mr mr; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + seg = &in->seg; + seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA; + seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); + seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + seg->start_addr = 0; + + err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in), + NULL, NULL, NULL); + if (err) { + mlx5_ib_warn(dev, "failed to create mkey, %d\n", err); + goto err_in; + } + + kfree(in); + *key = mr.key; + + return 0; + +err_in: + kfree(in); + + return err; +} + +static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key) +{ + struct mlx5_core_mr mr; + int err; + + memset(&mr, 0, sizeof(mr)); + mr.key = key; + err = mlx5_core_destroy_mkey(&dev->mdev, &mr); + if (err) + mlx5_ib_warn(dev, "failed to destroy mkey 
0x%x\n", key); +} + +static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx5_ib_alloc_pd_resp resp; + struct mlx5_ib_pd *pd; + int err; + + pd = kmalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + err = mlx5_core_alloc_pd(&to_mdev(ibdev)->mdev, &pd->pdn); + if (err) { + kfree(pd); + return ERR_PTR(err); + } + + if (context) { + resp.pdn = pd->pdn; + if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { + mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } else { + err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn); + if (err) { + mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn); + kfree(pd); + return ERR_PTR(err); + } + } + + return &pd->ibpd; +} + +static int mlx5_ib_dealloc_pd(struct ib_pd *pd) +{ + struct mlx5_ib_dev *mdev = to_mdev(pd->device); + struct mlx5_ib_pd *mpd = to_mpd(pd); + + if (!pd->uobject) + free_pa_mkey(mdev, mpd->pa_lkey); + + mlx5_core_dealloc_pd(&mdev->mdev, mpd->pdn); + kfree(mpd); + + return 0; +} + +static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + int err; + + err = mlx5_core_attach_mcg(&dev->mdev, gid, ibqp->qp_num); + if (err) + mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", + ibqp->qp_num, gid->raw); + + return err; +} + +static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + int err; + + err = mlx5_core_detach_mcg(&dev->mdev, gid, ibqp->qp_num); + if (err) + mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", + ibqp->qp_num, gid->raw); + + return err; +} + +static int init_node_data(struct mlx5_ib_dev *dev) +{ + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; + + err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + + in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; + + err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); + if (err) + goto out; + + dev->mdev.rev_id = be32_to_cpup((__be32 *)(out_mad->data + 32)); + memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + + return sprintf(buf, "%d\n", dev->mdev.priv.fw_pages); +} + +static ssize_t show_reg_pages(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + + return sprintf(buf, "%d\n", dev->mdev.priv.reg_pages); +} + +static ssize_t show_hca(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + return sprintf(buf, "MT%d\n", dev->mdev.pdev->device); +} + +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + return sprintf(buf, "%d.%d.%d\n", 
fw_rev_maj(&dev->mdev), + fw_rev_min(&dev->mdev), fw_rev_sub(&dev->mdev)); +} + +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + return sprintf(buf, "%x\n", dev->mdev.rev_id); +} + +static ssize_t show_board(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct mlx5_ib_dev *dev = + container_of(device, struct mlx5_ib_dev, ib_dev.dev); + return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, + dev->mdev.board_id); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); +static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); +static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); + +static struct device_attribute *mlx5_class_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver, + &dev_attr_hca_type, + &dev_attr_board_id, + &dev_attr_fw_pages, + &dev_attr_reg_pages, +}; + +static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + void *data) +{ + struct mlx5_ib_dev *ibdev = container_of(dev, struct mlx5_ib_dev, mdev); + struct ib_event ibev; + u8 port = 0; + + switch (event) { + case MLX5_DEV_EVENT_SYS_ERROR: + ibdev->ib_active = false; + ibev.event = IB_EVENT_DEVICE_FATAL; + break; + + case MLX5_DEV_EVENT_PORT_UP: + ibev.event = IB_EVENT_PORT_ACTIVE; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_PORT_DOWN: + ibev.event = IB_EVENT_PORT_ERR; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_PORT_INITIALIZED: + /* not used by ULPs */ + return; + + case MLX5_DEV_EVENT_LID_CHANGE: + ibev.event = IB_EVENT_LID_CHANGE; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_PKEY_CHANGE: + ibev.event = IB_EVENT_PKEY_CHANGE; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_GUID_CHANGE: + ibev.event = IB_EVENT_GID_CHANGE; + port = *(u8 *)data; + break; + + case MLX5_DEV_EVENT_CLIENT_REREG: + ibev.event = IB_EVENT_CLIENT_REREGISTER; + port = *(u8 *)data; + break; + } + + ibev.device = &ibdev->ib_dev; + ibev.element.port_num = port; + + if (port < 1 || port > ibdev->num_ports) { + mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); + return; + } + + if (ibdev->ib_active) + ib_dispatch_event(&ibev); +} + +static void get_ext_port_caps(struct mlx5_ib_dev *dev) +{ + int port; + + for (port = 1; port <= dev->mdev.caps.num_ports; port++) + mlx5_query_ext_port_caps(dev, port); +} + +static int get_port_caps(struct mlx5_ib_dev *dev) +{ + struct ib_device_attr *dprops = NULL; + struct ib_port_attr *pprops = NULL; + int err = 0; + int port; + + pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); + if (!pprops) + goto out; + + dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); + if (!dprops) + goto out; + + err = mlx5_ib_query_device(&dev->ib_dev, dprops); + if (err) { + mlx5_ib_warn(dev, "query_device failed %d\n", err); + goto out; + } + + for (port = 1; port <= dev->mdev.caps.num_ports; port++) { + err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); + if (err) { + mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err); + break; + } + dev->mdev.caps.port[port - 1].pkey_table_len = dprops->max_pkeys; + dev->mdev.caps.port[port - 1].gid_table_len = pprops->gid_tbl_len; + mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", + dprops->max_pkeys, pprops->gid_tbl_len); + } + +out: + kfree(pprops); + kfree(dprops); + + return err; +} + 
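Editorial aside: the create_umr_res() function that follows brings the driver's internal UMR QP from RESET to RTS through the usual InfiniBand state ladder (INIT, then RTR, then RTS), calling mlx5_ib_modify_qp() directly because this QP is never exposed to user space. For readers less familiar with the verbs flow, here is a minimal sketch of the same ladder written against the generic ib_modify_qp() API; the helper name is hypothetical and a normal RC or UD QP would need additional attributes (path MTU, AV, PSNs, timeouts) at each transition:

/* Hypothetical illustration only; mirrors the attribute-mask sequence
 * used by create_umr_res() below, not part of this patch.
 */
static int example_bring_qp_to_rts(struct ib_qp *qp)
{
	struct ib_qp_attr attr;
	int ret;

	/* RESET -> INIT: associate the QP with a port and P_Key index */
	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_INIT;
	attr.pkey_index = 0;
	attr.port_num = 1;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
	if (ret)
		return ret;

	/* INIT -> RTR: the receive side may now accept incoming work */
	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret)
		return ret;

	/* RTR -> RTS: the send side may now post work requests */
	memset(&attr, 0, sizeof(attr));
	attr.qp_state = IB_QPS_RTS;
	return ib_modify_qp(qp, &attr, IB_QP_STATE);
}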
+static void destroy_umrc_res(struct mlx5_ib_dev *dev) +{ + int err; + + err = mlx5_mr_cache_cleanup(dev); + if (err) + mlx5_ib_warn(dev, "mr cache cleanup failed\n"); + + mlx5_ib_destroy_qp(dev->umrc.qp); + ib_destroy_cq(dev->umrc.cq); + ib_dereg_mr(dev->umrc.mr); + ib_dealloc_pd(dev->umrc.pd); +} + +enum { + MAX_UMR_WR = 128, +}; + +static int create_umr_res(struct mlx5_ib_dev *dev) +{ + struct ib_qp_init_attr *init_attr = NULL; + struct ib_qp_attr *attr = NULL; + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + struct ib_mr *mr; + int ret; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); + if (!attr || !init_attr) { + ret = -ENOMEM; + goto error_0; + } + + pd = ib_alloc_pd(&dev->ib_dev); + if (IS_ERR(pd)) { + mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); + ret = PTR_ERR(pd); + goto error_0; + } + + mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(mr)) { + mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n"); + ret = PTR_ERR(mr); + goto error_1; + } + + cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 128, + 0); + if (IS_ERR(cq)) { + mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); + ret = PTR_ERR(cq); + goto error_2; + } + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + + init_attr->send_cq = cq; + init_attr->recv_cq = cq; + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->cap.max_send_wr = MAX_UMR_WR; + init_attr->cap.max_send_sge = 1; + init_attr->qp_type = MLX5_IB_QPT_REG_UMR; + init_attr->port_num = 1; + qp = mlx5_ib_create_qp(pd, init_attr, NULL); + if (IS_ERR(qp)) { + mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); + ret = PTR_ERR(qp); + goto error_3; + } + qp->device = &dev->ib_dev; + qp->real_qp = qp; + qp->uobject = NULL; + qp->qp_type = MLX5_IB_QPT_REG_UMR; + + attr->qp_state = IB_QPS_INIT; + attr->port_num = 1; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | + IB_QP_PORT, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTR; + attr->path_mtu = IB_MTU_256; + + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTS; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); + goto error_4; + } + + dev->umrc.qp = qp; + dev->umrc.cq = cq; + dev->umrc.mr = mr; + dev->umrc.pd = pd; + + sema_init(&dev->umrc.sem, MAX_UMR_WR); + ret = mlx5_mr_cache_init(dev); + if (ret) { + mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); + goto error_4; + } + + kfree(attr); + kfree(init_attr); + + return 0; + +error_4: + mlx5_ib_destroy_qp(qp); + +error_3: + ib_destroy_cq(cq); + +error_2: + ib_dereg_mr(mr); + +error_1: + ib_dealloc_pd(pd); + +error_0: + kfree(attr); + kfree(init_attr); + return ret; +} + +static int create_dev_resources(struct mlx5_ib_resources *devr) +{ + struct ib_srq_init_attr attr; + struct mlx5_ib_dev *dev; + int ret = 0; + + dev = container_of(devr, struct mlx5_ib_dev, devr); + + devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); + if (IS_ERR(devr->p0)) { + ret = PTR_ERR(devr->p0); + goto error0; + } + devr->p0->device = &dev->ib_dev; + devr->p0->uobject = NULL; + atomic_set(&devr->p0->usecnt, 0); + + devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL); + if (IS_ERR(devr->c0)) { + ret = PTR_ERR(devr->c0); + 
goto error1; + } + devr->c0->device = &dev->ib_dev; + devr->c0->uobject = NULL; + devr->c0->comp_handler = NULL; + devr->c0->event_handler = NULL; + devr->c0->cq_context = NULL; + atomic_set(&devr->c0->usecnt, 0); + + devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + if (IS_ERR(devr->x0)) { + ret = PTR_ERR(devr->x0); + goto error2; + } + devr->x0->device = &dev->ib_dev; + devr->x0->inode = NULL; + atomic_set(&devr->x0->usecnt, 0); + mutex_init(&devr->x0->tgt_qp_mutex); + INIT_LIST_HEAD(&devr->x0->tgt_qp_list); + + devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); + if (IS_ERR(devr->x1)) { + ret = PTR_ERR(devr->x1); + goto error3; + } + devr->x1->device = &dev->ib_dev; + devr->x1->inode = NULL; + atomic_set(&devr->x1->usecnt, 0); + mutex_init(&devr->x1->tgt_qp_mutex); + INIT_LIST_HEAD(&devr->x1->tgt_qp_list); + + memset(&attr, 0, sizeof(attr)); + attr.attr.max_sge = 1; + attr.attr.max_wr = 1; + attr.srq_type = IB_SRQT_XRC; + attr.ext.xrc.cq = devr->c0; + attr.ext.xrc.xrcd = devr->x0; + + devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); + if (IS_ERR(devr->s0)) { + ret = PTR_ERR(devr->s0); + goto error4; + } + devr->s0->device = &dev->ib_dev; + devr->s0->pd = devr->p0; + devr->s0->uobject = NULL; + devr->s0->event_handler = NULL; + devr->s0->srq_context = NULL; + devr->s0->srq_type = IB_SRQT_XRC; + devr->s0->ext.xrc.xrcd = devr->x0; + devr->s0->ext.xrc.cq = devr->c0; + atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); + atomic_inc(&devr->s0->ext.xrc.cq->usecnt); + atomic_inc(&devr->p0->usecnt); + atomic_set(&devr->s0->usecnt, 0); + + return 0; + +error4: + mlx5_ib_dealloc_xrcd(devr->x1); +error3: + mlx5_ib_dealloc_xrcd(devr->x0); +error2: + mlx5_ib_destroy_cq(devr->c0); +error1: + mlx5_ib_dealloc_pd(devr->p0); +error0: + return ret; +} + +static void destroy_dev_resources(struct mlx5_ib_resources *devr) +{ + mlx5_ib_destroy_srq(devr->s0); + mlx5_ib_dealloc_xrcd(devr->x0); + mlx5_ib_dealloc_xrcd(devr->x1); + mlx5_ib_destroy_cq(devr->c0); + mlx5_ib_dealloc_pd(devr->p0); +} + +static int init_one(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct mlx5_core_dev *mdev; + struct mlx5_ib_dev *dev; + int err; + int i; + + printk_once(KERN_INFO "%s", mlx5_version); + + dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); + if (!dev) + return -ENOMEM; + + mdev = &dev->mdev; + mdev->event = mlx5_ib_event; + if (prof_sel >= ARRAY_SIZE(profile)) { + pr_warn("selected profile out of range, selecting default\n"); + prof_sel = 0; + } + mdev->profile = &profile[prof_sel]; + err = mlx5_dev_init(mdev, pdev); + if (err) + goto err_free; + + err = get_port_caps(dev); + if (err) + goto err_cleanup; + + get_ext_port_caps(dev); + + err = alloc_comp_eqs(dev); + if (err) + goto err_cleanup; + + MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock); + + strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); + dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.local_dma_lkey = mdev->caps.reserved_lkey; + dev->num_ports = mdev->caps.num_ports; + dev->ib_dev.phys_port_cnt = dev->num_ports; + dev->ib_dev.num_comp_vectors = dev->num_comp_vectors; + dev->ib_dev.dma_device = &mdev->pdev->dev; + + dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION; + dev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << 
IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | + (1ull << IB_USER_VERBS_CMD_OPEN_QP); + + dev->ib_dev.query_device = mlx5_ib_query_device; + dev->ib_dev.query_port = mlx5_ib_query_port; + dev->ib_dev.query_gid = mlx5_ib_query_gid; + dev->ib_dev.query_pkey = mlx5_ib_query_pkey; + dev->ib_dev.modify_device = mlx5_ib_modify_device; + dev->ib_dev.modify_port = mlx5_ib_modify_port; + dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext; + dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext; + dev->ib_dev.mmap = mlx5_ib_mmap; + dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd; + dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd; + dev->ib_dev.create_ah = mlx5_ib_create_ah; + dev->ib_dev.query_ah = mlx5_ib_query_ah; + dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah; + dev->ib_dev.create_srq = mlx5_ib_create_srq; + dev->ib_dev.modify_srq = mlx5_ib_modify_srq; + dev->ib_dev.query_srq = mlx5_ib_query_srq; + dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq; + dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv; + dev->ib_dev.create_qp = mlx5_ib_create_qp; + dev->ib_dev.modify_qp = mlx5_ib_modify_qp; + dev->ib_dev.query_qp = mlx5_ib_query_qp; + dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; + dev->ib_dev.post_send = mlx5_ib_post_send; + dev->ib_dev.post_recv = mlx5_ib_post_recv; + dev->ib_dev.create_cq = mlx5_ib_create_cq; + dev->ib_dev.modify_cq = mlx5_ib_modify_cq; + dev->ib_dev.resize_cq = mlx5_ib_resize_cq; + dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq; + dev->ib_dev.poll_cq = mlx5_ib_poll_cq; + dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; + dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; + dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; + dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; + dev->ib_dev.destroy_mr = mlx5_ib_destroy_mr; + dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; + dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; + dev->ib_dev.process_mad = mlx5_ib_process_mad; + dev->ib_dev.create_mr = mlx5_ib_create_mr; + dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr; + dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; + dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; + dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; + + if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) { + dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; + dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; + dev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | + (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + } + + err = init_node_data(dev); + if (err) + goto err_eqs; + + mutex_init(&dev->cap_mask_mutex); + spin_lock_init(&dev->mr_lock); + + err = create_dev_resources(&dev->devr); + if (err) + goto err_eqs; + + err = ib_register_device(&dev->ib_dev, NULL); + if (err) + goto err_rsrc; + + err = create_umr_res(dev); + if (err) + goto err_dev; + + for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { + err = device_create_file(&dev->ib_dev.dev, + 
mlx5_class_attributes[i]); + if (err) + goto err_umrc; + } + + dev->ib_active = true; + + return 0; + +err_umrc: + destroy_umrc_res(dev); + +err_dev: + ib_unregister_device(&dev->ib_dev); + +err_rsrc: + destroy_dev_resources(&dev->devr); + +err_eqs: + free_comp_eqs(dev); + +err_cleanup: + mlx5_dev_cleanup(mdev); + +err_free: + ib_dealloc_device((struct ib_device *)dev); + + return err; +} + +static void remove_one(struct pci_dev *pdev) +{ + struct mlx5_ib_dev *dev = mlx5_pci2ibdev(pdev); + + destroy_umrc_res(dev); + ib_unregister_device(&dev->ib_dev); + destroy_dev_resources(&dev->devr); + free_comp_eqs(dev); + mlx5_dev_cleanup(&dev->mdev); + ib_dealloc_device(&dev->ib_dev); +} + +static DEFINE_PCI_DEVICE_TABLE(mlx5_ib_pci_table) = { + { PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */ + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, mlx5_ib_pci_table); + +static struct pci_driver mlx5_ib_driver = { + .name = DRIVER_NAME, + .id_table = mlx5_ib_pci_table, + .probe = init_one, + .remove = remove_one +}; + +static int __init mlx5_ib_init(void) +{ + return pci_register_driver(&mlx5_ib_driver); +} + +static void __exit mlx5_ib_cleanup(void) +{ + pci_unregister_driver(&mlx5_ib_driver); +} + +module_init(mlx5_ib_init); +module_exit(mlx5_ib_cleanup); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c new file mode 100644 index 00000000000..8499aec94db --- /dev/null +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/module.h> +#include <rdma/ib_umem.h> +#include "mlx5_ib.h" + +/* @umem: umem object to scan + * @addr: ib virtual address requested by the user + * @count: number of PAGE_SIZE pages covered by umem + * @shift: page shift for the compound pages found in the region + * @ncont: number of compound pages + * @order: log2 of the number of compound pages + */ +void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, + int *ncont, int *order) +{ + unsigned long tmp; + unsigned long m; + int i, k; + u64 base = 0; + int p = 0; + int skip; + int mask; + u64 len; + u64 pfn; + struct scatterlist *sg; + int entry; + + addr = addr >> PAGE_SHIFT; + tmp = (unsigned long)addr; + m = find_first_bit(&tmp, sizeof(tmp)); + skip = 1 << m; + mask = skip - 1; + i = 0; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + pfn = sg_dma_address(sg) >> PAGE_SHIFT; + for (k = 0; k < len; k++) { + if (!(i & mask)) { + tmp = (unsigned long)pfn; + m = min(m, find_first_bit(&tmp, sizeof(tmp))); + skip = 1 << m; + mask = skip - 1; + base = pfn; + p = 0; + } else { + if (base + p != pfn) { + tmp = (unsigned long)p; + m = find_first_bit(&tmp, sizeof(tmp)); + skip = 1 << m; + mask = skip - 1; + base = pfn; + p = 0; + } + } + p++; + i++; + } + } + + if (i) { + m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); + + if (order) + *order = ilog2(roundup_pow_of_two(i) >> m); + + *ncont = DIV_ROUND_UP(i, (1 << m)); + } else { + m = 0; + + if (order) + *order = 0; + + *ncont = 0; + } + *shift = PAGE_SHIFT + m; + *count = i; +} + +void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int page_shift, __be64 *pas, int umr) +{ + int shift = page_shift - PAGE_SHIFT; + int mask = (1 << shift) - 1; + int i, k; + u64 cur = 0; + u64 base; + int len; + struct scatterlist *sg; + int entry; + + i = 0; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + base = sg_dma_address(sg); + for (k = 0; k < len; k++) { + if (!(i & mask)) { + cur = base + (k << PAGE_SHIFT); + if (umr) + cur |= 3; + + pas[i >> shift] = cpu_to_be64(cur); + mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", + i >> shift, be64_to_cpu(pas[i >> shift])); + } else + mlx5_ib_dbg(dev, "=====> 0x%llx\n", + base + (k << PAGE_SHIFT)); + i++; + } + } +} + +int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) +{ + u64 page_size; + u64 page_mask; + u64 off_size; + u64 off_mask; + u64 buf_off; + + page_size = 1 << page_shift; + page_mask = page_size - 1; + buf_off = addr & page_mask; + off_size = page_size >> 6; + off_mask = off_size - 1; + + if (buf_off & off_mask) + return -EINVAL; + + *offset = buf_off >> ilog2(off_size); + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h new file mode 100644 index 00000000000..f2ccf1a5a29 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -0,0 +1,574 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_IB_H +#define MLX5_IB_H + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_smi.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/srq.h> +#include <linux/types.h> + +#define mlx5_ib_dbg(dev, format, arg...) \ +pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ + __LINE__, current->pid, ##arg) + +#define mlx5_ib_err(dev, format, arg...) \ +pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ + __LINE__, current->pid, ##arg) + +#define mlx5_ib_warn(dev, format, arg...) 
\ +pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ + __LINE__, current->pid, ##arg) + +enum { + MLX5_IB_MMAP_CMD_SHIFT = 8, + MLX5_IB_MMAP_CMD_MASK = 0xff, +}; + +enum mlx5_ib_mmap_cmd { + MLX5_IB_MMAP_REGULAR_PAGE = 0, + MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */ +}; + +enum { + MLX5_RES_SCAT_DATA32_CQE = 0x1, + MLX5_RES_SCAT_DATA64_CQE = 0x2, + MLX5_REQ_SCAT_DATA32_CQE = 0x11, + MLX5_REQ_SCAT_DATA64_CQE = 0x22, +}; + +enum mlx5_ib_latency_class { + MLX5_IB_LATENCY_CLASS_LOW, + MLX5_IB_LATENCY_CLASS_MEDIUM, + MLX5_IB_LATENCY_CLASS_HIGH, + MLX5_IB_LATENCY_CLASS_FAST_PATH +}; + +enum mlx5_ib_mad_ifc_flags { + MLX5_MAD_IFC_IGNORE_MKEY = 1, + MLX5_MAD_IFC_IGNORE_BKEY = 2, + MLX5_MAD_IFC_NET_VIEW = 4, +}; + +struct mlx5_ib_ucontext { + struct ib_ucontext ibucontext; + struct list_head db_page_list; + + /* protect doorbell record alloc/free + */ + struct mutex db_page_mutex; + struct mlx5_uuar_info uuari; +}; + +static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct mlx5_ib_ucontext, ibucontext); +} + +struct mlx5_ib_pd { + struct ib_pd ibpd; + u32 pdn; + u32 pa_lkey; +}; + +/* Use macros here so that we don't have to duplicate + * enum ib_send_flags and enum ib_qp_type for low-level driver + */ + +#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START +#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1 +#define MLX5_IB_WR_UMR IB_WR_RESERVED1 + +struct wr_list { + u16 opcode; + u16 next; +}; + +struct mlx5_ib_wq { + u64 *wrid; + u32 *wr_data; + struct wr_list *w_list; + unsigned *wqe_head; + u16 unsig_count; + + /* serialize post to the work queue + */ + spinlock_t lock; + int wqe_cnt; + int max_post; + int max_gs; + int offset; + int wqe_shift; + unsigned head; + unsigned tail; + u16 cur_post; + u16 last_poll; + void *qend; +}; + +enum { + MLX5_QP_USER, + MLX5_QP_KERNEL, + MLX5_QP_EMPTY +}; + +struct mlx5_ib_qp { + struct ib_qp ibqp; + struct mlx5_core_qp mqp; + struct mlx5_buf buf; + + struct mlx5_db db; + struct mlx5_ib_wq rq; + + u32 doorbell_qpn; + u8 sq_signal_bits; + u8 fm_cache; + int sq_max_wqes_per_wr; + int sq_spare_wqes; + struct mlx5_ib_wq sq; + + struct ib_umem *umem; + int buf_size; + + /* serialize qp state modifications + */ + struct mutex mutex; + u16 xrcdn; + u32 flags; + u8 port; + u8 alt_port; + u8 atomic_rd_en; + u8 resp_depth; + u8 state; + int mlx_type; + int wq_sig; + int scat_cqe; + int max_inline_data; + struct mlx5_bf *bf; + int has_rq; + + /* only for user space QPs. 
For kernel + * we have it from the bf object + */ + int uuarn; + + int create_type; + u32 pa_lkey; + + /* Store signature errors */ + bool signature_en; +}; + +struct mlx5_ib_cq_buf { + struct mlx5_buf buf; + struct ib_umem *umem; + int cqe_size; + int nent; +}; + +enum mlx5_ib_qp_flags { + MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0, + MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1, +}; + +struct mlx5_shared_mr_info { + int mr_id; + struct ib_umem *umem; +}; + +struct mlx5_ib_cq { + struct ib_cq ibcq; + struct mlx5_core_cq mcq; + struct mlx5_ib_cq_buf buf; + struct mlx5_db db; + + /* serialize access to the CQ + */ + spinlock_t lock; + + /* protect resize cq + */ + struct mutex resize_mutex; + struct mlx5_ib_cq_buf *resize_buf; + struct ib_umem *resize_umem; + int cqe_size; +}; + +struct mlx5_ib_srq { + struct ib_srq ibsrq; + struct mlx5_core_srq msrq; + struct mlx5_buf buf; + struct mlx5_db db; + u64 *wrid; + /* protect SRQ handling + */ + spinlock_t lock; + int head; + int tail; + u16 wqe_ctr; + struct ib_umem *umem; + /* serialize arming a SRQ + */ + struct mutex mutex; + int wq_sig; +}; + +struct mlx5_ib_xrcd { + struct ib_xrcd ibxrcd; + u32 xrcdn; +}; + +struct mlx5_ib_mr { + struct ib_mr ibmr; + struct mlx5_core_mr mmr; + struct ib_umem *umem; + struct mlx5_shared_mr_info *smr_info; + struct list_head list; + int order; + int umred; + __be64 *pas; + dma_addr_t dma; + int npages; + struct mlx5_ib_dev *dev; + struct mlx5_create_mkey_mbox_out out; + struct mlx5_core_sig_ctx *sig; +}; + +struct mlx5_ib_fast_reg_page_list { + struct ib_fast_reg_page_list ibfrpl; + __be64 *mapped_page_list; + dma_addr_t map; +}; + +struct mlx5_ib_umr_context { + enum ib_wc_status status; + struct completion done; +}; + +static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) +{ + context->status = -1; + init_completion(&context->done); +} + +struct umr_common { + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + struct ib_mr *mr; + /* control access to UMR QP + */ + struct semaphore sem; +}; + +enum { + MLX5_FMR_INVALID, + MLX5_FMR_VALID, + MLX5_FMR_BUSY, +}; + +struct mlx5_ib_fmr { + struct ib_fmr ibfmr; + struct mlx5_core_mr mr; + int access_flags; + int state; + /* protect fmr state + */ + spinlock_t lock; + u64 wrid; + struct ib_send_wr wr[2]; + u8 page_shift; + struct ib_fast_reg_page_list page_list; +}; + +struct mlx5_cache_ent { + struct list_head head; + /* sync access to the cache entry + */ + spinlock_t lock; + + + struct dentry *dir; + char name[4]; + u32 order; + u32 size; + u32 cur; + u32 miss; + u32 limit; + + struct dentry *fsize; + struct dentry *fcur; + struct dentry *fmiss; + struct dentry *flimit; + + struct mlx5_ib_dev *dev; + struct work_struct work; + struct delayed_work dwork; + int pending; +}; + +struct mlx5_mr_cache { + struct workqueue_struct *wq; + struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES]; + int stopped; + struct dentry *root; + unsigned long last_add; +}; + +struct mlx5_ib_resources { + struct ib_cq *c0; + struct ib_xrcd *x0; + struct ib_xrcd *x1; + struct ib_pd *p0; + struct ib_srq *s0; +}; + +struct mlx5_ib_dev { + struct ib_device ib_dev; + struct mlx5_core_dev mdev; + MLX5_DECLARE_DOORBELL_LOCK(uar_lock); + struct list_head eqs_list; + int num_ports; + int num_comp_vectors; + /* serialize update of capability mask + */ + struct mutex cap_mask_mutex; + bool ib_active; + struct umr_common umrc; + /* sync used page count stats + */ + spinlock_t mr_lock; + struct mlx5_ib_resources devr; + struct mlx5_mr_cache cache; + struct timer_list 
delay_timer; + int fill_delay; +}; + +static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) +{ + return container_of(mcq, struct mlx5_ib_cq, mcq); +} + +static inline struct mlx5_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd) +{ + return container_of(ibxrcd, struct mlx5_ib_xrcd, ibxrcd); +} + +static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct mlx5_ib_dev, ib_dev); +} + +static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr); +} + +static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct mlx5_ib_cq, ibcq); +} + +static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) +{ + return container_of(mqp, struct mlx5_ib_qp, mqp); +} + +static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) +{ + return container_of(mmr, struct mlx5_ib_mr, mmr); +} + +static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct mlx5_ib_pd, ibpd); +} + +static inline struct mlx5_ib_srq *to_msrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct mlx5_ib_srq, ibsrq); +} + +static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct mlx5_ib_qp, ibqp); +} + +static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq) +{ + return container_of(msrq, struct mlx5_ib_srq, msrq); +} + +static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct mlx5_ib_mr, ibmr); +} + +static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) +{ + return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl); +} + +struct mlx5_ib_ah { + struct ib_ah ibah; + struct mlx5_av av; +}; + +static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah) +{ + return container_of(ibah, struct mlx5_ib_ah, ibah); +} + +static inline struct mlx5_ib_dev *mlx5_core2ibdev(struct mlx5_core_dev *dev) +{ + return container_of(dev, struct mlx5_ib_dev, mdev); +} + +static inline struct mlx5_ib_dev *mlx5_pci2ibdev(struct pci_dev *pdev) +{ + return mlx5_core2ibdev(pci2mlx5_core_dev(pdev)); +} + +int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, + struct mlx5_db *db); +void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db); +void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); +void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq); +void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); +int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, + void *in_mad, void *response_mad); +struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr, + struct mlx5_ib_ah *ah); +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); +int mlx5_ib_destroy_ah(struct ib_ah *ah); +struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); +int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); +int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); +int mlx5_ib_destroy_srq(struct ib_srq *srq); +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr 
**bad_wr); +struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int mlx5_ib_destroy_qp(struct ib_qp *qp); +int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); +struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *context, + struct ib_udata *udata); +int mlx5_ib_destroy_cq(struct ib_cq *cq); +int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); +int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); +struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); +struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int mlx5_ib_dereg_mr(struct ib_mr *ibmr); +int mlx5_ib_destroy_mr(struct ib_mr *ibmr); +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); +struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len); +struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, + int page_list_len); +void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); +struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc, + struct ib_fmr_attr *fmr_attr); +int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int npages, u64 iova); +int mlx5_ib_unmap_fmr(struct list_head *fmr_list); +int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr); +int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad); +struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata); +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd); +int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn); +int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); +int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); +int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev); +void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev); +void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, + int *ncont, int *order); +void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, + int page_shift, __be64 *pas, int umr); +void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); +int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq); +int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); +int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); +int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); +void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, + struct ib_mr_status *mr_status); + +static inline 
void init_query_mad(struct ib_smp *mad) +{ + mad->base_version = 1; + mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + mad->class_version = 1; + mad->method = IB_MGMT_METHOD_GET; +} + +static inline u8 convert_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | + MLX5_PERM_LOCAL_READ; +} + +#endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c new file mode 100644 index 00000000000..afa873bd028 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -0,0 +1,1250 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + + +#include <linux/kref.h> +#include <linux/random.h> +#include <linux/debugfs.h> +#include <linux/export.h> +#include <linux/delay.h> +#include <rdma/ib_umem.h> +#include "mlx5_ib.h" + +enum { + MAX_PENDING_REG_MR = 8, +}; + +enum { + MLX5_UMR_ALIGN = 2048 +}; + +static __be64 *mr_align(__be64 *ptr, int align) +{ + unsigned long mask = align - 1; + + return (__be64 *)(((unsigned long)ptr + mask) & ~mask); +} + +static int order2idx(struct mlx5_ib_dev *dev, int order) +{ + struct mlx5_mr_cache *cache = &dev->cache; + + if (order < cache->ent[0].order) + return 0; + else + return order - cache->ent[0].order; +} + +static void reg_mr_callback(int status, void *context) +{ + struct mlx5_ib_mr *mr = context; + struct mlx5_ib_dev *dev = mr->dev; + struct mlx5_mr_cache *cache = &dev->cache; + int c = order2idx(dev, mr->order); + struct mlx5_cache_ent *ent = &cache->ent[c]; + u8 key; + unsigned long flags; + struct mlx5_mr_table *table = &dev->mdev.priv.mr_table; + int err; + + spin_lock_irqsave(&ent->lock, flags); + ent->pending--; + spin_unlock_irqrestore(&ent->lock, flags); + if (status) { + mlx5_ib_warn(dev, "async reg mr failed. 
status %d\n", status); + kfree(mr); + dev->fill_delay = 1; + mod_timer(&dev->delay_timer, jiffies + HZ); + return; + } + + if (mr->out.hdr.status) { + mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n", + mr->out.hdr.status, + be32_to_cpu(mr->out.hdr.syndrome)); + kfree(mr); + dev->fill_delay = 1; + mod_timer(&dev->delay_timer, jiffies + HZ); + return; + } + + spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags); + key = dev->mdev.priv.mkey_key++; + spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags); + mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; + + cache->last_add = jiffies; + + spin_lock_irqsave(&ent->lock, flags); + list_add_tail(&mr->list, &ent->head); + ent->cur++; + ent->size++; + spin_unlock_irqrestore(&ent->lock, flags); + + write_lock_irqsave(&table->lock, flags); + err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key), + &mr->mmr); + if (err) + pr_err("Error inserting to mr tree. 0x%x\n", -err); + write_unlock_irqrestore(&table->lock, flags); +} + +static int add_keys(struct mlx5_ib_dev *dev, int c, int num) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int npages = 1 << ent->order; + int err = 0; + int i; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + for (i = 0; i < num; i++) { + if (ent->pending >= MAX_PENDING_REG_MR) { + err = -EAGAIN; + break; + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) { + err = -ENOMEM; + break; + } + mr->order = ent->order; + mr->umred = 1; + mr->dev = dev; + in->seg.status = 1 << 6; + in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN; + in->seg.log2_page_size = 12; + + spin_lock_irq(&ent->lock); + ent->pending++; + spin_unlock_irq(&ent->lock); + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, + sizeof(*in), reg_mr_callback, + mr, &mr->out); + if (err) { + mlx5_ib_warn(dev, "create mkey failed %d\n", err); + kfree(mr); + break; + } + } + + kfree(in); + return err; +} + +static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_ib_mr *mr; + int err; + int i; + + for (i = 0; i < num; i++) { + spin_lock_irq(&ent->lock); + if (list_empty(&ent->head)) { + spin_unlock_irq(&ent->lock); + return; + } + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->cur--; + ent->size--; + spin_unlock_irq(&ent->lock); + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) + mlx5_ib_warn(dev, "failed destroy mkey\n"); + else + kfree(mr); + } +} + +static ssize_t size_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + struct mlx5_ib_dev *dev = ent->dev; + char lbuf[20]; + u32 var; + int err; + int c; + + if (copy_from_user(lbuf, buf, sizeof(lbuf))) + return -EFAULT; + + c = order2idx(dev, ent->order); + lbuf[sizeof(lbuf) - 1] = 0; + + if (sscanf(lbuf, "%u", &var) != 1) + return -EINVAL; + + if (var < ent->limit) + return -EINVAL; + + if (var > ent->size) { + do { + err = add_keys(dev, c, var - ent->size); + if (err && err != -EAGAIN) + return err; + + usleep_range(3000, 5000); + } while (err); + } else if (var < ent->size) { + remove_keys(dev, c, ent->size - var); + } + + return count; +} + +static ssize_t 
size_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + char lbuf[20]; + int err; + + if (*pos) + return 0; + + err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); + if (err < 0) + return err; + + if (copy_to_user(buf, lbuf, err)) + return -EFAULT; + + *pos += err; + + return err; +} + +static const struct file_operations size_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = size_write, + .read = size_read, +}; + +static ssize_t limit_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + struct mlx5_ib_dev *dev = ent->dev; + char lbuf[20]; + u32 var; + int err; + int c; + + if (copy_from_user(lbuf, buf, sizeof(lbuf))) + return -EFAULT; + + c = order2idx(dev, ent->order); + lbuf[sizeof(lbuf) - 1] = 0; + + if (sscanf(lbuf, "%u", &var) != 1) + return -EINVAL; + + if (var > ent->size) + return -EINVAL; + + ent->limit = var; + + if (ent->cur < ent->limit) { + err = add_keys(dev, c, 2 * ent->limit - ent->cur); + if (err) + return err; + } + + return count; +} + +static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, + loff_t *pos) +{ + struct mlx5_cache_ent *ent = filp->private_data; + char lbuf[20]; + int err; + + if (*pos) + return 0; + + err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); + if (err < 0) + return err; + + if (copy_to_user(buf, lbuf, err)) + return -EFAULT; + + *pos += err; + + return err; +} + +static const struct file_operations limit_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = limit_write, + .read = limit_read, +}; + +static int someone_adding(struct mlx5_mr_cache *cache) +{ + int i; + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + if (cache->ent[i].cur < cache->ent[i].limit) + return 1; + } + + return 0; +} + +static void __cache_work_func(struct mlx5_cache_ent *ent) +{ + struct mlx5_ib_dev *dev = ent->dev; + struct mlx5_mr_cache *cache = &dev->cache; + int i = order2idx(dev, ent->order); + int err; + + if (cache->stopped) + return; + + ent = &dev->cache.ent[i]; + if (ent->cur < 2 * ent->limit && !dev->fill_delay) { + err = add_keys(dev, i, 1); + if (ent->cur < 2 * ent->limit) { + if (err == -EAGAIN) { + mlx5_ib_dbg(dev, "returned eagain, order %d\n", + i + 2); + queue_delayed_work(cache->wq, &ent->dwork, + msecs_to_jiffies(3)); + } else if (err) { + mlx5_ib_warn(dev, "command failed order %d, err %d\n", + i + 2, err); + queue_delayed_work(cache->wq, &ent->dwork, + msecs_to_jiffies(1000)); + } else { + queue_work(cache->wq, &ent->work); + } + } + } else if (ent->cur > 2 * ent->limit) { + if (!someone_adding(cache) && + time_after(jiffies, cache->last_add + 300 * HZ)) { + remove_keys(dev, i, 1); + if (ent->cur > ent->limit) + queue_work(cache->wq, &ent->work); + } else { + queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); + } + } +} + +static void delayed_cache_work_func(struct work_struct *work) +{ + struct mlx5_cache_ent *ent; + + ent = container_of(work, struct mlx5_cache_ent, dwork.work); + __cache_work_func(ent); +} + +static void cache_work_func(struct work_struct *work) +{ + struct mlx5_cache_ent *ent; + + ent = container_of(work, struct mlx5_cache_ent, work); + __cache_work_func(ent); +} + +static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_ib_mr *mr = NULL; + struct mlx5_cache_ent *ent; + int c; + int i; + + c = order2idx(dev, order); + if (c < 0 || c 
>= MAX_MR_CACHE_ENTRIES) { + mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); + return NULL; + } + + for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + + mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); + + spin_lock_irq(&ent->lock); + if (!list_empty(&ent->head)) { + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, + list); + list_del(&mr->list); + ent->cur--; + spin_unlock_irq(&ent->lock); + if (ent->cur < ent->limit) + queue_work(cache->wq, &ent->work); + break; + } + spin_unlock_irq(&ent->lock); + + queue_work(cache->wq, &ent->work); + + if (mr) + break; + } + + if (!mr) + cache->ent[c].miss++; + + return mr; +} + +static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int shrink = 0; + int c; + + c = order2idx(dev, mr->order); + if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { + mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); + return; + } + ent = &cache->ent[c]; + spin_lock_irq(&ent->lock); + list_add_tail(&mr->list, &ent->head); + ent->cur++; + if (ent->cur > 2 * ent->limit) + shrink = 1; + spin_unlock_irq(&ent->lock); + + if (shrink) + queue_work(cache->wq, &ent->work); +} + +static void clean_keys(struct mlx5_ib_dev *dev, int c) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent = &cache->ent[c]; + struct mlx5_ib_mr *mr; + int err; + + cancel_delayed_work(&ent->dwork); + while (1) { + spin_lock_irq(&ent->lock); + if (list_empty(&ent->head)) { + spin_unlock_irq(&ent->lock); + return; + } + mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + list_del(&mr->list); + ent->cur--; + ent->size--; + spin_unlock_irq(&ent->lock); + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) + mlx5_ib_warn(dev, "failed destroy mkey\n"); + else + kfree(mr); + } +} + +static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int i; + + if (!mlx5_debugfs_root) + return 0; + + cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root); + if (!cache->root) + return -ENOMEM; + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + sprintf(ent->name, "%d", ent->order); + ent->dir = debugfs_create_dir(ent->name, cache->root); + if (!ent->dir) + return -ENOMEM; + + ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent, + &size_fops); + if (!ent->fsize) + return -ENOMEM; + + ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent, + &limit_fops); + if (!ent->flimit) + return -ENOMEM; + + ent->fcur = debugfs_create_u32("cur", 0400, ent->dir, + &ent->cur); + if (!ent->fcur) + return -ENOMEM; + + ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir, + &ent->miss); + if (!ent->fmiss) + return -ENOMEM; + } + + return 0; +} + +static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) +{ + if (!mlx5_debugfs_root) + return; + + debugfs_remove_recursive(dev->cache.root); +} + +static void delay_time_func(unsigned long ctx) +{ + struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx; + + dev->fill_delay = 0; +} + +int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int limit; + int err; + int i; + + cache->wq = create_singlethread_workqueue("mkey_cache"); + if (!cache->wq) { + mlx5_ib_warn(dev, "failed to create work queue\n"); + return -ENOMEM; + } + + setup_timer(&dev->delay_timer, delay_time_func, (unsigned 
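The init loop that follows gives bucket i order i + 2, so order2idx() reduces to a subtraction clamped at the first bucket. A stand-alone restatement of the mapping; MAX_MR_CACHE_ENTRIES is assumed to be 16 here, matching the driver of this era.

#include <stdio.h>

#define MAX_MR_CACHE_ENTRIES 16	/* assumed, for the printout */

static int order2idx(int order)
{
	return order < 2 ? 0 : order - 2;
}

int main(void)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		printf("bucket %2d serves order %2d (2^%d pages)\n",
		       i, i + 2, i + 2);
	printf("order 9 maps to bucket %d\n", order2idx(9));
	return 0;
}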
long)dev); + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + INIT_LIST_HEAD(&cache->ent[i].head); + spin_lock_init(&cache->ent[i].lock); + + ent = &cache->ent[i]; + INIT_LIST_HEAD(&ent->head); + spin_lock_init(&ent->lock); + ent->order = i + 2; + ent->dev = dev; + + if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) + limit = dev->mdev.profile->mr_cache[i].limit; + else + limit = 0; + + INIT_WORK(&ent->work, cache_work_func); + INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); + ent->limit = limit; + queue_work(cache->wq, &ent->work); + } + + err = mlx5_mr_cache_debugfs_init(dev); + if (err) + mlx5_ib_warn(dev, "cache debugfs failure\n"); + + return 0; +} + +int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) +{ + int i; + + dev->cache.stopped = 1; + flush_workqueue(dev->cache.wq); + + mlx5_mr_cache_debugfs_cleanup(dev); + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) + clean_keys(dev, i); + + destroy_workqueue(dev->cache.wq); + del_timer_sync(&dev->delay_timer); + + return 0; +} + +struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_core_dev *mdev = &dev->mdev; + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_mkey_seg *seg; + struct mlx5_ib_mr *mr; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + seg = &in->seg; + seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA; + seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64); + seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + seg->start_addr = 0; + + err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL, + NULL); + if (err) + goto err_in; + + kfree(in); + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->umem = NULL; + + return &mr->ibmr; + +err_in: + kfree(in); + +err_free: + kfree(mr); + + return ERR_PTR(err); +} + +static int get_octo_len(u64 addr, u64 len, int page_size) +{ + u64 offset; + int npages; + + offset = addr & (page_size - 1); + npages = ALIGN(len + offset, page_size) >> ilog2(page_size); + return (npages + 1) / 2; +} + +static int use_umr(int order) +{ + return order <= 17; +} + +static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr, + struct ib_sge *sg, u64 dma, int n, u32 key, + int page_shift, u64 virt_addr, u64 len, + int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct ib_mr *mr = dev->umrc.mr; + + sg->addr = dma; + sg->length = ALIGN(sizeof(u64) * n, 64); + sg->lkey = mr->lkey; + + wr->next = NULL; + wr->send_flags = 0; + wr->sg_list = sg; + if (n) + wr->num_sge = 1; + else + wr->num_sge = 0; + + wr->opcode = MLX5_IB_WR_UMR; + wr->wr.fast_reg.page_list_len = n; + wr->wr.fast_reg.page_shift = page_shift; + wr->wr.fast_reg.rkey = key; + wr->wr.fast_reg.iova_start = virt_addr; + wr->wr.fast_reg.length = len; + wr->wr.fast_reg.access_flags = access_flags; + wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd; +} + +static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, + struct ib_send_wr *wr, u32 key) +{ + wr->send_flags = MLX5_IB_SEND_UMR_UNREG; + wr->opcode = MLX5_IB_WR_UMR; + wr->wr.fast_reg.rkey = key; +} + +void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context) +{ + struct mlx5_ib_umr_context *context; + struct ib_wc wc; + int err; + + while (1) { + err = ib_poll_cq(cq, 1, &wc); + if (err < 0) { + pr_warn("poll cq error %d\n", err); + return; + } + if (err == 0) + break; + + context = (struct 
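get_octo_len() above counts 16-byte octowords: page addresses are 8 bytes each and the HCA packs two per octoword, so the page count is rounded up to even and halved. Checking the arithmetic stand-alone:

#include <stdio.h>
#include <stdint.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

static int get_octo_len(uint64_t addr, uint64_t len, int page_size)
{
	uint64_t offset = addr & (page_size - 1);
	int npages = ALIGN(len + offset, page_size) / page_size;

	return (npages + 1) / 2;
}

int main(void)
{
	/* 1 MiB starting 512 bytes into a 4 KiB page: 257 pages, 129 octowords */
	printf("%d\n", get_octo_len(0x1000200, 1 << 20, 4096));
	return 0;
}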
mlx5_ib_umr_context *) (unsigned long) wc.wr_id; + context->status = wc.status; + complete(&context->done); + } + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); +} + +static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem, + u64 virt_addr, u64 len, int npages, + int page_shift, int order, int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct device *ddev = dev->ib_dev.dma_device; + struct umr_common *umrc = &dev->umrc; + struct mlx5_ib_umr_context umr_context; + struct ib_send_wr wr, *bad; + struct mlx5_ib_mr *mr; + struct ib_sge sg; + int size = sizeof(u64) * npages; + int err = 0; + int i; + + for (i = 0; i < 1; i++) { + mr = alloc_cached_mr(dev, order); + if (mr) + break; + + err = add_keys(dev, order2idx(dev, order), 1); + if (err && err != -EAGAIN) { + mlx5_ib_warn(dev, "add_keys failed, err %d\n", err); + break; + } + } + + if (!mr) + return ERR_PTR(-EAGAIN); + + mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL); + if (!mr->pas) { + err = -ENOMEM; + goto free_mr; + } + + mlx5_ib_populate_pas(dev, umem, page_shift, + mr_align(mr->pas, MLX5_UMR_ALIGN), 1); + + mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size, + DMA_TO_DEVICE); + if (dma_mapping_error(ddev, mr->dma)) { + err = -ENOMEM; + goto free_pas; + } + + memset(&wr, 0, sizeof(wr)); + wr.wr_id = (u64)(unsigned long)&umr_context; + prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags); + + mlx5_ib_init_umr_context(&umr_context); + down(&umrc->sem); + err = ib_post_send(umrc->qp, &wr, &bad); + if (err) { + mlx5_ib_warn(dev, "post send failed, err %d\n", err); + goto unmap_dma; + } else { + wait_for_completion(&umr_context.done); + if (umr_context.status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "reg umr failed\n"); + err = -EFAULT; + } + } + + mr->mmr.iova = virt_addr; + mr->mmr.size = len; + mr->mmr.pd = to_mpd(pd)->pdn; + +unmap_dma: + up(&umrc->sem); + dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE); + +free_pas: + kfree(mr->pas); + +free_mr: + if (err) { + free_cached_mr(dev, mr); + return ERR_PTR(err); + } + + return mr; +} + +static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr, + u64 length, struct ib_umem *umem, + int npages, int page_shift, + int access_flags) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int inlen; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2; + in = mlx5_vzalloc(inlen); + if (!in) { + err = -ENOMEM; + goto err_1; + } + mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0); + + in->seg.flags = convert_access(access_flags) | + MLX5_ACCESS_MODE_MTT; + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + in->seg.start_addr = cpu_to_be64(virt_addr); + in->seg.len = cpu_to_be64(length); + in->seg.bsfs_octo_size = 0; + in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); + in->seg.log2_page_size = page_shift; + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, + 1 << page_shift)); + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL, + NULL, NULL); + if (err) { + mlx5_ib_warn(dev, "create mkey failed\n"); + goto err_2; + } + mr->umem = umem; + mlx5_vfree(in); + + mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key); + + return mr; + +err_2: + mlx5_vfree(in); + +err_1: + kfree(mr); + + 
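reg_umr() below stashes a pointer to an on-stack context in wr.wr_id and blocks until mlx5_umr_cq_handler() above completes it from the matching CQE. A user-space analogue of that rendezvous, using a pthread condition variable in place of the kernel completion (build with -lpthread); nothing here is the verbs API itself.

#include <pthread.h>
#include <stdio.h>

struct umr_context {
	int status;
	int done;
	pthread_mutex_t lock;
	pthread_cond_t cond;
};

static void complete(struct umr_context *ctx, int status)
{
	pthread_mutex_lock(&ctx->lock);
	ctx->status = status;
	ctx->done = 1;
	pthread_cond_signal(&ctx->cond);
	pthread_mutex_unlock(&ctx->lock);
}

static void *cq_handler(void *wr_id)
{
	/* the "CQE" hands back the cookie the poster stashed in wr_id */
	complete(wr_id, 0);
	return NULL;
}

int main(void)
{
	struct umr_context ctx = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
	};
	pthread_t t;

	pthread_create(&t, NULL, cq_handler, &ctx);
	pthread_mutex_lock(&ctx.lock);
	while (!ctx.done)
		pthread_cond_wait(&ctx.cond, &ctx.lock);
	pthread_mutex_unlock(&ctx.lock);
	pthread_join(t, NULL);
	printf("umr status %d\n", ctx.status);
	return 0;
}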
return ERR_PTR(err); +} + +struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_mr *mr = NULL; + struct ib_umem *umem; + int page_shift; + int npages; + int ncont; + int order; + int err; + + mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n", + start, virt_addr, length); + umem = ib_umem_get(pd->uobject->context, start, length, access_flags, + 0); + if (IS_ERR(umem)) { + mlx5_ib_dbg(dev, "umem get failed\n"); + return (void *)umem; + } + + mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order); + if (!npages) { + mlx5_ib_warn(dev, "avoid zero region\n"); + err = -EINVAL; + goto error; + } + + mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", + npages, ncont, order, page_shift); + + if (use_umr(order)) { + mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, + order, access_flags); + if (PTR_ERR(mr) == -EAGAIN) { + mlx5_ib_dbg(dev, "cache empty for order %d", order); + mr = NULL; + } + } + + if (!mr) + mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift, + access_flags); + + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto error; + } + + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key); + + mr->umem = umem; + mr->npages = npages; + spin_lock(&dev->mr_lock); + dev->mdev.priv.reg_pages += npages; + spin_unlock(&dev->mr_lock); + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + + return &mr->ibmr; + +error: + ib_umem_release(umem); + return ERR_PTR(err); +} + +static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) +{ + struct umr_common *umrc = &dev->umrc; + struct mlx5_ib_umr_context umr_context; + struct ib_send_wr wr, *bad; + int err; + + memset(&wr, 0, sizeof(wr)); + wr.wr_id = (u64)(unsigned long)&umr_context; + prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); + + mlx5_ib_init_umr_context(&umr_context); + down(&umrc->sem); + err = ib_post_send(umrc->qp, &wr, &bad); + if (err) { + up(&umrc->sem); + mlx5_ib_dbg(dev, "err %d\n", err); + goto error; + } else { + wait_for_completion(&umr_context.done); + up(&umrc->sem); + } + if (umr_context.status != IB_WC_SUCCESS) { + mlx5_ib_warn(dev, "unreg umr failed\n"); + err = -EFAULT; + goto error; + } + return 0; + +error: + return err; +} + +int mlx5_ib_dereg_mr(struct ib_mr *ibmr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibmr->device); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct ib_umem *umem = mr->umem; + int npages = mr->npages; + int umred = mr->umred; + int err; + + if (!umred) { + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", + mr->mmr.key, err); + return err; + } + } else { + err = unreg_umr(dev, mr); + if (err) { + mlx5_ib_warn(dev, "failed unregister\n"); + return err; + } + free_cached_mr(dev, mr); + } + + if (umem) { + ib_umem_release(umem); + spin_lock(&dev->mr_lock); + dev->mdev.priv.reg_pages -= npages; + spin_unlock(&dev->mr_lock); + } + + if (!umred) + kfree(mr); + + return 0; +} + +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int access_mode, err; + int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto 
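mlx5_ib_reg_user_mr() below only takes the UMR fast path for regions up to order 17 (use_umr()), and falls back to a synchronous reg_create() when the cache comes back empty with -EAGAIN. The policy, reduced to a predicate:

#include <stdio.h>

static const char *pick_reg_path(int order, int cache_empty)
{
	if (order <= 17 && !cache_empty)
		return "UMR over a cached mkey";
	return "synchronous reg_create()";
}

int main(void)
{
	printf("order 9:              %s\n", pick_reg_path(9, 0));
	printf("order 9, cache empty: %s\n", pick_reg_path(9, 1));
	printf("order 20:             %s\n", pick_reg_path(20, 0));
	return 0;
}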
err_free; + } + + in->seg.status = 1 << 6; /* free */ + in->seg.xlt_oct_size = cpu_to_be32(ndescs); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + access_mode = MLX5_ACCESS_MODE_MTT; + + if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) { + u32 psv_index[2]; + + in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) | + MLX5_MKEY_BSF_EN); + in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); + mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); + if (!mr->sig) { + err = -ENOMEM; + goto err_free_in; + } + + /* create mem & wire PSVs */ + err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn, + 2, psv_index); + if (err) + goto err_free_sig; + + access_mode = MLX5_ACCESS_MODE_KLM; + mr->sig->psv_memory.psv_idx = psv_index[0]; + mr->sig->psv_wire.psv_idx = psv_index[1]; + + mr->sig->sig_status_checked = true; + mr->sig->sig_err_exists = false; + /* Next UMR, Arm SIGERR */ + ++mr->sig->sigerr_count; + } + + in->seg.flags = MLX5_PERM_UMR_EN | access_mode; + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), + NULL, NULL, NULL); + if (err) + goto err_destroy_psv; + + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->umem = NULL; + kfree(in); + + return &mr->ibmr; + +err_destroy_psv: + if (mr->sig) { + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + } +err_free_sig: + kfree(mr->sig); +err_free_in: + kfree(in); +err_free: + kfree(mr); + return ERR_PTR(err); +} + +int mlx5_ib_destroy_mr(struct ib_mr *ibmr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibmr->device); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + int err; + + if (mr->sig) { + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + kfree(mr->sig); + } + + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", + mr->mmr.key, err); + return err; + } + + kfree(mr); + + return err; +} + +struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, + int max_page_list_len) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + in->seg.status = 1 << 6; /* free */ + in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + /* + * TBD not needed - issue 197292 */ + in->seg.log2_page_size = PAGE_SHIFT; + + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL, + NULL, NULL); + kfree(in); + if (err) + goto err_free; + + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->umem = NULL; + + return &mr->ibmr; + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct 
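The error unwinding in mlx5_ib_create_mr() below releases resources in strict reverse order of acquisition: PSVs, then the signature state, then the mailbox, then the MR. The same ladder in miniature, with the driver resources faked by malloc:

#include <stdlib.h>

struct fake_mr { void *sig; void *in; };

static struct fake_mr *create(int want_sig)
{
	struct fake_mr *mr = calloc(1, sizeof(*mr));

	if (!mr)
		return NULL;
	mr->in = malloc(64);		/* stands in for the mailbox */
	if (!mr->in)
		goto err_free;
	if (want_sig) {
		mr->sig = malloc(64);	/* stands in for mr->sig + PSVs */
		if (!mr->sig)
			goto err_free_in;
	}
	return mr;

err_free_in:
	free(mr->in);
err_free:
	free(mr);
	return NULL;
}

int main(void)
{
	struct fake_mr *mr = create(1);

	if (mr) {
		free(mr->sig);
		free(mr->in);
		free(mr);
	}
	return 0;
}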
ib_device *ibdev, + int page_list_len) +{ + struct mlx5_ib_fast_reg_page_list *mfrpl; + int size = page_list_len * sizeof(u64); + + mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL); + if (!mfrpl) + return ERR_PTR(-ENOMEM); + + mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); + if (!mfrpl->ibfrpl.page_list) + goto err_free; + + mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device, + size, &mfrpl->map, + GFP_KERNEL); + if (!mfrpl->mapped_page_list) + goto err_free; + + WARN_ON(mfrpl->map & 0x3f); + + return &mfrpl->ibfrpl; + +err_free: + kfree(mfrpl->ibfrpl.page_list); + kfree(mfrpl); + return ERR_PTR(-ENOMEM); +} + +void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) +{ + struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); + struct mlx5_ib_dev *dev = to_mdev(page_list->device); + int size = page_list->max_page_list_len * sizeof(u64); + + dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list, + mfrpl->map); + kfree(mfrpl->ibfrpl.page_list); + kfree(mfrpl); +} + +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, + struct ib_mr_status *mr_status) +{ + struct mlx5_ib_mr *mmr = to_mmr(ibmr); + int ret = 0; + + if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { + pr_err("Invalid status check mask\n"); + ret = -EINVAL; + goto done; + } + + mr_status->fail_status = 0; + if (check_mask & IB_MR_CHECK_SIG_STATUS) { + if (!mmr->sig) { + ret = -EINVAL; + pr_err("signature status check requested on a non-signature enabled MR\n"); + goto done; + } + + mmr->sig->sig_status_checked = true; + if (!mmr->sig->sig_err_exists) + goto done; + + if (ibmr->lkey == mmr->sig->err_item.key) + memcpy(&mr_status->sig_err, &mmr->sig->err_item, + sizeof(mr_status->sig_err)); + else { + mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; + mr_status->sig_err.sig_err_offset = 0; + mr_status->sig_err.key = mmr->sig->err_item.key; + } + + mmr->sig->sig_err_exists = false; + mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; + } + +done: + return ret; +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c new file mode 100644 index 00000000000..bbbcf389272 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -0,0 +1,3048 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <rdma/ib_umem.h> +#include "mlx5_ib.h" +#include "user.h" + +/* not supported currently */ +static int wq_signature; + +enum { + MLX5_IB_ACK_REQ_FREQ = 8, +}; + +enum { + MLX5_IB_DEFAULT_SCHED_QUEUE = 0x83, + MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f, + MLX5_IB_LINK_TYPE_IB = 0, + MLX5_IB_LINK_TYPE_ETH = 1 +}; + +enum { + MLX5_IB_SQ_STRIDE = 6, + MLX5_IB_CACHE_LINE_SIZE = 64, +}; + +static const u32 mlx5_ib_opcode[] = { + [IB_WR_SEND] = MLX5_OPCODE_SEND, + [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM, + [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM, + [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS, + [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA, + [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL, + [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR, + [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR, + [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS, + [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA, + [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR, +}; + +struct umr_wr { + u64 virt_addr; + struct ib_pd *pd; + unsigned int page_shift; + unsigned int npages; + u32 length; + int access_flags; + u32 mkey; +}; + +static int is_qp0(enum ib_qp_type qp_type) +{ + return qp_type == IB_QPT_SMI; +} + +static int is_qp1(enum ib_qp_type qp_type) +{ + return qp_type == IB_QPT_GSI; +} + +static int is_sqp(enum ib_qp_type qp_type) +{ + return is_qp0(qp_type) || is_qp1(qp_type); +} + +static void *get_wqe(struct mlx5_ib_qp *qp, int offset) +{ + return mlx5_buf_offset(&qp->buf, offset); +} + +static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n) +{ + return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); +} + +void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n) +{ + return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE)); +} + +static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type) +{ + struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; + struct ib_event event; + + if (type == MLX5_EVENT_TYPE_PATH_MIG) + to_mibqp(qp)->port = to_mibqp(qp)->alt_port; + + if (ibqp->event_handler) { + event.device = ibqp->device; + event.element.qp = ibqp; + switch (type) { + case MLX5_EVENT_TYPE_PATH_MIG: + event.event = IB_EVENT_PATH_MIG; + break; + case MLX5_EVENT_TYPE_COMM_EST: + event.event = IB_EVENT_COMM_EST; + break; + case MLX5_EVENT_TYPE_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + break; + case MLX5_EVENT_TYPE_SRQ_LAST_WQE: + event.event = IB_EVENT_QP_LAST_WQE_REACHED; + break; + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + event.event = IB_EVENT_QP_FATAL; + break; + case MLX5_EVENT_TYPE_PATH_MIG_FAILED: + event.event = IB_EVENT_PATH_MIG_ERR; + break; + case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: + event.event = IB_EVENT_QP_REQ_ERR; + break; + case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: + event.event = IB_EVENT_QP_ACCESS_ERR; + break; + default: + pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn); + return; + } + + ibqp->event_handler(&event, ibqp->qp_context); + } +} + +static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, + int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd) +{ + int wqe_size; + int wq_size; + + /* Sanity check RQ size 
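Send WQEs are addressed in 64-byte basic blocks: MLX5_IB_SQ_STRIDE is 6, so mlx5_get_send_wqe() below computes sq.offset + (n << 6). A quick check of the addressing (the sq_offset value is made up):

#include <stdio.h>

#define MLX5_IB_SQ_STRIDE 6

int main(void)
{
	unsigned int sq_offset = 4096;	/* made-up: RQ occupying the first page */
	int n;

	for (n = 0; n < 4; n++)
		printf("send WQE %d at offset 0x%x\n",
		       n, sq_offset + (n << MLX5_IB_SQ_STRIDE));
	return 0;
}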
before proceeding */ + if (cap->max_recv_wr > dev->mdev.caps.max_wqes) + return -EINVAL; + + if (!has_rq) { + qp->rq.max_gs = 0; + qp->rq.wqe_cnt = 0; + qp->rq.wqe_shift = 0; + } else { + if (ucmd) { + qp->rq.wqe_cnt = ucmd->rq_wqe_count; + qp->rq.wqe_shift = ucmd->rq_wqe_shift; + qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; + qp->rq.max_post = qp->rq.wqe_cnt; + } else { + wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0; + wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg); + wqe_size = roundup_pow_of_two(wqe_size); + wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size; + wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB); + qp->rq.wqe_cnt = wq_size / wqe_size; + if (wqe_size > dev->mdev.caps.max_rq_desc_sz) { + mlx5_ib_dbg(dev, "wqe_size %d, max %d\n", + wqe_size, + dev->mdev.caps.max_rq_desc_sz); + return -EINVAL; + } + qp->rq.wqe_shift = ilog2(wqe_size); + qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig; + qp->rq.max_post = qp->rq.wqe_cnt; + } + } + + return 0; +} + +static int sq_overhead(enum ib_qp_type qp_type) +{ + int size = 0; + + switch (qp_type) { + case IB_QPT_XRC_INI: + size += sizeof(struct mlx5_wqe_xrc_seg); + /* fall through */ + case IB_QPT_RC: + size += sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_atomic_seg) + + sizeof(struct mlx5_wqe_raddr_seg); + break; + + case IB_QPT_XRC_TGT: + return 0; + + case IB_QPT_UC: + size += sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_raddr_seg) + + sizeof(struct mlx5_wqe_umr_ctrl_seg) + + sizeof(struct mlx5_mkey_seg); + break; + + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + size += sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_datagram_seg); + break; + + case MLX5_IB_QPT_REG_UMR: + size += sizeof(struct mlx5_wqe_ctrl_seg) + + sizeof(struct mlx5_wqe_umr_ctrl_seg) + + sizeof(struct mlx5_mkey_seg); + break; + + default: + return -EINVAL; + } + + return size; +} + +static int calc_send_wqe(struct ib_qp_init_attr *attr) +{ + int inl_size = 0; + int size; + + size = sq_overhead(attr->qp_type); + if (size < 0) + return size; + + if (attr->cap.max_inline_data) { + inl_size = size + sizeof(struct mlx5_wqe_inline_seg) + + attr->cap.max_inline_data; + } + + size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN && + ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE) + return MLX5_SIG_WQE_SIZE; + else + return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); +} + +static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, + struct mlx5_ib_qp *qp) +{ + int wqe_size; + int wq_size; + + if (!attr->cap.max_send_wr) + return 0; + + wqe_size = calc_send_wqe(attr); + mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size); + if (wqe_size < 0) + return wqe_size; + + if (wqe_size > dev->mdev.caps.max_sq_desc_sz) { + mlx5_ib_dbg(dev, "wqe_size(%d) > max_sq_desc_sz(%d)\n", + wqe_size, dev->mdev.caps.max_sq_desc_sz); + return -EINVAL; + } + + qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) - + sizeof(struct mlx5_wqe_inline_seg); + attr->cap.max_inline_data = qp->max_inline_data; + + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) + qp->signature_en = true; + + wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); + qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; + if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { + mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", + 
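The kernel-QP branch of set_rq_size() rounds both the stride and the ring to powers of two before deriving wqe_cnt. The same arithmetic stand-alone, taking 16 bytes as sizeof(struct mlx5_wqe_data_seg):

#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int x)
{
	unsigned int r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned int max_recv_wr = 100, max_recv_sge = 3;
	unsigned int seg = 16;	/* sizeof(struct mlx5_wqe_data_seg) */
	unsigned int wqe_size = roundup_pow_of_two(max_recv_sge * seg);	/* 64 */
	unsigned int wq_size = roundup_pow_of_two(max_recv_wr) * wqe_size;

	printf("wqe_size %u, wqe_cnt %u\n", wqe_size, wq_size / wqe_size);
	return 0;
}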
qp->sq.wqe_cnt, dev->mdev.caps.max_wqes); + return -ENOMEM; + } + qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); + qp->sq.max_gs = attr->cap.max_send_sge; + qp->sq.max_post = wq_size / wqe_size; + attr->cap.max_send_wr = qp->sq.max_post; + + return wq_size; +} + +static int set_user_buf_size(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + struct mlx5_ib_create_qp *ucmd) +{ + int desc_sz = 1 << qp->sq.wqe_shift; + + if (desc_sz > dev->mdev.caps.max_sq_desc_sz) { + mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n", + desc_sz, dev->mdev.caps.max_sq_desc_sz); + return -EINVAL; + } + + if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) { + mlx5_ib_warn(dev, "sq_wqe_count %d, sq_wqe_count %d\n", + ucmd->sq_wqe_count, ucmd->sq_wqe_count); + return -EINVAL; + } + + qp->sq.wqe_cnt = ucmd->sq_wqe_count; + + if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { + mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n", + qp->sq.wqe_cnt, dev->mdev.caps.max_wqes); + return -EINVAL; + } + + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << 6); + + return 0; +} + +static int qp_has_rq(struct ib_qp_init_attr *attr) +{ + if (attr->qp_type == IB_QPT_XRC_INI || + attr->qp_type == IB_QPT_XRC_TGT || attr->srq || + attr->qp_type == MLX5_IB_QPT_REG_UMR || + !attr->cap.max_recv_wr) + return 0; + + return 1; +} + +static int first_med_uuar(void) +{ + return 1; +} + +static int next_uuar(int n) +{ + n++; + + while (((n % 4) & 2)) + n++; + + return n; +} + +static int num_med_uuar(struct mlx5_uuar_info *uuari) +{ + int n; + + n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE - + uuari->num_low_latency_uuars - 1; + + return n >= 0 ? n : 0; +} + +static int max_uuari(struct mlx5_uuar_info *uuari) +{ + return uuari->num_uars * 4; +} + +static int first_hi_uuar(struct mlx5_uuar_info *uuari) +{ + int med; + int i; + int t; + + med = num_med_uuar(uuari); + for (t = 0, i = first_med_uuar();; i = next_uuar(i)) { + t++; + if (t == med) + return next_uuar(i); + } + + return 0; +} + +static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari) +{ + int i; + + for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) { + if (!test_bit(i, uuari->bitmap)) { + set_bit(i, uuari->bitmap); + uuari->count[i]++; + return i; + } + } + + return -ENOMEM; +} + +static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari) +{ + int minidx = first_med_uuar(); + int i; + + for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) { + if (uuari->count[i] < uuari->count[minidx]) + minidx = i; + } + + uuari->count[minidx]++; + return minidx; +} + +static int alloc_uuar(struct mlx5_uuar_info *uuari, + enum mlx5_ib_latency_class lat) +{ + int uuarn = -EINVAL; + + mutex_lock(&uuari->lock); + switch (lat) { + case MLX5_IB_LATENCY_CLASS_LOW: + uuarn = 0; + uuari->count[uuarn]++; + break; + + case MLX5_IB_LATENCY_CLASS_MEDIUM: + if (uuari->ver < 2) + uuarn = -ENOMEM; + else + uuarn = alloc_med_class_uuar(uuari); + break; + + case MLX5_IB_LATENCY_CLASS_HIGH: + if (uuari->ver < 2) + uuarn = -ENOMEM; + else + uuarn = alloc_high_class_uuar(uuari); + break; + + case MLX5_IB_LATENCY_CLASS_FAST_PATH: + uuarn = 2; + break; + } + mutex_unlock(&uuari->lock); + + return uuarn; +} + +static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn) +{ + clear_bit(uuarn, uuari->bitmap); + --uuari->count[uuarn]; +} + +static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn) +{ + clear_bit(uuarn, uuari->bitmap); + --uuari->count[uuarn]; +} + +static void 
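next_uuar() above steps through blue-flame register slots while skipping those whose index is 2 or 3 mod 4. Printing the walk makes the pattern visible:

#include <stdio.h>

static int next_uuar(int n)
{
	n++;
	while ((n % 4) & 2)
		n++;
	return n;
}

int main(void)
{
	int n = 1;	/* first_med_uuar() */
	int i;

	for (i = 0; i < 8; i++) {
		printf("%d ", n);
		n = next_uuar(n);
	}
	printf("\n");	/* prints: 1 4 5 8 9 12 13 16 */
	return 0;
}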
free_uuar(struct mlx5_uuar_info *uuari, int uuarn) +{ + int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE; + int high_uuar = nuuars - uuari->num_low_latency_uuars; + + mutex_lock(&uuari->lock); + if (uuarn == 0) { + --uuari->count[uuarn]; + goto out; + } + + if (uuarn < high_uuar) { + free_med_class_uuar(uuari, uuarn); + goto out; + } + + free_high_class_uuar(uuari, uuarn); + +out: + mutex_unlock(&uuari->lock); +} + +static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state) +{ + switch (state) { + case IB_QPS_RESET: return MLX5_QP_STATE_RST; + case IB_QPS_INIT: return MLX5_QP_STATE_INIT; + case IB_QPS_RTR: return MLX5_QP_STATE_RTR; + case IB_QPS_RTS: return MLX5_QP_STATE_RTS; + case IB_QPS_SQD: return MLX5_QP_STATE_SQD; + case IB_QPS_SQE: return MLX5_QP_STATE_SQER; + case IB_QPS_ERR: return MLX5_QP_STATE_ERR; + default: return -1; + } +} + +static int to_mlx5_st(enum ib_qp_type type) +{ + switch (type) { + case IB_QPT_RC: return MLX5_QP_ST_RC; + case IB_QPT_UC: return MLX5_QP_ST_UC; + case IB_QPT_UD: return MLX5_QP_ST_UD; + case MLX5_IB_QPT_REG_UMR: return MLX5_QP_ST_REG_UMR; + case IB_QPT_XRC_INI: + case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC; + case IB_QPT_SMI: return MLX5_QP_ST_QP0; + case IB_QPT_GSI: return MLX5_QP_ST_QP1; + case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6; + case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE; + case IB_QPT_RAW_PACKET: + case IB_QPT_MAX: + default: return -EINVAL; + } +} + +static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn) +{ + return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index; +} + +static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct mlx5_ib_qp *qp, struct ib_udata *udata, + struct mlx5_create_qp_mbox_in **in, + struct mlx5_ib_create_qp_resp *resp, int *inlen) +{ + struct mlx5_ib_ucontext *context; + struct mlx5_ib_create_qp ucmd; + int page_shift = 0; + int uar_index; + int npages; + u32 offset = 0; + int uuarn; + int ncont = 0; + int err; + + err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + if (err) { + mlx5_ib_dbg(dev, "copy failed\n"); + return err; + } + + context = to_mucontext(pd->uobject->context); + /* + * TBD: should come from the verbs when we have the API + */ + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH); + if (uuarn < 0) { + mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n"); + mlx5_ib_dbg(dev, "reverting to medium latency\n"); + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM); + if (uuarn < 0) { + mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n"); + mlx5_ib_dbg(dev, "reverting to high latency\n"); + uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW); + if (uuarn < 0) { + mlx5_ib_warn(dev, "uuar allocation failed\n"); + return uuarn; + } + } + } + + uar_index = uuarn_to_uar_index(&context->uuari, uuarn); + mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index); + + qp->rq.offset = 0; + qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); + qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; + + err = set_user_buf_size(dev, qp, &ucmd); + if (err) + goto err_uuar; + + if (ucmd.buf_addr && qp->buf_size) { + qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, + qp->buf_size, 0, 0); + if (IS_ERR(qp->umem)) { + mlx5_ib_dbg(dev, "umem_get failed\n"); + err = PTR_ERR(qp->umem); + goto err_uuar; + } + } else { + qp->umem = NULL; + } + + if (qp->umem) { + mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift, + &ncont, NULL); + err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, 
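create_user_qp() above asks alloc_uuar() for the high class first and steps down on failure. The retry ladder reduced to its shape; the classes and the allocator here are stand-ins, not the driver's:

#include <stdio.h>

enum lat { HIGH, MEDIUM, LOW };

static int alloc_uuar(enum lat lc)
{
	return lc == LOW ? 0 : -1;	/* pretend only the low class has room */
}

int main(void)
{
	static const enum lat classes[] = { HIGH, MEDIUM, LOW };
	int uuarn = -1;
	unsigned int i;

	for (i = 0; i < 3 && uuarn < 0; i++)
		uuarn = alloc_uuar(classes[i]);
	printf("got uuarn %d\n", uuarn);
	return 0;
}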
&offset); + if (err) { + mlx5_ib_warn(dev, "bad offset\n"); + goto err_umem; + } + mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n", + ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset); + } + + *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; + *in = mlx5_vzalloc(*inlen); + if (!*in) { + err = -ENOMEM; + goto err_umem; + } + if (qp->umem) + mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0); + (*in)->ctx.log_pg_sz_remote_qpn = + cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); + (*in)->ctx.params2 = cpu_to_be32(offset << 6); + + (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); + resp->uuar_index = uuarn; + qp->uuarn = uuarn; + + err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db); + if (err) { + mlx5_ib_dbg(dev, "map failed\n"); + goto err_free; + } + + err = ib_copy_to_udata(udata, resp, sizeof(*resp)); + if (err) { + mlx5_ib_dbg(dev, "copy failed\n"); + goto err_unmap; + } + qp->create_type = MLX5_QP_USER; + + return 0; + +err_unmap: + mlx5_ib_db_unmap_user(context, &qp->db); + +err_free: + mlx5_vfree(*in); + +err_umem: + if (qp->umem) + ib_umem_release(qp->umem); + +err_uuar: + free_uuar(&context->uuari, uuarn); + return err; +} + +static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_ucontext *context; + + context = to_mucontext(pd->uobject->context); + mlx5_ib_db_unmap_user(context, &qp->db); + if (qp->umem) + ib_umem_release(qp->umem); + free_uuar(&context->uuari, qp->uuarn); +} + +static int create_kernel_qp(struct mlx5_ib_dev *dev, + struct ib_qp_init_attr *init_attr, + struct mlx5_ib_qp *qp, + struct mlx5_create_qp_mbox_in **in, int *inlen) +{ + enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW; + struct mlx5_uuar_info *uuari; + int uar_index; + int uuarn; + int err; + + uuari = &dev->mdev.priv.uuari; + if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) + return -EINVAL; + + if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) + lc = MLX5_IB_LATENCY_CLASS_FAST_PATH; + + uuarn = alloc_uuar(uuari, lc); + if (uuarn < 0) { + mlx5_ib_dbg(dev, "\n"); + return -ENOMEM; + } + + qp->bf = &uuari->bfs[uuarn]; + uar_index = qp->bf->uar->index; + + err = calc_sq_size(dev, init_attr, qp); + if (err < 0) { + mlx5_ib_dbg(dev, "err %d\n", err); + goto err_uuar; + } + + qp->rq.offset = 0; + qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; + qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); + + err = mlx5_buf_alloc(&dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + goto err_uuar; + } + + qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt); + *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages; + *in = mlx5_vzalloc(*inlen); + if (!*in) { + err = -ENOMEM; + goto err_buf; + } + (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); + (*in)->ctx.log_pg_sz_remote_qpn = + cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); + /* Set "fast registration enabled" for all kernel QPs */ + (*in)->ctx.params1 |= cpu_to_be32(1 << 11); + (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); + + mlx5_fill_page_array(&qp->buf, (*in)->pas); + + err = mlx5_db_alloc(&dev->mdev, &qp->db); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + goto err_free; + } + + qp->db.db[0] = 0; + qp->db.db[1] = 0; + + qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL); + qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), 
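Both QP creation paths encode the buffer's page size into the top byte of log_pg_sz_remote_qpn, relative to the device's 4 KiB base page (MLX5_ADAPTER_PAGE_SHIFT is 12):

#include <stdio.h>
#include <stdint.h>

#define MLX5_ADAPTER_PAGE_SHIFT 12

int main(void)
{
	int page_shift = 16;	/* a 64 KiB-contiguous umem */
	uint32_t field = (uint32_t)(page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24;

	printf("log_pg_sz_remote_qpn = 0x%08x\n", field);	/* 0x04000000 */
	return 0;
}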
GFP_KERNEL); + qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL); + qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL); + qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL); + + if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid || + !qp->sq.w_list || !qp->sq.wqe_head) { + err = -ENOMEM; + goto err_wrid; + } + qp->create_type = MLX5_QP_KERNEL; + + return 0; + +err_wrid: + mlx5_db_free(&dev->mdev, &qp->db); + kfree(qp->sq.wqe_head); + kfree(qp->sq.w_list); + kfree(qp->sq.wrid); + kfree(qp->sq.wr_data); + kfree(qp->rq.wrid); + +err_free: + mlx5_vfree(*in); + +err_buf: + mlx5_buf_free(&dev->mdev, &qp->buf); + +err_uuar: + free_uuar(&dev->mdev.priv.uuari, uuarn); + return err; +} + +static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +{ + mlx5_db_free(&dev->mdev, &qp->db); + kfree(qp->sq.wqe_head); + kfree(qp->sq.w_list); + kfree(qp->sq.wrid); + kfree(qp->sq.wr_data); + kfree(qp->rq.wrid); + mlx5_buf_free(&dev->mdev, &qp->buf); + free_uuar(&dev->mdev.priv.uuari, qp->bf->uuarn); +} + +static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) +{ + if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) || + (attr->qp_type == IB_QPT_XRC_INI)) + return cpu_to_be32(MLX5_SRQ_RQ); + else if (!qp->has_rq) + return cpu_to_be32(MLX5_ZERO_LEN_RQ); + else + return cpu_to_be32(MLX5_NON_ZERO_RQ); +} + +static int is_connected(enum ib_qp_type qp_type) +{ + if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC) + return 1; + + return 0; +} + +static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_resources *devr = &dev->devr; + struct mlx5_ib_create_qp_resp resp; + struct mlx5_create_qp_mbox_in *in; + struct mlx5_ib_create_qp ucmd; + int inlen = sizeof(*in); + int err; + + mutex_init(&qp->mutex); + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); + + if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_BLOCK_MCAST)) { + mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n"); + return -EINVAL; + } else { + qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK; + } + } + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; + + if (pd && pd->uobject) { + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + mlx5_ib_dbg(dev, "copy failed\n"); + return -EFAULT; + } + + qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE); + qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE); + } else { + qp->wq_sig = !!wq_signature; + } + + qp->has_rq = qp_has_rq(init_attr); + err = set_rq_size(dev, &init_attr->cap, qp->has_rq, + qp, (pd && pd->uobject) ? 
&ucmd : NULL); + if (err) { + mlx5_ib_dbg(dev, "err %d\n", err); + return err; + } + + if (pd) { + if (pd->uobject) { + mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); + if (ucmd.rq_wqe_shift != qp->rq.wqe_shift || + ucmd.rq_wqe_count != qp->rq.wqe_cnt) { + mlx5_ib_dbg(dev, "invalid rq params\n"); + return -EINVAL; + } + if (ucmd.sq_wqe_count > dev->mdev.caps.max_wqes) { + mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n", + ucmd.sq_wqe_count, dev->mdev.caps.max_wqes); + return -EINVAL; + } + err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen); + if (err) + mlx5_ib_dbg(dev, "err %d\n", err); + } else { + err = create_kernel_qp(dev, init_attr, qp, &in, &inlen); + if (err) + mlx5_ib_dbg(dev, "err %d\n", err); + else + qp->pa_lkey = to_mpd(pd)->pa_lkey; + } + + if (err) + return err; + } else { + in = mlx5_vzalloc(sizeof(*in)); + if (!in) + return -ENOMEM; + + qp->create_type = MLX5_QP_EMPTY; + } + + if (is_sqp(init_attr->qp_type)) + qp->port = init_attr->port_num; + + in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 | + MLX5_QP_PM_MIGRATED << 11); + + if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) + in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn); + else + in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE); + + if (qp->wq_sig) + in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG); + + if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) + in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST); + + if (qp->scat_cqe && is_connected(init_attr->qp_type)) { + int rcqe_sz; + int scqe_sz; + + rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq); + scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq); + + if (rcqe_sz == 128) + in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE; + else + in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE; + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) { + if (scqe_sz == 128) + in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE; + else + in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE; + } + } + + if (qp->rq.wqe_cnt) { + in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4); + in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3; + } + + in->ctx.rq_type_srqn = get_rx_type(qp, init_attr); + + if (qp->sq.wqe_cnt) + in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11); + else + in->ctx.sq_crq_size |= cpu_to_be16(0x8000); + + /* Set default resources */ + switch (init_attr->qp_type) { + case IB_QPT_XRC_TGT: + in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); + in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); + in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn); + break; + case IB_QPT_XRC_INI: + in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); + in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); + in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + break; + default: + if (init_attr->srq) { + in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn); + in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn); + } else { + in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); + in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + } + } + + if (init_attr->send_cq) + in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn); + + if (init_attr->recv_cq) + in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn); + + in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma); + + err = mlx5_core_create_qp(&dev->mdev, &qp->mqp, in, inlen); + if (err) { + 
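The RQ geometry lands in a single byte of the context above: the low bits carry wqe_shift - 4 and bits 3 and up carry log2(wqe_cnt). Verifying the packing:

#include <stdio.h>

int main(void)
{
	unsigned int wqe_shift = 6;	/* 64-byte receive WQEs */
	unsigned int wqe_cnt_log = 7;	/* 128 entries */
	unsigned char rq_size_stride = (wqe_shift - 4) | (wqe_cnt_log << 3);

	printf("rq_size_stride = 0x%02x\n", rq_size_stride);	/* 0x3a */
	return 0;
}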
mlx5_ib_dbg(dev, "create qp failed\n"); + goto err_create; + } + + mlx5_vfree(in); + /* Hardware wants QPN written in big-endian order (after + * shifting) for send doorbell. Precompute this value to save + * a little bit when posting sends. + */ + qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); + + qp->mqp.event = mlx5_ib_qp_event; + + return 0; + +err_create: + if (qp->create_type == MLX5_QP_USER) + destroy_qp_user(pd, qp); + else if (qp->create_type == MLX5_QP_KERNEL) + destroy_qp_kernel(dev, qp); + + mlx5_vfree(in); + return err; +} + +static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq) + __acquires(&send_cq->lock) __acquires(&recv_cq->lock) +{ + if (send_cq) { + if (recv_cq) { + if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { + spin_lock_irq(&send_cq->lock); + spin_lock_nested(&recv_cq->lock, + SINGLE_DEPTH_NESTING); + } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { + spin_lock_irq(&send_cq->lock); + __acquire(&recv_cq->lock); + } else { + spin_lock_irq(&recv_cq->lock); + spin_lock_nested(&send_cq->lock, + SINGLE_DEPTH_NESTING); + } + } else { + spin_lock_irq(&send_cq->lock); + } + } else if (recv_cq) { + spin_lock_irq(&recv_cq->lock); + } +} + +static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq) + __releases(&send_cq->lock) __releases(&recv_cq->lock) +{ + if (send_cq) { + if (recv_cq) { + if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) { + __release(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else { + spin_unlock(&send_cq->lock); + spin_unlock_irq(&recv_cq->lock); + } + } else { + spin_unlock_irq(&send_cq->lock); + } + } else if (recv_cq) { + spin_unlock_irq(&recv_cq->lock); + } +} + +static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp) +{ + return to_mpd(qp->ibqp.pd); +} + +static void get_cqs(struct mlx5_ib_qp *qp, + struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq) +{ + switch (qp->ibqp.qp_type) { + case IB_QPT_XRC_TGT: + *send_cq = NULL; + *recv_cq = NULL; + break; + case MLX5_IB_QPT_REG_UMR: + case IB_QPT_XRC_INI: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = NULL; + break; + + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + case IB_QPT_RAW_IPV6: + case IB_QPT_RAW_ETHERTYPE: + *send_cq = to_mcq(qp->ibqp.send_cq); + *recv_cq = to_mcq(qp->ibqp.recv_cq); + break; + + case IB_QPT_RAW_PACKET: + case IB_QPT_MAX: + default: + *send_cq = NULL; + *recv_cq = NULL; + break; + } +} + +static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) +{ + struct mlx5_ib_cq *send_cq, *recv_cq; + struct mlx5_modify_qp_mbox_in *in; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return; + if (qp->state != IB_QPS_RESET) + if (mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(qp->state), + MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp)) + mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n", + qp->mqp.qpn); + + get_cqs(qp, &send_cq, &recv_cq); + + if (qp->create_type == MLX5_QP_KERNEL) { + mlx5_ib_lock_cqs(send_cq, recv_cq); + __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn, + qp->ibqp.srq ? 
to_msrq(qp->ibqp.srq) : NULL); + if (send_cq != recv_cq) + __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + mlx5_ib_unlock_cqs(send_cq, recv_cq); + } + + err = mlx5_core_destroy_qp(&dev->mdev, &qp->mqp); + if (err) + mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn); + kfree(in); + + + if (qp->create_type == MLX5_QP_KERNEL) + destroy_qp_kernel(dev, qp); + else if (qp->create_type == MLX5_QP_USER) + destroy_qp_user(&get_pd(qp)->ibpd, qp); +} + +static const char *ib_qp_type_str(enum ib_qp_type type) +{ + switch (type) { + case IB_QPT_SMI: + return "IB_QPT_SMI"; + case IB_QPT_GSI: + return "IB_QPT_GSI"; + case IB_QPT_RC: + return "IB_QPT_RC"; + case IB_QPT_UC: + return "IB_QPT_UC"; + case IB_QPT_UD: + return "IB_QPT_UD"; + case IB_QPT_RAW_IPV6: + return "IB_QPT_RAW_IPV6"; + case IB_QPT_RAW_ETHERTYPE: + return "IB_QPT_RAW_ETHERTYPE"; + case IB_QPT_XRC_INI: + return "IB_QPT_XRC_INI"; + case IB_QPT_XRC_TGT: + return "IB_QPT_XRC_TGT"; + case IB_QPT_RAW_PACKET: + return "IB_QPT_RAW_PACKET"; + case MLX5_IB_QPT_REG_UMR: + return "MLX5_IB_QPT_REG_UMR"; + case IB_QPT_MAX: + default: + return "Invalid QP type"; + } +} + +struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev; + struct mlx5_ib_qp *qp; + u16 xrcdn = 0; + int err; + + if (pd) { + dev = to_mdev(pd->device); + } else { + /* being cautious here */ + if (init_attr->qp_type != IB_QPT_XRC_TGT && + init_attr->qp_type != MLX5_IB_QPT_REG_UMR) { + pr_warn("%s: no PD for transport %s\n", __func__, + ib_qp_type_str(init_attr->qp_type)); + return ERR_PTR(-EINVAL); + } + dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device); + } + + switch (init_attr->qp_type) { + case IB_QPT_XRC_TGT: + case IB_QPT_XRC_INI: + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) { + mlx5_ib_dbg(dev, "XRC not supported\n"); + return ERR_PTR(-ENOSYS); + } + init_attr->recv_cq = NULL; + if (init_attr->qp_type == IB_QPT_XRC_TGT) { + xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn; + init_attr->send_cq = NULL; + } + + /* fall through */ + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + case MLX5_IB_QPT_REG_UMR: + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + err = create_qp_common(dev, pd, init_attr, udata, qp); + if (err) { + mlx5_ib_dbg(dev, "create_qp_common failed\n"); + kfree(qp); + return ERR_PTR(err); + } + + if (is_qp0(init_attr->qp_type)) + qp->ibqp.qp_num = 0; + else if (is_qp1(init_attr->qp_type)) + qp->ibqp.qp_num = 1; + else + qp->ibqp.qp_num = qp->mqp.qpn; + + mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", + qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn, + to_mcq(init_attr->send_cq)->mcq.cqn); + + qp->xrcdn = xrcdn; + + break; + + case IB_QPT_RAW_IPV6: + case IB_QPT_RAW_ETHERTYPE: + case IB_QPT_RAW_PACKET: + case IB_QPT_MAX: + default: + mlx5_ib_dbg(dev, "unsupported qp type %d\n", + init_attr->qp_type); + /* Don't support raw QPs */ + return ERR_PTR(-EINVAL); + } + + return &qp->ibqp; +} + +int mlx5_ib_destroy_qp(struct ib_qp *qp) +{ + struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); + + destroy_qp_common(dev, mqp); + + kfree(mqp); + + return 0; +} + +static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr, + int attr_mask) +{ + u32 hw_access_flags = 0; + u8 dest_rd_atomic; + u32 access_flags; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + dest_rd_atomic = 
attr->max_dest_rd_atomic; + else + dest_rd_atomic = qp->resp_depth; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + access_flags = attr->qp_access_flags; + else + access_flags = qp->atomic_rd_en; + + if (!dest_rd_atomic) + access_flags &= IB_ACCESS_REMOTE_WRITE; + + if (access_flags & IB_ACCESS_REMOTE_READ) + hw_access_flags |= MLX5_QP_BIT_RRE; + if (access_flags & IB_ACCESS_REMOTE_ATOMIC) + hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX); + if (access_flags & IB_ACCESS_REMOTE_WRITE) + hw_access_flags |= MLX5_QP_BIT_RWE; + + return cpu_to_be32(hw_access_flags); +} + +enum { + MLX5_PATH_FLAG_FL = 1 << 0, + MLX5_PATH_FLAG_FREE_AR = 1 << 1, + MLX5_PATH_FLAG_COUNTER = 1 << 2, +}; + +static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) +{ + if (rate == IB_RATE_PORT_CURRENT) { + return 0; + } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) { + return -EINVAL; + } else { + while (rate != IB_RATE_2_5_GBPS && + !(1 << (rate + MLX5_STAT_RATE_OFFSET) & + dev->mdev.caps.stat_rate_support)) + --rate; + } + + return rate + MLX5_STAT_RATE_OFFSET; +} + +static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah, + struct mlx5_qp_path *path, u8 port, int attr_mask, + u32 path_flags, const struct ib_qp_attr *attr) +{ + int err; + + path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; + path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0; + + if (attr_mask & IB_QP_PKEY_INDEX) + path->pkey_index = attr->pkey_index; + + path->grh_mlid = ah->src_path_bits & 0x7f; + path->rlid = cpu_to_be16(ah->dlid); + + if (ah->ah_flags & IB_AH_GRH) { + path->grh_mlid |= 1 << 7; + path->mgid_index = ah->grh.sgid_index; + path->hop_limit = ah->grh.hop_limit; + path->tclass_flowlabel = + cpu_to_be32((ah->grh.traffic_class << 20) | + (ah->grh.flow_label)); + memcpy(path->rgid, ah->grh.dgid.raw, 16); + } + + err = ib_rate_to_mlx5(dev, ah->static_rate); + if (err < 0) + return err; + path->static_rate = err; + path->port = port; + + if (ah->ah_flags & IB_AH_GRH) { + if (ah->grh.sgid_index >= dev->mdev.caps.port[port - 1].gid_table_len) { + pr_err("sgid_index (%u) too large. 
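ib_rate_to_mlx5() above walks downward from the requested static rate until it hits one advertised in stat_rate_support. The search in isolation; MLX5_STAT_RATE_OFFSET is 5, the floor check is simplified, and the support mask is invented for the printout:

#include <stdio.h>

#define MLX5_STAT_RATE_OFFSET 5

static int rate_to_hw(int rate, unsigned int stat_rate_support)
{
	while (rate > 0 &&
	       !((1u << (rate + MLX5_STAT_RATE_OFFSET)) & stat_rate_support))
		--rate;
	return rate + MLX5_STAT_RATE_OFFSET;
}

int main(void)
{
	/* pretend the device supports only hardware rates 6 and 8 */
	printf("hw rate %d\n", rate_to_hw(5, (1u << 6) | (1u << 8)));	/* 8 */
	return 0;
}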
max is %d\n", + ah->grh.sgid_index, dev->mdev.caps.port[port - 1].gid_table_len); + return -EINVAL; + } + + path->grh_mlid |= 1 << 7; + path->mgid_index = ah->grh.sgid_index; + path->hop_limit = ah->grh.hop_limit; + path->tclass_flowlabel = + cpu_to_be32((ah->grh.traffic_class << 20) | + (ah->grh.flow_label)); + memcpy(path->rgid, ah->grh.dgid.raw, 16); + } + + if (attr_mask & IB_QP_TIMEOUT) + path->ackto_lt = attr->timeout << 3; + + path->sl = ah->sl & 0xf; + + return 0; +} + +static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = { + [MLX5_QP_STATE_INIT] = { + [MLX5_QP_STATE_INIT] = { + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_PRI_PORT, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_PRI_PORT, + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_Q_KEY | + MLX5_QP_OPTPAR_PRI_PORT, + }, + [MLX5_QP_STATE_RTR] = { + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX, + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX, + }, + }, + [MLX5_QP_STATE_RTR] = { + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_RNR_TIMEOUT, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE, + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, + }, + }, + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_ALT_ADDR_PATH, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_ALT_ADDR_PATH, + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY | + MLX5_QP_OPTPAR_SRQN | + MLX5_QP_OPTPAR_CQN_RCV, + }, + }, + [MLX5_QP_STATE_SQER] = { + [MLX5_QP_STATE_RTS] = { + [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE, + [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RRE, + }, + }, +}; + +static int ib_nr_to_mlx5_nr(int ib_mask) +{ + switch (ib_mask) { + case IB_QP_STATE: + return 0; + case IB_QP_CUR_STATE: + return 0; + case IB_QP_EN_SQD_ASYNC_NOTIFY: + return 0; + case IB_QP_ACCESS_FLAGS: + return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE; + case IB_QP_PKEY_INDEX: + return MLX5_QP_OPTPAR_PKEY_INDEX; + case IB_QP_PORT: + return MLX5_QP_OPTPAR_PRI_PORT; + case IB_QP_QKEY: + return MLX5_QP_OPTPAR_Q_KEY; + case IB_QP_AV: + return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH | + MLX5_QP_OPTPAR_PRI_PORT; + case IB_QP_PATH_MTU: + return 0; + case IB_QP_TIMEOUT: + return MLX5_QP_OPTPAR_ACK_TIMEOUT; + case IB_QP_RETRY_CNT: + return MLX5_QP_OPTPAR_RETRY_COUNT; + case IB_QP_RNR_RETRY: + return MLX5_QP_OPTPAR_RNR_RETRY; + case IB_QP_RQ_PSN: + return 0; + case IB_QP_MAX_QP_RD_ATOMIC: + return MLX5_QP_OPTPAR_SRA_MAX; + case IB_QP_ALT_PATH: 
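ib_mask_to_mlx5_opt() below just ORs together the per-bit translation of every attribute flag that is set. The folding loop with two stand-in translations:

#include <stdio.h>

static unsigned int xlate(unsigned int bit)
{
	switch (bit) {
	case 1u << 0: return 0x7;	/* stand-in for RWE | RRE | RAE */
	case 1u << 1: return 0x10;	/* stand-in for PKEY_INDEX */
	default: return 0;
	}
}

int main(void)
{
	unsigned int mask = (1u << 0) | (1u << 1);
	unsigned int result = 0;
	int i;

	for (i = 0; i < 32; i++)
		if (mask & (1u << i))
			result |= xlate(1u << i);
	printf("optpar 0x%x\n", result);	/* 0x17 */
	return 0;
}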
+ return MLX5_QP_OPTPAR_ALT_ADDR_PATH; + case IB_QP_MIN_RNR_TIMER: + return MLX5_QP_OPTPAR_RNR_TIMEOUT; + case IB_QP_SQ_PSN: + return 0; + case IB_QP_MAX_DEST_RD_ATOMIC: + return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE; + case IB_QP_PATH_MIG_STATE: + return MLX5_QP_OPTPAR_PM_STATE; + case IB_QP_CAP: + return 0; + case IB_QP_DEST_QPN: + return 0; + } + return 0; +} + +static int ib_mask_to_mlx5_opt(int ib_mask) +{ + int result = 0; + int i; + + for (i = 0; i < 8 * sizeof(int); i++) { + if ((1 << i) & ib_mask) + result |= ib_nr_to_mlx5_nr(1 << i); + } + + return result; +} + +static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, int attr_mask, + enum ib_qp_state cur_state, enum ib_qp_state new_state) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_cq *send_cq, *recv_cq; + struct mlx5_qp_context *context; + struct mlx5_modify_qp_mbox_in *in; + struct mlx5_ib_pd *pd; + enum mlx5_qp_state mlx5_cur, mlx5_new; + enum mlx5_qp_optpar optpar; + int sqd_event; + int mlx5_st; + int err; + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + context = &in->ctx; + err = to_mlx5_st(ibqp->qp_type); + if (err < 0) + goto out; + + context->flags = cpu_to_be32(err << 16); + + if (!(attr_mask & IB_QP_PATH_MIG_STATE)) { + context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + } else { + switch (attr->path_mig_state) { + case IB_MIG_MIGRATED: + context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + break; + case IB_MIG_REARM: + context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11); + break; + case IB_MIG_ARMED: + context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11); + break; + } + } + + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + context->mtu_msgmax = (IB_MTU_256 << 5) | 8; + } else if (ibqp->qp_type == IB_QPT_UD || + ibqp->qp_type == MLX5_IB_QPT_REG_UMR) { + context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; + } else if (attr_mask & IB_QP_PATH_MTU) { + if (attr->path_mtu < IB_MTU_256 || + attr->path_mtu > IB_MTU_4096) { + mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu); + err = -EINVAL; + goto out; + } + context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev.caps.log_max_msg; + } + + if (attr_mask & IB_QP_DEST_QPN) + context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num); + + if (attr_mask & IB_QP_PKEY_INDEX) + context->pri_path.pkey_index = attr->pkey_index; + + /* todo implement counter_index functionality */ + + if (is_sqp(ibqp->qp_type)) + context->pri_path.port = qp->port; + + if (attr_mask & IB_QP_PORT) + context->pri_path.port = attr->port_num; + + if (attr_mask & IB_QP_AV) { + err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path, + attr_mask & IB_QP_PORT ? attr->port_num : qp->port, + attr_mask, 0, attr); + if (err) + goto out; + } + + if (attr_mask & IB_QP_TIMEOUT) + context->pri_path.ackto_lt |= attr->timeout << 3; + + if (attr_mask & IB_QP_ALT_PATH) { + err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path, + attr->alt_port_num, attr_mask, 0, attr); + if (err) + goto out; + } + + pd = get_pd(qp); + get_cqs(qp, &send_cq, &recv_cq); + + context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn); + context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0; + context->cqn_recv = recv_cq ? 
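The per-bit translation above works in tandem with the opt_mask table: ib_mask_to_mlx5_opt() expands each IB attribute bit independently, and __mlx5_ib_modify_qp() later ANDs the result with opt_mask[cur_state][new_state][qp_type] so that only optional parameters legal for that exact transition reach the firmware. Below is a standalone C sketch of the same translate-then-filter pattern; the enum values and the allowed mask are invented for illustration and are not from the patch.

#include <stdio.h>

enum { OPT_A = 1u << 0, OPT_B = 1u << 1, OPT_C = 1u << 2 };

/* Hypothetical per-bit translation: one input bit may map to several
 * output bits, so the mask is expanded bit by bit, as in
 * ib_mask_to_mlx5_opt(). */
static unsigned int xlate_bit(unsigned int bit)
{
	switch (bit) {
	case 1u << 0: return OPT_A;
	case 1u << 1: return OPT_B | OPT_C;
	default:      return 0;
	}
}

static unsigned int xlate_mask(unsigned int mask)
{
	unsigned int out = 0;
	unsigned int i;

	for (i = 0; i < 32; i++)
		if (mask & (1u << i))
			out |= xlate_bit(1u << i);
	return out;
}

int main(void)
{
	unsigned int allowed = OPT_A;	/* made-up transition-table entry */
	unsigned int optpar = xlate_mask(0x3) & allowed;

	printf("optpar = 0x%x\n", optpar);	/* prints 0x1 */
	return 0;
}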
cpu_to_be32(recv_cq->mcq.cqn) : 0; + context->params1 = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28); + + if (attr_mask & IB_QP_RNR_RETRY) + context->params1 |= cpu_to_be32(attr->rnr_retry << 13); + + if (attr_mask & IB_QP_RETRY_CNT) + context->params1 |= cpu_to_be32(attr->retry_cnt << 16); + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic) + context->params1 |= + cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); + } + + if (attr_mask & IB_QP_SQ_PSN) + context->next_send_psn = cpu_to_be32(attr->sq_psn); + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + if (attr->max_dest_rd_atomic) + context->params2 |= + cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); + } + + if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) + context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask); + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); + + if (attr_mask & IB_QP_RQ_PSN) + context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); + + if (attr_mask & IB_QP_QKEY) + context->qkey = cpu_to_be32(attr->qkey); + + if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->db_rec_addr = cpu_to_be64(qp->db.dma); + + if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && + attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) + sqd_event = 1; + else + sqd_event = 0; + + if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->sq_crq_size |= cpu_to_be16(1 << 4); + + + mlx5_cur = to_mlx5_state(cur_state); + mlx5_new = to_mlx5_state(new_state); + mlx5_st = to_mlx5_st(ibqp->qp_type); + if (mlx5_st < 0) + goto out; + + optpar = ib_mask_to_mlx5_opt(attr_mask); + optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; + in->optparam = cpu_to_be32(optpar); + err = mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(cur_state), + to_mlx5_state(new_state), in, sqd_event, + &qp->mqp); + if (err) + goto out; + + qp->state = new_state; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->atomic_rd_en = attr->qp_access_flags; + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->resp_depth = attr->max_dest_rd_atomic; + if (attr_mask & IB_QP_PORT) + qp->port = attr->port_num; + if (attr_mask & IB_QP_ALT_PATH) + qp->alt_port = attr->alt_port_num; + + /* + * If we moved a kernel QP to RESET, clean up all old CQ + * entries and reinitialize the QP. + */ + if (new_state == IB_QPS_RESET && !ibqp->uobject) { + mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn, + ibqp->srq ? to_msrq(ibqp->srq) : NULL); + if (send_cq != recv_cq) + mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); + + qp->rq.head = 0; + qp->rq.tail = 0; + qp->sq.head = 0; + qp->sq.tail = 0; + qp->sq.cur_post = 0; + qp->sq.last_poll = 0; + qp->db.db[MLX5_RCV_DBR] = 0; + qp->db.db[MLX5_SND_DBR] = 0; + } + +out: + kfree(in); + return err; +} + +int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + enum ib_qp_state cur_state, new_state; + int err = -EINVAL; + int port; + + mutex_lock(&qp->mutex); + + cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? 
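A note on the encoding above: the max_rd_atomic and max_dest_rd_atomic hardware fields are 3-bit power-of-two exponents, so the driver stores fls(n - 1), which equals ceil(log2(n)) for n >= 1; mlx5_ib_query_qp() later decodes the field as 1 << value. A host-compilable sketch, with fls_sketch() standing in for the kernel's fls():

#include <stdio.h>

/* Portable stand-in for the kernel's fls(): 1-based index of the
 * highest set bit, 0 for an input of 0. */
static int fls_sketch(unsigned int x)
{
	int r = 0;

	while (x) {
		r++;
		x >>= 1;
	}
	return r;
}

int main(void)
{
	unsigned int n;

	/* fls(n - 1) is ceil(log2(n)) for n >= 1, so the 3-bit field
	 * holds a power-of-two exponent; decoding reverses it. */
	for (n = 1; n <= 16; n *= 2)
		printf("n=%2u -> field=%d -> decoded=%u\n",
		       n, fls_sketch(n - 1), 1u << fls_sketch(n - 1));
	return 0;
}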
attr->qp_state : cur_state; + + if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR && + !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, + IB_LINK_LAYER_UNSPECIFIED)) + goto out; + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > dev->mdev.caps.num_ports)) + goto out; + + if (attr_mask & IB_QP_PKEY_INDEX) { + port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; + if (attr->pkey_index >= dev->mdev.caps.port[port - 1].pkey_table_len) + goto out; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > dev->mdev.caps.max_ra_res_qp) + goto out; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > dev->mdev.caps.max_ra_req_qp) + goto out; + + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + err = 0; + goto out; + } + + err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state); + +out: + mutex_unlock(&qp->mutex); + return err; +} + +static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) +{ + struct mlx5_ib_cq *cq; + unsigned cur; + + cur = wq->head - wq->tail; + if (likely(cur + nreq < wq->max_post)) + return 0; + + cq = to_mcq(ib_cq); + spin_lock(&cq->lock); + cur = wq->head - wq->tail; + spin_unlock(&cq->lock); + + return cur + nreq >= wq->max_post; +} + +static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, + u64 remote_addr, u32 rkey) +{ + rseg->raddr = cpu_to_be64(remote_addr); + rseg->rkey = cpu_to_be32(rkey); + rseg->reserved = 0; +} + +static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, + struct ib_send_wr *wr) +{ + memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); +} + +static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg) +{ + dseg->byte_count = cpu_to_be32(sg->length); + dseg->lkey = cpu_to_be32(sg->lkey); + dseg->addr = cpu_to_be64(sg->addr); +} + +static __be16 get_klm_octo(int npages) +{ + return cpu_to_be16(ALIGN(npages, 8) / 2); +} + +static __be64 frwr_mkey_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_A | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE; + + return cpu_to_be64(result); +} + +static __be64 sig_mkey_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_SIGERR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE | + MLX5_MKEY_MASK_BSF_EN; + + return cpu_to_be64(result); +} + +static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr, int li) +{ + memset(umr, 0, sizeof(*umr)); + + if (li) { + umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + umr->flags = 1 << 7; + return; + } + + umr->flags = (1 << 5); /* fail if not free */ + umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len); + umr->mkey_mask = frwr_mkey_mask(); +} + +static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr) +{ + struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg; + u64 mask; + + memset(umr, 0, 
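mlx5_wq_overflow() above treats wq->head and wq->tail as free-running unsigned counters, so head - tail is the current occupancy even after the counters wrap (the function also re-reads the values under the CQ lock to synchronize with the completion path). A minimal sketch of the wraparound arithmetic:

#include <assert.h>
#include <limits.h>
#include <stdio.h>

/* Free-running producer/consumer counters: occupancy is head - tail in
 * unsigned arithmetic, which stays correct across counter wraparound. */
struct wq_sketch {
	unsigned int head;
	unsigned int tail;
	unsigned int max_post;
};

static int wq_overflow(const struct wq_sketch *wq, unsigned int nreq)
{
	unsigned int cur = wq->head - wq->tail;

	return cur + nreq >= wq->max_post;
}

int main(void)
{
	struct wq_sketch wq = { .head = 3, .tail = UINT_MAX - 1, .max_post = 16 };

	/* head wrapped past 0 but tail has not: occupancy is still 5. */
	assert(wq.head - wq.tail == 5);
	printf("overflow with 12 more posts? %d\n", wq_overflow(&wq, 12));
	return 0;
}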
sizeof(*umr)); + + if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) { + umr->flags = 1 << 5; /* fail if not free */ + umr->klm_octowords = get_klm_octo(umrwr->npages); + mask = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_PD | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_A | + MLX5_MKEY_MASK_FREE; + umr->mkey_mask = cpu_to_be64(mask); + } else { + umr->flags = 2 << 5; /* fail if free */ + mask = MLX5_MKEY_MASK_FREE; + umr->mkey_mask = cpu_to_be64(mask); + } + + if (!wr->num_sge) + umr->flags |= (1 << 7); /* inline */ +} + +static u8 get_umr_flags(int acc) +{ + return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | + MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; +} + +static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, + int li, int *writ) +{ + memset(seg, 0, sizeof(*seg)); + if (li) { + seg->status = 1 << 6; + return; + } + + seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) | + MLX5_ACCESS_MODE_MTT; + *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); + seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); + seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); + seg->len = cpu_to_be64(wr->wr.fast_reg.length); + seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2); + seg->log2_page_size = wr->wr.fast_reg.page_shift; +} + +static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr) +{ + memset(seg, 0, sizeof(*seg)); + if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) { + seg->status = 1 << 6; + return; + } + + seg->flags = convert_access(wr->wr.fast_reg.access_flags); + seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn); + seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start); + seg->len = cpu_to_be64(wr->wr.fast_reg.length); + seg->log2_page_size = wr->wr.fast_reg.page_shift; + seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 | + mlx5_mkey_variant(wr->wr.fast_reg.rkey)); +} + +static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, + struct ib_send_wr *wr, + struct mlx5_core_dev *mdev, + struct mlx5_ib_pd *pd, + int writ) +{ + struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list); + u64 *page_list = wr->wr.fast_reg.page_list->page_list; + u64 perm = MLX5_EN_RD | (writ ? 
MLX5_EN_WR : 0); + int i; + + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) + mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm); + dseg->addr = cpu_to_be64(mfrpl->map); + dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64)); + dseg->lkey = cpu_to_be32(pd->pa_lkey); +} + +static __be32 send_ieth(struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return wr->ex.imm_data; + + case IB_WR_SEND_WITH_INV: + return cpu_to_be32(wr->ex.invalidate_rkey); + + default: + return 0; + } +} + +static u8 calc_sig(void *wqe, int size) +{ + u8 *p = wqe; + u8 res = 0; + int i; + + for (i = 0; i < size; i++) + res ^= p[i]; + + return ~res; +} + +static u8 wq_sig(void *wqe) +{ + return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4); +} + +static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, + void *wqe, int *sz) +{ + struct mlx5_wqe_inline_seg *seg; + void *qend = qp->sq.qend; + void *addr; + int inl = 0; + int copy; + int len; + int i; + + seg = wqe; + wqe += sizeof(*seg); + for (i = 0; i < wr->num_sge; i++) { + addr = (void *)(unsigned long)(wr->sg_list[i].addr); + len = wr->sg_list[i].length; + inl += len; + + if (unlikely(inl > qp->max_inline_data)) + return -ENOMEM; + + if (unlikely(wqe + len > qend)) { + copy = qend - wqe; + memcpy(wqe, addr, copy); + addr += copy; + len -= copy; + wqe = mlx5_get_send_wqe(qp, 0); + } + memcpy(wqe, addr, len); + wqe += len; + } + + seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); + + *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16; + + return 0; +} + +static u16 prot_field_size(enum ib_signature_type type) +{ + switch (type) { + case IB_SIG_TYPE_T10_DIF: + return MLX5_DIF_SIZE; + default: + return 0; + } +} + +static u8 bs_selector(int block_size) +{ + switch (block_size) { + case 512: return 0x1; + case 520: return 0x2; + case 4096: return 0x3; + case 4160: return 0x4; + case 1073741824: return 0x5; + default: return 0; + } +} + +static int format_selector(struct ib_sig_attrs *attr, + struct ib_sig_domain *domain, + int *selector) +{ + +#define FORMAT_DIF_NONE 0 +#define FORMAT_DIF_CRC_INC 8 +#define FORMAT_DIF_CRC_NO_INC 12 +#define FORMAT_DIF_CSUM_INC 13 +#define FORMAT_DIF_CSUM_NO_INC 14 + + switch (domain->sig.dif.type) { + case IB_T10DIF_NONE: + /* No DIF */ + *selector = FORMAT_DIF_NONE; + break; + case IB_T10DIF_TYPE1: /* Fall through */ + case IB_T10DIF_TYPE2: + switch (domain->sig.dif.bg_type) { + case IB_T10DIF_CRC: + *selector = FORMAT_DIF_CRC_INC; + break; + case IB_T10DIF_CSUM: + *selector = FORMAT_DIF_CSUM_INC; + break; + default: + return 1; + } + break; + case IB_T10DIF_TYPE3: + switch (domain->sig.dif.bg_type) { + case IB_T10DIF_CRC: + *selector = domain->sig.dif.type3_inc_reftag ? + FORMAT_DIF_CRC_INC : + FORMAT_DIF_CRC_NO_INC; + break; + case IB_T10DIF_CSUM: + *selector = domain->sig.dif.type3_inc_reftag ? 
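calc_sig() above produces the bitwise complement of the XOR of all bytes in the region, which gives checkers a convenient invariant: XOR-ing the whole signed region, signature byte included, yields 0xff for an intact buffer. A self-contained sketch of that property (buffer contents arbitrary):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Same scheme as calc_sig(): XOR all bytes, then complement. */
static uint8_t calc_sig_sketch(const void *buf, size_t size)
{
	const uint8_t *p = buf;
	uint8_t res = 0;
	size_t i;

	for (i = 0; i < size; i++)
		res ^= p[i];
	return (uint8_t)~res;
}

int main(void)
{
	uint8_t wqe[16];
	uint8_t acc = 0;
	size_t i;

	memset(wqe, 0xab, sizeof(wqe));
	wqe[15] = 0;				/* signature byte, filled last */
	wqe[15] = calc_sig_sketch(wqe, sizeof(wqe));

	/* Checking side: XOR over the whole WQE, signature included,
	 * always yields 0xff for an unmodified buffer. */
	for (i = 0; i < sizeof(wqe); i++)
		acc ^= wqe[i];
	printf("check byte = 0x%02x\n", acc);	/* prints 0xff */
	return 0;
}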
+ FORMAT_DIF_CSUM_INC : + FORMAT_DIF_CSUM_NO_INC; + break; + default: + return 1; + } + break; + default: + return 1; + } + + return 0; +} + +static int mlx5_set_bsf(struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_bsf *bsf, u32 data_size) +{ + struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; + struct mlx5_bsf_basic *basic = &bsf->basic; + struct ib_sig_domain *mem = &sig_attrs->mem; + struct ib_sig_domain *wire = &sig_attrs->wire; + int ret, selector; + + memset(bsf, 0, sizeof(*bsf)); + switch (sig_attrs->mem.sig_type) { + case IB_SIG_TYPE_T10_DIF: + if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) + return -EINVAL; + + /* Input domain check byte mask */ + basic->check_byte_mask = sig_attrs->check_mask; + if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && + mem->sig.dif.type == wire->sig.dif.type) { + /* Same block structure */ + basic->bsf_size_sbs = 1 << 4; + if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) + basic->wire.copy_byte_mask |= 0xc0; + if (mem->sig.dif.app_tag == wire->sig.dif.app_tag) + basic->wire.copy_byte_mask |= 0x30; + if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag) + basic->wire.copy_byte_mask |= 0x0f; + } else + basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); + + basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); + basic->raw_data_size = cpu_to_be32(data_size); + + ret = format_selector(sig_attrs, mem, &selector); + if (ret) + return -EINVAL; + basic->m_bfs_psv = cpu_to_be32(selector << 24 | + msig->psv_memory.psv_idx); + + ret = format_selector(sig_attrs, wire, &selector); + if (ret) + return -EINVAL; + basic->w_bfs_psv = cpu_to_be32(selector << 24 | + msig->psv_wire.psv_idx); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size) +{ + struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs; + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + struct mlx5_bsf *bsf; + u32 data_len = wr->sg_list->length; + u32 data_key = wr->sg_list->lkey; + u64 data_va = wr->sg_list->addr; + int ret; + int wqe_size; + + if (!wr->wr.sig_handover.prot || + (data_key == wr->wr.sig_handover.prot->lkey && + data_va == wr->wr.sig_handover.prot->addr && + data_len == wr->wr.sig_handover.prot->length)) { + /** + * Source domain doesn't contain signature information + * or data and protection are interleaved in memory. 
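The copy_byte_mask bits set in mlx5_set_bsf() above (0xc0, 0x30 and 0x0f) appear to select byte lanes of the 8-byte T10-DIF tuple: two guard bytes, two application-tag bytes and four reference-tag bytes, copied from the memory domain to the wire domain when the corresponding fields match. A sketch under that reading; the struct below is descriptive, not a type from the patch:

#include <stdint.h>
#include <stdio.h>

/* Layout of the 8-byte T10-DIF protection tuple the mask bits refer to
 * (field names are descriptive, not taken from the patch). */
struct dif_tuple {
	uint16_t guard;		/* CRC or checksum over the data block */
	uint16_t app_tag;
	uint32_t ref_tag;
};

_Static_assert(sizeof(struct dif_tuple) == 8, "DIF tuple is 8 bytes");

int main(void)
{
	/* One mask bit per tuple byte, MSB first: 0xc0 selects the two
	 * guard bytes, 0x30 the app tag, 0x0f the four ref-tag bytes. */
	unsigned int copy_byte_mask = 0;

	copy_byte_mask |= 0xc0;	/* same bg_type: copy guard bytes */
	copy_byte_mask |= 0x30;	/* same app_tag: copy app-tag bytes */
	copy_byte_mask |= 0x0f;	/* same ref_tag: copy ref-tag bytes */

	printf("copy_byte_mask = 0x%02x\n", copy_byte_mask); /* 0xff */
	return 0;
}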
+ * So we need to construct: + * ------------------ + * | data_klm | + * ------------------ + * | BSF | + * ------------------ + **/ + struct mlx5_klm *data_klm = *seg; + + data_klm->bcount = cpu_to_be32(data_len); + data_klm->key = cpu_to_be32(data_key); + data_klm->va = cpu_to_be64(data_va); + wqe_size = ALIGN(sizeof(*data_klm), 64); + } else { + /** + * Source domain contains signature information, + * so we need to construct a strided block format: + * --------------------------- + * | stride_block_ctrl | + * --------------------------- + * | data_klm | + * --------------------------- + * | prot_klm | + * --------------------------- + * | BSF | + * --------------------------- + **/ + struct mlx5_stride_block_ctrl_seg *sblock_ctrl; + struct mlx5_stride_block_entry *data_sentry; + struct mlx5_stride_block_entry *prot_sentry; + u32 prot_key = wr->wr.sig_handover.prot->lkey; + u64 prot_va = wr->wr.sig_handover.prot->addr; + u16 block_size = sig_attrs->mem.sig.dif.pi_interval; + int prot_size; + + sblock_ctrl = *seg; + data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); + prot_sentry = (void *)data_sentry + sizeof(*data_sentry); + + prot_size = prot_field_size(sig_attrs->mem.sig_type); + if (!prot_size) { + pr_err("Bad block size given: %u\n", block_size); + return -EINVAL; + } + sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + + prot_size); + sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); + sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); + sblock_ctrl->num_entries = cpu_to_be16(2); + + data_sentry->bcount = cpu_to_be16(block_size); + data_sentry->key = cpu_to_be32(data_key); + data_sentry->va = cpu_to_be64(data_va); + data_sentry->stride = cpu_to_be16(block_size); + + prot_sentry->bcount = cpu_to_be16(prot_size); + prot_sentry->key = cpu_to_be32(prot_key); + prot_sentry->va = cpu_to_be64(prot_va); + prot_sentry->stride = cpu_to_be16(prot_size); + + wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + + sizeof(*prot_sentry), 64); + } + + *seg += wqe_size; + *size += wqe_size / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + bsf = *seg; + ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); + if (ret) + return -EINVAL; + + *seg += sizeof(*bsf); + *size += sizeof(*bsf) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + return 0; +} + +static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, + struct ib_send_wr *wr, u32 nelements, + u32 length, u32 pdn) +{ + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + u32 sig_key = sig_mr->rkey; + u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; + + memset(seg, 0, sizeof(*seg)); + + seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | + MLX5_ACCESS_MODE_KLM; + seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | + MLX5_MKEY_BSF_EN | pdn); + seg->len = cpu_to_be64(length); + seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements))); + seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); +} + +static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr, u32 nelements) +{ + memset(umr, 0, sizeof(*umr)); + + umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; + umr->klm_octowords = get_klm_octo(nelements); + umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); + umr->mkey_mask = sig_mkey_mask(); +} + + +static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size) +{ + 
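For the strided-block branch above, here is a small host-side sketch of the size arithmetic: a control segment plus one entry each for data and protection, padded to 64 bytes, describing repeat_count cycles of block_size data bytes followed by prot_size protection bytes. The 16-byte segment sizes are assumptions for illustration, not the real structure sizes:

#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	/* Illustrative sizes only: control segment plus one entry each
	 * for the data and protection scatter lists, padded to 64 bytes
	 * as in the strided-block branch. */
	unsigned int ctrl_sz = 16, entry_sz = 16;
	unsigned int block_size = 512, prot_size = 8;
	unsigned int data_len = 8 * 512;

	unsigned int wqe_size = ALIGN_UP(ctrl_sz + 2 * entry_sz, 64);
	unsigned int repeat_count = data_len / block_size;
	unsigned int bcount_per_cycle = block_size + prot_size;

	printf("wqe_size=%u (in 16B units: %u)\n", wqe_size, wqe_size / 16);
	printf("repeat_count=%u, bytes/cycle=%u\n",
	       repeat_count, bcount_per_cycle);
	return 0;
}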
struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr); + u32 pdn = get_pd(qp)->pdn; + u32 klm_oct_size; + int region_len, ret; + + if (unlikely(wr->num_sge != 1) || + unlikely(wr->wr.sig_handover.access_flags & + IB_ACCESS_REMOTE_ATOMIC) || + unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || + unlikely(!sig_mr->sig->sig_status_checked)) + return -EINVAL; + + /* length of the protected region, data + protection */ + region_len = wr->sg_list->length; + if (wr->wr.sig_handover.prot && + (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey || + wr->wr.sig_handover.prot->addr != wr->sg_list->addr || + wr->wr.sig_handover.prot->length != wr->sg_list->length)) + region_len += wr->wr.sig_handover.prot->length; + + /** + * KLM octoword size - if protection was provided + * then we use strided block format (3 octowords), + * else we use single KLM (1 octoword) + **/ + klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1; + + set_sig_umr_segment(*seg, wr, klm_oct_size); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + ret = set_sig_data_segment(wr, qp, seg, size); + if (ret) + return ret; + + sig_mr->sig->sig_status_checked = false; + return 0; +} + +static int set_psv_wr(struct ib_sig_domain *domain, + u32 psv_idx, void **seg, int *size) +{ + struct mlx5_seg_set_psv *psv_seg = *seg; + + memset(psv_seg, 0, sizeof(*psv_seg)); + psv_seg->psv_num = cpu_to_be32(psv_idx); + switch (domain->sig_type) { + case IB_SIG_TYPE_T10_DIF: + psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | + domain->sig.dif.app_tag); + psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); + + *seg += sizeof(*psv_seg); + *size += sizeof(*psv_seg) / 16; + break; + + default: + pr_err("Bad signature type given.\n"); + return 1; + } + + return 0; +} + +static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, + struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) +{ + int writ = 0; + int li; + + li = wr->opcode == IB_WR_LOCAL_INV ? 
1 : 0; + if (unlikely(wr->send_flags & IB_SEND_INLINE)) + return -EINVAL; + + set_frwr_umr_segment(*seg, wr, li); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + set_mkey_segment(*seg, wr, li, &writ); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + if (!li) { + if (unlikely(wr->wr.fast_reg.page_list_len > + wr->wr.fast_reg.page_list->max_page_list_len)) + return -ENOMEM; + + set_frwr_pages(*seg, wr, mdev, pd, writ); + *seg += sizeof(struct mlx5_wqe_data_seg); + *size += (sizeof(struct mlx5_wqe_data_seg) / 16); + } + return 0; +} + +static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) +{ + __be32 *p = NULL; + int tidx = idx; + int i, j; + + pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx)); + for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { + if ((i & 0xf) == 0) { + void *buf = mlx5_get_send_wqe(qp, tidx); + tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1); + p = buf; + j = 0; + } + pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), + be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]), + be32_to_cpu(p[j + 3])); + } +} + +static void mlx5_bf_copy(u64 __iomem *dst, u64 *src, + unsigned bytecnt, struct mlx5_ib_qp *qp) +{ + while (bytecnt > 0) { + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + __iowrite64_copy(dst++, src++, 8); + bytecnt -= 64; + if (unlikely(src == qp->sq.qend)) + src = mlx5_get_send_wqe(qp, 0); + } +} + +static u8 get_fence(u8 fence, struct ib_send_wr *wr) +{ + if (unlikely(wr->opcode == IB_WR_LOCAL_INV && + wr->send_flags & IB_SEND_FENCE)) + return MLX5_FENCE_MODE_STRONG_ORDERING; + + if (unlikely(fence)) { + if (wr->send_flags & IB_SEND_FENCE) + return MLX5_FENCE_MODE_SMALL_AND_FENCE; + else + return fence; + + } else { + return 0; + } +} + +static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + struct ib_send_wr *wr, int *idx, + int *size, int nreq) +{ + int err = 0; + + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { + err = -ENOMEM; + return err; + } + + *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + *seg = mlx5_get_send_wqe(qp, *idx); + *ctrl = *seg; + *(uint32_t *)(*seg + 8) = 0; + (*ctrl)->imm = send_ieth(wr); + (*ctrl)->fm_ce_se = qp->sq_signal_bits | + (wr->send_flags & IB_SEND_SIGNALED ? + MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? 
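A pattern that recurs throughout the WQE builders above: after appending each segment, the cursor is compared against qp->sq.qend and wrapped back to WQE 0, because the send queue is a contiguous ring buffer. A minimal sketch with a plain byte array standing in for the SQ:

#include <stdio.h>

#define SQ_SIZE   256
#define SEG_SIZE  64

static char sq_buf[SQ_SIZE];

/* Advance a segment cursor; on reaching the one-past-end address
 * (the analogue of qp->sq.qend), snap back to the start of the ring. */
static void *advance(void *seg, int bytes)
{
	char *p = (char *)seg + bytes;

	if (p == sq_buf + SQ_SIZE)
		p = sq_buf;
	return p;
}

int main(void)
{
	void *seg = sq_buf + SQ_SIZE - SEG_SIZE;

	seg = advance(seg, SEG_SIZE);
	printf("wrapped back to start? %s\n", seg == sq_buf ? "yes" : "no");
	return 0;
}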
+ MLX5_WQE_CTRL_SOLICITED : 0); + + *seg += sizeof(**ctrl); + *size = sizeof(**ctrl) / 16; + + return err; +} + +static void finish_wqe(struct mlx5_ib_qp *qp, + struct mlx5_wqe_ctrl_seg *ctrl, + u8 size, unsigned idx, u64 wr_id, + int nreq, u8 fence, u8 next_fence, + u32 mlx5_opcode) +{ + u8 opmod = 0; + + ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | + mlx5_opcode | ((u32)opmod << 24)); + ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); + ctrl->fm_ce_se |= fence; + qp->fm_cache = next_fence; + if (unlikely(qp->wq_sig)) + ctrl->signature = wq_sig(ctrl); + + qp->sq.wrid[idx] = wr_id; + qp->sq.w_list[idx].opcode = mlx5_opcode; + qp->sq.wqe_head[idx] = qp->sq.head + nreq; + qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; +} + + +int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_core_dev *mdev = &dev->mdev; + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_mr *mr; + struct mlx5_wqe_data_seg *dpseg; + struct mlx5_wqe_xrc_seg *xrc; + struct mlx5_bf *bf = qp->bf; + int uninitialized_var(size); + void *qend = qp->sq.qend; + unsigned long flags; + unsigned idx; + int err = 0; + int inl = 0; + int num_sge; + void *seg; + int nreq; + int i; + u8 next_fence = 0; + u8 fence; + + spin_lock_irqsave(&qp->sq.lock, flags); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) { + mlx5_ib_warn(dev, "\n"); + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + fence = qp->fm_cache; + num_sge = wr->num_sge; + if (unlikely(num_sge > qp->sq.max_gs)) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + switch (ibqp->qp_type) { + case IB_QPT_XRC_INI: + xrc = seg; + xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num); + seg += sizeof(*xrc); + size += sizeof(*xrc) / 16; + /* fall through */ + case IB_QPT_RC: + switch (wr->opcode) { + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + set_raddr_seg(seg, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + seg += sizeof(struct mlx5_wqe_raddr_seg); + size += sizeof(struct mlx5_wqe_raddr_seg) / 16; + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: + mlx5_ib_warn(dev, "Atomic operations are not supported yet\n"); + err = -ENOSYS; + *bad_wr = wr; + goto out; + + case IB_WR_LOCAL_INV: + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; + ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); + err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + num_sge = 0; + break; + + case IB_WR_FAST_REG_MR: + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR; + ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); + err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + num_sge = 0; + break; + + case IB_WR_REG_SIG_MR: + qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; + mr = to_mmr(wr->wr.sig_handover.sig_mr); + + ctrl->imm = cpu_to_be32(mr->ibmr.rkey); + err 
= set_sig_umr_wr(wr, qp, &seg, &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_UMR); + /* + * SET_PSV WQEs are not signaled and solicited + * on error + */ + wr->send_flags &= ~IB_SEND_SIGNALED; + wr->send_flags |= IB_SEND_SOLICITED; + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem, + mr->sig->psv_memory.psv_idx, &seg, + &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_SET_PSV); + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire, + mr->sig->psv_wire.psv_idx, &seg, + &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_SET_PSV); + num_sge = 0; + goto skip_psv; + + default: + break; + } + break; + + case IB_QPT_UC: + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + set_raddr_seg(seg, wr->wr.rdma.remote_addr, + wr->wr.rdma.rkey); + seg += sizeof(struct mlx5_wqe_raddr_seg); + size += sizeof(struct mlx5_wqe_raddr_seg) / 16; + break; + + default: + break; + } + break; + + case IB_QPT_UD: + case IB_QPT_SMI: + case IB_QPT_GSI: + set_datagram_seg(seg, wr); + seg += sizeof(struct mlx5_wqe_datagram_seg); + size += sizeof(struct mlx5_wqe_datagram_seg) / 16; + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + break; + + case MLX5_IB_QPT_REG_UMR: + if (wr->opcode != MLX5_IB_WR_UMR) { + err = -EINVAL; + mlx5_ib_warn(dev, "bad opcode\n"); + goto out; + } + qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; + ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey); + set_reg_umr_segment(seg, wr); + seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + set_reg_mkey_segment(seg, wr); + seg += sizeof(struct mlx5_mkey_seg); + size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((seg == qend))) + seg = mlx5_get_send_wqe(qp, 0); + break; + + default: + break; + } + + if (wr->send_flags & IB_SEND_INLINE && num_sge) { + int uninitialized_var(sz); + + err = set_data_inl_seg(qp, wr, seg, &sz); + if (unlikely(err)) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + inl = 1; + size += sz; + } else { + dpseg = seg; + for (i = 0; i < num_sge; i++) { + if (unlikely(dpseg == qend)) { + seg = mlx5_get_send_wqe(qp, 0); + dpseg = seg; + } + if (likely(wr->sg_list[i].length)) { + set_data_ptr_seg(dpseg, wr->sg_list + i); + size += sizeof(struct mlx5_wqe_data_seg) / 16; + dpseg++; + } + } + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + get_fence(fence, wr), next_fence, + mlx5_ib_opcode[wr->opcode]); +skip_psv: + if (0) + dump_wqe(qp, idx, size); + } + +out: + if (likely(nreq)) { + qp->sq.head += nreq; + + /* Make sure that descriptors are written before + * updating doorbell record and ringing the doorbell + */ + wmb(); + + qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); + + /* Make sure doorbell record is 
visible to the HCA before + * we hit doorbell */ + wmb(); + + if (bf->need_lock) + spin_lock(&bf->lock); + + /* TBD enable WC */ + if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) { + mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp); + /* wc_wmb(); */ + } else { + mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset, + MLX5_GET_DOORBELL_LOCK(&bf->lock32)); + /* Make sure doorbells don't leak out of SQ spinlock + * and reach the HCA out of order. + */ + mmiowb(); + } + bf->offset ^= bf->buf_size; + if (bf->need_lock) + spin_unlock(&bf->lock); + } + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return err; +} + +static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size) +{ + sig->signature = calc_sig(sig, size); +} + +int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_wqe_data_seg *scat; + struct mlx5_rwqe_sig *sig; + unsigned long flags; + int err = 0; + int nreq; + int ind; + int i; + + spin_lock_irqsave(&qp->rq.lock, flags); + + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > qp->rq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + goto out; + } + + scat = get_recv_wqe(qp, ind); + if (qp->wq_sig) + scat++; + + for (i = 0; i < wr->num_sge; i++) + set_data_ptr_seg(scat + i, wr->sg_list + i); + + if (i < qp->rq.max_gs) { + scat[i].byte_count = 0; + scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + scat[i].addr = 0; + } + + if (qp->wq_sig) { + sig = (struct mlx5_rwqe_sig *)scat; + set_sig_seg(sig, (qp->rq.max_gs + 1) << 2); + } + + qp->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (qp->rq.wqe_cnt - 1); + } + +out: + if (likely(nreq)) { + qp->rq.head += nreq; + + /* Make sure that descriptors are written before + * doorbell record. 
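Two details of the doorbell path above are worth spelling out. First, bf->reg appears to be the write-combining BlueFlame buffer (its path is compiled out with if (0) pending WC enablement, per the "TBD enable WC" comment), while bf->regreg appears to be the regular doorbell register. Second, bf->offset ^= bf->buf_size treats the register area as two half-sized buffers and ping-pongs between them, so the device can consume one half while software fills the other. A trivial sketch of the toggle (values illustrative):

#include <stdio.h>

int main(void)
{
	/* Same toggle as bf->offset ^= bf->buf_size: alternate between
	 * the two halves of a double-buffered register area. */
	unsigned int buf_size = 256;
	unsigned int offset = 0;
	int i;

	for (i = 0; i < 4; i++) {
		printf("write at offset %u\n", offset);	/* 0, 256, 0, 256 */
		offset ^= buf_size;
	}
	return 0;
}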
+ */ + wmb(); + + *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); + } + + spin_unlock_irqrestore(&qp->rq.lock, flags); + + return err; +} + +static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state) +{ + switch (mlx5_state) { + case MLX5_QP_STATE_RST: return IB_QPS_RESET; + case MLX5_QP_STATE_INIT: return IB_QPS_INIT; + case MLX5_QP_STATE_RTR: return IB_QPS_RTR; + case MLX5_QP_STATE_RTS: return IB_QPS_RTS; + case MLX5_QP_STATE_SQ_DRAINING: + case MLX5_QP_STATE_SQD: return IB_QPS_SQD; + case MLX5_QP_STATE_SQER: return IB_QPS_SQE; + case MLX5_QP_STATE_ERR: return IB_QPS_ERR; + default: return -1; + } +} + +static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state) +{ + switch (mlx5_mig_state) { + case MLX5_QP_PM_ARMED: return IB_MIG_ARMED; + case MLX5_QP_PM_REARM: return IB_MIG_REARM; + case MLX5_QP_PM_MIGRATED: return IB_MIG_MIGRATED; + default: return -1; + } +} + +static int to_ib_qp_access_flags(int mlx5_flags) +{ + int ib_flags = 0; + + if (mlx5_flags & MLX5_QP_BIT_RRE) + ib_flags |= IB_ACCESS_REMOTE_READ; + if (mlx5_flags & MLX5_QP_BIT_RWE) + ib_flags |= IB_ACCESS_REMOTE_WRITE; + if (mlx5_flags & MLX5_QP_BIT_RAE) + ib_flags |= IB_ACCESS_REMOTE_ATOMIC; + + return ib_flags; +} + +static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr, + struct mlx5_qp_path *path) +{ + struct mlx5_core_dev *dev = &ibdev->mdev; + + memset(ib_ah_attr, 0, sizeof(*ib_ah_attr)); + ib_ah_attr->port_num = path->port; + + if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports) + return; + + ib_ah_attr->sl = path->sl & 0xf; + + ib_ah_attr->dlid = be16_to_cpu(path->rlid); + ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f; + ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0; + ib_ah_attr->ah_flags = (path->grh_mlid & (1 << 7)) ? 
IB_AH_GRH : 0; + if (ib_ah_attr->ah_flags) { + ib_ah_attr->grh.sgid_index = path->mgid_index; + ib_ah_attr->grh.hop_limit = path->hop_limit; + ib_ah_attr->grh.traffic_class = + (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff; + ib_ah_attr->grh.flow_label = + be32_to_cpu(path->tclass_flowlabel) & 0xfffff; + memcpy(ib_ah_attr->grh.dgid.raw, + path->rgid, sizeof(ib_ah_attr->grh.dgid.raw)); + } +} + +int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibqp->device); + struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_query_qp_mbox_out *outb; + struct mlx5_qp_context *context; + int mlx5_state; + int err = 0; + + mutex_lock(&qp->mutex); + outb = kzalloc(sizeof(*outb), GFP_KERNEL); + if (!outb) { + err = -ENOMEM; + goto out; + } + context = &outb->ctx; + err = mlx5_core_qp_query(&dev->mdev, &qp->mqp, outb, sizeof(*outb)); + if (err) + goto out_free; + + mlx5_state = be32_to_cpu(context->flags) >> 28; + + qp->state = to_ib_qp_state(mlx5_state); + qp_attr->qp_state = qp->state; + qp_attr->path_mtu = context->mtu_msgmax >> 5; + qp_attr->path_mig_state = + to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3); + qp_attr->qkey = be32_to_cpu(context->qkey); + qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff; + qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff; + qp_attr->dest_qp_num = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff; + qp_attr->qp_access_flags = + to_ib_qp_access_flags(be32_to_cpu(context->params2)); + + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { + to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); + to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); + qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f; + qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; + } + + qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f; + qp_attr->port_num = context->pri_path.port; + + /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ + qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING; + + qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7); + + qp_attr->max_dest_rd_atomic = + 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7); + qp_attr->min_rnr_timer = + (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f; + qp_attr->timeout = context->pri_path.ackto_lt >> 3; + qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7; + qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7; + qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3; + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = qp->rq.max_gs; + + if (!ibqp->uobject) { + qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = qp->sq.max_gs; + } else { + qp_attr->cap.max_send_wr = 0; + qp_attr->cap.max_send_sge = 0; + } + + /* We don't support inline sends for kernel QPs (yet), and we + * don't know what userspace's value should be. + */ + qp_attr->cap.max_inline_data = 0; + + qp_init_attr->cap = qp_attr->cap; + + qp_init_attr->create_flags = 0; + if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) + qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK; + + qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ? 
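The GRH unpacking above is the inverse of the packing done in mlx5_set_path(): the traffic class sits in bits 27..20 and the 20-bit flow label in the low bits of one 32-bit word. A round-trip sketch (the big-endian conversion is omitted in this host-only example):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Pack the combined traffic-class/flow-label word, mirroring the
 * (traffic_class << 20) | flow_label encoding used above. */
static uint32_t pack_tc_fl(uint8_t tclass, uint32_t flow_label)
{
	return ((uint32_t)tclass << 20) | (flow_label & 0xfffff);
}

int main(void)
{
	uint32_t w = pack_tc_fl(0x2a, 0x12345);

	assert(((w >> 20) & 0xff) == 0x2a);	/* traffic class */
	assert((w & 0xfffff) == 0x12345);	/* 20-bit flow label */
	printf("packed = 0x%08x\n", w);
	return 0;
}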
+ IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + +out_free: + kfree(outb); + +out: + mutex_unlock(&qp->mutex); + return err; +} + +struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibdev); + struct mlx5_ib_xrcd *xrcd; + int err; + + if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) + return ERR_PTR(-ENOSYS); + + xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL); + if (!xrcd) + return ERR_PTR(-ENOMEM); + + err = mlx5_core_xrcd_alloc(&dev->mdev, &xrcd->xrcdn); + if (err) { + kfree(xrcd); + return ERR_PTR(-ENOMEM); + } + + return &xrcd->ibxrcd; +} + +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ + struct mlx5_ib_dev *dev = to_mdev(xrcd->device); + u32 xrcdn = to_mxrcd(xrcd)->xrcdn; + int err; + + err = mlx5_core_xrcd_dealloc(&dev->mdev, xrcdn); + if (err) { + mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); + return err; + } + + kfree(xrcd); + + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c new file mode 100644 index 00000000000..384af6dec5e --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -0,0 +1,485 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/module.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/srq.h> +#include <linux/slab.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_user_verbs.h> + +#include "mlx5_ib.h" +#include "user.h" + +/* not supported currently */ +static int srq_signature; + +static void *get_wqe(struct mlx5_ib_srq *srq, int n) +{ + return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift); +} + +static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) +{ + struct ib_event event; + struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq; + + if (ibsrq->event_handler) { + event.device = ibsrq->device; + event.element.srq = ibsrq; + switch (type) { + case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT: + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + break; + case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: + event.event = IB_EVENT_SRQ_ERR; + break; + default: + pr_warn("mlx5_ib: Unexpected event type %d on SRQ %06x\n", + type, srq->srqn); + return; + } + + ibsrq->event_handler(&event, ibsrq->srq_context); + } +} + +static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, + struct mlx5_create_srq_mbox_in **in, + struct ib_udata *udata, int buf_size, int *inlen) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_create_srq ucmd; + size_t ucmdlen; + int err; + int npages; + int page_shift; + int ncont; + u32 offset; + + ucmdlen = + (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) < + sizeof(ucmd)) ? (sizeof(ucmd) - + sizeof(ucmd.reserved)) : sizeof(ucmd); + + if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) { + mlx5_ib_dbg(dev, "failed copy udata\n"); + return -EFAULT; + } + + if (ucmdlen == sizeof(ucmd) && + ucmd.reserved != 0) + return -EINVAL; + + srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE); + + srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size, + 0, 0); + if (IS_ERR(srq->umem)) { + mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size); + err = PTR_ERR(srq->umem); + return err; + } + + mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages, + &page_shift, &ncont, NULL); + err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, + &offset); + if (err) { + mlx5_ib_warn(dev, "bad offset\n"); + goto err_umem; + } + + *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; + *in = mlx5_vzalloc(*inlen); + if (!(*in)) { + err = -ENOMEM; + goto err_umem; + } + + mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0); + + err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), + ucmd.db_addr, &srq->db); + if (err) { + mlx5_ib_dbg(dev, "map doorbell failed\n"); + goto err_in; + } + + (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26); + + return 0; + +err_in: + mlx5_vfree(*in); + +err_umem: + ib_umem_release(srq->umem); + + return err; +} + +static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, + struct mlx5_create_srq_mbox_in **in, int buf_size, + int *inlen) +{ + int err; + int i; + struct mlx5_wqe_srq_next_seg *next; + int page_shift; + int npages; + + err = mlx5_db_alloc(&dev->mdev, &srq->db); + if (err) { + mlx5_ib_warn(dev, "alloc dbell rec failed\n"); + return err; + } + + *srq->db.db = 0; + + if (mlx5_buf_alloc(&dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) { + mlx5_ib_dbg(dev, "buf alloc failed\n"); + err = -ENOMEM; + goto err_db; + } + page_shift = srq->buf.page_shift; + + srq->head = 0; + srq->tail = srq->msrq.max - 1; + srq->wqe_ctr = 0; + + for (i = 0; i < srq->msrq.max; i++) { + next = get_wqe(srq, i); + next->next_wqe_index = + 
cpu_to_be16((i + 1) & (srq->msrq.max - 1)); + } + + npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT)); + mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n", + buf_size, page_shift, srq->buf.npages, npages); + *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages; + *in = mlx5_vzalloc(*inlen); + if (!*in) { + err = -ENOMEM; + goto err_buf; + } + mlx5_fill_page_array(&srq->buf, (*in)->pas); + + srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL); + if (!srq->wrid) { + mlx5_ib_dbg(dev, "kmalloc failed %lu\n", + (unsigned long)(srq->msrq.max * sizeof(u64))); + err = -ENOMEM; + goto err_in; + } + srq->wq_sig = !!srq_signature; + + (*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + + return 0; + +err_in: + mlx5_vfree(*in); + +err_buf: + mlx5_buf_free(&dev->mdev, &srq->buf); + +err_db: + mlx5_db_free(&dev->mdev, &srq->db); + return err; +} + +static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) +{ + mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); + ib_umem_release(srq->umem); +} + + +static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) +{ + kfree(srq->wrid); + mlx5_buf_free(&dev->mdev, &srq->buf); + mlx5_db_free(&dev->mdev, &srq->db); +} + +struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_srq *srq; + int desc_size; + int buf_size; + int err; + struct mlx5_create_srq_mbox_in *uninitialized_var(in); + int uninitialized_var(inlen); + int is_xrc; + u32 flgs, xrcdn; + + /* Sanity check SRQ size before proceeding */ + if (init_attr->attr.max_wr >= dev->mdev.caps.max_srq_wqes) { + mlx5_ib_dbg(dev, "max_wr %d, cap %d\n", + init_attr->attr.max_wr, + dev->mdev.caps.max_srq_wqes); + return ERR_PTR(-EINVAL); + } + + srq = kmalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + mutex_init(&srq->mutex); + spin_lock_init(&srq->lock); + srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1); + srq->msrq.max_gs = init_attr->attr.max_sge; + + desc_size = sizeof(struct mlx5_wqe_srq_next_seg) + + srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg); + desc_size = roundup_pow_of_two(desc_size); + desc_size = max_t(int, 32, desc_size); + srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) / + sizeof(struct mlx5_wqe_data_seg); + srq->msrq.wqe_shift = ilog2(desc_size); + buf_size = srq->msrq.max * desc_size; + mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n", + desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, + srq->msrq.max_avail_gather); + + if (pd->uobject) + err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen); + else + err = create_srq_kernel(dev, srq, &in, buf_size, &inlen); + + if (err) { + mlx5_ib_warn(dev, "create srq %s failed, err %d\n", + pd->uobject ? 
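mlx5_ib_create_srq() above sizes each receive descriptor as a next-segment header plus one data segment per requested SGE, rounds the result up to a power of two with a 32-byte floor, and derives wqe_shift and max_avail_gather from it. A host-compilable sketch of that arithmetic; the 16-byte segment sizes are assumed for illustration, and __builtin_ctz() (GCC/Clang) stands in for the kernel's ilog2():

#include <stdio.h>

static unsigned int roundup_pow2(unsigned int x)
{
	unsigned int r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	/* Assumed sizes: a 16-byte next-segment header plus a 16-byte
	 * data segment per requested SGE. */
	unsigned int next_seg = 16, data_seg = 16;
	unsigned int max_sge = 3;

	unsigned int desc_size = next_seg + max_sge * data_seg;

	desc_size = roundup_pow2(desc_size);
	if (desc_size < 32)		/* same floor as max_t(int, 32, ...) */
		desc_size = 32;

	printf("desc_size=%u, wqe_shift=%u, max_avail_gather=%u\n",
	       desc_size, (unsigned int)__builtin_ctz(desc_size),
	       (desc_size - next_seg) / data_seg);
	return 0;
}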
"user" : "kernel", err); + goto err_srq; + } + + is_xrc = (init_attr->srq_type == IB_SRQT_XRC); + in->ctx.state_log_sz = ilog2(srq->msrq.max); + flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24; + xrcdn = 0; + if (is_xrc) { + xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn; + in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn); + } else if (init_attr->srq_type == IB_SRQT_BASIC) { + xrcdn = to_mxrcd(dev->devr.x0)->xrcdn; + in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn); + } + + in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF)); + + in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn); + in->ctx.db_record = cpu_to_be64(srq->db.dma); + err = mlx5_core_create_srq(&dev->mdev, &srq->msrq, in, inlen); + mlx5_vfree(in); + if (err) { + mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); + goto err_usr_kern_srq; + } + + mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn); + + srq->msrq.event = mlx5_ib_srq_event; + srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; + + if (pd->uobject) + if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) { + mlx5_ib_dbg(dev, "copy to user failed\n"); + err = -EFAULT; + goto err_core; + } + + init_attr->attr.max_wr = srq->msrq.max - 1; + + return &srq->ibsrq; + +err_core: + mlx5_core_destroy_srq(&dev->mdev, &srq->msrq); + +err_usr_kern_srq: + if (pd->uobject) + destroy_srq_user(pd, srq); + else + destroy_srq_kernel(dev, srq); + +err_srq: + kfree(srq); + + return ERR_PTR(err); +} + +int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx5_ib_srq *srq = to_msrq(ibsrq); + int ret; + + /* We don't support resizing SRQs yet */ + if (attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + + if (attr_mask & IB_SRQ_LIMIT) { + if (attr->srq_limit >= srq->msrq.max) + return -EINVAL; + + mutex_lock(&srq->mutex); + ret = mlx5_core_arm_srq(&dev->mdev, &srq->msrq, attr->srq_limit, 1); + mutex_unlock(&srq->mutex); + + if (ret) + return ret; + } + + return 0; +} + +int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibsrq->device); + struct mlx5_ib_srq *srq = to_msrq(ibsrq); + int ret; + struct mlx5_query_srq_mbox_out *out; + + out = kzalloc(sizeof(*out), GFP_KERNEL); + if (!out) + return -ENOMEM; + + ret = mlx5_core_query_srq(&dev->mdev, &srq->msrq, out); + if (ret) + goto out_box; + + srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm); + srq_attr->max_wr = srq->msrq.max - 1; + srq_attr->max_sge = srq->msrq.max_gs; + +out_box: + kfree(out); + return ret; +} + +int mlx5_ib_destroy_srq(struct ib_srq *srq) +{ + struct mlx5_ib_dev *dev = to_mdev(srq->device); + struct mlx5_ib_srq *msrq = to_msrq(srq); + + mlx5_core_destroy_srq(&dev->mdev, &msrq->msrq); + + if (srq->uobject) { + mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); + ib_umem_release(msrq->umem); + } else { + destroy_srq_kernel(dev, msrq); + } + + kfree(srq); + return 0; +} + +void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) +{ + struct mlx5_wqe_srq_next_seg *next; + + /* always called with interrupts disabled. 
*/ + spin_lock(&srq->lock); + + next = get_wqe(srq, srq->tail); + next->next_wqe_index = cpu_to_be16(wqe_index); + srq->tail = wqe_index; + + spin_unlock(&srq->lock); +} + +int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct mlx5_ib_srq *srq = to_msrq(ibsrq); + struct mlx5_wqe_srq_next_seg *next; + struct mlx5_wqe_data_seg *scat; + unsigned long flags; + int err = 0; + int nreq; + int i; + + spin_lock_irqsave(&srq->lock, flags); + + for (nreq = 0; wr; nreq++, wr = wr->next) { + if (unlikely(wr->num_sge > srq->msrq.max_gs)) { + err = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely(srq->head == srq->tail)) { + err = -ENOMEM; + *bad_wr = wr; + break; + } + + srq->wrid[srq->head] = wr->wr_id; + + next = get_wqe(srq, srq->head); + srq->head = be16_to_cpu(next->next_wqe_index); + scat = (struct mlx5_wqe_data_seg *)(next + 1); + + for (i = 0; i < wr->num_sge; i++) { + scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length); + scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey); + scat[i].addr = cpu_to_be64(wr->sg_list[i].addr); + } + + if (i < srq->msrq.max_avail_gather) { + scat[i].byte_count = 0; + scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + scat[i].addr = 0; + } + } + + if (likely(nreq)) { + srq->wqe_ctr += nreq; + + /* Make sure that descriptors are written before + * doorbell record. + */ + wmb(); + + *srq->db.db = cpu_to_be32(srq->wqe_ctr); + } + + spin_unlock_irqrestore(&srq->lock, flags); + + return err; +} diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h new file mode 100644 index 00000000000..d0ba264ac1e --- /dev/null +++ b/drivers/infiniband/hw/mlx5/user.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef MLX5_IB_USER_H +#define MLX5_IB_USER_H + +#include <linux/types.h> + +enum { + MLX5_QP_FLAG_SIGNATURE = 1 << 0, + MLX5_QP_FLAG_SCATTER_CQE = 1 << 1, +}; + +enum { + MLX5_SRQ_FLAG_SIGNATURE = 1 << 0, +}; + + +/* Increment this value if any changes that break userspace ABI + * compatibility are made. 
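mlx5_ib_free_srq_wqe() and mlx5_ib_post_srq_recv() above manage the SRQ as a singly linked free list threaded through next_wqe_index: posting consumes the WQE at head, and a completed index is linked back in at tail (head == tail means only one slot remains, hence the -ENOMEM). An array-based sketch of the same free-list discipline:

#include <stdint.h>
#include <stdio.h>

#define NWQE 4

static uint16_t next_idx[NWQE];
static unsigned int head, tail;

/* Post: take the WQE at head, following the embedded next pointer;
 * head == tail means only the sentinel slot is left (queue full). */
static int post_one(void)
{
	unsigned int idx = head;

	if (head == tail)
		return -1;
	head = next_idx[head];
	return (int)idx;
}

/* Free: thread the completed index back in at tail. */
static void free_one(unsigned int idx)
{
	next_idx[tail] = (uint16_t)idx;
	tail = idx;
}

int main(void)
{
	unsigned int i;

	for (i = 0; i < NWQE; i++)
		next_idx[i] = (uint16_t)((i + 1) % NWQE);
	head = 0;
	tail = NWQE - 1;

	printf("got %d, %d\n", post_one(), post_one());	/* 0, 1 */
	free_one(0);
	printf("then %d\n", post_one());		/* 2 */
	return 0;
}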
+ */ +#define MLX5_IB_UVERBS_ABI_VERSION 1 + +/* Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct mlx5_ib_alloc_ucontext_req { + __u32 total_num_uuars; + __u32 num_low_latency_uuars; +}; + +struct mlx5_ib_alloc_ucontext_req_v2 { + __u32 total_num_uuars; + __u32 num_low_latency_uuars; + __u32 flags; + __u32 reserved; +}; + +struct mlx5_ib_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 bf_reg_size; + __u32 tot_uuars; + __u32 cache_line_size; + __u16 max_sq_desc_sz; + __u16 max_rq_desc_sz; + __u32 max_send_wqebb; + __u32 max_recv_wr; + __u32 max_srq_recv_wr; + __u16 num_ports; + __u16 reserved; +}; + +struct mlx5_ib_alloc_pd_resp { + __u32 pdn; +}; + +struct mlx5_ib_create_cq { + __u64 buf_addr; + __u64 db_addr; + __u32 cqe_size; + __u32 reserved; /* explicit padding so the layout matches on 32- and 64-bit */ +}; + +struct mlx5_ib_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct mlx5_ib_resize_cq { + __u64 buf_addr; + __u16 cqe_size; + __u16 reserved0; + __u32 reserved1; +}; + +struct mlx5_ib_create_srq { + __u64 buf_addr; + __u64 db_addr; + __u32 flags; + __u32 reserved; /* explicit padding so the layout matches on 32- and 64-bit */ +}; + +struct mlx5_ib_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + +struct mlx5_ib_create_qp { + __u64 buf_addr; + __u64 db_addr; + __u32 sq_wqe_count; + __u32 rq_wqe_count; + __u32 rq_wqe_shift; + __u32 flags; +}; + +struct mlx5_ib_create_qp_resp { + __u32 uuar_index; +}; +#endif /* MLX5_IB_USER_H */ diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig index 03efc074967..da314c3fec2 100644 --- a/drivers/infiniband/hw/mthca/Kconfig +++ b/drivers/infiniband/hw/mthca/Kconfig @@ -7,7 +7,7 @@ config INFINIBAND_MTHCA ("Tavor") and the MT25208 PCI Express HCA ("Arbel"). 
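To make the layout rule in the mlx5 user.h above concrete, here is a minimal userspace-side sketch of filling one of these command structs: pointers travel as __u64 (cast through uintptr_t) and zeroing the whole struct covers the reserved padding. The helper name and calling context are illustrative only, not part of the patch, and it assumes the header above is on the include path.

	#include <linux/types.h>
	#include <stdint.h>
	#include <string.h>
	/* struct mlx5_ib_create_cq comes from the mlx5 user.h added above */

	static void pack_create_cq(struct mlx5_ib_create_cq *cmd,
				   void *cq_buf, void *db_rec, __u32 cqe_size)
	{
		memset(cmd, 0, sizeof(*cmd));	/* zeroes the reserved padding too */
		cmd->buf_addr = (__u64)(uintptr_t)cq_buf;	/* no pointer types in the ABI */
		cmd->db_addr  = (__u64)(uintptr_t)db_rec;
		cmd->cqe_size = cqe_size;	/* e.g. 64 or 128 */
	}
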
config INFINIBAND_MTHCA_DEBUG - bool "Verbose debugging output" if EMBEDDED + bool "Verbose debugging output" if EXPERT depends on INFINIBAND_MTHCA default y ---help--- diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c index c5ccc2daab6..b4e0cf4e95c 100644 --- a/drivers/infiniband/hw/mthca/mthca_allocator.c +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -211,7 +211,7 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, if (!buf->direct.buf) return -ENOMEM; - pci_unmap_addr_set(&buf->direct, mapping, t); + dma_unmap_addr_set(&buf->direct, mapping, t); memset(buf->direct.buf, 0, size); @@ -251,7 +251,7 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, goto err_free; dma_list[i] = t; - pci_unmap_addr_set(&buf->page_list[i], mapping, t); + dma_unmap_addr_set(&buf->page_list[i], mapping, t); clear_page(buf->page_list[i].buf); } @@ -289,12 +289,12 @@ void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, if (is_direct) dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, - pci_unmap_addr(&buf->direct, mapping)); + dma_unmap_addr(&buf->direct, mapping)); else { for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, buf->page_list[i].buf, - pci_unmap_addr(&buf->page_list[i], + dma_unmap_addr(&buf->page_list[i], mapping)); kfree(buf->page_list); } diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index cc440f90000..712d2a30fbe 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -31,6 +31,7 @@ */ #include <linux/jiffies.h> +#include <linux/module.h> #include <linux/timer.h> #include <linux/workqueue.h> @@ -68,11 +69,16 @@ static void catas_reset(struct work_struct *work) spin_unlock_irq(&catas_lock); list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) { + struct pci_dev *pdev = dev->pdev; ret = __mthca_restart_one(dev->pdev); + /* 'dev' now is not valid */ if (ret) - mthca_err(dev, "Reset failed (%d)\n", ret); - else - mthca_dbg(dev, "Reset succeeded\n"); + printk(KERN_ERR "mthca %s: Reset failed (%d)\n", + pci_name(pdev), ret); + else { + struct mthca_dev *d = pci_get_drvdata(pdev); + mthca_dbg(d, "Reset succeeded\n"); + } } mutex_unlock(&mthca_device_mutex); @@ -88,6 +94,7 @@ static void handle_catas(struct mthca_dev *dev) event.device = &dev->ib_dev; event.event = IB_EVENT_DEVICE_FATAL; event.element.port_num = 0; + dev->active = false; ib_dispatch_event(&event); @@ -140,7 +147,7 @@ static void poll_catas(unsigned long dev_ptr) void mthca_start_catas_poll(struct mthca_dev *dev) { - unsigned long addr; + phys_addr_t addr; init_timer(&dev->catas_err.timer); dev->catas_err.map = NULL; @@ -149,18 +156,11 @@ void mthca_start_catas_poll(struct mthca_dev *dev) ((pci_resource_len(dev->pdev, 0) - 1) & dev->catas_err.addr); - if (!request_mem_region(addr, dev->catas_err.size * 4, - DRV_NAME)) { - mthca_warn(dev, "couldn't request catastrophic error region " - "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); - return; - } - dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); if (!dev->catas_err.map) { mthca_warn(dev, "couldn't map catastrophic error region " - "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); - release_mem_region(addr, dev->catas_err.size * 4); + "at 0x%llx/0x%x\n", (unsigned long long) addr, + dev->catas_err.size * 4); return; } @@ -175,13 +175,8 @@ void mthca_stop_catas_poll(struct 
mthca_dev *dev) { del_timer_sync(&dev->catas_err.timer); - if (dev->catas_err.map) { + if (dev->catas_err.map) iounmap(dev->catas_err.map); - release_mem_region(pci_resource_start(dev->pdev, 0) + - ((pci_resource_len(dev->pdev, 0) - 1) & - dev->catas_err.addr), - dev->catas_err.size * 4); - } spin_lock_irq(&catas_lock); list_del(&dev->catas_err.list); diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index c33e1c53c79..9d3e5c1ac60 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -36,6 +36,8 @@ #include <linux/pci.h> #include <linux/errno.h> #include <linux/sched.h> +#include <linux/module.h> +#include <linux/slab.h> #include <asm/io.h> #include <rdma/ib_mad.h> @@ -157,13 +159,15 @@ enum { enum { CMD_TIME_CLASS_A = (HZ + 999) / 1000 + 1, CMD_TIME_CLASS_B = (HZ + 99) / 100 + 1, - CMD_TIME_CLASS_C = (HZ + 9) / 10 + 1 + CMD_TIME_CLASS_C = (HZ + 9) / 10 + 1, + CMD_TIME_CLASS_D = 60 * HZ }; #else enum { CMD_TIME_CLASS_A = 60 * HZ, CMD_TIME_CLASS_B = 60 * HZ, - CMD_TIME_CLASS_C = 60 * HZ + CMD_TIME_CLASS_C = 60 * HZ, + CMD_TIME_CLASS_D = 60 * HZ }; #endif @@ -298,6 +302,38 @@ static int mthca_cmd_post(struct mthca_dev *dev, return err; } + +static int mthca_status_to_errno(u8 status) +{ + static const int trans_table[] = { + [MTHCA_CMD_STAT_INTERNAL_ERR] = -EIO, + [MTHCA_CMD_STAT_BAD_OP] = -EPERM, + [MTHCA_CMD_STAT_BAD_PARAM] = -EINVAL, + [MTHCA_CMD_STAT_BAD_SYS_STATE] = -ENXIO, + [MTHCA_CMD_STAT_BAD_RESOURCE] = -EBADF, + [MTHCA_CMD_STAT_RESOURCE_BUSY] = -EBUSY, + [MTHCA_CMD_STAT_DDR_MEM_ERR] = -ENOMEM, + [MTHCA_CMD_STAT_EXCEED_LIM] = -ENOMEM, + [MTHCA_CMD_STAT_BAD_RES_STATE] = -EBADF, + [MTHCA_CMD_STAT_BAD_INDEX] = -EBADF, + [MTHCA_CMD_STAT_BAD_NVMEM] = -EFAULT, + [MTHCA_CMD_STAT_BAD_QPEE_STATE] = -EINVAL, + [MTHCA_CMD_STAT_BAD_SEG_PARAM] = -EFAULT, + [MTHCA_CMD_STAT_REG_BOUND] = -EBUSY, + [MTHCA_CMD_STAT_LAM_NOT_PRE] = -EAGAIN, + [MTHCA_CMD_STAT_BAD_PKT] = -EBADMSG, + [MTHCA_CMD_STAT_BAD_SIZE] = -ENOMEM, + }; + + if (status >= ARRAY_SIZE(trans_table) || + (status != MTHCA_CMD_STAT_OK + && trans_table[status] == 0)) + return -EINVAL; + + return trans_table[status]; +} + + static int mthca_cmd_poll(struct mthca_dev *dev, u64 in_param, u64 *out_param, @@ -305,11 +341,11 @@ static int mthca_cmd_poll(struct mthca_dev *dev, u32 in_modifier, u8 op_modifier, u16 op, - unsigned long timeout, - u8 *status) + unsigned long timeout) { int err = 0; unsigned long end; + u8 status; down(&dev->cmd.poll_sem); @@ -338,7 +374,12 @@ static int mthca_cmd_poll(struct mthca_dev *dev, (u64) be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4)); - *status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24; + status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24; + if (status) { + mthca_dbg(dev, "Command %02x completed with status %02x\n", + op, status); + err = mthca_status_to_errno(status); + } out: up(&dev->cmd.poll_sem); @@ -371,8 +412,7 @@ static int mthca_cmd_wait(struct mthca_dev *dev, u32 in_modifier, u8 op_modifier, u16 op, - unsigned long timeout, - u8 *status) + unsigned long timeout) { int err = 0; struct mthca_cmd_context *context; @@ -404,10 +444,11 @@ static int mthca_cmd_wait(struct mthca_dev *dev, if (err) goto out; - *status = context->status; - if (*status) + if (context->status) { mthca_dbg(dev, "Command %02x completed with status %02x\n", - op, *status); + op, context->status); + err = 
mthca_status_to_errno(context->status); + } if (out_is_imm) *out_param = context->out_param; @@ -429,17 +470,16 @@ static int mthca_cmd_box(struct mthca_dev *dev, u32 in_modifier, u8 op_modifier, u16 op, - unsigned long timeout, - u8 *status) + unsigned long timeout) { if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS) return mthca_cmd_wait(dev, in_param, &out_param, 0, in_modifier, op_modifier, op, - timeout, status); + timeout); else return mthca_cmd_poll(dev, in_param, &out_param, 0, in_modifier, op_modifier, op, - timeout, status); + timeout); } /* Invoke a command with no output parameter */ @@ -448,11 +488,10 @@ static int mthca_cmd(struct mthca_dev *dev, u32 in_modifier, u8 op_modifier, u16 op, - unsigned long timeout, - u8 *status) + unsigned long timeout) { return mthca_cmd_box(dev, in_param, 0, in_modifier, - op_modifier, op, timeout, status); + op_modifier, op, timeout); } /* @@ -466,17 +505,16 @@ static int mthca_cmd_imm(struct mthca_dev *dev, u32 in_modifier, u8 op_modifier, u16 op, - unsigned long timeout, - u8 *status) + unsigned long timeout) { if (dev->cmd.flags & MTHCA_CMD_USE_EVENTS) return mthca_cmd_wait(dev, in_param, out_param, 1, in_modifier, op_modifier, op, - timeout, status); + timeout); else return mthca_cmd_poll(dev, in_param, out_param, 1, in_modifier, op_modifier, op, - timeout, status); + timeout); } int mthca_cmd_init(struct mthca_dev *dev) @@ -593,14 +631,14 @@ void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox) kfree(mailbox); } -int mthca_SYS_EN(struct mthca_dev *dev, u8 *status) +int mthca_SYS_EN(struct mthca_dev *dev) { u64 out; int ret; - ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, HZ, status); + ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, CMD_TIME_CLASS_D); - if (*status == MTHCA_CMD_STAT_DDR_MEM_ERR) + if (ret == -ENOMEM) mthca_warn(dev, "SYS_EN DDR error: syn=%x, sock=%d, " "sladdr=%d, SPD source=%s\n", (int) (out >> 6) & 0xf, (int) (out >> 4) & 3, @@ -609,13 +647,13 @@ int mthca_SYS_EN(struct mthca_dev *dev, u8 *status) return ret; } -int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status) +int mthca_SYS_DIS(struct mthca_dev *dev) { - return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, HZ, status); + return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C); } static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, - u64 virt, u8 *status) + u64 virt) { struct mthca_mailbox *mailbox; struct mthca_icm_iter iter; @@ -663,8 +701,8 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, if (++nent == MTHCA_MAILBOX_SIZE / 16) { err = mthca_cmd(dev, mailbox->dma, nent, 0, op, - CMD_TIME_CLASS_B, status); - if (err || *status) + CMD_TIME_CLASS_B); + if (err) goto out; nent = 0; } @@ -673,7 +711,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, if (nent) err = mthca_cmd(dev, mailbox->dma, nent, 0, op, - CMD_TIME_CLASS_B, status); + CMD_TIME_CLASS_B); switch (op) { case CMD_MAP_FA: @@ -693,24 +731,24 @@ out: return err; } -int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status) +int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm) { - return mthca_map_cmd(dev, CMD_MAP_FA, icm, -1, status); + return mthca_map_cmd(dev, CMD_MAP_FA, icm, -1); } -int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status) +int mthca_UNMAP_FA(struct mthca_dev *dev) { - return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B, status); + return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_FA, CMD_TIME_CLASS_B); } -int mthca_RUN_FW(struct mthca_dev *dev, u8 
*status) +int mthca_RUN_FW(struct mthca_dev *dev) { - return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A, status); + return mthca_cmd(dev, 0, 0, 0, CMD_RUN_FW, CMD_TIME_CLASS_A); } static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base) { - unsigned long addr; + phys_addr_t addr; u16 max_off = 0; int i; @@ -734,7 +772,7 @@ static void mthca_setup_cmd_doorbells(struct mthca_dev *dev, u64 base) mthca_dbg(dev, "Mapped doorbell page for posting FW commands\n"); } -int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) +int mthca_QUERY_FW(struct mthca_dev *dev) { struct mthca_mailbox *mailbox; u32 *outbox; @@ -768,7 +806,7 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) outbox = mailbox->buf; err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); if (err) goto out; @@ -840,7 +878,7 @@ out: return err; } -int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) +int mthca_ENABLE_LAM(struct mthca_dev *dev) { struct mthca_mailbox *mailbox; u8 info; @@ -861,14 +899,11 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) outbox = mailbox->buf; err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM, - CMD_TIME_CLASS_C, status); + CMD_TIME_CLASS_C); if (err) goto out; - if (*status == MTHCA_CMD_STAT_LAM_NOT_PRE) - goto out; - MTHCA_GET(dev->ddr_start, outbox, ENABLE_LAM_START_OFFSET); MTHCA_GET(dev->ddr_end, outbox, ENABLE_LAM_END_OFFSET); MTHCA_GET(info, outbox, ENABLE_LAM_INFO_OFFSET); @@ -893,12 +928,12 @@ out: return err; } -int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status) +int mthca_DISABLE_LAM(struct mthca_dev *dev) { - return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C, status); + return mthca_cmd(dev, 0, 0, 0, CMD_SYS_DIS, CMD_TIME_CLASS_C); } -int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) +int mthca_QUERY_DDR(struct mthca_dev *dev) { struct mthca_mailbox *mailbox; u8 info; @@ -919,7 +954,7 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) outbox = mailbox->buf; err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); if (err) goto out; @@ -949,7 +984,7 @@ out: } int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, - struct mthca_dev_lim *dev_lim, u8 *status) + struct mthca_dev_lim *dev_lim) { struct mthca_mailbox *mailbox; u32 *outbox; @@ -1025,7 +1060,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, outbox = mailbox->buf; err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); if (err) goto out; @@ -1057,7 +1092,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); if (mthca_is_memfree(dev)) dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64), - MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE; + dev->limits.mtt_seg_size) / dev->limits.mtt_seg_size; else dev_lim->reserved_mtts = 1 << (field >> 4); MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); @@ -1229,7 +1264,7 @@ static void get_board_id(void *vsd, char *board_id) } int mthca_QUERY_ADAPTER(struct mthca_dev *dev, - struct mthca_adapter *adapter, u8 *status) + struct mthca_adapter *adapter) { struct mthca_mailbox *mailbox; u32 *outbox; @@ -1248,7 +1283,7 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev, outbox = mailbox->buf; err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); if (err) goto out; @@ -1272,8 +1307,7 @@ out: } int mthca_INIT_HCA(struct mthca_dev 
*dev, - struct mthca_init_hca_param *param, - u8 *status) + struct mthca_init_hca_param *param) { struct mthca_mailbox *mailbox; __be32 *inbox; @@ -1390,7 +1424,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev, MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET); } - err = mthca_cmd(dev, mailbox->dma, 0, 0, CMD_INIT_HCA, HZ, status); + err = mthca_cmd(dev, mailbox->dma, 0, 0, + CMD_INIT_HCA, CMD_TIME_CLASS_D); mthca_free_mailbox(dev, mailbox); return err; @@ -1398,7 +1433,7 @@ int mthca_INIT_HCA(struct mthca_dev *dev, int mthca_INIT_IB(struct mthca_dev *dev, struct mthca_init_ib_param *param, - int port, u8 *status) + int port) { struct mthca_mailbox *mailbox; u32 *inbox; @@ -1442,24 +1477,24 @@ int mthca_INIT_IB(struct mthca_dev *dev, MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET); err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); mthca_free_mailbox(dev, mailbox); return err; } -int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status) +int mthca_CLOSE_IB(struct mthca_dev *dev, int port) { - return mthca_cmd(dev, 0, port, 0, CMD_CLOSE_IB, HZ, status); + return mthca_cmd(dev, 0, port, 0, CMD_CLOSE_IB, CMD_TIME_CLASS_A); } -int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status) +int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic) { - return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, HZ, status); + return mthca_cmd(dev, 0, 0, panic, CMD_CLOSE_HCA, CMD_TIME_CLASS_C); } int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, - int port, u8 *status) + int port) { struct mthca_mailbox *mailbox; u32 *inbox; @@ -1488,18 +1523,18 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET); err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB, - CMD_TIME_CLASS_B, status); + CMD_TIME_CLASS_B); mthca_free_mailbox(dev, mailbox); return err; } -int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status) +int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt) { - return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt, status); + return mthca_map_cmd(dev, CMD_MAP_ICM, icm, virt); } -int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status) +int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt) { struct mthca_mailbox *mailbox; __be64 *inbox; @@ -1514,7 +1549,7 @@ int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status inbox[1] = cpu_to_be64(dma_addr); err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM, - CMD_TIME_CLASS_B, status); + CMD_TIME_CLASS_B); mthca_free_mailbox(dev, mailbox); @@ -1525,31 +1560,31 @@ int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status return err; } -int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status) +int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count) { mthca_dbg(dev, "Unmapping %d pages at %llx from ICM.\n", page_count, (unsigned long long) virt); - return mthca_cmd(dev, virt, page_count, 0, CMD_UNMAP_ICM, CMD_TIME_CLASS_B, status); + return mthca_cmd(dev, virt, page_count, 0, + CMD_UNMAP_ICM, CMD_TIME_CLASS_B); } -int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status) +int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm) { - return mthca_map_cmd(dev, CMD_MAP_ICM_AUX, icm, -1, status); + return mthca_map_cmd(dev, CMD_MAP_ICM_AUX, icm, -1); } -int mthca_UNMAP_ICM_AUX(struct 
mthca_dev *dev, u8 *status) +int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev) { - return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_ICM_AUX, CMD_TIME_CLASS_B, status); + return mthca_cmd(dev, 0, 0, 0, CMD_UNMAP_ICM_AUX, CMD_TIME_CLASS_B); } -int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, - u8 *status) +int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages) { - int ret = mthca_cmd_imm(dev, icm_size, aux_pages, 0, 0, CMD_SET_ICM_SIZE, - CMD_TIME_CLASS_A, status); + int ret = mthca_cmd_imm(dev, icm_size, aux_pages, 0, + 0, CMD_SET_ICM_SIZE, CMD_TIME_CLASS_A); - if (ret || status) + if (ret) return ret; /* @@ -1563,74 +1598,73 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, } int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int mpt_index, u8 *status) + int mpt_index) { return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT, - CMD_TIME_CLASS_B, status); + CMD_TIME_CLASS_B); } int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int mpt_index, u8 *status) + int mpt_index) { return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index, !mailbox, CMD_HW2SW_MPT, - CMD_TIME_CLASS_B, status); + CMD_TIME_CLASS_B); } int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int num_mtt, u8 *status) + int num_mtt) { return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT, - CMD_TIME_CLASS_B, status); + CMD_TIME_CLASS_B); } -int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status) +int mthca_SYNC_TPT(struct mthca_dev *dev) { - return mthca_cmd(dev, 0, 0, 0, CMD_SYNC_TPT, CMD_TIME_CLASS_B, status); + return mthca_cmd(dev, 0, 0, 0, CMD_SYNC_TPT, CMD_TIME_CLASS_B); } int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, - int eq_num, u8 *status) + int eq_num) { mthca_dbg(dev, "%s mask %016llx for eqn %d\n", unmap ? 
"Clearing" : "Setting", (unsigned long long) event_mask, eq_num); return mthca_cmd(dev, event_mask, (unmap << 31) | eq_num, - 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status); + 0, CMD_MAP_EQ, CMD_TIME_CLASS_B); } int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int eq_num, u8 *status) + int eq_num) { return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); } int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int eq_num, u8 *status) + int eq_num) { return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0, CMD_HW2SW_EQ, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); } int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int cq_num, u8 *status) + int cq_num) { return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); } int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int cq_num, u8 *status) + int cq_num) { return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0, CMD_HW2SW_CQ, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); } -int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size, - u8 *status) +int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size) { struct mthca_mailbox *mailbox; __be32 *inbox; @@ -1654,44 +1688,43 @@ int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size, MTHCA_PUT(inbox, lkey, RESIZE_CQ_LKEY_OFFSET); err = mthca_cmd(dev, mailbox->dma, cq_num, 1, CMD_RESIZE_CQ, - CMD_TIME_CLASS_B, status); + CMD_TIME_CLASS_B); mthca_free_mailbox(dev, mailbox); return err; } int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int srq_num, u8 *status) + int srq_num) { return mthca_cmd(dev, mailbox->dma, srq_num, 0, CMD_SW2HW_SRQ, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); } int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int srq_num, u8 *status) + int srq_num) { return mthca_cmd_box(dev, 0, mailbox->dma, srq_num, 0, CMD_HW2SW_SRQ, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); } int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num, - struct mthca_mailbox *mailbox, u8 *status) + struct mthca_mailbox *mailbox) { return mthca_cmd_box(dev, 0, mailbox->dma, num, 0, - CMD_QUERY_SRQ, CMD_TIME_CLASS_A, status); + CMD_QUERY_SRQ, CMD_TIME_CLASS_A); } -int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status) +int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit) { return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ, - CMD_TIME_CLASS_B, status); + CMD_TIME_CLASS_B); } int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur, enum ib_qp_state next, u32 num, int is_ee, - struct mthca_mailbox *mailbox, u32 optmask, - u8 *status) + struct mthca_mailbox *mailbox, u32 optmask) { static const u16 op[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { @@ -1752,7 +1785,7 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur, err = mthca_cmd_box(dev, 0, mailbox ? 
mailbox->dma : 0, (!!is_ee << 24) | num, op_mod, - op[cur][next], CMD_TIME_CLASS_C, status); + op[cur][next], CMD_TIME_CLASS_C); if (0 && mailbox) { int i; @@ -1786,21 +1819,20 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur, } err = mthca_cmd(dev, mailbox->dma, optmask | (!!is_ee << 24) | num, - op_mod, op[cur][next], CMD_TIME_CLASS_C, status); + op_mod, op[cur][next], CMD_TIME_CLASS_C); } return err; } int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, - struct mthca_mailbox *mailbox, u8 *status) + struct mthca_mailbox *mailbox) { return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0, - CMD_QUERY_QPEE, CMD_TIME_CLASS_A, status); + CMD_QUERY_QPEE, CMD_TIME_CLASS_A); } -int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, - u8 *status) +int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn) { u8 op_mod; @@ -1814,7 +1846,7 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, case IB_QPT_RAW_IPV6: op_mod = 2; break; - case IB_QPT_RAW_ETY: + case IB_QPT_RAW_ETHERTYPE: op_mod = 3; break; default: @@ -1822,12 +1854,12 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, } return mthca_cmd(dev, 0, qpn, op_mod, CMD_CONF_SPECIAL_QP, - CMD_TIME_CLASS_B, status); + CMD_TIME_CLASS_B); } int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad, u8 *status) + void *in_mad, void *response_mad) { struct mthca_mailbox *inmailbox, *outmailbox; void *inbox; @@ -1894,9 +1926,9 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma, in_modifier, op_modifier, - CMD_MAD_IFC, CMD_TIME_CLASS_C, status); + CMD_MAD_IFC, CMD_TIME_CLASS_C); - if (!err && !*status) + if (!err) memcpy(response_mad, outmailbox->buf, 256); mthca_free_mailbox(dev, inmailbox); @@ -1905,33 +1937,33 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, } int mthca_READ_MGM(struct mthca_dev *dev, int index, - struct mthca_mailbox *mailbox, u8 *status) + struct mthca_mailbox *mailbox) { return mthca_cmd_box(dev, 0, mailbox->dma, index, 0, - CMD_READ_MGM, CMD_TIME_CLASS_A, status); + CMD_READ_MGM, CMD_TIME_CLASS_A); } int mthca_WRITE_MGM(struct mthca_dev *dev, int index, - struct mthca_mailbox *mailbox, u8 *status) + struct mthca_mailbox *mailbox) { return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); } int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - u16 *hash, u8 *status) + u16 *hash) { u64 imm; int err; err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH, - CMD_TIME_CLASS_A, status); + CMD_TIME_CLASS_A); *hash = imm; return err; } -int mthca_NOP(struct mthca_dev *dev, u8 *status) +int mthca_NOP(struct mthca_dev *dev) { - return mthca_cmd(dev, 0, 0x1f, 0, CMD_NOP, msecs_to_jiffies(100), status); + return mthca_cmd(dev, 0, 0x1f, 0, CMD_NOP, msecs_to_jiffies(100)); } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index 6efd3265f24..f952244c54d 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -252,79 +252,74 @@ struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, gfp_t gfp_mask); void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox); -int mthca_SYS_EN(struct mthca_dev *dev, u8 *status); -int 
mthca_SYS_DIS(struct mthca_dev *dev, u8 *status); -int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status); -int mthca_UNMAP_FA(struct mthca_dev *dev, u8 *status); -int mthca_RUN_FW(struct mthca_dev *dev, u8 *status); -int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status); -int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status); -int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status); -int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status); +int mthca_SYS_EN(struct mthca_dev *dev); +int mthca_SYS_DIS(struct mthca_dev *dev); +int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm); +int mthca_UNMAP_FA(struct mthca_dev *dev); +int mthca_RUN_FW(struct mthca_dev *dev); +int mthca_QUERY_FW(struct mthca_dev *dev); +int mthca_ENABLE_LAM(struct mthca_dev *dev); +int mthca_DISABLE_LAM(struct mthca_dev *dev); +int mthca_QUERY_DDR(struct mthca_dev *dev); int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, - struct mthca_dev_lim *dev_lim, u8 *status); + struct mthca_dev_lim *dev_lim); int mthca_QUERY_ADAPTER(struct mthca_dev *dev, - struct mthca_adapter *adapter, u8 *status); + struct mthca_adapter *adapter); int mthca_INIT_HCA(struct mthca_dev *dev, - struct mthca_init_hca_param *param, - u8 *status); + struct mthca_init_hca_param *param); int mthca_INIT_IB(struct mthca_dev *dev, struct mthca_init_ib_param *param, - int port, u8 *status); -int mthca_CLOSE_IB(struct mthca_dev *dev, int port, u8 *status); -int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status); + int port); +int mthca_CLOSE_IB(struct mthca_dev *dev, int port); +int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic); int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, - int port, u8 *status); -int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *status); -int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status); -int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count, u8 *status); -int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status); -int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status); -int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, - u8 *status); + int port); +int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt); +int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt); +int mthca_UNMAP_ICM(struct mthca_dev *dev, u64 virt, u32 page_count); +int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm); +int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev); +int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages); int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int mpt_index, u8 *status); + int mpt_index); int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int mpt_index, u8 *status); + int mpt_index); int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int num_mtt, u8 *status); -int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status); + int num_mtt); +int mthca_SYNC_TPT(struct mthca_dev *dev); int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, - int eq_num, u8 *status); + int eq_num); int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int eq_num, u8 *status); + int eq_num); int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int eq_num, u8 *status); + int eq_num); int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int cq_num, u8 *status); + 
int cq_num); int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int cq_num, u8 *status); -int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size, - u8 *status); + int cq_num); +int mthca_RESIZE_CQ(struct mthca_dev *dev, int cq_num, u32 lkey, u8 log_size); int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int srq_num, u8 *status); + int srq_num); int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - int srq_num, u8 *status); + int srq_num); int mthca_QUERY_SRQ(struct mthca_dev *dev, u32 num, - struct mthca_mailbox *mailbox, u8 *status); -int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status); + struct mthca_mailbox *mailbox); +int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit); int mthca_MODIFY_QP(struct mthca_dev *dev, enum ib_qp_state cur, enum ib_qp_state next, u32 num, int is_ee, - struct mthca_mailbox *mailbox, u32 optmask, - u8 *status); + struct mthca_mailbox *mailbox, u32 optmask); int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, - struct mthca_mailbox *mailbox, u8 *status); -int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, - u8 *status); + struct mthca_mailbox *mailbox); +int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn); int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, int port, struct ib_wc *in_wc, struct ib_grh *in_grh, - void *in_mad, void *response_mad, u8 *status); + void *in_mad, void *response_mad); int mthca_READ_MGM(struct mthca_dev *dev, int index, - struct mthca_mailbox *mailbox, u8 *status); + struct mthca_mailbox *mailbox); int mthca_WRITE_MGM(struct mthca_dev *dev, int index, - struct mthca_mailbox *mailbox, u8 *status); + struct mthca_mailbox *mailbox); int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, - u16 *hash, u8 *status); -int mthca_NOP(struct mthca_dev *dev, u8 *status); + u16 *hash); +int mthca_NOP(struct mthca_dev *dev); #endif /* MTHCA_CMD_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h index 75671f75cac..155bc66395b 100644 --- a/drivers/infiniband/hw/mthca/mthca_config_reg.h +++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h @@ -34,8 +34,6 @@ #ifndef MTHCA_CONFIG_REG_H #define MTHCA_CONFIG_REG_H -#include <asm/page.h> - #define MTHCA_HCR_BASE 0x80680 #define MTHCA_HCR_SIZE 0x0001c #define MTHCA_ECR_BASE 0x80700 diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index d9f4735c2b3..40ba8333815 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -34,6 +34,7 @@ * SOFTWARE. */ +#include <linux/gfp.h> #include <linux/hardirq.h> #include <linux/sched.h> @@ -642,7 +643,8 @@ static inline int mthca_poll_one(struct mthca_dev *dev, entry->wc_flags |= cqe->g_mlpath & 0x80 ? IB_WC_GRH : 0; checksum = (be32_to_cpu(cqe->rqpn) >> 24) | ((be32_to_cpu(cqe->my_ee) >> 16) & 0xff00); - entry->csum_ok = (cqe->sl_ipok & 1 && checksum == 0xffff); + entry->wc_flags |= (cqe->sl_ipok & 1 && checksum == 0xffff) ? 
+ IB_WC_IP_CSUM_OK : 0; } entry->status = IB_WC_SUCCESS; @@ -778,7 +780,6 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, struct mthca_mailbox *mailbox; struct mthca_cq_context *cq_context; int err = -ENOMEM; - u8 status; cq->ibcq.cqe = nent - 1; cq->is_kernel = !ctx; @@ -846,19 +847,12 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq_context->state_db = cpu_to_be32(cq->arm_db_index); } - err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status); + err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn); if (err) { mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err); goto err_out_free_mr; } - if (status) { - mthca_warn(dev, "SW2HW_CQ returned status 0x%02x\n", - status); - err = -EINVAL; - goto err_out_free_mr; - } - spin_lock_irq(&dev->cq_table.lock); if (mthca_array_set(&dev->cq_table.cq, cq->cqn & (dev->limits.num_cqs - 1), @@ -914,7 +908,6 @@ void mthca_free_cq(struct mthca_dev *dev, { struct mthca_mailbox *mailbox; int err; - u8 status; mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) { @@ -922,11 +915,9 @@ void mthca_free_cq(struct mthca_dev *dev, return; } - err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status); + err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn); if (err) mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err); - else if (status) - mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status); if (0) { __be32 *ctx = mailbox->buf; diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index ee4d073c889..7e6a6d64ad4 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -159,6 +159,7 @@ struct mthca_limits { int reserved_eqs; int num_mpts; int num_mtt_segs; + int mtt_seg_size; int fmr_reserved_mtts; int reserved_mtts; int reserved_mrws; @@ -202,6 +203,7 @@ struct mthca_pd_table { struct mthca_buddy { unsigned long **bits; + int *num_free; int max_order; spinlock_t lock; }; @@ -355,6 +357,7 @@ struct mthca_dev { struct ib_ah *sm_ah[MTHCA_MAX_PORTS]; spinlock_t sm_lock; u8 rate[MTHCA_MAX_PORTS]; + bool active; }; #ifdef CONFIG_INFINIBAND_MTHCA_DEBUG diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 4e36aa7cb3d..69020173899 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -34,6 +34,7 @@ #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/pci.h> +#include <linux/slab.h> #include "mthca_dev.h" #include "mthca_cmd.h" @@ -356,7 +357,7 @@ static int mthca_eq_int(struct mthca_dev *dev, struct mthca_eq *eq) mthca_warn(dev, "Unhandled event %02x(%02x) on EQ %d\n", eqe->type, eqe->subtype, eq->eqn); break; - }; + } set_eqe_hw(eqe); ++eq->cons_index; @@ -473,7 +474,6 @@ static int mthca_create_eq(struct mthca_dev *dev, struct mthca_eq_context *eq_context; int err = -ENOMEM; int i; - u8 status; eq->dev = dev; eq->nent = roundup_pow_of_two(max(nent, 2)); @@ -503,7 +503,7 @@ static int mthca_create_eq(struct mthca_dev *dev, goto err_out_free_pages; dma_list[i] = t; - pci_unmap_addr_set(&eq->page_list[i], mapping, t); + dma_unmap_addr_set(&eq->page_list[i], mapping, t); clear_page(eq->page_list[i].buf); } @@ -542,15 +542,9 @@ static int mthca_create_eq(struct mthca_dev *dev, eq_context->intr = intr; eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey); - err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status); + err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn); if (err) { - mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err); - goto err_out_free_mr; - } - if (status) { - mthca_warn(dev, "SW2HW_EQ 
returned status 0x%02x\n", - status); - err = -EINVAL; + mthca_warn(dev, "SW2HW_EQ returned %d\n", err); goto err_out_free_mr; } @@ -578,7 +572,7 @@ static int mthca_create_eq(struct mthca_dev *dev, if (eq->page_list[i].buf) dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, eq->page_list[i].buf, - pci_unmap_addr(&eq->page_list[i], + dma_unmap_addr(&eq->page_list[i], mapping)); mthca_free_mailbox(dev, mailbox); @@ -596,7 +590,6 @@ static void mthca_free_eq(struct mthca_dev *dev, { struct mthca_mailbox *mailbox; int err; - u8 status; int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) / PAGE_SIZE; int i; @@ -605,11 +598,9 @@ static void mthca_free_eq(struct mthca_dev *dev, if (IS_ERR(mailbox)) return; - err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status); + err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn); if (err) - mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err); - if (status) - mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status); + mthca_warn(dev, "HW2SW_EQ returned %d\n", err); dev->eq_table.arm_mask &= ~eq->eqn_mask; @@ -628,7 +619,7 @@ static void mthca_free_eq(struct mthca_dev *dev, for (i = 0; i < npages; ++i) pci_free_consistent(dev->pdev, PAGE_SIZE, eq->page_list[i].buf, - pci_unmap_addr(&eq->page_list[i], mapping)); + dma_unmap_addr(&eq->page_list[i], mapping)); kfree(eq->page_list); mthca_free_mailbox(dev, mailbox); @@ -641,38 +632,26 @@ static void mthca_free_irqs(struct mthca_dev *dev) if (dev->eq_table.have_irq) free_irq(dev->pdev->irq, dev); for (i = 0; i < MTHCA_NUM_EQ; ++i) - if (dev->eq_table.eq[i].have_irq) + if (dev->eq_table.eq[i].have_irq) { free_irq(dev->eq_table.eq[i].msi_x_vector, dev->eq_table.eq + i); + dev->eq_table.eq[i].have_irq = 0; + } } static int mthca_map_reg(struct mthca_dev *dev, unsigned long offset, unsigned long size, void __iomem **map) { - unsigned long base = pci_resource_start(dev->pdev, 0); - - if (!request_mem_region(base + offset, size, DRV_NAME)) - return -EBUSY; + phys_addr_t base = pci_resource_start(dev->pdev, 0); *map = ioremap(base + offset, size); - if (!*map) { - release_mem_region(base + offset, size); + if (!*map) return -ENOMEM; - } return 0; } -static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset, - unsigned long size, void __iomem *map) -{ - unsigned long base = pci_resource_start(dev->pdev, 0); - - release_mem_region(base + offset, size); - iounmap(map); -} - static int mthca_map_eq_regs(struct mthca_dev *dev) { if (mthca_is_memfree(dev)) { @@ -699,9 +678,7 @@ static int mthca_map_eq_regs(struct mthca_dev *dev) dev->fw.arbel.eq_arm_base) + 4, 4, &dev->eq_regs.arbel.eq_arm)) { mthca_err(dev, "Couldn't map EQ arm register, aborting.\n"); - mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->clr_base); return -ENOMEM; } @@ -710,12 +687,8 @@ static int mthca_map_eq_regs(struct mthca_dev *dev) MTHCA_EQ_SET_CI_SIZE, &dev->eq_regs.arbel.eq_set_ci_base)) { mthca_err(dev, "Couldn't map EQ CI register, aborting.\n"); - mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.eq_arm_base) + 4, 4, - dev->eq_regs.arbel.eq_arm); - mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->eq_regs.arbel.eq_arm); + iounmap(dev->clr_base); return -ENOMEM; } } else { @@ -731,8 +704,7 @@ static int mthca_map_eq_regs(struct mthca_dev *dev) &dev->eq_regs.tavor.ecr_base)) { mthca_err(dev, "Couldn't map ecr register, " 
"aborting.\n"); - mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->clr_base); return -ENOMEM; } } @@ -744,29 +716,18 @@ static int mthca_map_eq_regs(struct mthca_dev *dev) static void mthca_unmap_eq_regs(struct mthca_dev *dev) { if (mthca_is_memfree(dev)) { - mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.eq_set_ci_base, - MTHCA_EQ_SET_CI_SIZE, - dev->eq_regs.arbel.eq_set_ci_base); - mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.eq_arm_base) + 4, 4, - dev->eq_regs.arbel.eq_arm); - mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->eq_regs.arbel.eq_set_ci_base); + iounmap(dev->eq_regs.arbel.eq_arm); + iounmap(dev->clr_base); } else { - mthca_unmap_reg(dev, MTHCA_ECR_BASE, - MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE, - dev->eq_regs.tavor.ecr_base); - mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->eq_regs.tavor.ecr_base); + iounmap(dev->clr_base); } } int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) { int ret; - u8 status; /* * We assume that mapping one page is enough for the whole EQ @@ -780,14 +741,12 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) return -ENOMEM; dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(dev->eq_table.icm_dma)) { + if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) { __free_page(dev->eq_table.icm_page); return -ENOMEM; } - ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt, &status); - if (!ret && status) - ret = -EINVAL; + ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt); if (ret) { pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); @@ -799,9 +758,7 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt) void mthca_unmap_eq_icm(struct mthca_dev *dev) { - u8 status; - - mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1, &status); + mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1); pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); __free_page(dev->eq_table.icm_page); @@ -810,7 +767,6 @@ void mthca_unmap_eq_icm(struct mthca_dev *dev) int mthca_init_eq_table(struct mthca_dev *dev) { int err; - u8 status; u8 intr; int i; @@ -858,49 +814,50 @@ int mthca_init_eq_table(struct mthca_dev *dev) if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { static const char *eq_name[] = { - [MTHCA_EQ_COMP] = DRV_NAME " (comp)", - [MTHCA_EQ_ASYNC] = DRV_NAME " (async)", - [MTHCA_EQ_CMD] = DRV_NAME " (cmd)" + [MTHCA_EQ_COMP] = DRV_NAME "-comp", + [MTHCA_EQ_ASYNC] = DRV_NAME "-async", + [MTHCA_EQ_CMD] = DRV_NAME "-cmd" }; for (i = 0; i < MTHCA_NUM_EQ; ++i) { + snprintf(dev->eq_table.eq[i].irq_name, + IB_DEVICE_NAME_MAX, + "%s@pci:%s", eq_name[i], + pci_name(dev->pdev)); err = request_irq(dev->eq_table.eq[i].msi_x_vector, mthca_is_memfree(dev) ? mthca_arbel_msi_x_interrupt : mthca_tavor_msi_x_interrupt, - 0, eq_name[i], dev->eq_table.eq + i); + 0, dev->eq_table.eq[i].irq_name, + dev->eq_table.eq + i); if (err) goto err_out_cmd; dev->eq_table.eq[i].have_irq = 1; } } else { + snprintf(dev->eq_table.eq[0].irq_name, IB_DEVICE_NAME_MAX, + DRV_NAME "@pci:%s", pci_name(dev->pdev)); err = request_irq(dev->pdev->irq, mthca_is_memfree(dev) ? 
mthca_arbel_interrupt : mthca_tavor_interrupt, - IRQF_SHARED, DRV_NAME, dev); + IRQF_SHARED, dev->eq_table.eq[0].irq_name, dev); if (err) goto err_out_cmd; dev->eq_table.have_irq = 1; } err = mthca_MAP_EQ(dev, async_mask(dev), - 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status); + 0, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); if (err) mthca_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n", dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, err); - if (status) - mthca_warn(dev, "MAP_EQ for async EQ %d returned status 0x%02x\n", - dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, status); err = mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, - 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status); + 0, dev->eq_table.eq[MTHCA_EQ_CMD].eqn); if (err) mthca_warn(dev, "MAP_EQ for cmd EQ %d failed (%d)\n", dev->eq_table.eq[MTHCA_EQ_CMD].eqn, err); - if (status) - mthca_warn(dev, "MAP_EQ for cmd EQ %d returned status 0x%02x\n", - dev->eq_table.eq[MTHCA_EQ_CMD].eqn, status); for (i = 0; i < MTHCA_NUM_EQ; ++i) if (mthca_is_memfree(dev)) @@ -930,15 +887,14 @@ err_out_free: void mthca_cleanup_eq_table(struct mthca_dev *dev) { - u8 status; int i; mthca_free_irqs(dev); mthca_MAP_EQ(dev, async_mask(dev), - 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn, &status); + 1, dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); mthca_MAP_EQ(dev, MTHCA_CMD_EVENT_MASK, - 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn, &status); + 1, dev->eq_table.eq[MTHCA_EQ_CMD].eqn); for (i = 0; i < MTHCA_NUM_EQ; ++i) mthca_free_eq(dev, &dev->eq_table.eq[i]); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 640449582ab..b6f7f457fc5 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -104,7 +104,8 @@ static void update_sm_ah(struct mthca_dev *dev, */ static void smp_snoop(struct ib_device *ibdev, u8 port_num, - struct ib_mad *mad) + struct ib_mad *mad, + u16 prev_lid) { struct ib_event event; @@ -114,6 +115,7 @@ static void smp_snoop(struct ib_device *ibdev, if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { struct ib_port_info *pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; + u16 lid = be16_to_cpu(pinfo->lid); mthca_update_rate(to_mdev(ibdev), port_num); update_sm_ah(to_mdev(ibdev), port_num, @@ -123,12 +125,15 @@ static void smp_snoop(struct ib_device *ibdev, event.device = ibdev; event.element.port_num = port_num; - if (pinfo->clientrereg_resv_subnetto & 0x80) + if (pinfo->clientrereg_resv_subnetto & 0x80) { event.event = IB_EVENT_CLIENT_REREGISTER; - else - event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } - ib_dispatch_event(&event); + if (prev_lid != lid) { + event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } } if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { @@ -166,6 +171,8 @@ static void forward_trap(struct mthca_dev *dev, if (agent) { send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC); + if (IS_ERR(send_buf)) + return; /* * We rely here on the fact that MLX QPs don't use the * address handle after the send is posted (this is @@ -194,8 +201,9 @@ int mthca_process_mad(struct ib_device *ibdev, struct ib_mad *out_mad) { int err; - u8 status; u16 slid = in_wc ? 
in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); + u16 prev_lid = 0; + struct ib_port_attr pattr; /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && @@ -233,26 +241,26 @@ int mthca_process_mad(struct ib_device *ibdev, return IB_MAD_RESULT_SUCCESS; } else return IB_MAD_RESULT_SUCCESS; + if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || + in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && + in_mad->mad_hdr.method == IB_MGMT_METHOD_SET && + in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + !ib_query_port(ibdev, port_num, &pattr)) + prev_lid = pattr.lid; err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY, mad_flags & IB_MAD_IGNORE_BKEY, - port_num, in_wc, in_grh, in_mad, out_mad, - &status); - if (err) { - mthca_err(to_mdev(ibdev), "MAD_IFC failed\n"); - return IB_MAD_RESULT_FAILURE; - } - if (status == MTHCA_CMD_STAT_BAD_PKT) + port_num, in_wc, in_grh, in_mad, out_mad); + if (err == -EBADMSG) return IB_MAD_RESULT_SUCCESS; - if (status) { - mthca_err(to_mdev(ibdev), "MAD_IFC returned status %02x\n", - status); + else if (err) { + mthca_err(to_mdev(ibdev), "MAD_IFC returned %d\n", err); return IB_MAD_RESULT_FAILURE; } if (!out_mad->mad_hdr.status) { - smp_snoop(ibdev, port_num, in_mad); + smp_snoop(ibdev, port_num, in_mad, prev_lid); node_desc_override(ibdev, out_mad); } diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index fb9f91b60f3..ded76c101dd 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -37,6 +37,7 @@ #include <linux/errno.h> #include <linux/pci.h> #include <linux/interrupt.h> +#include <linux/gfp.h> #include "mthca_dev.h" #include "mthca_config_reg.h" @@ -125,7 +126,11 @@ module_param_named(fmr_reserved_mtts, hca_profile.fmr_reserved_mtts, int, 0444); MODULE_PARM_DESC(fmr_reserved_mtts, "number of memory translation table segments reserved for FMR"); -static char mthca_version[] __devinitdata = +static int log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8); +module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); +MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)"); + +static char mthca_version[] = DRV_NAME ": Mellanox InfiniBand HCA driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -144,7 +149,7 @@ static int mthca_tune_pci(struct mthca_dev *mdev) } else if (!(mdev->mthca_flags & MTHCA_FLAG_PCIE)) mthca_info(mdev, "No PCI-X capability, not setting RBC.\n"); - if (pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP)) { + if (pci_is_pcie(mdev->pdev)) { if (pcie_set_readrq(mdev->pdev, 4096)) { mthca_err(mdev, "Couldn't write PCI Express read request, " "aborting.\n"); @@ -160,18 +165,14 @@ static int mthca_tune_pci(struct mthca_dev *mdev) static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) { int err; - u8 status; - err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status); + mdev->limits.mtt_seg_size = (1 << log_mtts_per_seg) * 8; + err = mthca_QUERY_DEV_LIM(mdev, dev_lim); if (err) { - mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); + mthca_err(mdev, "QUERY_DEV_LIM command returned %d" + ", aborting.\n", err); return err; } - if (status) { - mthca_err(mdev, "QUERY_DEV_LIM returned status 0x%02x, " - "aborting.\n", status); - return -EINVAL; - } if (dev_lim->min_page_sz > PAGE_SIZE) { mthca_err(mdev, "HCA minimum page size of %d bigger than " "kernel PAGE_SIZE of %ld, aborting.\n", @@ -287,49 +288,32 @@ static int 
mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim) static int mthca_init_tavor(struct mthca_dev *mdev) { s64 size; - u8 status; int err; struct mthca_dev_lim dev_lim; struct mthca_profile profile; struct mthca_init_hca_param init_hca; - err = mthca_SYS_EN(mdev, &status); + err = mthca_SYS_EN(mdev); if (err) { - mthca_err(mdev, "SYS_EN command failed, aborting.\n"); + mthca_err(mdev, "SYS_EN command returned %d, aborting.\n", err); return err; } - if (status) { - mthca_err(mdev, "SYS_EN returned status 0x%02x, " - "aborting.\n", status); - return -EINVAL; - } - err = mthca_QUERY_FW(mdev, &status); + err = mthca_QUERY_FW(mdev); if (err) { - mthca_err(mdev, "QUERY_FW command failed, aborting.\n"); - goto err_disable; - } - if (status) { - mthca_err(mdev, "QUERY_FW returned status 0x%02x, " - "aborting.\n", status); - err = -EINVAL; + mthca_err(mdev, "QUERY_FW command returned %d," + " aborting.\n", err); goto err_disable; } - err = mthca_QUERY_DDR(mdev, &status); + err = mthca_QUERY_DDR(mdev); if (err) { - mthca_err(mdev, "QUERY_DDR command failed, aborting.\n"); - goto err_disable; - } - if (status) { - mthca_err(mdev, "QUERY_DDR returned status 0x%02x, " - "aborting.\n", status); - err = -EINVAL; + mthca_err(mdev, "QUERY_DDR command returned %d, aborting.\n", err); goto err_disable; } err = mthca_dev_lim(mdev, &dev_lim); if (err) { - mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); + mthca_err(mdev, "QUERY_DEV_LIM command returned %d, aborting.\n", err); goto err_disable; } @@ -345,29 +329,22 @@ static int mthca_init_tavor(struct mthca_dev *mdev) goto err_disable; } - err = mthca_INIT_HCA(mdev, &init_hca, &status); + err = mthca_INIT_HCA(mdev, &init_hca); if (err) { - mthca_err(mdev, "INIT_HCA command failed, aborting.\n"); - goto err_disable; - } - if (status) { - mthca_err(mdev, "INIT_HCA returned status 0x%02x, " - "aborting.\n", status); - err = -EINVAL; + mthca_err(mdev, "INIT_HCA command returned %d, aborting.\n", err); goto err_disable; } return 0; err_disable: - mthca_SYS_DIS(mdev, &status); + mthca_SYS_DIS(mdev); return err; } static int mthca_load_fw(struct mthca_dev *mdev) { - u8 status; int err; /* FIXME: use HCA-attached memory for FW if present */ @@ -380,31 +357,21 @@ static int mthca_load_fw(struct mthca_dev *mdev) return -ENOMEM; } - err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm, &status); + err = mthca_MAP_FA(mdev, mdev->fw.arbel.fw_icm); if (err) { - mthca_err(mdev, "MAP_FA command failed, aborting.\n"); - goto err_free; - } - if (status) { - mthca_err(mdev, "MAP_FA returned status 0x%02x, aborting.\n", status); - err = -EINVAL; + mthca_err(mdev, "MAP_FA command returned %d, aborting.\n", err); goto err_free; } - err = mthca_RUN_FW(mdev, &status); + err = mthca_RUN_FW(mdev); if (err) { - mthca_err(mdev, "RUN_FW command failed, aborting.\n"); - goto err_unmap_fa; - } - if (status) { - mthca_err(mdev, "RUN_FW returned status 0x%02x, aborting.\n", status); - err = -EINVAL; + mthca_err(mdev, "RUN_FW command returned %d, aborting.\n", err); goto err_unmap_fa; } return 0; err_unmap_fa: - mthca_UNMAP_FA(mdev, &status); + mthca_UNMAP_FA(mdev); err_free: mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0); @@ -417,19 +384,13 @@ static int mthca_init_icm(struct mthca_dev *mdev, u64 icm_size) { u64 aux_pages; - u8 status; int err; - err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages, &status); + err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages); if (err) { - mthca_err(mdev, "SET_ICM_SIZE command failed, aborting.\n"); + mthca_err(mdev, "SET_ICM_SIZE 
command returned %d, aborting.\n", err); return err; } - if (status) { - mthca_err(mdev, "SET_ICM_SIZE returned status 0x%02x, " - "aborting.\n", status); - return -EINVAL; - } mthca_dbg(mdev, "%lld KB of HCA context requires %lld KB aux memory.\n", (unsigned long long) icm_size >> 10, @@ -442,14 +403,9 @@ static int mthca_init_icm(struct mthca_dev *mdev, return -ENOMEM; } - err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm, &status); + err = mthca_MAP_ICM_AUX(mdev, mdev->fw.arbel.aux_icm); if (err) { - mthca_err(mdev, "MAP_ICM_AUX command failed, aborting.\n"); - goto err_free_aux; - } - if (status) { - mthca_err(mdev, "MAP_ICM_AUX returned status 0x%02x, aborting.\n", status); - err = -EINVAL; + mthca_err(mdev, "MAP_ICM_AUX returned %d, aborting.\n", err); goto err_free_aux; } @@ -460,11 +416,11 @@ static int mthca_init_icm(struct mthca_dev *mdev, } /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */ - mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE, - dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE; + mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size, + dma_get_cache_alignment()) / mdev->limits.mtt_seg_size; mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, - MTHCA_MTT_SEG_SIZE, + mdev->limits.mtt_seg_size, mdev->limits.num_mtt_segs, mdev->limits.reserved_mtts, 1, 0); @@ -590,7 +546,7 @@ err_unmap_eq: mthca_unmap_eq_icm(mdev); err_unmap_aux: - mthca_UNMAP_ICM_AUX(mdev, &status); + mthca_UNMAP_ICM_AUX(mdev); err_free_aux: mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0); @@ -600,7 +556,6 @@ err_free_aux: static void mthca_free_icms(struct mthca_dev *mdev) { - u8 status; mthca_free_icm_table(mdev, mdev->mcg_table.table); if (mdev->mthca_flags & MTHCA_FLAG_SRQ) @@ -613,7 +568,7 @@ static void mthca_free_icms(struct mthca_dev *mdev) mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); mthca_unmap_eq_icm(mdev); - mthca_UNMAP_ICM_AUX(mdev, &status); + mthca_UNMAP_ICM_AUX(mdev); mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0); } @@ -623,43 +578,32 @@ static int mthca_init_arbel(struct mthca_dev *mdev) struct mthca_profile profile; struct mthca_init_hca_param init_hca; s64 icm_size; - u8 status; int err; - err = mthca_QUERY_FW(mdev, &status); + err = mthca_QUERY_FW(mdev); if (err) { - mthca_err(mdev, "QUERY_FW command failed, aborting.\n"); + mthca_err(mdev, "QUERY_FW command failed %d, aborting.\n", err); return err; } - if (status) { - mthca_err(mdev, "QUERY_FW returned status 0x%02x, " - "aborting.\n", status); - return -EINVAL; - } - err = mthca_ENABLE_LAM(mdev, &status); - if (err) { - mthca_err(mdev, "ENABLE_LAM command failed, aborting.\n"); - return err; - } - if (status == MTHCA_CMD_STAT_LAM_NOT_PRE) { + err = mthca_ENABLE_LAM(mdev); + if (err == -EAGAIN) { mthca_dbg(mdev, "No HCA-attached memory (running in MemFree mode)\n"); mdev->mthca_flags |= MTHCA_FLAG_NO_LAM; - } else if (status) { - mthca_err(mdev, "ENABLE_LAM returned status 0x%02x, " - "aborting.\n", status); - return -EINVAL; + } else if (err) { + mthca_err(mdev, "ENABLE_LAM returned %d, aborting.\n", err); + return err; } err = mthca_load_fw(mdev); if (err) { - mthca_err(mdev, "Failed to start FW, aborting.\n"); + mthca_err(mdev, "Loading FW returned %d, aborting.\n", err); goto err_disable; } err = mthca_dev_lim(mdev, &dev_lim); if (err) { - mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n"); + mthca_err(mdev, "QUERY_DEV_LIM returned %d, aborting.\n", err); goto err_stop_fw; } @@ -679,15 
+623,9 @@ static int mthca_init_arbel(struct mthca_dev *mdev) if (err) goto err_stop_fw; - err = mthca_INIT_HCA(mdev, &init_hca, &status); + err = mthca_INIT_HCA(mdev, &init_hca); if (err) { - mthca_err(mdev, "INIT_HCA command failed, aborting.\n"); - goto err_free_icm; - } - if (status) { - mthca_err(mdev, "INIT_HCA returned status 0x%02x, " - "aborting.\n", status); - err = -EINVAL; + mthca_err(mdev, "INIT_HCA command returned %d, aborting.\n", err); goto err_free_icm; } @@ -697,37 +635,34 @@ err_free_icm: mthca_free_icms(mdev); err_stop_fw: - mthca_UNMAP_FA(mdev, &status); + mthca_UNMAP_FA(mdev); mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0); err_disable: if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) - mthca_DISABLE_LAM(mdev, &status); + mthca_DISABLE_LAM(mdev); return err; } static void mthca_close_hca(struct mthca_dev *mdev) { - u8 status; - - mthca_CLOSE_HCA(mdev, 0, &status); + mthca_CLOSE_HCA(mdev, 0); if (mthca_is_memfree(mdev)) { mthca_free_icms(mdev); - mthca_UNMAP_FA(mdev, &status); + mthca_UNMAP_FA(mdev); mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0); if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) - mthca_DISABLE_LAM(mdev, &status); + mthca_DISABLE_LAM(mdev); } else - mthca_SYS_DIS(mdev, &status); + mthca_SYS_DIS(mdev); } static int mthca_init_hca(struct mthca_dev *mdev) { - u8 status; int err; struct mthca_adapter adapter; @@ -739,15 +674,9 @@ static int mthca_init_hca(struct mthca_dev *mdev) if (err) return err; - err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); + err = mthca_QUERY_ADAPTER(mdev, &adapter); if (err) { - mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); - goto err_close; - } - if (status) { - mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " - "aborting.\n", status); - err = -EINVAL; + mthca_err(mdev, "QUERY_ADAPTER command returned %d, aborting.\n", err); goto err_close; } @@ -766,7 +695,6 @@ err_close: static int mthca_setup_hca(struct mthca_dev *dev) { int err; - u8 status; MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock); @@ -784,7 +712,7 @@ static int mthca_setup_hca(struct mthca_dev *dev) goto err_uar_table_free; } - dev->kar = ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); + dev->kar = ioremap((phys_addr_t) dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!dev->kar) { mthca_err(dev, "Couldn't map kernel access region, " "aborting.\n"); @@ -827,8 +755,8 @@ static int mthca_setup_hca(struct mthca_dev *dev) goto err_eq_table_free; } - err = mthca_NOP(dev, &status); - if (err || status) { + err = mthca_NOP(dev); + if (err) { if (dev->mthca_flags & MTHCA_FLAG_MSI_X) { mthca_warn(dev, "NOP command failed to generate interrupt " "(IRQ %d).\n", @@ -921,58 +849,6 @@ err_uar_table_free: return err; } -static int mthca_request_regions(struct pci_dev *pdev, int ddr_hidden) -{ - int err; - - /* - * We can't just use pci_request_regions() because the MSI-X - * table is right in the middle of the first BAR. If we did - * pci_request_region and grab all of the first BAR, then - * setting up MSI-X would fail, since the PCI core wants to do - * request_mem_region on the MSI-X vector table. - * - * So just request what we need right now, and request any - * other regions we need when setting up EQs. 
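The mthca hunks above all apply one conversion: the firmware command wrappers used to hand back a separate mailbox status byte through an out-parameter, forcing every caller to check both err and status and translate the latter to -EINVAL by hand. The wrappers now fold the status into their return value, and special statuses become meaningful errnos (ENABLE_LAM maps its LAM-not-present status to -EAGAIN so the MemFree case stays detectable, as the hunk shows). A minimal sketch of the new wrapper shape, with invented helper names — the real conversions live in mthca_cmd.c, which this excerpt does not include:

static int mthca_SOME_CMD(struct mthca_dev *dev)
{
        u8 status;
        int err;

        err = mthca_cmd_issue(dev, &status);    /* invented transport helper */
        if (err)
                return err;                     /* command never reached FW */

        return status ? -EINVAL : 0;            /* FW status folded into errno */
}

Callers are then left with a single if (err) branch, which is what shrinks every hunk in this file.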
- */ - if (!request_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, - MTHCA_HCR_SIZE, DRV_NAME)) - return -EBUSY; - - err = pci_request_region(pdev, 2, DRV_NAME); - if (err) - goto err_bar2_failed; - - if (!ddr_hidden) { - err = pci_request_region(pdev, 4, DRV_NAME); - if (err) - goto err_bar4_failed; - } - - return 0; - -err_bar4_failed: - pci_release_region(pdev, 2); - -err_bar2_failed: - release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, - MTHCA_HCR_SIZE); - - return err; -} - -static void mthca_release_regions(struct pci_dev *pdev, - int ddr_hidden) -{ - if (!ddr_hidden) - pci_release_region(pdev, 4); - - pci_release_region(pdev, 2); - - release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, - MTHCA_HCR_SIZE); -} - static int mthca_enable_msi_x(struct mthca_dev *mdev) { struct msix_entry entries[3]; @@ -982,13 +858,9 @@ static int mthca_enable_msi_x(struct mthca_dev *mdev) entries[1].entry = 1; entries[2].entry = 2; - err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries)); - if (err) { - if (err > 0) - mthca_info(mdev, "Only %d MSI-X vectors available, " - "not using MSI-X\n", err); + err = pci_enable_msix_exact(mdev->pdev, entries, ARRAY_SIZE(entries)); + if (err) return err; - } mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector; mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector; @@ -1059,7 +931,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM)) ddr_hidden = 1; - err = mthca_request_regions(pdev, ddr_hidden); + err = pci_request_regions(pdev, DRV_NAME); if (err) { dev_err(&pdev->dev, "Cannot obtain PCI resources, " "aborting.\n"); @@ -1068,20 +940,20 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) pci_set_master(pdev); - err = pci_set_dma_mask(pdev, DMA_64BIT_MASK); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); - err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); goto err_free_res; } } - err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); if (err) { dev_warn(&pdev->dev, "Warning: couldn't set 64-bit " "consistent PCI DMA mask.\n"); - err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, " "aborting.\n"); @@ -1089,6 +961,9 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) } } + /* We can handle large RDMA requests, so allow larger segments. 
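Two related simplifications land here. The deleted mthca_request_regions() existed only because the MSI-X table sits in the middle of BAR 0, so the driver had to request sub-regions around it; with the PCI core handling that case, plain pci_request_regions(pdev, DRV_NAME) and pci_release_regions(pdev) suffice. Likewise pci_enable_msix(), which could return a positive "only N vectors available" count that callers had to interpret, is replaced by pci_enable_msix_exact(), which either grants the exact count or fails with a negative errno. A sketch of the all-or-nothing request, assuming three vectors as in the driver:

static int enable_three_vectors(struct pci_dev *pdev, struct msix_entry *e)
{
        e[0].entry = 0;
        e[1].entry = 1;
        e[2].entry = 2;

        /* 0 on success, -errno otherwise; never a partial grant */
        return pci_enable_msix_exact(pdev, e, 3);
}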
*/ + dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024); + mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev); if (!mdev) { dev_err(&pdev->dev, "Device struct alloc failed, " @@ -1163,6 +1038,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) pci_set_drvdata(pdev, mdev); mdev->hca_type = hca_type; + mdev->active = true; + return 0; err_unregister: @@ -1196,7 +1073,7 @@ err_free_dev: ib_dealloc_device(&mdev->ib_dev); err_free_res: - mthca_release_regions(pdev, ddr_hidden); + pci_release_regions(pdev); err_disable_pdev: pci_disable_device(pdev); @@ -1207,7 +1084,6 @@ err_disable_pdev: static void __mthca_remove_one(struct pci_dev *pdev) { struct mthca_dev *mdev = pci_get_drvdata(pdev); - u8 status; int p; if (mdev) { @@ -1215,7 +1091,7 @@ static void __mthca_remove_one(struct pci_dev *pdev) mthca_unregister_device(mdev); for (p = 1; p <= mdev->limits.num_ports; ++p) - mthca_CLOSE_IB(mdev, p, &status); + mthca_CLOSE_IB(mdev, p); mthca_cleanup_mcg_table(mdev); mthca_cleanup_av_table(mdev); @@ -1240,8 +1116,7 @@ static void __mthca_remove_one(struct pci_dev *pdev) pci_disable_msix(pdev); ib_dealloc_device(&mdev->ib_dev); - mthca_release_regions(pdev, mdev->mthca_flags & - MTHCA_FLAG_DDR_HIDDEN); + pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } @@ -1260,18 +1135,13 @@ int __mthca_restart_one(struct pci_dev *pdev) return __mthca_init_one(pdev, hca_type); } -static int __devinit mthca_init_one(struct pci_dev *pdev, - const struct pci_device_id *id) +static int mthca_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { - static int mthca_version_printed = 0; int ret; mutex_lock(&mthca_device_mutex); - if (!mthca_version_printed) { - printk(KERN_INFO "%s", mthca_version); - ++mthca_version_printed; - } + printk_once(KERN_INFO "%s", mthca_version); if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) { printk(KERN_ERR PFX "%s has invalid driver data %lx\n", @@ -1287,7 +1157,7 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, return ret; } -static void __devexit mthca_remove_one(struct pci_dev *pdev) +static void mthca_remove_one(struct pci_dev *pdev) { mutex_lock(&mthca_device_mutex); __mthca_remove_one(pdev); @@ -1324,7 +1194,7 @@ static struct pci_driver mthca_driver = { .name = DRV_NAME, .id_table = mthca_pci_table, .probe = mthca_init_one, - .remove = __devexit_p(mthca_remove_one) + .remove = mthca_remove_one, }; static void __init __mthca_check_profile_val(const char *name, int *pval, @@ -1368,6 +1238,12 @@ static void __init mthca_validate_profile(void) printk(KERN_WARNING PFX "Corrected fmr_reserved_mtts to %d.\n", hca_profile.fmr_reserved_mtts); } + + if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) { + printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). 
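The probe path above shows two DMA-setup idioms: the 64-bit-then-32-bit mask fallback, now spelled DMA_BIT_MASK(n) rather than the removed DMA_64BIT_MASK/DMA_32BIT_MASK constants, and dma_set_max_seg_size(), which raises the small default scatterlist segment limit so large RDMA registrations are not split needlessly. A condensed sketch of the same flow (the driver's version also warns on each fallback step):

static int set_dma_caps(struct pci_dev *pdev)
{
        int err;

        err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
        if (err)
                err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
        if (err)
                return err;

        if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)))
                err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
        if (err)
                return err;

        /* allow up to 1 GB per scatterlist segment, as the hunk does */
        dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
        return 0;
}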
Using default - %d\n", + log_mtts_per_seg, ilog2(MTHCA_MTT_SEG_SIZE / 8)); + log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8); + } } static int __init mthca_init(void) diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index 3f5f9487920..6304ae8f4a6 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -31,7 +31,7 @@ */ #include <linux/string.h> -#include <linux/slab.h> +#include <linux/gfp.h> #include "mthca_dev.h" #include "mthca_cmd.h" @@ -68,7 +68,6 @@ static int find_mgm(struct mthca_dev *dev, struct mthca_mgm *mgm = mgm_mailbox->buf; u8 *mgid; int err; - u8 status; mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) @@ -77,38 +76,22 @@ static int find_mgm(struct mthca_dev *dev, memcpy(mgid, gid, 16); - err = mthca_MGID_HASH(dev, mailbox, hash, &status); - if (err) - goto out; - if (status) { - mthca_err(dev, "MGID_HASH returned status %02x\n", status); - err = -EINVAL; + err = mthca_MGID_HASH(dev, mailbox, hash); + if (err) { + mthca_err(dev, "MGID_HASH failed (%d)\n", err); goto out; } if (0) - mthca_dbg(dev, "Hash for %04x:%04x:%04x:%04x:" - "%04x:%04x:%04x:%04x is %04x\n", - be16_to_cpu(((__be16 *) gid)[0]), - be16_to_cpu(((__be16 *) gid)[1]), - be16_to_cpu(((__be16 *) gid)[2]), - be16_to_cpu(((__be16 *) gid)[3]), - be16_to_cpu(((__be16 *) gid)[4]), - be16_to_cpu(((__be16 *) gid)[5]), - be16_to_cpu(((__be16 *) gid)[6]), - be16_to_cpu(((__be16 *) gid)[7]), - *hash); + mthca_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash); *index = *hash; *prev = -1; do { - err = mthca_READ_MGM(dev, *index, mgm_mailbox, &status); - if (err) - goto out; - if (status) { - mthca_err(dev, "READ_MGM returned status %02x\n", status); - err = -EINVAL; + err = mthca_READ_MGM(dev, *index, mgm_mailbox); + if (err) { + mthca_err(dev, "READ_MGM failed (%d)\n", err); goto out; } @@ -144,7 +127,6 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int link = 0; int i; int err; - u8 status; mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) @@ -170,12 +152,9 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; } - err = mthca_READ_MGM(dev, index, mailbox, &status); - if (err) - goto out; - if (status) { - mthca_err(dev, "READ_MGM returned status %02x\n", status); - err = -EINVAL; + err = mthca_READ_MGM(dev, index, mailbox); + if (err) { + mthca_err(dev, "READ_MGM failed (%d)\n", err); goto out; } memset(mgm, 0, sizeof *mgm); @@ -199,11 +178,9 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; } - err = mthca_WRITE_MGM(dev, index, mailbox, &status); - if (err) - goto out; - if (status) { - mthca_err(dev, "WRITE_MGM returned status %02x\n", status); + err = mthca_WRITE_MGM(dev, index, mailbox); + if (err) { + mthca_err(dev, "WRITE_MGM failed %d\n", err); err = -EINVAL; goto out; } @@ -211,24 +188,17 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (!link) goto out; - err = mthca_READ_MGM(dev, prev, mailbox, &status); - if (err) - goto out; - if (status) { - mthca_err(dev, "READ_MGM returned status %02x\n", status); - err = -EINVAL; + err = mthca_READ_MGM(dev, prev, mailbox); + if (err) { + mthca_err(dev, "READ_MGM failed %d\n", err); goto out; } mgm->next_gid_index = cpu_to_be32(index << 6); - err = mthca_WRITE_MGM(dev, prev, mailbox, &status); + err = mthca_WRITE_MGM(dev, prev, mailbox); if (err) - goto out; - if (status) { - mthca_err(dev, "WRITE_MGM returned 
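The printk conversions in mthca_mcg.c use the kernel's extended %p specifiers: %pM formats a 6-byte MAC address, %pI4 a big-endian IPv4 address, and %pI6 a 16-byte IPv6 address or GID, each taking a pointer to the raw bytes. One specifier replaces the eight be16_to_cpu() arguments deleted above:

/* before: eight "%04x" conversions plus eight be16_to_cpu() arguments */
mthca_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash);
pr_debug("%s: MAC %pM, IRQ %u\n", netdev->name, netdev->dev_addr, netdev->irq);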
status %02x\n", status); - err = -EINVAL; - } + mthca_err(dev, "WRITE_MGM returned %d\n", err); out: if (err && link && index != -1) { @@ -250,7 +220,6 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int prev, index; int i, loc; int err; - u8 status; mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) @@ -264,16 +233,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; if (index == -1) { - mthca_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " - "not found\n", - be16_to_cpu(((__be16 *) gid->raw)[0]), - be16_to_cpu(((__be16 *) gid->raw)[1]), - be16_to_cpu(((__be16 *) gid->raw)[2]), - be16_to_cpu(((__be16 *) gid->raw)[3]), - be16_to_cpu(((__be16 *) gid->raw)[4]), - be16_to_cpu(((__be16 *) gid->raw)[5]), - be16_to_cpu(((__be16 *) gid->raw)[6]), - be16_to_cpu(((__be16 *) gid->raw)[7])); + mthca_err(dev, "MGID %pI6 not found\n", gid->raw); err = -EINVAL; goto out; } @@ -294,12 +254,9 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) mgm->qp[loc] = mgm->qp[i - 1]; mgm->qp[i - 1] = 0; - err = mthca_WRITE_MGM(dev, index, mailbox, &status); - if (err) - goto out; - if (status) { - mthca_err(dev, "WRITE_MGM returned status %02x\n", status); - err = -EINVAL; + err = mthca_WRITE_MGM(dev, index, mailbox); + if (err) { + mthca_err(dev, "WRITE_MGM returned %d\n", err); goto out; } @@ -311,24 +268,17 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int amgm_index_to_free = be32_to_cpu(mgm->next_gid_index) >> 6; if (amgm_index_to_free) { err = mthca_READ_MGM(dev, amgm_index_to_free, - mailbox, &status); - if (err) - goto out; - if (status) { - mthca_err(dev, "READ_MGM returned status %02x\n", - status); - err = -EINVAL; + mailbox); + if (err) { + mthca_err(dev, "READ_MGM returned %d\n", err); goto out; } } else memset(mgm->gid, 0, 16); - err = mthca_WRITE_MGM(dev, index, mailbox, &status); - if (err) - goto out; - if (status) { - mthca_err(dev, "WRITE_MGM returned status %02x\n", status); - err = -EINVAL; + err = mthca_WRITE_MGM(dev, index, mailbox); + if (err) { + mthca_err(dev, "WRITE_MGM returned %d\n", err); goto out; } if (amgm_index_to_free) { @@ -338,23 +288,17 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } else { /* Remove entry from AMGM */ int curr_next_index = be32_to_cpu(mgm->next_gid_index) >> 6; - err = mthca_READ_MGM(dev, prev, mailbox, &status); - if (err) - goto out; - if (status) { - mthca_err(dev, "READ_MGM returned status %02x\n", status); - err = -EINVAL; + err = mthca_READ_MGM(dev, prev, mailbox); + if (err) { + mthca_err(dev, "READ_MGM returned %d\n", err); goto out; } mgm->next_gid_index = cpu_to_be32(curr_next_index << 6); - err = mthca_WRITE_MGM(dev, prev, mailbox, &status); - if (err) - goto out; - if (status) { - mthca_err(dev, "WRITE_MGM returned status %02x\n", status); - err = -EINVAL; + err = mthca_WRITE_MGM(dev, prev, mailbox); + if (err) { + mthca_err(dev, "WRITE_MGM returned %d\n", err); goto out; } BUG_ON(index < dev->limits.num_mgms); diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 1f7d1a29d2a..7d2e42dd692 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -35,6 +35,7 @@ #include <linux/mm.h> #include <linux/scatterlist.h> #include <linux/sched.h> +#include <linux/slab.h> #include <asm/page.h> @@ -222,7 +223,6 @@ int mthca_table_get(struct mthca_dev *dev, struct 
mthca_icm_table *table, int ob { int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; int ret = 0; - u8 status; mutex_lock(&table->mutex); @@ -239,8 +239,8 @@ int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int ob goto out; } - if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE, - &status) || status) { + if (mthca_MAP_ICM(dev, table->icm[i], + table->virt + i * MTHCA_TABLE_CHUNK_SIZE)) { mthca_free_icm(dev, table->icm[i], table->coherent); table->icm[i] = NULL; ret = -ENOMEM; @@ -257,7 +257,6 @@ out: void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj) { int i; - u8 status; if (!mthca_is_memfree(dev)) return; @@ -268,8 +267,7 @@ void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int o if (--table->icm[i]->refcount == 0) { mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, - MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, - &status); + MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE); mthca_free_icm(dev, table->icm[i], table->coherent); table->icm[i] = NULL; } @@ -365,7 +363,6 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, int num_icm; unsigned chunk_size; int i; - u8 status; obj_per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size; num_icm = DIV_ROUND_UP(nobj, obj_per_chunk); @@ -395,8 +392,8 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, __GFP_NOWARN, use_coherent); if (!table->icm[i]) goto err; - if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE, - &status) || status) { + if (mthca_MAP_ICM(dev, table->icm[i], + virt + i * MTHCA_TABLE_CHUNK_SIZE)) { mthca_free_icm(dev, table->icm[i], table->coherent); table->icm[i] = NULL; goto err; @@ -415,8 +412,7 @@ err: for (i = 0; i < num_icm; ++i) if (table->icm[i]) { mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE, - MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, - &status); + MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE); mthca_free_icm(dev, table->icm[i], table->coherent); } @@ -428,13 +424,12 @@ err: void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table) { int i; - u8 status; for (i = 0; i < table->num_icm; ++i) if (table->icm[i]) { - mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, - MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, - &status); + mthca_UNMAP_ICM(dev, + table->virt + i * MTHCA_TABLE_CHUNK_SIZE, + MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE); mthca_free_icm(dev, table->icm[i], table->coherent); } @@ -453,7 +448,6 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, { struct page *pages[1]; int ret = 0; - u8 status; int i; if (!mthca_is_memfree(dev)) @@ -493,9 +487,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, } ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem), - mthca_uarc_virt(dev, uar, i), &status); - if (!ret && status) - ret = -EINVAL; + mthca_uarc_virt(dev, uar, i)); if (ret) { pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); put_page(sg_page(&db_tab->page[i].mem)); @@ -556,14 +548,13 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, struct mthca_user_db_table *db_tab) { int i; - u8 status; if (!mthca_is_memfree(dev)) return; for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) { if (db_tab->page[i].uvirt) { - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status); + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1); pci_unmap_sg(dev->pdev, 
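mthca_table_get()/mthca_table_put() implement lazy, refcounted ICM mapping: a chunk is allocated and MAP_ICM'd the first time any object inside it is referenced, and UNMAP_ICM'd when the last reference drops. A hypothetical condensation of the idiom — names invented, chunk geometry and the firmware calls simplified away:

static int icm_chunk_get(struct icm_table *t, int i)
{
        int ret = 0;

        mutex_lock(&t->mutex);
        if (t->chunk[i]) {
                ++t->chunk[i]->refcount;        /* already mapped */
        } else {
                t->chunk[i] = alloc_and_map_chunk(t, i); /* invented helper */
                if (t->chunk[i])
                        t->chunk[i]->refcount = 1;
                else
                        ret = -ENOMEM;
        }
        mutex_unlock(&t->mutex);
        return ret;
}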
&db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); put_page(sg_page(&db_tab->page[i].mem)); } @@ -580,7 +571,6 @@ int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type, int i, j; struct mthca_db_page *page; int ret = 0; - u8 status; mutex_lock(&dev->db_tab->mutex); @@ -643,9 +633,7 @@ alloc: memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE); ret = mthca_MAP_ICM_page(dev, page->mapping, - mthca_uarc_virt(dev, &dev->driver_uar, i), &status); - if (!ret && status) - ret = -EINVAL; + mthca_uarc_virt(dev, &dev->driver_uar, i)); if (ret) { dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, page->db_rec, page->mapping); @@ -677,7 +665,6 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index) { int i, j; struct mthca_db_page *page; - u8 status; i = db_index / MTHCA_DB_REC_PER_PAGE; j = db_index % MTHCA_DB_REC_PER_PAGE; @@ -693,7 +680,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index) if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) && i >= dev->db_tab->max_group1 - 1) { - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1); dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, page->db_rec, page->mapping); @@ -744,7 +731,6 @@ int mthca_init_db_tab(struct mthca_dev *dev) void mthca_cleanup_db_tab(struct mthca_dev *dev) { int i; - u8 status; if (!mthca_is_memfree(dev)) return; @@ -762,7 +748,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev) if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE)) mthca_warn(dev, "Kernel UARC page %d not empty\n", i); - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1); dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE, dev->db_tab->page[i].db_rec, diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 8489b1e81c0..ed9a989e501 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -89,23 +89,26 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order) spin_lock(&buddy->lock); - for (o = order; o <= buddy->max_order; ++o) { - m = 1 << (buddy->max_order - o); - seg = find_first_bit(buddy->bits[o], m); - if (seg < m) - goto found; - } + for (o = order; o <= buddy->max_order; ++o) + if (buddy->num_free[o]) { + m = 1 << (buddy->max_order - o); + seg = find_first_bit(buddy->bits[o], m); + if (seg < m) + goto found; + } spin_unlock(&buddy->lock); return -1; found: clear_bit(seg, buddy->bits[o]); + --buddy->num_free[o]; while (o > order) { --o; seg <<= 1; set_bit(seg ^ 1, buddy->bits[o]); + ++buddy->num_free[o]; } spin_unlock(&buddy->lock); @@ -123,11 +126,13 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order) while (test_bit(seg ^ 1, buddy->bits[order])) { clear_bit(seg ^ 1, buddy->bits[order]); + --buddy->num_free[order]; seg >>= 1; ++order; } set_bit(seg, buddy->bits[order]); + ++buddy->num_free[order]; spin_unlock(&buddy->lock); } @@ -141,7 +146,9 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), GFP_KERNEL); - if (!buddy->bits) + buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free, + GFP_KERNEL); + if (!buddy->bits || !buddy->num_free) goto err_out; for (i = 0; i <= buddy->max_order; ++i) { @@ -154,6 +161,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) } 
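Besides the status plumbing, the mthca_mr.c hunks make a real algorithmic change: mthca_buddy gains a per-order count of free segments, so the allocator can skip orders with nothing free instead of running find_first_bit() over every bitmap on each miss. The counters are kept exact across every split and merge. Restating the new allocation path with the lock elided for brevity (the fields match the hunk):

static int buddy_alloc(struct mthca_buddy *buddy, int order)
{
        int o, m, seg;

        for (o = order; o <= buddy->max_order; ++o) {
                if (!buddy->num_free[o])        /* new: skip empty orders */
                        continue;
                m = 1 << (buddy->max_order - o);
                seg = find_first_bit(buddy->bits[o], m);
                if (seg < m)
                        goto found;
        }
        return -1;

found:
        clear_bit(seg, buddy->bits[o]);
        --buddy->num_free[o];

        while (o > order) {                     /* split, freeing the buddy halves */
                --o;
                seg <<= 1;
                set_bit(seg ^ 1, buddy->bits[o]);
                ++buddy->num_free[o];
        }
        return seg;
}

The init path now allocates the matching num_free array alongside the bitmaps, and the error labels are reshuffled so both arrays are freed on any failure.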
set_bit(0, buddy->bits[buddy->max_order]); + buddy->num_free[buddy->max_order] = 1; return 0; @@ -161,9 +169,10 @@ err_out_free: for (i = 0; i <= buddy->max_order; ++i) kfree(buddy->bits[i]); +err_out: kfree(buddy->bits); + kfree(buddy->num_free); -err_out: return -ENOMEM; } @@ -175,6 +184,7 @@ static void mthca_buddy_cleanup(struct mthca_buddy *buddy) kfree(buddy->bits[i]); kfree(buddy->bits); + kfree(buddy->num_free); } static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order, @@ -210,7 +220,7 @@ static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size, mtt->buddy = buddy; mtt->order = 0; - for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1) + for (i = dev->limits.mtt_seg_size / 8; i < size; i <<= 1) ++mtt->order; mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy); @@ -247,7 +257,6 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, struct mthca_mailbox *mailbox; __be64 *mtt_entry; int err = 0; - u8 status; int i; mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); @@ -257,7 +266,7 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, while (list_len > 0) { mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + - mtt->first_seg * MTHCA_MTT_SEG_SIZE + + mtt->first_seg * dev->limits.mtt_seg_size + start_index * 8); mtt_entry[1] = 0; for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i) @@ -271,17 +280,11 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, if (i & 1) mtt_entry[i + 2] = 0; - err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status); + err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1); if (err) { mthca_warn(dev, "WRITE_MTT failed (%d)\n", err); goto out; } - if (status) { - mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n", - status); - err = -EINVAL; - goto out; - } list_len -= i; start_index += i; @@ -316,7 +319,7 @@ static void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, u64 __iomem *mtts; int i; - mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * MTHCA_MTT_SEG_SIZE + + mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * dev->limits.mtt_seg_size + start_index * sizeof (u64); for (i = 0; i < list_len; ++i) mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT), @@ -335,17 +338,21 @@ static void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, /* For Arbel, all MTTs must fit in the same page. 
*/ BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE); /* Require full segments */ - BUG_ON(s % MTHCA_MTT_SEG_SIZE); + BUG_ON(s % dev->limits.mtt_seg_size); mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg + - s / MTHCA_MTT_SEG_SIZE, &dma_handle); + s / dev->limits.mtt_seg_size, &dma_handle); BUG_ON(!mtts); + dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle, + list_len * sizeof (u64), DMA_TO_DEVICE); + for (i = 0; i < list_len; ++i) mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT); - dma_sync_single(&dev->pdev->dev, dma_handle, list_len * sizeof (u64), DMA_TO_DEVICE); + dma_sync_single_for_device(&dev->pdev->dev, dma_handle, + list_len * sizeof (u64), DMA_TO_DEVICE); } int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, @@ -427,7 +434,6 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, u32 key; int i; int err; - u8 status; WARN_ON(buffer_size_shift >= 32); @@ -469,7 +475,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, if (mr->mtt) mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + - mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE); + mr->mtt->first_seg * dev->limits.mtt_seg_size); if (0) { mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); @@ -483,16 +489,10 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, } err = mthca_SW2HW_MPT(dev, mailbox, - key & (dev->limits.num_mpts - 1), - &status); + key & (dev->limits.num_mpts - 1)); if (err) { mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); goto err_out_mailbox; - } else if (status) { - mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", - status); - err = -EINVAL; - goto err_out_mailbox; } mthca_free_mailbox(dev, mailbox); @@ -553,17 +553,12 @@ static void mthca_free_region(struct mthca_dev *dev, u32 lkey) void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr) { int err; - u8 status; err = mthca_HW2SW_MPT(dev, NULL, key_to_hw_index(dev, mr->ibmr.lkey) & - (dev->limits.num_mpts - 1), - &status); + (dev->limits.num_mpts - 1)); if (err) mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err); - else if (status) - mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n", - status); mthca_free_region(dev, mr->ibmr.lkey); mthca_free_mtt(dev, mr->mtt); @@ -576,7 +571,6 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, struct mthca_mailbox *mailbox; u64 mtt_seg; u32 key, idx; - u8 status; int list_len = mr->attr.max_pages; int err = -ENOMEM; int i; @@ -616,7 +610,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, goto err_out_table; } - mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE; + mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size; if (mthca_is_memfree(dev)) { mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, @@ -658,18 +652,11 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, } err = mthca_SW2HW_MPT(dev, mailbox, - key & (dev->limits.num_mpts - 1), - &status); + key & (dev->limits.num_mpts - 1)); if (err) { mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); goto err_out_mailbox_free; } - if (status) { - mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", - status); - err = -EINVAL; - goto err_out_mailbox_free; - } mthca_free_mailbox(dev, mailbox); return 0; @@ -793,12 +780,15 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, wmb(); + dma_sync_single_for_cpu(&dev->pdev->dev, fmr->mem.arbel.dma_handle, + list_len * sizeof(u64), DMA_TO_DEVICE); + for (i = 0; i < list_len; ++i) fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] | 
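Both MTT-writing paths above replace the old single dma_sync_single() call with the modern bracketing pair: sync "for CPU" before the processor writes into a streaming DMA_TO_DEVICE mapping, then sync "for device" before the hardware may read it again. The generic shape of the handoff, as a fragment with fill_entries() standing in for the cpu_to_be64() loops in the hunks:

dma_sync_single_for_cpu(dev, handle, len, DMA_TO_DEVICE);      /* CPU owns buffer */
fill_entries();                                                /* safe CPU writes */
dma_sync_single_for_device(dev, handle, len, DMA_TO_DEVICE);   /* device owns it */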
MTHCA_MTT_FLAG_PRESENT); - dma_sync_single(&dev->pdev->dev, fmr->mem.arbel.dma_handle, - list_len * sizeof(u64), DMA_TO_DEVICE); + dma_sync_single_for_device(&dev->pdev->dev, fmr->mem.arbel.dma_handle, + list_len * sizeof(u64), DMA_TO_DEVICE); fmr->mem.arbel.mpt->key = cpu_to_be32(key); fmr->mem.arbel.mpt->lkey = cpu_to_be32(key); @@ -836,7 +826,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr) int mthca_init_mr_table(struct mthca_dev *dev) { - unsigned long addr; + phys_addr_t addr; int mpts, mtts, err, i; err = mthca_alloc_init(&dev->mr_table.mpt_alloc, @@ -898,7 +888,7 @@ int mthca_init_mr_table(struct mthca_dev *dev) dev->mr_table.mtt_base); dev->mr_table.tavor_fmr.mtt_base = - ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE); + ioremap(addr, mtts * dev->limits.mtt_seg_size); if (!dev->mr_table.tavor_fmr.mtt_base) { mthca_warn(dev, "MTT ioremap for FMR failed.\n"); err = -ENOMEM; diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index d168c254061..8edb28a9a0e 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -94,7 +94,7 @@ s64 mthca_make_profile(struct mthca_dev *dev, profile[MTHCA_RES_RDB].size = MTHCA_RDB_ENTRY_SIZE; profile[MTHCA_RES_MCG].size = MTHCA_MGM_ENTRY_SIZE; profile[MTHCA_RES_MPT].size = dev_lim->mpt_entry_sz; - profile[MTHCA_RES_MTT].size = MTHCA_MTT_SEG_SIZE; + profile[MTHCA_RES_MTT].size = dev->limits.mtt_seg_size; profile[MTHCA_RES_UAR].size = dev_lim->uar_scratch_entry_sz; profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE; profile[MTHCA_RES_UARC].size = request->uarc_size; @@ -232,7 +232,7 @@ s64 mthca_make_profile(struct mthca_dev *dev, dev->limits.num_mtt_segs = profile[i].num; dev->mr_table.mtt_base = profile[i].start; init_hca->mtt_base = profile[i].start; - init_hca->mtt_seg_sz = ffs(MTHCA_MTT_SEG_SIZE) - 7; + init_hca->mtt_seg_sz = ffs(dev->limits.mtt_seg_size) - 7; break; case MTHCA_RES_UAR: dev->limits.num_uars = profile[i].num; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 87ad889e367..415f8e1a54d 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -39,7 +39,10 @@ #include <rdma/ib_user_verbs.h> #include <linux/sched.h> +#include <linux/slab.h> +#include <linux/stat.h> #include <linux/mm.h> +#include <linux/export.h> #include "mthca_dev.h" #include "mthca_cmd.h" @@ -62,8 +65,6 @@ static int mthca_query_device(struct ib_device *ibdev, int err = -ENOMEM; struct mthca_dev *mdev = to_mdev(ibdev); - u8 status; - in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); if (!in_mad || !out_mad) @@ -77,14 +78,9 @@ static int mthca_query_device(struct ib_device *ibdev, in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mthca_MAD_IFC(mdev, 1, 1, - 1, NULL, NULL, in_mad, out_mad, - &status); + 1, NULL, NULL, in_mad, out_mad); if (err) goto out; - if (status) { - err = -EINVAL; - goto out; - } props->device_cap_flags = mdev->device_cap_flags; props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & @@ -140,7 +136,6 @@ static int mthca_query_port(struct ib_device *ibdev, struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; - u8 status; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); @@ -154,14 +149,9 @@ static int mthca_query_port(struct ib_device *ibdev, in_mad->attr_mod = cpu_to_be32(port); err = 
mthca_MAD_IFC(to_mdev(ibdev), 1, 1, - port, NULL, NULL, in_mad, out_mad, - &status); + port, NULL, NULL, in_mad, out_mad); if (err) goto out; - if (status) { - err = -EINVAL; - goto out; - } props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; @@ -213,7 +203,6 @@ static int mthca_modify_port(struct ib_device *ibdev, struct mthca_set_ib_param set_ib; struct ib_port_attr attr; int err; - u8 status; if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex)) return -ERESTARTSYS; @@ -228,14 +217,9 @@ static int mthca_modify_port(struct ib_device *ibdev, set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & ~props->clr_port_cap_mask; - err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status); + err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port); if (err) goto out; - if (status) { - err = -EINVAL; - goto out; - } - out: mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); return err; @@ -247,7 +231,6 @@ static int mthca_query_pkey(struct ib_device *ibdev, struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; - u8 status; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); @@ -259,14 +242,9 @@ static int mthca_query_pkey(struct ib_device *ibdev, in_mad->attr_mod = cpu_to_be32(index / 32); err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, - port, NULL, NULL, in_mad, out_mad, - &status); + port, NULL, NULL, in_mad, out_mad); if (err) goto out; - if (status) { - err = -EINVAL; - goto out; - } *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); @@ -282,7 +260,6 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port, struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; - u8 status; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); @@ -294,14 +271,9 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port, in_mad->attr_mod = cpu_to_be32(port); err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, - port, NULL, NULL, in_mad, out_mad, - &status); + port, NULL, NULL, in_mad, out_mad); if (err) goto out; - if (status) { - err = -EINVAL; - goto out; - } memcpy(gid->raw, out_mad->data + 8, 8); @@ -310,14 +282,9 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port, in_mad->attr_mod = cpu_to_be32(index / 8); err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, - port, NULL, NULL, in_mad, out_mad, - &status); + port, NULL, NULL, in_mad, out_mad); if (err) goto out; - if (status) { - err = -EINVAL; - goto out; - } memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); @@ -334,6 +301,9 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, struct mthca_ucontext *context; int err; + if (!(to_mdev(ibdev)->active)) + return ERR_PTR(-EAGAIN); + memset(&uresp, 0, sizeof uresp); uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; @@ -470,6 +440,9 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, struct mthca_srq *srq; int err; + if (init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + srq = kmalloc(sizeof *srq, GFP_KERNEL); if (!srq) return ERR_PTR(-ENOMEM); @@ -722,6 +695,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { mthca_free_cq(to_mdev(ibdev), cq); + err = -EFAULT; goto err_free; } @@ -796,7 +770,6 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda struct mthca_cq *cq = to_mcq(ibcq); struct mthca_resize_cq ucmd; u32 lkey; - u8 status; int 
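Every mthca_query_* verb above follows the same MAD round-trip: allocate an input/output MAD pair, fill in the attribute to fetch, fire MAD_IFC at the firmware, and pull fields out of out_mad->data at fixed SMP offsets; with the status byte gone, the tail shrinks to a single error check. A trimmed sketch of the shape, using the standard SMP Get field values and the port-LID offset seen above (allocation and error handling elided):

in_mad->base_version  = 1;
in_mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
in_mad->class_version = 1;
in_mad->method        = IB_MGMT_METHOD_GET;
in_mad->attr_id       = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod      = cpu_to_be32(port);

err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
if (err)
        goto out;

props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));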
ret; if (entries < 1 || entries > dev->limits.max_cqes) @@ -823,9 +796,7 @@ static int mthca_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *uda lkey = ucmd.lkey; } - ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries), &status); - if (status) - ret = -EINVAL; + ret = mthca_RESIZE_CQ(dev, cq->cqn, lkey, ilog2(entries)); if (ret) { if (cq->resize_buf) { @@ -1006,12 +977,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(pd->device); - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct mthca_mr *mr; struct mthca_reg_mr ucmd; u64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; int write_mtt_size; @@ -1039,10 +1010,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } shift = ffs(mr->umem->page_size) - 1; - - n = 0; - list_for_each_entry(chunk, &mr->umem->chunk_list, list) - n += chunk->nents; + n = mr->umem->nmap; mr->mtt = mthca_alloc_mtt(dev, n); if (IS_ERR(mr->mtt)) { @@ -1060,25 +1028,24 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); - list_for_each_entry(chunk, &mr->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = sg_dma_address(&chunk->page_list[j]) + - mr->umem->page_size * k; - /* - * Be friendly to write_mtt and pass it chunks - * of appropriate size. - */ - if (i == write_mtt_size) { - err = mthca_write_mtt(dev, mr->mtt, n, pages, i); - if (err) - goto mtt_done; - n += i; - i = 0; - } + for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(sg) + + mr->umem->page_size * k; + /* + * Be friendly to write_mtt and pass it chunks + * of appropriate size. 
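The mthca_reg_user_mr() rewrite just below tracks an ib_umem API change: the per-chunk page lists (ib_umem_chunk) are gone, the mapped-entry count comes straight from umem->nmap, and pages are walked with a flat for_each_sg() over the umem's scatterlist. The core of the new walk with the write_mtt batching stripped out (the hunk keeps that batching by flushing pages[] whenever it fills):

struct scatterlist *sg;
int entry, k;

for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
        int len = sg_dma_len(sg) >> shift;

        for (k = 0; k < len; ++k)
                pages[i++] = sg_dma_address(sg) + umem->page_size * k;
}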
+ */ + if (i == write_mtt_size) { + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); + if (err) + goto mtt_done; + n += i; + i = 0; } } + } if (i) err = mthca_write_mtt(dev, mr->mtt, n, pages, i); @@ -1157,7 +1124,6 @@ static int mthca_unmap_fmr(struct list_head *fmr_list) { struct ib_fmr *fmr; int err; - u8 status; struct mthca_dev *mdev = NULL; list_for_each_entry(fmr, fmr_list, list) { @@ -1178,12 +1144,8 @@ static int mthca_unmap_fmr(struct list_head *fmr_list) list_for_each_entry(fmr, fmr_list, list) mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr)); - err = mthca_SYNC_TPT(mdev, &status); - if (err) - return err; - if (status) - return -EINVAL; - return 0; + err = mthca_SYNC_TPT(mdev); + return err; } static ssize_t show_rev(struct device *device, struct device_attribute *attr, @@ -1249,7 +1211,6 @@ static int mthca_init_node_data(struct mthca_dev *dev) struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; int err = -ENOMEM; - u8 status; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); @@ -1260,28 +1221,18 @@ static int mthca_init_node_data(struct mthca_dev *dev) in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; err = mthca_MAD_IFC(dev, 1, 1, - 1, NULL, NULL, in_mad, out_mad, - &status); + 1, NULL, NULL, in_mad, out_mad); if (err) goto out; - if (status) { - err = -EINVAL; - goto out; - } memcpy(dev->ib_dev.node_desc, out_mad->data, 64); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; err = mthca_MAD_IFC(dev, 1, 1, - 1, NULL, NULL, in_mad, out_mad, - &status); + 1, NULL, NULL, in_mad, out_mad); if (err) goto out; - if (status) { - err = -EINVAL; - goto out; - } if (mthca_is_memfree(dev)) dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); @@ -1399,7 +1350,7 @@ int mthca_register_device(struct mthca_dev *dev) mutex_init(&dev->cap_mask_mutex); - ret = ib_register_device(&dev->ib_dev); + ret = ib_register_device(&dev->ib_dev, NULL); if (ret) return ret; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index c621f8794b8..596acc45569 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -46,7 +46,7 @@ struct mthca_buf_list { void *buf; - DECLARE_PCI_UNMAP_ADDR(mapping) + DEFINE_DMA_UNMAP_ADDR(mapping); }; union mthca_buf { @@ -113,6 +113,7 @@ struct mthca_eq { int nent; struct mthca_buf_list *page_list; struct mthca_mr mr; + char irq_name[IB_DEVICE_NAME_MAX]; }; struct mthca_av; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index f5081bfde6d..e354b2f04ad 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -247,7 +247,8 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn, spin_unlock(&dev->qp_table.lock); if (!qp) { - mthca_warn(dev, "Async event for bogus QP %08x\n", qpn); + mthca_warn(dev, "Async event %d for bogus QP %08x\n", + event_type, qpn); return; } @@ -308,7 +309,6 @@ static void store_attrs(struct mthca_sqp *sqp, const struct ib_qp_attr *attr, static void init_port(struct mthca_dev *dev, int port) { int err; - u8 status; struct mthca_init_ib_param param; memset(¶m, 0, sizeof param); @@ -319,11 +319,9 @@ static void init_port(struct mthca_dev *dev, int port) param.gid_cap = dev->limits.gid_table_len; param.pkey_cap = dev->limits.pkey_table_len; - err = mthca_INIT_IB(dev, ¶m, port, &status); + err = mthca_INIT_IB(dev, ¶m, port); if (err) mthca_warn(dev, "INIT_IB failed, return code %d.\n", err); - if (status) - mthca_warn(dev, "INIT_IB 
returned status %02x.\n", status); } static __be32 get_hw_access_flags(struct mthca_qp *qp, const struct ib_qp_attr *attr, @@ -433,7 +431,6 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m struct mthca_qp_param *qp_param; struct mthca_qp_context *context; int mthca_state; - u8 status; mutex_lock(&qp->mutex); @@ -448,12 +445,9 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m goto out; } - err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox, &status); - if (err) - goto out_mailbox; - if (status) { - mthca_warn(dev, "QUERY_QP returned status %02x\n", status); - err = -EINVAL; + err = mthca_QUERY_QP(dev, qp->qpn, 0, mailbox); + if (err) { + mthca_warn(dev, "QUERY_QP failed (%d)\n", err); goto out_mailbox; } @@ -508,6 +502,7 @@ done: qp_attr->cap.max_inline_data = qp->max_inline_data; qp_init_attr->cap = qp_attr->cap; + qp_init_attr->sq_sig_type = qp->sq_policy; out_mailbox: mthca_free_mailbox(dev, mailbox); @@ -555,7 +550,6 @@ static int __mthca_modify_qp(struct ib_qp *ibqp, struct mthca_qp_param *qp_param; struct mthca_qp_context *qp_context; u32 sqd_event = 0; - u8 status; int err = -EINVAL; mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); @@ -781,13 +775,10 @@ static int __mthca_modify_qp(struct ib_qp *ibqp, sqd_event = 1 << 31; err = mthca_MODIFY_QP(dev, cur_state, new_state, qp->qpn, 0, - mailbox, sqd_event, &status); - if (err) - goto out_mailbox; - if (status) { - mthca_warn(dev, "modify QP %d->%d returned status %02x.\n", - cur_state, new_state, status); - err = -EINVAL; + mailbox, sqd_event); + if (err) { + mthca_warn(dev, "modify QP %d->%d returned %d.\n", + cur_state, new_state, err); goto out_mailbox; } @@ -817,7 +808,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp, cur_state != IB_QPS_ERR && (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) - mthca_CLOSE_IB(dev, qp->port, &status); + mthca_CLOSE_IB(dev, qp->port); } /* @@ -869,7 +860,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, new_state = attr_mask & IB_QP_STATE ? 
attr->qp_state : cur_state; - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, + IB_LINK_LAYER_UNSPECIFIED)) { mthca_dbg(dev, "Bad QP transition (transport %d) " "%d->%d with attr 0x%08x\n", qp->transport, cur_state, new_state, @@ -1319,10 +1311,12 @@ int mthca_alloc_qp(struct mthca_dev *dev, } static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq) + __acquires(&send_cq->lock) __acquires(&recv_cq->lock) { - if (send_cq == recv_cq) + if (send_cq == recv_cq) { spin_lock_irq(&send_cq->lock); - else if (send_cq->cqn < recv_cq->cqn) { + __acquire(&recv_cq->lock); + } else if (send_cq->cqn < recv_cq->cqn) { spin_lock_irq(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else { @@ -1332,10 +1326,12 @@ static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq) } static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq) + __releases(&send_cq->lock) __releases(&recv_cq->lock) { - if (send_cq == recv_cq) + if (send_cq == recv_cq) { + __release(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); - else if (send_cq->cqn < recv_cq->cqn) { + } else if (send_cq->cqn < recv_cq->cqn) { spin_unlock(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); } else { @@ -1425,7 +1421,6 @@ static inline int get_qp_refcount(struct mthca_dev *dev, struct mthca_qp *qp) void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp) { - u8 status; struct mthca_cq *send_cq; struct mthca_cq *recv_cq; @@ -1450,7 +1445,7 @@ void mthca_free_qp(struct mthca_dev *dev, if (qp->state != IB_QPS_RESET) mthca_MODIFY_QP(dev, qp->state, IB_QPS_RESET, qp->qpn, 0, - NULL, 0, &status); + NULL, 0); /* * If this is a userspace QP, the buffers, MR, CQs and so on @@ -1489,8 +1484,8 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, int err; u16 pkey; - ib_ud_header_init(256, /* assume a MAD */ - mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), + ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0, + mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0, &sqp->ud_header); err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header); @@ -2259,7 +2254,6 @@ void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, int mthca_init_qp_table(struct mthca_dev *dev) { int err; - u8 status; int i; spin_lock_init(&dev->qp_table.lock); @@ -2286,15 +2280,10 @@ int mthca_init_qp_table(struct mthca_dev *dev) for (i = 0; i < 2; ++i) { err = mthca_CONF_SPECIAL_QP(dev, i ? 
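mthca_lock_cqs()/mthca_unlock_cqs() above gain sparse __acquires/__releases annotations (checker-only; they compile away), but the underlying idiom is worth spelling out: when a QP's send and receive CQs are distinct, the two spinlocks are always taken in ascending CQN order so that two paths locking the same pair can never deadlock ABBA-style, and when both point at the same CQ the lock is taken exactly once, with a fake __acquire() to keep sparse's counting balanced. The locking half, as the hunk has it:

if (send_cq == recv_cq) {
        spin_lock_irq(&send_cq->lock);
        __acquire(&recv_cq->lock);      /* sparse bookkeeping only */
} else if (send_cq->cqn < recv_cq->cqn) {
        spin_lock_irq(&send_cq->lock);
        spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
        spin_lock_irq(&recv_cq->lock);
        spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
}

spin_lock_nested() tells lockdep that the second acquisition within the same lock class is intentional.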
IB_QPT_GSI : IB_QPT_SMI, - dev->qp_table.sqp_start + i * 2, - &status); - if (err) - goto err_out; - if (status) { + dev->qp_table.sqp_start + i * 2); + if (err) { mthca_warn(dev, "CONF_SPECIAL_QP returned " - "status %02x, aborting.\n", - status); - err = -EINVAL; + "%d, aborting.\n", err); goto err_out; } } @@ -2302,7 +2291,7 @@ int mthca_init_qp_table(struct mthca_dev *dev) err_out: for (i = 0; i < 2; ++i) - mthca_CONF_SPECIAL_QP(dev, i, 0, &status); + mthca_CONF_SPECIAL_QP(dev, i, 0); mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps); mthca_alloc_cleanup(&dev->qp_table.alloc); @@ -2313,10 +2302,9 @@ int mthca_init_qp_table(struct mthca_dev *dev) void mthca_cleanup_qp_table(struct mthca_dev *dev) { int i; - u8 status; for (i = 0; i < 2; ++i) - mthca_CONF_SPECIAL_QP(dev, i, 0, &status); + mthca_CONF_SPECIAL_QP(dev, i, 0); mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps); mthca_alloc_cleanup(&dev->qp_table.alloc); diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c index acb6817f606..74c6a942604 100644 --- a/drivers/infiniband/hw/mthca/mthca_reset.c +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include <linux/init.h> #include <linux/errno.h> #include <linux/pci.h> #include <linux/delay.h> @@ -114,7 +113,7 @@ int mthca_reset(struct mthca_dev *mdev) } hca_pcix_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX); - hca_pcie_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP); + hca_pcie_cap = pci_pcie_cap(mdev->pdev); if (bridge) { bridge_header = kmalloc(256, GFP_KERNEL); @@ -242,16 +241,16 @@ good: if (hca_pcie_cap) { devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4]; - if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_DEVCTL, - devctl)) { + if (pcie_capability_write_word(mdev->pdev, PCI_EXP_DEVCTL, + devctl)) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI Express " "Device Control register, aborting.\n"); goto out; } linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4]; - if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_LNKCTL, - linkctl)) { + if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL, + linkctl)) { err = -ENODEV; mthca_err(mdev, "Couldn't restore HCA PCI Express " "Link control register, aborting.\n"); diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 4fabe62aab8..d22f970480c 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -200,7 +200,6 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, struct ib_srq_attr *attr, struct mthca_srq *srq) { struct mthca_mailbox *mailbox; - u8 status; int ds; int err; @@ -266,18 +265,12 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, else mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); - err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn, &status); + err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn); if (err) { mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err); goto err_out_free_buf; } - if (status) { - mthca_warn(dev, "SW2HW_SRQ returned status 0x%02x\n", - status); - err = -EINVAL; - goto err_out_free_buf; - } spin_lock_irq(&dev->srq_table.lock); if (mthca_array_set(&dev->srq_table.srq, @@ -299,11 +292,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, return 0; err_out_free_srq: - err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn); if (err) mthca_warn(dev, "HW2SW_SRQ failed 
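The mthca_reset.c hunk swaps hand-computed capability offsets for the pcie_capability accessors: pci_pcie_cap() caches the capability position, and pcie_capability_write_word() takes the register offset relative to the PCIe capability (PCI_EXP_DEVCTL, PCI_EXP_LNKCTL), resolves it internally, and fails cleanly when the device has no PCIe capability. Compare:

/* old: offset arithmetic at every call site */
pci_write_config_word(pdev, hca_pcie_cap + PCI_EXP_DEVCTL, devctl);

/* new: the helper locates the capability itself */
if (pcie_capability_write_word(pdev, PCI_EXP_DEVCTL, devctl))
        return -ENODEV;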
(%d)\n", err); - else if (status) - mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); err_out_free_buf: if (!pd->ibpd.uobject) @@ -340,7 +331,6 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) { struct mthca_mailbox *mailbox; int err; - u8 status; mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) { @@ -348,11 +338,9 @@ void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) return; } - err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn); if (err) mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); - else if (status) - mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); spin_lock_irq(&dev->srq_table.lock); mthca_array_clear(&dev->srq_table.srq, @@ -378,8 +366,7 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, { struct mthca_dev *dev = to_mdev(ibsrq->device); struct mthca_srq *srq = to_msrq(ibsrq); - int ret; - u8 status; + int ret = 0; /* We don't support resizing SRQs (yet?) */ if (attr_mask & IB_SRQ_MAX_WR) @@ -391,16 +378,11 @@ int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return -EINVAL; mutex_lock(&srq->mutex); - ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status); + ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit); mutex_unlock(&srq->mutex); - - if (ret) - return ret; - if (status) - return -EINVAL; } - return 0; + return ret; } int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) @@ -410,14 +392,13 @@ int mthca_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) struct mthca_mailbox *mailbox; struct mthca_arbel_srq_context *arbel_ctx; struct mthca_tavor_srq_context *tavor_ctx; - u8 status; int err; mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox, &status); + err = mthca_QUERY_SRQ(dev, srq->srqn, mailbox); if (err) goto out; diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig index d449eb6ec78..846dc97cf26 100644 --- a/drivers/infiniband/hw/nes/Kconfig +++ b/drivers/infiniband/hw/nes/Kconfig @@ -4,14 +4,13 @@ config INFINIBAND_NES select LIBCRC32C select INET_LRO ---help--- - This is a low-level driver for NetEffect RDMA enabled - Network Interface Cards (RNIC). + This is the RDMA Network Interface Card (RNIC) driver for + NetEffect Ethernet Cluster Server Adapters. config INFINIBAND_NES_DEBUG bool "Verbose debugging output" depends on INFINIBAND_NES default n ---help--- - This option causes the NetEffect RNIC driver to produce debug - messages. Select this if you are developing the driver - or trying to diagnose a problem. + This option enables debug messages from the NetEffect RNIC + driver. Select this if you are diagnosing a problem. diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile index 35148513c47..97820c23ece 100644 --- a/drivers/infiniband/hw/nes/Makefile +++ b/drivers/infiniband/hw/nes/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o -iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o +iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o nes_mgt.o diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index d2884e77809..3b2a6dc8ea9 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. 
+ * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -44,6 +44,7 @@ #include <linux/init.h> #include <linux/if_arp.h> #include <linux/highmem.h> +#include <linux/slab.h> #include <asm/io.h> #include <asm/irq.h> #include <asm/byteorder.h> @@ -67,30 +68,32 @@ MODULE_VERSION(DRV_VERSION); int max_mtu = 9000; int interrupt_mod_interval = 0; - /* Interoperability */ int mpa_version = 1; -module_param(mpa_version, int, 0); +module_param(mpa_version, int, 0644); MODULE_PARM_DESC(mpa_version, "MPA version to be used int MPA Req/Resp (0 or 1)"); /* Interoperability */ int disable_mpa_crc = 0; -module_param(disable_mpa_crc, int, 0); +module_param(disable_mpa_crc, int, 0644); MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC"); -unsigned int send_first = 0; -module_param(send_first, int, 0); -MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection"); - - -unsigned int nes_drv_opt = 0; -module_param(nes_drv_opt, int, 0); +unsigned int nes_drv_opt = NES_DRV_OPT_DISABLE_INT_MOD | NES_DRV_OPT_ENABLE_PAU; +module_param(nes_drv_opt, int, 0644); MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters"); unsigned int nes_debug_level = 0; module_param_named(debug_level, nes_debug_level, uint, 0644); MODULE_PARM_DESC(debug_level, "Enable debug output level"); +unsigned int wqm_quanta = 0x10000; +module_param(wqm_quanta, int, 0644); +MODULE_PARM_DESC(wqm_quanta, "WQM quanta"); + +static bool limit_maxrdreqsz; +module_param(limit_maxrdreqsz, bool, 0644); +MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes"); + LIST_HEAD(nes_adapter_list); static LIST_HEAD(nes_dev_list); @@ -101,12 +104,23 @@ static unsigned int sysfs_nonidx_addr; static unsigned int sysfs_idx_addr; static struct pci_device_id nes_pci_table[] = { - {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID}, + { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020), }, + { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR), }, {0} }; MODULE_DEVICE_TABLE(pci, nes_pci_table); +/* registered nes netlink callbacks */ +static struct ibnl_client_cbs nes_nl_cb_table[] = { + [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, + [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, + [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, + [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, + [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, + [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} +}; + static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *); static int nes_net_event(struct notifier_block *, unsigned long, void *); static int nes_notifiers_registered; @@ -120,9 +134,6 @@ static struct notifier_block nes_net_notifier = { .notifier_call = nes_net_event }; - - - /** * nes_inetaddr_event */ @@ -133,21 +144,21 @@ static int nes_inetaddr_event(struct notifier_block *notifier, struct net_device *event_netdev = ifa->ifa_dev->dev; struct nes_device *nesdev; struct net_device *netdev; + struct net_device *upper_dev; struct nes_vnic *nesvnic; - unsigned int addr; - unsigned int mask; - - addr = ntohl(ifa->ifa_address); - mask = ntohl(ifa->ifa_mask); - nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address " NIPQUAD_FMT - ", netmask " NIPQUAD_FMT ".\n", - HIPQUAD(addr), HIPQUAD(mask)); + unsigned int is_bonded; + + 
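The nes parameter block below switches permissions from 0 to 0644, making each parameter visible and root-writable under /sys/module/iw_nes/parameters/ instead of settable only at load time; nes_drv_opt also gains a non-zero default, and new parameters appear for wqm_quanta and read-request limiting. The general pattern:

static int mpa_version = 1;
module_param(mpa_version, int, 0644);   /* 0644: world-readable, root-writable in sysfs */
MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp (0 or 1)");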
nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address %pI4, netmask %pI4.\n", + &ifa->ifa_address, &ifa->ifa_mask); list_for_each_entry(nesdev, &nes_dev_list, list) { nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p. (%s)\n", nesdev, nesdev->netdev[0]->name); netdev = nesdev->netdev[0]; nesvnic = netdev_priv(netdev); - if (netdev == event_netdev) { + upper_dev = netdev_master_upper_dev_get(netdev); + is_bonded = netif_is_bond_slave(netdev) && + (upper_dev == event_netdev); + if ((netdev == event_netdev) || is_bonded) { if (nesvnic->rdma_enabled == 0) { nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since" " RDMA is not enabled.\n", @@ -164,7 +175,10 @@ static int nes_inetaddr_event(struct notifier_block *notifier, nes_manage_arp_cache(netdev, netdev->dev_addr, ntohl(nesvnic->local_ipaddr), NES_ARP_DELETE); nesvnic->local_ipaddr = 0; - return NOTIFY_OK; + if (is_bonded) + continue; + else + return NOTIFY_OK; break; case NETDEV_UP: nes_debug(NES_DBG_NETDEV, "event:UP\n"); @@ -173,15 +187,24 @@ static int nes_inetaddr_event(struct notifier_block *notifier, nes_debug(NES_DBG_NETDEV, "Interface already has local_ipaddr\n"); return NOTIFY_OK; } + /* fall through */ + case NETDEV_CHANGEADDR: /* Add the address to the IP table */ - nesvnic->local_ipaddr = ifa->ifa_address; + if (upper_dev) + nesvnic->local_ipaddr = + ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address; + else + nesvnic->local_ipaddr = ifa->ifa_address; nes_write_indexed(nesdev, NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), - ntohl(ifa->ifa_address)); + ntohl(nesvnic->local_ipaddr)); nes_manage_arp_cache(netdev, netdev->dev_addr, ntohl(nesvnic->local_ipaddr), NES_ARP_ADD); - return NOTIFY_OK; + if (is_bonded) + continue; + else + return NOTIFY_OK; break; default: break; @@ -254,13 +277,11 @@ static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_r unsigned long flags; struct nes_qp *nesqp = cqp_request->cqp_callback_pointer; struct nes_adapter *nesadapter = nesdev->nesadapter; - u32 qp_id; atomic_inc(&qps_destroyed); /* Free the control structures */ - qp_id = nesqp->hwqp.qp_id; if (nesqp->pbl_vbase) { pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase); @@ -276,6 +297,7 @@ static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_r } nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id); + nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL; kfree(nesqp->allocated_buffer); } @@ -289,7 +311,6 @@ void nes_rem_ref(struct ib_qp *ibqp) struct nes_qp *nesqp; struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; struct nes_hw_cqp_wqe *cqp_wqe; struct nes_cqp_request *cqp_request; u32 opcode; @@ -303,7 +324,8 @@ void nes_rem_ref(struct ib_qp *ibqp) } if (atomic_dec_and_test(&nesqp->refcount)) { - nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL; + if (nesqp->pau_mode) + nes_destroy_pau_qp(nesdev, nesqp); /* Destroy the QP */ cqp_request = nes_get_cqp_request(nesdev); @@ -354,10 +376,8 @@ struct ib_qp *nes_get_qp(struct ib_device *device, int qpn) */ static void nes_print_macaddr(struct net_device *netdev) { - DECLARE_MAC_BUF(mac); - - nes_debug(NES_DBG_INIT, "%s: %s, IRQ %u\n", - netdev->name, print_mac(mac, netdev->dev_addr), netdev->irq); + nes_debug(NES_DBG_INIT, "%s: %pM, IRQ %u\n", + netdev->name, netdev->dev_addr, netdev->irq); } /** @@ -435,12 
+455,11 @@ static irqreturn_t nes_interrupt(int irq, void *dev_id) /** * nes_probe - Device initialization */ -static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) +static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) { struct net_device *netdev = NULL; struct nes_device *nesdev = NULL; int ret = 0; - struct nes_vnic *nesvnic = NULL; void __iomem *mmio_regs = NULL; u8 hw_rev; @@ -479,23 +498,23 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i } if ((sizeof(dma_addr_t) > 4)) { - ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK); + ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64)); if (ret < 0) { printk(KERN_ERR PFX "64b DMA mask configuration failed\n"); goto bail2; } - ret = pci_set_consistent_dma_mask(pcidev, DMA_64BIT_MASK); + ret = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(64)); if (ret) { printk(KERN_ERR PFX "64b DMA consistent mask configuration failed\n"); goto bail2; } } else { - ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK); + ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32)); if (ret < 0) { printk(KERN_ERR PFX "32b DMA mask configuration failed\n"); goto bail2; } - ret = pci_set_consistent_dma_mask(pcidev, DMA_32BIT_MASK); + ret = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(32)); if (ret) { printk(KERN_ERR PFX "32b DMA consistent mask configuration failed\n"); goto bail2; @@ -522,7 +541,8 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i spin_lock_init(&nesdev->indexed_regs_lock); /* Remap the PCI registers in adapter BAR0 to kernel VA space */ - mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), sizeof(mmio_regs)); + mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), + pci_resource_len(pcidev, BAR_0)); if (mmio_regs == NULL) { printk(KERN_ERR PFX "Unable to remap BAR0\n"); ret = -EIO; @@ -559,12 +579,44 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i goto bail5; } nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; + nesdev->nesadapter->wqm_quanta = wqm_quanta; /* nesdev->base_doorbell_index = nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */ nesdev->base_doorbell_index = 1; nesdev->doorbell_start = nesdev->nesadapter->doorbell_start; - nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % nesdev->nesadapter->port_count; + if (nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { + switch (PCI_FUNC(nesdev->pcidev->devfn) % + nesdev->nesadapter->port_count) { + case 1: + nesdev->mac_index = 2; + break; + case 2: + nesdev->mac_index = 1; + break; + case 3: + nesdev->mac_index = 3; + break; + case 0: + default: + nesdev->mac_index = 0; + } + } else { + nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % + nesdev->nesadapter->port_count; + } + + if ((limit_maxrdreqsz || + ((nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_GLADIUS) && + (hw_rev == NE020_REV1))) && + (pcie_get_readrq(pcidev) > 256)) { + if (pcie_set_readrq(pcidev, 256)) + printk(KERN_ERR PFX "Unable to set max read request" + " to 256 bytes\n"); + else + nes_debug(NES_DBG_INIT, "Max read request size set" + " to 256 bytes\n"); + } tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev); @@ -583,7 +635,7 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) | (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16)); if (PCI_FUNC(nesdev->pcidev->devfn) < 4) { - nesdev->int_req |= (1 << 
(PCI_FUNC(nesdev->pcidev->devfn)+24)); + nesdev->int_req |= (1 << (PCI_FUNC(nesdev->mac_index)+24)); } /* TODO: This really should be the first driver to load, not function 0 */ @@ -629,28 +681,40 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i } nes_notifiers_registered++; - /* Initialize network devices */ - if ((netdev = nes_netdev_init(nesdev, mmio_regs)) == NULL) { - goto bail7; - } + if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table)) + printk(KERN_ERR PFX "%s[%u]: Failed to add netlink callback\n", + __func__, __LINE__); - /* Register network device */ - ret = register_netdev(netdev); - if (ret) { - printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret); - nes_netdev_destroy(netdev); - goto bail7; - } + ret = iwpm_init(RDMA_NL_NES); + if (ret) { + printk(KERN_ERR PFX "%s: port mapper initialization failed\n", + pci_name(pcidev)); + goto bail7; + } - nes_print_macaddr(netdev); - /* create a CM core for this netdev */ - nesvnic = netdev_priv(netdev); + INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status); + + /* Initialize network devices */ + netdev = nes_netdev_init(nesdev, mmio_regs); + if (netdev == NULL) { + ret = -ENOMEM; + goto bail7; + } + + /* Register network device */ + ret = register_netdev(netdev); + if (ret) { + printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret); + nes_netdev_destroy(netdev); + goto bail7; + } - nesdev->netdev_count++; - nesdev->nesadapter->netdev_count++; + nes_print_macaddr(netdev); + nesdev->netdev_count++; + nesdev->nesadapter->netdev_count++; - printk(KERN_ERR PFX "%s: NetEffect RNIC driver successfully loaded.\n", + printk(KERN_INFO PFX "%s: NetEffect RNIC driver successfully loaded.\n", pci_name(pcidev)); return 0; @@ -666,6 +730,7 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n", nesdev->netdev_count, nesdev->nesadapter->netdev_count); + ibnl_remove_client(RDMA_NL_NES); nes_notifiers_registered--; if (nes_notifiers_registered == 0) { @@ -710,11 +775,12 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i /** * nes_remove - unload from kernel */ -static void __devexit nes_remove(struct pci_dev *pcidev) +static void nes_remove(struct pci_dev *pcidev) { struct nes_device *nesdev = pci_get_drvdata(pcidev); struct net_device *netdev; int netdev_index = 0; + unsigned long flags; if (nesdev->netdev_count) { netdev = nesdev->netdev[netdev_index]; @@ -728,6 +794,8 @@ static void __devexit nes_remove(struct pci_dev *pcidev) nesdev->nesadapter->netdev_count--; } } + ibnl_remove_client(RDMA_NL_NES); + iwpm_exit(RDMA_NL_NES); nes_notifiers_registered--; if (nes_notifiers_registered == 0) { @@ -741,6 +809,14 @@ static void __devexit nes_remove(struct pci_dev *pcidev) free_irq(pcidev->irq, nesdev); tasklet_kill(&nesdev->dpc_tasklet); + spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); + if (nesdev->link_recheck) { + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + cancel_delayed_work_sync(&nesdev->work); + } else { + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + } + /* Deallocate the Adapter Structure */ nes_destroy_adapter(nesdev->nesadapter); @@ -762,7 +838,7 @@ static struct pci_driver nes_pci_driver = { .name = DRV_NAME, .id_table = nes_pci_table, .probe = nes_probe, - .remove = __devexit_p(nes_remove), + .remove = nes_remove, }; static ssize_t nes_show_adapter(struct device_driver *ddp, char 
*buf) @@ -774,14 +850,14 @@ static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf) list_for_each_entry(nesdev, &nes_dev_list, list) { if (i == ee_flsh_adapter) { - devfn = nesdev->nesadapter->devfn; - bus_number = nesdev->nesadapter->bus_number; + devfn = nesdev->pcidev->devfn; + bus_number = nesdev->pcidev->bus->number; break; } i++; } - return snprintf(buf, PAGE_SIZE, "%x:%x", bus_number, devfn); + return snprintf(buf, PAGE_SIZE, "%x:%x\n", bus_number, devfn); } static ssize_t nes_store_adapter(struct device_driver *ddp, @@ -1052,6 +1128,57 @@ static ssize_t nes_store_idx_data(struct device_driver *ddp, return strnlen(buf, count); } + +/** + * nes_show_wqm_quanta + */ +static ssize_t nes_show_wqm_quanta(struct device_driver *ddp, char *buf) +{ + u32 wqm_quanta_value = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + wqm_quanta_value = nesdev->nesadapter->wqm_quanta; + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta_value); +} + + +/** + * nes_store_wqm_quanta + */ +static ssize_t nes_store_wqm_quanta(struct device_driver *ddp, + const char *buf, size_t count) +{ + unsigned long wqm_quanta_value; + u32 wqm_config1; + u32 i = 0; + struct nes_device *nesdev; + + if (kstrtoul(buf, 0, &wqm_quanta_value) < 0) + return -EINVAL; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nesdev->nesadapter->wqm_quanta = wqm_quanta_value; + wqm_config1 = nes_read_indexed(nesdev, + NES_IDX_WQM_CONFIG1); + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, + ((wqm_quanta_value << 1) | + (wqm_config1 & 0x00000001))); + break; + } + i++; + } + return strnlen(buf, count); +} + static DRIVER_ATTR(adapter, S_IRUSR | S_IWUSR, nes_show_adapter, nes_store_adapter); static DRIVER_ATTR(eeprom_cmd, S_IRUSR | S_IWUSR, @@ -1070,6 +1197,8 @@ static DRIVER_ATTR(idx_addr, S_IRUSR | S_IWUSR, nes_show_idx_addr, nes_store_idx_addr); static DRIVER_ATTR(idx_data, S_IRUSR | S_IWUSR, nes_show_idx_data, nes_store_idx_data); +static DRIVER_ATTR(wqm_quanta, S_IRUSR | S_IWUSR, + nes_show_wqm_quanta, nes_store_wqm_quanta); static int nes_create_driver_sysfs(struct pci_driver *drv) { @@ -1083,6 +1212,7 @@ static int nes_create_driver_sysfs(struct pci_driver *drv) error |= driver_create_file(&drv->driver, &driver_attr_nonidx_data); error |= driver_create_file(&drv->driver, &driver_attr_idx_addr); error |= driver_create_file(&drv->driver, &driver_attr_idx_data); + error |= driver_create_file(&drv->driver, &driver_attr_wqm_quanta); return error; } @@ -1097,6 +1227,7 @@ static void nes_remove_driver_sysfs(struct pci_driver *drv) driver_remove_file(&drv->driver, &driver_attr_nonidx_data); driver_remove_file(&drv->driver, &driver_attr_idx_addr); driver_remove_file(&drv->driver, &driver_attr_idx_data); + driver_remove_file(&drv->driver, &driver_attr_wqm_quanta); } /** diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 39bd897b40c..bd9d132f11c 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -43,7 +43,6 @@ #include <linux/dma-mapping.h> #include <linux/workqueue.h> #include <linux/slab.h> -#include <linux/version.h> #include <asm/io.h> #include <linux/crc32c.h> @@ -52,23 +51,24 @@ #include <rdma/ib_pack.h> #include <rdma/rdma_cm.h> #include <rdma/iw_cm.h> +#include <rdma/rdma_netlink.h> +#include <rdma/iw_portmap.h> #define NES_SEND_FIRST_WRITE #define QUEUE_DISCONNECTS -#define DRV_BUILD "1" - #define DRV_NAME "iw_nes" -#define DRV_VERSION "1.0 KO Build " DRV_BUILD +#define DRV_VERSION "1.5.0.1" #define PFX DRV_NAME ": " /* * NetEffect PCI vendor id and NE010 PCI device id. */ #ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */ -#define PCI_VENDOR_ID_NETEFFECT 0x1678 -#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100 +#define PCI_VENDOR_ID_NETEFFECT 0x1678 +#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100 +#define PCI_DEVICE_ID_NETEFFECT_NE020_KR 0x0110 #endif #define NE020_REV 4 @@ -104,6 +104,7 @@ #define NES_DRV_OPT_NO_INLINE_DATA 0x00000080 #define NES_DRV_OPT_DISABLE_INT_MOD 0x00000100 #define NES_DRV_OPT_DISABLE_VIRT_WQ 0x00000200 +#define NES_DRV_OPT_ENABLE_PAU 0x00000400 #define NES_AEQ_EVENT_TIMEOUT 2500 #define NES_DISCONNECT_EVENT_TIMEOUT 2000 @@ -130,6 +131,8 @@ #define NES_DBG_IW_RX 0x00020000 #define NES_DBG_IW_TX 0x00040000 #define NES_DBG_SHUTDOWN 0x00080000 +#define NES_DBG_PAU 0x00100000 +#define NES_DBG_NLMSG 0x00200000 #define NES_DBG_RSVD1 0x10000000 #define NES_DBG_RSVD2 0x20000000 #define NES_DBG_RSVD3 0x40000000 @@ -138,14 +141,18 @@ #ifdef CONFIG_INFINIBAND_NES_DEBUG #define nes_debug(level, fmt, args...) \ +do { \ if (level & nes_debug_level) \ - printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args) + printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \ +} while (0) -#define assert(expr) \ -if (!(expr)) { \ - printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \ - #expr, __FILE__, __func__, __LINE__); \ -} +#define assert(expr) \ +do { \ + if (!(expr)) { \ + printk(KERN_ERR PFX "Assertion failed! 
%s, %s, %s, line %d\n", \ + #expr, __FILE__, __func__, __LINE__); \ + } \ +} while (0) #define NES_EVENT_TIMEOUT 1200000 #else @@ -160,6 +167,7 @@ if (!(expr)) { \ #include "nes_context.h" #include "nes_user.h" #include "nes_cm.h" +#include "nes_mgt.h" extern int max_mtu; #define max_frame_len (max_mtu+ETH_HLEN) @@ -167,10 +175,9 @@ extern int interrupt_mod_interval; extern int nes_if_count; extern int mpa_version; extern int disable_mpa_crc; -extern unsigned int send_first; extern unsigned int nes_drv_opt; extern unsigned int nes_debug_level; - +extern unsigned int wqm_quanta; extern struct list_head nes_adapter_list; extern atomic_t cm_connects; @@ -192,14 +199,16 @@ extern u32 cm_packets_created; extern u32 cm_packets_received; extern u32 cm_packets_dropped; extern u32 cm_packets_retrans; -extern u32 cm_listens_created; -extern u32 cm_listens_destroyed; +extern atomic_t cm_listens_created; +extern atomic_t cm_listens_destroyed; extern u32 cm_backlog_drops; extern atomic_t cm_loopbacks; extern atomic_t cm_nodes_created; extern atomic_t cm_nodes_destroyed; extern atomic_t cm_accel_dropped_pkts; extern atomic_t cm_resets_recvd; +extern atomic_t pau_qps_created; +extern atomic_t pau_qps_destroyed; extern u32 int_mod_timer_init; extern u32 int_mod_cq_depth_256; @@ -260,13 +269,25 @@ struct nes_device { u16 base_doorbell_index; u16 currcq_count; u16 deepcq_count; + u8 iw_status; u8 msi_enabled; u8 netdev_count; u8 napi_isr_ran; u8 disable_rx_flow_control; u8 disable_tx_flow_control; + + struct delayed_work work; + u8 link_recheck; }; +/* Receive skb private area - must fit in skb->cb area */ +struct nes_rskb_cb { + u64 busaddr; + u32 maplen; + u32 seqnum; + u8 *data_start; + struct nes_qp *nesqp; +}; static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad) { @@ -286,8 +307,8 @@ static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad) static inline void set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value) { - wqe_words[index] = cpu_to_le32((u32) ((unsigned long)value)); - wqe_words[index + 1] = cpu_to_le32((u32)(upper_32_bits((unsigned long)value))); + wqe_words[index] = cpu_to_le32((u32) value); + wqe_words[index + 1] = cpu_to_le32(upper_32_bits(value)); } static inline void @@ -299,8 +320,8 @@ set_wqe_32bit_value(__le32 *wqe_words, u32 index, u32 value) static inline void nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev) { - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_CTX_LOW_IDX, - (u64)((unsigned long) &nesdev->cqp)); + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = 0; + cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0; @@ -380,11 +401,20 @@ static inline void nes_write8(void __iomem *addr, u8 val) writeb(val, addr); } - +enum nes_resource { + NES_RESOURCE_MW = 1, + NES_RESOURCE_FAST_MR, + NES_RESOURCE_PHYS_MR, + NES_RESOURCE_USER_MR, + NES_RESOURCE_PD, + NES_RESOURCE_QP, + NES_RESOURCE_CQ, + NES_RESOURCE_ARP +}; static inline int nes_alloc_resource(struct nes_adapter *nesadapter, unsigned long *resource_array, u32 max_resources, - u32 *req_resource_num, u32 *next) + u32 *req_resource_num, u32 *next, enum nes_resource resource_type) { unsigned long flags; u32 resource_num; @@ -395,7 +425,7 @@ static inline int nes_alloc_resource(struct nes_adapter *nesadapter, if (resource_num >= max_resources) { resource_num = 
find_first_zero_bit(resource_array, max_resources); if (resource_num >= max_resources) { - printk(KERN_ERR PFX "%s: No available resourcess.\n", __func__); + printk(KERN_ERR PFX "%s: No available resources [type=%u].\n", __func__, resource_type); spin_unlock_irqrestore(&nesadapter->resource_lock, flags); return -EMFILE; } @@ -504,6 +534,8 @@ void nes_nic_ce_handler(struct nes_device *, struct nes_hw_nic_cq *); void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *); int nes_destroy_cqp(struct nes_device *); int nes_nic_cm_xmit(struct sk_buff *, struct net_device *); +void nes_recheck_link_status(struct work_struct *work); +void nes_terminate_timeout(unsigned long context); /* nes_nic.c */ struct net_device *nes_netdev_init(struct nes_device *, void __iomem *); @@ -522,9 +554,10 @@ int nes_cm_disconn(struct nes_qp *); void nes_cm_disconn_worker(void *); /* nes_verbs.c */ -int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32); +int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32, u32); int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *); struct nes_ib_device *nes_init_ofa_device(struct net_device *); +void nes_port_ibevent(struct nes_vnic *nesvnic); void nes_destroy_ofa_device(struct nes_ib_device *); int nes_register_ofa_device(struct nes_ib_device *); diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 6aa531d5276..6f09a72e78d 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -34,7 +34,7 @@ #define TCPOPT_TIMESTAMP 8 -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -52,10 +52,14 @@ #include <linux/random.h> #include <linux/list.h> #include <linux/threads.h> - +#include <linux/highmem.h> +#include <linux/slab.h> +#include <net/arp.h> #include <net/neighbour.h> #include <net/route.h> #include <net/ip_fib.h> +#include <net/tcp.h> +#include <linux/fcntl.h> #include "nes.h" @@ -65,8 +69,8 @@ u32 cm_packets_dropped; u32 cm_packets_retrans; u32 cm_packets_created; u32 cm_packets_received; -u32 cm_listens_created; -u32 cm_listens_destroyed; +atomic_t cm_listens_created; +atomic_t cm_listens_destroyed; u32 cm_backlog_drops; atomic_t cm_loopbacks; atomic_t cm_nodes_created; @@ -75,35 +79,57 @@ atomic_t cm_accel_dropped_pkts; atomic_t cm_resets_recvd; static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *); -static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, - struct nes_vnic *, struct nes_cm_info *); -static int add_ref_cm_node(struct nes_cm_node *); -static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); +static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, struct nes_vnic *, struct nes_cm_info *); static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *); -static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *, - void *, u32, void *, u32, u8); -static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node); - -static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, - struct nes_vnic *, - struct ietf_mpa_frame *, - struct nes_cm_info *); -static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); -static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); +static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, struct nes_vnic *, u16, void *, struct nes_cm_info *); static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *); -static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, - struct sk_buff *); +static int mini_cm_accept(struct nes_cm_core *, struct nes_cm_node *); +static int mini_cm_reject(struct nes_cm_core *, struct nes_cm_node *); +static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); static int mini_cm_dealloc_core(struct nes_cm_core *); static int mini_cm_get(struct nes_cm_core *); static int mini_cm_set(struct nes_cm_core *, u32, u32); + +static void form_cm_frame(struct sk_buff *, struct nes_cm_node *, void *, u32, void *, u32, u8); +static int add_ref_cm_node(struct nes_cm_node *); +static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); + static int nes_cm_disconn_true(struct nes_qp *); static int nes_cm_post_event(struct nes_cm_event *event); static int nes_disconnect(struct nes_qp *nesqp, int abrupt); static void nes_disconnect_worker(struct work_struct *work); -static int send_ack(struct nes_cm_node *cm_node); + +static int send_mpa_request(struct nes_cm_node *, struct sk_buff *); +static int send_mpa_reject(struct nes_cm_node *); +static int send_syn(struct nes_cm_node *, u32, struct sk_buff *); +static int send_reset(struct nes_cm_node *, struct sk_buff *); +static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb); static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb); +static void process_packet(struct 
nes_cm_node *, struct sk_buff *, struct nes_cm_core *); + +static void active_open_err(struct nes_cm_node *, struct sk_buff *, int); +static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int); +static void cleanup_retrans_entry(struct nes_cm_node *); +static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *); +static void free_retrans_entry(struct nes_cm_node *cm_node); +static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, struct sk_buff *skb, int optionsize, int passive); + +/* CM event handler functions */ +static void cm_event_connected(struct nes_cm_event *); +static void cm_event_connect_error(struct nes_cm_event *); +static void cm_event_reset(struct nes_cm_event *); +static void cm_event_mpa_req(struct nes_cm_event *); +static void cm_event_mpa_reject(struct nes_cm_event *); +static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node); + +/* MPA build functions */ +static int cm_build_mpa_frame(struct nes_cm_node *, u8 **, u16 *, u8 *, u8); +static void build_mpa_v2(struct nes_cm_node *, void *, u8); +static void build_mpa_v1(struct nes_cm_node *, void *, u8); +static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **); + +static void print_core(struct nes_cm_core *core); +static void record_ird_ord(struct nes_cm_node *, u16, u16); /* External CM API Interface */ /* instance of function pointers for client API */ @@ -132,12 +158,20 @@ atomic_t cm_connecteds; atomic_t cm_connect_reqs; atomic_t cm_rejects; +int nes_add_ref_cm_node(struct nes_cm_node *cm_node) +{ + return add_ref_cm_node(cm_node); +} +int nes_rem_ref_cm_node(struct nes_cm_node *cm_node) +{ + return rem_ref_cm_node(cm_node->cm_core, cm_node); +} /** * create_event */ -static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, - enum nes_cm_event_type type) +static struct nes_cm_event *create_event(struct nes_cm_node * cm_node, + enum nes_cm_event_type type) { struct nes_cm_event *event; @@ -158,11 +192,11 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, event->cm_info.loc_port = cm_node->loc_port; event->cm_info.cm_id = cm_node->cm_id; - nes_debug(NES_DBG_CM, "Created event=%p, type=%u, dst_addr=%08x[%x]," - " src_addr=%08x[%x]\n", - event, type, - event->cm_info.loc_addr, event->cm_info.loc_port, - event->cm_info.rem_addr, event->cm_info.rem_port); + nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, " + "dst_addr=%08x[%x], src_addr=%08x[%x]\n", + cm_node, event, type, event->cm_info.loc_addr, + event->cm_info.loc_port, event->cm_info.rem_addr, + event->cm_info.rem_port); nes_cm_post_event(event); return event; @@ -172,27 +206,50 @@ static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, /** * send_mpa_request */ -static int send_mpa_request(struct nes_cm_node *cm_node) +static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb) { - struct sk_buff *skb; - int ret; + u8 start_addr = 0; + u8 *start_ptr = &start_addr; + u8 **start_buff = &start_ptr; + u16 buff_len = 0; - skb = get_free_pkt(cm_node); if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + nes_debug(NES_DBG_CM, "skb set to NULL\n"); return -1; } /* send an MPA Request frame */ - form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame, - cm_node->mpa_frame_size, SET_ACK); + cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REQUEST); + form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK); - ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); - if (ret 
< 0) { - return ret; + return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); +} + + + +static int send_mpa_reject(struct nes_cm_node *cm_node) +{ + struct sk_buff *skb = NULL; + u8 start_addr = 0; + u8 *start_ptr = &start_addr; + u8 **start_buff = &start_ptr; + u16 buff_len = 0; + struct ietf_mpa_v1 *mpa_frame; + + skb = dev_alloc_skb(MAX_CM_BUFFER); + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -ENOMEM; } - return 0; + /* send an MPA reject frame */ + cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY); + mpa_frame = (struct ietf_mpa_v1 *)*start_buff; + mpa_frame->flags |= IETF_MPA_FLAGS_REJECT; + form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN); + + cm_node->state = NES_CM_STATE_FIN_WAIT1; + return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); } @@ -200,65 +257,174 @@ static int send_mpa_request(struct nes_cm_node *cm_node) * recv_mpa - process a received TCP pkt, we are expecting an * IETF MPA frame */ -static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len) +static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, + u32 len) { - struct ietf_mpa_frame *mpa_frame; + struct ietf_mpa_v1 *mpa_frame; + struct ietf_mpa_v2 *mpa_v2_frame; + struct ietf_rtr_msg *rtr_msg; + int mpa_hdr_len; + int priv_data_len; + + *type = NES_MPA_REQUEST_ACCEPT; /* assume req frame is in tcp data payload */ - if (len < sizeof(struct ietf_mpa_frame)) { + if (len < sizeof(struct ietf_mpa_v1)) { nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len); - return -1; + return -EINVAL; } - mpa_frame = (struct ietf_mpa_frame *)buffer; - cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len); + /* points to the beginning of the frame, which could be MPA V1 or V2 */ + mpa_frame = (struct ietf_mpa_v1 *)buffer; + mpa_hdr_len = sizeof(struct ietf_mpa_v1); + priv_data_len = ntohs(mpa_frame->priv_data_len); - if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) { - nes_debug(NES_DBG_CM, "The received ietf buffer was not right" - " complete (%x + %x != %x)\n", - cm_node->mpa_frame_size, (u32)sizeof(struct ietf_mpa_frame), len); - return -1; + /* make sure mpa private data len is less than 512 bytes */ + if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) { + nes_debug(NES_DBG_CM, "The received Length of Private" + " Data field exceeds 512 octets\n"); + return -EINVAL; + } + /* + * make sure MPA receiver interoperate with the + * received MPA version and MPA key information + * + */ + if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) { + nes_debug(NES_DBG_CM, "The received mpa version" + " is not supported\n"); + return -EINVAL; + } + /* + * backwards compatibility only + */ + if (mpa_frame->rev > cm_node->mpa_frame_rev) { + nes_debug(NES_DBG_CM, "The received mpa version" + " can not be interoperated\n"); + return -EINVAL; + } else { + cm_node->mpa_frame_rev = mpa_frame->rev; } - /* copy entire MPA frame to our cm_node's frame */ - memcpy(cm_node->mpa_frame_buf, buffer + sizeof(struct ietf_mpa_frame), - cm_node->mpa_frame_size); + if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { + if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) { + nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n"); + return -EINVAL; + } + } else { + if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) { + nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n"); + return -EINVAL; + } + } - return 0; -} + if (priv_data_len + mpa_hdr_len != 
len) { + nes_debug(NES_DBG_CM, "The received ietf buffer was not right" + " complete (%x + %x != %x)\n", + priv_data_len, mpa_hdr_len, len); + return -EINVAL; + } + /* make sure it does not exceed the max size */ + if (len > MAX_CM_BUFFER) { + nes_debug(NES_DBG_CM, "The received ietf buffer was too large" + " (%x + %x != %x)\n", + priv_data_len, mpa_hdr_len, len); + return -EINVAL; + } + + cm_node->mpa_frame_size = priv_data_len; + + switch (mpa_frame->rev) { + case IETF_MPA_V2: { + u16 ird_size; + u16 ord_size; + u16 rtr_ctrl_ird; + u16 rtr_ctrl_ord; + + mpa_v2_frame = (struct ietf_mpa_v2 *)buffer; + mpa_hdr_len += IETF_RTR_MSG_SIZE; + cm_node->mpa_frame_size -= IETF_RTR_MSG_SIZE; + rtr_msg = &mpa_v2_frame->rtr_msg; + + /* parse rtr message */ + rtr_ctrl_ird = ntohs(rtr_msg->ctrl_ird); + rtr_ctrl_ord = ntohs(rtr_msg->ctrl_ord); + ird_size = rtr_ctrl_ird & IETF_NO_IRD_ORD; + ord_size = rtr_ctrl_ord & IETF_NO_IRD_ORD; + + if (!(rtr_ctrl_ird & IETF_PEER_TO_PEER)) { + /* send reset */ + return -EINVAL; + } + if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) + cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD; + + if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) { + /* responder */ + if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { + /* we are still negotiating */ + if (ord_size > NES_MAX_IRD) { + cm_node->ird_size = NES_MAX_IRD; + } else { + cm_node->ird_size = ord_size; + if (ord_size == 0 && + (rtr_ctrl_ord & IETF_RDMA0_READ)) { + cm_node->ird_size = 1; + nes_debug(NES_DBG_CM, + "%s: Remote peer doesn't support RDMA0_READ (ord=%u)\n", + __func__, ord_size); + } + } + if (ird_size > NES_MAX_ORD) + cm_node->ord_size = NES_MAX_ORD; + else + cm_node->ord_size = ird_size; + } else { /* initiator */ + if (ord_size > NES_MAX_IRD) { + nes_debug(NES_DBG_CM, + "%s: Unable to support the requested (ord =%u)\n", + __func__, ord_size); + return -EINVAL; + } + cm_node->ird_size = ord_size; + if (ird_size > NES_MAX_ORD) { + cm_node->ord_size = NES_MAX_ORD; + } else { + if (ird_size == 0 && + (rtr_ctrl_ord & IETF_RDMA0_READ)) { + nes_debug(NES_DBG_CM, + "%s: Remote peer doesn't support RDMA0_READ (ird=%u)\n", + __func__, ird_size); + return -EINVAL; + } else { + cm_node->ord_size = ird_size; + } + } + } + } -/** - * handle_exception_pkt - process an exception packet. 
- * We have been in a TSA state, and we have now received SW - * TCP/IP traffic should be a FIN request or IP pkt with options - */ -static int handle_exception_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb) -{ - int ret = 0; - struct tcphdr *tcph = tcp_hdr(skb); + if (rtr_ctrl_ord & IETF_RDMA0_READ) { + cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; - /* first check to see if this a FIN pkt */ - if (tcph->fin) { - /* we need to ACK the FIN request */ - send_ack(cm_node); - - /* check which side we are (client/server) and set next state accordingly */ - if (cm_node->tcp_cntxt.client) - cm_node->state = NES_CM_STATE_CLOSING; - else { - /* we are the server side */ - cm_node->state = NES_CM_STATE_CLOSE_WAIT; - /* since this is a self contained CM we don't wait for */ - /* an APP to close us, just send final FIN immediately */ - ret = send_fin(cm_node, NULL); - cm_node->state = NES_CM_STATE_LAST_ACK; + } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) { + cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO; + } else { /* Not supported RDMA0 operation */ + return -EINVAL; } - } else { - ret = -EINVAL; + break; + } + case IETF_MPA_V1: + default: + break; } - return ret; + /* copy entire MPA frame to our cm_node's frame */ + memcpy(cm_node->mpa_frame_buf, buffer + mpa_hdr_len, cm_node->mpa_frame_size); + + if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT) + *type = NES_MPA_REQUEST_REJECT; + return 0; } @@ -266,9 +432,9 @@ static int handle_exception_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb * form_cm_frame - get a free packet and build empty frame Use * node info to build. */ -static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node, - void *options, u32 optionsize, void *data, - u32 datasize, u8 flags) +static void form_cm_frame(struct sk_buff *skb, + struct nes_cm_node *cm_node, void *options, u32 optionsize, + void *data, u32 datasize, u8 flags) { struct tcphdr *tcph; struct iphdr *iph; @@ -277,14 +443,14 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm u16 packetsize = sizeof(*iph); packetsize += sizeof(*tcph); - packetsize += optionsize + datasize; + packetsize += optionsize + datasize; + skb_trim(skb, 0); memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph)); - skb->len = 0; buf = skb_put(skb, packetsize + ETH_HLEN); - ethh = (struct ethhdr *) buf; + ethh = (struct ethhdr *)buf; buf += ETH_HLEN; iph = (struct iphdr *)buf; @@ -292,10 +458,12 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm tcph = (struct tcphdr *)buf; skb_reset_mac_header(skb); skb_set_network_header(skb, ETH_HLEN); - skb_set_transport_header(skb, ETH_HLEN+sizeof(*iph)); + skb_set_transport_header(skb, ETH_HLEN + sizeof(*iph)); buf += sizeof(*tcph); skb->ip_summed = CHECKSUM_PARTIAL; + if (!(cm_node->netdev->features & NETIF_F_IP_CSUM)) + skb->ip_summed = CHECKSUM_NONE; skb->protocol = htons(0x800); skb->data_len = 0; skb->mac_len = ETH_HLEN; @@ -305,37 +473,41 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm ethh->h_proto = htons(0x0800); iph->version = IPVERSION; - iph->ihl = 5; /* 5 * 4Byte words, IP headr len */ + iph->ihl = 5; /* 5 * 4Byte words, IP headr len */ iph->tos = 0; iph->tot_len = htons(packetsize); iph->id = htons(++cm_node->tcp_cntxt.loc_id); iph->frag_off = htons(0x4000); iph->ttl = 0x40; - iph->protocol = 0x06; /* IPPROTO_TCP */ + iph->protocol = 0x06; /* IPPROTO_TCP */ - iph->saddr = htonl(cm_node->loc_addr); - iph->daddr = htonl(cm_node->rem_addr); + 
iph->saddr = htonl(cm_node->mapped_loc_addr); + iph->daddr = htonl(cm_node->mapped_rem_addr); - tcph->source = htons(cm_node->loc_port); - tcph->dest = htons(cm_node->rem_port); + tcph->source = htons(cm_node->mapped_loc_port); + tcph->dest = htons(cm_node->mapped_rem_port); tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num); if (flags & SET_ACK) { cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt; tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num); tcph->ack = 1; - } else + } else { tcph->ack_seq = 0; + } if (flags & SET_SYN) { cm_node->tcp_cntxt.loc_seq_num++; tcph->syn = 1; - } else - cm_node->tcp_cntxt.loc_seq_num += datasize; /* data (no headers) */ + } else { + cm_node->tcp_cntxt.loc_seq_num += datasize; + } - if (flags & SET_FIN) + if (flags & SET_FIN) { + cm_node->tcp_cntxt.loc_seq_num++; tcph->fin = 1; + } if (flags & SET_RST) tcph->rst = 1; @@ -351,10 +523,101 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm skb_shinfo(skb)->nr_frags = 0; cm_packets_created++; +} + +/* + * nes_create_sockaddr - Record ip addr and tcp port in a sockaddr struct + */ +static void nes_create_sockaddr(__be32 ip_addr, __be16 port, + struct sockaddr_storage *addr) +{ + struct sockaddr_in *nes_sockaddr = (struct sockaddr_in *)addr; + nes_sockaddr->sin_family = AF_INET; + memcpy(&nes_sockaddr->sin_addr.s_addr, &ip_addr, sizeof(__be32)); + nes_sockaddr->sin_port = port; +} + +/* + * nes_create_mapinfo - Create a mapinfo object in the port mapper data base + */ +static int nes_create_mapinfo(struct nes_cm_info *cm_info) +{ + struct sockaddr_storage local_sockaddr; + struct sockaddr_storage mapped_sockaddr; + + nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port), + &local_sockaddr); + nes_create_sockaddr(htonl(cm_info->mapped_loc_addr), + htons(cm_info->mapped_loc_port), &mapped_sockaddr); + + return iwpm_create_mapinfo(&local_sockaddr, + &mapped_sockaddr, RDMA_NL_NES); +} + +/* + * nes_remove_mapinfo - Remove a mapinfo object from the port mapper data base + * and send a remove mapping op message to + * the userspace port mapper + */ +static int nes_remove_mapinfo(u32 loc_addr, u16 loc_port, + u32 mapped_loc_addr, u16 mapped_loc_port) +{ + struct sockaddr_storage local_sockaddr; + struct sockaddr_storage mapped_sockaddr; + + nes_create_sockaddr(htonl(loc_addr), htons(loc_port), &local_sockaddr); + nes_create_sockaddr(htonl(mapped_loc_addr), htons(mapped_loc_port), + &mapped_sockaddr); + + iwpm_remove_mapinfo(&local_sockaddr, &mapped_sockaddr); + return iwpm_remove_mapping(&local_sockaddr, RDMA_NL_NES); +} + +/* + * nes_form_pm_msg - Form a port mapper message with mapping info + */ +static void nes_form_pm_msg(struct nes_cm_info *cm_info, + struct iwpm_sa_data *pm_msg) +{ + nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port), + &pm_msg->loc_addr); + nes_create_sockaddr(htonl(cm_info->rem_addr), htons(cm_info->rem_port), + &pm_msg->rem_addr); +} - return skb; +/* + * nes_form_reg_msg - Form a port mapper message with dev info + */ +static void nes_form_reg_msg(struct nes_vnic *nesvnic, + struct iwpm_dev_data *pm_msg) +{ + memcpy(pm_msg->dev_name, nesvnic->nesibdev->ibdev.name, + IWPM_DEVNAME_SIZE); + memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE); } +/* + * nes_record_pm_msg - Save the received mapping info + */ +static void nes_record_pm_msg(struct nes_cm_info *cm_info, + struct iwpm_sa_data *pm_msg) +{ + struct sockaddr_in *mapped_loc_addr = + (struct sockaddr_in *)&pm_msg->mapped_loc_addr; + struct 
sockaddr_in *mapped_rem_addr = + (struct sockaddr_in *)&pm_msg->mapped_rem_addr; + + if (mapped_loc_addr->sin_family == AF_INET) { + cm_info->mapped_loc_addr = + ntohl(mapped_loc_addr->sin_addr.s_addr); + cm_info->mapped_loc_port = ntohs(mapped_loc_addr->sin_port); + } + if (mapped_rem_addr->sin_family == AF_INET) { + cm_info->mapped_rem_addr = + ntohl(mapped_rem_addr->sin_addr.s_addr); + cm_info->mapped_rem_port = ntohs(mapped_rem_addr->sin_port); + } +} /** * print_core - dump a cm core @@ -367,9 +630,8 @@ static void print_core(struct nes_cm_core *core) return; nes_debug(NES_DBG_CM, "---------------------------------------------\n"); - nes_debug(NES_DBG_CM, "State : %u \n", core->state); + nes_debug(NES_DBG_CM, "State : %u \n", core->state); - nes_debug(NES_DBG_CM, "Tx Free cnt : %u \n", skb_queue_len(&core->tx_free_list)); nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt)); nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt)); @@ -378,6 +640,162 @@ static void print_core(struct nes_cm_core *core) nes_debug(NES_DBG_CM, "-------------- end core ---------------\n"); } +static void record_ird_ord(struct nes_cm_node *cm_node, + u16 conn_ird, u16 conn_ord) +{ + if (conn_ird > NES_MAX_IRD) + conn_ird = NES_MAX_IRD; + + if (conn_ord > NES_MAX_ORD) + conn_ord = NES_MAX_ORD; + + cm_node->ird_size = conn_ird; + cm_node->ord_size = conn_ord; +} + +/** + * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame + */ +static int cm_build_mpa_frame(struct nes_cm_node *cm_node, u8 **start_buff, + u16 *buff_len, u8 *pci_mem, u8 mpa_key) +{ + int ret = 0; + + *start_buff = (pci_mem) ? pci_mem : &cm_node->mpa_frame_buf[0]; + + switch (cm_node->mpa_frame_rev) { + case IETF_MPA_V1: + *start_buff = (u8 *)*start_buff + sizeof(struct ietf_rtr_msg); + *buff_len = sizeof(struct ietf_mpa_v1) + cm_node->mpa_frame_size; + build_mpa_v1(cm_node, *start_buff, mpa_key); + break; + case IETF_MPA_V2: + *buff_len = sizeof(struct ietf_mpa_v2) + cm_node->mpa_frame_size; + build_mpa_v2(cm_node, *start_buff, mpa_key); + break; + default: + ret = -EINVAL; + } + return ret; +} + +/** + * build_mpa_v2 - build a MPA V2 frame + */ +static void build_mpa_v2(struct nes_cm_node *cm_node, + void *start_addr, u8 mpa_key) +{ + struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr; + struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg; + u16 ctrl_ird; + u16 ctrl_ord; + + /* initialize the upper 5 bytes of the frame */ + build_mpa_v1(cm_node, start_addr, mpa_key); + mpa_frame->flags |= IETF_MPA_V2_FLAG; /* set a bit to indicate MPA V2 */ + mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE); + + /* initialize RTR msg */ + if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { + ctrl_ird = IETF_NO_IRD_ORD; + ctrl_ord = IETF_NO_IRD_ORD; + } else { + ctrl_ird = cm_node->ird_size & IETF_NO_IRD_ORD; + ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD; + } + ctrl_ird |= IETF_PEER_TO_PEER; + ctrl_ird |= IETF_FLPDU_ZERO_LEN; + + switch (mpa_key) { + case MPA_KEY_REQUEST: + ctrl_ord |= IETF_RDMA0_WRITE; + ctrl_ord |= IETF_RDMA0_READ; + break; + case MPA_KEY_REPLY: + switch (cm_node->send_rdma0_op) { + case SEND_RDMA_WRITE_ZERO: + ctrl_ord |= IETF_RDMA0_WRITE; + break; + case SEND_RDMA_READ_ZERO: + ctrl_ord |= IETF_RDMA0_READ; + break; + } + } + rtr_msg->ctrl_ird = htons(ctrl_ird); + rtr_msg->ctrl_ord = htons(ctrl_ord); +} + +/** + * build_mpa_v1 - build a MPA V1 frame + */ +static void build_mpa_v1(struct nes_cm_node *cm_node, void *start_addr, u8 mpa_key) +{ + struct ietf_mpa_v1 
*mpa_frame = (struct ietf_mpa_v1 *)start_addr; + + switch (mpa_key) { + case MPA_KEY_REQUEST: + memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE); + break; + case MPA_KEY_REPLY: + memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); + break; + } + mpa_frame->flags = IETF_MPA_FLAGS_CRC; + mpa_frame->rev = cm_node->mpa_frame_rev; + mpa_frame->priv_data_len = htons(cm_node->mpa_frame_size); +} + +static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_addr) +{ + u64 u64temp; + struct nes_qp *nesqp = *nesqp_addr; + struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0]; + + u64temp = (unsigned long)nesqp->nesuqp_addr; + u64temp |= NES_SW_CONTEXT_ALIGN >> 1; + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp); + + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; + + switch (cm_node->send_rdma0_op) { + case SEND_RDMA_WRITE_ZERO: + nes_debug(NES_DBG_CM, "Sending first write.\n"); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; + break; + + case SEND_RDMA_READ_ZERO: + default: + if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) + WARN(1, "Unsupported RDMA0 len operation=%u\n", + cm_node->send_rdma0_op); + nes_debug(NES_DBG_CM, "Sending first rdma operation.\n"); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_OP_RDMAR); + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX] = 1; + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = 0; + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_STAG_IDX] = 1; + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 1; + break; + } + + if (nesqp->sq_kmapped) { + nesqp->sq_kmapped = 0; + kunmap(nesqp->page); + } + + /*use the reserved spot on the WQ for the extra first WQE*/ + nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU | + NES_QPCONTEXT_ORDIRD_ALSMM)); + nesqp->skip_lsmm = 1; + nesqp->hwqp.sq_tail = 0; +} /** * schedule_nes_timer @@ -385,20 +803,17 @@ static void print_core(struct nes_cm_core *core) * rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node); */ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, - enum nes_timer_type type, int send_retrans, - int close_when_complete) + enum nes_timer_type type, int send_retrans, + int close_when_complete) { - unsigned long flags; - struct nes_cm_core *cm_core; + unsigned long flags; + struct nes_cm_core *cm_core = cm_node->cm_core; struct nes_timer_entry *new_send; int ret = 0; - u32 was_timer_set; - if (!cm_node) - return -EINVAL; new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) - return -1; + return -ENOMEM; /* new_send->timetosend = currenttime */ new_send->retrycount = NES_DEFAULT_RETRYS; @@ -411,255 +826,262 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, new_send->close_when_complete = close_when_complete; if (type == NES_TIMER_TYPE_CLOSE) { - new_send->timetosend += (HZ/2); /* TODO: decide on the correct value here */ - spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_add_tail(&new_send->list, &cm_node->recv_list); - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); + new_send->timetosend += (HZ / 10); + if (cm_node->recv_entry) { + kfree(new_send); + WARN_ON(1); + return 
-EINVAL; + } + cm_node->recv_entry = new_send; } if (type == NES_TIMER_TYPE_SEND) { new_send->seq_num = ntohl(tcp_hdr(skb)->seq); atomic_inc(&new_send->skb->users); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + cm_node->send_entry = new_send; + add_ref_cm_node(cm_node); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + new_send->timetosend = jiffies + NES_RETRY_TIMEOUT; ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev); if (ret != NETDEV_TX_OK) { - nes_debug(NES_DBG_CM, "Error sending packet %p (jiffies = %lu)\n", - new_send, jiffies); - atomic_dec(&new_send->skb->users); + nes_debug(NES_DBG_CM, "Error sending packet %p " + "(jiffies = %lu)\n", new_send, jiffies); new_send->timetosend = jiffies; + ret = NETDEV_TX_OK; } else { cm_packets_sent++; if (!send_retrans) { + cleanup_retrans_entry(cm_node); if (close_when_complete) - rem_ref_cm_node(cm_node->cm_core, cm_node); - dev_kfree_skb_any(new_send->skb); - kfree(new_send); + rem_ref_cm_node(cm_core, cm_node); return ret; } - new_send->timetosend = jiffies + NES_RETRY_TIMEOUT; } - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - list_add_tail(&new_send->list, &cm_node->retrans_list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); } - if (type == NES_TIMER_TYPE_RECV) { - new_send->seq_num = ntohl(tcp_hdr(skb)->seq); - new_send->timetosend = jiffies; - spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_add_tail(&new_send->list, &cm_node->recv_list); - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - } - cm_core = cm_node->cm_core; - was_timer_set = timer_pending(&cm_core->tcp_timer); - - if (!was_timer_set) { - cm_core->tcp_timer.expires = new_send->timetosend; - add_timer(&cm_core->tcp_timer); - } + if (!timer_pending(&cm_core->tcp_timer)) + mod_timer(&cm_core->tcp_timer, new_send->timetosend); return ret; } +static void nes_retrans_expired(struct nes_cm_node *cm_node) +{ + struct iw_cm_id *cm_id = cm_node->cm_id; + enum nes_cm_node_state state = cm_node->state; + cm_node->state = NES_CM_STATE_CLOSED; + + switch (state) { + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_CLOSING: + rem_ref_cm_node(cm_node->cm_core, cm_node); + break; + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_FIN_WAIT1: + if (cm_node->cm_id) + cm_id->rem_ref(cm_id); + send_reset(cm_node, NULL); + break; + default: + add_ref_cm_node(cm_node); + send_reset(cm_node, NULL); + create_event(cm_node, NES_CM_EVENT_ABORTED); + } +} + +static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node) +{ + struct nes_timer_entry *recv_entry = cm_node->recv_entry; + struct iw_cm_id *cm_id = cm_node->cm_id; + struct nes_qp *nesqp; + unsigned long qplockflags; + + if (!recv_entry) + return; + nesqp = (struct nes_qp *)recv_entry->skb; + if (nesqp) { + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with something " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + nesqp->ibqp_state = IB_QPS_ERR; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_cm_disconn(nesqp); + } else { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " + "refcount = %d: HIT A " + "NES_TIMER_TYPE_CLOSE with nothing " + "to do!!!\n", nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + } + } else if (rem_node) { + /* TIME_WAIT state */ + 
rem_ref_cm_node(cm_node->cm_core, cm_node); + } + if (cm_node->cm_id) + cm_id->rem_ref(cm_id); + kfree(recv_entry); + cm_node->recv_entry = NULL; +} /** * nes_cm_timer_tick */ static void nes_cm_timer_tick(unsigned long pass) { - unsigned long flags, qplockflags; + unsigned long flags; unsigned long nexttimeout = jiffies + NES_LONG_TIME; - struct iw_cm_id *cm_id; struct nes_cm_node *cm_node; struct nes_timer_entry *send_entry, *recv_entry; - struct list_head *list_core, *list_core_temp; - struct list_head *list_node, *list_node_temp; + struct list_head *list_core_temp; + struct list_head *list_node; struct nes_cm_core *cm_core = g_cm_core; - struct nes_qp *nesqp; - struct sk_buff *skb; u32 settimer = 0; + unsigned long timetosend; int ret = NETDEV_TX_OK; - int node_done; + struct list_head timer_list; + + INIT_LIST_HEAD(&timer_list); spin_lock_irqsave(&cm_core->ht_lock, flags); - list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) { + list_for_each_safe(list_node, list_core_temp, + &cm_core->connected_nodes) { cm_node = container_of(list_node, struct nes_cm_node, list); - add_ref_cm_node(cm_node); - spin_unlock_irqrestore(&cm_core->ht_lock, flags); - spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { - recv_entry = container_of(list_core, struct nes_timer_entry, list); - if ((time_after(recv_entry->timetosend, jiffies)) && - (recv_entry->type == NES_TIMER_TYPE_CLOSE)) { - if (nexttimeout > recv_entry->timetosend || !settimer) { + if ((cm_node->recv_entry) || (cm_node->send_entry)) { + add_ref_cm_node(cm_node); + list_add(&cm_node->timer_entry, &timer_list); + } + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + list_for_each_safe(list_node, list_core_temp, &timer_list) { + cm_node = container_of(list_node, struct nes_cm_node, + timer_entry); + recv_entry = cm_node->recv_entry; + + if (recv_entry) { + if (time_after(recv_entry->timetosend, jiffies)) { + if (nexttimeout > recv_entry->timetosend || + !settimer) { nexttimeout = recv_entry->timetosend; settimer = 1; } - continue; - } - list_del(&recv_entry->list); - cm_id = cm_node->cm_id; - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - if (recv_entry->type == NES_TIMER_TYPE_CLOSE) { - nesqp = (struct nes_qp *)recv_entry->skb; - spin_lock_irqsave(&nesqp->lock, qplockflags); - if (nesqp->cm_id) { - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d: " - "****** HIT A NES_TIMER_TYPE_CLOSE" - " with something to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; - nesqp->ibqp_state = IB_QPS_ERR; - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_cm_disconn(nesqp); - } else { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d:" - " ****** HIT A NES_TIMER_TYPE_CLOSE" - " with nothing to do!!! 
******\n", - nesqp->hwqp.qp_id, cm_id, - atomic_read(&nesqp->refcount)); - nes_rem_ref(&nesqp->ibqp); - } - if (cm_id) - cm_id->rem_ref(cm_id); + } else { + handle_recv_entry(cm_node, 1); } - kfree(recv_entry); - spin_lock_irqsave(&cm_node->recv_list_lock, flags); } - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - node_done = 0; - list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) { - if (node_done) { + do { + send_entry = cm_node->send_entry; + if (!send_entry) break; - } - send_entry = container_of(list_core, struct nes_timer_entry, list); if (time_after(send_entry->timetosend, jiffies)) { if (cm_node->state != NES_CM_STATE_TSA) { - if ((nexttimeout > send_entry->timetosend) || !settimer) { - nexttimeout = send_entry->timetosend; + if ((nexttimeout > + send_entry->timetosend) || + !settimer) { + nexttimeout = + send_entry->timetosend; settimer = 1; } - node_done = 1; - continue; } else { - list_del(&send_entry->list); - skb = send_entry->skb; - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - dev_kfree_skb_any(skb); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; + free_retrans_entry(cm_node); } + break; } - if (send_entry->type == NES_TIMER_NODE_CLEANUP) { - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; - } - if ((send_entry->seq_num < cm_node->tcp_cntxt.rem_ack_num) || - (cm_node->state == NES_CM_STATE_TSA) || - (cm_node->state == NES_CM_STATE_CLOSED)) { - skb = send_entry->skb; - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - kfree(send_entry); - dev_kfree_skb_any(skb); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; + + if ((cm_node->state == NES_CM_STATE_TSA) || + (cm_node->state == NES_CM_STATE_CLOSED)) { + free_retrans_entry(cm_node); + break; } - if (!send_entry->retranscount || !send_entry->retrycount) { + if (!send_entry->retranscount || + !send_entry->retrycount) { cm_packets_dropped++; - skb = send_entry->skb; - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - dev_kfree_skb_any(skb); - kfree(send_entry); - if (cm_node->state == NES_CM_STATE_SYN_RCVD) { - /* this node never even generated an indication up to the cm */ - rem_ref_cm_node(cm_core, cm_node); - } else { - cm_node->state = NES_CM_STATE_CLOSED; - create_event(cm_node, NES_CM_EVENT_ABORTED); - } - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; + free_retrans_entry(cm_node); + + spin_unlock_irqrestore( + &cm_node->retrans_list_lock, flags); + nes_retrans_expired(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + spin_lock_irqsave(&cm_node->retrans_list_lock, + flags); + break; } - /* this seems like the correct place, but leave send entry unprotected */ - /* spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); */ atomic_inc(&send_entry->skb->users); cm_packets_retrans++; - nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p," - " jiffies = %lu, time to send = %lu, retranscount = %u, " - "send_entry->seq_num = 0x%08X, cm_node->tcp_cntxt.rem_ack_num = 0x%08X\n", - send_entry, cm_node, jiffies, send_entry->timetosend, send_entry->retranscount, - send_entry->seq_num, cm_node->tcp_cntxt.rem_ack_num); - - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + nes_debug(NES_DBG_CM, "Retransmitting 
send_entry %p " + "for node %p, jiffies = %lu, time to send = " + "%lu, retranscount = %u, send_entry->seq_num = " + "0x%08X, cm_node->tcp_cntxt.rem_ack_num = " + "0x%08X\n", send_entry, cm_node, jiffies, + send_entry->timetosend, + send_entry->retranscount, + send_entry->seq_num, + cm_node->tcp_cntxt.rem_ack_num); + + spin_unlock_irqrestore(&cm_node->retrans_list_lock, + flags); ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); if (ret != NETDEV_TX_OK) { + nes_debug(NES_DBG_CM, "rexmit failed for " + "node=%p\n", cm_node); cm_packets_bounced++; - atomic_dec(&send_entry->skb->users); send_entry->retrycount--; nexttimeout = jiffies + NES_SHORT_TIME; settimer = 1; - node_done = 1; - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; + break; } else { cm_packets_sent++; } - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - list_del(&send_entry->list); - nes_debug(NES_DBG_CM, "Packet Sent: retrans count = %u, retry count = %u.\n", - send_entry->retranscount, send_entry->retrycount); + nes_debug(NES_DBG_CM, "Packet Sent: retrans count = " + "%u, retry count = %u.\n", + send_entry->retranscount, + send_entry->retrycount); if (send_entry->send_retrans) { send_entry->retranscount--; - send_entry->timetosend = jiffies + NES_RETRY_TIMEOUT; - if (nexttimeout > send_entry->timetosend || !settimer) { + timetosend = (NES_RETRY_TIMEOUT << + (NES_DEFAULT_RETRANS - send_entry->retranscount)); + + send_entry->timetosend = jiffies + + min(timetosend, NES_MAX_TIMEOUT); + if (nexttimeout > send_entry->timetosend || + !settimer) { nexttimeout = send_entry->timetosend; settimer = 1; } - list_add(&send_entry->list, &cm_node->retrans_list); - continue; } else { int close_when_complete; - skb = send_entry->skb; - close_when_complete = send_entry->close_when_complete; - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - if (close_when_complete) { - BUG_ON(atomic_read(&cm_node->ref_count) == 1); - rem_ref_cm_node(cm_core, cm_node); - } - dev_kfree_skb_any(skb); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; + close_when_complete = + send_entry->close_when_complete; + nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n", + cm_node, cm_node->state); + free_retrans_entry(cm_node); + if (close_when_complete) + rem_ref_cm_node(cm_node->cm_core, + cm_node); } - } - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - - rem_ref_cm_node(cm_core, cm_node); + } while (0); - spin_lock_irqsave(&cm_core->ht_lock, flags); - if (ret != NETDEV_TX_OK) - break; + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + rem_ref_cm_node(cm_node->cm_core, cm_node); } - spin_unlock_irqrestore(&cm_core->ht_lock, flags); if (settimer) { - if (!timer_pending(&cm_core->tcp_timer)) { - cm_core->tcp_timer.expires = nexttimeout; - add_timer(&cm_core->tcp_timer); - } + if (!timer_pending(&cm_core->tcp_timer)) + mod_timer(&cm_core->tcp_timer, nexttimeout); } } @@ -667,14 +1089,14 @@ static void nes_cm_timer_tick(unsigned long pass) /** * send_syn */ -static int send_syn(struct nes_cm_node *cm_node, u32 sendack) +static int send_syn(struct nes_cm_node *cm_node, u32 sendack, + struct sk_buff *skb) { int ret; int flags = SET_SYN; - struct sk_buff *skb; char optionsbuffer[sizeof(struct option_mss) + - sizeof(struct option_windowscale) + - sizeof(struct option_base) + 1]; + sizeof(struct option_windowscale) + sizeof(struct option_base) + + TCP_OPTIONS_PADDING]; int optionssize = 0; /* Sending MSS option */ 
@@ -695,8 +1117,7 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack) options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale; optionssize += sizeof(struct option_windowscale); - if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt) - ) { + if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)) { options = (union all_known_options *)&optionsbuffer[optionssize]; options->as_base.optionnum = OPTION_NUMBER_WRITE0; options->as_base.length = sizeof(struct option_base); @@ -714,7 +1135,8 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack) options->as_end = OPTION_NUMBER_END; optionssize += 1; - skb = get_free_pkt(cm_node); + if (!skb) + skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); return -1; @@ -733,18 +1155,18 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack) /** * send_reset */ -static int send_reset(struct nes_cm_node *cm_node) +static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb) { int ret; - struct sk_buff *skb = get_free_pkt(cm_node); int flags = SET_RST | SET_ACK; + if (!skb) + skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); - return -1; + return -ENOMEM; } - add_ref_cm_node(cm_node); form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags); ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1); @@ -755,10 +1177,12 @@ static int send_reset(struct nes_cm_node *cm_node) /** * send_ack */ -static int send_ack(struct nes_cm_node *cm_node) +static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb) { int ret; - struct sk_buff *skb = get_free_pkt(cm_node); + + if (!skb) + skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); @@ -781,7 +1205,7 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb) /* if we didn't get a frame get one */ if (!skb) - skb = get_free_pkt(cm_node); + skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); @@ -796,76 +1220,32 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb) /** - * get_free_pkt - */ -static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node) -{ - struct sk_buff *skb, *new_skb; - - /* check to see if we need to repopulate the free tx pkt queue */ - if (skb_queue_len(&cm_node->cm_core->tx_free_list) < NES_CM_FREE_PKT_LO_WATERMARK) { - while (skb_queue_len(&cm_node->cm_core->tx_free_list) < - cm_node->cm_core->free_tx_pkt_max) { - /* replace the frame we took, we won't get it back */ - new_skb = dev_alloc_skb(cm_node->cm_core->mtu); - BUG_ON(!new_skb); - /* add a replacement frame to the free tx list head */ - skb_queue_head(&cm_node->cm_core->tx_free_list, new_skb); - } - } - - skb = skb_dequeue(&cm_node->cm_core->tx_free_list); - - return skb; -} - - -/** - * make_hashkey - generate hash key from node tuple - */ -static inline int make_hashkey(u16 loc_port, nes_addr_t loc_addr, u16 rem_port, - nes_addr_t rem_addr) -{ - u32 hashkey = 0; - - hashkey = loc_addr + rem_addr + loc_port + rem_port; - hashkey = (hashkey % NES_CM_HASHTABLE_SIZE); - - return hashkey; -} - - -/** * find_node - find a cm node that matches the reference cm node */ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, - u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr) + u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr) { unsigned long flags; - u32 hashkey; struct list_head *hte; 
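The hunk above also deletes make_hashkey(): the connected-nodes table is a single list, and find_node() now simply walks connected_nodes under ht_lock. For reference, a standalone sketch of the shape of the removed additive hash, assuming an illustrative table size (the driver's NES_CM_HASHTABLE_SIZE value is not shown here):

#include <stdio.h>
#include <stdint.h>

#define CM_HASHTABLE_SIZE 1024	/* illustrative; not the driver's value */

/* Same shape as the deleted helper: fold the four-tuple into a
 * bucket index by addition modulo the table size. */
static uint32_t make_hashkey(uint16_t loc_port, uint32_t loc_addr,
			     uint16_t rem_port, uint32_t rem_addr)
{
	uint32_t hashkey = loc_addr + rem_addr + loc_port + rem_port;

	return hashkey % CM_HASHTABLE_SIZE;
}

int main(void)
{
	printf("bucket = %u\n", (unsigned)make_hashkey(4000, 0x0a000001,
						       5000, 0x0a000002));
	return 0;
}

A plain sum of the tuple members is cheap but collides for permuted address/port pairs, so with a single backing list there is little to lose by dropping it.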
struct nes_cm_node *cm_node; - /* make a hash index key for this packet */ - hashkey = make_hashkey(loc_port, loc_addr, rem_port, rem_addr); - /* get a handle on the hte */ hte = &cm_core->connected_nodes; - nes_debug(NES_DBG_CM, "Searching for an owner node: " NIPQUAD_FMT ":%x from core %p->%p\n", - HIPQUAD(loc_addr), loc_port, cm_core, hte); - /* walk list and find cm_node associated with this session ID */ spin_lock_irqsave(&cm_core->ht_lock, flags); list_for_each_entry(cm_node, hte, list) { /* compare quad, return node handle if a match */ nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n", - cm_node->loc_addr, cm_node->loc_port, - loc_addr, loc_port, - cm_node->rem_addr, cm_node->rem_port, - rem_addr, rem_port); - if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) && - (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) { + cm_node->loc_addr, cm_node->loc_port, + loc_addr, loc_port, + cm_node->rem_addr, cm_node->rem_port, + rem_addr, rem_port); + if ((cm_node->mapped_loc_addr == loc_addr) && + (cm_node->mapped_loc_port == loc_port) && + (cm_node->mapped_rem_addr == rem_addr) && + (cm_node->mapped_rem_port == rem_port)) { + add_ref_cm_node(cm_node); spin_unlock_irqrestore(&cm_core->ht_lock, flags); return cm_node; @@ -882,19 +1262,29 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, * find_listener - find a cm node listening on this addr-port pair */ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, - nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state) + nes_addr_t dst_addr, u16 dst_port, + enum nes_cm_listener_state listener_state, int local) { unsigned long flags; struct nes_cm_listener *listen_node; + nes_addr_t listen_addr; + u16 listen_port; /* walk list and find cm_node associated with this session ID */ spin_lock_irqsave(&cm_core->listen_list_lock, flags); list_for_each_entry(listen_node, &cm_core->listen_list.list, list) { + if (local) { + listen_addr = listen_node->loc_addr; + listen_port = listen_node->loc_port; + } else { + listen_addr = listen_node->mapped_loc_addr; + listen_port = listen_node->mapped_loc_port; + } /* compare node pair, return node handle if a match */ - if (((listen_node->loc_addr == dst_addr) || - listen_node->loc_addr == 0x00000000) && - (listen_node->loc_port == dst_port) && - (listener_state & listen_node->listener_state)) { + if (((listen_addr == dst_addr) || + listen_addr == 0x00000000) && + (listen_port == dst_port) && + (listener_state & listen_node->listener_state)) { atomic_inc(&listen_node->ref_count); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); return listen_node; @@ -902,32 +1292,23 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, } spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); - nes_debug(NES_DBG_CM, "Unable to find listener for " NIPQUAD_FMT ":%x\n", - HIPQUAD(dst_addr), dst_port); - /* no listener */ return NULL; } - /** * add_hte_node - add a cm node to the hash table */ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) { unsigned long flags; - u32 hashkey; struct list_head *hte; if (!cm_node || !cm_core) return -EINVAL; - nes_debug(NES_DBG_CM, "Adding Node to Active Connection HT\n"); - - /* first, make an index into our hash table */ - hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr, - cm_node->rem_port, cm_node->rem_addr); - cm_node->hashkey = hashkey; + nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection 
HT\n", + cm_node); spin_lock_irqsave(&cm_core->ht_lock, flags); @@ -946,10 +1327,86 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node * mini_cm_dec_refcnt_listen */ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, - struct nes_cm_listener *listener, int free_hanging_nodes) + struct nes_cm_listener *listener, int free_hanging_nodes) { - int ret = 1; + int ret = -EINVAL; + int err = 0; unsigned long flags; + struct list_head *list_pos = NULL; + struct list_head *list_temp = NULL; + struct nes_cm_node *cm_node = NULL; + struct list_head reset_list; + + nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, " + "refcnt=%d\n", listener, free_hanging_nodes, + atomic_read(&listener->ref_count)); + /* free non-accelerated child nodes for this listener */ + INIT_LIST_HEAD(&reset_list); + if (free_hanging_nodes) { + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_for_each_safe(list_pos, list_temp, + &g_cm_core->connected_nodes) { + cm_node = container_of(list_pos, struct nes_cm_node, + list); + if ((cm_node->listener == listener) && + (!cm_node->accelerated)) { + add_ref_cm_node(cm_node); + list_add(&cm_node->reset_entry, &reset_list); + } + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + } + + list_for_each_safe(list_pos, list_temp, &reset_list) { + cm_node = container_of(list_pos, struct nes_cm_node, + reset_entry); + { + struct nes_cm_node *loopback = cm_node->loopbackpartner; + enum nes_cm_node_state old_state; + if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) { + rem_ref_cm_node(cm_node->cm_core, cm_node); + } else { + if (!loopback) { + cleanup_retrans_entry(cm_node); + err = send_reset(cm_node, NULL); + if (err) { + cm_node->state = + NES_CM_STATE_CLOSED; + WARN_ON(1); + } else { + old_state = cm_node->state; + cm_node->state = NES_CM_STATE_LISTENER_DESTROYED; + if (old_state != NES_CM_STATE_MPAREQ_RCVD) + rem_ref_cm_node( + cm_node->cm_core, + cm_node); + } + } else { + struct nes_cm_event event; + + event.cm_node = loopback; + event.cm_info.rem_addr = + loopback->rem_addr; + event.cm_info.loc_addr = + loopback->loc_addr; + event.cm_info.rem_port = + loopback->rem_port; + event.cm_info.loc_port = + loopback->loc_port; + event.cm_info.cm_id = loopback->cm_id; + add_ref_cm_node(loopback); + loopback->state = NES_CM_STATE_CLOSED; + cm_event_connect_error(&event); + cm_node->state = NES_CM_STATE_LISTENER_DESTROYED; + + rem_ref_cm_node(cm_node->cm_core, + cm_node); + + } + } + } + } + spin_lock_irqsave(&cm_core->listen_list_lock, flags); if (!atomic_dec_return(&listener->ref_count)) { list_del(&listener->list); @@ -960,8 +1417,18 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); if (listener->nesvnic) { - nes_manage_apbvt(listener->nesvnic, listener->loc_port, - PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); + nes_manage_apbvt(listener->nesvnic, + listener->mapped_loc_port, + PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); + + nes_remove_mapinfo(listener->loc_addr, + listener->loc_port, + listener->mapped_loc_addr, + listener->mapped_loc_port); + nes_debug(NES_DBG_NLMSG, + "Delete APBVT mapped_loc_port = %04X\n", + listener->mapped_loc_port); } nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); @@ -969,15 +1436,15 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, kfree(listener); listener = NULL; ret = 0; - cm_listens_destroyed++; + 
atomic_inc(&cm_listens_destroyed); } else { spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); } if (listener) { if (atomic_read(&listener->pend_accepts_cnt) > 0) nes_debug(NES_DBG_CM, "destroying listener (%p)" - " with non-zero pending accepts=%u\n", - listener, atomic_read(&listener->pend_accepts_cnt)); + " with non-zero pending accepts=%u\n", + listener, atomic_read(&listener->pend_accepts_cnt)); } return ret; @@ -988,7 +1455,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, * mini_cm_del_listen */ static int mini_cm_del_listen(struct nes_cm_core *cm_core, - struct nes_cm_listener *listener) + struct nes_cm_listener *listener) { listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE; listener->cm_id = NULL; /* going to be destroyed pretty soon */ @@ -1000,61 +1467,98 @@ static int mini_cm_del_listen(struct nes_cm_core *cm_core, * mini_cm_accelerated */ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, - struct nes_cm_node *cm_node) + struct nes_cm_node *cm_node) { - u32 was_timer_set; cm_node->accelerated = 1; if (cm_node->accept_pend) { BUG_ON(!cm_node->listener); atomic_dec(&cm_node->listener->pend_accepts_cnt); + cm_node->accept_pend = 0; BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); } - was_timer_set = timer_pending(&cm_core->tcp_timer); - if (!was_timer_set) { - cm_core->tcp_timer.expires = jiffies + NES_SHORT_TIME; - add_timer(&cm_core->tcp_timer); - } + if (!timer_pending(&cm_core->tcp_timer)) + mod_timer(&cm_core->tcp_timer, (jiffies + NES_SHORT_TIME)); return 0; } /** - * nes_addr_send_arp + * nes_addr_resolve_neigh */ -static void nes_addr_send_arp(u32 dst_ip) +static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpindex) { struct rtable *rt; - struct flowi fl; + struct neighbour *neigh; + int rc = arpindex; + struct net_device *netdev; + struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; + + rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0); + if (IS_ERR(rt)) { + printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n", + __func__, dst_ip); + return rc; + } - memset(&fl, 0, sizeof fl); - fl.nl_u.ip4_u.daddr = htonl(dst_ip); - if (ip_route_output_key(&init_net, &rt, &fl)) { - printk("%s: ip_route_output_key failed for 0x%08X\n", - __func__, dst_ip); - return; + if (netif_is_bond_slave(nesvnic->netdev)) + netdev = netdev_master_upper_dev_get(nesvnic->netdev); + else + netdev = nesvnic->netdev; + + neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev); + + rcu_read_lock(); + if (neigh) { + if (neigh->nud_state & NUD_VALID) { + nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X" + " is %pM, Gateway is 0x%08X \n", dst_ip, + neigh->ha, ntohl(rt->rt_gateway)); + + if (arpindex >= 0) { + if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) { + /* Mac address same as in nes_arp_table */ + goto out; + } + + nes_manage_arp_cache(nesvnic->netdev, + nesadapter->arp_table[arpindex].mac_addr, + dst_ip, NES_ARP_DELETE); + } + + nes_manage_arp_cache(nesvnic->netdev, neigh->ha, + dst_ip, NES_ARP_ADD); + rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL, + NES_ARP_RESOLVE); + } else { + neigh_event_send(neigh, NULL); + } } +out: + rcu_read_unlock(); + + if (neigh) + neigh_release(neigh); - neigh_event_send(rt->u.dst.neighbour, NULL); ip_rt_put(rt); + return rc; } - /** * make_cm_node - create a new instance of a cm node */ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, struct nes_cm_info *cm_info, - struct 
nes_cm_listener *listener) + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info, + struct nes_cm_listener *listener) { struct nes_cm_node *cm_node; struct timespec ts; + int oldarpindex = 0; int arpindex = 0; struct nes_device *nesdev; struct nes_adapter *nesadapter; - DECLARE_MAC_BUF(mac); /* create an hte and cm_node for this instance */ cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC); @@ -1066,22 +1570,30 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->rem_addr = cm_info->rem_addr; cm_node->loc_port = cm_info->loc_port; cm_node->rem_port = cm_info->rem_port; - cm_node->send_write0 = send_first; - nes_debug(NES_DBG_CM, "Make node addresses : loc = " NIPQUAD_FMT ":%x, rem = " NIPQUAD_FMT ":%x\n", - HIPQUAD(cm_node->loc_addr), cm_node->loc_port, - HIPQUAD(cm_node->rem_addr), cm_node->rem_port); + + cm_node->mapped_loc_addr = cm_info->mapped_loc_addr; + cm_node->mapped_rem_addr = cm_info->mapped_rem_addr; + cm_node->mapped_loc_port = cm_info->mapped_loc_port; + cm_node->mapped_rem_port = cm_info->mapped_rem_port; + + cm_node->mpa_frame_rev = mpa_version; + cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; + cm_node->mpav2_ird_ord = 0; + cm_node->ird_size = 0; + cm_node->ord_size = 0; + + nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n", + &cm_node->loc_addr, cm_node->loc_port, + &cm_node->rem_addr, cm_node->rem_port); cm_node->listener = listener; cm_node->netdev = nesvnic->netdev; cm_node->cm_id = cm_info->cm_id; memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); - nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", - cm_node->listener, cm_node->cm_id); + nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener, + cm_node->cm_id); - INIT_LIST_HEAD(&cm_node->retrans_list); spin_lock_init(&cm_node->retrans_list_lock); - INIT_LIST_HEAD(&cm_node->recv_list); - spin_lock_init(&cm_node->recv_list_lock); cm_node->loopbackpartner = NULL; atomic_set(&cm_node->ref_count, 1); @@ -1090,11 +1602,11 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID; cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >> - NES_CM_DEFAULT_RCV_WND_SCALE; + NES_CM_DEFAULT_RCV_WND_SCALE; ts = current_kernel_time(); cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) - - sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN; + sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN; cm_node->tcp_cntxt.rcv_nxt = 0; /* get a unique session ID , add thread_id to an upcounter to handle race */ atomic_inc(&cm_core->node_cnt); @@ -1108,18 +1620,21 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, nesadapter = nesdev->nesadapter; cm_node->loopbackpartner = NULL; + /* get the mac addr for the remote node */ - arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); + oldarpindex = nes_arp_table(nesdev, cm_node->mapped_rem_addr, + NULL, NES_ARP_RESOLVE); + arpindex = nes_addr_resolve_neigh(nesvnic, + cm_node->mapped_rem_addr, oldarpindex); if (arpindex < 0) { kfree(cm_node); - nes_addr_send_arp(cm_info->rem_addr); return NULL; } /* copy the mac addr to node context */ memcpy(cm_node->rem_mac, nesadapter->arp_table[arpindex].mac_addr, ETH_ALEN); - nes_debug(NES_DBG_CM, "Remote mac addr from arp table: %s\n", - print_mac(mac, cm_node->rem_mac)); + nes_debug(NES_DBG_CM, "Remote mac addr from arp table: %pM\n", + cm_node->rem_mac); 
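Earlier in this make_cm_node() hunk the TCP MSS is computed by stripping the Ethernet, VLAN, IP and TCP headers from the netdev's max_frame_size. A self-contained sketch of that arithmetic, assuming option-less 20-byte IP and TCP headers and an example frame size of 1518 bytes:

#include <stdio.h>

#define ETH_HLEN  14	/* Ethernet header */
#define VLAN_HLEN 4	/* 802.1Q tag */
#define IP_HLEN   20	/* sizeof(struct iphdr), no options */
#define TCP_HLEN  20	/* sizeof(struct tcphdr), no options */

int main(void)
{
	unsigned int max_frame_size = 1518;	/* example value only */
	unsigned int mss = max_frame_size - IP_HLEN - TCP_HLEN -
			   ETH_HLEN - VLAN_HLEN;

	printf("mss = %u\n", mss);	/* 1460 for this example */
	return 0;
}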
add_hte_node(cm_core, cm_node); atomic_inc(&cm_nodes_created); @@ -1142,13 +1657,9 @@ static int add_ref_cm_node(struct nes_cm_node *cm_node) * rem_ref_cm_node - destroy an instance of a cm node */ static int rem_ref_cm_node(struct nes_cm_core *cm_core, - struct nes_cm_node *cm_node) + struct nes_cm_node *cm_node) { - unsigned long flags, qplockflags; - struct nes_timer_entry *send_entry; - struct nes_timer_entry *recv_entry; - struct iw_cm_id *cm_id; - struct list_head *list_core, *list_node_temp; + unsigned long flags; struct nes_qp *nesqp; if (!cm_node) @@ -1169,75 +1680,41 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core, atomic_dec(&cm_node->listener->pend_accepts_cnt); BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); } - - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) { - send_entry = container_of(list_core, struct nes_timer_entry, list); - list_del(&send_entry->list); - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - dev_kfree_skb_any(send_entry->skb); - kfree(send_entry); - spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; - } - spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); - - spin_lock_irqsave(&cm_node->recv_list_lock, flags); - list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { - recv_entry = container_of(list_core, struct nes_timer_entry, list); - list_del(&recv_entry->list); - cm_id = cm_node->cm_id; - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - if (recv_entry->type == NES_TIMER_TYPE_CLOSE) { - nesqp = (struct nes_qp *)recv_entry->skb; - spin_lock_irqsave(&nesqp->lock, qplockflags); - if (nesqp->cm_id) { - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE" - " with something to do!!! ******\n", - nesqp->hwqp.qp_id, cm_id); - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; - nesqp->ibqp_state = IB_QPS_ERR; - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_cm_disconn(nesqp); - } else { - spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_debug(NES_DBG_CM, "QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE" - " with nothing to do!!! 
******\n", - nesqp->hwqp.qp_id, cm_id); - nes_rem_ref(&nesqp->ibqp); - } - cm_id->rem_ref(cm_id); - } else if (recv_entry->type == NES_TIMER_TYPE_RECV) { - dev_kfree_skb_any(recv_entry->skb); - } - kfree(recv_entry); - spin_lock_irqsave(&cm_node->recv_list_lock, flags); - } - spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); - + WARN_ON(cm_node->send_entry); + if (cm_node->recv_entry) + handle_recv_entry(cm_node, 0); if (cm_node->listener) { mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0); } else { if (cm_node->apbvt_set && cm_node->nesvnic) { - nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port, - PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn), - NES_MANAGE_APBVT_DEL); + nes_manage_apbvt(cm_node->nesvnic, cm_node->mapped_loc_port, + PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); } + nes_debug(NES_DBG_NLMSG, "Delete APBVT mapped_loc_port = %04X\n", + cm_node->mapped_loc_port); + nes_remove_mapinfo(cm_node->loc_addr, cm_node->loc_port, + cm_node->mapped_loc_addr, cm_node->mapped_loc_port); } - kfree(cm_node); atomic_dec(&cm_core->node_cnt); atomic_inc(&cm_nodes_destroyed); + nesqp = cm_node->nesqp; + if (nesqp) { + nesqp->cm_node = NULL; + nes_rem_ref(&nesqp->ibqp); + cm_node->nesqp = NULL; + } + kfree(cm_node); return 0; } - /** * process_options */ -static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 optionsize, u32 syn_packet) +static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, + u32 optionsize, u32 syn_packet) { u32 tmp; u32 offset = 0; @@ -1247,35 +1724,34 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 opti while (offset < optionsize) { all_options = (union all_known_options *)(optionsloc + offset); switch (all_options->as_base.optionnum) { - case OPTION_NUMBER_END: - offset = optionsize; - break; - case OPTION_NUMBER_NONE: - offset += 1; - continue; - case OPTION_NUMBER_MSS: - nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d Size: %d\n", - __func__, - all_options->as_mss.length, offset, optionsize); - got_mss_option = 1; - if (all_options->as_mss.length != 4) { - return 1; - } else { - tmp = ntohs(all_options->as_mss.mss); - if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss) - cm_node->tcp_cntxt.mss = tmp; - } - break; - case OPTION_NUMBER_WINDOW_SCALE: - cm_node->tcp_cntxt.snd_wscale = all_options->as_windowscale.shiftcount; - break; - case OPTION_NUMBER_WRITE0: - cm_node->send_write0 = 1; - break; - default: - nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", - all_options->as_base.optionnum); - break; + case OPTION_NUMBER_END: + offset = optionsize; + break; + case OPTION_NUMBER_NONE: + offset += 1; + continue; + case OPTION_NUMBER_MSS: + nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d " + "Size: %d\n", __func__, + all_options->as_mss.length, offset, optionsize); + got_mss_option = 1; + if (all_options->as_mss.length != 4) { + return 1; + } else { + tmp = ntohs(all_options->as_mss.mss); + if (tmp > 0 && tmp < + cm_node->tcp_cntxt.mss) + cm_node->tcp_cntxt.mss = tmp; + } + break; + case OPTION_NUMBER_WINDOW_SCALE: + cm_node->tcp_cntxt.snd_wscale = + all_options->as_windowscale.shiftcount; + break; + default: + nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", + all_options->as_base.optionnum); + break; } offset += all_options->as_base.length; } @@ -1284,310 +1760,623 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, u32 opti return 0; } +static void drop_packet(struct sk_buff *skb) +{ + 
atomic_inc(&cm_accel_dropped_pkts); + dev_kfree_skb_any(skb); +} -/** - * process_packet - */ -static int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, - struct nes_cm_core *cm_core) +static void handle_fin_pkt(struct nes_cm_node *cm_node) { - int optionsize; - int datasize; - int ret = 0; - struct tcphdr *tcph = tcp_hdr(skb); - u32 inc_sequence; - if (cm_node->state == NES_CM_STATE_SYN_SENT && tcph->syn) { - inc_sequence = ntohl(tcph->seq); - cm_node->tcp_cntxt.rcv_nxt = inc_sequence; + nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. " + "refcnt=%d\n", cm_node, cm_node->state, + atomic_read(&cm_node->ref_count)); + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_MPAREJ_RCVD: + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_LAST_ACK; + send_fin(cm_node, NULL); + break; + case NES_CM_STATE_MPAREQ_SENT: + create_event(cm_node, NES_CM_EVENT_ABORTED); + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + add_ref_cm_node(cm_node); + send_reset(cm_node, NULL); + break; + case NES_CM_STATE_FIN_WAIT1: + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSING; + send_ack(cm_node, NULL); + /* Wait for ACK as this is simultaneous close.. + * After we receive ACK, do not send anything.. + * Just rm the node.. Done.. */ + break; + case NES_CM_STATE_FIN_WAIT2: + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_TIME_WAIT; + send_ack(cm_node, NULL); + schedule_nes_timer(cm_node, NULL, NES_TIMER_TYPE_CLOSE, 1, 0); + break; + case NES_CM_STATE_TIME_WAIT: + cm_node->tcp_cntxt.rcv_nxt++; + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + rem_ref_cm_node(cm_node->cm_core, cm_node); + break; + case NES_CM_STATE_TSA: + default: + nes_debug(NES_DBG_CM, "Error Rcvd FIN for node-%p state = %d\n", + cm_node, cm_node->state); + break; } +} - if ((!tcph) || (cm_node->state == NES_CM_STATE_TSA)) { - BUG_ON(!tcph); - atomic_inc(&cm_accel_dropped_pkts); - return -1; + +static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + + int reset = 0; /* whether to send reset in case of err.. */ + atomic_inc(&cm_resets_recvd); + nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u." 
+ " refcnt=%d\n", cm_node, cm_node->state, + atomic_read(&cm_node->ref_count)); + cleanup_retrans_entry(cm_node); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + switch (cm_node->mpa_frame_rev) { + case IETF_MPA_V2: + cm_node->mpa_frame_rev = IETF_MPA_V1; + /* send a syn and goto syn sent state */ + cm_node->state = NES_CM_STATE_SYN_SENT; + if (send_syn(cm_node, 0, NULL)) { + active_open_err(cm_node, skb, reset); + } + break; + case IETF_MPA_V1: + default: + active_open_err(cm_node, skb, reset); + break; + } + break; + case NES_CM_STATE_MPAREQ_RCVD: + atomic_inc(&cm_node->passive_state); + dev_kfree_skb_any(skb); + break; + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_LISTENING: + nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__); + passive_open_err(cm_node, skb, reset); + break; + case NES_CM_STATE_TSA: + active_open_err(cm_node, skb, reset); + break; + case NES_CM_STATE_CLOSED: + drop_packet(skb); + break; + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_LAST_ACK: + cm_node->cm_id->rem_ref(cm_node->cm_id); + case NES_CM_STATE_TIME_WAIT: + cm_node->state = NES_CM_STATE_CLOSED; + rem_ref_cm_node(cm_node->cm_core, cm_node); + drop_packet(skb); + break; + default: + drop_packet(skb); + break; } +} - if (tcph->rst) { - atomic_inc(&cm_resets_recvd); - nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u. refcnt=%d\n", - cm_node, cm_node->state, atomic_read(&cm_node->ref_count)); - switch (cm_node->state) { - case NES_CM_STATE_LISTENING: - rem_ref_cm_node(cm_core, cm_node); - break; - case NES_CM_STATE_TSA: - case NES_CM_STATE_CLOSED: - break; - case NES_CM_STATE_SYN_RCVD: - nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X," - " remote 0x%08X:%04X, node state = %u\n", - cm_node->loc_addr, cm_node->loc_port, - cm_node->rem_addr, cm_node->rem_port, - cm_node->state); - rem_ref_cm_node(cm_core, cm_node); - break; - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_MPAREQ_SENT: - default: - nes_debug(NES_DBG_CM, "Received a reset for local 0x%08X:%04X," - " remote 0x%08X:%04X, node state = %u refcnt=%d\n", - cm_node->loc_addr, cm_node->loc_port, - cm_node->rem_addr, cm_node->rem_port, - cm_node->state, atomic_read(&cm_node->ref_count)); - /* create event */ - cm_node->state = NES_CM_STATE_CLOSED; - create_event(cm_node, NES_CM_EVENT_ABORTED); - break; +static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + int ret = 0; + int datasize = skb->len; + u8 *dataloc = skb->data; + + enum nes_cm_event_type type = NES_CM_EVENT_UNKNOWN; + u32 res_type; + + ret = parse_mpa(cm_node, dataloc, &res_type, datasize); + if (ret) { + nes_debug(NES_DBG_CM, "didn't like MPA Request\n"); + if (cm_node->state == NES_CM_STATE_MPAREQ_SENT) { + nes_debug(NES_DBG_CM, "%s[%u] create abort for " + "cm_node=%p listener=%p state=%d\n", __func__, + __LINE__, cm_node, cm_node->listener, + cm_node->state); + active_open_err(cm_node, skb, 1); + } else { + passive_open_err(cm_node, skb, 1); + } + return; + } + switch (cm_node->state) { + case NES_CM_STATE_ESTABLISHED: + if (res_type == NES_MPA_REQUEST_REJECT) + /*BIG problem as we are receiving the MPA.. So should + * not be REJECT.. This is Passive Open.. 
We can + * only receive an MPA Reject on an Active Open. */ + WARN_ON(1); + cm_node->state = NES_CM_STATE_MPAREQ_RCVD; + type = NES_CM_EVENT_MPA_REQ; + atomic_set(&cm_node->passive_state, + NES_PASSIVE_STATE_INDICATED); + break; + case NES_CM_STATE_MPAREQ_SENT: + cleanup_retrans_entry(cm_node); + if (res_type == NES_MPA_REQUEST_REJECT) { + type = NES_CM_EVENT_MPA_REJECT; + cm_node->state = NES_CM_STATE_MPAREJ_RCVD; + } else { + type = NES_CM_EVENT_CONNECTED; + cm_node->state = NES_CM_STATE_TSA; } - return -1; + + break; + default: + WARN_ON(1); + break; } + dev_kfree_skb_any(skb); + create_event(cm_node, type); +} - optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); +static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + active_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_SYN_RCVD: + passive_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_TSA: + default: + drop_packet(skb); + } +} - skb_pull(skb, ip_hdr(skb)->ihl << 2); - skb_pull(skb, tcph->doff << 2); +static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb) +{ + int err; - datasize = skb->len; - inc_sequence = ntohl(tcph->seq); - nes_debug(NES_DBG_CM, "datasize = %u, sequence = 0x%08X, ack_seq = 0x%08X," - " rcv_nxt = 0x%08X Flags: %s %s.\n", - datasize, inc_sequence, ntohl(tcph->ack_seq), - cm_node->tcp_cntxt.rcv_nxt, (tcph->syn ? "SYN":""), - (tcph->ack ? "ACK":"")); - - if (!tcph->syn && (inc_sequence != cm_node->tcp_cntxt.rcv_nxt) - ) { - nes_debug(NES_DBG_CM, "dropping packet, datasize = %u, sequence = 0x%08X," - " ack_seq = 0x%08X, rcv_nxt = 0x%08X Flags: %s.\n", - datasize, inc_sequence, ntohl(tcph->ack_seq), - cm_node->tcp_cntxt.rcv_nxt, (tcph->ack ? "ACK":"")); - if (cm_node->state == NES_CM_STATE_LISTENING) { - rem_ref_cm_node(cm_core, cm_node); - } - return -1; + err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num)) ? 0 : 1; + if (err) + active_open_err(cm_node, skb, 1); + + return err; +} + +static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb) +{ + int err = 0; + u32 seq; + u32 ack_seq; + u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num; + u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt; + u32 rcv_wnd; + + seq = ntohl(tcph->seq); + ack_seq = ntohl(tcph->ack_seq); + rcv_wnd = cm_node->tcp_cntxt.rcv_wnd; + if (ack_seq != loc_seq_num) + err = 1; + else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd))) + err = 1; + if (err) { + nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " + "listener=%p state=%d\n", __func__, __LINE__, cm_node, + cm_node->listener, cm_node->state); + indicate_pkt_err(cm_node, skb); + nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X " + "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt, + rcv_wnd); } + return err; +} - cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; +/* + * handle_syn_pkt() is for the passive node. The SYN packet is received when a + * node is created with a listener, or it may come in as a retransmitted + * packet, in which case it is simply dropped.
+ */ +static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + int ret; + u32 inc_sequence; + int optionsize; + optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); + skb_trim(skb, 0); + inc_sequence = ntohl(tcph->seq); - if (optionsize) { - u8 *optionsloc = (u8 *)&tcph[1]; - if (process_options(cm_node, optionsloc, optionsize, (u32)tcph->syn)) { - nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", __func__, cm_node); - send_reset(cm_node); - if (cm_node->state != NES_CM_STATE_SYN_SENT) - rem_ref_cm_node(cm_core, cm_node); - return 0; + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_MPAREQ_SENT: + /* Rcvd syn on active open connection*/ + active_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_LISTENING: + /* Passive OPEN */ + if (atomic_read(&cm_node->listener->pend_accepts_cnt) > + cm_node->listener->backlog) { + nes_debug(NES_DBG_CM, "drop syn due to backlog " + "pressure \n"); + cm_backlog_drops++; + passive_open_err(cm_node, skb, 0); + break; } - } else if (tcph->syn) - cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS; + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, + 1); + if (ret) { + passive_open_err(cm_node, skb, 0); + /* drop pkt */ + break; + } + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; + BUG_ON(cm_node->send_entry); + cm_node->accept_pend = 1; + atomic_inc(&cm_node->listener->pend_accepts_cnt); + + cm_node->state = NES_CM_STATE_SYN_RCVD; + send_syn(cm_node, 1, skb); + break; + case NES_CM_STATE_CLOSED: + cleanup_retrans_entry(cm_node); + add_ref_cm_node(cm_node); + send_reset(cm_node, skb); + break; + case NES_CM_STATE_TSA: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_MPAREQ_RCVD: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + default: + drop_packet(skb); + break; + } +} - cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) << - cm_node->tcp_cntxt.snd_wscale; +static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + int ret; + u32 inc_sequence; + int optionsize; - if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) { - cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; + optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); + skb_trim(skb, 0); + inc_sequence = ntohl(tcph->seq); + switch (cm_node->state) { + case NES_CM_STATE_SYN_SENT: + cleanup_retrans_entry(cm_node); + /* active open */ + if (check_syn(cm_node, tcph, skb)) + return; + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + /* setup options */ + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0); + if (ret) { + nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n", + cm_node); + break; + } + cleanup_retrans_entry(cm_node); + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1; + send_mpa_request(cm_node, skb); + cm_node->state = NES_CM_STATE_MPAREQ_SENT; + break; + case NES_CM_STATE_MPAREQ_RCVD: + /* passive open, so should not be here */ + passive_open_err(cm_node, skb, 1); + break; + case NES_CM_STATE_LISTENING: + cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + send_reset(cm_node, skb); + break; + case NES_CM_STATE_CLOSED: + cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); + cleanup_retrans_entry(cm_node); + add_ref_cm_node(cm_node); + send_reset(cm_node, skb); + break; + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_FIN_WAIT1: 
+ case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_TSA: + case NES_CM_STATE_CLOSING: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_MPAREQ_SENT: + default: + drop_packet(skb); + break; } +} - if (tcph->ack) { +static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct tcphdr *tcph) +{ + int datasize = 0; + u32 inc_sequence; + int ret = 0; + int optionsize; + + optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); + + if (check_seq(cm_node, tcph, skb)) + return -EINVAL; + + skb_pull(skb, tcph->doff << 2); + inc_sequence = ntohl(tcph->seq); + datasize = skb->len; + switch (cm_node->state) { + case NES_CM_STATE_SYN_RCVD: + /* Passive OPEN */ + cleanup_retrans_entry(cm_node); + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1); + if (ret) + break; cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); - switch (cm_node->state) { - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - /* read and stash current sequence number */ - if (cm_node->tcp_cntxt.rem_ack_num != cm_node->tcp_cntxt.loc_seq_num) { - nes_debug(NES_DBG_CM, "ERROR - cm_node->tcp_cntxt.rem_ack_num !=" - " cm_node->tcp_cntxt.loc_seq_num\n"); - send_reset(cm_node); - return 0; - } - if (cm_node->state == NES_CM_STATE_SYN_SENT) - cm_node->state = NES_CM_STATE_ONE_SIDE_ESTABLISHED; - else { - cm_node->state = NES_CM_STATE_ESTABLISHED; - } - break; - case NES_CM_STATE_LAST_ACK: - cm_node->state = NES_CM_STATE_CLOSED; - break; - case NES_CM_STATE_FIN_WAIT1: - cm_node->state = NES_CM_STATE_FIN_WAIT2; - break; - case NES_CM_STATE_CLOSING: - cm_node->state = NES_CM_STATE_TIME_WAIT; - /* need to schedule this to happen in 2MSL timeouts */ - cm_node->state = NES_CM_STATE_CLOSED; - break; - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_MPAREQ_SENT: - case NES_CM_STATE_CLOSE_WAIT: - case NES_CM_STATE_TIME_WAIT: - case NES_CM_STATE_CLOSED: - break; - case NES_CM_STATE_LISTENING: - nes_debug(NES_DBG_CM, "Received an ACK on a listening port (SYN %d)\n", tcph->syn); - cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); - send_reset(cm_node); - /* send_reset bumps refcount, this should have been a new node */ - rem_ref_cm_node(cm_core, cm_node); - return -1; - break; - case NES_CM_STATE_TSA: - nes_debug(NES_DBG_CM, "Received a packet with the ack bit set while in TSA state\n"); - break; - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_FIN_WAIT2: - default: - nes_debug(NES_DBG_CM, "Received ack from unknown state: %x\n", - cm_node->state); - send_reset(cm_node); - break; + cm_node->state = NES_CM_STATE_ESTABLISHED; + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + handle_rcv_mpa(cm_node, skb); + } else { /* rcvd ACK only */ + dev_kfree_skb_any(skb); + } + break; + case NES_CM_STATE_ESTABLISHED: + /* Passive OPEN */ + cleanup_retrans_entry(cm_node); + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + handle_rcv_mpa(cm_node, skb); + } else { + drop_packet(skb); } + break; + case NES_CM_STATE_MPAREQ_SENT: + cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + if (datasize) { + cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; + handle_rcv_mpa(cm_node, skb); + } else { /* Could be just an ack pkt.. 
*/ + dev_kfree_skb_any(skb); + } + break; + case NES_CM_STATE_LISTENING: + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + send_reset(cm_node, skb); + break; + case NES_CM_STATE_CLOSED: + cleanup_retrans_entry(cm_node); + add_ref_cm_node(cm_node); + send_reset(cm_node, skb); + break; + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_CLOSING: + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + cm_node->cm_id->rem_ref(cm_node->cm_id); + rem_ref_cm_node(cm_node->cm_core, cm_node); + drop_packet(skb); + break; + case NES_CM_STATE_FIN_WAIT1: + cleanup_retrans_entry(cm_node); + drop_packet(skb); + cm_node->state = NES_CM_STATE_FIN_WAIT2; + break; + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_TSA: + case NES_CM_STATE_MPAREQ_RCVD: + case NES_CM_STATE_UNKNOWN: + default: + cleanup_retrans_entry(cm_node); + drop_packet(skb); + break; } + return ret; +} - if (tcph->syn) { - if (cm_node->state == NES_CM_STATE_LISTENING) { - /* do not exceed backlog */ - atomic_inc(&cm_node->listener->pend_accepts_cnt); - if (atomic_read(&cm_node->listener->pend_accepts_cnt) > - cm_node->listener->backlog) { - nes_debug(NES_DBG_CM, "drop syn due to backlog pressure \n"); - cm_backlog_drops++; - atomic_dec(&cm_node->listener->pend_accepts_cnt); - rem_ref_cm_node(cm_core, cm_node); - return 0; - } - cm_node->accept_pend = 1; - } - if (datasize == 0) - cm_node->tcp_cntxt.rcv_nxt ++; - if (cm_node->state == NES_CM_STATE_LISTENING) { - cm_node->state = NES_CM_STATE_SYN_RCVD; - send_syn(cm_node, 1); - } - if (cm_node->state == NES_CM_STATE_ONE_SIDE_ESTABLISHED) { - cm_node->state = NES_CM_STATE_ESTABLISHED; - /* send final handshake ACK */ - ret = send_ack(cm_node); - if (ret < 0) - return ret; +static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, + struct sk_buff *skb, int optionsize, int passive) +{ + u8 *optionsloc = (u8 *)&tcph[1]; - cm_node->state = NES_CM_STATE_MPAREQ_SENT; - ret = send_mpa_request(cm_node); - if (ret < 0) - return ret; + if (optionsize) { + if (process_options(cm_node, optionsloc, optionsize, + (u32)tcph->syn)) { + nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", + __func__, cm_node); + if (passive) + passive_open_err(cm_node, skb, 1); + else + active_open_err(cm_node, skb, 1); + return 1; } } - if (tcph->fin) { - cm_node->tcp_cntxt.rcv_nxt++; - switch (cm_node->state) { - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_MPAREQ_SENT: - cm_node->state = NES_CM_STATE_CLOSE_WAIT; - cm_node->state = NES_CM_STATE_LAST_ACK; - ret = send_fin(cm_node, NULL); - break; - case NES_CM_STATE_FIN_WAIT1: - cm_node->state = NES_CM_STATE_CLOSING; - ret = send_ack(cm_node); - break; - case NES_CM_STATE_FIN_WAIT2: - cm_node->state = NES_CM_STATE_TIME_WAIT; - cm_node->tcp_cntxt.loc_seq_num ++; - ret = send_ack(cm_node); - /* need to schedule this to happen in 2MSL timeouts */ - cm_node->state = NES_CM_STATE_CLOSED; - break; - case NES_CM_STATE_CLOSE_WAIT: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_CLOSING: - case NES_CM_STATE_TSA: - default: - nes_debug(NES_DBG_CM, "Received a fin while in %x state\n", - cm_node->state); - ret = -EINVAL; - break; - } + cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) << + cm_node->tcp_cntxt.snd_wscale; + + if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) + cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; + 
return 0; +} + +/* + * active_open_err() sends a reset if the flag is set and always + * posts an ABORTED event. + */ +static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, + int reset) +{ + cleanup_retrans_entry(cm_node); + if (reset) { + nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, " + "state=%d\n", cm_node, cm_node->state); + add_ref_cm_node(cm_node); + send_reset(cm_node, skb); + } else { + dev_kfree_skb_any(skb); } - if (datasize) { - u8 *dataloc = skb->data; - /* figure out what state we are in and handle transition to next state */ - switch (cm_node->state) { - case NES_CM_STATE_LISTENING: - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_FIN_WAIT1: - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_CLOSE_WAIT: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_CLOSING: - break; - case NES_CM_STATE_MPAREQ_SENT: - /* recv the mpa res frame, ret=frame len (incl priv data) */ - ret = parse_mpa(cm_node, dataloc, datasize); - if (ret < 0) - break; - /* set the req frame payload len in skb */ - /* we are done handling this state, set node to a TSA state */ - cm_node->state = NES_CM_STATE_TSA; - send_ack(cm_node); - create_event(cm_node, NES_CM_EVENT_CONNECTED); - break; + cm_node->state = NES_CM_STATE_CLOSED; + create_event(cm_node, NES_CM_EVENT_ABORTED); +} - case NES_CM_STATE_ESTABLISHED: - /* we are expecting an MPA req frame */ - ret = parse_mpa(cm_node, dataloc, datasize); - if (ret < 0) { - break; - } - cm_node->state = NES_CM_STATE_TSA; - send_ack(cm_node); - /* we got a valid MPA request, create an event */ - create_event(cm_node, NES_CM_EVENT_MPA_REQ); - break; - case NES_CM_STATE_TSA: - handle_exception_pkt(cm_node, skb); - break; - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - default: - ret = -1; - } +/* + * passive_open_err() either sends a reset or frees the skb and + * removes the cm_node. + */ +static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, + int reset) +{ + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + if (reset) { + nes_debug(NES_DBG_CM, "passive_open_err sending RST for " + "cm_node=%p state =%d\n", cm_node, cm_node->state); + send_reset(cm_node, skb); + } else { + dev_kfree_skb_any(skb); + rem_ref_cm_node(cm_node->cm_core, cm_node); + } +} - return ret; +/* + * free_retrans_entry() assumes that the retrans_list_lock has + * been acquired before it is called. + */ +static void free_retrans_entry(struct nes_cm_node *cm_node) +{ + struct nes_timer_entry *send_entry; + + send_entry = cm_node->send_entry; + if (send_entry) { + cm_node->send_entry = NULL; + dev_kfree_skb_any(send_entry->skb); + kfree(send_entry); + rem_ref_cm_node(cm_node->cm_core, cm_node); + } +} + +static void cleanup_retrans_entry(struct nes_cm_node *cm_node) +{ + unsigned long flags; + + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + free_retrans_entry(cm_node); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); } +/** + * process_packet + * The skb is consumed here: each handler either frees it or reuses it for + * its reply.
+ */ +static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, + struct nes_cm_core *cm_core) +{ + enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN; + struct tcphdr *tcph = tcp_hdr(skb); + u32 fin_set = 0; + int ret = 0; + + skb_pull(skb, ip_hdr(skb)->ihl << 2); + + nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d " + "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn, + tcph->ack, tcph->rst, tcph->fin); + + if (tcph->rst) { + pkt_type = NES_PKT_TYPE_RST; + } else if (tcph->syn) { + pkt_type = NES_PKT_TYPE_SYN; + if (tcph->ack) + pkt_type = NES_PKT_TYPE_SYNACK; + } else if (tcph->ack) { + pkt_type = NES_PKT_TYPE_ACK; + } + if (tcph->fin) + fin_set = 1; + + switch (pkt_type) { + case NES_PKT_TYPE_SYN: + handle_syn_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_SYNACK: + handle_synack_pkt(cm_node, skb, tcph); + break; + case NES_PKT_TYPE_ACK: + ret = handle_ack_pkt(cm_node, skb, tcph); + if (fin_set && !ret) + handle_fin_pkt(cm_node); + break; + case NES_PKT_TYPE_RST: + handle_rst_pkt(cm_node, skb, tcph); + break; + default: + if ((fin_set) && (!check_seq(cm_node, tcph, skb))) + handle_fin_pkt(cm_node); + drop_packet(skb); + break; + } +} /** * mini_cm_listen - create a listen node with params */ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) { struct nes_cm_listener *listener; + struct iwpm_dev_data pm_reg_msg; + struct iwpm_sa_data pm_msg; unsigned long flags; + int iwpm_err = 0; nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n", - cm_info->loc_addr, cm_info->loc_port); + cm_info->loc_addr, cm_info->loc_port); /* cannot have multiple matching listeners */ - listener = find_listener(cm_core, htonl(cm_info->loc_addr), - htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE); + listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port, + NES_CM_LISTENER_EITHER_STATE, 1); + if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) { /* find automatically incs ref count ??? 
*/ atomic_dec(&listener->ref_count); @@ -1596,6 +2385,22 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, } if (!listener) { + nes_form_reg_msg(nesvnic, &pm_reg_msg); + iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES); + if (iwpm_err) { + nes_debug(NES_DBG_NLMSG, + "Port Mapper reg pid fail (err = %d).\n", iwpm_err); + } + if (iwpm_valid_pid() && !iwpm_err) { + nes_form_pm_msg(cm_info, &pm_msg); + iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_NES); + if (iwpm_err) + nes_debug(NES_DBG_NLMSG, + "Port Mapper query fail (err = %d).\n", iwpm_err); + else + nes_record_pm_msg(cm_info, &pm_msg); + } + /* create a CM listen node (1/2 node to compare incoming traffic to) */ listener = kzalloc(sizeof(*listener), GFP_ATOMIC); if (!listener) { @@ -1603,8 +2408,10 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, return NULL; } - listener->loc_addr = htonl(cm_info->loc_addr); - listener->loc_port = htons(cm_info->loc_port); + listener->loc_addr = cm_info->loc_addr; + listener->loc_port = cm_info->loc_port; + listener->mapped_loc_addr = cm_info->mapped_loc_addr; + listener->mapped_loc_port = cm_info->mapped_loc_port; listener->reused_node = 0; atomic_set(&listener->ref_count, 1); @@ -1633,9 +2440,9 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, } nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x," - " listener = %p, backlog = %d, cm_id = %p.\n", - cm_info->loc_addr, cm_info->loc_port, - listener, listener->backlog, listener->cm_id); + " listener = %p, backlog = %d, cm_id = %p.\n", + cm_info->loc_addr, cm_info->loc_port, + listener, listener->backlog, listener->cm_id); return listener; } @@ -1645,23 +2452,15 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, * mini_cm_connect - make a connection node with params */ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, - struct nes_vnic *nesvnic, - struct ietf_mpa_frame *mpa_frame, - struct nes_cm_info *cm_info) + struct nes_vnic *nesvnic, u16 private_data_len, + void *private_data, struct nes_cm_info *cm_info) { int ret = 0; struct nes_cm_node *cm_node; struct nes_cm_listener *loopbackremotelistener; struct nes_cm_node *loopbackremotenode; struct nes_cm_info loopback_cm_info; - - u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + - ntohs(mpa_frame->priv_data_len); - - cm_info->loc_addr = htonl(cm_info->loc_addr); - cm_info->rem_addr = htonl(cm_info->rem_addr); - cm_info->loc_port = htons(cm_info->loc_port); - cm_info->rem_port = htons(cm_info->rem_port); + u8 *start_buff; /* create a CM connection node */ cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL); @@ -1673,55 +2472,85 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; if (cm_info->loc_addr == cm_info->rem_addr) { - loopbackremotelistener = find_listener(cm_core, cm_node->rem_addr, - cm_node->rem_port, NES_CM_LISTENER_ACTIVE_STATE); + loopbackremotelistener = find_listener(cm_core, + cm_node->mapped_loc_addr, cm_node->mapped_rem_port, + NES_CM_LISTENER_ACTIVE_STATE, 0); if (loopbackremotelistener == NULL) { create_event(cm_node, NES_CM_EVENT_ABORTED); } else { - atomic_inc(&cm_loopbacks); loopback_cm_info = *cm_info; loopback_cm_info.loc_port = cm_info->rem_port; loopback_cm_info.rem_port = cm_info->loc_port; + loopback_cm_info.mapped_loc_port = + cm_info->mapped_rem_port; + loopback_cm_info.mapped_rem_port = + cm_info->mapped_loc_port; 
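The loopback branch above builds loopback_cm_info by mirroring the port pair (and the mapped port pair) so the passive half-node sees the active node's four-tuple reversed; the addresses are equal by definition of loopback. A small sketch of that mirroring, using a hypothetical reduced quad struct rather than the real nes_cm_info:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical reduced stand-in for the swapped nes_cm_info fields. */
struct quad {
	uint32_t loc_addr, rem_addr;
	uint16_t loc_port, rem_port;
};

/* Build the passive half-node's view of an active loopback connect:
 * the addresses match by definition, so only the ports are mirrored. */
static struct quad mirror_quad(const struct quad *active)
{
	struct quad passive = *active;

	passive.loc_port = active->rem_port;
	passive.rem_port = active->loc_port;
	return passive;
}

int main(void)
{
	struct quad a = { 0x7f000001, 0x7f000001, 4000, 5000 };
	struct quad p = mirror_quad(&a);

	printf("active  %u -> %u\n", (unsigned)a.loc_port, (unsigned)a.rem_port);
	printf("passive %u -> %u\n", (unsigned)p.loc_port, (unsigned)p.rem_port);
	return 0;
}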
loopback_cm_info.cm_id = loopbackremotelistener->cm_id; - loopbackremotenode = make_cm_node(cm_core, nesvnic, &loopback_cm_info, - loopbackremotelistener); + loopbackremotenode = make_cm_node(cm_core, nesvnic, + &loopback_cm_info, loopbackremotelistener); + if (!loopbackremotenode) { + rem_ref_cm_node(cm_node->cm_core, cm_node); + return NULL; + } + atomic_inc(&cm_loopbacks); loopbackremotenode->loopbackpartner = cm_node; - loopbackremotenode->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; + loopbackremotenode->tcp_cntxt.rcv_wscale = + NES_CM_DEFAULT_RCV_WND_SCALE; cm_node->loopbackpartner = loopbackremotenode; - memcpy(loopbackremotenode->mpa_frame_buf, &mpa_frame->priv_data, - mpa_frame_size); - loopbackremotenode->mpa_frame_size = mpa_frame_size - - sizeof(struct ietf_mpa_frame); + memcpy(loopbackremotenode->mpa_frame_buf, private_data, + private_data_len); + loopbackremotenode->mpa_frame_size = private_data_len; - /* we are done handling this state, set node to a TSA state */ + /* we are done handling this state. */ + /* set node to a TSA state */ cm_node->state = NES_CM_STATE_TSA; - cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num; - loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num; - cm_node->tcp_cntxt.max_snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd; - loopbackremotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd; - cm_node->tcp_cntxt.snd_wnd = loopbackremotenode->tcp_cntxt.rcv_wnd; - loopbackremotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd; - cm_node->tcp_cntxt.snd_wscale = loopbackremotenode->tcp_cntxt.rcv_wscale; - loopbackremotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale; - + cm_node->tcp_cntxt.rcv_nxt = + loopbackremotenode->tcp_cntxt.loc_seq_num; + loopbackremotenode->tcp_cntxt.rcv_nxt = + cm_node->tcp_cntxt.loc_seq_num; + cm_node->tcp_cntxt.max_snd_wnd = + loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.max_snd_wnd = + cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wnd = + loopbackremotenode->tcp_cntxt.rcv_wnd; + loopbackremotenode->tcp_cntxt.snd_wnd = + cm_node->tcp_cntxt.rcv_wnd; + cm_node->tcp_cntxt.snd_wscale = + loopbackremotenode->tcp_cntxt.rcv_wscale; + loopbackremotenode->tcp_cntxt.snd_wscale = + cm_node->tcp_cntxt.rcv_wscale; + loopbackremotenode->state = NES_CM_STATE_MPAREQ_RCVD; create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ); } return cm_node; } - /* set our node side to client (active) side */ - cm_node->tcp_cntxt.client = 1; - /* init our MPA frame ptr */ - memcpy(&cm_node->mpa_frame, mpa_frame, mpa_frame_size); - cm_node->mpa_frame_size = mpa_frame_size; + start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2); + cm_node->mpa_frame_size = private_data_len; + + memcpy(start_buff, private_data, private_data_len); /* send a syn and goto syn sent state */ cm_node->state = NES_CM_STATE_SYN_SENT; - ret = send_syn(cm_node, 0); + ret = send_syn(cm_node, 0, NULL); + + if (ret) { + /* error in sending the syn free up the cm_node struct */ + nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest " + "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, + cm_node->cm_id); + rem_ref_cm_node(cm_node->cm_core, cm_node); + cm_node = NULL; + } - nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X, port=0x%04x," - " cm_node=%p, cm_id = %p.\n", - cm_node->rem_addr, cm_node->rem_port, cm_node, cm_node->cm_id); + if (cm_node) { + nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X," + 
"port=0x%04x, cm_node=%p, cm_id = %p.\n", + cm_node->rem_addr, cm_node->rem_port, cm_node, + cm_node->cm_id); + } return cm_node; } @@ -1731,8 +2560,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, * mini_cm_accept - accept a connection * This function is never called */ -static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mpa_frame, - struct nes_cm_node *cm_node) +static int mini_cm_accept(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) { return 0; } @@ -1741,31 +2569,62 @@ static int mini_cm_accept(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mp /** * mini_cm_reject - reject and teardown a connection */ -static int mini_cm_reject(struct nes_cm_core *cm_core, - struct ietf_mpa_frame *mpa_frame, - struct nes_cm_node *cm_node) +static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) { int ret = 0; - struct sk_buff *skb; - u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + - ntohs(mpa_frame->priv_data_len); + int err = 0; + int passive_state; + struct nes_cm_event event; + struct iw_cm_id *cm_id = cm_node->cm_id; + struct nes_cm_node *loopback = cm_node->loopbackpartner; - skb = get_free_pkt(cm_node); - if (!skb) { - nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); - return -1; - } + nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n", + __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state); - /* send an MPA Request frame */ - form_cm_frame(skb, cm_node, NULL, 0, mpa_frame, mpa_frame_size, SET_ACK | SET_FIN); - ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); + if (cm_node->tcp_cntxt.client) + return ret; + cleanup_retrans_entry(cm_node); - cm_node->state = NES_CM_STATE_CLOSED; - ret = send_fin(cm_node, NULL); + if (!loopback) { + passive_state = atomic_add_return(1, &cm_node->passive_state); + if (passive_state == NES_SEND_RESET_EVENT) { + cm_node->state = NES_CM_STATE_CLOSED; + rem_ref_cm_node(cm_core, cm_node); + } else { + if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) { + rem_ref_cm_node(cm_core, cm_node); + } else { + ret = send_mpa_reject(cm_node); + if (ret) { + cm_node->state = NES_CM_STATE_CLOSED; + err = send_reset(cm_node, NULL); + if (err) + WARN_ON(1); + } else { + cm_id->add_ref(cm_id); + } + } + } + } else { + cm_node->cm_id = NULL; + if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) { + rem_ref_cm_node(cm_core, cm_node); + rem_ref_cm_node(cm_core, loopback); + } else { + event.cm_node = loopback; + event.cm_info.rem_addr = loopback->rem_addr; + event.cm_info.loc_addr = loopback->loc_addr; + event.cm_info.rem_port = loopback->rem_port; + event.cm_info.loc_port = loopback->loc_port; + event.cm_info.cm_id = loopback->cm_id; + cm_event_mpa_reject(&event); + rem_ref_cm_node(cm_core, cm_node); + loopback->state = NES_CM_STATE_CLOSING; - if (ret < 0) { - printk(KERN_INFO PFX "failed to send MPA Reply (reject)\n"); - return ret; + cm_id = loopback->cm_id; + rem_ref_cm_node(cm_core, loopback); + cm_id->rem_ref(cm_id); + } } return ret; @@ -1783,37 +2642,45 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod return -EINVAL; switch (cm_node->state) { - /* if passed in node is null, create a reference key node for node search */ - /* check if we found an owner node for this pkt */ - case NES_CM_STATE_SYN_RCVD: - case NES_CM_STATE_SYN_SENT: - case NES_CM_STATE_ONE_SIDE_ESTABLISHED: - case NES_CM_STATE_ESTABLISHED: - case NES_CM_STATE_ACCEPTING: - case NES_CM_STATE_MPAREQ_SENT: - cm_node->state = 
NES_CM_STATE_FIN_WAIT1; - send_fin(cm_node, NULL); - break; - case NES_CM_STATE_CLOSE_WAIT: - cm_node->state = NES_CM_STATE_LAST_ACK; - send_fin(cm_node, NULL); - break; - case NES_CM_STATE_FIN_WAIT1: - case NES_CM_STATE_FIN_WAIT2: - case NES_CM_STATE_LAST_ACK: - case NES_CM_STATE_TIME_WAIT: - case NES_CM_STATE_CLOSING: - ret = -1; - break; - case NES_CM_STATE_LISTENING: - case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_INITED: - case NES_CM_STATE_CLOSED: - case NES_CM_STATE_TSA: - ret = rem_ref_cm_node(cm_core, cm_node); - break; + case NES_CM_STATE_SYN_RCVD: + case NES_CM_STATE_SYN_SENT: + case NES_CM_STATE_ONE_SIDE_ESTABLISHED: + case NES_CM_STATE_ESTABLISHED: + case NES_CM_STATE_ACCEPTING: + case NES_CM_STATE_MPAREQ_SENT: + case NES_CM_STATE_MPAREQ_RCVD: + cleanup_retrans_entry(cm_node); + send_reset(cm_node, NULL); + break; + case NES_CM_STATE_CLOSE_WAIT: + cm_node->state = NES_CM_STATE_LAST_ACK; + send_fin(cm_node, NULL); + break; + case NES_CM_STATE_FIN_WAIT1: + case NES_CM_STATE_FIN_WAIT2: + case NES_CM_STATE_LAST_ACK: + case NES_CM_STATE_TIME_WAIT: + case NES_CM_STATE_CLOSING: + ret = -1; + break; + case NES_CM_STATE_LISTENING: + cleanup_retrans_entry(cm_node); + send_reset(cm_node, NULL); + break; + case NES_CM_STATE_MPAREJ_RCVD: + case NES_CM_STATE_UNKNOWN: + case NES_CM_STATE_INITED: + case NES_CM_STATE_CLOSED: + case NES_CM_STATE_LISTENER_DESTROYED: + ret = rem_ref_cm_node(cm_core, cm_node); + break; + case NES_CM_STATE_TSA: + if (cm_node->send_entry) + printk(KERN_ERR "ERROR Close got called from STATE_TSA " + "send_entry=%p\n", cm_node->send_entry); + ret = rem_ref_cm_node(cm_core, cm_node); + break; } - cm_node->cm_id = NULL; return ret; } @@ -1822,92 +2689,102 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod * recv_pkt - recv an ETHERNET packet, and process it through CM * node state machine */ -static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic, - struct sk_buff *skb) +static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, struct sk_buff *skb) { struct nes_cm_node *cm_node = NULL; struct nes_cm_listener *listener = NULL; struct iphdr *iph; struct tcphdr *tcph; struct nes_cm_info nfo; - int ret = 0; + int skb_handled = 1; + __be32 tmp_daddr, tmp_saddr; - if (!skb || skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) { - ret = -EINVAL; - goto out; - } + if (!skb) + return 0; + if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) + return 0; iph = (struct iphdr *)skb->data; tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr)); - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(*tcph)); - skb->len = ntohs(iph->tot_len); nfo.loc_addr = ntohl(iph->daddr); nfo.loc_port = ntohs(tcph->dest); nfo.rem_addr = ntohl(iph->saddr); nfo.rem_port = ntohs(tcph->source); - nes_debug(NES_DBG_CM, "Received packet: dest=" NIPQUAD_FMT - ":0x%04X src=" NIPQUAD_FMT ":0x%04X\n", - NIPQUAD(iph->daddr), tcph->dest, - NIPQUAD(iph->saddr), tcph->source); + /* If port mapper is available these should be mapped address info */ + nfo.mapped_loc_addr = ntohl(iph->daddr); + nfo.mapped_loc_port = ntohs(tcph->dest); + nfo.mapped_rem_addr = ntohl(iph->saddr); + nfo.mapped_rem_port = ntohs(tcph->source); - /* note: this call is going to increment cm_node ref count */ - cm_node = find_node(cm_core, - nfo.rem_port, nfo.rem_addr, - nfo.loc_port, nfo.loc_addr); + tmp_daddr = cpu_to_be32(iph->daddr); + tmp_saddr = cpu_to_be32(iph->saddr); - if (!cm_node) { - listener = 
find_listener(cm_core, nfo.loc_addr, nfo.loc_port, - NES_CM_LISTENER_ACTIVE_STATE); - if (listener) { - nfo.cm_id = listener->cm_id; - nfo.conn_type = listener->conn_type; - } else { - nfo.cm_id = NULL; - nfo.conn_type = 0; - } + nes_debug(NES_DBG_CM, "Received packet: dest=%pI4:0x%04X src=%pI4:0x%04X\n", + &tmp_daddr, tcph->dest, &tmp_saddr, tcph->source); + + do { + cm_node = find_node(cm_core, + nfo.mapped_rem_port, nfo.mapped_rem_addr, + nfo.mapped_loc_port, nfo.mapped_loc_addr); - cm_node = make_cm_node(cm_core, nesvnic, &nfo, listener); if (!cm_node) { - nes_debug(NES_DBG_CM, "Unable to allocate node\n"); - if (listener) { - nes_debug(NES_DBG_CM, "unable to allocate node and decrementing listener refcount\n"); + /* Only type of packet accepted are for */ + /* the PASSIVE open (syn only) */ + if ((!tcph->syn) || (tcph->ack)) { + skb_handled = 0; + break; + } + listener = find_listener(cm_core, nfo.mapped_loc_addr, + nfo.mapped_loc_port, + NES_CM_LISTENER_ACTIVE_STATE, 0); + if (!listener) { + nfo.cm_id = NULL; + nfo.conn_type = 0; + nes_debug(NES_DBG_CM, "Unable to find listener for the pkt\n"); + skb_handled = 0; + break; + } + nfo.cm_id = listener->cm_id; + nfo.conn_type = listener->conn_type; + cm_node = make_cm_node(cm_core, nesvnic, &nfo, + listener); + if (!cm_node) { + nes_debug(NES_DBG_CM, "Unable to allocate " + "node\n"); + cm_packets_dropped++; atomic_dec(&listener->ref_count); + dev_kfree_skb_any(skb); + break; } - ret = -1; - goto out; - } - if (!listener) { - nes_debug(NES_DBG_CM, "Packet found for unknown port %x refcnt=%d\n", - nfo.loc_port, atomic_read(&cm_node->ref_count)); - if (!tcph->rst) { - nes_debug(NES_DBG_CM, "Packet found for unknown port=%d" - " rem_port=%d refcnt=%d\n", - nfo.loc_port, nfo.rem_port, atomic_read(&cm_node->ref_count)); - - cm_node->tcp_cntxt.rcv_nxt = ntohl(tcph->seq); - cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); - send_reset(cm_node); + if (!tcph->rst && !tcph->fin) { + cm_node->state = NES_CM_STATE_LISTENING; + } else { + cm_packets_dropped++; + rem_ref_cm_node(cm_core, cm_node); + dev_kfree_skb_any(skb); + break; } - rem_ref_cm_node(cm_core, cm_node); - ret = -1; - goto out; + add_ref_cm_node(cm_node); + } else if (cm_node->state == NES_CM_STATE_TSA) { + if (cm_node->nesqp->pau_mode) + nes_queue_mgt_skbs(skb, nesvnic, cm_node->nesqp); + else { + rem_ref_cm_node(cm_core, cm_node); + atomic_inc(&cm_accel_dropped_pkts); + dev_kfree_skb_any(skb); + } + break; } - add_ref_cm_node(cm_node); - cm_node->state = NES_CM_STATE_LISTENING; - } - - nes_debug(NES_DBG_CM, "Processing Packet for node %p, data = (%p):\n", - cm_node, skb->data); - process_packet(cm_node, skb, cm_core); - - rem_ref_cm_node(cm_core, cm_node); - out: - if (skb) - dev_kfree_skb_any(skb); - return ret; + skb_reset_network_header(skb); + skb_set_transport_header(skb, sizeof(*tcph)); + skb->len = ntohs(iph->tot_len); + process_packet(cm_node, skb, cm_core); + rem_ref_cm_node(cm_core, cm_node); + } while (0); + return skb_handled; } @@ -1916,10 +2793,7 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvni */ static struct nes_cm_core *nes_cm_alloc_core(void) { - int i; - struct nes_cm_core *cm_core; - struct sk_buff *skb = NULL; /* setup the CM core */ /* alloc top level core control structure */ @@ -1931,25 +2805,12 @@ static struct nes_cm_core *nes_cm_alloc_core(void) init_timer(&cm_core->tcp_timer); cm_core->tcp_timer.function = nes_cm_timer_tick; - cm_core->mtu = NES_CM_DEFAULT_MTU; + cm_core->mtu = NES_CM_DEFAULT_MTU; cm_core->state = 
NES_CM_STATE_INITED; cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS; atomic_set(&cm_core->events_posted, 0); - /* init the packet lists */ - skb_queue_head_init(&cm_core->tx_free_list); - - for (i = 0; i < NES_CM_DEFAULT_FRAME_CNT; i++) { - skb = dev_alloc_skb(cm_core->mtu); - if (!skb) { - kfree(cm_core); - return NULL; - } - /* add 'raw' skb to free frame list */ - skb_queue_head(&cm_core->tx_free_list, skb); - } - cm_core->api = &nes_cm_api; spin_lock_init(&cm_core->ht_lock); @@ -1982,9 +2843,8 @@ static int mini_cm_dealloc_core(struct nes_cm_core *cm_core) barrier(); - if (timer_pending(&cm_core->tcp_timer)) { + if (timer_pending(&cm_core->tcp_timer)) del_timer(&cm_core->tcp_timer); - } destroy_workqueue(cm_core->event_wq); destroy_workqueue(cm_core->disconn_wq); @@ -2012,15 +2872,15 @@ static int mini_cm_set(struct nes_cm_core *cm_core, u32 type, u32 value) int ret = 0; switch (type) { - case NES_CM_SET_PKT_SIZE: - cm_core->mtu = value; - break; - case NES_CM_SET_FREE_PKT_Q_SIZE: - cm_core->free_tx_pkt_max = value; - break; - default: - /* unknown set option */ - ret = -EINVAL; + case NES_CM_SET_PKT_SIZE: + cm_core->mtu = value; + break; + case NES_CM_SET_FREE_PKT_Q_SIZE: + cm_core->free_tx_pkt_max = value; + break; + default: + /* unknown set option */ + ret = -EINVAL; } return ret; @@ -2039,8 +2899,8 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod return -EINVAL; nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 | - NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG | - NES_QPCONTEXT_MISC_DROS); + NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG | + NES_QPCONTEXT_MISC_DROS); if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale) nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE); @@ -2050,15 +2910,15 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16); nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32( - (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT); + (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT); nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32( - (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) & - NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK); + (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) & + NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK); nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32( - (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) & - NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK); + (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) & + NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK); nesqp->nesqp_context->keepalive = cpu_to_le32(0x80); nesqp->nesqp_context->ts_recent = 0; @@ -2067,24 +2927,24 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd); nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt); nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd << - cm_node->tcp_cntxt.rcv_wscale); + cm_node->tcp_cntxt.rcv_wscale); nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); nesqp->nesqp_context->srtt = 0; nesqp->nesqp_context->rttvar = cpu_to_le32(0x6); 
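/*
 * Seed values for the offloaded connection's congestion state: ssthresh
 * starts effectively unbounded (0x3FFFC000) so slow-start governs early
 * growth, the initial cwnd is two segments (2 * MSS, set just below), and
 * snd_wl1/snd_wl2 are primed with rcv_nxt/loc_seq_num so the hardware's
 * first window-update check against SEG.SEQ/SEG.ACK passes cleanly.
 */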
nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000); - nesqp->nesqp_context->cwnd = cpu_to_le32(2*cm_node->tcp_cntxt.mss); + nesqp->nesqp_context->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss); nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt); nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd); nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X," - " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n", - nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt), - le32_to_cpu(nesqp->nesqp_context->snd_nxt), - cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale), - le32_to_cpu(nesqp->nesqp_context->rcv_wnd), - le32_to_cpu(nesqp->nesqp_context->misc)); + " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n", + nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt), + le32_to_cpu(nesqp->nesqp_context->snd_nxt), + cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale), + le32_to_cpu(nesqp->nesqp_context->rcv_wnd), + le32_to_cpu(nesqp->nesqp_context->misc)); nes_debug(NES_DBG_CM, " snd_wnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd)); nes_debug(NES_DBG_CM, " snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd)); nes_debug(NES_DBG_CM, " max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd)); @@ -2101,22 +2961,16 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod */ int nes_cm_disconn(struct nes_qp *nesqp) { - unsigned long flags; + struct disconn_work *work; - spin_lock_irqsave(&nesqp->lock, flags); - if (nesqp->disconn_pending == 0) { - nesqp->disconn_pending++; - spin_unlock_irqrestore(&nesqp->lock, flags); - /* nes_add_ref(&nesqp->ibqp); */ - /* init our disconnect work element, to */ - INIT_WORK(&nesqp->disconn_work, nes_disconnect_worker); - - queue_work(g_cm_core->disconn_wq, &nesqp->disconn_work); - } else { - spin_unlock_irqrestore(&nesqp->lock, flags); - nes_rem_ref(&nesqp->ibqp); - } + work = kzalloc(sizeof *work, GFP_ATOMIC); + if (!work) + return -ENOMEM; /* Timer will clean up */ + nes_add_ref(&nesqp->ibqp); + work->nesqp = nesqp; + INIT_WORK(&work->work, nes_disconnect_worker); + queue_work(g_cm_core->disconn_wq, &work->work); return 0; } @@ -2126,11 +2980,14 @@ int nes_cm_disconn(struct nes_qp *nesqp) */ static void nes_disconnect_worker(struct work_struct *work) { - struct nes_qp *nesqp = container_of(work, struct nes_qp, disconn_work); + struct disconn_work *dwork = container_of(work, struct disconn_work, work); + struct nes_qp *nesqp = dwork->nesqp; + kfree(dwork); nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n", - nesqp->last_aeq, nesqp->hwqp.qp_id); + nesqp->last_aeq, nesqp->hwqp.qp_id); nes_cm_disconn_true(nesqp); + nes_rem_ref(&nesqp->ibqp); } @@ -2147,7 +3004,12 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) u16 last_ae; u8 original_hw_tcp_state; u8 original_ibqp_state; - u8 issued_disconnect_reset = 0; + int disconn_status = 0; + int issue_disconn = 0; + int issue_close = 0; + int issue_flush = 0; + u32 flush_q = NES_CQP_FLUSH_RQ; + struct ib_event ibevent; if (!nesqp) { nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n"); @@ -2159,9 +3021,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) /* make sure we havent already closed this connection */ if (!cm_id) { nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is 
NULL\n", - nesqp->hwqp.qp_id); + nesqp->hwqp.qp_id); spin_unlock_irqrestore(&nesqp->lock, flags); - nes_rem_ref(&nesqp->ibqp); return -1; } @@ -2169,67 +3030,86 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id); original_hw_tcp_state = nesqp->hw_tcp_state; - original_ibqp_state = nesqp->ibqp_state; + original_ibqp_state = nesqp->ibqp_state; last_ae = nesqp->last_aeq; + if (nesqp->term_flags) { + issue_disconn = 1; + issue_close = 1; + nesqp->cm_id = NULL; + del_timer(&nesqp->terminate_timer); + if (nesqp->flush_issued == 0) { + nesqp->flush_issued = 1; + issue_flush = 1; + } + } else if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || + ((original_ibqp_state == IB_QPS_RTS) && + (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + issue_disconn = 1; + if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) + disconn_status = -ECONNRESET; + } - nes_debug(NES_DBG_CM, "set ibqp_state=%u\n", nesqp->ibqp_state); + if (((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) || + (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) || + (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) || + (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + issue_close = 1; + nesqp->cm_id = NULL; + if (nesqp->flush_issued == 0) { + nesqp->flush_issued = 1; + issue_flush = 1; + } + } + + spin_unlock_irqrestore(&nesqp->lock, flags); + + if ((issue_flush) && (nesqp->destroyed == 0)) { + /* Flush the queue(s) */ + if (nesqp->hw_iwarp_state >= NES_AEQE_IWARP_STATE_TERMINATE) + flush_q |= NES_CQP_FLUSH_SQ; + flush_wqes(nesvnic->nesdev, nesqp, flush_q, 1); + + if (nesqp->term_flags) { + ibevent.device = nesqp->ibqp.device; + ibevent.event = nesqp->terminate_eventtype; + ibevent.element.qp = &nesqp->ibqp; + if (nesqp->ibqp.event_handler) + nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); + } + } - if ((nesqp->cm_id) && (cm_id->event_handler)) { - if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || - ((original_ibqp_state == IB_QPS_RTS) && - (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + if ((cm_id) && (cm_id->event_handler)) { + if (issue_disconn) { atomic_inc(&cm_disconnects); cm_event.event = IW_CM_EVENT_DISCONNECT; - if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) { - issued_disconnect_reset = 1; - cm_event.status = IW_CM_EVENT_STATUS_RESET; - nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event (status reset) for " - " QP%u, cm_id = %p. \n", - nesqp->hwqp.qp_id, cm_id); - } else { - cm_event.status = IW_CM_EVENT_STATUS_OK; - } - + cm_event.status = disconn_status; cm_event.local_addr = cm_id->local_addr; cm_event.remote_addr = cm_id->remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; - nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event for " - " QP%u, SQ Head = %u, SQ Tail = %u. cm_id = %p, refcount = %u.\n", - nesqp->hwqp.qp_id, - nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail, cm_id, - atomic_read(&nesqp->refcount)); + nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event" + " for QP%u, SQ Head = %u, SQ Tail = %u. 
" + "cm_id = %p, refcount = %u.\n", + nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, + nesqp->hwqp.sq_tail, cm_id, + atomic_read(&nesqp->refcount)); - spin_unlock_irqrestore(&nesqp->lock, flags); ret = cm_id->event_handler(cm_id, &cm_event); if (ret) - nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); - spin_lock_irqsave(&nesqp->lock, flags); + nes_debug(NES_DBG_CM, "OFA CM event_handler " + "returned, ret=%d\n", ret); } - nesqp->disconn_pending = 0; - /* There might have been another AE while the lock was released */ - original_hw_tcp_state = nesqp->hw_tcp_state; - original_ibqp_state = nesqp->ibqp_state; - last_ae = nesqp->last_aeq; - - if ((issued_disconnect_reset == 0) && (nesqp->cm_id) && - ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) || - (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) || - (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) || - (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { + if (issue_close) { atomic_inc(&cm_closes); - nesqp->cm_id = NULL; - nesqp->in_disconnect = 0; - spin_unlock_irqrestore(&nesqp->lock, flags); nes_disconnect(nesqp, 1); cm_id->provider_data = nesqp; /* Send up the close complete event */ cm_event.event = IW_CM_EVENT_CLOSE; - cm_event.status = IW_CM_EVENT_STATUS_OK; + cm_event.status = 0; cm_event.provider_data = cm_id->provider_data; cm_event.local_addr = cm_id->local_addr; cm_event.remote_addr = cm_id->remote_addr; @@ -2237,41 +3117,12 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) cm_event.private_data_len = 0; ret = cm_id->event_handler(cm_id, &cm_event); - if (ret) { + if (ret) nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); - } cm_id->rem_ref(cm_id); - - spin_lock_irqsave(&nesqp->lock, flags); - if (nesqp->flush_issued == 0) { - nesqp->flush_issued = 1; - spin_unlock_irqrestore(&nesqp->lock, flags); - flush_wqes(nesvnic->nesdev, nesqp, NES_CQP_FLUSH_RQ, 1); - } else { - spin_unlock_irqrestore(&nesqp->lock, flags); - } - - /* This reference is from either ModifyQP or the AE processing, - there is still a race here with modifyqp */ - nes_rem_ref(&nesqp->ibqp); - - } else { - cm_id = nesqp->cm_id; - spin_unlock_irqrestore(&nesqp->lock, flags); - /* check to see if the inbound reset beat the outbound reset */ - if ((!cm_id) && (last_ae==NES_AEQE_AEID_RESET_SENT)) { - nes_debug(NES_DBG_CM, "QP%u: Decing refcount due to inbound reset" - " beating the outbound reset.\n", - nesqp->hwqp.qp_id); - nes_rem_ref(&nesqp->ibqp); - } } - } else { - nesqp->disconn_pending = 0; - spin_unlock_irqrestore(&nesqp->lock, flags); } - nes_rem_ref(&nesqp->ibqp); return 0; } @@ -2285,15 +3136,17 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt) int ret = 0; struct nes_vnic *nesvnic; struct nes_device *nesdev; + struct nes_ib_device *nesibdev; nesvnic = to_nesvnic(nesqp->ibqp.device); if (!nesvnic) return -EINVAL; nesdev = nesvnic->nesdev; + nesibdev = nesvnic->nesibdev; nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", - atomic_read(&nesvnic->netdev->refcnt)); + netdev_refcnt_read(nesvnic->netdev)); if (nesqp->active_conn) { @@ -2302,9 +3155,11 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt) } else { /* Need to free the Last Streaming Mode Message */ if (nesqp->ietf_frame) { + if (nesqp->lsmm_mr) + nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr); pci_free_consistent(nesdev->pcidev, - nesqp->private_data_len+sizeof(struct ietf_mpa_frame), - nesqp->ietf_frame, nesqp->ietf_frame_pbase); + nesqp->private_data_len + nesqp->ietf_frame_size, + nesqp->ietf_frame, nesqp->ietf_frame_pbase); } 
} @@ -2313,7 +3168,6 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt) nes_debug(NES_DBG_CM, "Call close API\n"); g_cm_core->api->close(g_cm_core, nesqp->cm_node); - nesqp->cm_node = NULL; } return ret; @@ -2338,6 +3192,20 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct nes_v4_quad nes_quad; u32 crc_value; int ret; + int passive_state; + struct nes_ib_device *nesibdev; + struct ib_mr *ibmr = NULL; + struct ib_phys_buf ibphysbuf; + struct nes_pd *nespd; + u64 tagged_offset; + u8 mpa_frame_offset = 0; + struct ietf_mpa_v2 *mpa_v2_frame; + u8 start_addr = 0; + u8 *start_ptr = &start_addr; + u8 **start_buff = &start_ptr; + u16 buff_len = 0; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; ibqp = nes_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) @@ -2349,72 +3217,117 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesdev = nesvnic->nesdev; adapter = nesdev->nesadapter; - nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n", - nesvnic, nesvnic->netdev, nesvnic->netdev->name); - - /* since this is from a listen, we were able to put node handle into cm_id */ cm_node = (struct nes_cm_node *)cm_id->provider_data; + nes_debug(NES_DBG_CM, "nes_accept: cm_node= %p nesvnic=%p, netdev=%p," + "%s\n", cm_node, nesvnic, nesvnic->netdev, + nesvnic->netdev->name); + + if (NES_CM_STATE_LISTENER_DESTROYED == cm_node->state) { + if (cm_node->loopbackpartner) + rem_ref_cm_node(cm_node->cm_core, cm_node->loopbackpartner); + rem_ref_cm_node(cm_node->cm_core, cm_node); + return -EINVAL; + } + passive_state = atomic_add_return(1, &cm_node->passive_state); + if (passive_state == NES_SEND_RESET_EVENT) { + rem_ref_cm_node(cm_node->cm_core, cm_node); + return -ECONNRESET; + } /* associate the node with the QP */ nesqp->cm_node = (void *)cm_node; + cm_node->nesqp = nesqp; - nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu\n", - nesqp->hwqp.qp_id, cm_node, jiffies); + + nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n", + nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener); atomic_inc(&cm_accepts); nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", - atomic_read(&nesvnic->netdev->refcnt)); + netdev_refcnt_read(nesvnic->netdev)); + + nesqp->ietf_frame_size = sizeof(struct ietf_mpa_v2); + /* allocate the ietf frame and space for private data */ + nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, + nesqp->ietf_frame_size + conn_param->private_data_len, + &nesqp->ietf_frame_pbase); + + if (!nesqp->ietf_frame) { + nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n"); + return -ENOMEM; + } + mpa_v2_frame = (struct ietf_mpa_v2 *)nesqp->ietf_frame; - /* allocate the ietf frame and space for private data */ - nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, - sizeof(struct ietf_mpa_frame) + conn_param->private_data_len, - &nesqp->ietf_frame_pbase); + if (cm_node->mpa_frame_rev == IETF_MPA_V1) + mpa_frame_offset = 4; + + if (cm_node->mpa_frame_rev == IETF_MPA_V1 || + cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { + record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); + } + + memcpy(mpa_v2_frame->priv_data, conn_param->private_data, + conn_param->private_data_len); + + cm_build_mpa_frame(cm_node, start_buff, &buff_len, nesqp->ietf_frame, MPA_KEY_REPLY); + nesqp->private_data_len = conn_param->private_data_len; - if (!nesqp->ietf_frame) { - nes_debug(NES_DBG_CM, "Unable to allocate 
memory for private data\n"); + /* setup our first outgoing iWarp send WQE (the IETF frame response) */ + wqe = &nesqp->hwqp.sq_vbase[0]; + + if (raddr->sin_addr.s_addr != laddr->sin_addr.s_addr) { + u64temp = (unsigned long)nesqp; + nesibdev = nesvnic->nesibdev; + nespd = nesqp->nespd; + ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset; + ibphysbuf.size = buff_len; + tagged_offset = (u64)(unsigned long)*start_buff; + ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd, + &ibphysbuf, 1, + IB_ACCESS_LOCAL_WRITE, + &tagged_offset); + if (!ibmr) { + nes_debug(NES_DBG_CM, "Unable to register memory region" + "for lSMM for cm_node = %p \n", + cm_node); + pci_free_consistent(nesdev->pcidev, + nesqp->private_data_len + nesqp->ietf_frame_size, + nesqp->ietf_frame, nesqp->ietf_frame_pbase); return -ENOMEM; } - - /* setup the MPA frame */ - nesqp->private_data_len = conn_param->private_data_len; - memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE); - - memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, - conn_param->private_data_len); - - nesqp->ietf_frame->priv_data_len = cpu_to_be16(conn_param->private_data_len); - nesqp->ietf_frame->rev = mpa_version; - nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; - - /* setup our first outgoing iWarp send WQE (the IETF frame response) */ - wqe = &nesqp->hwqp.sq_vbase[0]; - - if (cm_id->remote_addr.sin_addr.s_addr != cm_id->local_addr.sin_addr.s_addr) { - u64temp = (unsigned long)nesqp; - u64temp |= NES_SW_CONTEXT_ALIGN>>1; - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, - u64temp); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = - cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | NES_IWARP_SQ_WQE_WRPDU); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = - cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = - cpu_to_le32((u32)nesqp->ietf_frame_pbase); - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = - cpu_to_le32((u32)((u64)nesqp->ietf_frame_pbase >> 32)); - wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = - cpu_to_le32(conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; - - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( - NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | NES_QPCONTEXT_ORDIRD_WRPDU); - } else { - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM)); + ibmr->pd = &nespd->ibpd; + ibmr->device = nespd->ibpd.device; + nesqp->lsmm_mr = ibmr; + + u64temp |= NES_SW_CONTEXT_ALIGN >> 1; + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, + u64temp); + wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = + cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | + NES_IWARP_SQ_WQE_WRPDU); + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = + cpu_to_le32(buff_len); + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, + (u64)(unsigned long)(*start_buff)); + wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = + cpu_to_le32(buff_len); + wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey; + if (nesqp->sq_kmapped) { + nesqp->sq_kmapped = 0; + kunmap(nesqp->page); } - nesqp->skip_lsmm = 1; + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | + NES_QPCONTEXT_ORDIRD_WRPDU); + } else { + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32(NES_QPCONTEXT_ORDIRD_WRPDU); + } + nesqp->skip_lsmm = 1; /* Cache the cm_id in the qp */ 
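/*
 * Sketch of the MPA v2 RTR word that cm_build_mpa_frame() emits in the
 * reply built above (illustrative; the exact flag placement is an
 * assumption, not code from this function):
 *
 *     rtr.ctrl_ird = htons(min(ird, IETF_NO_IRD_ORD) |
 *                          (peer_to_peer ? IETF_PEER_TO_PEER : 0));
 *     rtr.ctrl_ord = htons(min(ord, IETF_NO_IRD_ORD) |
 *                          (rdma0_write ? IETF_RDMA0_WRITE : IETF_RDMA0_READ));
 *
 * IETF_NO_IRD_ORD (0x3FFF) means "not specified", which is why
 * record_ird_ord() earlier falls back to conn_param's ird/ord when the
 * peer sent that sentinel.
 */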
nesqp->cm_id = cm_id; @@ -2422,82 +3335,95 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) /* nesqp->cm_node = (void *)cm_id->provider_data; */ cm_id->provider_data = nesqp; - nesqp->active_conn = 0; + nesqp->active_conn = 0; + + if (cm_node->state == NES_CM_STATE_TSA) + nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n", + cm_node); nes_cm_init_tsa_conn(nesqp, cm_node); - nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); - nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); - nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); + nesqp->nesqp_context->tcpPorts[0] = + cpu_to_le16(cm_node->mapped_loc_port); + nesqp->nesqp_context->tcpPorts[1] = + cpu_to_le16(cm_node->mapped_rem_port); + + nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr); nesqp->nesqp_context->misc2 |= cpu_to_le32( - (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + (u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); - nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32( - nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), NULL, - NES_ARP_RESOLVE) << 16); + nesqp->nesqp_context->arp_index_vlan |= + cpu_to_le32(nes_arp_table(nesdev, + le32_to_cpu(nesqp->nesqp_context->ip0), NULL, + NES_ARP_RESOLVE) << 16); nesqp->nesqp_context->ts_val_delta = cpu_to_le32( - jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); + jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( - ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); + ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((u32)cm_node->ord_size); memset(&nes_quad, 0, sizeof(nes_quad)); - nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; - nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; - nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr); + nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port); + nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port); /* Produce hash key */ crc_value = get_crc_value(&nes_quad); nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", - nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); + nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); nesqp->hte_index &= adapter->hte_index_mask; nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); - nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X," - " rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + private data length=%zu.\n", - nesqp->hwqp.qp_id, - ntohl(cm_id->remote_addr.sin_addr.s_addr), - ntohs(cm_id->remote_addr.sin_port), - ntohl(cm_id->local_addr.sin_addr.s_addr), - ntohs(cm_id->local_addr.sin_port), - le32_to_cpu(nesqp->nesqp_context->rcv_nxt), - le32_to_cpu(nesqp->nesqp_context->snd_nxt), - conn_param->private_data_len+sizeof(struct ietf_mpa_frame)); - - attr.qp_state = IB_QPS_RTS; - nes_modify_qp(&nesqp->ibqp, &attr, 
IB_QP_STATE, NULL); + nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = " + "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + " + "private data length=%u.\n", nesqp->hwqp.qp_id, + ntohl(raddr->sin_addr.s_addr), ntohs(raddr->sin_port), + ntohl(laddr->sin_addr.s_addr), ntohs(laddr->sin_port), + le32_to_cpu(nesqp->nesqp_context->rcv_nxt), + le32_to_cpu(nesqp->nesqp_context->snd_nxt), + buff_len); - /* notify OF layer that accept event was successfull */ + /* notify OF layer that accept event was successful */ cm_id->add_ref(cm_id); + nes_add_ref(&nesqp->ibqp); cm_event.event = IW_CM_EVENT_ESTABLISHED; - cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED; + cm_event.status = 0; cm_event.provider_data = (void *)nesqp; cm_event.local_addr = cm_id->local_addr; cm_event.remote_addr = cm_id->remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; + cm_event.ird = cm_node->ird_size; + cm_event.ord = cm_node->ord_size; + ret = cm_id->event_handler(cm_id, &cm_event); + attr.qp_state = IB_QPS_RTS; + nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); if (cm_node->loopbackpartner) { - cm_node->loopbackpartner->mpa_frame_size = nesqp->private_data_len; + cm_node->loopbackpartner->mpa_frame_size = + nesqp->private_data_len; /* copy entire MPA frame to our cm_node's frame */ - memcpy(cm_node->loopbackpartner->mpa_frame_buf, nesqp->ietf_frame->priv_data, - nesqp->private_data_len); + memcpy(cm_node->loopbackpartner->mpa_frame_buf, + conn_param->private_data, conn_param->private_data_len); create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED); } if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); return 0; } @@ -2509,23 +3435,29 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { struct nes_cm_node *cm_node; + struct nes_cm_node *loopback; struct nes_cm_core *cm_core; + u8 *start_buff; atomic_inc(&cm_rejects); - cm_node = (struct nes_cm_node *) cm_id->provider_data; + cm_node = (struct nes_cm_node *)cm_id->provider_data; + loopback = cm_node->loopbackpartner; cm_core = cm_node->cm_core; - cm_node->mpa_frame_size = sizeof(struct ietf_mpa_frame) + pdata_len; - - strcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP); - memcpy(&cm_node->mpa_frame.priv_data, pdata, pdata_len); - - cm_node->mpa_frame.priv_data_len = cpu_to_be16(pdata_len); - cm_node->mpa_frame.rev = mpa_version; - cm_node->mpa_frame.flags = IETF_MPA_FLAGS_CRC | IETF_MPA_FLAGS_REJECT; + cm_node->cm_id = cm_id; - cm_core->api->reject(cm_core, &cm_node->mpa_frame, cm_node); + if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER) + return -EINVAL; - return 0; + if (loopback) { + memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len); + loopback->mpa_frame.priv_data_len = pdata_len; + loopback->mpa_frame_size = pdata_len; + } else { + start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2); + cm_node->mpa_frame_size = pdata_len; + memcpy(start_buff, pdata, pdata_len); + } + return cm_core->api->reject(cm_core, cm_node); } @@ -2541,7 +3473,15 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct nes_device *nesdev; struct nes_cm_node *cm_node; struct nes_cm_info cm_info; - + int apbvt_set = 0; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in 
*)&cm_id->remote_addr; + struct iwpm_dev_data pm_reg_msg; + struct iwpm_sa_data pm_msg; + int iwpm_err = 0; + + if (cm_id->remote_addr.ss_family != AF_INET) + return -ENOSYS; ibqp = nes_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) return -EINVAL; @@ -2551,78 +3491,99 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesvnic = to_nesvnic(nesqp->ibqp.device); if (!nesvnic) return -EINVAL; - nesdev = nesvnic->nesdev; + nesdev = nesvnic->nesdev; if (!nesdev) return -EINVAL; - atomic_inc(&cm_connects); + if (!laddr->sin_port || !raddr->sin_port) + return -EINVAL; - nesqp->ietf_frame = kzalloc(sizeof(struct ietf_mpa_frame) + - conn_param->private_data_len, GFP_KERNEL); - if (!nesqp->ietf_frame) - return -ENOMEM; + nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = " + "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id, + ntohl(nesvnic->local_ipaddr), ntohl(raddr->sin_addr.s_addr), + ntohs(raddr->sin_port), ntohl(laddr->sin_addr.s_addr), + ntohs(laddr->sin_port)); - /* set qp as having an active connection */ + atomic_inc(&cm_connects); nesqp->active_conn = 1; - nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", - nesqp->hwqp.qp_id, - ntohl(cm_id->remote_addr.sin_addr.s_addr), - ntohs(cm_id->remote_addr.sin_port), - ntohl(cm_id->local_addr.sin_addr.s_addr), - ntohs(cm_id->local_addr.sin_port)); - /* cache the cm_id in the qp */ nesqp->cm_id = cm_id; - cm_id->provider_data = nesqp; - - /* copy the private data */ - if (conn_param->private_data_len) { - memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data, - conn_param->private_data_len); - } - nesqp->private_data_len = conn_param->private_data_len; - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); - nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); - nes_debug(NES_DBG_CM, "mpa private data len =%u\n", conn_param->private_data_len); - - strcpy(&nesqp->ietf_frame->key[0], IEFT_MPA_KEY_REQ); - nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC; - nesqp->ietf_frame->rev = IETF_MPA_VERSION; - nesqp->ietf_frame->priv_data_len = htons(conn_param->private_data_len); - if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr) - nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); + nes_debug(NES_DBG_CM, "mpa private data len =%u\n", + conn_param->private_data_len); /* set up the connection params for the node */ - cm_info.loc_addr = (cm_id->local_addr.sin_addr.s_addr); - cm_info.loc_port = (cm_id->local_addr.sin_port); - cm_info.rem_addr = (cm_id->remote_addr.sin_addr.s_addr); - cm_info.rem_port = (cm_id->remote_addr.sin_port); + cm_info.loc_addr = ntohl(laddr->sin_addr.s_addr); + cm_info.loc_port = ntohs(laddr->sin_port); + cm_info.rem_addr = ntohl(raddr->sin_addr.s_addr); + cm_info.rem_port = ntohs(raddr->sin_port); cm_info.cm_id = cm_id; cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; + /* No port mapper available, go with the specified peer information */ + cm_info.mapped_loc_addr = cm_info.loc_addr; + cm_info.mapped_loc_port = cm_info.loc_port; + cm_info.mapped_rem_addr = cm_info.rem_addr; + cm_info.mapped_rem_port = cm_info.rem_port; + + nes_form_reg_msg(nesvnic, &pm_reg_msg); + iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES); + if (iwpm_err) { + nes_debug(NES_DBG_NLMSG, + "Port Mapper reg pid fail (err = %d).\n", iwpm_err); + } + if 
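/* A failed iwpm pid registration is deliberately non-fatal: the mapped_*
 * fields were pre-seeded with the caller's own addresses above, so an
 * absent or failing port-mapper service simply means "no remapping". */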
(iwpm_valid_pid() && !iwpm_err) { + nes_form_pm_msg(&cm_info, &pm_msg); + iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_NES); + if (iwpm_err) + nes_debug(NES_DBG_NLMSG, + "Port Mapper query fail (err = %d).\n", iwpm_err); + else + nes_record_pm_msg(&cm_info, &pm_msg); + } + + if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) { + nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port, + PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + apbvt_set = 1; + } + + if (nes_create_mapinfo(&cm_info)) + return -ENOMEM; + cm_id->add_ref(cm_id); - nes_add_ref(&nesqp->ibqp); /* create a connect CM node connection */ - cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, nesqp->ietf_frame, &cm_info); + cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, + conn_param->private_data_len, (void *)conn_param->private_data, + &cm_info); if (!cm_node) { - if (cm_id->local_addr.sin_addr.s_addr != cm_id->remote_addr.sin_addr.s_addr) - nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); - nes_rem_ref(&nesqp->ibqp); - kfree(nesqp->ietf_frame); - nesqp->ietf_frame = NULL; + if (apbvt_set) + nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port, + PCI_FUNC(nesdev->pcidev->devfn), + NES_MANAGE_APBVT_DEL); + + nes_debug(NES_DBG_NLMSG, "Delete mapped_loc_port = %04X\n", + cm_info.mapped_loc_port); + nes_remove_mapinfo(cm_info.loc_addr, cm_info.loc_port, + cm_info.mapped_loc_addr, cm_info.mapped_loc_port); cm_id->rem_ref(cm_id); return -ENOMEM; } - cm_node->apbvt_set = 1; + record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); + if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO && + cm_node->ord_size == 0) + cm_node->ord_size = 1; + + cm_node->apbvt_set = apbvt_set; nesqp->cm_node = cm_node; + cm_node->nesqp = nesqp; + nes_add_ref(&nesqp->ibqp); return 0; } @@ -2636,50 +3597,59 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog) struct nes_vnic *nesvnic; struct nes_cm_listener *cm_node; struct nes_cm_info cm_info; - struct nes_adapter *adapter; int err; - + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n", - cm_id, ntohs(cm_id->local_addr.sin_port)); + cm_id, ntohs(laddr->sin_port)); + if (cm_id->local_addr.ss_family != AF_INET) + return -ENOSYS; nesvnic = to_nesvnic(cm_id->device); if (!nesvnic) return -EINVAL; - adapter = nesvnic->nesdev->nesadapter; + nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n", nesvnic, nesvnic->netdev, nesvnic->netdev->name); nes_debug(NES_DBG_CM, "nesvnic->local_ipaddr=0x%08x, sin_addr.s_addr=0x%08x\n", - nesvnic->local_ipaddr, cm_id->local_addr.sin_addr.s_addr); + nesvnic->local_ipaddr, laddr->sin_addr.s_addr); /* setup listen params in our api call struct */ - cm_info.loc_addr = nesvnic->local_ipaddr; - cm_info.loc_port = cm_id->local_addr.sin_port; + cm_info.loc_addr = ntohl(nesvnic->local_ipaddr); + cm_info.loc_port = ntohs(laddr->sin_port); cm_info.backlog = backlog; cm_info.cm_id = cm_id; cm_info.conn_type = NES_CM_IWARP_CONN_TYPE; + /* No port mapper available, go with the specified info */ + cm_info.mapped_loc_addr = cm_info.loc_addr; + cm_info.mapped_loc_port = cm_info.loc_port; cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); if (!cm_node) { - printk("%s[%u] Error returned from listen API call\n", - __func__, __LINE__); + printk(KERN_ERR "%s[%u] Error returned from listen API call\n", + __func__, __LINE__); return -ENOMEM; } cm_id->provider_data = cm_node; if (!cm_node->reused_node) 
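/* APBVT and port-map entries are keyed per (addr, port); a reused
 * listener inherits the ones its predecessor created, so only a fresh
 * node registers them here. */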
{ - err = nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), - PCI_FUNC(nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); + if (nes_create_mapinfo(&cm_info)) + return -ENOMEM; + + err = nes_manage_apbvt(nesvnic, cm_node->mapped_loc_port, + PCI_FUNC(nesvnic->nesdev->pcidev->devfn), + NES_MANAGE_APBVT_ADD); if (err) { - printk("nes_manage_apbvt call returned %d.\n", err); + printk(KERN_ERR "nes_manage_apbvt call returned %d.\n", + err); g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node); return err; } - cm_listens_created++; + atomic_inc(&cm_listens_created); } cm_id->add_ref(cm_id); @@ -2711,15 +3681,16 @@ int nes_destroy_listen(struct iw_cm_id *cm_id) */ int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice) { + int rc = 0; + cm_packets_received++; - if ((g_cm_core) && (g_cm_core->api)) { - g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb); - } else { + if ((g_cm_core) && (g_cm_core->api)) + rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb); + else nes_debug(NES_DBG_CM, "Unable to process packet for CM," - " cm is not setup properly.\n"); - } + " cm is not setup properly.\n"); - return 0; + return rc; } @@ -2732,11 +3703,10 @@ int nes_cm_start(void) nes_debug(NES_DBG_CM, "\n"); /* create the primary CM core, pass this handle to subsequent core inits */ g_cm_core = nes_cm_alloc_core(); - if (g_cm_core) { + if (g_cm_core) return 0; - } else { + else return -ENOMEM; - } } @@ -2757,7 +3727,6 @@ int nes_cm_stop(void) */ static void cm_event_connected(struct nes_cm_event *event) { - u64 u64temp; struct nes_qp *nesqp; struct nes_vnic *nesvnic; struct nes_device *nesdev; @@ -2766,10 +3735,12 @@ static void cm_event_connected(struct nes_cm_event *event) struct ib_qp_attr attr; struct iw_cm_id *cm_id; struct iw_cm_event cm_event; - struct nes_hw_qp_wqe *wqe; struct nes_v4_quad nes_quad; u32 crc_value; int ret; + struct sockaddr_in *laddr; + struct sockaddr_in *raddr; + struct sockaddr_in *cm_event_laddr; /* get all our handles */ cm_node = event->cm_node; @@ -2779,111 +3750,97 @@ static void cm_event_connected(struct nes_cm_event *event) nesvnic = to_nesvnic(nesqp->ibqp.device); nesdev = nesvnic->nesdev; nesadapter = nesdev->nesadapter; + laddr = (struct sockaddr_in *)&cm_id->local_addr; + raddr = (struct sockaddr_in *)&cm_id->remote_addr; + cm_event_laddr = (struct sockaddr_in *)&cm_event.local_addr; - if (nesqp->destroyed) { + if (nesqp->destroyed) return; - } atomic_inc(&cm_connecteds); nes_debug(NES_DBG_CM, "QP%u attempting to connect to 0x%08X:0x%04X on" - " local port 0x%04X. jiffies = %lu.\n", - nesqp->hwqp.qp_id, - ntohl(cm_id->remote_addr.sin_addr.s_addr), - ntohs(cm_id->remote_addr.sin_port), - ntohs(cm_id->local_addr.sin_port), - jiffies); + " local port 0x%04X. 
jiffies = %lu.\n", + nesqp->hwqp.qp_id, ntohl(raddr->sin_addr.s_addr), + ntohs(raddr->sin_port), ntohs(laddr->sin_port), jiffies); nes_cm_init_tsa_conn(nesqp, cm_node); /* set the QP tsa context */ - nesqp->nesqp_context->tcpPorts[0] = cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); - nesqp->nesqp_context->tcpPorts[1] = cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); - nesqp->nesqp_context->ip0 = cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); + nesqp->nesqp_context->tcpPorts[0] = + cpu_to_le16(cm_node->mapped_loc_port); + nesqp->nesqp_context->tcpPorts[1] = + cpu_to_le16(cm_node->mapped_rem_port); + nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr); nesqp->nesqp_context->misc2 |= cpu_to_le32( - (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); + (u32)PCI_FUNC(nesdev->pcidev->devfn) << + NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32( - nes_arp_table(nesdev, le32_to_cpu(nesqp->nesqp_context->ip0), + nes_arp_table(nesdev, + le32_to_cpu(nesqp->nesqp_context->ip0), NULL, NES_ARP_RESOLVE) << 16); nesqp->nesqp_context->ts_val_delta = cpu_to_le32( jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id); nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); + cpu_to_le32((u32)1 << + NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((u32)cm_node->ord_size); /* Adjust tail for not having a LSMM */ - nesqp->hwqp.sq_tail = 1; - -#if defined(NES_SEND_FIRST_WRITE) - if (cm_node->send_write0) { - nes_debug(NES_DBG_CM, "Sending first write.\n"); - wqe = &nesqp->hwqp.sq_vbase[0]; - u64temp = (unsigned long)nesqp; - u64temp |= NES_SW_CONTEXT_ALIGN>>1; - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, - u64temp); - wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(NES_IWARP_SQ_OP_RDMAW); - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0; - wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0; - - /* use the reserved spot on the WQ for the extra first WQE */ - nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | - NES_QPCONTEXT_ORDIRD_WRPDU | NES_QPCONTEXT_ORDIRD_ALSMM)); - nesqp->skip_lsmm = 1; - nesqp->hwqp.sq_tail = 0; - nes_write32(nesdev->regs + NES_WQE_ALLOC, - (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); - } -#endif + /*nesqp->hwqp.sq_tail = 1;*/ + + build_rdma0_msg(cm_node, &nesqp); + + nes_write32(nesdev->regs + NES_WQE_ALLOC, + (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); memset(&nes_quad, 0, sizeof(nes_quad)); - nes_quad.DstIpAdrIndex = cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); - nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; - nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; - nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr); + nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port); + nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port); /* Produce hash key */ crc_value = get_crc_value(&nes_quad); nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n", - nesqp->hte_index, nesqp->hte_index & 
nesadapter->hte_index_mask); + nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); nesqp->hte_index &= nesadapter->hte_index_mask; nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); nesqp->ietf_frame = &cm_node->mpa_frame; - nesqp->private_data_len = (u8) cm_node->mpa_frame_size; + nesqp->private_data_len = (u8)cm_node->mpa_frame_size; cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); - /* modify QP state to rts */ - attr.qp_state = IB_QPS_RTS; - nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); - /* notify OF layer we successfully created the requested connection */ cm_event.event = IW_CM_EVENT_CONNECT_REPLY; - cm_event.status = IW_CM_EVENT_STATUS_ACCEPTED; + cm_event.status = 0; cm_event.provider_data = cm_id->provider_data; - cm_event.local_addr.sin_family = AF_INET; - cm_event.local_addr.sin_port = cm_id->local_addr.sin_port; + cm_event_laddr->sin_family = AF_INET; + cm_event_laddr->sin_port = laddr->sin_port; cm_event.remote_addr = cm_id->remote_addr; - cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; - cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size; + cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size; + cm_event.ird = cm_node->ird_size; + cm_event.ord = cm_node->ord_size; - cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr; + cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr); ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); - nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = %lu\n", - nesqp->hwqp.qp_id, jiffies ); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); + attr.qp_state = IB_QPS_RTS; + nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); - nes_rem_ref(&nesqp->ibqp); + nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. 
jiffies = " + "%lu\n", nesqp->hwqp.qp_id, jiffies); return; } @@ -2904,40 +3861,46 @@ static void cm_event_connect_error(struct nes_cm_event *event) return; cm_id = event->cm_node->cm_id; - if (!cm_id) { + if (!cm_id) return; - } nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id); nesqp = cm_id->provider_data; - if (!nesqp) { + if (!nesqp) return; - } /* notify OF layer about this connection error event */ /* cm_id->rem_ref(cm_id); */ nesqp->cm_id = NULL; cm_id->provider_data = NULL; cm_event.event = IW_CM_EVENT_CONNECT_REPLY; - cm_event.status = IW_CM_EVENT_STATUS_REJECTED; + cm_event.status = -ECONNRESET; cm_event.provider_data = cm_id->provider_data; cm_event.local_addr = cm_id->local_addr; cm_event.remote_addr = cm_id->remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; - nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remove_addr=%08x\n", - cm_event.local_addr.sin_addr.s_addr, cm_event.remote_addr.sin_addr.s_addr); +#ifdef CONFIG_INFINIBAND_NES_DEBUG + { + struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *) + &cm_event.local_addr; + struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *) + &cm_event.remote_addr; + nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remote_addr=%08x\n", + cm_event_laddr->sin_addr.s_addr, cm_event_raddr->sin_addr.s_addr); + } +#endif ret = cm_id->event_handler(cm_id, &cm_event); nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); - nes_rem_ref(&nesqp->ibqp); - cm_id->rem_ref(cm_id); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " + "ret=%d\n", __func__, __LINE__, ret); + cm_id->rem_ref(cm_id); + rem_ref_cm_node(event->cm_node->cm_core, event->cm_node); return; } @@ -2963,18 +3926,32 @@ static void cm_event_reset(struct nes_cm_event *event) nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id); nesqp = cm_id->provider_data; + if (!nesqp) + return; nesqp->cm_id = NULL; /* cm_id->provider_data = NULL; */ cm_event.event = IW_CM_EVENT_DISCONNECT; - cm_event.status = IW_CM_EVENT_STATUS_RESET; + cm_event.status = -ECONNRESET; cm_event.provider_data = cm_id->provider_data; cm_event.local_addr = cm_id->local_addr; cm_event.remote_addr = cm_id->remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; + cm_id->add_ref(cm_id); ret = cm_id->event_handler(cm_id, &cm_event); + atomic_inc(&cm_closes); + cm_event.event = IW_CM_EVENT_CLOSE; + cm_event.status = 0; + cm_event.provider_data = cm_id->provider_data; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node); + ret = cm_id->event_handler(cm_id, &cm_event); + nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); @@ -2990,10 +3967,14 @@ static void cm_event_reset(struct nes_cm_event *event) */ static void cm_event_mpa_req(struct nes_cm_event *event) { - struct iw_cm_id *cm_id; + struct iw_cm_id *cm_id; struct iw_cm_event cm_event; int ret; struct nes_cm_node *cm_node; + struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *) + &cm_event.local_addr; + struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *) + &cm_event.remote_addr; cm_node = event->cm_node; if (!cm_node) @@ -3002,27 +3983,81 @@ static void cm_event_mpa_req(struct nes_cm_event *event) atomic_inc(&cm_connect_reqs); 
nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n", - cm_node, cm_id, jiffies); + cm_node, cm_id, jiffies); cm_event.event = IW_CM_EVENT_CONNECT_REQUEST; - cm_event.status = IW_CM_EVENT_STATUS_OK; + cm_event.status = 0; cm_event.provider_data = (void *)cm_node; - cm_event.local_addr.sin_family = AF_INET; - cm_event.local_addr.sin_port = htons(event->cm_info.loc_port); - cm_event.local_addr.sin_addr.s_addr = htonl(event->cm_info.loc_addr); + cm_event_laddr->sin_family = AF_INET; + cm_event_laddr->sin_port = htons(event->cm_info.loc_port); + cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr); + + cm_event_raddr->sin_family = AF_INET; + cm_event_raddr->sin_port = htons(event->cm_info.rem_port); + cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr); + cm_event.private_data = cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8)cm_node->mpa_frame_size; + if (cm_node->mpa_frame_rev == IETF_MPA_V1) { + cm_event.ird = NES_MAX_IRD; + cm_event.ord = NES_MAX_ORD; + } else { + cm_event.ird = cm_node->ird_size; + cm_event.ord = cm_node->ord_size; + } + + ret = cm_id->event_handler(cm_id, &cm_event); + if (ret) + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n", + __func__, __LINE__, ret); + return; +} + + +static void cm_event_mpa_reject(struct nes_cm_event *event) +{ + struct iw_cm_id *cm_id; + struct iw_cm_event cm_event; + struct nes_cm_node *cm_node; + int ret; + struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *) + &cm_event.local_addr; + struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *) + &cm_event.remote_addr; + + cm_node = event->cm_node; + if (!cm_node) + return; + cm_id = cm_node->cm_id; + + atomic_inc(&cm_connect_reqs); + nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n", + cm_node, cm_id, jiffies); + + cm_event.event = IW_CM_EVENT_CONNECT_REPLY; + cm_event.status = -ECONNREFUSED; + cm_event.provider_data = cm_id->provider_data; + + cm_event_laddr->sin_family = AF_INET; + cm_event_laddr->sin_port = htons(event->cm_info.loc_port); + cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr); + + cm_event_raddr->sin_family = AF_INET; + cm_event_raddr->sin_port = htons(event->cm_info.rem_port); + cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr); - cm_event.remote_addr.sin_family = AF_INET; - cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port); - cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr); + cm_event.private_data = cm_node->mpa_frame_buf; + cm_event.private_data_len = (u8)cm_node->mpa_frame_size; - cm_event.private_data = cm_node->mpa_frame_buf; - cm_event.private_data_len = (u8) cm_node->mpa_frame_size; + nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, " + "remove_addr=%08x\n", + cm_event_laddr->sin_addr.s_addr, + cm_event_raddr->sin_addr.s_addr); ret = cm_id->event_handler(cm_id, &cm_event); if (ret) - printk("%s[%u] OFA CM event_handler returned, ret=%d\n", - __func__, __LINE__, ret); + printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n", + __func__, __LINE__, ret); return; } @@ -3040,7 +4075,8 @@ static int nes_cm_post_event(struct nes_cm_event *event) add_ref_cm_node(event->cm_node); event->cm_info.cm_id->add_ref(event->cm_info.cm_id); INIT_WORK(&event->event_work, nes_cm_event_handler); - nes_debug(NES_DBG_CM, "queue_work, event=%p\n", event); + nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n", + event->cm_node, event); queue_work(event->cm_node->cm_core->event_wq, &event->event_work); @@ 
-3056,46 +4092,56 @@ static int nes_cm_post_event(struct nes_cm_event *event) */ static void nes_cm_event_handler(struct work_struct *work) { - struct nes_cm_event *event = container_of(work, struct nes_cm_event, event_work); + struct nes_cm_event *event = container_of(work, struct nes_cm_event, + event_work); struct nes_cm_core *cm_core; - if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) { + if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) return; - } + cm_core = event->cm_node->cm_core; nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n", - event, event->type, atomic_read(&cm_core->events_posted)); + event, event->type, atomic_read(&cm_core->events_posted)); switch (event->type) { - case NES_CM_EVENT_MPA_REQ: - cm_event_mpa_req(event); - nes_debug(NES_DBG_CM, "CM Event: MPA REQUEST\n"); + case NES_CM_EVENT_MPA_REQ: + cm_event_mpa_req(event); + nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n", + event->cm_node); + break; + case NES_CM_EVENT_RESET: + nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n", + event->cm_node); + cm_event_reset(event); + break; + case NES_CM_EVENT_CONNECTED: + if ((!event->cm_node->cm_id) || + (event->cm_node->state != NES_CM_STATE_TSA)) break; - case NES_CM_EVENT_RESET: - nes_debug(NES_DBG_CM, "CM Event: RESET\n"); - cm_event_reset(event); - break; - case NES_CM_EVENT_CONNECTED: - if ((!event->cm_node->cm_id) || - (event->cm_node->state != NES_CM_STATE_TSA)) { - break; - } - cm_event_connected(event); - nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); + cm_event_connected(event); + nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); + break; + case NES_CM_EVENT_MPA_REJECT: + if ((!event->cm_node->cm_id) || + (event->cm_node->state == NES_CM_STATE_TSA)) break; - case NES_CM_EVENT_ABORTED: - if ((!event->cm_node->cm_id) || (event->cm_node->state == NES_CM_STATE_TSA)) { - break; - } - cm_event_connect_error(event); - nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); - break; - case NES_CM_EVENT_DROPPED_PKT: - nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n"); - break; - default: - nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n"); + cm_event_mpa_reject(event); + nes_debug(NES_DBG_CM, "CM Event: REJECT\n"); + break; + + case NES_CM_EVENT_ABORTED: + if ((!event->cm_node->cm_id) || + (event->cm_node->state == NES_CM_STATE_TSA)) break; + cm_event_connect_error(event); + nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); + break; + case NES_CM_EVENT_DROPPED_PKT: + nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n"); + break; + default: + nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n"); + break; } atomic_dec(&cm_core->events_posted); diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 7717cb2ab50..f522cf63978 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -39,11 +39,27 @@ #define NES_MANAGE_APBVT_DEL 0 #define NES_MANAGE_APBVT_ADD 1 +#define NES_MPA_REQUEST_ACCEPT 1 +#define NES_MPA_REQUEST_REJECT 2 + /* IETF MPA -- defines, enums, structs */ #define IEFT_MPA_KEY_REQ "MPA ID Req Frame" #define IEFT_MPA_KEY_REP "MPA ID Rep Frame" #define IETF_MPA_KEY_SIZE 16 #define IETF_MPA_VERSION 1 +#define IETF_MAX_PRIV_DATA_LEN 512 +#define IETF_MPA_FRAME_SIZE 20 +#define IETF_RTR_MSG_SIZE 4 +#define IETF_MPA_V2_FLAG 0x10 + +/* IETF RTR MSG Fields */ +#define IETF_PEER_TO_PEER 0x8000 +#define IETF_FLPDU_ZERO_LEN 0x4000 +#define IETF_RDMA0_WRITE 0x8000 +#define IETF_RDMA0_READ 0x4000 +#define IETF_NO_IRD_ORD 0x3FFF +#define NES_MAX_IRD 0x40 +#define NES_MAX_ORD 0x7F enum ietf_mpa_flags { IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */ @@ -51,7 +67,7 @@ enum ietf_mpa_flags { IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */ }; -struct ietf_mpa_frame { +struct ietf_mpa_v1 { u8 key[IETF_MPA_KEY_SIZE]; u8 flags; u8 rev; @@ -61,6 +77,20 @@ struct ietf_mpa_frame { #define ietf_mpa_req_resp_frame ietf_mpa_frame +struct ietf_rtr_msg { + __be16 ctrl_ird; + __be16 ctrl_ord; +}; + +struct ietf_mpa_v2 { + u8 key[IETF_MPA_KEY_SIZE]; + u8 flags; + u8 rev; + __be16 priv_data_len; + struct ietf_rtr_msg rtr_msg; + u8 priv_data[0]; +}; + struct nes_v4_quad { u32 rsvd0; __le32 DstIpAdrIndex; /* Only most significant 5 bits are valid */ @@ -76,6 +106,10 @@ enum nes_timer_type { NES_TIMER_TYPE_CLOSE, }; +#define NES_PASSIVE_STATE_INDICATED 0 +#define NES_DO_NOT_SEND_RESET_EVENT 1 +#define NES_SEND_RESET_EVENT 2 + #define MAX_NES_IFS 4 #define SET_ACK 1 @@ -83,6 +117,8 @@ enum nes_timer_type { #define SET_FIN 4 #define SET_RST 8 +#define TCP_OPTIONS_PADDING 3 + struct option_base { u8 optionnum; u8 length; @@ -140,6 +176,7 @@ struct nes_timer_entry { #endif #define NES_SHORT_TIME (10) #define NES_LONG_TIME (2000*HZ/1000) +#define NES_MAX_TIMEOUT ((unsigned long) (12*HZ)) #define NES_CM_HASHTABLE_SIZE 1024 #define NES_CM_TCP_TIMER_INTERVAL 3000 @@ -159,6 +196,7 @@ struct nes_timer_entry { #define NES_CM_DEF_SEQ2 0x18ed5740 #define NES_CM_DEF_LOCAL_ID2 0xb807 +#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_RTR_MSG_SIZE + IETF_MAX_PRIV_DATA_LEN) typedef u32 nes_addr_t; @@ -177,6 +215,8 @@ enum nes_cm_node_state { NES_CM_STATE_ESTABLISHED, NES_CM_STATE_ACCEPTING, NES_CM_STATE_MPAREQ_SENT, + NES_CM_STATE_MPAREQ_RCVD, + NES_CM_STATE_MPAREJ_RCVD, NES_CM_STATE_TSA, NES_CM_STATE_FIN_WAIT1, NES_CM_STATE_FIN_WAIT2, @@ -184,9 +224,35 @@ enum nes_cm_node_state { NES_CM_STATE_TIME_WAIT, NES_CM_STATE_LAST_ACK, NES_CM_STATE_CLOSING, + NES_CM_STATE_LISTENER_DESTROYED, NES_CM_STATE_CLOSED }; +enum mpa_frame_version { + IETF_MPA_V1 = 1, + IETF_MPA_V2 = 2 +}; + +enum mpa_frame_key { + MPA_KEY_REQUEST, + MPA_KEY_REPLY +}; + +enum send_rdma0 { + SEND_RDMA_READ_ZERO = 1, + SEND_RDMA_WRITE_ZERO = 2 +}; + +enum nes_tcpip_pkt_type { + NES_PKT_TYPE_UNKNOWN, + NES_PKT_TYPE_SYN, + NES_PKT_TYPE_SYNACK, + NES_PKT_TYPE_ACK, + NES_PKT_TYPE_FIN, + NES_PKT_TYPE_RST +}; + + /* type of nes connection */ enum nes_cm_conn_type { NES_CM_IWARP_CONN_TYPE, @@ -218,17 +284,17 @@ struct nes_cm_tcp_context { enum nes_cm_listener_state { - NES_CM_LISTENER_PASSIVE_STATE=1, - NES_CM_LISTENER_ACTIVE_STATE=2, - NES_CM_LISTENER_EITHER_STATE=3 + NES_CM_LISTENER_PASSIVE_STATE = 1, + NES_CM_LISTENER_ACTIVE_STATE = 2, + NES_CM_LISTENER_EITHER_STATE = 3 }; struct nes_cm_listener { struct list_head list; struct nes_cm_core *cm_core; u8 loc_mac[ETH_ALEN]; - nes_addr_t 
loc_addr; - u16 loc_port; + nes_addr_t loc_addr, mapped_loc_addr; + u16 loc_port, mapped_loc_port; struct iw_cm_id *cm_id; enum nes_cm_conn_type conn_type; atomic_t ref_count; @@ -241,10 +307,10 @@ struct nes_cm_listener { /* per connection node and node state information */ struct nes_cm_node { - u32 hashkey; - nes_addr_t loc_addr, rem_addr; + nes_addr_t mapped_loc_addr, mapped_rem_addr; u16 loc_port, rem_port; + u16 mapped_loc_port, mapped_rem_port; u8 loc_mac[ETH_ALEN]; u8 rem_mac[ETH_ALEN]; @@ -257,16 +323,22 @@ struct nes_cm_node { struct net_device *netdev; struct nes_cm_node *loopbackpartner; - struct list_head retrans_list; + + struct nes_timer_entry *send_entry; + struct nes_timer_entry *recv_entry; spinlock_t retrans_list_lock; - struct list_head recv_list; - spinlock_t recv_list_lock; + enum send_rdma0 send_rdma0_op; - int send_write0; union { - struct ietf_mpa_frame mpa_frame; - u8 mpa_frame_buf[NES_CM_DEFAULT_MTU]; + struct ietf_mpa_v1 mpa_frame; + struct ietf_mpa_v2 mpa_v2_frame; + u8 mpa_frame_buf[MAX_CM_BUFFER]; }; + enum mpa_frame_version mpa_frame_rev; + u16 ird_size; + u16 ord_size; + u16 mpav2_ird_ord; + u16 mpa_frame_size; struct iw_cm_id *cm_id; struct list_head list; @@ -276,6 +348,10 @@ struct nes_cm_node { struct nes_vnic *nesvnic; int apbvt_set; int accept_pend; + struct list_head timer_entry; + struct list_head reset_entry; + struct nes_qp *nesqp; + atomic_t passive_state; }; /* structure for client or CM to fill when making CM api calls. */ @@ -290,6 +366,10 @@ struct nes_cm_info { u16 rem_port; nes_addr_t loc_addr; nes_addr_t rem_addr; + u16 mapped_loc_port; + u16 mapped_rem_port; + nes_addr_t mapped_loc_addr; + nes_addr_t mapped_rem_addr; enum nes_cm_conn_type conn_type; int backlog; @@ -302,6 +382,7 @@ enum nes_cm_event_type { NES_CM_EVENT_MPA_REQ, NES_CM_EVENT_MPA_CONNECT, NES_CM_EVENT_MPA_ACCEPT, + NES_CM_EVENT_MPA_REJECT, NES_CM_EVENT_MPA_ESTABLISHED, NES_CM_EVENT_CONNECTED, NES_CM_EVENT_CLOSED, @@ -333,7 +414,6 @@ struct nes_cm_core { u32 mtu; u32 free_tx_pkt_max; u32 rx_pkt_posted; - struct sk_buff_head tx_free_list; atomic_t ht_node_cnt; struct list_head connected_nodes; /* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */ @@ -366,13 +446,11 @@ struct nes_cm_ops { struct nes_cm_info *); int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *); struct nes_cm_node * (*connect)(struct nes_cm_core *, - struct nes_vnic *, struct ietf_mpa_frame *, + struct nes_vnic *, u16, void *, struct nes_cm_info *); int (*close)(struct nes_cm_core *, struct nes_cm_node *); - int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); - int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *, - struct nes_cm_node *); + int (*accept)(struct nes_cm_core *, struct nes_cm_node *); + int (*reject)(struct nes_cm_core *, struct nes_cm_node *); int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); int (*destroy_cm_core)(struct nes_cm_core *); @@ -383,8 +461,6 @@ struct nes_cm_ops { int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *, enum nes_timer_type, int, int); -int nes_cm_disconn(struct nes_qp *); - int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *); int nes_reject(struct iw_cm_id *, const void *, u8); int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *); @@ -394,5 +470,7 @@ int nes_destroy_listen(struct iw_cm_id *); int nes_cm_recv(struct sk_buff *, struct net_device *); int nes_cm_start(void); int nes_cm_stop(void); +int nes_add_ref_cm_node(struct nes_cm_node *cm_node); +int 
nes_rem_ref_cm_node(struct nes_cm_node *cm_node); #endif /* NES_CM_H */ diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h index da9daba8e66..a69eef16d72 100644 --- a/drivers/infiniband/hw/nes/nes_context.h +++ b/drivers/infiniband/hw/nes/nes_context.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 85f26d19a32..90200245c5e 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -39,6 +39,7 @@ #include <linux/tcp.h> #include <linux/if_vlan.h> #include <linux/inet_lro.h> +#include <linux/slab.h> #include "nes.h" @@ -46,6 +47,10 @@ static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR; module_param(nes_lro_max_aggr, uint, 0444); MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation"); +static int wide_ppm_offset; +module_param(wide_ppm_offset, int, 0644); +MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm"); + static u32 crit_err_count; u32 int_mod_timer_init; u32 int_mod_cq_depth_256; @@ -55,24 +60,26 @@ u32 int_mod_cq_depth_24; u32 int_mod_cq_depth_16; u32 int_mod_cq_depth_4; u32 int_mod_cq_depth_1; - +static const u8 nes_max_critical_error_count = 100; #include "nes_cm.h" static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq); static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count); static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, - u8 OneG_Mode); + struct nes_adapter *nesadapter, u8 OneG_Mode); static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq); static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq); static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq); static void nes_process_iwarp_aeqe(struct nes_device *nesdev, struct nes_hw_aeqe *aeqe); +static void process_critical_error(struct nes_device *nesdev); static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); +static void nes_terminate_start_timer(struct nes_qp *nesqp); #ifdef CONFIG_INFINIBAND_NES_DEBUG static unsigned char *nes_iwarp_state_str[] = { - "Non-Existant", + "Non-Existent", "Idle", "RTS", "Closing", @@ -83,7 +90,7 @@ static unsigned char *nes_iwarp_state_str[] = { }; static unsigned char *nes_tcp_state_str[] = { - "Non-Existant", + "Non-Existent", "Closed", "Listen", "SYN Sent", @@ -102,6 +109,14 @@ static unsigned char *nes_tcp_state_str[] = { }; #endif +static inline void print_ip(struct nes_cm_node *cm_node) +{ + unsigned char *rem_addr; + if (cm_node) { + rem_addr = (unsigned char *)&cm_node->rem_addr; + printk(KERN_ERR PFX "Remote IP addr: %pI4\n", rem_addr); + } +} /** * nes_nic_init_timer_defaults @@ -222,11 +237,10 @@ static void nes_nic_tune_timer(struct nes_device *nesdev) } /* 
boundary checking */ - if (shared_timer->timer_in_use > NES_NIC_FAST_TIMER_HIGH) - shared_timer->timer_in_use = NES_NIC_FAST_TIMER_HIGH; - else if (shared_timer->timer_in_use < NES_NIC_FAST_TIMER_LOW) { - shared_timer->timer_in_use = NES_NIC_FAST_TIMER_LOW; - } + if (shared_timer->timer_in_use > shared_timer->threshold_high) + shared_timer->timer_in_use = shared_timer->threshold_high; + else if (shared_timer->timer_in_use < shared_timer->threshold_low) + shared_timer->timer_in_use = shared_timer->threshold_low; nesdev->currcq_count = 0; @@ -254,6 +268,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { u32 adapter_size; u32 arp_table_size; u16 vendor_id; + u16 device_id; u8 OneG_Mode; u8 func_index; @@ -292,9 +307,6 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0) return NULL; - if (nes_init_serdes(nesdev, hw_rev, port_count, OneG_Mode)) - return NULL; - nes_init_csr_ne020(nesdev, hw_rev, port_count); max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE); nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp); @@ -353,6 +365,29 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n", nesadapter, (u32)sizeof(struct nes_adapter), adapter_size); + if (nes_read_eeprom_values(nesdev, nesadapter)) { + printk(KERN_ERR PFX "Unable to read EEPROM data.\n"); + kfree(nesadapter); + return NULL; + } + + nesadapter->vendor_id = (((u32) nesadapter->mac_addr_high) << 8) | + (nesadapter->mac_addr_low >> 24); + + pci_bus_read_config_word(nesdev->pcidev->bus, nesdev->pcidev->devfn, + PCI_DEVICE_ID, &device_id); + nesadapter->vendor_part_id = device_id; + + if (nes_init_serdes(nesdev, hw_rev, port_count, nesadapter, + OneG_Mode)) { + kfree(nesadapter); + return NULL; + } + nes_init_csr_ne020(nesdev, hw_rev, port_count); + + memset(nesadapter->pft_mcast_map, 255, + sizeof nesadapter->pft_mcast_map); + /* populate the new nesadapter */ nesadapter->devfn = nesdev->pcidev->devfn; nesadapter->bus_number = nesdev->pcidev->bus->number; @@ -397,8 +432,9 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { nesadapter->base_pd = 1; - nesadapter->device_cap_flags = - IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW; + nesadapter->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_WINDOW | + IB_DEVICE_MEM_MGT_EXTENSIONS; nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter) [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]); @@ -409,11 +445,12 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]); - /* mark the usual suspect QPs and CQs as in use */ + /* mark the usual suspect QPs, MR and CQs as in use */ for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) { set_bit(u32temp, nesadapter->allocated_qps); set_bit(u32temp, nesadapter->allocated_cqs); } + set_bit(0, nesadapter->allocated_mrs); for (u32temp = 0; u32temp < 20; u32temp++) set_bit(u32temp, nesadapter->allocated_pds); @@ -454,7 +491,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { nesadapter->max_irrq_wr = (u32temp >> 16) & 3; nesadapter->max_sge = 4; - nesadapter->max_cqe = 32767; + nesadapter->max_cqe = 32766; if (nes_read_eeprom_values(nesdev, nesadapter)) { 
printk(KERN_ERR PFX "Unable to read EEPROM data.\n"); @@ -468,20 +505,25 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { /* setup port configuration */ if (nesadapter->port_count == 1) { - u32temp = 0x00000000; + nesadapter->log_port = 0x00000000; if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002); else nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003); } else { - if (nesadapter->port_count == 2) - u32temp = 0x00000044; - else - u32temp = 0x000000e4; + if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { + nesadapter->log_port = 0x000000D8; + } else { + if (nesadapter->port_count == 2) + nesadapter->log_port = 0x00000044; + else + nesadapter->log_port = 0x000000e4; + } nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003); } - nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, u32temp); + nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, + nesadapter->log_port); nes_debug(NES_DBG_INIT, "Probe time, LOG2PHY=%u\n", nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT)); @@ -521,7 +563,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { } if (int_cnt > 1) { spin_lock_irqsave(&nesadapter->phy_lock, flags); - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8); mh_detected++; reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); reset_value |= 0x0000003d; @@ -546,7 +588,7 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { if (++ext_cnt > int_cnt) { spin_lock_irqsave(&nesadapter->phy_lock, flags); nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, - 0x0000F0C8); + 0x0000F088); mh_detected++; reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET); reset_value |= 0x0000003d; @@ -637,7 +679,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ i = 0; while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) mdelay(1); - if (i >= 10000) { + if (i > 10000) { nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n"); return 0; } @@ -645,7 +687,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ i = 0; while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000) mdelay(1); - if (i >= 10000) { + if (i > 10000) { printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n", nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS)); return 0; @@ -671,7 +713,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ i = 0; while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) mdelay(1); - if (i >= 10000) { + if (i > 10000) { nes_debug(NES_DBG_INIT, "Did not see port soft reset done.\n"); return 0; } @@ -681,7 +723,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0) & 0x0000000f)) != 0x0000000f) && i++ < 5000) mdelay(1); - if (i >= 5000) { + if (i > 5000) { nes_debug(NES_DBG_INIT, "Serdes 0 not ready, status=%x\n", u32temp); return 0; } @@ -692,7 +734,7 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) & 0x0000000f)) != 0x0000000f) && i++ < 5000) mdelay(1); - if (i >= 5000) { + if (i > 5000) { 
nes_debug(NES_DBG_INIT, "Serdes 1 not ready, status=%x\n", u32temp); return 0; } @@ -706,23 +748,70 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ * nes_init_serdes */ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, - u8 OneG_Mode) + struct nes_adapter *nesadapter, u8 OneG_Mode) { int i; u32 u32temp; + u32 sds; if (hw_rev != NE020_REV) { /* init serdes 0 */ + switch (nesadapter->phy_type[0]) { + case NES_PHY_TYPE_CX4: + if (wide_ppm_offset) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA); + else + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); + break; + case NES_PHY_TYPE_KR: + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000); + break; + case NES_PHY_TYPE_PUMA_1G: + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); + sds |= 0x00000100; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds); + break; + default: + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); + break; + } - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); if (!OneG_Mode) nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000); - if (port_count > 1) { - /* init serdes 1 */ + + if (port_count < 2) + return 0; + + /* init serdes 1 */ + if (!(OneG_Mode && (nesadapter->phy_type[1] != NES_PHY_TYPE_PUMA_1G))) nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF); - if (!OneG_Mode) - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000); - } + + switch (nesadapter->phy_type[1]) { + case NES_PHY_TYPE_ARGUS: + case NES_PHY_TYPE_SFP_D: + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000); + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000); + break; + case NES_PHY_TYPE_CX4: + if (wide_ppm_offset) + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA); + break; + case NES_PHY_TYPE_KR: + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000); + break; + case NES_PHY_TYPE_PUMA_1G: + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); + sds |= 0x000000100; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds); + } + if (!OneG_Mode) { + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000); + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1); + sds &= 0xFFFFFFBF; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds); + } } else { /* init serdes 0 */ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008); @@ -730,7 +819,7 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0) & 0x0000000f)) != 0x0000000f) && i++ < 5000) mdelay(1); - if (i >= 5000) { + if (i > 5000) { nes_debug(NES_DBG_PHY, "Init: serdes 0 not ready, status=%x\n", u32temp); return 1; } @@ -753,7 +842,7 @@ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) & 0x0000000f)) != 0x0000000f) && (i++ < 5000)) mdelay(1); - if (i >= 5000) { + if (i > 5000) { printk("%s: Init: serdes 1 not ready, status=%x\n", __func__, u32temp); /* return 1; */ } @@ -826,7 +915,8 @@ static void 
nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_cou nes_write_indexed(nesdev, 0x00005000, 0x00018000); /* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */ - nes_write_indexed(nesdev, 0x00005004, 0x00020001); + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, (wqm_quanta << 1) | + 0x00000001); nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F); nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F); nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F); @@ -849,6 +939,12 @@ static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_cou u32temp &= 0x7fffffff; u32temp |= 0x7fff0010; nes_write_indexed(nesdev, 0x000021f8, u32temp); + if (port_count > 1) { + u32temp = nes_read_indexed(nesdev, 0x000023f8); + u32temp &= 0x7fffffff; + u32temp |= 0x7fff0010; + nes_write_indexed(nesdev, 0x000023f8, u32temp); + } } } @@ -1206,209 +1302,252 @@ int nes_destroy_cqp(struct nes_device *nesdev) /** - * nes_init_phy + * nes_init_1g_phy */ -int nes_init_phy(struct nes_device *nesdev) +static int nes_init_1g_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index) { - struct nes_adapter *nesadapter = nesdev->nesadapter; u32 counter = 0; - u32 sds_common_control0; - u32 mac_index = nesdev->mac_index; - u32 tx_config = 0; u16 phy_data; + int ret = 0; + + nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000); + + /* Reset the PHY */ + nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000); + udelay(100); + counter = 0; + do { + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + if (counter++ > 100) { + ret = -1; + break; + } + } while (phy_data & 0x8000); + + /* Setting no phy loopback */ + phy_data &= 0xbfff; + phy_data |= 0x1140; + nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data); + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data); + nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data); + + /* Setting the interrupt mask */ + nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee); + nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data); + + /* turning on flow control */ + nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00); + nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data); + + /* Clear Half duplex */ + nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100)); + nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data); + + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300); + + return ret; +} + + +/** + * nes_init_2025_phy + */ +static int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index) +{ u32 temp_phy_data = 0; u32 temp_phy_data2 = 0; - u32 i = 0; - - if ((nesadapter->OneG_Mode) && - (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { - nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index); - if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) { - printk(PFX "%s: Programming mdc config for 1G\n", __func__); - tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); - tx_config |= 0x04; - nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + u32 counter = 0; + u32 sds; + u32 mac_index = nesdev->mac_index; + int ret = 0; + unsigned int first_attempt = 1; + + /* Check firmware heartbeat */ + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 
0xd7ee); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + udelay(1500); + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + if (temp_phy_data != temp_phy_data2) { + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if ((temp_phy_data & 0xff) > 0x20) + return 0; + printk(PFX "Reinitialize external PHY\n"); + } + + /* no heartbeat, configure the PHY */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); + + switch (phy_type) { + case NES_PHY_TYPE_ARGUS: + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); + + /* setup LEDs */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009); + break; + + case NES_PHY_TYPE_SFP_D: + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); + + /* setup LEDs */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009); + break; + + case NES_PHY_TYPE_KR: + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0010); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0080); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00); + + /* setup LEDs */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x000B); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x0003); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0004); + + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0022, 0x406D); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0023, 0x0020); + break; + } + + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528); + + /* Bring PHY out of reset */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002); + + /* Check for heartbeat */ + counter = 0; + mdelay(690); + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + do { + if (counter++ > 150) { + printk(PFX "No PHY heartbeat\n"); + break; } + mdelay(1); + 
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee); + temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + } while ((temp_phy_data2 == temp_phy_data)); - nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n", - nesadapter->phy_index[mac_index], phy_data); - nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000); - - /* Reset the PHY */ - nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000); - udelay(100); - counter = 0; - do { - nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data); - if (counter++ > 100) break; - } while (phy_data & 0x8000); - - /* Setting no phy loopback */ - phy_data &= 0xbfff; - phy_data |= 0x1140; - nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data); - nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data); - - nes_read_1G_phy_reg(nesdev, 0x17, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x17 = 0x%X.\n", phy_data); - - nes_read_1G_phy_reg(nesdev, 0x1e, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x1e = 0x%X.\n", phy_data); - - /* Setting the interrupt mask */ - nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data); - nes_write_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], 0xffee); - - nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data); - - /* turning on flow control */ - nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data); - nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], - (phy_data & ~(0x03E0)) | 0xc00); - /* nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], - phy_data | 0xc00); */ - nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data); - - nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data); - /* Clear Half duplex */ - nes_write_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], - phy_data & ~(0x0100)); - nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data); - nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data); - - nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data); - nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300); - } else { - if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) || - (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) { - /* setup 10G MDIO operation */ - tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); - tx_config |= 0x14; - nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); + /* wait for tracking */ + counter = 0; + do { + nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + if (counter++ > 300) { + if 
(((temp_phy_data & 0xff) == 0x0) && first_attempt) { + first_attempt = 0; + counter = 0; + /* reset AMCC PHY and try again */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0); + nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040); + continue; + } else { + ret = 1; + break; + } } - if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) { - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); - - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - mdelay(10); - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); - temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - - /* - * if firmware is already running (like from a - * driver un-load/load, don't do anything. - */ - if (temp_phy_data == temp_phy_data2) { - /* configure QT2505 AMCC PHY */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0x0000, 0x8000); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0000); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc302, 0x0044); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc318, 0x0052); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0000); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528); + mdelay(10); + } while ((temp_phy_data & 0xff) < 0x30); + + /* setup signal integrity */ + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032); + if (phy_type == NES_PHY_TYPE_KR) { + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x000C); + } else { + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002); + nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063); + } - /* - * remove micro from reset; chip boots from ROM, - * uploads EEPROM f/w image, uC executes f/w - */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0002); + /* reset serdes */ + sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200); + sds |= 0x1; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds); + sds &= 0xfffffffe; + nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds); - /* - * wait for heart beat to start to - * know loading is done - */ - counter = 0; - do { - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - if (counter++ > 1000) { - nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from heartbeat check <this is bad!!!> \n"); - break; - } - mdelay(100); - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee); - temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - } while ((temp_phy_data2 == temp_phy_data)); + counter = 0; + while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) + && (counter++ < 5000)) + ; - /* - * wait for tracking to start to know - * f/w is good to go - */ - counter = 0; - do { - 
nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7fd); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - if (counter++ > 1000) { - nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from status check <this is bad!!!> \n"); - break; - } - mdelay(1000); - /* - * nes_debug(NES_DBG_PHY, "AMCC PHY- phy_status not ready yet = 0x%02X\n", - * temp_phy_data); - */ - } while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70)); - - /* set LOS Control invert RXLOSB_I_PADINV */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd003, 0x0000); - /* set LOS Control to mask of RXLOSB_I */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc314, 0x0042); - /* set LED1 to input mode (LED1 and LED2 share same LED) */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd006, 0x0007); - /* set LED2 to RX link_status and activity */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd007, 0x000A); - /* set LED3 to RX link_status */ - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd008, 0x0009); + return ret; +} - /* - * reset the res-calibration on t2 - * serdes; ensures it is stable after - * the amcc phy is stable - */ - sds_common_control0 = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0); - sds_common_control0 |= 0x1; - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0); +/** + * nes_init_phy + */ +int nes_init_phy(struct nes_device *nesdev) +{ + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 mac_index = nesdev->mac_index; + u32 tx_config = 0; + unsigned long flags; + u8 phy_type = nesadapter->phy_type[mac_index]; + u8 phy_index = nesadapter->phy_index[mac_index]; + int ret = 0; - /* release the res-calibration reset */ - sds_common_control0 &= 0xfffffffe; - nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0); + tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + if (phy_type == NES_PHY_TYPE_1G) { + /* setup 1G MDIO operation */ + tx_config &= 0xFFFFFFE3; + tx_config |= 0x04; + } else { + /* setup 10G MDIO operation */ + tx_config &= 0xFFFFFFE3; + tx_config |= 0x1D; + } + nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); - i = 0; - while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040) - && (i++ < 5000)) { - /* mdelay(1); */ - } + spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); - /* - * wait for link train done before moving on, - * or will get an interupt storm - */ - counter = 0; - do { - temp_phy_data = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + - (0x200 * (nesdev->mac_index & 1))); - if (counter++ > 1000) { - nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from link train wait <this is bad, link didnt train!!!>\n"); - break; - } - mdelay(1); - } while (((temp_phy_data & 0x0f1f0000) != 0x0f0f0000)); - } - } + switch (phy_type) { + case NES_PHY_TYPE_1G: + ret = nes_init_1g_phy(nesdev, phy_type, phy_index); + break; + case NES_PHY_TYPE_ARGUS: + case NES_PHY_TYPE_SFP_D: + case NES_PHY_TYPE_KR: + ret = nes_init_2025_phy(nesdev, phy_type, phy_index); + break; } - return 0; + + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + + return ret; } @@ -1423,6 +1562,7 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic) struct nes_hw_nic_rq_wqe *nic_rqe; struct nes_hw_nic *nesnic; struct nes_device *nesdev; + struct nes_rskb_cb *cb; u32 rx_wqes_posted = 0; nesnic = 
&nesvnic->nic; @@ -1448,6 +1588,9 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic) bus_address = pci_map_single(nesdev->pcidev, skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->busaddr = bus_address; + cb->maplen = nesvnic->max_frame_size; nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_head]; nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = @@ -1537,6 +1680,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) u32 cqp_head; u32 counter; u32 wqe_count; + struct nes_rskb_cb *cb; u8 jumbomode=0; /* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */ @@ -1595,7 +1739,6 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) nesvnic->post_cqp_request = nes_post_cqp_request; nesvnic->mcrq_mcast_filter = NULL; - spin_lock_init(&nesvnic->nic.sq_lock); spin_lock_init(&nesvnic->nic.rq_lock); /* setup the RQ */ @@ -1714,6 +1857,9 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) pmem = pci_map_single(nesdev->pcidev, skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->busaddr = pmem; + cb->maplen = nesvnic->max_frame_size; nic_rqe = &nesvnic->nic.rq_vbase[counter]; nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size); @@ -1742,6 +1888,13 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) jumbomode = 1; nes_nic_init_timer_defaults(nesdev, jumbomode); } + if ((nesdev->nesadapter->allow_unaligned_fpdus) && + (nes_init_mgt_qp(nesdev, netdev, nesvnic))) { + nes_debug(NES_DBG_INIT, "%s: Out of memory for pau nic\n", netdev->name); + nes_destroy_nic_qp(nesvnic); + return -ENOMEM; + } + nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr; nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; @@ -1759,25 +1912,91 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) */ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) { + u64 u64temp; + dma_addr_t bus_address; struct nes_device *nesdev = nesvnic->nesdev; struct nes_hw_cqp_wqe *cqp_wqe; - struct nes_hw_nic_rq_wqe *nic_rqe; - u64 wqe_frag; + struct nes_hw_nic_sq_wqe *nic_sqe; + __le16 *wqe_fragment_length; + u16 wqe_fragment_index; u32 cqp_head; + u32 wqm_cfg0; unsigned long flags; + struct sk_buff *rx_skb; + struct nes_rskb_cb *cb; int ret; + if (nesdev->nesadapter->allow_unaligned_fpdus) + nes_destroy_mgt(nesvnic); + + /* clear wqe stall before destroying NIC QP */ + wqm_cfg0 = nes_read_indexed(nesdev, NES_IDX_WQM_CONFIG0); + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0 & 0xFFFF7FFF); + /* Free remaining NIC receive buffers */ while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) { - nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail]; - wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); - wqe_frag |= ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32; - pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag, - nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + rx_skb = nesvnic->nic.rx_skb[nesvnic->nic.rq_tail]; + cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; + pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, + PCI_DMA_FROMDEVICE); + dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]); nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1); } + /* Free remaining NIC transmit buffers */ + while (nesvnic->nic.sq_head != nesvnic->nic.sq_tail) { + nic_sqe = 
&nesvnic->nic.sq_vbase[nesvnic->nic.sq_tail]; + wqe_fragment_index = 1; + wqe_fragment_length = (__le16 *) + &nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; + /* bump past the vlan tag */ + wqe_fragment_length++; + if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) { + u64temp = (u64)le32_to_cpu( + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+ + wqe_fragment_index*2]); + u64temp += ((u64)le32_to_cpu( + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX + + wqe_fragment_index*2]))<<32; + bus_address = (dma_addr_t)u64temp; + if (test_and_clear_bit(nesvnic->nic.sq_tail, + nesvnic->nic.first_frag_overflow)) { + pci_unmap_single(nesdev->pcidev, + bus_address, + le16_to_cpu(wqe_fragment_length[ + wqe_fragment_index++]), + PCI_DMA_TODEVICE); + } + for (; wqe_fragment_index < 5; wqe_fragment_index++) { + if (wqe_fragment_length[wqe_fragment_index]) { + u64temp = le32_to_cpu( + nic_sqe->wqe_words[ + NES_NIC_SQ_WQE_FRAG0_LOW_IDX+ + wqe_fragment_index*2]); + u64temp += ((u64)le32_to_cpu( + nic_sqe->wqe_words[ + NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+ + wqe_fragment_index*2]))<<32; + bus_address = (dma_addr_t)u64temp; + pci_unmap_page(nesdev->pcidev, + bus_address, + le16_to_cpu( + wqe_fragment_length[ + wqe_fragment_index]), + PCI_DMA_TODEVICE); + } else + break; + } + } + if (nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]) + dev_kfree_skb( + nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]); + + nesvnic->nic.sq_tail = (nesvnic->nic.sq_tail + 1) + & (nesvnic->nic.sq_size - 1); + } + spin_lock_irqsave(&nesdev->cqp.lock, flags); /* Destroy NIC QP */ @@ -1830,6 +2049,9 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase, nesvnic->nic_pbase); + + /* restore old wqm_cfg0 value */ + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0); } /** @@ -1894,7 +2116,30 @@ int nes_napi_isr(struct nes_device *nesdev) } } - +static void process_critical_error(struct nes_device *nesdev) +{ + u32 debug_error; + u32 nes_idx_debug_error_masks0 = 0; + u16 error_module = 0; + + debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS); + printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n", + (u16)debug_error); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS, + 0x01010000 | (debug_error & 0x0000ffff)); + if (crit_err_count++ > 10) + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17); + error_module = (u16) (debug_error & 0x1F00) >> 8; + if (++nesdev->nesadapter->crit_error_count[error_module-1] >= + nes_max_critical_error_count) { + printk(KERN_ERR PFX "Masking off critical error for module " + "0x%02X\n", (u16)error_module); + nes_idx_debug_error_masks0 = nes_read_indexed(nesdev, + NES_IDX_DEBUG_ERROR_MASKS0); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, + nes_idx_debug_error_masks0 | (1 << error_module)); + } +} /** * nes_dpc */ @@ -1909,7 +2154,6 @@ void nes_dpc(unsigned long param) u32 timer_stat; u32 temp_int_stat; u32 intf_int_stat; - u32 debug_error; u32 processed_intf_int = 0; u16 processed_timer_int = 0; u16 completion_ints = 0; @@ -1987,14 +2231,7 @@ void nes_dpc(unsigned long param) intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT); intf_int_stat &= nesdev->intf_int_req; if (NES_INTF_INT_CRITERR & intf_int_stat) { - debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS); - printk(KERN_ERR PFX "Critical Error reported by device!!! 
0x%02X\n", - (u16)debug_error); - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS, - 0x01010000 | (debug_error & 0x0000ffff)); - /* BUG(); */ - if (crit_err_count++ > 10) - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17); + process_critical_error(nesdev); } if (NES_INTF_INT_PCIERR & intf_int_stat) { printk(KERN_ERR PFX "PCI Error reported by device!!!\n"); @@ -2145,6 +2382,8 @@ static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq) if (++head >= aeq_size) head = 0; + + nes_write32(nesdev->regs + NES_AEQ_ALLOC, 1 << 16); } while (1); aeq->aeq_head = head; @@ -2234,6 +2473,7 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) u16 temp_phy_data; u32 pcs_val = 0x0f0f0000; u32 pcs_mask = 0x0f1f0000; + u32 cdr_ctrl; spin_lock_irqsave(&nesadapter->phy_lock, flags); if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) { @@ -2241,7 +2481,6 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) return; } nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_INTERRUPT; - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); /* ack the MAC interrupt */ mac_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200)); @@ -2252,13 +2491,12 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) if (mac_status & (NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT)) { nesdev->link_status_interrupts++; - if (0 == (++nesadapter->link_interrupt_count[mac_index] % ((u16)NES_MAX_LINK_INTERRUPTS))) { - spin_lock_irqsave(&nesadapter->phy_lock, flags); + if (0 == (++nesadapter->link_interrupt_count[mac_index] % ((u16)NES_MAX_LINK_INTERRUPTS))) nes_reset_link(nesdev, mac_index); - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); - } + /* read the PHY interrupt status register */ - if (nesadapter->OneG_Mode) { + if ((nesadapter->OneG_Mode) && + (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { do { nes_read_1G_phy_reg(nesdev, 0x1a, nesadapter->phy_index[mac_index], &phy_data); @@ -2331,22 +2569,9 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) } } else { switch (nesadapter->phy_type[mac_index]) { - case NES_PHY_TYPE_IRIS: - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); - temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - u32temp = 20; - do { - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); - phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - if ((phy_data == temp_phy_data) || (!(--u32temp))) - break; - temp_phy_data = phy_data; - } while (1); - nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", - __func__, phy_data, nesadapter->mac_link_down[mac_index] ? 
"DOWN" : "UP"); - break; - case NES_PHY_TYPE_ARGUS: + case NES_PHY_TYPE_SFP_D: + case NES_PHY_TYPE_KR: /* clear the alarms */ nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008); nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001); @@ -2357,19 +2582,18 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004); nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005); /* check link status */ - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003); temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - u32temp = 100; - do { - nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1); - phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); - if ((phy_data == temp_phy_data) || (!(--u32temp))) - break; - temp_phy_data = phy_data; - } while (1); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); + nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0; + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", - __func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP"); + __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP"); break; case NES_PHY_TYPE_PUMA_1G: @@ -2385,6 +2609,17 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) } if (phy_data & 0x0004) { + if (wide_ppm_offset && + (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) && + (nesadapter->hw_rev != NE020_REV)) { + cdr_ctrl = nes_read_indexed(nesdev, + NES_IDX_ETH_SERDES_CDR_CONTROL0 + + mac_index * 0x200); + nes_write_indexed(nesdev, + NES_IDX_ETH_SERDES_CDR_CONTROL0 + + mac_index * 0x200, + cdr_ctrl | 0x000F0000); + } nesadapter->mac_link_down[mac_index] = 0; list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { nes_debug(NES_DBG_PHY, "The Link is UP!!. linkup was %d\n", @@ -2396,9 +2631,29 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) netif_start_queue(nesvnic->netdev); nesvnic->linkup = 1; netif_carrier_on(nesvnic->netdev); + + spin_lock(&nesvnic->port_ibevent_lock); + if (nesvnic->of_device_registered) { + if (nesdev->iw_status == 0) { + nesdev->iw_status = 1; + nes_port_ibevent(nesvnic); + } + } + spin_unlock(&nesvnic->port_ibevent_lock); } } } else { + if (wide_ppm_offset && + (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) && + (nesadapter->hw_rev != NE020_REV)) { + cdr_ctrl = nes_read_indexed(nesdev, + NES_IDX_ETH_SERDES_CDR_CONTROL0 + + mac_index * 0x200); + nes_write_indexed(nesdev, + NES_IDX_ETH_SERDES_CDR_CONTROL0 + + mac_index * 0x200, + cdr_ctrl & 0xFFF0FFFF); + } nesadapter->mac_link_down[mac_index] = 1; list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { nes_debug(NES_DBG_PHY, "The Link is Down!!. 
linkup was %d\n", @@ -2410,21 +2665,115 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) netif_stop_queue(nesvnic->netdev); nesvnic->linkup = 0; netif_carrier_off(nesvnic->netdev); + + spin_lock(&nesvnic->port_ibevent_lock); + if (nesvnic->of_device_registered) { + if (nesdev->iw_status == 1) { + nesdev->iw_status = 0; + nes_port_ibevent(nesvnic); + } + } + spin_unlock(&nesvnic->port_ibevent_lock); } } } + if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) { + nesdev->link_recheck = 1; + mod_delayed_work(system_wq, &nesdev->work, + NES_LINK_RECHECK_DELAY); + } } + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); + nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_IDLE; } +void nes_recheck_link_status(struct work_struct *work) +{ + unsigned long flags; + struct nes_device *nesdev = container_of(work, struct nes_device, work.work); + struct nes_adapter *nesadapter = nesdev->nesadapter; + struct nes_vnic *nesvnic; + u32 mac_index = nesdev->mac_index; + u16 phy_data; + u16 temp_phy_data; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + + /* check link status */ + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); + nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021); + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + + phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0; + + nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n", + __func__, phy_data, + nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP"); + + if (phy_data & 0x0004) { + nesadapter->mac_link_down[mac_index] = 0; + list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { + if (nesvnic->linkup == 0) { + printk(PFX "The Link is now up for port %s, netdev %p.\n", + nesvnic->netdev->name, nesvnic->netdev); + if (netif_queue_stopped(nesvnic->netdev)) + netif_start_queue(nesvnic->netdev); + nesvnic->linkup = 1; + netif_carrier_on(nesvnic->netdev); + + spin_lock(&nesvnic->port_ibevent_lock); + if (nesvnic->of_device_registered) { + if (nesdev->iw_status == 0) { + nesdev->iw_status = 1; + nes_port_ibevent(nesvnic); + } + } + spin_unlock(&nesvnic->port_ibevent_lock); + } + } + + } else { + nesadapter->mac_link_down[mac_index] = 1; + list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) { + if (nesvnic->linkup == 1) { + printk(PFX "The Link is now down for port %s, netdev %p.\n", + nesvnic->netdev->name, nesvnic->netdev); + if (!(netif_queue_stopped(nesvnic->netdev))) + netif_stop_queue(nesvnic->netdev); + nesvnic->linkup = 0; + netif_carrier_off(nesvnic->netdev); + + spin_lock(&nesvnic->port_ibevent_lock); + if (nesvnic->of_device_registered) { + if (nesdev->iw_status == 1) { + nesdev->iw_status = 0; + nes_port_ibevent(nesvnic); + } + } + spin_unlock(&nesvnic->port_ibevent_lock); + } + } + } + if (nesdev->link_recheck++ < NES_LINK_RECHECK_MAX) + schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY); + else + nesdev->link_recheck = 0; + + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); +} static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) { struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq); - netif_rx_schedule(nesdev->netdev[nesvnic->netdev_index], &nesvnic->napi); + 
napi_schedule(&nesvnic->napi); } @@ -2447,6 +2796,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) struct nes_hw_nic_sq_wqe *nic_sqe; struct sk_buff *skb; struct sk_buff *rx_skb; + struct nes_rskb_cb *cb; __le16 *wqe_fragment_length; u32 head; u32 cq_size; @@ -2505,16 +2855,16 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) } else break; } - if (skb) - dev_kfree_skb_any(skb); } + if (skb) + dev_kfree_skb_any(skb); nesnic->sq_tail++; nesnic->sq_tail &= nesnic->sq_size-1; if (sq_cqes > 128) { barrier(); - /* restart the queue if it had been stopped */ - if (netif_queue_stopped(nesvnic->netdev)) - netif_wake_queue(nesvnic->netdev); + /* restart the queue if it had been stopped */ + if (netif_queue_stopped(nesvnic->netdev)) + netif_wake_queue(nesvnic->netdev); sq_cqes = 0; } } else { @@ -2531,6 +2881,8 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32; pci_unmap_single(nesdev->pcidev, bus_address, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; + cb->busaddr = 0; /* rx_skb->tail = rx_skb->data + rx_pkt_size; */ /* rx_skb->len = rx_pkt_size; */ rx_skb->len = 0; /* TODO: see if this is necessary */ @@ -2557,9 +2909,8 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) if ((cqe_errv & (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR | NES_NIC_ERRV_BITS_IPH_ERR | NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) { - if (nesvnic->rx_checksum_disabled == 0) { + if (nesvnic->netdev->features & NETIF_F_RXCSUM) rx_skb->ip_summed = CHECKSUM_UNNECESSARY; - } } else nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet." " errv = 0x%X, pkt_type = 0x%X.\n", @@ -2569,7 +2920,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) if ((cqe_errv & (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_IPH_ERR | NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) { - if (nesvnic->rx_checksum_disabled == 0) { + if (nesvnic->netdev->features & NETIF_F_RXCSUM) { rx_skb->ip_summed = CHECKSUM_UNNECESSARY; /* nes_debug(NES_DBG_CQ, "%s: Reporting successfully checksummed IPv4 packet.\n", nesvnic->netdev->name); */ @@ -2583,28 +2934,29 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */ if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) { - nes_cm_recv(rx_skb, nesvnic->netdev); - } else { - if ((cqe_misc & NES_NIC_CQE_TAG_VALID) && (nesvnic->vlan_grp != NULL)) { - vlan_tag = (u16)(le32_to_cpu( - cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX]) - >> 16); - nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n", - nesvnic->netdev->name, vlan_tag); - if (nes_use_lro) - lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb, - nesvnic->vlan_grp, vlan_tag, NULL); - else - nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag); - } else { - if (nes_use_lro) - lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); - else - nes_netif_rx(rx_skb); - } + if (nes_cm_recv(rx_skb, nesvnic->netdev)) + rx_skb = NULL; } + if (rx_skb == NULL) + goto skip_rx_indicate0; + + + if (cqe_misc & NES_NIC_CQE_TAG_VALID) { + vlan_tag = (u16)(le32_to_cpu( + cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX]) + >> 16); + nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. 
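/*
 * Sketch: the NETIF_F_RXCSUM conversion above retires the driver-private
 * rx_checksum_disabled flag, so receive checksum offload is now toggled through
 * the generic feature bit (e.g. "ethtool -K ethX rx on|off") and the per-packet
 * decision reduces to:
 */
if (nesvnic->netdev->features & NETIF_F_RXCSUM)
	rx_skb->ip_summed = CHECKSUM_UNNECESSARY;	/* hardware verified L3/L4 */
else
	rx_skb->ip_summed = CHECKSUM_NONE;		/* let the stack verify it */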
Tag = 0x%04X\n", + nesvnic->netdev->name, vlan_tag); + + __vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag); + } + if (nes_use_lro) + lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); + else + netif_receive_skb(rx_skb); - nesvnic->netdev->last_rx = jiffies; +skip_rx_indicate0: + ; /* nesvnic->netstats.rx_packets++; */ /* nesvnic->netstats.rx_bytes += rx_pkt_size; */ } @@ -2655,6 +3007,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) } + /** * nes_cqp_ce_handler */ @@ -2669,6 +3022,8 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) u32 cq_size; u32 cqe_count=0; u32 error_code; + u32 opcode; + u32 ctx_index; /* u32 counter; */ head = cq->cq_head; @@ -2679,12 +3034,9 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) /* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head, le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */ - if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) { - u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head]. - cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) | - ((u64)(le32_to_cpu(cq->cq_vbase[head]. - cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]))); - cqp = *((struct nes_hw_cqp **)&u64temp); + opcode = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]); + if (opcode & NES_CQE_VALID) { + cqp = &nesdev->cqp; error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]); if (error_code) { @@ -2693,15 +3045,14 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f, (u16)(error_code >> 16), (u16)error_code); - nes_debug(NES_DBG_CQP, "cqp: qp_id=%u, sq_head=%u, sq_tail=%u\n", - cqp->qp_id, cqp->sq_head, cqp->sq_tail); } - u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail]. - wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) | - ((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail]. - wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX]))); - cqp_request = *((struct nes_cqp_request **)&u64temp); + u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head]. + cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) | + ((u64)(le32_to_cpu(cq->cq_vbase[head]. 
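/*
 * Sketch: the reworked CQP completion path above stops chasing a pointer stored in
 * the SQ WQE and instead carries the cqp_request pointer in the completion entry
 * itself, split across two little-endian 32-bit words. The round trip (written
 * here with lower_32_bits()/upper_32_bits(), equivalent to the casts in the patch):
 */
/* submit side */
cqp_wqe->wqe_words[ctx_index]     = cpu_to_le32(lower_32_bits((unsigned long)cqp_request));
cqp_wqe->wqe_words[ctx_index + 1] = cpu_to_le32(upper_32_bits((unsigned long)cqp_request));
/* completion side */
u64temp = (((u64)le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])) << 32) |
	  le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
cqp_request = (struct nes_cqp_request *)(unsigned long)u64temp;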
+ cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]))); + + cqp_request = (struct nes_cqp_request *)(unsigned long)u64temp; if (cqp_request) { if (cqp_request->waiting) { /* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */ @@ -2747,9 +3098,15 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) cqp_wqe = &nesdev->cqp.sq_vbase[head]; memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); barrier(); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = + + opcode = cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]; + if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT) + ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX; + else + ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX; + cqp_wqe->wqe_words[ctx_index] = cpu_to_le32((u32)((unsigned long)cqp_request)); - cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = + cqp_wqe->wqe_words[ctx_index + 1] = cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request))); nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n", cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head); @@ -2765,6 +3122,415 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq) nes_read32(nesdev->regs+NES_CQE_ALLOC); } +static u8 *locate_mpa(u8 *pkt, u32 aeq_info) +{ + if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) { + /* skip over ethernet header */ + pkt += ETH_HLEN; + + /* Skip over IP and TCP headers */ + pkt += 4 * (pkt[0] & 0x0f); + pkt += 4 * ((pkt[12] >> 4) & 0x0f); + } + return pkt; +} + +/* Determine if incoming error pkt is rdma layer */ +static u32 iwarp_opcode(struct nes_qp *nesqp, u32 aeq_info) +{ + u8 *pkt; + u16 *mpa; + u32 opcode = 0xffffffff; + + if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) { + pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET; + mpa = (u16 *)locate_mpa(pkt, aeq_info); + opcode = be16_to_cpu(mpa[1]) & 0xf; + } + + return opcode; +} + +/* Build iWARP terminate header */ +static int nes_bld_terminate_hdr(struct nes_qp *nesqp, u16 async_event_id, u32 aeq_info) +{ + u8 *pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET; + u16 ddp_seg_len; + int copy_len = 0; + u8 is_tagged = 0; + u8 flush_code = 0; + struct nes_terminate_hdr *termhdr; + + termhdr = (struct nes_terminate_hdr *)nesqp->hwqp.q2_vbase; + memset(termhdr, 0, 64); + + if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) { + + /* Use data from offending packet to fill in ddp & rdma hdrs */ + pkt = locate_mpa(pkt, aeq_info); + ddp_seg_len = be16_to_cpu(*(u16 *)pkt); + if (ddp_seg_len) { + copy_len = 2; + termhdr->hdrct = DDP_LEN_FLAG; + if (pkt[2] & 0x80) { + is_tagged = 1; + if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) { + copy_len += TERM_DDP_LEN_TAGGED; + termhdr->hdrct |= DDP_HDR_FLAG; + } + } else { + if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) { + copy_len += TERM_DDP_LEN_UNTAGGED; + termhdr->hdrct |= DDP_HDR_FLAG; + } + + if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) { + if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) { + copy_len += TERM_RDMA_LEN; + termhdr->hdrct |= RDMA_HDR_FLAG; + } + } + } + } + } + + switch (async_event_id) { + case NES_AEQE_AEID_AMP_UNALLOCATED_STAG: + switch (iwarp_opcode(nesqp, aeq_info)) { + case IWARP_OPCODE_WRITE: + flush_code = IB_WC_LOC_PROT_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER; + termhdr->error_code = DDP_TAGGED_INV_STAG; + break; + default: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_INV_STAG; + } + break; + case 
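/*
 * Sketch: locate_mpa() above reaches the first MPA byte of the captured frame using
 * only two fields - the IPv4 IHL nibble (pkt[0] & 0x0f) and the TCP data offset in
 * the high nibble of byte 12 of the TCP header. The same walk with struct accessors:
 */
struct iphdr *iph = (struct iphdr *)(pkt + ETH_HLEN);
struct tcphdr *tcph = (struct tcphdr *)((u8 *)iph + iph->ihl * 4);
u8 *mpa = (u8 *)tcph + tcph->doff * 4;	/* first byte of the MPA/DDP payload */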
NES_AEQE_AEID_AMP_INVALID_STAG: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_INV_STAG; + break; + case NES_AEQE_AEID_AMP_BAD_QP: + flush_code = IB_WC_LOC_QP_OP_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_QN; + break; + case NES_AEQE_AEID_AMP_BAD_STAG_KEY: + case NES_AEQE_AEID_AMP_BAD_STAG_INDEX: + switch (iwarp_opcode(nesqp, aeq_info)) { + case IWARP_OPCODE_SEND_INV: + case IWARP_OPCODE_SEND_SE_INV: + flush_code = IB_WC_REM_OP_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP; + termhdr->error_code = RDMAP_CANT_INV_STAG; + break; + default: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_INV_STAG; + } + break; + case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION: + if (aeq_info & (NES_AEQE_Q2_DATA_ETHERNET | NES_AEQE_Q2_DATA_MPA)) { + flush_code = IB_WC_LOC_PROT_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER; + termhdr->error_code = DDP_TAGGED_BOUNDS; + } else { + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_INV_BOUNDS; + } + break; + case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION: + case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: + case NES_AEQE_AEID_PRIV_OPERATION_DENIED: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_ACCESS; + break; + case NES_AEQE_AEID_AMP_TO_WRAP: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_TO_WRAP; + break; + case NES_AEQE_AEID_AMP_BAD_PD: + switch (iwarp_opcode(nesqp, aeq_info)) { + case IWARP_OPCODE_WRITE: + flush_code = IB_WC_LOC_PROT_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER; + termhdr->error_code = DDP_TAGGED_UNASSOC_STAG; + break; + case IWARP_OPCODE_SEND_INV: + case IWARP_OPCODE_SEND_SE_INV: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_CANT_INV_STAG; + break; + default: + flush_code = IB_WC_REM_ACCESS_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT; + termhdr->error_code = RDMAP_UNASSOC_STAG; + } + break; + case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH: + flush_code = IB_WC_LOC_LEN_ERR; + termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP; + termhdr->error_code = MPA_MARKER; + break; + case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR: + flush_code = IB_WC_GENERAL_ERR; + termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP; + termhdr->error_code = MPA_CRC; + break; + case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE: + case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL: + flush_code = IB_WC_LOC_LEN_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC; + termhdr->error_code = DDP_CATASTROPHIC_LOCAL; + break; + case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC: + case NES_AEQE_AEID_DDP_NO_L_BIT: + flush_code = IB_WC_FATAL_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC; + termhdr->error_code = DDP_CATASTROPHIC_LOCAL; + break; + case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN: + case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID: + flush_code = IB_WC_GENERAL_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_MSN_RANGE; + break; + case 
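/*
 * Sketch: every case in this switch fills the terminate header the same way - the
 * iWARP layer in the high nibble of layer_etype, the error type in the low nibble,
 * then the one-byte error code from the DDP/RDMAP terminate format. A hypothetical
 * helper makes the encoding explicit:
 */
static inline void nes_set_term_error(struct nes_terminate_hdr *hdr,
				      u8 layer, u8 etype, u8 ecode)
{
	hdr->layer_etype = (layer << 4) | etype;
	hdr->error_code = ecode;
}
/* for example, a tagged-buffer bounds violation: */
nes_set_term_error(termhdr, LAYER_DDP, DDP_TAGGED_BUFFER, DDP_TAGGED_BOUNDS);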
NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: + flush_code = IB_WC_LOC_LEN_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_TOO_LONG; + break; + case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION: + flush_code = IB_WC_GENERAL_ERR; + if (is_tagged) { + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER; + termhdr->error_code = DDP_TAGGED_INV_DDP_VER; + } else { + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_DDP_VER; + } + break; + case NES_AEQE_AEID_DDP_UBE_INVALID_MO: + flush_code = IB_WC_GENERAL_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_MO; + break; + case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: + flush_code = IB_WC_REM_OP_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_MSN_NO_BUF; + break; + case NES_AEQE_AEID_DDP_UBE_INVALID_QN: + flush_code = IB_WC_GENERAL_ERR; + termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER; + termhdr->error_code = DDP_UNTAGGED_INV_QN; + break; + case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION: + flush_code = IB_WC_GENERAL_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP; + termhdr->error_code = RDMAP_INV_RDMAP_VER; + break; + case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE: + flush_code = IB_WC_LOC_QP_OP_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP; + termhdr->error_code = RDMAP_UNEXPECTED_OP; + break; + default: + flush_code = IB_WC_FATAL_ERR; + termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP; + termhdr->error_code = RDMAP_UNSPECIFIED; + break; + } + + if (copy_len) + memcpy(termhdr + 1, pkt, copy_len); + + if ((flush_code) && ((NES_AEQE_INBOUND_RDMA & aeq_info) == 0)) { + if (aeq_info & NES_AEQE_SQ) + nesqp->term_sq_flush_code = flush_code; + else + nesqp->term_rq_flush_code = flush_code; + } + + return sizeof(struct nes_terminate_hdr) + copy_len; +} + +static void nes_terminate_connection(struct nes_device *nesdev, struct nes_qp *nesqp, + struct nes_hw_aeqe *aeqe, enum ib_event_type eventtype) +{ + u64 context; + unsigned long flags; + u32 aeq_info; + u16 async_event_id; + u8 tcp_state; + u8 iwarp_state; + u32 termlen = 0; + u32 mod_qp_flags = NES_CQP_QP_IWARP_STATE_TERMINATE | + NES_CQP_QP_TERM_DONT_SEND_FIN; + struct nes_adapter *nesadapter = nesdev->nesadapter; + + if (nesqp->term_flags & NES_TERM_SENT) + return; /* Sanity check */ + + aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); + tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; + iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; + async_event_id = (u16)aeq_info; + + context = (unsigned long)nesadapter->qp_table[le32_to_cpu( + aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN]; + if (!context) { + WARN_ON(!context); + return; + } + + nesqp = (struct nes_qp *)(unsigned long)context; + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + nesqp->terminate_eventtype = eventtype; + spin_unlock_irqrestore(&nesqp->lock, flags); + + if (nesadapter->send_term_ok) + termlen = nes_bld_terminate_hdr(nesqp, async_event_id, aeq_info); + else + mod_qp_flags |= NES_CQP_QP_TERM_DONT_SEND_TERM_MSG; + + if (!nesdev->iw_status) { + nesqp->term_flags = NES_TERM_DONE; + 
nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_ERROR, 0, 0); + nes_cm_disconn(nesqp); + } else { + nes_terminate_start_timer(nesqp); + nesqp->term_flags |= NES_TERM_SENT; + nes_hw_modify_qp(nesdev, nesqp, mod_qp_flags, termlen, 0); + } +} + +static void nes_terminate_send_fin(struct nes_device *nesdev, + struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe) +{ + u32 aeq_info; + u16 async_event_id; + u8 tcp_state; + u8 iwarp_state; + unsigned long flags; + + aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); + tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; + iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; + async_event_id = (u16)aeq_info; + + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + + /* Send the fin only */ + nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_TERMINATE | + NES_CQP_QP_TERM_DONT_SEND_TERM_MSG, 0, 0); +} + +/* Cleanup after a terminate sent or received */ +static void nes_terminate_done(struct nes_qp *nesqp, int timeout_occurred) +{ + u32 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR; + unsigned long flags; + struct nes_vnic *nesvnic = to_nesvnic(nesqp->ibqp.device); + struct nes_device *nesdev = nesvnic->nesdev; + u8 first_time = 0; + + spin_lock_irqsave(&nesqp->lock, flags); + if (nesqp->hte_added) { + nesqp->hte_added = 0; + next_iwarp_state |= NES_CQP_QP_DEL_HTE; + } + + first_time = (nesqp->term_flags & NES_TERM_DONE) == 0; + nesqp->term_flags |= NES_TERM_DONE; + spin_unlock_irqrestore(&nesqp->lock, flags); + + /* Make sure we go through this only once */ + if (first_time) { + if (timeout_occurred == 0) + del_timer(&nesqp->terminate_timer); + else + next_iwarp_state |= NES_CQP_QP_RESET; + + nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); + nes_cm_disconn(nesqp); + } +} + +static void nes_terminate_received(struct nes_device *nesdev, + struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe) +{ + u32 aeq_info; + u8 *pkt; + u32 *mpa; + u8 ddp_ctl; + u8 rdma_ctl; + u16 aeq_id = 0; + + aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); + if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) { + /* Terminate is not a performance path so the silicon */ + /* did not validate the frame - do it now */ + pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET; + mpa = (u32 *)locate_mpa(pkt, aeq_info); + ddp_ctl = (be32_to_cpu(mpa[0]) >> 8) & 0xff; + rdma_ctl = be32_to_cpu(mpa[0]) & 0xff; + if ((ddp_ctl & 0xc0) != 0x40) + aeq_id = NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC; + else if ((ddp_ctl & 0x03) != 1) + aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION; + else if (be32_to_cpu(mpa[2]) != 2) + aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_QN; + else if (be32_to_cpu(mpa[3]) != 1) + aeq_id = NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN; + else if (be32_to_cpu(mpa[4]) != 0) + aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_MO; + else if ((rdma_ctl & 0xc0) != 0x40) + aeq_id = NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION; + + if (aeq_id) { + /* Bad terminate recvd - send back a terminate */ + aeq_info = (aeq_info & 0xffff0000) | aeq_id; + aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info); + nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); + return; + } + } + + nesqp->term_flags |= NES_TERM_RCVD; + nesqp->terminate_eventtype = IB_EVENT_QP_FATAL; + nes_terminate_start_timer(nesqp); + nes_terminate_send_fin(nesdev, nesqp, aeqe); +} + +/* Timeout routine 
in case terminate fails to complete */ +void nes_terminate_timeout(unsigned long context) +{ + struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context; + + nes_terminate_done(nesqp, 1); +} + +/* Set a timer in case hw cannot complete the terminate sequence */ +static void nes_terminate_start_timer(struct nes_qp *nesqp) +{ + mod_timer(&nesqp->terminate_timer, (jiffies + HZ)); +} /** * nes_process_iwarp_aeqe @@ -2773,33 +3539,33 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, struct nes_hw_aeqe *aeqe) { u64 context; - u64 aeqe_context = 0; unsigned long flags; struct nes_qp *nesqp; + struct nes_hw_cq *hw_cq; + struct nes_cq *nescq; int resource_allocated; - /* struct iw_cm_id *cm_id; */ struct nes_adapter *nesadapter = nesdev->nesadapter; - struct ib_event ibevent; - /* struct iw_cm_event cm_event; */ u32 aeq_info; u32 next_iwarp_state = 0; + u32 aeqe_cq_id; u16 async_event_id; u8 tcp_state; u8 iwarp_state; + struct ib_event ibevent; nes_debug(NES_DBG_AEQ, "\n"); aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]); - if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) { + if ((NES_AEQE_INBOUND_RDMA & aeq_info) || (!(NES_AEQE_QP & aeq_info))) { context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32; } else { - aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]); - aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32; context = (unsigned long)nesadapter->qp_table[le32_to_cpu( aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN]; BUG_ON(!context); } + /* context is nesqp unless async_event_id == CQ ERROR */ + nesqp = (struct nes_qp *)(unsigned long)context; async_event_id = (u16)aeq_info; tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; @@ -2809,12 +3575,33 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe, nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]); + aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]); + if (aeq_info & NES_AEQE_QP) { + if (!nes_is_resource_allocated(nesadapter, + nesadapter->allocated_qps, + aeqe_cq_id)) + return; + } + switch (async_event_id) { case NES_AEQE_AEID_LLP_FIN_RECEIVED: - nesqp = *((struct nes_qp **)&context); + if (nesqp->term_flags) + return; /* Ignore it, wait for close complete */ + if (atomic_inc_return(&nesqp->close_timer_started) == 1) { + if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) && + (nesqp->ibqp_state == IB_QPS_RTS)) { + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; + nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; + spin_unlock_irqrestore(&nesqp->lock, flags); + nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); + nes_cm_disconn(nesqp); + } nesqp->cm_id->add_ref(nesqp->cm_id); - nes_add_ref(&nesqp->ibqp); schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp, NES_TIMER_TYPE_CLOSE, 1, 0); nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d)," @@ -2823,196 +3610,115 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), async_event_id, nesqp->last_aeq, tcp_state); } - if 
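/*
 * Sketch: nes_terminate_start_timer() above assumes nesqp->terminate_timer was
 * initialized when the QP was created (not shown in this diff); with the timer API
 * of this era that setup would be:
 */
init_timer(&nesqp->terminate_timer);
nesqp->terminate_timer.function = nes_terminate_timeout;
nesqp->terminate_timer.data = (unsigned long)nesqp;
/*
 * mod_timer(..., jiffies + HZ) then arms a one-second watchdog; if the terminate
 * exchange never completes, nes_terminate_timeout() runs nes_terminate_done(nesqp, 1),
 * which adds NES_CQP_QP_RESET to the ERROR transition.
 */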
((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) || - (nesqp->ibqp_state != IB_QPS_RTS)) { - /* FIN Received but tcp state or IB state moved on, - should expect a close complete */ - return; - } - case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: - case NES_AEQE_AEID_LLP_CONNECTION_RESET: - case NES_AEQE_AEID_TERMINATE_SENT: - case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE: - case NES_AEQE_AEID_RESET_SENT: - nesqp = *((struct nes_qp **)&context); - if (async_event_id == NES_AEQE_AEID_RESET_SENT) { - tcp_state = NES_AEQE_TCP_STATE_CLOSED; - } - nes_add_ref(&nesqp->ibqp); - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = iwarp_state; - nesqp->hw_tcp_state = tcp_state; - nesqp->last_aeq = async_event_id; - - if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) || - (tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) { - nesqp->hte_added = 0; - spin_unlock_irqrestore(&nesqp->lock, flags); - nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u to remove hte\n", - nesqp->hwqp.qp_id); - nes_hw_modify_qp(nesdev, nesqp, - NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE, 0); - spin_lock_irqsave(&nesqp->lock, flags); - } - - if ((nesqp->ibqp_state == IB_QPS_RTS) && - ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || - (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { - switch (nesqp->hw_iwarp_state) { - case NES_AEQE_IWARP_STATE_RTS: - next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; - nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; - break; - case NES_AEQE_IWARP_STATE_TERMINATE: - next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; - nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE; - if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) { - next_iwarp_state |= 0x02000000; - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - } - break; - default: - next_iwarp_state = 0; - } - spin_unlock_irqrestore(&nesqp->lock, flags); - if (next_iwarp_state) { - nes_add_ref(&nesqp->ibqp); - nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X," - " also added another reference\n", - nesqp->hwqp.qp_id, next_iwarp_state); - nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0); - } - nes_cm_disconn(nesqp); - } else { - if (async_event_id == NES_AEQE_AEID_LLP_FIN_RECEIVED) { - /* FIN Received but ib state not RTS, - close complete will be on its way */ - spin_unlock_irqrestore(&nesqp->lock, flags); - nes_rem_ref(&nesqp->ibqp); - return; - } - spin_unlock_irqrestore(&nesqp->lock, flags); - if (async_event_id == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) { - next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000; - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. 
next state = 0x%08X," - " also added another reference\n", - nesqp->hwqp.qp_id, next_iwarp_state); - nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0); - } - nes_cm_disconn(nesqp); - } break; - case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED: - nesqp = *((struct nes_qp **)&context); + case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: spin_lock_irqsave(&nesqp->lock, flags); nesqp->hw_iwarp_state = iwarp_state; nesqp->hw_tcp_state = tcp_state; nesqp->last_aeq = async_event_id; spin_unlock_irqrestore(&nesqp->lock, flags); - nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TERMINATE_RECEIVED" - " event on QP%u \n Q2 Data:\n", - nesqp->hwqp.qp_id); - if (nesqp->ibqp.event_handler) { - ibevent.device = nesqp->ibqp.device; - ibevent.element.qp = &nesqp->ibqp; - ibevent.event = IB_EVENT_QP_FATAL; - nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); - } - if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) || - ((nesqp->ibqp_state == IB_QPS_RTS)&& - (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) { - nes_add_ref(&nesqp->ibqp); - nes_cm_disconn(nesqp); - } else { - nesqp->in_disconnect = 0; - wake_up(&nesqp->kick_waitq); - } - break; - case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES: - nesqp = *((struct nes_qp **)&context); - nes_add_ref(&nesqp->ibqp); - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR; - nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; - nesqp->last_aeq = async_event_id; - if (nesqp->cm_id) { - nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES" - " event on QP%u, remote IP = 0x%08X \n", - nesqp->hwqp.qp_id, - ntohl(nesqp->cm_id->remote_addr.sin_addr.s_addr)); - } else { - nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_TOO_MANY_RETRIES" - " event on QP%u \n", - nesqp->hwqp.qp_id); - } - spin_unlock_irqrestore(&nesqp->lock, flags); - next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_RESET; - nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0); - if (nesqp->ibqp.event_handler) { - ibevent.device = nesqp->ibqp.device; - ibevent.element.qp = &nesqp->ibqp; - ibevent.event = IB_EVENT_QP_FATAL; - nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); - } + nes_cm_disconn(nesqp); break; - case NES_AEQE_AEID_AMP_BAD_STAG_INDEX: - if (NES_AEQE_INBOUND_RDMA&aeq_info) { - nesqp = nesadapter->qp_table[le32_to_cpu( - aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN]; - } else { - /* TODO: get the actual WQE and mask off wqe index */ - context &= ~((u64)511); - nesqp = *((struct nes_qp **)&context); - } + + case NES_AEQE_AEID_RESET_SENT: + tcp_state = NES_AEQE_TCP_STATE_CLOSED; spin_lock_irqsave(&nesqp->lock, flags); nesqp->hw_iwarp_state = iwarp_state; nesqp->hw_tcp_state = tcp_state; nesqp->last_aeq = async_event_id; + nesqp->hte_added = 0; spin_unlock_irqrestore(&nesqp->lock, flags); - nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_BAD_STAG_INDEX event on QP%u\n", - nesqp->hwqp.qp_id); - if (nesqp->ibqp.event_handler) { - ibevent.device = nesqp->ibqp.device; - ibevent.element.qp = &nesqp->ibqp; - ibevent.event = IB_EVENT_QP_ACCESS_ERR; - nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); - } + next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE; + nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); + nes_cm_disconn(nesqp); break; - case NES_AEQE_AEID_AMP_UNALLOCATED_STAG: - nesqp = *((struct nes_qp **)&context); + + case NES_AEQE_AEID_LLP_CONNECTION_RESET: + if (atomic_read(&nesqp->close_timer_started)) + return; spin_lock_irqsave(&nesqp->lock, 
flags); nesqp->hw_iwarp_state = iwarp_state; nesqp->hw_tcp_state = tcp_state; nesqp->last_aeq = async_event_id; spin_unlock_irqrestore(&nesqp->lock, flags); - nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_AMP_UNALLOCATED_STAG event on QP%u\n", - nesqp->hwqp.qp_id); - if (nesqp->ibqp.event_handler) { - ibevent.device = nesqp->ibqp.device; - ibevent.element.qp = &nesqp->ibqp; - ibevent.event = IB_EVENT_QP_ACCESS_ERR; - nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); - } + nes_cm_disconn(nesqp); + break; + + case NES_AEQE_AEID_TERMINATE_SENT: + nes_terminate_send_fin(nesdev, nesqp, aeqe); break; + + case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED: + nes_terminate_received(nesdev, nesqp, aeqe); + break; + + case NES_AEQE_AEID_AMP_BAD_STAG_KEY: + case NES_AEQE_AEID_AMP_BAD_STAG_INDEX: + case NES_AEQE_AEID_AMP_UNALLOCATED_STAG: + case NES_AEQE_AEID_AMP_INVALID_STAG: + case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION: + case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: case NES_AEQE_AEID_PRIV_OPERATION_DENIED: - nesqp = nesadapter->qp_table[le32_to_cpu(aeqe->aeqe_words - [NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN]; - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = iwarp_state; - nesqp->hw_tcp_state = tcp_state; - nesqp->last_aeq = async_event_id; - spin_unlock_irqrestore(&nesqp->lock, flags); - nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_PRIV_OPERATION_DENIED event on QP%u," - " nesqp = %p, AE reported %p\n", - nesqp->hwqp.qp_id, nesqp, *((struct nes_qp **)&context)); - if (nesqp->ibqp.event_handler) { - ibevent.device = nesqp->ibqp.device; - ibevent.element.qp = &nesqp->ibqp; - ibevent.event = IB_EVENT_QP_ACCESS_ERR; - nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); + case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: + case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION: + case NES_AEQE_AEID_AMP_TO_WRAP: + printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_ACCESS_ERR\n", + nesqp->hwqp.qp_id, async_event_id); + nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR); + break; + + case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE: + case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL: + case NES_AEQE_AEID_DDP_UBE_INVALID_MO: + case NES_AEQE_AEID_DDP_UBE_INVALID_QN: + if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) { + aeq_info &= 0xffff0000; + aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE; + aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info); } + + case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE: + case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES: + case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: + case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR: + case NES_AEQE_AEID_AMP_BAD_QP: + case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH: + case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC: + case NES_AEQE_AEID_DDP_NO_L_BIT: + case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN: + case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID: + case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION: + case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION: + case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE: + case NES_AEQE_AEID_AMP_BAD_PD: + case NES_AEQE_AEID_AMP_FASTREG_SHARED: + case NES_AEQE_AEID_AMP_FASTREG_VALID_STAG: + case NES_AEQE_AEID_AMP_FASTREG_MW_STAG: + case NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS: + case NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW: + case NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH: + case NES_AEQE_AEID_AMP_INVALIDATE_SHARED: + case NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS: + case 
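/*
 * Note the deliberate fall-through above: when the offending opcode is beyond
 * IWARP_OPCODE_TERM, the AE id is rewritten to
 * NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE in the AEQE itself and control then
 * continues into the fatal-event case list, so the rewritten id is what
 * nes_terminate_connection() ultimately reports.
 */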
NES_AEQE_AEID_AMP_MWBIND_VALID_STAG: + case NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG: + case NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG: + case NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG: + case NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS: + case NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS: + case NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT: + case NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED: + case NES_AEQE_AEID_BAD_CLOSE: + case NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO: + case NES_AEQE_AEID_STAG_ZERO_INVALID: + case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST: + case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP: + printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n", + nesqp->hwqp.qp_id, async_event_id); + print_ip(nesqp->cm_node); + if (!atomic_read(&nesqp->close_timer_started)) + nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL); break; + case NES_AEQE_AEID_CQ_OPERATION_ERROR: context <<= 1; nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n", @@ -3022,70 +3728,19 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, if (resource_allocated) { printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n", __func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])); + hw_cq = (struct nes_hw_cq *)(unsigned long)context; + if (hw_cq) { + nescq = container_of(hw_cq, struct nes_cq, hw_cq); + if (nescq->ibcq.event_handler) { + ibevent.device = nescq->ibcq.device; + ibevent.event = IB_EVENT_CQ_ERR; + ibevent.element.cq = &nescq->ibcq; + nescq->ibcq.event_handler(&ibevent, nescq->ibcq.cq_context); + } + } } break; - case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: - nesqp = nesadapter->qp_table[le32_to_cpu( - aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN]; - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = iwarp_state; - nesqp->hw_tcp_state = tcp_state; - nesqp->last_aeq = async_event_id; - spin_unlock_irqrestore(&nesqp->lock, flags); - nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG" - "_FOR_AVAILABLE_BUFFER event on QP%u\n", - nesqp->hwqp.qp_id); - if (nesqp->ibqp.event_handler) { - ibevent.device = nesqp->ibqp.device; - ibevent.element.qp = &nesqp->ibqp; - ibevent.event = IB_EVENT_QP_ACCESS_ERR; - nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); - } - /* tell cm to disconnect, cm will queue work to thread */ - nes_add_ref(&nesqp->ibqp); - nes_cm_disconn(nesqp); - break; - case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE: - nesqp = *((struct nes_qp **)&context); - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = iwarp_state; - nesqp->hw_tcp_state = tcp_state; - nesqp->last_aeq = async_event_id; - spin_unlock_irqrestore(&nesqp->lock, flags); - nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_DDP_UBE_INVALID_MSN" - "_NO_BUFFER_AVAILABLE event on QP%u\n", - nesqp->hwqp.qp_id); - if (nesqp->ibqp.event_handler) { - ibevent.device = nesqp->ibqp.device; - ibevent.element.qp = &nesqp->ibqp; - ibevent.event = IB_EVENT_QP_FATAL; - nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); - } - /* tell cm to disconnect, cm will queue work to thread */ - nes_add_ref(&nesqp->ibqp); - nes_cm_disconn(nesqp); - break; - case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR: - nesqp = *((struct nes_qp **)&context); - spin_lock_irqsave(&nesqp->lock, flags); - nesqp->hw_iwarp_state = iwarp_state; - nesqp->hw_tcp_state = tcp_state; - nesqp->last_aeq = async_event_id; - 
spin_unlock_irqrestore(&nesqp->lock, flags); - nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR" - " event on QP%u \n Q2 Data:\n", - nesqp->hwqp.qp_id); - if (nesqp->ibqp.event_handler) { - ibevent.device = nesqp->ibqp.device; - ibevent.element.qp = &nesqp->ibqp; - ibevent.event = IB_EVENT_QP_FATAL; - nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context); - } - /* tell cm to disconnect, cm will queue work to thread */ - nes_add_ref(&nesqp->ibqp); - nes_cm_disconn(nesqp); - break; - /* TODO: additional AEs need to be here */ + default: nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n", async_event_id); @@ -3094,7 +3749,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, } - /** * nes_iwarp_ce_handler */ @@ -3229,6 +3883,8 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, { struct nes_cqp_request *cqp_request; struct nes_hw_cqp_wqe *cqp_wqe; + u32 sq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH; + u32 rq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH; int ret; cqp_request = nes_get_cqp_request(nesdev); @@ -3245,6 +3901,24 @@ void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp, cqp_wqe = &cqp_request->cqp_wqe; nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + /* If wqe in error was identified, set code to be put into cqe */ + if ((nesqp->term_sq_flush_code) && (which_wq & NES_CQP_FLUSH_SQ)) { + which_wq |= NES_CQP_FLUSH_MAJ_MIN; + sq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_sq_flush_code; + nesqp->term_sq_flush_code = 0; + } + + if ((nesqp->term_rq_flush_code) && (which_wq & NES_CQP_FLUSH_RQ)) { + which_wq |= NES_CQP_FLUSH_MAJ_MIN; + rq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_rq_flush_code; + nesqp->term_rq_flush_code = 0; + } + + if (which_wq & NES_CQP_FLUSH_MAJ_MIN) { + cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_SQ_CODE] = cpu_to_le32(sq_code); + cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_RQ_CODE] = cpu_to_le32(rq_code); + } + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq); cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id); diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 7b81e0ae007..d748e4b31b8 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. +* Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -35,13 +35,22 @@ #include <linux/inet_lro.h> +#define NES_PHY_TYPE_CX4 1 #define NES_PHY_TYPE_1G 2 -#define NES_PHY_TYPE_IRIS 3 #define NES_PHY_TYPE_ARGUS 4 #define NES_PHY_TYPE_PUMA_1G 5 #define NES_PHY_TYPE_PUMA_10G 6 +#define NES_PHY_TYPE_GLADIUS 7 +#define NES_PHY_TYPE_SFP_D 8 +#define NES_PHY_TYPE_KR 9 #define NES_MULTICAST_PF_MAX 8 +#define NES_A0 3 + +#define NES_ENABLE_PAU 0x07000001 +#define NES_DISABLE_PAU 0x07000000 +#define NES_PAU_COUNTER 10 +#define NES_CQP_OPCODE_MASK 0x3f enum pci_regs { NES_INT_STAT = 0x0000, @@ -60,6 +69,7 @@ enum pci_regs { NES_CQ_ACK = 0x0034, NES_WQE_ALLOC = 0x0040, NES_CQE_ALLOC = 0x0044, + NES_AEQ_ALLOC = 0x0048 }; enum indexed_regs { @@ -68,8 +78,10 @@ enum indexed_regs { NES_IDX_QP_CONTROL = 0x0040, NES_IDX_FLM_CONTROL = 0x0080, NES_IDX_INT_CPU_STATUS = 0x00a0, + NES_IDX_GPR_TRIGGER = 0x00bc, NES_IDX_GPIO_CONTROL = 0x00f0, NES_IDX_GPIO_DATA = 0x00f4, + NES_IDX_GPR2 = 0x010c, NES_IDX_TCP_CONFIG0 = 0x01e4, NES_IDX_TCP_TIMER_CONFIG = 0x01ec, NES_IDX_TCP_NOW = 0x01f0, @@ -156,6 +168,8 @@ enum indexed_regs { NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004, NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008, NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c, + NES_IDX_WQM_CONFIG0 = 0x5000, + NES_IDX_WQM_CONFIG1 = 0x5004, NES_IDX_CM_CONFIG = 0x5100, NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000, NES_IDX_NIC_PHYPORT_TO_USW = 0x6008, @@ -195,6 +209,7 @@ enum nes_cqp_opcodes { NES_CQP_REGISTER_SHARED_STAG = 0x0c, NES_CQP_DEALLOCATE_STAG = 0x0d, NES_CQP_MANAGE_ARP_CACHE = 0x0f, + NES_CQP_DOWNLOAD_SEGMENT = 0x10, NES_CQP_SUSPEND_QPS = 0x11, NES_CQP_UPLOAD_CONTEXT = 0x13, NES_CQP_CREATE_CEQ = 0x16, @@ -203,7 +218,8 @@ enum nes_cqp_opcodes { NES_CQP_DESTROY_AEQ = 0x1b, NES_CQP_LMI_ACCESS = 0x20, NES_CQP_FLUSH_WQES = 0x22, - NES_CQP_MANAGE_APBVT = 0x23 + NES_CQP_MANAGE_APBVT = 0x23, + NES_CQP_MANAGE_QUAD_HASH = 0x25 }; enum nes_cqp_wqe_word_idx { @@ -215,6 +231,14 @@ enum nes_cqp_wqe_word_idx { NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5, }; +enum nes_cqp_wqe_word_download_idx { /* format differs from other cqp ops */ + NES_CQP_WQE_DL_OPCODE_IDX = 0, + NES_CQP_WQE_DL_COMP_CTX_LOW_IDX = 1, + NES_CQP_WQE_DL_COMP_CTX_HIGH_IDX = 2, + NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX = 3 + /* For index values 4-15 use NES_NIC_SQ_WQE_ values */ +}; + enum nes_cqp_cq_wqeword_idx { NES_CQP_CQ_WQE_PBL_LOW_IDX = 6, NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7, @@ -235,7 +259,9 @@ enum nes_cqp_stag_wqeword_idx { NES_CQP_STAG_WQE_PBL_LEN_IDX = 14 }; +#define NES_CQP_OP_LOGICAL_PORT_SHIFT 26 #define NES_CQP_OP_IWARP_STATE_SHIFT 28 +#define NES_CQP_OP_TERMLEN_SHIFT 28 enum nes_cqp_qp_bits { NES_CQP_QP_ARP_VALID = (1<<8), @@ -260,12 +286,16 @@ enum nes_cqp_qp_bits { NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT), NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT), NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT), + NES_CQP_QP_TERM_DONT_SEND_FIN = (1<<24), + NES_CQP_QP_TERM_DONT_SEND_TERM_MSG = (1<<25), NES_CQP_QP_RESET = (1<<31), }; enum nes_cqp_qp_wqe_word_idx { NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6, NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7, + NES_CQP_QP_WQE_FLUSH_SQ_CODE = 8, + NES_CQP_QP_WQE_FLUSH_RQ_CODE = 9, NES_CQP_QP_WQE_NEW_MSS_IDX = 15, }; @@ -356,6 +386,7 @@ enum nes_cqp_arp_bits { enum nes_cqp_flush_bits { NES_CQP_FLUSH_SQ = (1<<30), NES_CQP_FLUSH_RQ = (1<<31), + NES_CQP_FLUSH_MAJ_MIN = (1<<28), }; enum nes_cqe_opcode_bits { @@ -535,11 +566,23 @@ enum nes_iwarp_sq_fmr_wqe_word_idx { NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 
14, }; +enum nes_iwarp_sq_fmr_opcodes { + NES_IWARP_SQ_FMR_WQE_ZERO_BASED = (1<<6), + NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K = (0<<7), + NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M = (1<<7), + NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ = (1<<16), + NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE = (1<<17), + NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ = (1<<18), + NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE = (1<<19), + NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND = (1<<20), +}; + +#define NES_IWARP_SQ_FMR_WQE_MR_LENGTH_HIGH_MASK 0xFF; + enum nes_iwarp_sq_locinv_wqe_word_idx { NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6, }; - enum nes_iwarp_rq_wqe_word_idx { NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1, NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2, @@ -574,6 +617,7 @@ enum nes_nic_sq_wqe_bits { enum nes_nic_cqe_word_idx { NES_NIC_CQE_ACCQP_ID_IDX = 0, + NES_NIC_CQE_HASH_RCVNXT = 1, NES_NIC_CQE_TAG_PKT_TYPE_IDX = 2, NES_NIC_CQE_MISC_IDX = 3, }; @@ -628,11 +672,14 @@ enum nes_aeqe_bits { NES_AEQE_INBOUND_RDMA = (1<<19), NES_AEQE_IWARP_STATE_MASK = (7<<20), NES_AEQE_TCP_STATE_MASK = (0xf<<24), + NES_AEQE_Q2_DATA_WRITTEN = (0x3<<28), NES_AEQE_VALID = (1<<31), }; #define NES_AEQE_IWARP_STATE_SHIFT 20 #define NES_AEQE_TCP_STATE_SHIFT 24 +#define NES_AEQE_Q2_DATA_ETHERNET (1<<28) +#define NES_AEQE_Q2_DATA_MPA (1<<29) enum nes_aeqe_iwarp_state { NES_AEQE_IWARP_STATE_NON_EXISTANT = 0, @@ -746,6 +793,15 @@ enum nes_iwarp_sq_wqe_bits { NES_IWARP_SQ_OP_NOP = 12, }; +enum nes_iwarp_cqe_major_code { + NES_IWARP_CQE_MAJOR_FLUSH = 1, + NES_IWARP_CQE_MAJOR_DRV = 0x8000 +}; + +enum nes_iwarp_cqe_minor_code { + NES_IWARP_CQE_MINOR_FLUSH = 1 +}; + #define NES_EEPROM_READ_REQUEST (1<<16) #define NES_MAC_ADDR_VALID (1<<20) @@ -873,7 +929,6 @@ struct nes_hw_nic { u8 replenishing_rq; u8 reserved; - spinlock_t sq_lock; spinlock_t rq_lock; }; @@ -967,6 +1022,12 @@ struct nes_arp_entry { #define DEFAULT_JUMBO_NES_QL_TARGET 40 #define DEFAULT_JUMBO_NES_QL_HIGH 128 #define NES_NIC_CQ_DOWNWARD_TREND 16 +#define NES_PFT_SIZE 48 + +#define NES_MGT_WQ_COUNT 32 +#define NES_MGT_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_32) | (NES_NIC_CTX_SQ_SIZE_32)) +#define NES_MGT_QP_OFFSET 36 +#define NES_MGT_QP_COUNT 4 struct nes_hw_tune_timer { /* u16 cq_count; */ @@ -1064,11 +1125,12 @@ struct nes_adapter { u32 wqm_wat; u32 core_clock; u32 firmware_version; + u32 eeprom_version; u32 nic_rx_eth_route_err; u32 et_rx_coalesce_usecs; - u32 et_rx_max_coalesced_frames; + u32 et_rx_max_coalesced_frames; u32 et_rx_coalesce_usecs_irq; u32 et_rx_max_coalesced_frames_irq; u32 et_pkt_rate_low; @@ -1079,6 +1141,8 @@ struct nes_adapter { u32 et_rx_max_coalesced_frames_high; u32 et_rate_sample_interval; u32 timer_int_limit; + u32 wqm_quanta; + u8 allow_unaligned_fpdus; /* Adapter base MAC address */ u32 mac_addr_low; @@ -1094,12 +1158,14 @@ struct nes_adapter { u16 pd_config_base[4]; u16 link_interrupt_count[4]; + u8 crit_error_count[32]; /* the phy index for each port */ u8 phy_index[4]; u8 mac_sw_state[4]; u8 mac_link_down[4]; u8 phy_type[4]; + u8 log_port; /* PCI information */ unsigned int devfn; @@ -1111,8 +1177,10 @@ struct nes_adapter { u8 netdev_max; /* from host nic address count in EEPROM */ u8 port_count; u8 virtwq; + u8 send_term_ok; u8 et_use_adaptive_rx_coalesce; u8 adapter_fcn_count; + u8 pft_mcast_map[NES_PFT_SIZE]; }; struct nes_pbl { @@ -1125,6 +1193,19 @@ struct nes_pbl { /* TODO: need to add list for two level tables */ }; +#define NES_4K_PBL_CHUNK_SIZE 4096 + +struct nes_fast_mr_wqe_pbl { + u64 *kva; + dma_addr_t paddr; +}; + +struct 
nes_ib_fast_reg_page_list { + struct ib_fast_reg_page_list ibfrpl; + struct nes_fast_mr_wqe_pbl nes_wqe_pbl; + u64 pbl; +}; + struct nes_listener { struct work_struct work; struct workqueue_struct *wq; @@ -1137,10 +1218,11 @@ struct nes_listener { struct nes_ib_device; +#define NES_EVENT_DELAY msecs_to_jiffies(100) + struct nes_vnic { struct nes_ib_device *nesibdev; u64 sq_full; - u64 sq_locked; u64 tso_requests; u64 segmented_tso_requests; u64 linearized_skbs; @@ -1154,7 +1236,6 @@ struct nes_vnic { /* void *mem; */ struct nes_device *nesdev; struct net_device *netdev; - struct vlan_group *vlan_grp; atomic_t rx_skbs_needed; atomic_t rx_skb_timer_running; int budget; @@ -1188,10 +1269,21 @@ struct nes_vnic { u8 next_qp_nic_index; u8 of_device_registered; u8 rdma_enabled; - u8 rx_checksum_disabled; u32 lro_max_aggr; struct net_lro_mgr lro_mgr; struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS]; + struct timer_list event_timer; + enum ib_event_type delayed_event; + enum ib_event_type last_dispatched_event; + spinlock_t port_ibevent_lock; + u32 mgt_mem_size; + void *mgt_vbase; + dma_addr_t mgt_pbase; + struct nes_vnic_mgt *mgtvnic[NES_MGT_QP_COUNT]; + struct task_struct *mgt_thread; + wait_queue_head_t mgt_wait_queue; + struct sk_buff_head mgt_skb_list; + }; struct nes_ib_device { @@ -1209,7 +1301,92 @@ struct nes_ib_device { u32 num_pd; }; -#define nes_vlan_rx vlan_hwaccel_receive_skb -#define nes_netif_rx netif_receive_skb +enum nes_hdrct_flags { + DDP_LEN_FLAG = 0x80, + DDP_HDR_FLAG = 0x40, + RDMA_HDR_FLAG = 0x20 +}; + +enum nes_term_layers { + LAYER_RDMA = 0, + LAYER_DDP = 1, + LAYER_MPA = 2 +}; + +enum nes_term_error_types { + RDMAP_CATASTROPHIC = 0, + RDMAP_REMOTE_PROT = 1, + RDMAP_REMOTE_OP = 2, + DDP_CATASTROPHIC = 0, + DDP_TAGGED_BUFFER = 1, + DDP_UNTAGGED_BUFFER = 2, + DDP_LLP = 3 +}; + +enum nes_term_rdma_errors { + RDMAP_INV_STAG = 0x00, + RDMAP_INV_BOUNDS = 0x01, + RDMAP_ACCESS = 0x02, + RDMAP_UNASSOC_STAG = 0x03, + RDMAP_TO_WRAP = 0x04, + RDMAP_INV_RDMAP_VER = 0x05, + RDMAP_UNEXPECTED_OP = 0x06, + RDMAP_CATASTROPHIC_LOCAL = 0x07, + RDMAP_CATASTROPHIC_GLOBAL = 0x08, + RDMAP_CANT_INV_STAG = 0x09, + RDMAP_UNSPECIFIED = 0xff +}; + +enum nes_term_ddp_errors { + DDP_CATASTROPHIC_LOCAL = 0x00, + DDP_TAGGED_INV_STAG = 0x00, + DDP_TAGGED_BOUNDS = 0x01, + DDP_TAGGED_UNASSOC_STAG = 0x02, + DDP_TAGGED_TO_WRAP = 0x03, + DDP_TAGGED_INV_DDP_VER = 0x04, + DDP_UNTAGGED_INV_QN = 0x01, + DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02, + DDP_UNTAGGED_INV_MSN_RANGE = 0x03, + DDP_UNTAGGED_INV_MO = 0x04, + DDP_UNTAGGED_INV_TOO_LONG = 0x05, + DDP_UNTAGGED_INV_DDP_VER = 0x06 +}; + +enum nes_term_mpa_errors { + MPA_CLOSED = 0x01, + MPA_CRC = 0x02, + MPA_MARKER = 0x03, + MPA_REQ_RSP = 0x04, +}; + +struct nes_terminate_hdr { + u8 layer_etype; + u8 error_code; + u8 hdrct; + u8 rsvd; +}; + +/* Used to determine how to fill in terminate error codes */ +#define IWARP_OPCODE_WRITE 0 +#define IWARP_OPCODE_READREQ 1 +#define IWARP_OPCODE_READRSP 2 +#define IWARP_OPCODE_SEND 3 +#define IWARP_OPCODE_SEND_INV 4 +#define IWARP_OPCODE_SEND_SE 5 +#define IWARP_OPCODE_SEND_SE_INV 6 +#define IWARP_OPCODE_TERM 7 + +/* These values are used only during terminate processing */ +#define TERM_DDP_LEN_TAGGED 14 +#define TERM_DDP_LEN_UNTAGGED 18 +#define TERM_RDMA_LEN 28 +#define RDMA_OPCODE_MASK 0x0f +#define RDMA_READ_REQ_OPCODE 1 +#define BAD_FRAME_OFFSET 64 +#define CQE_MAJOR_DRV 0x8000 + +/* Used for link status recheck after interrupt processing */ +#define NES_LINK_RECHECK_DELAY msecs_to_jiffies(50) +#define NES_LINK_RECHECK_MAX 
60 #endif /* __NES_HW_H */ diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c new file mode 100644 index 00000000000..416645259b0 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -0,0 +1,1160 @@ +/* + * Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/kthread.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <net/tcp.h> +#include "nes.h" +#include "nes_mgt.h" + +atomic_t pau_qps_created; +atomic_t pau_qps_destroyed; + +static void nes_replenish_mgt_rq(struct nes_vnic_mgt *mgtvnic) +{ + unsigned long flags; + dma_addr_t bus_address; + struct sk_buff *skb; + struct nes_hw_nic_rq_wqe *nic_rqe; + struct nes_hw_mgt *nesmgt; + struct nes_device *nesdev; + struct nes_rskb_cb *cb; + u32 rx_wqes_posted = 0; + + nesmgt = &mgtvnic->mgt; + nesdev = mgtvnic->nesvnic->nesdev; + spin_lock_irqsave(&nesmgt->rq_lock, flags); + if (nesmgt->replenishing_rq != 0) { + if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) && + (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) { + atomic_set(&mgtvnic->rx_skb_timer_running, 1); + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */ + add_timer(&mgtvnic->rq_wqes_timer); + } else { + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + } + return; + } + nesmgt->replenishing_rq = 1; + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + do { + skb = dev_alloc_skb(mgtvnic->nesvnic->max_frame_size); + if (skb) { + skb->dev = mgtvnic->nesvnic->netdev; + + bus_address = pci_map_single(nesdev->pcidev, + skb->data, mgtvnic->nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->busaddr = bus_address; + cb->maplen = mgtvnic->nesvnic->max_frame_size; + + nic_rqe = &nesmgt->rq_vbase[mgtvnic->mgt.rq_head]; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = + cpu_to_le32(mgtvnic->nesvnic->max_frame_size); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = + cpu_to_le32((u32)bus_address); + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = + 
cpu_to_le32((u32)((u64)bus_address >> 32)); + nesmgt->rx_skb[nesmgt->rq_head] = skb; + nesmgt->rq_head++; + nesmgt->rq_head &= nesmgt->rq_size - 1; + atomic_dec(&mgtvnic->rx_skbs_needed); + barrier(); + if (++rx_wqes_posted == 255) { + nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id); + rx_wqes_posted = 0; + } + } else { + spin_lock_irqsave(&nesmgt->rq_lock, flags); + if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) && + (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) { + atomic_set(&mgtvnic->rx_skb_timer_running, 1); + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */ + add_timer(&mgtvnic->rq_wqes_timer); + } else { + spin_unlock_irqrestore(&nesmgt->rq_lock, flags); + } + break; + } + } while (atomic_read(&mgtvnic->rx_skbs_needed)); + barrier(); + if (rx_wqes_posted) + nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id); + nesmgt->replenishing_rq = 0; +} + +/** + * nes_mgt_rq_wqes_timeout + */ +static void nes_mgt_rq_wqes_timeout(unsigned long parm) +{ + struct nes_vnic_mgt *mgtvnic = (struct nes_vnic_mgt *)parm; + + atomic_set(&mgtvnic->rx_skb_timer_running, 0); + if (atomic_read(&mgtvnic->rx_skbs_needed)) + nes_replenish_mgt_rq(mgtvnic); +} + +/** + * nes_mgt_free_skb - unmap and free skb + */ +static void nes_mgt_free_skb(struct nes_device *nesdev, struct sk_buff *skb, u32 dir) +{ + struct nes_rskb_cb *cb; + + cb = (struct nes_rskb_cb *)&skb->cb[0]; + pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, dir); + cb->busaddr = 0; + dev_kfree_skb_any(skb); +} + +/** + * nes_download_callback - handle download completions + */ +static void nes_download_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request) +{ + struct pau_fpdu_info *fpdu_info = cqp_request->cqp_callback_pointer; + struct nes_qp *nesqp = fpdu_info->nesqp; + struct sk_buff *skb; + int i; + + for (i = 0; i < fpdu_info->frag_cnt; i++) { + skb = fpdu_info->frags[i].skb; + if (fpdu_info->frags[i].cmplt) { + nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE); + nes_rem_ref_cm_node(nesqp->cm_node); + } + } + + if (fpdu_info->hdr_vbase) + pci_free_consistent(nesdev->pcidev, fpdu_info->hdr_len, + fpdu_info->hdr_vbase, fpdu_info->hdr_pbase); + kfree(fpdu_info); +} + +/** + * nes_get_seq - Get the seq, ack_seq and window from the packet + */ +static u32 nes_get_seq(struct sk_buff *skb, u32 *ack, u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd) +{ + struct nes_rskb_cb *cb = (struct nes_rskb_cb *)&skb->cb[0]; + struct iphdr *iph = (struct iphdr *)(cb->data_start + ETH_HLEN); + struct tcphdr *tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + + *ack = be32_to_cpu(tcph->ack_seq); + *wnd = be16_to_cpu(tcph->window); + *fin_rcvd = tcph->fin; + *rst_rcvd = tcph->rst; + return be32_to_cpu(tcph->seq); +} + +/** + * nes_get_next_skb - Get the next skb based on where current skb is in the queue + */ +static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp *nesqp, + struct sk_buff *skb, u32 nextseq, u32 *ack, + u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd) +{ + u32 seq; + bool processacks; + struct sk_buff *old_skb; + + if (skb) { + /* Continue processing fpdu */ + if (skb->next == (struct sk_buff *)&nesqp->pau_list) + goto out; + skb = skb->next; + processacks = false; + } else { + /* Starting a new one */ + if (skb_queue_empty(&nesqp->pau_list)) + goto out; + skb = skb_peek(&nesqp->pau_list); + processacks = true; + } + + while (1) { + if 
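/*
 * Sketch: the segment walk in nes_get_next_skb() here orders out-of-order packets
 * by TCP sequence number; the comparison uses the wrap-safe after() helper from
 * <net/tcp.h>, which stays correct across the 32-bit sequence wrap where a plain
 * "seq > nextseq" would not:
 */
if (after(seq, nextseq))	/* signed-difference test: (s32)(nextseq - seq) < 0 */
	goto out;		/* gap in the stream - wait for more segments */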
(skb_queue_empty(&nesqp->pau_list)) + goto out; + + seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd); + if (seq == nextseq) { + if (skb->len || processacks) + break; + } else if (after(seq, nextseq)) { + goto out; + } + + old_skb = skb; + skb = skb->next; + skb_unlink(old_skb, &nesqp->pau_list); + nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE); + nes_rem_ref_cm_node(nesqp->cm_node); + if (skb == (struct sk_buff *)&nesqp->pau_list) + goto out; + } + return skb; + +out: + return NULL; +} + +/** + * get_fpdu_info - Find the next complete fpdu and return its fragments. + */ +static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp, + struct pau_fpdu_info **pau_fpdu_info) +{ + struct sk_buff *skb; + struct iphdr *iph; + struct tcphdr *tcph; + struct nes_rskb_cb *cb; + struct pau_fpdu_info *fpdu_info = NULL; + struct pau_fpdu_frag frags[MAX_FPDU_FRAGS]; + u32 fpdu_len = 0; + u32 tmp_len; + int frag_cnt = 0; + u32 tot_len; + u32 frag_tot; + u32 ack; + u32 fin_rcvd; + u32 rst_rcvd; + u16 wnd; + int i; + int rc = 0; + + *pau_fpdu_info = NULL; + + skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd); + if (!skb) + goto out; + + cb = (struct nes_rskb_cb *)&skb->cb[0]; + if (skb->len) { + fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING; + fpdu_len = (fpdu_len + 3) & 0xfffffffc; + tmp_len = fpdu_len; + + /* See if we have all of the fpdu */ + frag_tot = 0; + memset(&frags, 0, sizeof frags); + for (i = 0; i < MAX_FPDU_FRAGS; i++) { + frags[i].physaddr = cb->busaddr; + frags[i].physaddr += skb->data - cb->data_start; + frags[i].frag_len = min(tmp_len, skb->len); + frags[i].skb = skb; + frags[i].cmplt = (skb->len == frags[i].frag_len); + frag_tot += frags[i].frag_len; + frag_cnt++; + + tmp_len -= frags[i].frag_len; + if (tmp_len == 0) + break; + + skb = nes_get_next_skb(nesdev, nesqp, skb, + nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd); + if (!skb) + goto out; + if (rst_rcvd) { + /* rst received in the middle of fpdu */ + for (; i >= 0; i--) { + skb_unlink(frags[i].skb, &nesqp->pau_list); + nes_mgt_free_skb(nesdev, frags[i].skb, PCI_DMA_TODEVICE); + } + cb = (struct nes_rskb_cb *)&skb->cb[0]; + frags[0].physaddr = cb->busaddr; + frags[0].physaddr += skb->data - cb->data_start; + frags[0].frag_len = skb->len; + frags[0].skb = skb; + frags[0].cmplt = true; + frag_cnt = 1; + break; + } + + cb = (struct nes_rskb_cb *)&skb->cb[0]; + } + } else { + /* no data */ + frags[0].physaddr = cb->busaddr; + frags[0].frag_len = 0; + frags[0].skb = skb; + frags[0].cmplt = true; + frag_cnt = 1; + } + + /* Found one */ + fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC); + if (fpdu_info == NULL) { + nes_debug(NES_DBG_PAU, "Failed to alloc a fpdu_info.\n"); + rc = -ENOMEM; + goto out; + } + + fpdu_info->cqp_request = nes_get_cqp_request(nesdev); + if (fpdu_info->cqp_request == NULL) { + nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n"); + rc = -ENOMEM; + goto out; + } + + cb = (struct nes_rskb_cb *)&frags[0].skb->cb[0]; + iph = (struct iphdr *)(cb->data_start + ETH_HLEN); + tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + fpdu_info->hdr_len = (((unsigned char *)tcph) + 4 * (tcph->doff)) - cb->data_start; + fpdu_info->data_len = fpdu_len; + tot_len = fpdu_info->hdr_len + fpdu_len - ETH_HLEN; + + if (frags[0].cmplt) { + fpdu_info->hdr_pbase = cb->busaddr; + fpdu_info->hdr_vbase = NULL; + } else { + fpdu_info->hdr_vbase = pci_alloc_consistent(nesdev->pcidev, + fpdu_info->hdr_len, &fpdu_info->hdr_pbase); + if 
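
/*
 * get_fpdu_info() derives the on-wire FPDU size from the 16-bit MPA length
 * field: add MPA_FRAMING (2-byte length field + 4-byte CRC) and round up to a
 * 4-byte boundary with "(len + 3) & 0xfffffffc". A small userspace sketch of
 * that computation:
 */
#include <stdint.h>
#include <stdio.h>

#define MPA_FRAMING 6u	/* length is 2 bytes, crc is 4 bytes (nes_mgt.h) */

static uint32_t fpdu_total_len(uint16_t mpa_len)
{
	uint32_t len = (uint32_t)mpa_len + MPA_FRAMING;

	return (len + 3u) & ~3u;	/* pad the ULPDU to 4-byte alignment */
}

int main(void)
{
	printf("%u\n", fpdu_total_len(1));	/* 8: 1 + 6, then 1 pad byte */
	printf("%u\n", fpdu_total_len(1024));	/* 1032: 1030 rounded up */
	return 0;
}
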
(!fpdu_info->hdr_vbase) { + nes_debug(NES_DBG_PAU, "Unable to allocate memory for pau first frag\n"); + rc = -ENOMEM; + goto out; + } + + /* Copy hdrs, adjusting len and seqnum */ + memcpy(fpdu_info->hdr_vbase, cb->data_start, fpdu_info->hdr_len); + iph = (struct iphdr *)(fpdu_info->hdr_vbase + ETH_HLEN); + tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + } + + iph->tot_len = cpu_to_be16(tot_len); + iph->saddr = cpu_to_be32(0x7f000001); + + tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt); + tcph->ack_seq = cpu_to_be32(ack); + tcph->window = cpu_to_be16(wnd); + + nesqp->pau_rcv_nxt += fpdu_len + fin_rcvd; + + memcpy(fpdu_info->frags, frags, sizeof(fpdu_info->frags)); + fpdu_info->frag_cnt = frag_cnt; + fpdu_info->nesqp = nesqp; + *pau_fpdu_info = fpdu_info; + + /* Update skb's for next pass */ + for (i = 0; i < frag_cnt; i++) { + cb = (struct nes_rskb_cb *)&frags[i].skb->cb[0]; + skb_pull(frags[i].skb, frags[i].frag_len); + + if (frags[i].skb->len == 0) { + /* Pull skb off the list - it will be freed in the callback */ + if (!skb_queue_empty(&nesqp->pau_list)) + skb_unlink(frags[i].skb, &nesqp->pau_list); + } else { + /* Last skb still has data so update the seq */ + iph = (struct iphdr *)(cb->data_start + ETH_HLEN); + tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt); + } + } + +out: + if (rc) { + if (fpdu_info) { + if (fpdu_info->cqp_request) + nes_put_cqp_request(nesdev, fpdu_info->cqp_request); + kfree(fpdu_info); + } + } + return rc; +} + +/** + * forward_fpdu - send complete fpdus, one at a time + */ +static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + struct nes_device *nesdev = nesvnic->nesdev; + struct pau_fpdu_info *fpdu_info; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + unsigned long flags; + u64 u64tmp; + u32 u32tmp; + int rc; + + while (1) { + spin_lock_irqsave(&nesqp->pau_lock, flags); + rc = get_fpdu_info(nesdev, nesqp, &fpdu_info); + if (rc || (fpdu_info == NULL)) { + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + return rc; + } + + cqp_request = fpdu_info->cqp_request; + cqp_wqe = &cqp_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_OPCODE_IDX, + NES_CQP_DOWNLOAD_SEGMENT | + (((u32)nesvnic->logical_port) << NES_CQP_OP_LOGICAL_PORT_SHIFT)); + + u32tmp = fpdu_info->hdr_len << 16; + u32tmp |= fpdu_info->hdr_len + (u32)fpdu_info->data_len; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX, + u32tmp); + + u32tmp = (fpdu_info->frags[1].frag_len << 16) | fpdu_info->frags[0].frag_len; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_2_1_IDX, + u32tmp); + + u32tmp = (fpdu_info->frags[3].frag_len << 16) | fpdu_info->frags[2].frag_len; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_4_3_IDX, + u32tmp); + + u64tmp = (u64)fpdu_info->hdr_pbase; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX, + lower_32_bits(u64tmp)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX, + upper_32_bits(u64tmp)); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, + lower_32_bits(fpdu_info->frags[0].physaddr)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_HIGH_IDX, + upper_32_bits(fpdu_info->frags[0].physaddr)); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_LOW_IDX, + lower_32_bits(fpdu_info->frags[1].physaddr)); + set_wqe_32bit_value(cqp_wqe->wqe_words, 
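
/*
 * forward_fpdus() packs two 16-bit fragment lengths into each 32-bit WQE word
 * and splits every 64-bit DMA address across a LOW/HIGH word pair with
 * lower_32_bits()/upper_32_bits(). A userspace sketch of that packing; the
 * example values are arbitrary:
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define lower_32_bits(n) ((uint32_t)(n))
#define upper_32_bits(n) ((uint32_t)((uint64_t)(n) >> 32))

int main(void)
{
	uint16_t frag_len[2] = { 1032, 64 };
	uint64_t physaddr = 0x00000001fee0c000ull;

	/* frag 1 in the high half, frag 0 in the low half */
	uint32_t len_word = ((uint32_t)frag_len[1] << 16) | frag_len[0];

	printf("length word 0x%08" PRIx32 "\n", len_word);
	printf("addr low 0x%08" PRIx32 ", high 0x%08" PRIx32 "\n",
	       lower_32_bits(physaddr), upper_32_bits(physaddr));
	return 0;
}
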
NES_NIC_SQ_WQE_FRAG2_HIGH_IDX, + upper_32_bits(fpdu_info->frags[1].physaddr)); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_LOW_IDX, + lower_32_bits(fpdu_info->frags[2].physaddr)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_HIGH_IDX, + upper_32_bits(fpdu_info->frags[2].physaddr)); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_LOW_IDX, + lower_32_bits(fpdu_info->frags[3].physaddr)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_HIGH_IDX, + upper_32_bits(fpdu_info->frags[3].physaddr)); + + cqp_request->cqp_callback_pointer = fpdu_info; + cqp_request->callback = 1; + cqp_request->cqp_callback = nes_download_callback; + + atomic_set(&cqp_request->refcount, 1); + nes_post_cqp_request(nesdev, cqp_request); + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + } + + return 0; +} + +static void process_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + int again = 1; + unsigned long flags; + + do { + /* Ignore rc - if it failed, tcp retries will cause it to try again */ + forward_fpdus(nesvnic, nesqp); + + spin_lock_irqsave(&nesqp->pau_lock, flags); + if (nesqp->pau_pending) { + nesqp->pau_pending = 0; + } else { + nesqp->pau_busy = 0; + again = 0; + } + + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + } while (again); +} + +/** + * queue_fpdus - Handle fpdu's that hw passed up to sw + */ +static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + struct sk_buff *tmpskb; + struct nes_rskb_cb *cb; + struct iphdr *iph; + struct tcphdr *tcph; + unsigned char *tcph_end; + u32 rcv_nxt; + u32 rcv_wnd; + u32 seqnum; + u32 len; + bool process_it = false; + unsigned long flags; + + /* Move data ptr to after tcp header */ + iph = (struct iphdr *)skb->data; + tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl)); + seqnum = be32_to_cpu(tcph->seq); + tcph_end = (((char *)tcph) + (4 * tcph->doff)); + + len = be16_to_cpu(iph->tot_len); + if (skb->len > len) + skb_trim(skb, len); + skb_pull(skb, tcph_end - skb->data); + + /* Initialize tracking values */ + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->seqnum = seqnum; + + /* Make sure data is in the receive window */ + rcv_nxt = nesqp->pau_rcv_nxt; + rcv_wnd = le32_to_cpu(nesqp->nesqp_context->rcv_wnd); + if (!between(seqnum, rcv_nxt, (rcv_nxt + rcv_wnd))) { + nes_mgt_free_skb(nesvnic->nesdev, skb, PCI_DMA_TODEVICE); + nes_rem_ref_cm_node(nesqp->cm_node); + return; + } + + spin_lock_irqsave(&nesqp->pau_lock, flags); + + if (nesqp->pau_busy) + nesqp->pau_pending = 1; + else + nesqp->pau_busy = 1; + + /* Queue skb by sequence number */ + if (skb_queue_len(&nesqp->pau_list) == 0) { + skb_queue_head(&nesqp->pau_list, skb); + } else { + tmpskb = nesqp->pau_list.next; + while (tmpskb != (struct sk_buff *)&nesqp->pau_list) { + cb = (struct nes_rskb_cb *)&tmpskb->cb[0]; + if (before(seqnum, cb->seqnum)) + break; + tmpskb = tmpskb->next; + } + skb_insert(tmpskb, skb, &nesqp->pau_list); + } + if (nesqp->pau_state == PAU_READY) + process_it = true; + spin_unlock_irqrestore(&nesqp->pau_lock, flags); + + if (process_it) + process_fpdus(nesvnic, nesqp); + + return; +} + +/** + * mgt_thread - Handle mgt skbs in a safe context + */ +static int mgt_thread(void *context) +{ + struct nes_vnic *nesvnic = context; + struct sk_buff *skb; + struct nes_rskb_cb *cb; + + while (!kthread_should_stop()) { + wait_event_interruptible(nesvnic->mgt_wait_queue, + skb_queue_len(&nesvnic->mgt_skb_list) || kthread_should_stop()); + while 
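
/*
 * queue_fpdus()/process_fpdus() coalesce concurrent arrivals with a
 * busy/pending flag pair under pau_lock: whoever finds busy clear drains the
 * queue, and anyone arriving mid-drain just sets pending so the drainer loops
 * once more. A userspace sketch of that idiom, with a pthread mutex standing
 * in for the spinlock:
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool busy, pending;

static void kick(void)	/* arrival path */
{
	pthread_mutex_lock(&lock);
	if (busy)
		pending = true;		/* drainer will notice and rerun */
	else
		busy = true;		/* we become the drainer */
	pthread_mutex_unlock(&lock);
}

static void drain(void)	/* forwarding path */
{
	bool again = true;

	while (again) {
		/* ... forward all currently queued FPDUs here ... */
		pthread_mutex_lock(&lock);
		if (pending) {
			pending = false;	/* new work arrived mid-pass */
		} else {
			busy = false;		/* queue quiet; release ownership */
			again = false;
		}
		pthread_mutex_unlock(&lock);
	}
}

int main(void)
{
	kick();
	drain();
	printf("busy=%d pending=%d\n", busy, pending);
	return 0;
}
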
((skb_queue_len(&nesvnic->mgt_skb_list)) && !kthread_should_stop()) { + skb = skb_dequeue(&nesvnic->mgt_skb_list); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->data_start = skb->data - ETH_HLEN; + cb->busaddr = pci_map_single(nesvnic->nesdev->pcidev, cb->data_start, + nesvnic->max_frame_size, PCI_DMA_TODEVICE); + queue_fpdus(skb, nesvnic, cb->nesqp); + } + } + + /* Closing down so delete any entries on the queue */ + while (skb_queue_len(&nesvnic->mgt_skb_list)) { + skb = skb_dequeue(&nesvnic->mgt_skb_list); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + nes_rem_ref_cm_node(cb->nesqp->cm_node); + dev_kfree_skb_any(skb); + } + return 0; +} + +/** + * nes_queue_mgt_skbs - Queue skb so it can be handled in a thread context + */ +void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + struct nes_rskb_cb *cb; + + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->nesqp = nesqp; + skb_queue_tail(&nesvnic->mgt_skb_list, skb); + wake_up_interruptible(&nesvnic->mgt_wait_queue); +} + +void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp) +{ + struct sk_buff *skb; + unsigned long flags; + atomic_inc(&pau_qps_destroyed); + + /* Free packets that have not yet been forwarded */ + /* Lock is acquired by skb_dequeue when removing the skb */ + spin_lock_irqsave(&nesqp->pau_lock, flags); + while (skb_queue_len(&nesqp->pau_list)) { + skb = skb_dequeue(&nesqp->pau_list); + nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE); + nes_rem_ref_cm_node(nesqp->cm_node); + } + spin_unlock_irqrestore(&nesqp->pau_lock, flags); +} +
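
/*
 * mgt_thread() is a classic kthread consumer: sleep until the skb list is
 * non-empty or a stop is requested, drain, and discard leftovers on the way
 * out. The same shape in portable userspace C, with a mutex and condition
 * variable standing in for the wait queue and a counter standing in for
 * skb_queue_len():
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int queued;		/* models skb_queue_len(&mgt_skb_list) */
static bool should_stop;	/* models kthread_should_stop() */

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (!should_stop) {
		while (!queued && !should_stop)
			pthread_cond_wait(&cond, &lock);  /* wait_event */
		while (queued && !should_stop) {
			queued--;			  /* skb_dequeue */
			pthread_mutex_unlock(&lock);
			/* ... map the buffer and hand it to queue_fpdus() ... */
			pthread_mutex_lock(&lock);
		}
	}
	queued = 0;	/* shutdown: drop anything still queued */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	pthread_mutex_lock(&lock);
	queued += 3;			/* nes_queue_mgt_skbs() */
	should_stop = true;		/* kthread_stop() */
	pthread_cond_broadcast(&cond);	/* wake_up_interruptible() */
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	printf("left queued: %d\n", queued);
	return 0;
}
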
+static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request *cqp_request) +{ + struct pau_qh_chg *qh_chg = cqp_request->cqp_callback_pointer; + struct nes_cqp_request *new_request; + struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_adapter *nesadapter; + struct nes_qp *nesqp; + struct nes_v4_quad nes_quad; + u32 crc_value; + u64 u64temp; + + nesadapter = nesdev->nesadapter; + nesqp = qh_chg->nesqp; + + /* Should we handle the bad completion */ + if (cqp_request->major_code) + WARN(1, PFX "Invalid cqp_request major_code=0x%x\n", + cqp_request->major_code); + + switch (nesqp->pau_state) { + case PAU_DEL_QH: + /* Old hash code deleted, now set the new one */ + nesqp->pau_state = PAU_ADD_LB_QH; + new_request = nes_get_cqp_request(nesdev); + if (new_request == NULL) { + nes_debug(NES_DBG_PAU, "Failed to get a new_request.\n"); + WARN_ON(1); + return; + } + + memset(&nes_quad, 0, sizeof(nes_quad)); + nes_quad.DstIpAdrIndex = + cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); + nes_quad.SrcIpadr = cpu_to_be32(0x7f000001); + nes_quad.TcpPorts[0] = swab16(nesqp->nesqp_context->tcpPorts[1]); + nes_quad.TcpPorts[1] = swab16(nesqp->nesqp_context->tcpPorts[0]); + + /* Produce hash key */ + crc_value = get_crc_value(&nes_quad); + nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); + nes_debug(NES_DBG_PAU, "new HTE Index = 0x%08X, CRC = 0x%08X\n", + nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); + + nesqp->hte_index &= nesadapter->hte_index_mask; + nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); + nesqp->nesqp_context->ip0 = cpu_to_le32(0x7f000001); + nesqp->nesqp_context->rcv_nxt = cpu_to_le32(nesqp->pau_rcv_nxt); + + cqp_wqe = &new_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, + NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | + NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); + u64temp = (u64)nesqp->nesqp_context_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + nes_debug(NES_DBG_PAU, "Waiting for CQP completion for adding the quad hash.\n"); + + new_request->cqp_callback_pointer = qh_chg; + new_request->callback = 1; + new_request->cqp_callback = nes_chg_qh_handler; + atomic_set(&new_request->refcount, 1); + nes_post_cqp_request(nesdev, new_request); + break; + + case PAU_ADD_LB_QH: + /* Start processing the queued fpdu's */ + nesqp->pau_state = PAU_READY; + process_fpdus(qh_chg->nesvnic, qh_chg->nesqp); + kfree(qh_chg); + break; + } +} + +/** + * nes_change_quad_hash + */ +static int nes_change_quad_hash(struct nes_device *nesdev, + struct nes_vnic *nesvnic, struct nes_qp *nesqp) +{ + struct nes_cqp_request *cqp_request = NULL; + struct pau_qh_chg *qh_chg = NULL; + u64 u64temp; + struct nes_hw_cqp_wqe *cqp_wqe; + int ret = 0; + + cqp_request = nes_get_cqp_request(nesdev); + if (cqp_request == NULL) { + nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n"); + ret = -ENOMEM; + goto chg_qh_err; + } + + qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC); + if (qh_chg == NULL) { + nes_debug(NES_DBG_PAU, "Failed to allocate a qh_chg.\n"); + ret = -ENOMEM; + goto chg_qh_err; + } + qh_chg->nesdev = nesdev; + qh_chg->nesvnic = nesvnic; + qh_chg->nesqp = nesqp; + nesqp->pau_state = PAU_DEL_QH; + + cqp_wqe = &cqp_request->cqp_wqe; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, + NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | NES_CQP_QP_DEL_HTE | + NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); + u64temp = (u64)nesqp->nesqp_context_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + nes_debug(NES_DBG_PAU, "Waiting for CQP completion for deleting the quad hash.\n"); + + cqp_request->cqp_callback_pointer = qh_chg; + cqp_request->callback = 1; + cqp_request->cqp_callback = nes_chg_qh_handler; + atomic_set(&cqp_request->refcount, 1); + nes_post_cqp_request(nesdev, cqp_request); + + return ret; + +chg_qh_err: + kfree(qh_chg); + if (cqp_request) + nes_put_cqp_request(nesdev, cqp_request); + return ret; +} + +/** + * nes_mgt_ce_handler + * This management code deals with any packed and unaligned (pau) fpdu's + * that the hardware cannot handle. + */
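
/*
 * The quad-hash rebind above keys the hardware hash-table entry on a CRC of
 * the four-tuple, inverts it, and masks it down to a bucket index with
 * hte_index_mask. A userspace sketch of that derivation; the struct layout,
 * the mask value, and the use of CRC-32C are illustrative assumptions, since
 * get_crc_value() is defined elsewhere in the driver:
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t crc32c(const uint8_t *p, size_t n)	/* bitwise CRC-32C */
{
	uint32_t crc = 0xffffffffu;

	while (n--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)
			crc = (crc >> 1) ^ (0x82f63b38u & -(crc & 1u));
	}
	return crc;
}

struct v4_quad {		/* rough stand-in for struct nes_v4_quad */
	uint32_t dst_ip_index;
	uint32_t src_ip;
	uint16_t tcp_ports[2];
};

int main(void)
{
	struct v4_quad q;
	uint32_t mask = 0x3ff;	/* hypothetical hte_index_mask */
	uint32_t hash;

	memset(&q, 0, sizeof(q));
	q.src_ip = 0x0100007f;	/* 127.0.0.1, matching the loopback rebind */
	q.tcp_ports[0] = 0xd204;
	q.tcp_ports[1] = 0x5000;

	hash = crc32c((const uint8_t *)&q, sizeof(q)) ^ 0xffffffffu;
	printf("bucket 0x%03x\n", hash & mask);
	return 0;
}
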
+static void nes_mgt_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) +{ + struct nes_vnic_mgt *mgtvnic = container_of(cq, struct nes_vnic_mgt, mgt_cq); + struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 head; + u32 cq_size; + u32 cqe_count = 0; + u32 cqe_misc; + u32 qp_id = 0; + u32 skbs_needed; + unsigned long context; + struct nes_qp *nesqp; + struct sk_buff *rx_skb; + struct nes_rskb_cb *cb; + + head = cq->cq_head; + cq_size = cq->cq_size; + + while (1) { + cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]); + if (!(cqe_misc & NES_NIC_CQE_VALID)) + break; + + nesqp = NULL; + if (cqe_misc & NES_NIC_CQE_ACCQP_VALID) { + qp_id = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_ACCQP_ID_IDX]); + qp_id &= 0x001fffff; + if (qp_id < nesadapter->max_qp) { + context = (unsigned long)nesadapter->qp_table[qp_id - NES_FIRST_QPN]; + nesqp = (struct nes_qp *)context; + } + } + + if (nesqp) { + if (nesqp->pau_mode == false) { + nesqp->pau_mode = true; /* First time for this qp */ + nesqp->pau_rcv_nxt = le32_to_cpu( + cq->cq_vbase[head].cqe_words[NES_NIC_CQE_HASH_RCVNXT]); + skb_queue_head_init(&nesqp->pau_list); + spin_lock_init(&nesqp->pau_lock); + atomic_inc(&pau_qps_created); + nes_change_quad_hash(nesdev, mgtvnic->nesvnic, nesqp); + } + + rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail]; + rx_skb->len = 0; + skb_put(rx_skb, cqe_misc & 0x0000ffff); + rx_skb->protocol = eth_type_trans(rx_skb, mgtvnic->nesvnic->netdev); + cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; + pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, PCI_DMA_FROMDEVICE); + cb->busaddr = 0; + mgtvnic->mgt.rq_tail++; + mgtvnic->mgt.rq_tail &= mgtvnic->mgt.rq_size - 1; + + nes_add_ref_cm_node(nesqp->cm_node); + nes_queue_mgt_skbs(rx_skb, mgtvnic->nesvnic, nesqp); + } else { + printk(KERN_ERR PFX "Invalid QP %d for packed/unaligned handling\n", qp_id); + } + + cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0; + cqe_count++; + if (++head >= cq_size) + head = 0; + + if (cqe_count == 255) { + /* Replenish mgt CQ */ + nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16)); + nesdev->currcq_count += cqe_count; + cqe_count = 0; + } + + skbs_needed = atomic_inc_return(&mgtvnic->rx_skbs_needed); + if (skbs_needed > (mgtvnic->mgt.rq_size >> 1)) + nes_replenish_mgt_rq(mgtvnic); + } + + cq->cq_head = head; + nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + cq->cq_number | (cqe_count << 16)); + nes_read32(nesdev->regs + NES_CQE_ALLOC); + nesdev->currcq_count += cqe_count; +} + +/** + * nes_init_mgt_qp + */ +int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic) +{ + struct nes_vnic_mgt *mgtvnic; + u32 counter; + void *vmem; + dma_addr_t pmem; + struct nes_hw_cqp_wqe *cqp_wqe; + u32 cqp_head; + unsigned long flags; + struct nes_hw_nic_qp_context *mgt_context; + u64 u64temp; + struct nes_hw_nic_rq_wqe *mgt_rqe; + struct sk_buff *skb; + u32 wqe_count; + struct nes_rskb_cb *cb; + u32 mgt_mem_size; + void *mgt_vbase; + dma_addr_t mgt_pbase; + int i; + int ret; + + /* Allocate space for all mgt QPs at once */ + mgtvnic = kzalloc(NES_MGT_QP_COUNT * sizeof(struct nes_vnic_mgt), GFP_KERNEL); + if (mgtvnic == NULL) { + nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt structure\n"); + return -ENOMEM; + } + + /* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */ + /* We are not sending from this NIC so sq is not allocated */ + mgt_mem_size = 256 + + (NES_MGT_WQ_COUNT * 
sizeof(struct nes_hw_nic_rq_wqe)) + + (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_cqe)) + + sizeof(struct nes_hw_nic_qp_context); + mgt_mem_size = (mgt_mem_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + mgt_vbase = pci_alloc_consistent(nesdev->pcidev, NES_MGT_QP_COUNT * mgt_mem_size, &mgt_pbase); + if (!mgt_vbase) { + kfree(mgtvnic); + nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt host descriptor rings\n"); + return -ENOMEM; + } + + nesvnic->mgt_mem_size = NES_MGT_QP_COUNT * mgt_mem_size; + nesvnic->mgt_vbase = mgt_vbase; + nesvnic->mgt_pbase = mgt_pbase; + + skb_queue_head_init(&nesvnic->mgt_skb_list); + init_waitqueue_head(&nesvnic->mgt_wait_queue); + nesvnic->mgt_thread = kthread_run(mgt_thread, nesvnic, "nes_mgt_thread"); + + for (i = 0; i < NES_MGT_QP_COUNT; i++) { + mgtvnic->nesvnic = nesvnic; + mgtvnic->mgt.qp_id = nesdev->mac_index + NES_MGT_QP_OFFSET + i; + memset(mgt_vbase, 0, mgt_mem_size); + nes_debug(NES_DBG_INIT, "Allocated mgt QP structures at %p (phys = %016lX), size = %u.\n", + mgt_vbase, (unsigned long)mgt_pbase, mgt_mem_size); + + vmem = (void *)(((unsigned long)mgt_vbase + (256 - 1)) & + ~(unsigned long)(256 - 1)); + pmem = (dma_addr_t)(((unsigned long long)mgt_pbase + (256 - 1)) & + ~(unsigned long long)(256 - 1)); + + spin_lock_init(&mgtvnic->mgt.rq_lock); + + /* setup the RQ */ + mgtvnic->mgt.rq_vbase = vmem; + mgtvnic->mgt.rq_pbase = pmem; + mgtvnic->mgt.rq_head = 0; + mgtvnic->mgt.rq_tail = 0; + mgtvnic->mgt.rq_size = NES_MGT_WQ_COUNT; + + /* setup the CQ */ + vmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)); + pmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)); + + mgtvnic->mgt_cq.cq_number = mgtvnic->mgt.qp_id; + mgtvnic->mgt_cq.cq_vbase = vmem; + mgtvnic->mgt_cq.cq_pbase = pmem; + mgtvnic->mgt_cq.cq_head = 0; + mgtvnic->mgt_cq.cq_size = NES_MGT_WQ_COUNT; + + mgtvnic->mgt_cq.ce_handler = nes_mgt_ce_handler; + + /* Send CreateCQ request to CQP */ + spin_lock_irqsave(&nesdev->cqp.lock, flags); + cqp_head = nesdev->cqp.sq_head; + + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32( + NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID | + ((u32)mgtvnic->mgt_cq.cq_size << 16)); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32( + mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16)); + u64temp = (u64)mgtvnic->mgt_cq.cq_pbase; + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0; + u64temp = (unsigned long)&mgtvnic->mgt_cq; + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1)); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = + cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF); + cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0; + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + /* Send CreateQP request to CQP */ + mgt_context = (void *)(&mgtvnic->mgt_cq.cq_vbase[mgtvnic->mgt_cq.cq_size]); + mgt_context->context_words[NES_NIC_CTX_MISC_IDX] = + cpu_to_le32((u32)NES_MGT_CTX_SIZE | + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12)); + nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n", + nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE), + nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE)); + if (nes_read_indexed(nesdev, 
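
/*
 * nes_init_mgt_qp() carves one DMA allocation into RQ, CQ, and QP context,
 * aligning vmem/pmem up to 256 bytes and rounding each slice to PAGE_SIZE,
 * both via the "(x + a - 1) & ~(a - 1)" trick that only works for
 * power-of-two boundaries. A tiny userspace sketch:
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uintptr_t align_up(uintptr_t x, uintptr_t a)
{
	assert(a && (a & (a - 1)) == 0);	/* power of two only */
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	printf("%#lx\n", (unsigned long)align_up(0x1234, 256));	/* 0x1300 */
	printf("%#lx\n", (unsigned long)align_up(0x1300, 256));	/* unchanged */
	return 0;
}
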
NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0) + mgt_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE); + + u64temp = (u64)mgtvnic->mgt.rq_pbase; + mgt_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + mgt_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); + u64temp = (u64)mgtvnic->mgt.rq_pbase; + mgt_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp); + mgt_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32)); + + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP | + NES_CQP_QP_TYPE_NIC); + cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(mgtvnic->mgt.qp_id); + u64temp = (u64)mgtvnic->mgt_cq.cq_pbase + + (mgtvnic->mgt_cq.cq_size * sizeof(struct nes_hw_nic_cqe)); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + nesdev->cqp.sq_head = cqp_head; + + barrier(); + + /* Ring doorbell (2 WQEs) */ + nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + nes_debug(NES_DBG_INIT, "Waiting for create MGT QP%u to complete.\n", + mgtvnic->mgt.qp_id); + + ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), + NES_EVENT_TIMEOUT); + nes_debug(NES_DBG_INIT, "Create MGT QP%u completed, wait_event_timeout ret = %u.\n", + mgtvnic->mgt.qp_id, ret); + if (!ret) { + nes_debug(NES_DBG_INIT, "MGT QP%u create timeout expired\n", mgtvnic->mgt.qp_id); + if (i == 0) { + pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase, + nesvnic->mgt_pbase); + kfree(mgtvnic); + } else { + nes_destroy_mgt(nesvnic); + } + return -EIO; + } + + /* Populate the RQ */ + for (counter = 0; counter < (NES_MGT_WQ_COUNT - 1); counter++) { + skb = dev_alloc_skb(nesvnic->max_frame_size); + if (!skb) { + nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name); + return -ENOMEM; + } + + skb->dev = netdev; + + pmem = pci_map_single(nesdev->pcidev, skb->data, + nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); + cb = (struct nes_rskb_cb *)&skb->cb[0]; + cb->busaddr = pmem; + cb->maplen = nesvnic->max_frame_size; + + mgt_rqe = &mgtvnic->mgt.rq_vbase[counter]; + mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32((u32)nesvnic->max_frame_size); + mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0; + mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem); + mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32)); + mgtvnic->mgt.rx_skb[counter] = skb; + } + + init_timer(&mgtvnic->rq_wqes_timer); + mgtvnic->rq_wqes_timer.function = nes_mgt_rq_wqes_timeout; + mgtvnic->rq_wqes_timer.data = (unsigned long)mgtvnic; + + wqe_count = NES_MGT_WQ_COUNT - 1; + mgtvnic->mgt.rq_head = wqe_count; + barrier(); + do { + counter = min(wqe_count, ((u32)255)); + wqe_count -= counter; + nes_write32(nesdev->regs + NES_WQE_ALLOC, (counter << 24) | mgtvnic->mgt.qp_id); + } while (wqe_count); + + nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | + mgtvnic->mgt_cq.cq_number); + nes_read32(nesdev->regs + NES_CQE_ALLOC); + + mgt_vbase += mgt_mem_size; + mgt_pbase += mgt_mem_size; + nesvnic->mgtvnic[i] = mgtvnic++; + } + return 0; +} + + +void nes_destroy_mgt(struct nes_vnic *nesvnic) +{ + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_vnic_mgt *mgtvnic; + struct nes_vnic_mgt 
*first_mgtvnic; + unsigned long flags; + struct nes_hw_cqp_wqe *cqp_wqe; + u32 cqp_head; + struct sk_buff *rx_skb; + int i; + int ret; + + kthread_stop(nesvnic->mgt_thread); + + /* Free remaining NIC receive buffers */ + first_mgtvnic = nesvnic->mgtvnic[0]; + for (i = 0; i < NES_MGT_QP_COUNT; i++) { + mgtvnic = nesvnic->mgtvnic[i]; + if (mgtvnic == NULL) + continue; + + while (mgtvnic->mgt.rq_head != mgtvnic->mgt.rq_tail) { + rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail]; + nes_mgt_free_skb(nesdev, rx_skb, PCI_DMA_FROMDEVICE); + mgtvnic->mgt.rq_tail++; + mgtvnic->mgt.rq_tail &= (mgtvnic->mgt.rq_size - 1); + } + + spin_lock_irqsave(&nesdev->cqp.lock, flags); + + /* Destroy NIC QP */ + cqp_head = nesdev->cqp.sq_head; + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC)); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + mgtvnic->mgt.qp_id); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + + cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; + + /* Destroy NIC CQ */ + nes_fill_init_cqp_wqe(cqp_wqe, nesdev); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, + (NES_CQP_DESTROY_CQ | ((u32)mgtvnic->mgt_cq.cq_size << 16))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, + (mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16))); + + if (++cqp_head >= nesdev->cqp.sq_size) + cqp_head = 0; + + nesdev->cqp.sq_head = cqp_head; + barrier(); + + /* Ring doorbell (2 WQEs) */ + nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id); + + spin_unlock_irqrestore(&nesdev->cqp.lock, flags); + nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u," + " cqp.sq_tail=%u, cqp.sq_size=%u\n", + cqp_head, nesdev->cqp.sq_head, + nesdev->cqp.sq_tail, nesdev->cqp.sq_size); + + ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head), + NES_EVENT_TIMEOUT); + + nes_debug(NES_DBG_SHUTDOWN, "Destroy MGT QP returned, wait_event_timeout ret = %u, cqp_head=%u," + " cqp.sq_head=%u, cqp.sq_tail=%u\n", + ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail); + if (!ret) + nes_debug(NES_DBG_SHUTDOWN, "MGT QP%u destroy timeout expired\n", + mgtvnic->mgt.qp_id); + + nesvnic->mgtvnic[i] = NULL; + } + + if (nesvnic->mgt_vbase) { + pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase, + nesvnic->mgt_pbase); + nesvnic->mgt_vbase = NULL; + nesvnic->mgt_pbase = 0; + } + + kfree(first_mgtvnic); +} diff --git a/drivers/infiniband/hw/nes/nes_mgt.h b/drivers/infiniband/hw/nes/nes_mgt.h new file mode 100644 index 00000000000..4f7f701c4a8 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_mgt.h @@ -0,0 +1,97 @@ +/* +* Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved. +* +* This software is available to you under a choice of one of two +* licenses. You may choose to be licensed under the terms of the GNU +* General Public License (GPL) Version 2, available from the file +* COPYING in the main directory of this source tree, or the +* OpenIB.org BSD license below: +* +* Redistribution and use in source and binary forms, with or +* without modification, are permitted provided that the following +* conditions are met: +* +* - Redistributions of source code must retain the above +* copyright notice, this list of conditions and the following +* disclaimer. 
+* +* - Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following +* disclaimer in the documentation and/or other materials +* provided with the distribution. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ + +#ifndef __NES_MGT_H +#define __NES_MGT_H + +#define MPA_FRAMING 6 /* length is 2 bytes, crc is 4 bytes */ + +int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic); +void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp); +void nes_destroy_mgt(struct nes_vnic *nesvnic); +void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp); + +struct nes_hw_mgt { + struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */ + dma_addr_t rq_pbase; /* PCI memory for host rings */ + struct sk_buff *rx_skb[NES_NIC_WQ_SIZE]; + u16 qp_id; + u16 sq_head; + u16 rq_head; + u16 rq_tail; + u16 rq_size; + u8 replenishing_rq; + u8 reserved; + spinlock_t rq_lock; +}; + +struct nes_vnic_mgt { + struct nes_vnic *nesvnic; + struct nes_hw_mgt mgt; + struct nes_hw_nic_cq mgt_cq; + atomic_t rx_skbs_needed; + struct timer_list rq_wqes_timer; + atomic_t rx_skb_timer_running; +}; + +#define MAX_FPDU_FRAGS 4 +struct pau_fpdu_frag { + struct sk_buff *skb; + u64 physaddr; + u32 frag_len; + bool cmplt; +}; + +struct pau_fpdu_info { + struct nes_qp *nesqp; + struct nes_cqp_request *cqp_request; + void *hdr_vbase; + dma_addr_t hdr_pbase; + int hdr_len; + u16 data_len; + u16 frag_cnt; + struct pau_fpdu_frag frags[MAX_FPDU_FRAGS]; +}; + +enum pau_qh_state { + PAU_DEL_QH, + PAU_ADD_LB_QH, + PAU_READY +}; + +struct pau_qh_chg { + struct nes_device *nesdev; + struct nes_vnic *nesvnic; + struct nes_qp *nesqp; +}; + +#endif /* __NES_MGT_H */ diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 1b0938c8777..49eb5111d2c 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -40,6 +40,7 @@ #include <linux/if_arp.h> #include <linux/if_vlan.h> #include <linux/ethtool.h> +#include <linux/slab.h> #include <net/tcp.h> #include <net/inet_common.h> @@ -91,6 +92,7 @@ static struct nic_qp_map *nic_qp_mapping_per_function[] = { static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN; static int debug = -1; +static int nics_per_function = 1; /** * nes_netdev_poll @@ -98,7 +100,6 @@ static int debug = -1; static int nes_netdev_poll(struct napi_struct *napi, int budget) { struct nes_vnic *nesvnic = container_of(napi, struct nes_vnic, napi); - struct net_device *netdev = nesvnic->netdev; struct nes_device *nesdev = nesvnic->nesdev; struct nes_hw_nic_cq *nescq = &nesvnic->nic_cq; @@ -111,7 +112,7 @@ static int nes_netdev_poll(struct napi_struct *napi, int budget) nes_nic_ce_handler(nesdev, nescq); if (nescq->cqes_pending == 0) { - netif_rx_complete(netdev, napi); + napi_complete(napi); /* clear out completed cqes and arm */ nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT | nescq->cq_number | (nescq->cqe_allocs_pending << 16)); @@ -143,6 +144,7 @@ static int nes_netdev_open(struct net_device *netdev) u32 nic_active_bit; u32 nic_active; struct list_head *list_pos, *list_temp; + unsigned long flags; assert(nesdev != NULL); @@ -201,7 +203,8 @@ static int nes_netdev_open(struct net_device *netdev) nes_debug(NES_DBG_NETDEV, "i=%d, perfect filter table index= %d, PERF FILTER LOW" " (Addr:%08X) = %08X, HIGH = %08X.\n", i, nesvnic->qp_nic_index[i], - NES_IDX_PERFECT_FILTER_LOW+((nesvnic->perfect_filter_index + i) * 8), + NES_IDX_PERFECT_FILTER_LOW+ + (nesvnic->qp_nic_index[i] * 8), macaddr_low, (u32)macaddr_high | NES_MAC_ADDR_VALID | ((((u32)nesvnic->nic_index) << 16))); @@ -230,12 +233,36 @@ static int nes_netdev_open(struct net_device *netdev) NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); first_nesvnic = nesvnic; } + if (first_nesvnic->linkup) { /* Enable network packets */ nesvnic->linkup = 1; netif_start_queue(netdev); netif_carrier_on(netdev); } + + spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); + if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) { + nesdev->link_recheck = 1; + mod_delayed_work(system_wq, &nesdev->work, + NES_LINK_RECHECK_DELAY); + } + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + + spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags); + if (nesvnic->of_device_registered) { + nesdev->nesadapter->send_term_ok = 1; + if (nesvnic->linkup == 1) { + if (nesdev->iw_status == 0) { + nesdev->iw_status = 1; + nes_port_ibevent(nesvnic); + } + } else { + nesdev->iw_status = 0; + } + } + spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags); + napi_enable(&nesvnic->napi); nesvnic->netdev_open = 1; @@ -254,6 +281,7 @@ static int nes_netdev_stop(struct net_device *netdev) u32 nic_active; struct nes_vnic *first_nesvnic = NULL; struct list_head *list_pos, *list_temp; + unsigned long flags; nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n", nesvnic, nesdev, netdev, netdev->name); @@ -262,6 +290,7 @@ static int nes_netdev_stop(struct net_device *netdev) if (netif_msg_ifdown(nesvnic)) printk(KERN_INFO PFX "%s: disabling interface\n", netdev->name); + netif_carrier_off(netdev); /* Disable network packets */ napi_disable(&nesvnic->napi); @@ -272,14 +301,18 @@ static int nes_netdev_stop(struct net_device *netdev) break; } - if (first_nesvnic->netdev_open == 0) + if 
((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic) && + (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) != + PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+ + (0x200*nesdev->mac_index), 0xffffffff); + nes_write_indexed(first_nesvnic->nesdev, + NES_IDX_MAC_INT_MASK+ + (0x200*first_nesvnic->nesdev->mac_index), + ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | + NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); + } else { nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff); - else if ((first_nesvnic != nesvnic) && - (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) != PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) { - nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index), 0xffffffff); - nes_write_indexed(first_nesvnic->nesdev, NES_IDX_MAC_INT_MASK + (0x200 * first_nesvnic->nesdev->mac_index), - ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | - NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); } nic_active_mask = ~((u32)(1 << nesvnic->nic_index)); @@ -301,12 +334,17 @@ static int nes_netdev_stop(struct net_device *netdev) nic_active &= nic_active_mask; nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active); - + spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags); if (nesvnic->of_device_registered) { - nes_destroy_ofa_device(nesvnic->nesibdev); - nesvnic->nesibdev = NULL; - nesvnic->of_device_registered = 0; + nesdev->nesadapter->send_term_ok = 0; + nesdev->iw_status = 0; + if (nesvnic->linkup == 1) + nes_port_ibevent(nesvnic); } + del_timer_sync(&nesvnic->event_timer); + nesvnic->event_timer.function = NULL; + spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags); + nes_destroy_nic_qp(nesvnic); nesvnic->netdev_open = 0; @@ -346,24 +384,20 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) /* bump past the vlan tag */ wqe_fragment_length++; /* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */ + wqe_misc |= NES_NIC_SQ_WQE_COMPLETION; if (skb->ip_summed == CHECKSUM_PARTIAL) { - tcph = tcp_hdr(skb); - if (1) { - if (skb_is_gso(skb)) { - /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... seg size = %u\n", - netdev->name, skb_is_gso(skb)); */ - wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | - NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb); - set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX, - ((u32)tcph->doff) | - (((u32)(((unsigned char *)tcph) - skb->data)) << 4)); - } else { - wqe_misc |= NES_NIC_SQ_WQE_COMPLETION; - } + if (skb_is_gso(skb)) { + tcph = tcp_hdr(skb); + /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... 
is_gso = %u seg size = %u\n", + netdev->name, skb_is_gso(skb), skb_shinfo(skb)->gso_size); */ + wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | (u16)skb_shinfo(skb)->gso_size; + set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX, + ((u32)tcph->doff) | + (((u32)(((unsigned char *)tcph) - skb->data)) << 4)); } } else { /* CHECKSUM_HW */ - wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM | NES_NIC_SQ_WQE_COMPLETION; + wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM; } set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX, @@ -395,21 +429,20 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) if (skb_headlen(skb) == skb->len) { if (skb_headlen(skb) <= NES_FIRST_FRAG_SIZE) { nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_2_1_IDX] = 0; - nesnic->tx_skb[nesnic->sq_head] = NULL; - dev_kfree_skb(skb); + nesnic->tx_skb[nesnic->sq_head] = skb; } } else { /* Deal with Fragments */ nesnic->tx_skb[nesnic->sq_head] = skb; for (skb_fragment_index = 0; skb_fragment_index < skb_shinfo(skb)->nr_frags; skb_fragment_index++) { - bus_address = pci_map_page( nesdev->pcidev, - skb_shinfo(skb)->frags[skb_fragment_index].page, - skb_shinfo(skb)->frags[skb_fragment_index].page_offset, - skb_shinfo(skb)->frags[skb_fragment_index].size, - PCI_DMA_TODEVICE); + skb_frag_t *frag = + &skb_shinfo(skb)->frags[skb_fragment_index]; + bus_address = skb_frag_dma_map(&nesdev->pcidev->dev, + frag, 0, skb_frag_size(frag), + DMA_TO_DEVICE); wqe_fragment_length[wqe_fragment_index] = - cpu_to_le16(skb_shinfo(skb)->frags[skb_fragment_index].size); + cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[skb_fragment_index])); set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index), bus_address); wqe_fragment_index++; @@ -437,7 +470,7 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) struct nes_hw_nic_sq_wqe *nic_sqe; struct tcphdr *tcph; /* struct udphdr *udph; */ -#define NES_MAX_TSO_FRAGS 18 +#define NES_MAX_TSO_FRAGS MAX_SKB_FRAGS /* 64K segment plus overflow on each side */ dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS]; dma_addr_t bus_address; @@ -448,7 +481,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) u32 wqe_count=1; u32 send_rc; struct iphdr *iph; - unsigned long flags; __le16 *wqe_fragment_length; u32 nr_frags; u32 original_first_length; @@ -459,7 +491,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) u16 nhoffset; u16 wqes_needed; u16 wqes_available; - u32 old_head; u32 wqe_misc; /* @@ -475,13 +506,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) if (netif_queue_stopped(netdev)) return NETDEV_TX_BUSY; - local_irq_save(flags); - if (!spin_trylock(&nesnic->sq_lock)) { - local_irq_restore(flags); - nesvnic->sq_locked++; - return NETDEV_TX_LOCKED; - } - /* Check if SQ is full */ if ((((nesnic->sq_tail+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) == 1) { if (!netif_queue_stopped(netdev)) { @@ -493,7 +517,6 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) } } nesvnic->sq_full++; - spin_unlock_irqrestore(&nesnic->sq_lock, flags); return NETDEV_TX_BUSY; } @@ -507,7 +530,6 @@ sq_no_longer_full: if (skb_is_gso(skb)) { nesvnic->segmented_tso_requests++; nesvnic->tso_requests++; - old_head = nesnic->sq_head; /* Basically 4 fragments available per WQE with extended fragments */ wqes_needed = nr_frags >> 2; wqes_needed += (nr_frags&3)?1:0; @@ -526,7 +548,6 @@ sq_no_longer_full: } } 
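
/*
 * The TSO path above reserves one WQE per four page fragments:
 * "wqes_needed = nr_frags >> 2" plus one more when "(nr_frags & 3)" is
 * non-zero, i.e. a ceiling division done with shift and mask. A short
 * userspace check of that arithmetic:
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t wqes_for_frags(uint32_t nr_frags)
{
	return (nr_frags >> 2) + ((nr_frags & 3) ? 1 : 0);
}

int main(void)
{
	for (uint32_t n = 1; n <= 9; n++)
		printf("%u frags -> %u WQEs\n", n, wqes_for_frags(n));
	return 0;
}
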
nesvnic->sq_full++; - spin_unlock_irqrestore(&nesnic->sq_lock, flags); nes_debug(NES_DBG_NIC_TX, "%s: HNIC SQ full- TSO request has too many frags!\n", netdev->name); return NETDEV_TX_BUSY; @@ -535,11 +556,12 @@ tso_sq_no_longer_full: /* Map all the buffers */ for (tso_frag_count=0; tso_frag_count < skb_shinfo(skb)->nr_frags; tso_frag_count++) { - tso_bus_address[tso_frag_count] = pci_map_page( nesdev->pcidev, - skb_shinfo(skb)->frags[tso_frag_count].page, - skb_shinfo(skb)->frags[tso_frag_count].page_offset, - skb_shinfo(skb)->frags[tso_frag_count].size, - PCI_DMA_TODEVICE); + skb_frag_t *frag = + &skb_shinfo(skb)->frags[tso_frag_count]; + tso_bus_address[tso_frag_count] = + skb_frag_dma_map(&nesdev->pcidev->dev, + frag, 0, skb_frag_size(frag), + DMA_TO_DEVICE); } tso_frag_index = 0; @@ -570,10 +592,10 @@ tso_sq_no_longer_full: nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n", original_first_length, NES_FIRST_FRAG_SIZE); nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," - " (%u frags), tso_size=%u\n", + " (%u frags), is_gso = %u tso_size=%u\n", netdev->name, skb->len, skb_headlen(skb), - skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); + skb_shinfo(skb)->nr_frags, skb_is_gso(skb), skb_shinfo(skb)->gso_size); } memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer, skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), @@ -605,14 +627,16 @@ tso_sq_no_longer_full: wqe_fragment_length[wqe_fragment_index] = 0; set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, bus_address); + tso_wqe_length += skb_headlen(skb) - + original_first_length; } while (wqe_fragment_index < 5) { wqe_fragment_length[wqe_fragment_index] = - cpu_to_le16(skb_shinfo(skb)->frags[tso_frag_index].size); + cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index])); set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index), (u64)tso_bus_address[tso_frag_index]); wqe_fragment_index++; - tso_wqe_length += skb_shinfo(skb)->frags[tso_frag_index++].size; + tso_wqe_length += skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index++]); if (wqe_fragment_index < 5) wqe_fragment_length[wqe_fragment_index] = 0; if (tso_frag_index == tso_frag_count) @@ -623,8 +647,8 @@ tso_sq_no_longer_full: } else { nesnic->tx_skb[nesnic->sq_head] = NULL; } - wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb); - if ((tso_wqe_length + original_first_length) > skb_is_gso(skb)) { + wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_shinfo(skb)->gso_size; + if ((tso_wqe_length + original_first_length) > skb_shinfo(skb)->gso_size) { wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE; } else { iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset); @@ -649,17 +673,13 @@ tso_sq_no_longer_full: skb_set_transport_header(skb, hoffset); skb_set_network_header(skb, nhoffset); send_rc = nes_nic_send(skb, netdev); - if (send_rc != NETDEV_TX_OK) { - spin_unlock_irqrestore(&nesnic->sq_lock, flags); + if (send_rc != NETDEV_TX_OK) return NETDEV_TX_OK; - } } } else { send_rc = nes_nic_send(skb, netdev); - if (send_rc != NETDEV_TX_OK) { - spin_unlock_irqrestore(&nesnic->sq_lock, flags); + if (send_rc != NETDEV_TX_OK) return NETDEV_TX_OK; - } } barrier(); @@ -669,7 +689,6 @@ tso_sq_no_longer_full: (wqe_count << 24) | (1 << 23) | nesvnic->nic.qp_id); netdev->trans_start = jiffies; - spin_unlock_irqrestore(&nesnic->sq_lock, flags); return NETDEV_TX_OK; } @@ -789,14 +808,13 @@ static int nes_netdev_set_mac_address(struct net_device *netdev, void 
*p) int i; u32 macaddr_low; u16 macaddr_high; - DECLARE_MAC_BUF(mac); if (!is_valid_ether_addr(mac_addr->sa_data)) return -EADDRNOTAVAIL; memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len); - printk(PFX "%s: Address length = %d, Address = %s\n", - __func__, netdev->addr_len, print_mac(mac, mac_addr->sa_data)); + printk(PFX "%s: Address length = %d, Address = %pM\n", + __func__, netdev->addr_len, mac_addr->sa_data); macaddr_high = ((u16)netdev->dev_addr[0]) << 8; macaddr_high += (u16)netdev->dev_addr[1]; macaddr_low = ((u32)netdev->dev_addr[2]) << 24; @@ -820,6 +838,20 @@ static int nes_netdev_set_mac_address(struct net_device *netdev, void *p) } +static void set_allmulti(struct nes_device *nesdev, u32 nic_active_bit) +{ + u32 nic_active; + + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active &= ~nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); +} + +#define get_addr(addrs, index) ((addrs) + (index) * ETH_ALEN) + /** * nes_netdev_set_multicast_list */ @@ -827,7 +859,7 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; - struct dev_mc_list *multicast_addr; + struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; u32 nic_active_bit; u32 nic_active; u32 perfect_filter_register_address; @@ -836,7 +868,13 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) u8 mc_all_on = 0; u8 mc_index; int mc_nic_index = -1; + u8 pft_entries_preallocated = max(nesadapter->adapter_fcn_count * + nics_per_function, 4); + u8 max_pft_entries_avaiable = NES_PFT_SIZE - pft_entries_preallocated; + unsigned long flags; + int mc_count = netdev_mc_count(netdev); + spin_lock_irqsave(&nesadapter->resource_lock, flags); nic_active_bit = 1 << nesvnic->nic_index; if (netdev->flags & IFF_PROMISC) { @@ -847,14 +885,9 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) nic_active |= nic_active_bit; nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); mc_all_on = 1; - } else if ((netdev->flags & IFF_ALLMULTI) || (netdev->mc_count > NES_MULTICAST_PF_MAX) || + } else if ((netdev->flags & IFF_ALLMULTI) || (nesvnic->nic_index > 3)) { - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); - nic_active |= nic_active_bit; - nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active); - nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); - nic_active &= ~nic_active_bit; - nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); + set_allmulti(nesdev, nic_active_bit); mc_all_on = 1; } else { nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); @@ -865,30 +898,59 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); } - nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscous = %d, All Multicast = %d.\n", - netdev->mc_count, (netdev->flags & IFF_PROMISC)?1:0, - (netdev->flags & IFF_ALLMULTI)?1:0); + nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscuous = %d, All Multicast = %d.\n", + mc_count, !!(netdev->flags & IFF_PROMISC), + !!(netdev->flags & IFF_ALLMULTI)); if (!mc_all_on) { - multicast_addr = netdev->mc_list; - perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + 0x80; - 
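
/*
 * Both the unicast and multicast perfect-filter writes split a 6-byte MAC
 * into a 16-bit high half and a 32-bit low half before hitting the two
 * register words. A standalone userspace sketch of that packing:
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void mac_to_regs(const uint8_t mac[6], uint16_t *hi, uint32_t *lo)
{
	*hi = (uint16_t)(((uint16_t)mac[0] << 8) | mac[1]);
	*lo = ((uint32_t)mac[2] << 24) | ((uint32_t)mac[3] << 16) |
	      ((uint32_t)mac[4] << 8) | mac[5];
}

int main(void)
{
	const uint8_t mac[6] = { 0x00, 0x12, 0x55, 0xaa, 0xbb, 0xcc };
	uint16_t hi;
	uint32_t lo;

	mac_to_regs(mac, &hi, &lo);
	printf("high 0x%04" PRIx16 " low 0x%08" PRIx32 "\n", hi, lo);
	return 0;
}
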
perfect_filter_register_address += nesvnic->nic_index*0x40; - for (mc_index=0; mc_index < NES_MULTICAST_PF_MAX; mc_index++) { - while (multicast_addr && nesvnic->mcrq_mcast_filter && ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, multicast_addr->dmi_addr)) == 0)) - multicast_addr = multicast_addr->next; + char *addrs; + int i; + struct netdev_hw_addr *ha; + + addrs = kmalloc(ETH_ALEN * mc_count, GFP_ATOMIC); + if (!addrs) { + set_allmulti(nesdev, nic_active_bit); + goto unlock; + } + i = 0; + netdev_for_each_mc_addr(ha, netdev) + memcpy(get_addr(addrs, i++), ha->addr, ETH_ALEN); + + perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + + pft_entries_preallocated * 0x8; + for (i = 0, mc_index = 0; mc_index < max_pft_entries_avaiable; + mc_index++) { + while (i < mc_count && nesvnic->mcrq_mcast_filter && + ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, + get_addr(addrs, i++))) == 0)); if (mc_nic_index < 0) mc_nic_index = nesvnic->nic_index; - if (multicast_addr) { - DECLARE_MAC_BUF(mac); - nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %s to register 0x%04X nic_idx=%d\n", - print_mac(mac, multicast_addr->dmi_addr), + while (nesadapter->pft_mcast_map[mc_index] < 16 && + nesadapter->pft_mcast_map[mc_index] != + nesvnic->nic_index && + mc_index < max_pft_entries_avaiable) { + nes_debug(NES_DBG_NIC_RX, + "mc_index=%d skipping nic_index=%d, " + "used for=%d \n", mc_index, + nesvnic->nic_index, + nesadapter->pft_mcast_map[mc_index]); + mc_index++; + } + if (mc_index >= max_pft_entries_avaiable) + break; + if (i < mc_count) { + char *addr = get_addr(addrs, i++); + + nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %pM to register 0x%04X nic_idx=%d\n", + addr, perfect_filter_register_address+(mc_index * 8), mc_nic_index); - macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8; - macaddr_high += (u16)multicast_addr->dmi_addr[1]; - macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24; - macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16; - macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8; - macaddr_low += (u32)multicast_addr->dmi_addr[5]; + macaddr_high = ((u8) addr[0]) << 8; + macaddr_high += (u8) addr[1]; + macaddr_low = ((u8) addr[2]) << 24; + macaddr_low += ((u8) addr[3]) << 16; + macaddr_low += ((u8) addr[4]) << 8; + macaddr_low += (u8) addr[5]; + nes_write_indexed(nesdev, perfect_filter_register_address+(mc_index * 8), macaddr_low); @@ -896,16 +958,25 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) perfect_filter_register_address+4+(mc_index * 8), (u32)macaddr_high | NES_MAC_ADDR_VALID | ((((u32)(1<<mc_nic_index)) << 16))); - multicast_addr = multicast_addr->next; + nesadapter->pft_mcast_map[mc_index] = + nesvnic->nic_index; } else { nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n", perfect_filter_register_address+(mc_index * 8)); nes_write_indexed(nesdev, perfect_filter_register_address+4+(mc_index * 8), 0); + nesadapter->pft_mcast_map[mc_index] = 255; } } + kfree(addrs); + /* PFT is not large enough */ + if (i < mc_count) + set_allmulti(nesdev, nic_active_bit); } + +unlock: + spin_unlock_irqrestore(&nesadapter->resource_lock, flags); } @@ -918,6 +989,10 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) struct nes_device *nesdev = nesvnic->nesdev; int ret = 0; u8 jumbomode = 0; + u32 nic_active; + u32 nic_active_bit; + u32 uc_all_active; + u32 mc_all_active; if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) return -EINVAL; @@ -931,13 +1006,30 @@ static int nes_netdev_change_mtu(struct 
net_device *netdev, int new_mtu) nes_nic_init_timer_defaults(nesdev, jumbomode); if (netif_running(netdev)) { + nic_active_bit = 1 << nesvnic->nic_index; + mc_all_active = nes_read_indexed(nesdev, + NES_IDX_NIC_MULTICAST_ALL) & nic_active_bit; + uc_all_active = nes_read_indexed(nesdev, + NES_IDX_NIC_UNICAST_ALL) & nic_active_bit; + nes_netdev_stop(netdev); nes_netdev_open(netdev); + + nic_active = nes_read_indexed(nesdev, + NES_IDX_NIC_MULTICAST_ALL); + nic_active |= mc_all_active; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, + nic_active); + + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active |= uc_all_active; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); } return ret; } + static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = { "Link Change Interrupts", "Linearized SKBs", @@ -946,18 +1038,21 @@ static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = { "Pause Frames Received", "Internal Routing Errors", "SQ SW Dropped SKBs", - "SQ Locked", "SQ Full", "Segmented TSO Requests", "Rx Symbol Errors", "Rx Jabber Errors", "Rx Oversized Frames", "Rx Short Frames", + "Rx Length Errors", + "Rx CRC Errors", + "Rx Port Discard", "Endnode Rx Discards", "Endnode Rx Octets", "Endnode Rx Frames", "Endnode Tx Octets", "Endnode Tx Frames", + "Tx Errors", "mh detected", "mh pauses", "Retransmission Count", @@ -986,56 +1081,27 @@ static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = { "CM Nodes Destroyed", "CM Accel Drops", "CM Resets Received", + "Free 4Kpbls", + "Free 256pbls", "Timer Inits", - "CQ Depth 1", - "CQ Depth 4", - "CQ Depth 16", - "CQ Depth 24", - "CQ Depth 32", - "CQ Depth 128", - "CQ Depth 256", "LRO aggregated", "LRO flushed", "LRO no_desc", + "PAU CreateQPs", + "PAU DestroyQPs", }; - #define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset) -/** - * nes_netdev_get_rx_csum - */ -static u32 nes_netdev_get_rx_csum (struct net_device *netdev) -{ - struct nes_vnic *nesvnic = netdev_priv(netdev); - - if (nesvnic->rx_checksum_disabled) - return 0; - else - return 1; -} - /** - * nes_netdev_set_rc_csum + * nes_netdev_get_sset_count */ -static int nes_netdev_set_rx_csum(struct net_device *netdev, u32 enable) +static int nes_netdev_get_sset_count(struct net_device *netdev, int stringset) { - struct nes_vnic *nesvnic = netdev_priv(netdev); - - if (enable) - nesvnic->rx_checksum_disabled = 0; + if (stringset == ETH_SS_STATS) + return NES_ETHTOOL_STAT_COUNT; else - nesvnic->rx_checksum_disabled = 1; - return 0; -} - - -/** - * nes_netdev_get_stats_count - */ -static int nes_netdev_get_stats_count(struct net_device *netdev) -{ - return NES_ETHTOOL_STAT_COUNT; + return -EINVAL; } @@ -1055,24 +1121,27 @@ static void nes_netdev_get_strings(struct net_device *netdev, u32 stringset, /** * nes_netdev_get_ethtool_stats */ + static void nes_netdev_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *target_ethtool_stats, u64 *target_stat_values) { u64 u64temp; struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesdev->nesadapter; u32 nic_count; u32 u32temp; + u32 index = 0; target_ethtool_stats->n_stats = NES_ETHTOOL_STAT_COUNT; - target_stat_values[0] = nesvnic->nesdev->link_status_interrupts; - target_stat_values[1] = nesvnic->linearized_skbs; - target_stat_values[2] = nesvnic->tso_requests; + target_stat_values[index] = nesvnic->nesdev->link_status_interrupts; + target_stat_values[++index] = nesvnic->linearized_skbs; + 
target_stat_values[++index] = nesvnic->tso_requests; u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200)); nesvnic->nesdev->mac_pause_frames_sent += u32temp; - target_stat_values[3] = nesvnic->nesdev->mac_pause_frames_sent; + target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_sent; u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_RX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200)); @@ -1088,6 +1157,46 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev, nesvnic->nesdev->port_tx_discards += u32temp; nesvnic->netstats.tx_dropped += u32temp; + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_short_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_oversized_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_jabber_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_dropped += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->netstats.rx_length_errors += u32temp; + nesvnic->nesdev->mac_rx_errors += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->nesdev->mac_rx_errors += u32temp; + nesvnic->nesdev->mac_rx_crc_errors += u32temp; + nesvnic->netstats.rx_crc_errors += u32temp; + + u32temp = nes_read_indexed(nesdev, + NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200)); + nesvnic->nesdev->mac_tx_errors += u32temp; + nesvnic->netstats.tx_errors += u32temp; + for (nic_count = 0; nic_count < NES_MAX_PORT_COUNT; nic_count++) { if (nesvnic->qp_nic_index[nic_count] == 0xf) break; @@ -1143,64 +1252,62 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev, nesvnic->endnode_ipv4_tcp_retransmits += u32temp; } - target_stat_values[4] = nesvnic->nesdev->mac_pause_frames_received; - target_stat_values[5] = nesdev->nesadapter->nic_rx_eth_route_err; - target_stat_values[6] = nesvnic->tx_sw_dropped; - target_stat_values[7] = nesvnic->sq_locked; - target_stat_values[8] = nesvnic->sq_full; - target_stat_values[9] = nesvnic->segmented_tso_requests; - target_stat_values[10] = nesvnic->nesdev->mac_rx_symbol_err_frames; - target_stat_values[11] = nesvnic->nesdev->mac_rx_jabber_frames; - target_stat_values[12] = nesvnic->nesdev->mac_rx_oversized_frames; - target_stat_values[13] = nesvnic->nesdev->mac_rx_short_frames; - target_stat_values[14] = nesvnic->endnode_nstat_rx_discard; - target_stat_values[15] = nesvnic->endnode_nstat_rx_octets; - target_stat_values[16] = nesvnic->endnode_nstat_rx_frames; - target_stat_values[17] = nesvnic->endnode_nstat_tx_octets; - target_stat_values[18] = nesvnic->endnode_nstat_tx_frames; - target_stat_values[19] = mh_detected; - target_stat_values[20] = mh_pauses_sent; - 
target_stat_values[21] = nesvnic->endnode_ipv4_tcp_retransmits; - target_stat_values[22] = atomic_read(&cm_connects); - target_stat_values[23] = atomic_read(&cm_accepts); - target_stat_values[24] = atomic_read(&cm_disconnects); - target_stat_values[25] = atomic_read(&cm_connecteds); - target_stat_values[26] = atomic_read(&cm_connect_reqs); - target_stat_values[27] = atomic_read(&cm_rejects); - target_stat_values[28] = atomic_read(&mod_qp_timouts); - target_stat_values[29] = atomic_read(&qps_created); - target_stat_values[30] = atomic_read(&sw_qps_destroyed); - target_stat_values[31] = atomic_read(&qps_destroyed); - target_stat_values[32] = atomic_read(&cm_closes); - target_stat_values[33] = cm_packets_sent; - target_stat_values[34] = cm_packets_bounced; - target_stat_values[35] = cm_packets_created; - target_stat_values[36] = cm_packets_received; - target_stat_values[37] = cm_packets_dropped; - target_stat_values[38] = cm_packets_retrans; - target_stat_values[39] = cm_listens_created; - target_stat_values[40] = cm_listens_destroyed; - target_stat_values[41] = cm_backlog_drops; - target_stat_values[42] = atomic_read(&cm_loopbacks); - target_stat_values[43] = atomic_read(&cm_nodes_created); - target_stat_values[44] = atomic_read(&cm_nodes_destroyed); - target_stat_values[45] = atomic_read(&cm_accel_dropped_pkts); - target_stat_values[46] = atomic_read(&cm_resets_recvd); - target_stat_values[47] = int_mod_timer_init; - target_stat_values[48] = int_mod_cq_depth_1; - target_stat_values[49] = int_mod_cq_depth_4; - target_stat_values[50] = int_mod_cq_depth_16; - target_stat_values[51] = int_mod_cq_depth_24; - target_stat_values[52] = int_mod_cq_depth_32; - target_stat_values[53] = int_mod_cq_depth_128; - target_stat_values[54] = int_mod_cq_depth_256; - target_stat_values[55] = nesvnic->lro_mgr.stats.aggregated; - target_stat_values[56] = nesvnic->lro_mgr.stats.flushed; - target_stat_values[57] = nesvnic->lro_mgr.stats.no_desc; - + target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_received; + target_stat_values[++index] = nesdev->nesadapter->nic_rx_eth_route_err; + target_stat_values[++index] = nesvnic->tx_sw_dropped; + target_stat_values[++index] = nesvnic->sq_full; + target_stat_values[++index] = nesvnic->segmented_tso_requests; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_symbol_err_frames; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_jabber_frames; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_oversized_frames; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_short_frames; + target_stat_values[++index] = nesvnic->netstats.rx_length_errors; + target_stat_values[++index] = nesvnic->nesdev->mac_rx_crc_errors; + target_stat_values[++index] = nesvnic->nesdev->port_rx_discards; + target_stat_values[++index] = nesvnic->endnode_nstat_rx_discard; + target_stat_values[++index] = nesvnic->endnode_nstat_rx_octets; + target_stat_values[++index] = nesvnic->endnode_nstat_rx_frames; + target_stat_values[++index] = nesvnic->endnode_nstat_tx_octets; + target_stat_values[++index] = nesvnic->endnode_nstat_tx_frames; + target_stat_values[++index] = nesvnic->nesdev->mac_tx_errors; + target_stat_values[++index] = mh_detected; + target_stat_values[++index] = mh_pauses_sent; + target_stat_values[++index] = nesvnic->endnode_ipv4_tcp_retransmits; + target_stat_values[++index] = atomic_read(&cm_connects); + target_stat_values[++index] = atomic_read(&cm_accepts); + target_stat_values[++index] = atomic_read(&cm_disconnects); + target_stat_values[++index] = 
atomic_read(&cm_connecteds); + target_stat_values[++index] = atomic_read(&cm_connect_reqs); + target_stat_values[++index] = atomic_read(&cm_rejects); + target_stat_values[++index] = atomic_read(&mod_qp_timouts); + target_stat_values[++index] = atomic_read(&qps_created); + target_stat_values[++index] = atomic_read(&sw_qps_destroyed); + target_stat_values[++index] = atomic_read(&qps_destroyed); + target_stat_values[++index] = atomic_read(&cm_closes); + target_stat_values[++index] = cm_packets_sent; + target_stat_values[++index] = cm_packets_bounced; + target_stat_values[++index] = cm_packets_created; + target_stat_values[++index] = cm_packets_received; + target_stat_values[++index] = cm_packets_dropped; + target_stat_values[++index] = cm_packets_retrans; + target_stat_values[++index] = atomic_read(&cm_listens_created); + target_stat_values[++index] = atomic_read(&cm_listens_destroyed); + target_stat_values[++index] = cm_backlog_drops; + target_stat_values[++index] = atomic_read(&cm_loopbacks); + target_stat_values[++index] = atomic_read(&cm_nodes_created); + target_stat_values[++index] = atomic_read(&cm_nodes_destroyed); + target_stat_values[++index] = atomic_read(&cm_accel_dropped_pkts); + target_stat_values[++index] = atomic_read(&cm_resets_recvd); + target_stat_values[++index] = nesadapter->free_4kpbl; + target_stat_values[++index] = nesadapter->free_256pbl; + target_stat_values[++index] = int_mod_timer_init; + target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated; + target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed; + target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc; + target_stat_values[++index] = atomic_read(&pau_qps_created); + target_stat_values[++index] = atomic_read(&pau_qps_destroyed); } - /** * nes_netdev_get_drvinfo */ @@ -1208,12 +1315,15 @@ static void nes_netdev_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { struct nes_vnic *nesvnic = netdev_priv(netdev); - - strcpy(drvinfo->driver, DRV_NAME); - strcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev)); - strcpy(drvinfo->fw_version, "TBD"); - strcpy(drvinfo->version, DRV_VERSION); - drvinfo->n_stats = nes_netdev_get_stats_count(netdev); + struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; + + strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); + strlcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev), + sizeof(drvinfo->bus_info)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%u.%u", nesadapter->firmware_version >> 16, + nesadapter->firmware_version & 0x000000ff); + strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); drvinfo->testinfo_len = 0; drvinfo->eedump_len = 0; drvinfo->regdump_len = 0; @@ -1337,14 +1447,14 @@ static int nes_netdev_set_pauseparam(struct net_device *netdev, NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); u32temp |= NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; nes_write_indexed(nesdev, - NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE + (nesdev->mac_index*0x200), u32temp); + NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp); nesdev->disable_tx_flow_control = 0; } else if ((et_pauseparam->tx_pause == 0) && (nesdev->disable_tx_flow_control == 0)) { u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); u32temp &= ~NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; nes_write_indexed(nesdev, - NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE + (nesdev->mac_index*0x200), u32temp); + NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp); nesdev->disable_tx_flow_control = 1; } if 
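The set_pauseparam fix above is easy to miss: the old code passed the bit mask NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE as the register offset of the write-back, so the read-modify-write never landed on the register it had read. The corrected shape, with illustrative foo_* names standing in for the indexed register accessors:

#include <linux/types.h>

struct foo_dev;                                         /* opaque handle */
static u32 foo_read(struct foo_dev *fd, u32 reg);       /* assumed */
static void foo_write(struct foo_dev *fd, u32 reg, u32 val);

#define FOO_MAC_TX_CONFIG          0x3808   /* register offset (made up) */
#define FOO_TX_CONFIG_ENABLE_PAUSE 0x0040   /* bit inside it (made up) */

static void foo_set_tx_pause(struct foo_dev *fd, bool enable)
{
        u32 val = foo_read(fd, FOO_MAC_TX_CONFIG);      /* read */

        if (enable)
                val |= FOO_TX_CONFIG_ENABLE_PAUSE;      /* modify */
        else
                val &= ~FOO_TX_CONFIG_ENABLE_PAUSE;

        /* Write back to the register offset, not the bit mask. */
        foo_write(fd, FOO_MAC_TX_CONFIG, val);
}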
((et_pauseparam->rx_pause == 1) && (nesdev->disable_rx_flow_control == 1)) { @@ -1375,49 +1485,58 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; + u32 mac_index = nesdev->mac_index; + u8 phy_type = nesadapter->phy_type[mac_index]; + u8 phy_index = nesadapter->phy_index[mac_index]; u16 phy_data; et_cmd->duplex = DUPLEX_FULL; et_cmd->port = PORT_MII; + et_cmd->maxtxpkt = 511; + et_cmd->maxrxpkt = 511; if (nesadapter->OneG_Mode) { - et_cmd->speed = SPEED_1000; - if (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) { + ethtool_cmd_speed_set(et_cmd, SPEED_1000); + if (phy_type == NES_PHY_TYPE_PUMA_1G) { et_cmd->supported = SUPPORTED_1000baseT_Full; et_cmd->advertising = ADVERTISED_1000baseT_Full; et_cmd->autoneg = AUTONEG_DISABLE; et_cmd->transceiver = XCVR_INTERNAL; - et_cmd->phy_address = nesdev->mac_index; + et_cmd->phy_address = mac_index; } else { - et_cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg; - et_cmd->advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg; - nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], &phy_data); + unsigned long flags; + et_cmd->supported = SUPPORTED_1000baseT_Full + | SUPPORTED_Autoneg; + et_cmd->advertising = ADVERTISED_1000baseT_Full + | ADVERTISED_Autoneg; + spin_lock_irqsave(&nesadapter->phy_lock, flags); + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); if (phy_data & 0x1000) et_cmd->autoneg = AUTONEG_ENABLE; else et_cmd->autoneg = AUTONEG_DISABLE; et_cmd->transceiver = XCVR_EXTERNAL; - et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; + et_cmd->phy_address = phy_index; } + return 0; + } + if ((phy_type == NES_PHY_TYPE_ARGUS) || + (phy_type == NES_PHY_TYPE_SFP_D) || + (phy_type == NES_PHY_TYPE_KR)) { + et_cmd->transceiver = XCVR_EXTERNAL; + et_cmd->port = PORT_FIBRE; + et_cmd->supported = SUPPORTED_FIBRE; + et_cmd->advertising = ADVERTISED_FIBRE; + et_cmd->phy_address = phy_index; } else { - if ((nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) || - (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_ARGUS)) { - et_cmd->transceiver = XCVR_EXTERNAL; - et_cmd->port = PORT_FIBRE; - et_cmd->supported = SUPPORTED_FIBRE; - et_cmd->advertising = ADVERTISED_FIBRE; - et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index]; - } else { - et_cmd->transceiver = XCVR_INTERNAL; - et_cmd->supported = SUPPORTED_10000baseT_Full; - et_cmd->advertising = ADVERTISED_10000baseT_Full; - et_cmd->phy_address = nesdev->mac_index; - } - et_cmd->speed = SPEED_10000; - et_cmd->autoneg = AUTONEG_DISABLE; + et_cmd->transceiver = XCVR_INTERNAL; + et_cmd->supported = SUPPORTED_10000baseT_Full; + et_cmd->advertising = ADVERTISED_10000baseT_Full; + et_cmd->phy_address = mac_index; } - et_cmd->maxtxpkt = 511; - et_cmd->maxrxpkt = 511; + ethtool_cmd_speed_set(et_cmd, SPEED_10000); + et_cmd->autoneg = AUTONEG_DISABLE; return 0; } @@ -1430,12 +1549,15 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; - u16 phy_data; if ((nesadapter->OneG_Mode) && (nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G)) { - nes_read_1G_phy_reg(nesdev, 0, 
nesadapter->phy_index[nesdev->mac_index], - &phy_data); + unsigned long flags; + u16 phy_data; + u8 phy_index = nesadapter->phy_index[nesdev->mac_index]; + + spin_lock_irqsave(&nesadapter->phy_lock, flags); + nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data); if (et_cmd->autoneg) { /* Turn on Full duplex, Autoneg, and restart autonegotiation */ phy_data |= 0x1300; @@ -1443,55 +1565,41 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd /* Turn off autoneg */ phy_data &= ~0x1000; } - nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], - phy_data); + nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data); + spin_unlock_irqrestore(&nesadapter->phy_lock, flags); } return 0; } -static struct ethtool_ops nes_ethtool_ops = { +static const struct ethtool_ops nes_ethtool_ops = { .get_link = ethtool_op_get_link, .get_settings = nes_netdev_get_settings, .set_settings = nes_netdev_set_settings, - .get_tx_csum = ethtool_op_get_tx_csum, - .get_rx_csum = nes_netdev_get_rx_csum, - .get_sg = ethtool_op_get_sg, .get_strings = nes_netdev_get_strings, - .get_stats_count = nes_netdev_get_stats_count, + .get_sset_count = nes_netdev_get_sset_count, .get_ethtool_stats = nes_netdev_get_ethtool_stats, .get_drvinfo = nes_netdev_get_drvinfo, .get_coalesce = nes_netdev_get_coalesce, .set_coalesce = nes_netdev_set_coalesce, .get_pauseparam = nes_netdev_get_pauseparam, .set_pauseparam = nes_netdev_set_pauseparam, - .set_tx_csum = ethtool_op_set_tx_csum, - .set_rx_csum = nes_netdev_set_rx_csum, - .set_sg = ethtool_op_set_sg, - .get_tso = ethtool_op_get_tso, - .set_tso = ethtool_op_set_tso, - .get_flags = ethtool_op_get_flags, - .set_flags = ethtool_op_set_flags, }; - -static void nes_netdev_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp) +static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, netdev_features_t features) { - struct nes_vnic *nesvnic = netdev_priv(netdev); - struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; u32 u32temp; unsigned long flags; spin_lock_irqsave(&nesadapter->phy_lock, flags); - nesvnic->vlan_grp = grp; nes_debug(NES_DBG_NETDEV, "%s: %s\n", __func__, netdev->name); /* Enable/Disable VLAN Stripping */ u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG); - if (grp) + if (features & NETIF_F_HW_VLAN_CTAG_RX) u32temp &= 0xfdffffff; else u32temp |= 0x02000000; @@ -1500,6 +1608,45 @@ static void nes_netdev_vlan_rx_register(struct net_device *netdev, struct vlan_g spin_unlock_irqrestore(&nesadapter->phy_lock, flags); } +static netdev_features_t nes_fix_features(struct net_device *netdev, netdev_features_t features) +{ + /* + * Since there is no support for separate rx/tx vlan accel + * enable/disable make sure tx flag is always in same state as rx. 
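The comment above states the constraint: the hardware cannot strip VLAN tags on receive while leaving insertion on transmit disabled (or vice versa), so ndo_fix_features forces the TX flag to track the RX flag before the core commits a new feature set, and ndo_set_features then reprograms the hardware only when the relevant bit actually changed. The same two-hook pattern in generic form; foo_apply_vlan_mode() is an assumed helper:

#include <linux/netdevice.h>

static void foo_apply_vlan_mode(struct net_device *nd,
                                netdev_features_t features);    /* assumed */

static netdev_features_t foo_fix_features(struct net_device *nd,
                                          netdev_features_t features)
{
        /* RX and TX VLAN offload toggle together on this hardware. */
        if (features & NETIF_F_HW_VLAN_CTAG_RX)
                features |= NETIF_F_HW_VLAN_CTAG_TX;
        else
                features &= ~NETIF_F_HW_VLAN_CTAG_TX;
        return features;
}

static int foo_set_features(struct net_device *nd, netdev_features_t features)
{
        netdev_features_t changed = nd->features ^ features;

        if (changed & NETIF_F_HW_VLAN_CTAG_RX)
                foo_apply_vlan_mode(nd, features);
        return 0;
}

Only bits advertised in netdev->hw_features are user-toggleable via ethtool -K, which is why the init path below sets hw_features separately from features.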
+ */ + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; + else + features &= ~NETIF_F_HW_VLAN_CTAG_TX; + + return features; +} + +static int nes_set_features(struct net_device *netdev, netdev_features_t features) +{ + struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_device *nesdev = nesvnic->nesdev; + u32 changed = netdev->features ^ features; + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) + nes_vlan_mode(netdev, nesdev, features); + + return 0; +} + +static const struct net_device_ops nes_netdev_ops = { + .ndo_open = nes_netdev_open, + .ndo_stop = nes_netdev_stop, + .ndo_start_xmit = nes_netdev_start_xmit, + .ndo_get_stats = nes_netdev_get_stats, + .ndo_tx_timeout = nes_netdev_tx_timeout, + .ndo_set_mac_address = nes_netdev_set_mac_address, + .ndo_set_rx_mode = nes_netdev_set_multicast_list, + .ndo_change_mtu = nes_netdev_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_fix_features = nes_fix_features, + .ndo_set_features = nes_set_features, +}; /** * nes_netdev_init - initialize network device @@ -1508,47 +1655,32 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, void __iomem *mmio_addr) { u64 u64temp; - struct nes_vnic *nesvnic = NULL; + struct nes_vnic *nesvnic; struct net_device *netdev; struct nic_qp_map *curr_qp_map; - u32 u32temp; - u16 phy_data; - u16 temp_phy_data; + u8 phy_type = nesdev->nesadapter->phy_type[nesdev->mac_index]; netdev = alloc_etherdev(sizeof(struct nes_vnic)); if (!netdev) { printk(KERN_ERR PFX "nesvnic etherdev alloc failed"); return NULL; } + nesvnic = netdev_priv(netdev); nes_debug(NES_DBG_INIT, "netdev = %p, %s\n", netdev, netdev->name); SET_NETDEV_DEV(netdev, &nesdev->pcidev->dev); - nesvnic = netdev_priv(netdev); - memset(nesvnic, 0, sizeof(*nesvnic)); - - netdev->open = nes_netdev_open; - netdev->stop = nes_netdev_stop; - netdev->hard_start_xmit = nes_netdev_start_xmit; - netdev->get_stats = nes_netdev_get_stats; - netdev->tx_timeout = nes_netdev_tx_timeout; - netdev->set_mac_address = nes_netdev_set_mac_address; - netdev->set_multicast_list = nes_netdev_set_multicast_list; - netdev->change_mtu = nes_netdev_change_mtu; netdev->watchdog_timeo = NES_TX_TIMEOUT; netdev->irq = nesdev->pcidev->irq; netdev->mtu = ETH_DATA_LEN; netdev->hard_header_len = ETH_HLEN; netdev->addr_len = ETH_ALEN; netdev->type = ARPHRD_ETHER; - netdev->features = NETIF_F_HIGHDMA; + netdev->netdev_ops = &nes_netdev_ops; netdev->ethtool_ops = &nes_ethtool_ops; netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128); nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n"); - netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; - netdev->vlan_rx_register = nes_netdev_vlan_rx_register; - netdev->features |= NETIF_F_LLTX; /* Fill in the port structure */ nesvnic->netdev = netdev; @@ -1573,21 +1705,22 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, netdev->dev_addr[3] = (u8)(u64temp>>16); netdev->dev_addr[4] = (u8)(u64temp>>8); netdev->dev_addr[5] = (u8)u64temp; - memcpy(netdev->perm_addr, netdev->dev_addr, 6); - if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) { - netdev->features |= NETIF_F_TSO | NETIF_F_SG | NETIF_F_IP_CSUM; - netdev->features |= NETIF_F_GSO | NETIF_F_TSO | NETIF_F_SG | NETIF_F_IP_CSUM; - } else { - netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; - } + netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX; + if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) + netdev->hw_features 
|= NETIF_F_TSO; + + netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_features |= NETIF_F_LRO; nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," " nic_index = %d, logical_port = %d, mac_index = %d.\n", nesvnic, (unsigned long)netdev->features, nesvnic->nic.qp_id, nesvnic->nic_index, nesvnic->logical_port, nesdev->mac_index); - if (nesvnic->nesdev->nesadapter->port_count == 1) { + if (nesvnic->nesdev->nesadapter->port_count == 1 && + nesvnic->nesdev->nesadapter->adapter_fcn_count == 1) { + nesvnic->qp_nic_index[0] = nesvnic->nic_index; nesvnic->qp_nic_index[1] = nesvnic->nic_index + 1; if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) { @@ -1598,11 +1731,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nesvnic->qp_nic_index[3] = nesvnic->nic_index + 3; } } else { - if (nesvnic->nesdev->nesadapter->port_count == 2) { - nesvnic->qp_nic_index[0] = nesvnic->nic_index; - nesvnic->qp_nic_index[1] = nesvnic->nic_index + 2; - nesvnic->qp_nic_index[2] = 0xf; - nesvnic->qp_nic_index[3] = 0xf; + if (nesvnic->nesdev->nesadapter->port_count == 2 || + (nesvnic->nesdev->nesadapter->port_count == 1 && + nesvnic->nesdev->nesadapter->adapter_fcn_count == 2)) { + nesvnic->qp_nic_index[0] = nesvnic->nic_index; + nesvnic->qp_nic_index[1] = nesvnic->nic_index + + 2; + nesvnic->qp_nic_index[2] = 0xf; + nesvnic->qp_nic_index[3] = 0xf; } else { nesvnic->qp_nic_index[0] = nesvnic->nic_index; nesvnic->qp_nic_index[1] = 0xf; @@ -1618,7 +1754,10 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nesvnic->rdma_enabled = 0; } nesvnic->nic_cq.cq_number = nesvnic->nic.qp_id; + init_timer(&nesvnic->event_timer); + nesvnic->event_timer.function = NULL; spin_lock_init(&nesvnic->tx_lock); + spin_lock_init(&nesvnic->port_ibevent_lock); nesdev->netdev[nesdev->netdev_count] = netdev; nes_debug(NES_DBG_INIT, "Adding nesvnic (%p) to the adapters nesvnic_list for MAC%d.\n", @@ -1627,68 +1766,81 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, if ((nesdev->netdev_count == 0) && ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) || - ((nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) && + ((phy_type == NES_PHY_TYPE_PUMA_1G) && (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) || ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) { - /* - * nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. 
Using register index 0x%04X\n", - * NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesvnic->logical_port & 1))); - */ + u32 u32temp; + u32 link_mask = 0; + u32 link_val = 0; + u16 temp_phy_data; + u16 phy_data = 0; + unsigned long flags; + u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesdev->mac_index & 1))); - if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G) { + if (phy_type != NES_PHY_TYPE_PUMA_1G) { u32temp |= 0x00200000; nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesdev->mac_index & 1)), u32temp); } - u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + - (0x200 * (nesdev->mac_index & 1))); - - if ((u32temp&0x0f1f0000) == 0x0f0f0000) { - if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) { - nes_init_phy(nesdev); - nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1); - temp_phy_data = (u16)nes_read_indexed(nesdev, - NES_IDX_MAC_MDIO_CONTROL); - u32temp = 20; - do { - nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1); - phy_data = (u16)nes_read_indexed(nesdev, - NES_IDX_MAC_MDIO_CONTROL); - if ((phy_data == temp_phy_data) || (!(--u32temp))) - break; - temp_phy_data = phy_data; - } while (1); - if (phy_data & 4) { - nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); - nesvnic->linkup = 1; - } else { - nes_debug(NES_DBG_INIT, "The Link is DOWN!!.\n"); - } + /* Check and set linkup here. This is for back to back */ + /* configuration where second port won't get link interrupt */ + switch (phy_type) { + case NES_PHY_TYPE_PUMA_1G: + if (nesdev->mac_index < 2) { + link_mask = 0x01010000; + link_val = 0x01010000; } else { - nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); - nesvnic->linkup = 1; + link_mask = 0x02020000; + link_val = 0x02020000; } - } else if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) { - nes_debug(NES_DBG_INIT, "mac_index=%d, logical_port=%d, u32temp=0x%04X, PCI_FUNC=%d\n", - nesdev->mac_index, nesvnic->logical_port, u32temp, PCI_FUNC(nesdev->pcidev->devfn)); - if (((nesdev->mac_index < 2) && ((u32temp&0x01010000) == 0x01010000)) || - ((nesdev->mac_index > 1) && ((u32temp&0x02020000) == 0x02020000))) { - nes_debug(NES_DBG_INIT, "The Link is UP!!.\n"); + break; + case NES_PHY_TYPE_SFP_D: + spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); + nes_read_10G_phy_reg(nesdev, + nesdev->nesadapter->phy_index[nesdev->mac_index], + 1, 0x9003); + temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + nes_read_10G_phy_reg(nesdev, + nesdev->nesadapter->phy_index[nesdev->mac_index], + 3, 0x0021); + nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + nes_read_10G_phy_reg(nesdev, + nesdev->nesadapter->phy_index[nesdev->mac_index], + 3, 0x0021); + phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); + spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); + phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 
0x4 : 0x0; + break; + default: + link_mask = 0x0f1f0000; + link_val = 0x0f0f0000; + break; + } + + u32temp = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200 * (nesdev->mac_index & 1))); + + if (phy_type == NES_PHY_TYPE_SFP_D) { + if (phy_data & 0x0004) + nesvnic->linkup = 1; + } else { + if ((u32temp & link_mask) == link_val) nesvnic->linkup = 1; - } } + /* clear the MAC interrupt status, assumes direct logical to physical mapping */ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index)); nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp); nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp); - if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_IRIS) - nes_init_phy(nesdev); - + nes_init_phy(nesdev); } + nes_vlan_mode(netdev, nesdev, netdev->features); + return netdev; } diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h index e64306bce80..529c421bb15 100644 --- a/drivers/infiniband/hw/nes/nes_user.h +++ b/drivers/infiniband/hw/nes/nes_user.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. @@ -39,8 +39,8 @@ #include <linux/types.h> -#define NES_ABI_USERSPACE_VER 1 -#define NES_ABI_KERNEL_VER 1 +#define NES_ABI_USERSPACE_VER 2 +#define NES_ABI_KERNEL_VER 2 /* * Make sure that all structs defined in this file remain laid out so @@ -78,6 +78,7 @@ struct nes_create_cq_req { struct nes_create_qp_req { __u64 user_wqe_buffers; + __u64 user_qp_buffer; }; enum iwnes_memreg_type { @@ -86,6 +87,7 @@ enum iwnes_memreg_type { IWNES_MEMREG_TYPE_CQ = 0x0002, IWNES_MEMREG_TYPE_MW = 0x0003, IWNES_MEMREG_TYPE_FMR = 0x0004, + IWNES_MEMREG_TYPE_FMEM = 0x0005, }; struct nes_mem_reg_req { diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index fb8cbd71a2e..2042c0f2975 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
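The linkup probe in the nes_netdev_init() hunk above boils down to a mask/value match on the PCS status register, with the mask and expected value chosen per PHY type (plus a PHY-register read path for SFP_D); per the comment in that hunk, this matters for back-to-back setups where the second port never receives a link-state interrupt. The shape is reusable; register offsets and PHY cases here are illustrative:

#include <linux/types.h>

struct foo_dev;
static u32 foo_read(struct foo_dev *fd, u32 reg);       /* assumed accessor */

#define FOO_PCS_STATUS 0x0280                           /* made-up offset */

static bool foo_link_up(struct foo_dev *fd, int phy_type)
{
        u32 mask, val;

        switch (phy_type) {
        case 1:                         /* e.g. a 1G PHY on lanes 0/1 */
                mask = 0x01010000;
                val  = 0x01010000;
                break;
        default:                        /* 10G serdes lanes */
                mask = 0x0f1f0000;
                val  = 0x0f0f0000;
                break;
        }
        return (foo_read(fd, FOO_PCS_STATUS) & mask) == val;
}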
You may choose to be licensed under the terms of the GNU @@ -38,6 +38,7 @@ #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/if_vlan.h> +#include <linux/slab.h> #include <linux/crc32.h> #include <linux/in.h> #include <linux/ip.h> @@ -50,13 +51,34 @@ #include "nes.h" - - static u16 nes_read16_eeprom(void __iomem *addr, u16 offset); u32 mh_detected; u32 mh_pauses_sent; +static u32 nes_set_pau(struct nes_device *nesdev) +{ + u32 ret = 0; + u32 counter; + + nes_write_indexed(nesdev, NES_IDX_GPR2, NES_ENABLE_PAU); + nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1); + + for (counter = 0; counter < NES_PAU_COUNTER; counter++) { + udelay(30); + if (!nes_read_indexed(nesdev, NES_IDX_GPR2)) { + printk(KERN_INFO PFX "PAU is supported.\n"); + break; + } + nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1); + } + if (counter == NES_PAU_COUNTER) { + printk(KERN_INFO PFX "PAU is not supported.\n"); + return -EPERM; + } + return ret; +} + /** * nes_read_eeprom_values - */ @@ -183,9 +205,22 @@ int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesada } else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) { nesadapter->virtwq = 1; } + if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3)) + nesadapter->send_term_ok = 1; + + if (nes_drv_opt & NES_DRV_OPT_ENABLE_PAU) { + if (!nes_set_pau(nesdev)) + nesadapter->allow_unaligned_fpdus = 1; + } + nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) + (u32)((u8)eeprom_data); + eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 10); + printk(PFX "EEPROM version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data); + nesadapter->eeprom_version = (((u32)(u8)(eeprom_data>>8)) << 16) + + (u32)((u8)eeprom_data); + no_fw_rev: /* eeprom is valid */ eeprom_offset = nesadapter->software_eeprom_offset; @@ -377,12 +412,8 @@ static u16 nes_read16_eeprom(void __iomem *addr, u16 offset) */ void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 data) { - struct nes_adapter *nesadapter = nesdev->nesadapter; u32 u32temp; u32 counter; - unsigned long flags; - - spin_lock_irqsave(&nesadapter->phy_lock, flags); nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, 0x50020000 | data | ((u32)phy_reg << 18) | ((u32)phy_addr << 23)); @@ -398,8 +429,6 @@ void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u1 if (!(u32temp & 1)) nes_debug(NES_DBG_PHY, "Phy is not responding. 
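nes_set_pau() above is a small firmware handshake: write a command word to a general-purpose register, ring the trigger, then poll with short delays until the firmware clears the command (supported) or the retry budget runs out (not supported). The skeleton, with foo_* stand-ins for the nes registers and command values:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

struct foo_dev;
static u32 foo_read(struct foo_dev *fd, u32 reg);       /* assumed */
static void foo_write(struct foo_dev *fd, u32 reg, u32 val);

#define FOO_GPR_CMD     0x1004          /* made-up offsets and command */
#define FOO_GPR_TRIGGER 0x1008
#define FOO_CMD_ENABLE  0x0daa
#define FOO_MAX_TRIES   30

static int foo_fw_handshake(struct foo_dev *fd)
{
        u32 tries;

        foo_write(fd, FOO_GPR_CMD, FOO_CMD_ENABLE);
        foo_write(fd, FOO_GPR_TRIGGER, 1);

        for (tries = 0; tries < FOO_MAX_TRIES; tries++) {
                udelay(30);     /* give the firmware time to react */
                if (!foo_read(fd, FOO_GPR_CMD))
                        return 0;       /* firmware consumed the command */
                foo_write(fd, FOO_GPR_TRIGGER, 1);      /* re-kick */
        }
        return -EPERM;          /* firmware does not support the feature */
}

nes uses the result to gate allow_unaligned_fpdus, so PAU support is probed once at EEPROM-read time rather than inferred from the firmware version.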
interrupt status = 0x%X.\n", u32temp); - - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); } @@ -410,14 +439,11 @@ void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u1 */ void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 *data) { - struct nes_adapter *nesadapter = nesdev->nesadapter; u32 u32temp; u32 counter; - unsigned long flags; /* nes_debug(NES_DBG_PHY, "phy addr = %d, mac_index = %d\n", phy_addr, nesdev->mac_index); */ - spin_lock_irqsave(&nesadapter->phy_lock, flags); nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL, 0x60020000 | ((u32)phy_reg << 18) | ((u32)phy_addr << 23)); @@ -437,7 +463,6 @@ void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 } else { *data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); } - spin_unlock_irqrestore(&nesadapter->phy_lock, flags); } @@ -540,12 +565,15 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev) if (!list_empty(&nesdev->cqp_avail_reqs)) { spin_lock_irqsave(&nesdev->cqp.lock, flags); - cqp_request = list_entry(nesdev->cqp_avail_reqs.next, + if (!list_empty(&nesdev->cqp_avail_reqs)) { + cqp_request = list_entry(nesdev->cqp_avail_reqs.next, struct nes_cqp_request, list); - list_del_init(&cqp_request->list); + list_del_init(&cqp_request->list); + } spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } else { - cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL); + } + if (cqp_request == NULL) { + cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_ATOMIC); if (cqp_request) { cqp_request->dynamic = 1; INIT_LIST_HEAD(&cqp_request->list); @@ -592,6 +620,7 @@ void nes_put_cqp_request(struct nes_device *nesdev, nes_free_cqp_request(nesdev, cqp_request); } + /** * nes_post_cqp_request */ @@ -602,6 +631,8 @@ void nes_post_cqp_request(struct nes_device *nesdev, unsigned long flags; u32 cqp_head; u64 u64temp; + u32 opcode; + int ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX; spin_lock_irqsave(&nesdev->cqp.lock, flags); @@ -612,17 +643,20 @@ void nes_post_cqp_request(struct nes_device *nesdev, nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); + opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]); + if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT) + ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX; barrier(); u64temp = (unsigned long)cqp_request; - set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_SCRATCH_LOW_IDX, - u64temp); + set_wqe_64bit_value(cqp_wqe->wqe_words, ctx_index, u64temp); nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ," - " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u," - " waiting = %d, refcount = %d.\n", - le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, - le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request, - nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, - cqp_request->waiting, atomic_read(&cqp_request->refcount)); + " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u," + " waiting = %d, refcount = %d.\n", + opcode & NES_CQP_OPCODE_MASK, + le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request, + nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, + cqp_request->waiting, atomic_read(&cqp_request->refcount)); + barrier(); /* Ring doorbell (1 WQEs) */ @@ -643,7 +677,6 @@ void nes_post_cqp_request(struct nes_device *nesdev, return; } - /** * nes_arp_table 
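The nes_get_cqp_request() change above fixes a classic time-of-check race: the unlocked list_empty() test is now only a fast-path hint, re-tested after taking cqp.lock, and the overflow allocation switches from GFP_KERNEL to GFP_ATOMIC because the function can run in contexts that must not sleep. The pattern in isolation; foo_req and foo_dev are illustrative types:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo_req {
        struct list_head list;
};

struct foo_dev {
        spinlock_t lock;
        struct list_head free_reqs;
};

static struct foo_req *foo_get_req(struct foo_dev *fd)
{
        struct foo_req *req = NULL;
        unsigned long flags;

        if (!list_empty(&fd->free_reqs)) {
                spin_lock_irqsave(&fd->lock, flags);
                /* Re-check under the lock: another CPU may have taken
                 * the last entry since the unlocked test. */
                if (!list_empty(&fd->free_reqs)) {
                        req = list_first_entry(&fd->free_reqs,
                                               struct foo_req, list);
                        list_del_init(&req->list);
                }
                spin_unlock_irqrestore(&fd->lock, flags);
        }
        if (!req)       /* pool exhausted: allocate without sleeping */
                req = kzalloc(sizeof(*req), GFP_ATOMIC);
        return req;
}

The same file also hoists phy_lock out of nes_read_1G_phy_reg()/nes_write_1G_phy_reg() into the callers (see the get_settings/set_settings hunks earlier), so a back-to-back MDIO read-modify-write now happens under a single lock hold.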
*/ @@ -652,6 +685,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti struct nes_adapter *nesadapter = nesdev->nesadapter; int arp_index; int err = 0; + __be32 tmp_addr; for (arp_index = 0; (u32) arp_index < nesadapter->arp_table_size; arp_index++) { if (nesadapter->arp_table[arp_index].ip_addr == ip_addr) @@ -665,7 +699,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti arp_index = 0; err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps, - nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index); + nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index, NES_RESOURCE_ARP); if (err) { nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err); return err; @@ -679,9 +713,9 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti /* DELETE or RESOLVE */ if (arp_index == nesadapter->arp_table_size) { - nes_debug(NES_DBG_NETDEV, "MAC for " NIPQUAD_FMT " not in ARP table - cannot %s\n", - HIPQUAD(ip_addr), - action == NES_ARP_RESOLVE ? "resolve" : "delete"); + tmp_addr = cpu_to_be32(ip_addr); + nes_debug(NES_DBG_NETDEV, "MAC for %pI4 not in ARP table - cannot %s\n", + &tmp_addr, action == NES_ARP_RESOLVE ? "resolve" : "delete"); return -1; } diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index e3939d13484..218dd357428 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,6 +35,7 @@ #include <linux/moduleparam.h> #include <linux/random.h> #include <linux/highmem.h> +#include <linux/slab.h> #include <asm/byteorder.h> #include <rdma/ib_verbs.h> @@ -54,7 +55,8 @@ static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev); /** * nes_alloc_mw */ -static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { +static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type) +{ struct nes_pd *nespd = to_nespd(ibpd); struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); struct nes_device *nesdev = nesvnic->nesdev; @@ -70,6 +72,9 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { u32 driver_key = 0; u8 stag_key = 0; + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + get_random_bytes(&next_stag_index, sizeof(next_stag_index)); stag_key = (u8)next_stag_index; @@ -79,7 +84,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) { next_stag_index %= nesadapter->max_mr; ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, - nesadapter->max_mr, &stag_index, &next_stag_index); + nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_MW); if (ret) { return ERR_PTR(ret); } @@ -220,15 +225,15 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw, if (nesqp->ibqp_state > IB_QPS_RTS) return -EINVAL; - spin_lock_irqsave(&nesqp->lock, flags); + spin_lock_irqsave(&nesqp->lock, flags); head = nesqp->hwqp.sq_head; qsize = nesqp->hwqp.sq_tail; /* Check for SQ overflow */ if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { - spin_unlock_irqrestore(&nesqp->lock, flags); - return -EINVAL; + spin_unlock_irqrestore(&nesqp->lock, flags); + return -ENOMEM; } wqe = &nesqp->hwqp.sq_vbase[head]; @@ -243,20 +248,19 
@@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw, if (ibmw_bind->send_flags & IB_SEND_SIGNALED) wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL; - if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_WRITE) { + if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE) wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE; - } - if (ibmw_bind->mw_access_flags & IB_ACCESS_REMOTE_READ) { + if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ) wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ; - } set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, ibmw_bind->mr->lkey); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX, + ibmw_bind->bind_info.mr->lkey); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey); set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX, - ibmw_bind->length); + ibmw_bind->bind_info.length); wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0; - u64temp = (u64)ibmw_bind->addr; + u64temp = (u64)ibmw_bind->bind_info.addr; set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp); head++; @@ -269,323 +273,242 @@ static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw, nes_write32(nesdev->regs+NES_WQE_ALLOC, (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); - spin_unlock_irqrestore(&nesqp->lock, flags); + spin_unlock_irqrestore(&nesqp->lock, flags); return 0; } -/** - * nes_alloc_fmr +/* + * nes_alloc_fast_mr */ -static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd, - int ibmr_access_flags, - struct ib_fmr_attr *ibfmr_attr) +static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, + u32 stag, u32 page_count) { - unsigned long flags; - struct nes_pd *nespd = to_nespd(ibpd); - struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); - struct nes_device *nesdev = nesvnic->nesdev; - struct nes_adapter *nesadapter = nesdev->nesadapter; - struct nes_fmr *nesfmr; - struct nes_cqp_request *cqp_request; struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_cqp_request *cqp_request; + unsigned long flags; int ret; - u32 stag; - u32 stag_index = 0; - u32 next_stag_index = 0; - u32 driver_key = 0; + struct nes_adapter *nesadapter = nesdev->nesadapter; u32 opcode = 0; - u8 stag_key = 0; - int i=0; - struct nes_vpbl vpbl; - - get_random_bytes(&next_stag_index, sizeof(next_stag_index)); - stag_key = (u8)next_stag_index; - - driver_key = 0; - - next_stag_index >>= 8; - next_stag_index %= nesadapter->max_mr; - - ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, - nesadapter->max_mr, &stag_index, &next_stag_index); - if (ret) { - goto failed_resource_alloc; - } - - nesfmr = kzalloc(sizeof(*nesfmr), GFP_KERNEL); - if (!nesfmr) { - ret = -ENOMEM; - goto failed_fmr_alloc; - } - - nesfmr->nesmr.mode = IWNES_MEMREG_TYPE_FMR; - if (ibfmr_attr->max_pages == 1) { - /* use zero length PBL */ - nesfmr->nesmr.pbl_4k = 0; - nesfmr->nesmr.pbls_used = 0; - } else if (ibfmr_attr->max_pages <= 32) { - /* use PBL 256 */ - nesfmr->nesmr.pbl_4k = 0; - nesfmr->nesmr.pbls_used = 1; - } else if (ibfmr_attr->max_pages <= 512) { - /* use 4K PBLs */ - nesfmr->nesmr.pbl_4k = 1; - nesfmr->nesmr.pbls_used = 1; - } else { - /* use two level 4K PBLs */ - /* add support for two level 256B PBLs */ - nesfmr->nesmr.pbl_4k = 1; - nesfmr->nesmr.pbls_used = 1 + (ibfmr_attr->max_pages >> 9) + - ((ibfmr_attr->max_pages & 511) ? 
1 : 0); - } - /* Register the region with the adapter */ - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - - /* track PBL resources */ - if (nesfmr->nesmr.pbls_used != 0) { - if (nesfmr->nesmr.pbl_4k) { - if (nesfmr->nesmr.pbls_used > nesadapter->free_4kpbl) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - ret = -ENOMEM; - goto failed_vpbl_alloc; - } else { - nesadapter->free_4kpbl -= nesfmr->nesmr.pbls_used; - } - } else { - if (nesfmr->nesmr.pbls_used > nesadapter->free_256pbl) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - ret = -ENOMEM; - goto failed_vpbl_alloc; - } else { - nesadapter->free_256pbl -= nesfmr->nesmr.pbls_used; - } - } - } - - /* one level pbl */ - if (nesfmr->nesmr.pbls_used == 0) { - nesfmr->root_vpbl.pbl_vbase = NULL; - nes_debug(NES_DBG_MR, "zero level pbl \n"); - } else if (nesfmr->nesmr.pbls_used == 1) { - /* can change it to kmalloc & dma_map_single */ - nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, - &nesfmr->root_vpbl.pbl_pbase); - if (!nesfmr->root_vpbl.pbl_vbase) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - ret = -ENOMEM; - goto failed_vpbl_alloc; - } - nesfmr->leaf_pbl_cnt = 0; - nes_debug(NES_DBG_MR, "one level pbl, root_vpbl.pbl_vbase=%p \n", - nesfmr->root_vpbl.pbl_vbase); - } - /* two level pbl */ - else { - nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192, - &nesfmr->root_vpbl.pbl_pbase); - if (!nesfmr->root_vpbl.pbl_vbase) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - ret = -ENOMEM; - goto failed_vpbl_alloc; - } - - nesfmr->root_vpbl.leaf_vpbl = kzalloc(sizeof(*nesfmr->root_vpbl.leaf_vpbl)*1024, GFP_KERNEL); - if (!nesfmr->root_vpbl.leaf_vpbl) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - ret = -ENOMEM; - goto failed_leaf_vpbl_alloc; - } - - nesfmr->leaf_pbl_cnt = nesfmr->nesmr.pbls_used-1; - nes_debug(NES_DBG_MR, "two level pbl, root_vpbl.pbl_vbase=%p" - " leaf_pbl_cnt=%d root_vpbl.leaf_vpbl=%p\n", - nesfmr->root_vpbl.pbl_vbase, nesfmr->leaf_pbl_cnt, nesfmr->root_vpbl.leaf_vpbl); - - for (i=0; i<nesfmr->leaf_pbl_cnt; i++) - nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase = NULL; - - for (i=0; i<nesfmr->leaf_pbl_cnt; i++) { - vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, - &vpbl.pbl_pbase); - - if (!vpbl.pbl_vbase) { - ret = -ENOMEM; - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - goto failed_leaf_vpbl_pages_alloc; - } - - nesfmr->root_vpbl.pbl_vbase[i].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase); - nesfmr->root_vpbl.pbl_vbase[i].pa_high = cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32))); - nesfmr->root_vpbl.leaf_vpbl[i] = vpbl; - - nes_debug(NES_DBG_MR, "pbase_low=0x%x, pbase_high=0x%x, vpbl=%p\n", - nesfmr->root_vpbl.pbl_vbase[i].pa_low, - nesfmr->root_vpbl.pbl_vbase[i].pa_high, - &nesfmr->root_vpbl.leaf_vpbl[i]); - } - } - nesfmr->ib_qp = NULL; - nesfmr->access_rights =0; + u16 major_code; + u64 region_length = page_count * PAGE_SIZE; - stag = stag_index << 8; - stag |= driver_key; - stag += (u32)stag_key; - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); cqp_request = nes_get_cqp_request(nesdev); if (cqp_request == NULL) { nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n"); - ret = -ENOMEM; - goto failed_leaf_vpbl_pages_alloc; + return -ENOMEM; } + nes_debug(NES_DBG_MR, "alloc_fast_reg_mr: page_count = %d, " + "region_length = %llu\n", + page_count, region_length); cqp_request->waiting = 1; cqp_wqe = &cqp_request->cqp_wqe; - nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n", - stag, 
stag_index); - - opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR; - - if (nesfmr->nesmr.pbl_4k == 1) - opcode |= NES_CQP_STAG_PBL_BLK_SIZE; - - if (ibmr_access_flags & IB_ACCESS_REMOTE_WRITE) { - opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE | - NES_CQP_STAG_RIGHTS_LOCAL_WRITE | NES_CQP_STAG_REM_ACC_EN; - nesfmr->access_rights |= - NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_RIGHTS_LOCAL_WRITE | - NES_CQP_STAG_REM_ACC_EN; + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (nesadapter->free_4kpbl > 0) { + nesadapter->free_4kpbl--; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } else { + /* No 4kpbl's available: */ + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_debug(NES_DBG_MR, "Out of Pbls\n"); + nes_free_cqp_request(nesdev, cqp_request); + return -ENOMEM; } - if (ibmr_access_flags & IB_ACCESS_REMOTE_READ) { - opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ | - NES_CQP_STAG_RIGHTS_LOCAL_READ | NES_CQP_STAG_REM_ACC_EN; - nesfmr->access_rights |= - NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_RIGHTS_LOCAL_READ | - NES_CQP_STAG_REM_ACC_EN; - } + opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_MR | + NES_CQP_STAG_PBL_BLK_SIZE | NES_CQP_STAG_VA_TO | + NES_CQP_STAG_REM_ACC_EN; + /* + * The current OFED API does not support the zero based TO option. + * If added then need to changed the NES_CQP_STAG_VA* option. Also, + * the API does not support that ability to have the MR set for local + * access only when created and not allow the SQ op to override. Given + * this the remote enable must be set here. + */ nes_fill_init_cqp_wqe(cqp_wqe, nesdev); set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff)); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, 1); - cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = - cpu_to_le32((nesfmr->nesmr.pbls_used>1) ? - (nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used); + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] = + cpu_to_le32((u32)(region_length >> 8) & 0xff000000); + cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |= + cpu_to_le32(nespd->pd_id & 0x00007fff); + + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, 0); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, 0); + set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, 0); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (page_count * 8)); + cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE); + barrier(); atomic_set(&cqp_request->refcount, 2); nes_post_cqp_request(nesdev, cqp_request); /* Wait for CQP */ - ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0), - NES_EVENT_TIMEOUT); - nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u," - " CQP Major:Minor codes = 0x%04X:0x%04X.\n", - stag, ret, cqp_request->major_code, cqp_request->minor_code); - - if ((!ret) || (cqp_request->major_code)) { - nes_put_cqp_request(nesdev, cqp_request); - ret = (!ret) ? 
-ETIME : -EIO; - goto failed_leaf_vpbl_pages_alloc; - } + ret = wait_event_timeout(cqp_request->waitq, + (0 != cqp_request->request_done), + NES_EVENT_TIMEOUT); + + nes_debug(NES_DBG_MR, "Allocate STag 0x%08X completed, " + "wait_event_timeout ret = %u, CQP Major:Minor codes = " + "0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code, + cqp_request->minor_code); + major_code = cqp_request->major_code; nes_put_cqp_request(nesdev, cqp_request); - nesfmr->nesmr.ibfmr.lkey = stag; - nesfmr->nesmr.ibfmr.rkey = stag; - nesfmr->attr = *ibfmr_attr; - - return &nesfmr->nesmr.ibfmr; - - failed_leaf_vpbl_pages_alloc: - /* unroll all allocated pages */ - for (i=0; i<nesfmr->leaf_pbl_cnt; i++) { - if (nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase) { - pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase, - nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase); - } - } - if (nesfmr->root_vpbl.leaf_vpbl) - kfree(nesfmr->root_vpbl.leaf_vpbl); - - failed_leaf_vpbl_alloc: - if (nesfmr->leaf_pbl_cnt == 0) { - if (nesfmr->root_vpbl.pbl_vbase) - pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase, - nesfmr->root_vpbl.pbl_pbase); - } else - pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase, - nesfmr->root_vpbl.pbl_pbase); - - failed_vpbl_alloc: - kfree(nesfmr); - failed_fmr_alloc: - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + if (!ret || major_code) { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_4kpbl++; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } - failed_resource_alloc: - return ERR_PTR(ret); + if (!ret) + return -ETIME; + else if (major_code) + return -EIO; + return 0; } - -/** - * nes_dealloc_fmr +/* + * nes_alloc_fast_reg_mr */ -static int nes_dealloc_fmr(struct ib_fmr *ibfmr) +static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list_len) { - struct nes_mr *nesmr = to_nesmr_from_ibfmr(ibfmr); - struct nes_fmr *nesfmr = to_nesfmr(nesmr); - struct nes_vnic *nesvnic = to_nesvnic(ibfmr->device); + struct nes_pd *nespd = to_nespd(ibpd); + struct nes_vnic *nesvnic = to_nesvnic(ibpd->device); struct nes_device *nesdev = nesvnic->nesdev; - struct nes_mr temp_nesmr = *nesmr; - int i = 0; + struct nes_adapter *nesadapter = nesdev->nesadapter; - temp_nesmr.ibmw.device = ibfmr->device; - temp_nesmr.ibmw.pd = ibfmr->pd; - temp_nesmr.ibmw.rkey = ibfmr->rkey; - temp_nesmr.ibmw.uobject = NULL; + u32 next_stag_index; + u8 stag_key = 0; + u32 driver_key = 0; + int err = 0; + u32 stag_index = 0; + struct nes_mr *nesmr; + u32 stag; + int ret; + struct ib_mr *ibmr; +/* + * Note: Set to always use a fixed length single page entry PBL. This is to allow + * for the fast_reg_mr operation to always know the size of the PBL. 
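The "fixed length single page entry PBL" note above has a concrete consequence: with NES_4K_PBL_CHUNK_SIZE of 4096 bytes and 8-byte page addresses, one PBL chunk holds 4096 / sizeof(u64) = 512 entries, which is exactly the -E2BIG limit enforced on max_page_list_len here and again in nes_alloc_fast_reg_page_list() below. With 4 KiB pages that caps a single fast-reg MR at 512 x 4 KiB = 2 MiB of registered memory.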
+ */ + if (max_page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) + return ERR_PTR(-E2BIG); - /* free the resources */ - if (nesfmr->leaf_pbl_cnt == 0) { - /* single PBL case */ - if (nesfmr->root_vpbl.pbl_vbase) - pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase, - nesfmr->root_vpbl.pbl_pbase); - } else { - for (i = 0; i < nesfmr->leaf_pbl_cnt; i++) { - pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase, - nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase); - } - kfree(nesfmr->root_vpbl.leaf_vpbl); - pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase, - nesfmr->root_vpbl.pbl_pbase); + get_random_bytes(&next_stag_index, sizeof(next_stag_index)); + stag_key = (u8)next_stag_index; + next_stag_index >>= 8; + next_stag_index %= nesadapter->max_mr; + + err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, + nesadapter->max_mr, &stag_index, + &next_stag_index, NES_RESOURCE_FAST_MR); + if (err) + return ERR_PTR(err); + + nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL); + if (!nesmr) { + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + return ERR_PTR(-ENOMEM); } - return nes_dealloc_mw(&temp_nesmr.ibmw); -} + stag = stag_index << 8; + stag |= driver_key; + stag += (u32)stag_key; + nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n", + stag, stag_index); -/** - * nes_map_phys_fmr + ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_page_list_len); + + if (ret == 0) { + nesmr->ibmr.rkey = stag; + nesmr->ibmr.lkey = stag; + nesmr->mode = IWNES_MEMREG_TYPE_FMEM; + ibmr = &nesmr->ibmr; + } else { + kfree(nesmr); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + ibmr = ERR_PTR(-ENOMEM); + } + return ibmr; +} + +/* + * nes_alloc_fast_reg_page_list */ -static int nes_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, - int list_len, u64 iova) +static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list( + struct ib_device *ibdev, + int page_list_len) { - return 0; -} + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + struct nes_device *nesdev = nesvnic->nesdev; + struct ib_fast_reg_page_list *pifrpl; + struct nes_ib_fast_reg_page_list *pnesfrpl; + if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) + return ERR_PTR(-E2BIG); + /* + * Allocate the ib_fast_reg_page_list structure, the + * nes_fast_bpl structure, and the PLB table. 
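The allocation above packs the shared ib_fast_reg_page_list, the driver bookkeeping, and the CPU-side u64 page array into a single kmalloc(), then allocates a separate DMA-coherent shadow of the page list for the hardware WQE; the free path recovers the wrapper with container_of(). A sketch of the layout; the field names and the stored device pointer are illustrative, not the exact nes fields:

#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <rdma/ib_verbs.h>

struct foo_frpl {
        struct ib_fast_reg_page_list ibfrpl;    /* handed to the IB core */
        struct device *dev;                     /* kept for the coherent free */
        void *wqe_pbl_kva;                      /* HW-visible shadow copy */
        dma_addr_t wqe_pbl_paddr;
        u64 pbl[];                              /* CPU-side page list */
};

static void foo_free_frpl(struct ib_fast_reg_page_list *ifrpl)
{
        struct foo_frpl *frpl = container_of(ifrpl, struct foo_frpl, ibfrpl);

        dma_free_coherent(frpl->dev,
                          ifrpl->max_page_list_len * sizeof(u64),
                          frpl->wqe_pbl_kva, frpl->wqe_pbl_paddr);
        kfree(frpl);    /* frees the struct and trailing pbl[] together */
}

Two copies exist because the IB core writes page addresses into page_list from the CPU, while the adapter DMAs the list out of the coherent buffer when the fast-register WQE is posted.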
+ */ + pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) + + page_list_len * sizeof(u64), GFP_KERNEL); + + if (!pnesfrpl) + return ERR_PTR(-ENOMEM); -/** - * nes_unmap_frm + pifrpl = &pnesfrpl->ibfrpl; + pifrpl->page_list = &pnesfrpl->pbl; + pifrpl->max_page_list_len = page_list_len; + /* + * Allocate the WQE PBL + */ + pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev, + page_list_len * sizeof(u64), + &pnesfrpl->nes_wqe_pbl.paddr); + + if (!pnesfrpl->nes_wqe_pbl.kva) { + kfree(pnesfrpl); + return ERR_PTR(-ENOMEM); + } + nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, " + "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, " + "pbl.paddr = %llx\n", pnesfrpl, &pnesfrpl->ibfrpl, + pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva, + (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr); + + return pifrpl; +} + +/* + * nes_free_fast_reg_page_list */ -static int nes_unmap_fmr(struct list_head *ibfmr_list) +static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl) { - return 0; + struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device); + struct nes_device *nesdev = nesvnic->nesdev; + struct nes_ib_fast_reg_page_list *pnesfrpl; + + pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl); + /* + * Free the WQE PBL. + */ + pci_free_consistent(nesdev->pcidev, + pifrpl->max_page_list_len * sizeof(u64), + pnesfrpl->nes_wqe_pbl.kva, + pnesfrpl->nes_wqe_pbl.paddr); + /* + * Free the PBL structure + */ + kfree(pnesfrpl); } - - /** * nes_query_device */ @@ -598,7 +521,7 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop memset(props, 0, sizeof(*props)); memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6); - props->fw_ver = nesdev->nesadapter->fw_ver; + props->fw_ver = nesdev->nesadapter->firmware_version; props->device_cap_flags = nesdev->nesadapter->device_cap_flags; props->vendor_id = nesdev->nesadapter->vendor_id; props->vendor_part_id = nesdev->nesadapter->vendor_part_id; @@ -608,28 +531,28 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2; props->max_sge = nesdev->nesadapter->max_sge; props->max_cq = nesibdev->max_cq; - props->max_cqe = nesdev->nesadapter->max_cqe - 1; + props->max_cqe = nesdev->nesadapter->max_cqe; props->max_mr = nesibdev->max_mr; props->max_mw = nesibdev->max_mr; props->max_pd = nesibdev->max_pd; props->max_sge_rd = 1; switch (nesdev->nesadapter->max_irrq_wr) { case 0: - props->max_qp_rd_atom = 1; + props->max_qp_rd_atom = 2; break; case 1: - props->max_qp_rd_atom = 4; + props->max_qp_rd_atom = 8; break; case 2: - props->max_qp_rd_atom = 16; + props->max_qp_rd_atom = 32; break; case 3: - props->max_qp_rd_atom = 32; + props->max_qp_rd_atom = 64; break; default: props->max_qp_rd_atom = 0; } - props->max_qp_init_rd_atom = props->max_qp_wr; + props->max_qp_init_rd_atom = props->max_qp_rd_atom; props->atomic_cap = IB_ATOMIC_NONE; props->max_map_per_fmr = 1; @@ -642,15 +565,34 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop */ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { + struct nes_vnic *nesvnic = to_nesvnic(ibdev); + struct net_device *netdev = nesvnic->netdev; + memset(props, 0, sizeof(*props)); - props->max_mtu = IB_MTU_2048; - props->active_mtu = IB_MTU_2048; + props->max_mtu = IB_MTU_4096; + + if (netdev->mtu >= 4096) + props->active_mtu = IB_MTU_4096; + else if (netdev->mtu >= 2048) + 
props->active_mtu = IB_MTU_2048; + else if (netdev->mtu >= 1024) + props->active_mtu = IB_MTU_1024; + else if (netdev->mtu >= 512) + props->active_mtu = IB_MTU_512; + else + props->active_mtu = IB_MTU_256; + props->lid = 1; props->lmc = 0; props->sm_lid = 0; props->sm_sl = 0; - props->state = IB_PORT_ACTIVE; + if (netif_queue_stopped(netdev)) + props->state = IB_PORT_DOWN; + else if (nesvnic->linkup) + props->state = IB_PORT_ACTIVE; + else + props->state = IB_PORT_DOWN; props->phys_state = 0; props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; @@ -658,7 +600,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr props->pkey_tbl_len = 1; props->qkey_viol_cntr = 0; props->active_width = IB_WIDTH_4X; - props->active_speed = 1; + props->active_speed = IB_SPEED_SDR; props->max_msg_sz = 0x80000000; return 0; @@ -666,16 +608,6 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr /** - * nes_modify_port - */ -static int nes_modify_port(struct ib_device *ibdev, u8 port, - int port_modify_mask, struct ib_port_modify *props) -{ - return 0; -} - - -/** * nes_query_pkey */ static int nes_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) @@ -848,10 +780,10 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev, nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n", nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context, - atomic_read(&nesvnic->netdev->refcnt)); + netdev_refcnt_read(nesvnic->netdev)); err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds, - nesadapter->max_pd, &pd_num, &nesadapter->next_pd); + nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD); if (err) { return ERR_PTR(err); } @@ -1079,6 +1011,7 @@ static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl, kunmap(nesqp->page); return -ENOMEM; } + nesqp->sq_kmapped = 1; nesqp->hwqp.q2_vbase = mem; mem += 256; memset(nesqp->hwqp.q2_vbase, 0, 256); @@ -1156,7 +1089,10 @@ static inline void nes_free_qp_mem(struct nes_device *nesdev, pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase); pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase ); nesqp->pbl_vbase = NULL; - kunmap(nesqp->page); + if (nesqp->sq_kmapped) { + nesqp->sq_kmapped = 0; + kunmap(nesqp->page); + } } } @@ -1224,7 +1160,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size); ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps, - nesadapter->max_qp, &qp_num, &nesadapter->next_qp); + nesadapter->max_qp, &qp_num, &nesadapter->next_qp, NES_RESOURCE_QP); if (ret) { return ERR_PTR(ret); } @@ -1250,11 +1186,13 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); kfree(nesqp->allocated_buffer); nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n"); - return NULL; + return ERR_PTR(-EFAULT); } if (req.user_wqe_buffers) { virt_wqs = 1; } + if (req.user_qp_buffer) + nesqp->nesuqp_addr = req.user_qp_buffer; if ((ibpd->uobject) && (ibpd->uobject->context)) { nesqp->user_mode = 1; nes_ucontext = to_nesucontext(ibpd->uobject->context); @@ -1338,8 +1276,10 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT); nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.sq_encoded_size << 
NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT); + if (!udata) { nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_PRIV_EN); nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_FAST_REGISTER_EN); + } nesqp->nesqp_context->cqs = cpu_to_le32(nesqp->nesscq->hw_cq.cq_number + ((u32)nesqp->nesrcq->hw_cq.cq_number << 16)); u64temp = (u64)nesqp->hwqp.sq_pbase; @@ -1381,6 +1321,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nesqp->nesqp_context->aeq_token_low = cpu_to_le32((u32)((unsigned long)(nesqp))); nesqp->nesqp_context->aeq_token_high = cpu_to_le32((u32)(upper_32_bits((unsigned long)(nesqp)))); nesqp->nesqp_context->ird_ord_sizes = cpu_to_le32(NES_QPCONTEXT_ORDIRD_ALSMM | + NES_QPCONTEXT_ORDIRD_AAH | ((((u32)nesadapter->max_irrq_wr) << NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT) & NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK)); if (disable_mpa_crc) { @@ -1445,6 +1386,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, if (ibpd->uobject) { uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index; + uresp.mmap_rq_db_index = 0; uresp.actual_sq_size = sq_size; uresp.actual_rq_size = rq_size; uresp.qp_id = nesqp->hwqp.qp_id; @@ -1460,24 +1402,59 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n", nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp)); spin_lock_init(&nesqp->lock); - init_waitqueue_head(&nesqp->state_waitq); - init_waitqueue_head(&nesqp->kick_waitq); nes_add_ref(&nesqp->ibqp); break; default: nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type); return ERR_PTR(-EINVAL); - break; } + nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR); + init_timer(&nesqp->terminate_timer); + nesqp->terminate_timer.function = nes_terminate_timeout; + nesqp->terminate_timer.data = (unsigned long)nesqp; + /* update the QP table */ nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp; nes_debug(NES_DBG_QP, "netdev refcnt=%u\n", - atomic_read(&nesvnic->netdev->refcnt)); + netdev_refcnt_read(nesvnic->netdev)); return &nesqp->ibqp; } +/** + * nes_clean_cq + */ +static void nes_clean_cq(struct nes_qp *nesqp, struct nes_cq *nescq) +{ + u32 cq_head; + u32 lo; + u32 hi; + u64 u64temp; + unsigned long flags = 0; + + spin_lock_irqsave(&nescq->lock, flags); + + cq_head = nescq->hw_cq.cq_head; + while (le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) { + rmb(); + lo = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); + hi = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]); + u64temp = (((u64)hi) << 32) | ((u64)lo); + u64temp &= ~(NES_SW_CONTEXT_ALIGN-1); + if (u64temp == (u64)(unsigned long)nesqp) { + /* Zero the context value so cqe will be ignored */ + nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = 0; + nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX] = 0; + } + + if (++cq_head >= nescq->hw_cq.cq_size) + cq_head = 0; + } + + spin_unlock_irqrestore(&nescq->lock, flags); +} + /** * nes_destroy_qp @@ -1485,12 +1462,11 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, static int nes_destroy_qp(struct ib_qp *ibqp) { struct nes_qp *nesqp = to_nesqp(ibqp); - /* struct nes_vnic *nesvnic = to_nesvnic(ibqp->device); */ struct nes_ucontext *nes_ucontext; struct ib_qp_attr attr; struct iw_cm_id *cm_id; struct iw_cm_event cm_event; - int ret; + int ret = 0; atomic_inc(&sw_qps_destroyed); nesqp->destroyed = 1; @@ -1506,7 +1482,7 @@ static int 
nes_destroy_qp(struct ib_qp *ibqp) (nesqp->ibqp_state == IB_QPS_RTR)) && (nesqp->cm_id)) { cm_id = nesqp->cm_id; cm_event.event = IW_CM_EVENT_CONNECT_REPLY; - cm_event.status = IW_CM_EVENT_STATUS_TIMEOUT; + cm_event.status = -ETIMEDOUT; cm_event.local_addr = cm_id->local_addr; cm_event.remote_addr = cm_id->remote_addr; cm_event.private_data = NULL; @@ -1522,7 +1498,6 @@ static int nes_destroy_qp(struct ib_qp *ibqp) nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret); } - if (nesqp->user_mode) { if ((ibqp->uobject)&&(ibqp->uobject->context)) { nes_ucontext = to_nesucontext(ibqp->uobject->context); @@ -1532,10 +1507,18 @@ static int nes_destroy_qp(struct ib_qp *ibqp) nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index; } } - if (nesqp->pbl_pbase) + if (nesqp->pbl_pbase && nesqp->sq_kmapped) { + nesqp->sq_kmapped = 0; kunmap(nesqp->page); - } + } + } else { + /* Clean any pending completions from the cq(s) */ + if (nesqp->nesscq) + nes_clean_cq(nesqp, nesqp->nesscq); + if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq)) + nes_clean_cq(nesqp, nesqp->nesrcq); + } nes_rem_ref(&nesqp->ibqp); return 0; } @@ -1567,8 +1550,11 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, unsigned long flags; int ret; + if (entries > nesadapter->max_cqe) + return ERR_PTR(-EINVAL); + err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs, - nesadapter->max_cq, &cq_num, &nesadapter->next_cq); + nesadapter->max_cq, &cq_num, &nesadapter->next_cq, NES_RESOURCE_CQ); if (err) { return ERR_PTR(err); } @@ -1596,11 +1582,12 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, nes_ucontext->mcrqf = req.mcrqf; if (nes_ucontext->mcrqf) { if (nes_ucontext->mcrqf & 0x80000000) - nescq->hw_cq.cq_number = nesvnic->nic.qp_id + 12 + (nes_ucontext->mcrqf & 0xf) - 1; + nescq->hw_cq.cq_number = nesvnic->nic.qp_id + 28 + 2 * ((nes_ucontext->mcrqf & 0xf) - 1); else if (nes_ucontext->mcrqf & 0x40000000) nescq->hw_cq.cq_number = nes_ucontext->mcrqf & 0xffff; else nescq->hw_cq.cq_number = nesvnic->mcrq_qp_id + nes_ucontext->mcrqf-1; + nescq->mcrqf = nes_ucontext->mcrqf; nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); } nes_debug(NES_DBG_CQ, "CQ Virtual Address = %08lX, size = %u.\n", @@ -1656,6 +1643,12 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); + else { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, + nespbl->pbl_vbase, nespbl->pbl_pbase); + kfree(nespbl); + } + nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); return ERR_PTR(-ENOMEM); @@ -1674,16 +1667,16 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, /* use 4k pbl */ nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries); if (nesadapter->free_4kpbl == 0) { - if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kfree(cqp_request); - } else { - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_free_cqp_request(nesdev, cqp_request); if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); + else { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, + nespbl->pbl_vbase, nespbl->pbl_pbase); + kfree(nespbl); + } nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); 
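/*
 * Illustrative sketch, not part of the patch: the PBL-exhaustion and
 * CQP-timeout error paths that these hunks touch in nes_create_cq() all
 * repeat the same unwind sequence, which the patch extends to also free
 * the user-supplied PBL.  The usual kernel idiom is to write that unwind
 * once behind a goto label; the label name below is hypothetical and the
 * locals (context, mem, nespbl, cq_num, nescq, err) are those of
 * nes_create_cq().
 */
err_free_cq_mem:
	if (!context)
		pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size,
				mem, nescq->hw_cq.cq_pbase);
	else {
		pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
				nespbl->pbl_vbase, nespbl->pbl_pbase);
		kfree(nespbl);
	}
	nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
	kfree(nescq);
	return ERR_PTR(err);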
kfree(nescq); return ERR_PTR(-ENOMEM); @@ -1696,16 +1689,16 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, /* use 256 byte pbl */ nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries); if (nesadapter->free_256pbl == 0) { - if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kfree(cqp_request); - } else { - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_free_cqp_request(nesdev, cqp_request); if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); + else { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, + nespbl->pbl_vbase, nespbl->pbl_pbase); + kfree(nespbl); + } nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); return ERR_PTR(-ENOMEM); @@ -1758,6 +1751,11 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); + else { + pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, + nespbl->pbl_vbase, nespbl->pbl_pbase); + kfree(nespbl); + } nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); return ERR_PTR(-EIO); @@ -1772,7 +1770,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, resp.cq_id = nescq->hw_cq.cq_number; resp.cq_size = nescq->hw_cq.cq_size; resp.mmap_db_index = 0; - if (ib_copy_to_udata(udata, &resp, sizeof resp)) { + if (ib_copy_to_udata(udata, &resp, sizeof resp - sizeof resp.reserved)) { nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num); kfree(nescq); return ERR_PTR(-EFAULT); @@ -1839,7 +1837,9 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode); set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16))); - nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number); + if (!nescq->mcrqf) + nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number); + atomic_set(&cqp_request->refcount, 2); nes_post_cqp_request(nesdev, cqp_request); @@ -1873,21 +1873,74 @@ static int nes_destroy_cq(struct ib_cq *ib_cq) return ret; } +/** + * root_256 + */ +static u32 root_256(struct nes_device *nesdev, + struct nes_root_vpbl *root_vpbl, + struct nes_root_vpbl *new_root, + u16 pbl_count_4k) +{ + u64 leaf_pbl; + int i, j, k; + + if (pbl_count_4k == 1) { + new_root->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, + 512, &new_root->pbl_pbase); + + if (new_root->pbl_vbase == NULL) + return 0; + + leaf_pbl = (u64)root_vpbl->pbl_pbase; + for (i = 0; i < 16; i++) { + new_root->pbl_vbase[i].pa_low = + cpu_to_le32((u32)leaf_pbl); + new_root->pbl_vbase[i].pa_high = + cpu_to_le32((u32)((((u64)leaf_pbl) >> 32))); + leaf_pbl += 256; + } + } else { + for (i = 3; i >= 0; i--) { + j = i * 16; + root_vpbl->pbl_vbase[j] = root_vpbl->pbl_vbase[i]; + leaf_pbl = le32_to_cpu(root_vpbl->pbl_vbase[j].pa_low) + + (((u64)le32_to_cpu(root_vpbl->pbl_vbase[j].pa_high)) + << 32); + for (k = 1; k < 16; k++) { + leaf_pbl += 256; + root_vpbl->pbl_vbase[j + k].pa_low = + cpu_to_le32((u32)leaf_pbl); + root_vpbl->pbl_vbase[j + k].pa_high = + cpu_to_le32((u32)((((u64)leaf_pbl) >> 32))); + } + } + } + + return 1; +} + /** * nes_reg_mr */ static int nes_reg_mr(struct 
nes_device *nesdev, struct nes_pd *nespd, u32 stag, u64 region_length, struct nes_root_vpbl *root_vpbl, - dma_addr_t single_buffer, u16 pbl_count, u16 residual_page_count, - int acc, u64 *iova_start) + dma_addr_t single_buffer, u16 pbl_count_4k, + u16 residual_page_count_4k, int acc, u64 *iova_start, + u16 *actual_pbl_cnt, u8 *used_4k_pbls) { struct nes_hw_cqp_wqe *cqp_wqe; struct nes_cqp_request *cqp_request; unsigned long flags; int ret; struct nes_adapter *nesadapter = nesdev->nesadapter; - /* int count; */ + uint pg_cnt = 0; + u16 pbl_count_256 = 0; + u16 pbl_count = 0; + u8 use_256_pbls = 0; + u8 use_4k_pbls = 0; + u16 use_two_level = (pbl_count_4k > 1) ? 1 : 0; + struct nes_root_vpbl new_root = { 0, NULL, NULL }; u32 opcode = 0; u16 major_code; @@ -1900,56 +1953,70 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, cqp_request->waiting = 1; cqp_wqe = &cqp_request->cqp_wqe; - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - /* track PBL resources */ - if (pbl_count != 0) { - if (pbl_count > 1) { - /* Two level PBL */ - if ((pbl_count+1) > nesadapter->free_4kpbl) { - nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n"); - if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kfree(cqp_request); - } else { - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } - return -ENOMEM; - } else { - nesadapter->free_4kpbl -= pbl_count+1; - } - } else if (residual_page_count > 32) { - if (pbl_count > nesadapter->free_4kpbl) { - nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n"); - if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kfree(cqp_request); - } else { - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } - return -ENOMEM; - } else { - nesadapter->free_4kpbl -= pbl_count; + if (pbl_count_4k) { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + + pg_cnt = ((pbl_count_4k - 1) * 512) + residual_page_count_4k; + pbl_count_256 = (pg_cnt + 31) / 32; + if (pg_cnt <= 32) { + if (pbl_count_256 <= nesadapter->free_256pbl) + use_256_pbls = 1; + else if (pbl_count_4k <= nesadapter->free_4kpbl) + use_4k_pbls = 1; + } else if (pg_cnt <= 2048) { + if (((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) && + (nesadapter->free_4kpbl > (nesadapter->max_4kpbl >> 1))) { + use_4k_pbls = 1; + } else if ((pbl_count_256 + 1) <= nesadapter->free_256pbl) { + use_256_pbls = 1; + use_two_level = 1; + } else if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) { + use_4k_pbls = 1; } } else { - if (pbl_count > nesadapter->free_256pbl) { - nes_debug(NES_DBG_MR, "Out of 256B Pbls.\n"); - if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kfree(cqp_request); - } else { - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } - return -ENOMEM; - } else { - nesadapter->free_256pbl -= pbl_count; - } + if ((pbl_count_4k + 1) <= nesadapter->free_4kpbl) + use_4k_pbls = 1; + } + + if (use_256_pbls) { + pbl_count = pbl_count_256; + nesadapter->free_256pbl -= pbl_count + use_two_level; + } else if (use_4k_pbls) { + pbl_count = pbl_count_4k; + nesadapter->free_4kpbl -= pbl_count + use_two_level; + } else { + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_debug(NES_DBG_MR, "Out of Pbls\n"); + nes_free_cqp_request(nesdev, cqp_request); + return -ENOMEM; } + + 
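/*
 * Worked example of the PBL accounting above, purely illustrative: a 1 MB
 * registration of 4 KB pages gives pg_cnt = ((1 - 1) * 512) + 256 = 256.
 * A 256-byte PBL holds 32 u64 entries and a 4 KB PBL holds 512, so
 * pbl_count_256 = (256 + 31) / 32 = 8 while pbl_count_4k = 1.  Because
 * pg_cnt > 32, the 256-byte path needs a root PBL above its 8 leaves
 * (use_two_level = 1, so 9 PBLs are reserved), whereas the 4 KB path
 * stays single level.  The helper below is hypothetical and only
 * restates the reservation rule for the 256-byte case.
 */
static unsigned int nes_256pbls_needed(unsigned int pg_cnt)
{
	unsigned int leaves = (pg_cnt + 31) / 32;	/* 32 u64 entries per 256 B PBL */

	return leaves + (leaves > 1 ? 1 : 0);	/* one extra root PBL when two-level */
}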
spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); } - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + if (use_256_pbls && use_two_level) { + if (root_256(nesdev, root_vpbl, &new_root, pbl_count_4k) == 1) { + if (new_root.pbl_pbase != 0) + root_vpbl = &new_root; + } else { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + nesadapter->free_256pbl += pbl_count_256 + use_two_level; + use_256_pbls = 0; + + if (pbl_count_4k == 1) + use_two_level = 0; + pbl_count = pbl_count_4k; + + if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) { + nesadapter->free_4kpbl -= pbl_count + use_two_level; + use_4k_pbls = 1; + } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + + if (use_4k_pbls == 0) + return -ENOMEM; + } + } opcode = NES_CQP_REGISTER_STAG | NES_CQP_STAG_RIGHTS_LOCAL_READ | NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR; @@ -1978,10 +2045,9 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, } else { set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, root_vpbl->pbl_pbase); set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, pbl_count); - set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, - (((pbl_count - 1) * 4096) + (residual_page_count*8))); + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (pg_cnt * 8)); - if ((pbl_count > 1) || (residual_page_count > 32)) + if (use_4k_pbls) cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE); } barrier(); @@ -1998,13 +2064,25 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, major_code = cqp_request->major_code; nes_put_cqp_request(nesdev, cqp_request); + if ((!ret || major_code) && pbl_count != 0) { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (use_256_pbls) + nesadapter->free_256pbl += pbl_count + use_two_level; + else if (use_4k_pbls) + nesadapter->free_4kpbl += pbl_count + use_two_level; + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } + if (new_root.pbl_pbase) + pci_free_consistent(nesdev->pcidev, 512, new_root.pbl_vbase, + new_root.pbl_pbase); + if (!ret) return -ETIME; else if (major_code) return -EIO; - else - return 0; + *actual_pbl_cnt = pbl_count + use_two_level; + *used_4k_pbls = use_4k_pbls; return 0; } @@ -2027,18 +2105,18 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, struct nes_root_vpbl root_vpbl; u32 stag; u32 i; + unsigned long mask; u32 stag_index = 0; u32 next_stag_index = 0; u32 driver_key = 0; u32 root_pbl_index = 0; u32 cur_pbl_index = 0; - int err = 0, pbl_depth = 0; + int err = 0; int ret = 0; u16 pbl_count = 0; u8 single_page = 1; u8 stag_key = 0; - pbl_depth = 0; region_length = 0; vpbl.pbl_vbase = NULL; root_vpbl.pbl_vbase = NULL; @@ -2055,8 +2133,11 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, return ERR_PTR(-E2BIG); } + if ((buffer_list[0].addr ^ *iova_start) & ~PAGE_MASK) + return ERR_PTR(-EINVAL); + err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr, - &stag_index, &next_stag_index); + &stag_index, &next_stag_index, NES_RESOURCE_PHYS_MR); if (err) { return ERR_PTR(err); } @@ -2120,19 +2201,16 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, root_pbl_index++; cur_pbl_index = 0; } - if (buffer_list[i].addr & ~PAGE_MASK) { - /* TODO: Unwind allocated buffers */ - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", - (unsigned int) buffer_list[i].addr); - ibmr = 
ERR_PTR(-EINVAL); - kfree(nesmr); - goto reg_phys_err; - } - if (!buffer_list[i].size) { + mask = !buffer_list[i].size; + if (i != 0) + mask |= buffer_list[i].addr; + if (i != num_phys_buf - 1) + mask |= buffer_list[i].addr + buffer_list[i].size; + + if (mask & ~PAGE_MASK) { nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); + nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n"); ibmr = ERR_PTR(-EINVAL); kfree(nesmr); goto reg_phys_err; @@ -2143,7 +2221,7 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr) single_page = 0; } - vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr); + vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr & PAGE_MASK); vpbl.pbl_vbase[cur_pbl_index++].pa_high = cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32))); } @@ -2156,8 +2234,6 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, " length = 0x%016lX, index = 0x%08X\n", stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index); - region_length -= (*iova_start)&PAGE_MASK; - /* Make the leaf PBL the root if only one PBL */ if (root_pbl_index == 1) { root_vpbl.pbl_pbase = vpbl.pbl_pbase; @@ -2169,18 +2245,14 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, pbl_count = root_pbl_index; } ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl, - buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start); + buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start, + &nesmr->pbls_used, &nesmr->pbl_4k); if (ret == 0) { nesmr->ibmr.rkey = stag; nesmr->ibmr.lkey = stag; nesmr->mode = IWNES_MEMREG_TYPE_MEM; ibmr = &nesmr->ibmr; - nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 
1 : 0; - nesmr->pbls_used = pbl_count; - if (pbl_count > 1) { - nesmr->pbls_used++; - } } else { kfree(nesmr); ibmr = ERR_PTR(-ENOMEM); @@ -2237,7 +2309,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; struct ib_mr *ibmr = ERR_PTR(-EINVAL); - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct nes_ucontext *nes_ucontext; struct nes_pbl *nespbl; struct nes_mr *nesmr; @@ -2245,7 +2317,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct nes_mem_reg_req req; struct nes_vpbl vpbl; struct nes_root_vpbl root_vpbl; - int nmap_index, page_index; + int entry, page_index; int page_count = 0; int err, pbl_depth = 0; int chunk_pages; @@ -2260,6 +2332,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u16 pbl_count; u8 single_page = 1; u8 stag_key; + int first_page = 1; region = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(region)) { @@ -2273,8 +2346,10 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, skip_pages = ((u32)region->offset) >> 12; - if (ib_copy_from_udata(&req, udata, sizeof(req))) + if (ib_copy_from_udata(&req, udata, sizeof(req))) { + ib_umem_release(region); return ERR_PTR(-EFAULT); + } nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type); switch (req.reg_type) { @@ -2294,7 +2369,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, next_stag_index %= nesadapter->max_mr; err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, - nesadapter->max_mr, &stag_index, &next_stag_index); + nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_USER_MR); if (err) { ib_umem_release(region); return ERR_PTR(err); @@ -2308,128 +2383,125 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } nesmr->region = region; - list_for_each_entry(chunk, ®ion->chunk_list, list) { - nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n", - chunk->nents, chunk->nmap); - for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { - if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) { - ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", - (unsigned int) sg_dma_address(&chunk->page_list[nmap_index])); - ibmr = ERR_PTR(-EINVAL); - kfree(nesmr); - goto reg_user_mr_err; - } + for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { + if (sg_dma_address(sg) & ~PAGE_MASK) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", + (unsigned int) sg_dma_address(sg)); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_user_mr_err; + } - if (!sg_dma_len(&chunk->page_list[nmap_index])) { - ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); - ibmr = ERR_PTR(-EINVAL); - kfree(nesmr); - goto reg_user_mr_err; - } + if (!sg_dma_len(sg)) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); + nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_user_mr_err; + } - region_length += sg_dma_len(&chunk->page_list[nmap_index]); - chunk_pages = 
sg_dma_len(&chunk->page_list[nmap_index]) >> 12; - region_length -= skip_pages << 12; - for (page_index=skip_pages; page_index < chunk_pages; page_index++) { - skip_pages = 0; - if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length) - goto enough_pages; - if ((page_count&0x01FF) == 0) { - if (page_count >= 1024 * 512) { + region_length += sg_dma_len(sg); + chunk_pages = sg_dma_len(sg) >> 12; + region_length -= skip_pages << 12; + for (page_index = skip_pages; page_index < chunk_pages; page_index++) { + skip_pages = 0; + if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length) + goto enough_pages; + if ((page_count&0x01FF) == 0) { + if (page_count >= 1024 * 512) { + ib_umem_release(region); + nes_free_resource(nesadapter, + nesadapter->allocated_mrs, stag_index); + kfree(nesmr); + ibmr = ERR_PTR(-E2BIG); + goto reg_user_mr_err; + } + if (root_pbl_index == 1) { + root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, + 8192, &root_vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", + root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); + if (!root_vpbl.pbl_vbase) { ib_umem_release(region); - nes_free_resource(nesadapter, - nesadapter->allocated_mrs, stag_index); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); kfree(nesmr); - ibmr = ERR_PTR(-E2BIG); + ibmr = ERR_PTR(-ENOMEM); goto reg_user_mr_err; } - if (root_pbl_index == 1) { - root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, - 8192, &root_vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", - root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); - if (!root_vpbl.pbl_vbase) { - ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - kfree(nesmr); - ibmr = ERR_PTR(-ENOMEM); - goto reg_user_mr_err; - } - root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, - GFP_KERNEL); - if (!root_vpbl.leaf_vpbl) { - ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, - root_vpbl.pbl_pbase); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - kfree(nesmr); - ibmr = ERR_PTR(-ENOMEM); - goto reg_user_mr_err; - } - root_vpbl.pbl_vbase[0].pa_low = - cpu_to_le32((u32)vpbl.pbl_pbase); - root_vpbl.pbl_vbase[0].pa_high = - cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); - root_vpbl.leaf_vpbl[0] = vpbl; - } - vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, - &vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n", - vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase); - if (!vpbl.pbl_vbase) { + root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, + GFP_KERNEL); + if (!root_vpbl.leaf_vpbl) { ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - ibmr = ERR_PTR(-ENOMEM); + pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, + root_vpbl.pbl_pbase); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); kfree(nesmr); + ibmr = ERR_PTR(-ENOMEM); goto reg_user_mr_err; } - if (1 <= root_pbl_index) { - root_vpbl.pbl_vbase[root_pbl_index].pa_low = - 
cpu_to_le32((u32)vpbl.pbl_pbase); - root_vpbl.pbl_vbase[root_pbl_index].pa_high = - cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32))); - root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; - } - root_pbl_index++; - cur_pbl_index = 0; + root_vpbl.pbl_vbase[0].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[0].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); + root_vpbl.leaf_vpbl[0] = vpbl; } - if (single_page) { - if (page_count != 0) { - if ((last_dma_addr+4096) != - (sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))) - single_page = 0; - last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096); - } else { - first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096); - last_dma_addr = first_dma_addr; - } + vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, + &vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n", + vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase); + if (!vpbl.pbl_vbase) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + ibmr = ERR_PTR(-ENOMEM); + kfree(nesmr); + goto reg_user_mr_err; } - - vpbl.pbl_vbase[cur_pbl_index].pa_low = - cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))); - vpbl.pbl_vbase[cur_pbl_index].pa_high = - cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))) >> 32))); - cur_pbl_index++; - page_count++; + if (1 <= root_pbl_index) { + root_vpbl.pbl_vbase[root_pbl_index].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[root_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32))); + root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; + } + root_pbl_index++; + cur_pbl_index = 0; } + if (single_page) { + if (page_count != 0) { + if ((last_dma_addr+4096) != + (sg_dma_address(sg)+ + (page_index*4096))) + single_page = 0; + last_dma_addr = sg_dma_address(sg)+ + (page_index*4096); + } else { + first_dma_addr = sg_dma_address(sg)+ + (page_index*4096); + last_dma_addr = first_dma_addr; + } + } + + vpbl.pbl_vbase[cur_pbl_index].pa_low = + cpu_to_le32((u32)(sg_dma_address(sg)+ + (page_index*4096))); + vpbl.pbl_vbase[cur_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)(sg_dma_address(sg)+ + (page_index*4096))) >> 32))); + cur_pbl_index++; + page_count++; } } + enough_pages: nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x," " stag_key=0x%08x\n", @@ -2437,9 +2509,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, stag = stag_index << 8; stag |= driver_key; stag += (u32)stag_key; - if (stag == 0) { - stag = 1; - } iova_start = virt; /* Make the leaf PBL the root if only one PBL */ @@ -2458,8 +2527,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, stag, (unsigned int)iova_start, (unsigned int)region_length, stag_index, (unsigned long long)region->length, pbl_count); - ret = nes_reg_mr( nesdev, nespd, stag, region->length, &root_vpbl, - first_dma_addr, pbl_count, (u16)cur_pbl_index, acc, &iova_start); + ret = nes_reg_mr(nesdev, nespd, stag, region->length, &root_vpbl, + first_dma_addr, pbl_count, (u16)cur_pbl_index, acc, + &iova_start, &nesmr->pbls_used, &nesmr->pbl_4k); nes_debug(NES_DBG_MR, "ret=%d\n", ret); @@ -2468,11 +2538,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nesmr->ibmr.lkey = stag; nesmr->mode = IWNES_MEMREG_TYPE_MEM; ibmr = &nesmr->ibmr; - 
nesmr->pbl_4k = ((pbl_count > 1) || (cur_pbl_index > 32)) ? 1 : 0; - nesmr->pbls_used = pbl_count; - if (pbl_count > 1) { - nesmr->pbls_used++; - } } else { ib_umem_release(region); kfree(nesmr); @@ -2498,9 +2563,13 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nes_debug(NES_DBG_MR, "Leaving, ibmr=%p", ibmr); return ibmr; - break; case IWNES_MEMREG_TYPE_QP: case IWNES_MEMREG_TYPE_CQ: + if (!region->length) { + nes_debug(NES_DBG_MR, "Unable to register zero length region for CQ\n"); + ib_umem_release(region); + return ERR_PTR(-EINVAL); + } nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL); if (!nespbl) { nes_debug(NES_DBG_MR, "Unable to allocate PBL\n"); @@ -2544,25 +2613,28 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase, (void *) nespbl->pbl_vbase, nespbl->user_base); - list_for_each_entry(chunk, ®ion->chunk_list, list) { - for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { - chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12; - chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0; - nespbl->page = sg_page(&chunk->page_list[0]); - for (page_index=0; page_index<chunk_pages; page_index++) { - ((__le32 *)pbl)[0] = cpu_to_le32((u32) - (sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))); - ((__le32 *)pbl)[1] = cpu_to_le32(((u64) - (sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096)))>>32); - nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, - (unsigned long long)*pbl, - le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0])); - pbl++; - } + for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { + chunk_pages = sg_dma_len(sg) >> 12; + chunk_pages += (sg_dma_len(sg) & (4096-1)) ? 
1 : 0; + if (first_page) { + nespbl->page = sg_page(sg); + first_page = 0; + } + + for (page_index = 0; page_index < chunk_pages; page_index++) { + ((__le32 *)pbl)[0] = cpu_to_le32((u32) + (sg_dma_address(sg)+ + (page_index*4096))); + ((__le32 *)pbl)[1] = cpu_to_le32(((u64) + (sg_dma_address(sg)+ + (page_index*4096)))>>32); + nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, + (unsigned long long)*pbl, + le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0])); + pbl++; } } + if (req.reg_type == IWNES_MEMREG_TYPE_QP) { list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list); } else { @@ -2572,9 +2644,9 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nesmr->ibmr.lkey = -1; nesmr->mode = req.reg_type; return &nesmr->ibmr; - break; } + ib_umem_release(region); return ERR_PTR(-ENOSYS); } @@ -2613,24 +2685,6 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) cqp_request->waiting = 1; cqp_wqe = &cqp_request->cqp_wqe; - spin_lock_irqsave(&nesadapter->pbl_lock, flags); - if (nesmr->pbls_used != 0) { - if (nesmr->pbl_4k) { - nesadapter->free_4kpbl += nesmr->pbls_used; - if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) { - printk(KERN_ERR PFX "free 4KB PBLs(%u) has exceeded the max(%u)\n", - nesadapter->free_4kpbl, nesadapter->max_4kpbl); - } - } else { - nesadapter->free_256pbl += nesmr->pbls_used; - if (nesadapter->free_256pbl > nesadapter->max_256pbl) { - printk(KERN_ERR PFX "free 256B PBLs(%u) has exceeded the max(%u)\n", - nesadapter->free_256pbl, nesadapter->max_256pbl); - } - } - } - - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); nes_fill_init_cqp_wqe(cqp_wqe, nesdev); set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, NES_CQP_DEALLOCATE_STAG | NES_CQP_STAG_VA_TO | @@ -2648,11 +2702,6 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) " CQP Major:Minor codes = 0x%04X:0x%04X\n", ib_mr->rkey, ret, cqp_request->major_code, cqp_request->minor_code); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - (ib_mr->rkey & 0x0fffff00) >> 8); - - kfree(nesmr); - major_code = cqp_request->major_code; minor_code = cqp_request->minor_code; @@ -2668,8 +2717,33 @@ static int nes_dereg_mr(struct ib_mr *ib_mr) " to destroy STag, ib_mr=%p, rkey = 0x%08X\n", major_code, minor_code, ib_mr, ib_mr->rkey); return -EIO; - } else - return 0; + } + + if (nesmr->pbls_used != 0) { + spin_lock_irqsave(&nesadapter->pbl_lock, flags); + if (nesmr->pbl_4k) { + nesadapter->free_4kpbl += nesmr->pbls_used; + if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) + printk(KERN_ERR PFX "free 4KB PBLs(%u) has " + "exceeded the max(%u)\n", + nesadapter->free_4kpbl, + nesadapter->max_4kpbl); + } else { + nesadapter->free_256pbl += nesmr->pbls_used; + if (nesadapter->free_256pbl > nesadapter->max_256pbl) + printk(KERN_ERR PFX "free 256B PBLs(%u) has " + "exceeded the max(%u)\n", + nesadapter->free_256pbl, + nesadapter->max_256pbl); + } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + } + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + (ib_mr->rkey & 0x0fffff00) >> 8); + + kfree(nesmr); + + return 0; } @@ -2699,10 +2773,9 @@ static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, struct nes_vnic *nesvnic = nesibdev->nesvnic; nes_debug(NES_DBG_INIT, "\n"); - return sprintf(buf, "%x.%x.%x\n", - (int)(nesvnic->nesdev->nesadapter->fw_ver >> 32), - (int)(nesvnic->nesdev->nesadapter->fw_ver >> 16) & 0xffff, - (int)(nesvnic->nesdev->nesadapter->fw_ver & 0xffff)); + return sprintf(buf, "%u.%u\n", + 
(nesvnic->nesdev->nesadapter->firmware_version >> 16), + (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); } @@ -2755,17 +2828,16 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr->cap.max_send_wr = nesqp->hwqp.sq_size; attr->cap.max_recv_wr = nesqp->hwqp.rq_size; attr->cap.max_recv_sge = 1; - if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) { - init_attr->cap.max_inline_data = 0; - } else { - init_attr->cap.max_inline_data = 64; - } + if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) + attr->cap.max_inline_data = 0; + else + attr->cap.max_inline_data = 64; init_attr->event_handler = nesqp->ibqp.event_handler; init_attr->qp_context = nesqp->ibqp.qp_context; init_attr->send_cq = nesqp->ibqp.send_cq; init_attr->recv_cq = nesqp->ibqp.recv_cq; - init_attr->srq = nesqp->ibqp.srq = nesqp->ibqp.srq; + init_attr->srq = nesqp->ibqp.srq; init_attr->cap = attr->cap; return 0; @@ -2776,7 +2848,7 @@ static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, * nes_hw_modify_qp */ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, - u32 next_iwarp_state, u32 wait_completion) + u32 next_iwarp_state, u32 termlen, u32 wait_completion) { struct nes_hw_cqp_wqe *cqp_wqe; /* struct iw_cm_id *cm_id = nesqp->cm_id; */ @@ -2808,6 +2880,13 @@ int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp, set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id); set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase); + /* If sending a terminate message, fill in the length (in words) */ + if (((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == NES_CQP_QP_IWARP_STATE_TERMINATE) && + !(next_iwarp_state & NES_CQP_QP_TERM_DONT_SEND_TERM_MSG)) { + termlen = ((termlen + 3) >> 2) << NES_CQP_OP_TERMLEN_SHIFT; + set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_NEW_MSS_IDX, termlen); + } + atomic_set(&cqp_request->refcount, 2); nes_post_cqp_request(nesdev, cqp_request); @@ -2859,7 +2938,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int ret; u16 original_last_aeq; u8 issue_modify_qp = 0; - u8 issue_disconnect = 0; u8 dont_wait = 0; nes_debug(NES_DBG_MOD_QP, "QP%u: QP State=%u, cur QP State=%u," @@ -2867,7 +2945,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state, nesqp->iwarp_state, atomic_read(&nesqp->refcount)); - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, qplockflags); nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X," @@ -2882,7 +2959,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; @@ -2893,7 +2969,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE; @@ -2904,14 +2979,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } if (nesqp->cm_id == NULL) { nes_debug(NES_DBG_MOD_QP, "QP%u: 
Failing attempt to move QP to RTS without a CM_ID. \n", nesqp->hwqp.qp_id ); spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS; @@ -2929,7 +3002,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail); if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return 0; } else { if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) { @@ -2937,7 +3009,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " ignored due to current iWARP state\n", nesqp->hwqp.qp_id); spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) { @@ -2945,11 +3016,11 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " already done based on hw state.\n", nesqp->hwqp.qp_id); issue_modify_qp = 0; - nesqp->in_disconnect = 0; } switch (nesqp->hw_iwarp_state) { case NES_AEQE_IWARP_STATE_CLOSING: next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; + break; case NES_AEQE_IWARP_STATE_TERMINATE: next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; break; @@ -2958,7 +3029,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, break; default: next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; - nesqp->in_disconnect = 1; nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; break; } @@ -2969,24 +3039,24 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id); if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE; issue_modify_qp = 1; - nesqp->in_disconnect = 1; break; case IB_QPS_ERR: case IB_QPS_RESET: if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) { spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; } nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n", nesqp->hwqp.qp_id); + if (nesqp->term_flags) + del_timer(&nesqp->terminate_timer); + next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR; /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */ if (nesqp->hte_added) { @@ -2995,9 +3065,9 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hte_added = 0; } if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) && + (nesdev->iw_status) && (nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) { next_iwarp_state |= NES_CQP_QP_RESET; - nesqp->in_disconnect = 1; } else { nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n", nesqp->hwqp.qp_id, nesqp->hw_tcp_state); @@ -3008,25 +3078,14 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, break; default: spin_unlock_irqrestore(&nesqp->lock, qplockflags); - nes_rem_ref(&nesqp->ibqp); return -EINVAL; break; } nesqp->ibqp_state = attr->qp_state; - if (((nesqp->iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == - (u32)NES_CQP_QP_IWARP_STATE_RTS) && - ((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) > - (u32)NES_CQP_QP_IWARP_STATE_RTS)) { - nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; - nes_debug(NES_DBG_MOD_QP, "Change 
nesqp->iwarp_state=%08x\n", - nesqp->iwarp_state); - issue_disconnect = 1; - } else { - nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; - nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", - nesqp->iwarp_state); - } + nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; + nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", + nesqp->iwarp_state); } if (attr_mask & IB_QP_ACCESS_FLAGS) { @@ -3065,7 +3124,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (issue_modify_qp) { nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n"); - ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 1); + ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 1); if (ret) nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)" " failed for QP%u.\n", @@ -3078,9 +3137,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); - if ((!ret) || - ((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) && - (ret))) { + if (!ret || original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) { if (dont_wait) { if (nesqp->cm_id && nesqp->hw_tcp_state != 0) { nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d)," @@ -3088,7 +3145,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); /* this one is for the cm_disconnect thread */ - nes_add_ref(&nesqp->ibqp); spin_lock_irqsave(&nesqp->lock, qplockflags); nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; @@ -3097,14 +3153,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } else { nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount)); - nes_rem_ref(&nesqp->ibqp); } } else { spin_lock_irqsave(&nesqp->lock, qplockflags); if (nesqp->cm_id) { /* These two are for the timer thread */ if (atomic_inc_return(&nesqp->close_timer_started) == 1) { - nes_add_ref(&nesqp->ibqp); nesqp->cm_id->add_ref(nesqp->cm_id); nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d)," " need ae to finish up, original_last_aeq = 0x%04X." @@ -3128,14 +3182,12 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); - nes_rem_ref(&nesqp->ibqp); } } else { nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up," " original_last_aeq = 0x%04X. 
last_aeq = 0x%04X.\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); - nes_rem_ref(&nesqp->ibqp); } err = 0; @@ -3214,30 +3266,32 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, struct nes_device *nesdev = nesvnic->nesdev; struct nes_qp *nesqp = to_nesqp(ibqp); struct nes_hw_qp_wqe *wqe; - int err; + int err = 0; u32 qsize = nesqp->hwqp.sq_size; u32 head; - u32 wqe_misc; - u32 wqe_count; + u32 wqe_misc = 0; + u32 wqe_count = 0; u32 counter; - u32 total_payload_length; - - err = 0; - wqe_misc = 0; - wqe_count = 0; - total_payload_length = 0; - if (nesqp->ibqp_state > IB_QPS_RTS) - return -EINVAL; + if (nesqp->ibqp_state > IB_QPS_RTS) { + err = -EINVAL; + goto out; + } - spin_lock_irqsave(&nesqp->lock, flags); + spin_lock_irqsave(&nesqp->lock, flags); head = nesqp->hwqp.sq_head; while (ib_wr) { + /* Check for QP error */ + if (nesqp->term_flags) { + err = -EINVAL; + break; + } + /* Check for SQ overflow */ if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) { - err = -EINVAL; + err = -ENOMEM; break; } @@ -3248,94 +3302,210 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, u64temp = (u64)(ib_wr->wr_id); set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp); - switch (ib_wr->opcode) { - case IB_WR_SEND: - if (ib_wr->send_flags & IB_SEND_SOLICITED) { - wqe_misc = NES_IWARP_SQ_OP_SENDSE; - } else { - wqe_misc = NES_IWARP_SQ_OP_SEND; - } - if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { - err = -EINVAL; - break; - } - if (ib_wr->send_flags & IB_SEND_FENCE) { - wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; - } - if ((ib_wr->send_flags & IB_SEND_INLINE) && - ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && - (ib_wr->sg_list[0].length <= 64)) { - memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX], - (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, - ib_wr->sg_list[0].length); - wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA; - } else { - fill_wqe_sg_send(wqe, ib_wr, 1); - } + switch (ib_wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_INV: + if (IB_WR_SEND == ib_wr->opcode) { + if (ib_wr->send_flags & IB_SEND_SOLICITED) + wqe_misc = NES_IWARP_SQ_OP_SENDSE; + else + wqe_misc = NES_IWARP_SQ_OP_SEND; + } else { + if (ib_wr->send_flags & IB_SEND_SOLICITED) + wqe_misc = NES_IWARP_SQ_OP_SENDSEINV; + else + wqe_misc = NES_IWARP_SQ_OP_SENDINV; - break; - case IB_WR_RDMA_WRITE: - wqe_misc = NES_IWARP_SQ_OP_RDMAW; - if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { - nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n", - ib_wr->num_sge, - nesdev->nesadapter->max_sge); - err = -EINVAL; - break; - } - if (ib_wr->send_flags & IB_SEND_FENCE) { - wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; - } + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX, + ib_wr->ex.invalidate_rkey); + } - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, - ib_wr->wr.rdma.rkey); - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, - ib_wr->wr.rdma.remote_addr); - - if ((ib_wr->send_flags & IB_SEND_INLINE) && - ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && - (ib_wr->sg_list[0].length <= 64)) { - memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX], - (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, - 
ib_wr->sg_list[0].length); - wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA; - } else { - fill_wqe_sg_send(wqe, ib_wr, 1); - } - wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = - wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]; - break; - case IB_WR_RDMA_READ: - /* iWARP only supports 1 sge for RDMA reads */ - if (ib_wr->num_sge > 1) { - nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n", - ib_wr->num_sge); - err = -EINVAL; - break; - } - wqe_misc = NES_IWARP_SQ_OP_RDMAR; - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, - ib_wr->wr.rdma.remote_addr); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, - ib_wr->wr.rdma.rkey); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX, - ib_wr->sg_list->length); - set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, - ib_wr->sg_list->addr); - set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX, - ib_wr->sg_list->lkey); - break; - default: - /* error */ - err = -EINVAL; - break; + if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { + err = -EINVAL; + break; } - if (ib_wr->send_flags & IB_SEND_SIGNALED) { - wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL; + if (ib_wr->send_flags & IB_SEND_FENCE) + wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; + + if ((ib_wr->send_flags & IB_SEND_INLINE) && + ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && + (ib_wr->sg_list[0].length <= 64)) { + memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX], + (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, + ib_wr->sg_list[0].length); + wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA; + } else { + fill_wqe_sg_send(wqe, ib_wr, 1); + } + + break; + case IB_WR_RDMA_WRITE: + wqe_misc = NES_IWARP_SQ_OP_RDMAW; + if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { + nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n", + ib_wr->num_sge, nesdev->nesadapter->max_sge); + err = -EINVAL; + break; + } + + if (ib_wr->send_flags & IB_SEND_FENCE) + wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE; + + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX, + ib_wr->wr.rdma.rkey); + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, + ib_wr->wr.rdma.remote_addr); + + if ((ib_wr->send_flags & IB_SEND_INLINE) && + ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) && + (ib_wr->sg_list[0].length <= 64)) { + memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX], + (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX, + ib_wr->sg_list[0].length); + wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA; + } else { + fill_wqe_sg_send(wqe, ib_wr, 1); + } + + wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = + wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]; + break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + /* iWARP only supports 1 sge for RDMA reads */ + if (ib_wr->num_sge > 1) { + nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n", + ib_wr->num_sge); + err = -EINVAL; + break; + } + if (ib_wr->opcode == IB_WR_RDMA_READ) { + wqe_misc = NES_IWARP_SQ_OP_RDMAR; + } else { + wqe_misc = NES_IWARP_SQ_OP_RDMAR_LOCINV; + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX, + ib_wr->ex.invalidate_rkey); + } + + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX, + ib_wr->wr.rdma.remote_addr); + set_wqe_32bit_value(wqe->wqe_words, 
NES_IWARP_SQ_WQE_RDMA_STAG_IDX, + ib_wr->wr.rdma.rkey); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX, + ib_wr->sg_list->length); + set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, + ib_wr->sg_list->addr); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX, + ib_wr->sg_list->lkey); + break; + case IB_WR_LOCAL_INV: + wqe_misc = NES_IWARP_SQ_OP_LOCINV; + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX, + ib_wr->ex.invalidate_rkey); + break; + case IB_WR_FAST_REG_MR: + { + int i; + int flags = ib_wr->wr.fast_reg.access_flags; + struct nes_ib_fast_reg_page_list *pnesfrpl = + container_of(ib_wr->wr.fast_reg.page_list, + struct nes_ib_fast_reg_page_list, + ibfrpl); + u64 *src_page_list = pnesfrpl->ibfrpl.page_list; + u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva; + + if (ib_wr->wr.fast_reg.page_list_len > + (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) { + nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n"); + err = -EINVAL; + break; + } + wqe_misc = NES_IWARP_SQ_OP_FAST_REG; + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX, + ib_wr->wr.fast_reg.iova_start); + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, + ib_wr->wr.fast_reg.length); + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, + ib_wr->wr.fast_reg.rkey); + /* Set page size: */ + if (ib_wr->wr.fast_reg.page_shift == 12) { + wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K; + } else if (ib_wr->wr.fast_reg.page_shift == 21) { + wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M; + } else { + nes_debug(NES_DBG_IW_TX, "Invalid page shift," + " ib_wr=%u, max=1\n", ib_wr->num_sge); + err = -EINVAL; + break; + } + /* Set access_flags */ + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ; + if (flags & IB_ACCESS_LOCAL_WRITE) + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE; + + if (flags & IB_ACCESS_REMOTE_WRITE) + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE; + + if (flags & IB_ACCESS_REMOTE_READ) + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ; + + if (flags & IB_ACCESS_MW_BIND) + wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND; + + /* Fill in PBL info: */ + if (ib_wr->wr.fast_reg.page_list_len > + pnesfrpl->ibfrpl.max_page_list_len) { + nes_debug(NES_DBG_IW_TX, "Invalid page list length," + " ib_wr=%p, value=%u, max=%u\n", + ib_wr, ib_wr->wr.fast_reg.page_list_len, + pnesfrpl->ibfrpl.max_page_list_len); + err = -EINVAL; + break; + } + + set_wqe_64bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX, + pnesfrpl->nes_wqe_pbl.paddr); + + set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX, + ib_wr->wr.fast_reg.page_list_len * 8); + + for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++) + dst_page_list[i] = cpu_to_le64(src_page_list[i]); + + nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, " + "length: %d, rkey: %0x, pgl_paddr: %llx, " + "page_list_len: %u, wqe_misc: %x\n", + (unsigned long long) ib_wr->wr.fast_reg.iova_start, + ib_wr->wr.fast_reg.length, + ib_wr->wr.fast_reg.rkey, + (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr, + ib_wr->wr.fast_reg.page_list_len, + wqe_misc); + break; + } + default: + /* error */ + err = -EINVAL; + break; } + + if (err) + break; + + if ((ib_wr->send_flags & IB_SEND_SIGNALED) || nesqp->sig_all) + wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL; + 
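/*
 * Caller-side sketch, not part of the patch: nesqp->sig_all is latched
 * from init_attr->sq_sig_type in nes_create_qp() above, so a QP created
 * with IB_SIGNAL_ALL_WR now completes every SQ work request even when the
 * poster leaves IB_SEND_SIGNALED clear.  The variables qp and sge are
 * assumed to be a connected nes QP and a registered SGE.
 */
	struct ib_send_wr wr, *bad_wr;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id      = 1;
	wr.opcode     = IB_WR_SEND;
	wr.send_flags = 0;	/* unsignaled as far as the caller is concerned */
	wr.sg_list    = &sge;
	wr.num_sge    = 1;

	/* nes_post_send() still ORs NES_IWARP_SQ_WQE_SIGNALED_COMPL into
	 * wqe_misc here because nesqp->sig_all is set. */
	if (ib_post_send(qp, &wr, &bad_wr))
		pr_err("post_send failed\n");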
wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc); ib_wr = ib_wr->next; @@ -3355,8 +3525,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, (counter << 24) | 0x00800000 | nesqp->hwqp.qp_id); } - spin_unlock_irqrestore(&nesqp->lock, flags); + spin_unlock_irqrestore(&nesqp->lock, flags); +out: if (err) *bad_wr = ib_wr; return err; @@ -3383,21 +3554,29 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, u32 counter; u32 total_payload_length; - if (nesqp->ibqp_state > IB_QPS_RTS) - return -EINVAL; + if (nesqp->ibqp_state > IB_QPS_RTS) { + err = -EINVAL; + goto out; + } - spin_lock_irqsave(&nesqp->lock, flags); + spin_lock_irqsave(&nesqp->lock, flags); head = nesqp->hwqp.rq_head; while (ib_wr) { + /* Check for QP error */ + if (nesqp->term_flags) { + err = -EINVAL; + break; + } + if (ib_wr->num_sge > nesdev->nesadapter->max_sge) { err = -EINVAL; break; } /* Check for RQ overflow */ if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) { - err = -EINVAL; + err = -ENOMEM; break; } @@ -3439,8 +3618,9 @@ static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter<<24) | nesqp->hwqp.qp_id); } - spin_unlock_irqrestore(&nesqp->lock, flags); + spin_unlock_irqrestore(&nesqp->lock, flags); +out: if (err) *bad_wr = ib_wr; return err; @@ -3454,7 +3634,6 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { u64 u64temp; u64 wrid; - /* u64 u64temp; */ unsigned long flags = 0; struct nes_vnic *nesvnic = to_nesvnic(ibcq->device); struct nes_device *nesdev = nesvnic->nesdev; @@ -3462,44 +3641,56 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) struct nes_qp *nesqp; struct nes_hw_cqe cqe; u32 head; - u32 wq_tail; + u32 wq_tail = 0; u32 cq_size; u32 cqe_count = 0; u32 wqe_index; u32 u32temp; - /* u32 counter; */ + u32 move_cq_head = 1; + u32 err_code; nes_debug(NES_DBG_CQ, "\n"); - spin_lock_irqsave(&nescq->lock, flags); + spin_lock_irqsave(&nescq->lock, flags); head = nescq->hw_cq.cq_head; cq_size = nescq->hw_cq.cq_size; while (cqe_count < num_entries) { - if (le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & - NES_CQE_VALID) { - /* - * Make sure we read CQ entry contents *after* - * we've checked the valid bit. - */ - rmb(); - - cqe = nescq->hw_cq.cq_vbase[head]; - nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0; - u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); - wqe_index = u32temp & - (nesdev->nesadapter->max_qp_wr - 1); - u32temp &= ~(NES_SW_CONTEXT_ALIGN-1); - /* parse CQE, get completion context from WQE (either rq or sq */ - u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) | - ((u64)u32temp); - nesqp = *((struct nes_qp **)&u64temp); + if ((le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & + NES_CQE_VALID) == 0) + break; + + /* + * Make sure we read CQ entry contents *after* + * we've checked the valid bit. 
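+ * The barrier below pairs our loads of the CQE fields with the + * adapter's DMA write that set the valid bit.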
+ */ + rmb(); + + cqe = nescq->hw_cq.cq_vbase[head]; + u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); + wqe_index = u32temp & (nesdev->nesadapter->max_qp_wr - 1); + u32temp &= ~(NES_SW_CONTEXT_ALIGN-1); + /* parse CQE, get completion context from WQE (either rq or sq) */ + u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) | + ((u64)u32temp); + + if (u64temp) { + nesqp = (struct nes_qp *)(unsigned long)u64temp; memset(entry, 0, sizeof *entry); if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) { entry->status = IB_WC_SUCCESS; } else { - entry->status = IB_WC_WR_FLUSH_ERR; + err_code = le32_to_cpu(cqe.cqe_words[NES_CQE_ERROR_CODE_IDX]); + if (NES_IWARP_CQE_MAJOR_DRV == (err_code >> 16)) { + entry->status = err_code & 0x0000ffff; + + /* The rest of the cqe's will be marked as flushed */ + nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX] = + cpu_to_le32((NES_IWARP_CQE_MAJOR_FLUSH << 16) | + NES_IWARP_CQE_MINOR_FLUSH); + } else + entry->status = IB_WC_WR_FLUSH_ERR; } entry->qp = &nesqp->ibqp; @@ -3508,20 +3699,18 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) { if (nesqp->skip_lsmm) { nesqp->skip_lsmm = 0; - wq_tail = nesqp->hwqp.sq_tail++; + nesqp->hwqp.sq_tail++; } /* Working on a SQ Completion*/ - wq_tail = wqe_index; - nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1); - wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail]. + wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index]. wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) | - ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wq_tail]. + ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index]. wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX]))); - entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail]. + entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index]. wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]); - switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail]. + switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index]. wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) { case NES_IWARP_SQ_OP_RDMAW: nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n"); @@ -3530,7 +3719,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) case NES_IWARP_SQ_OP_RDMAR: nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n"); entry->opcode = IB_WC_RDMA_READ; - entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wq_tail]. + entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index]. 
wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]); break; case NES_IWARP_SQ_OP_SENDINV: @@ -3540,34 +3729,61 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) nes_debug(NES_DBG_CQ, "Operation = Send.\n"); entry->opcode = IB_WC_SEND; break; + case NES_IWARP_SQ_OP_LOCINV: + entry->opcode = IB_WC_LOCAL_INV; + break; + case NES_IWARP_SQ_OP_FAST_REG: + entry->opcode = IB_WC_FAST_REG_MR; + break; + } + + nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1); + if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)) { + move_cq_head = 0; + wq_tail = nesqp->hwqp.sq_tail; } } else { /* Working on a RQ Completion*/ - wq_tail = wqe_index; - nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1); entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]); - wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) | - ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wq_tail].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32); + wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) | + ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32); entry->opcode = IB_WC_RECV; + + nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1); + if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)) { + move_cq_head = 0; + wq_tail = nesqp->hwqp.rq_tail; + } } + entry->wr_id = wrid; + entry++; + cqe_count++; + } + if (move_cq_head) { + nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0; if (++head >= cq_size) head = 0; - cqe_count++; nescq->polled_completions++; + if ((nescq->polled_completions > (cq_size / 2)) || (nescq->polled_completions == 255)) { nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes" - " are pending %u of %u.\n", - nescq->hw_cq.cq_number, nescq->polled_completions, cq_size); + " are pending %u of %u.\n", + nescq->hw_cq.cq_number, nescq->polled_completions, cq_size); nes_write32(nesdev->regs+NES_CQE_ALLOC, - nescq->hw_cq.cq_number | (nescq->polled_completions << 16)); + nescq->hw_cq.cq_number | (nescq->polled_completions << 16)); nescq->polled_completions = 0; } - entry++; - } else - break; + } else { + /* Update the wqe index and set status to flush */ + wqe_index = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]); + wqe_index = (wqe_index & (~(nesdev->nesadapter->max_qp_wr - 1))) | wq_tail; + nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = + cpu_to_le32(wqe_index); + move_cq_head = 1; /* ready for next pass */ + } } if (nescq->polled_completions) { @@ -3580,7 +3796,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) nes_debug(NES_DBG_CQ, "Reporting %u completions for CQ%u.\n", cqe_count, nescq->hw_cq.cq_number); - spin_unlock_irqrestore(&nescq->lock, flags); + spin_unlock_irqrestore(&nescq->lock, flags); return cqe_count; } @@ -3664,7 +3880,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev; nesibdev->ibdev.query_device = nes_query_device; nesibdev->ibdev.query_port = nes_query_port; - nesibdev->ibdev.modify_port = nes_modify_port; nesibdev->ibdev.query_pkey = nes_query_pkey; nesibdev->ibdev.query_gid = nes_query_gid; nesibdev->ibdev.alloc_ucontext = nes_alloc_ucontext; @@ -3689,10 +3904,9 @@ struct nes_ib_device *nes_init_ofa_device(struct 
net_device *netdev) nesibdev->ibdev.dealloc_mw = nes_dealloc_mw; nesibdev->ibdev.bind_mw = nes_bind_mw; - nesibdev->ibdev.alloc_fmr = nes_alloc_fmr; - nesibdev->ibdev.unmap_fmr = nes_unmap_fmr; - nesibdev->ibdev.dealloc_fmr = nes_dealloc_fmr; - nesibdev->ibdev.map_phys_fmr = nes_map_phys_fmr; + nesibdev->ibdev.alloc_fast_reg_mr = nes_alloc_fast_reg_mr; + nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list; + nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list; nesibdev->ibdev.attach_mcast = nes_multicast_attach; nesibdev->ibdev.detach_mcast = nes_multicast_detach; @@ -3721,6 +3935,52 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) /** + * nes_handle_delayed_event + */ +static void nes_handle_delayed_event(unsigned long data) +{ + struct nes_vnic *nesvnic = (void *) data; + + if (nesvnic->delayed_event != nesvnic->last_dispatched_event) { + struct ib_event event; + + event.device = &nesvnic->nesibdev->ibdev; + if (!event.device) + goto stop_timer; + event.event = nesvnic->delayed_event; + event.element.port_num = nesvnic->logical_port + 1; + ib_dispatch_event(&event); + } + +stop_timer: + nesvnic->event_timer.function = NULL; +} + + +void nes_port_ibevent(struct nes_vnic *nesvnic) +{ + struct nes_ib_device *nesibdev = nesvnic->nesibdev; + struct nes_device *nesdev = nesvnic->nesdev; + struct ib_event event; + event.device = &nesibdev->ibdev; + event.element.port_num = nesvnic->logical_port + 1; + event.event = nesdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + + if (!nesvnic->event_timer.function) { + ib_dispatch_event(&event); + nesvnic->last_dispatched_event = event.event; + nesvnic->event_timer.function = nes_handle_delayed_event; + nesvnic->event_timer.data = (unsigned long) nesvnic; + nesvnic->event_timer.expires = jiffies + NES_EVENT_DELAY; + add_timer(&nesvnic->event_timer); + } else { + mod_timer(&nesvnic->event_timer, jiffies + NES_EVENT_DELAY); + } + nesvnic->delayed_event = event.event; +} + + +/** * nes_destroy_ofa_device */ void nes_destroy_ofa_device(struct nes_ib_device *nesibdev) @@ -3745,7 +4005,7 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev) struct nes_adapter *nesadapter = nesdev->nesadapter; int i, ret; - ret = ib_register_device(&nesvnic->nesibdev->ibdev); + ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL); if (ret) { return ret; } diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index 6c6b4da5184..309b31c31ae 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two @@ -40,6 +40,10 @@ struct nes_device; #define NES_MAX_USER_DB_REGIONS 4096 #define NES_MAX_USER_WQ_REGIONS 4096 +#define NES_TERM_SENT 0x01 +#define NES_TERM_RCVD 0x02 +#define NES_TERM_DONE 0x04 + struct nes_ucontext { struct ib_ucontext ibucontext; struct nes_device *nesdev; @@ -112,12 +116,18 @@ struct nes_cq { spinlock_t lock; u8 virtual_cq; u8 pad[3]; + u32 mcrqf; }; struct nes_wq { spinlock_t lock; }; +struct disconn_work { + struct work_struct work; + struct nes_qp *nesqp; +}; + struct iw_cm_id; struct ietf_mpa_frame; @@ -125,19 +135,16 @@ struct nes_qp { struct ib_qp ibqp; void *allocated_buffer; struct iw_cm_id *cm_id; - struct workqueue_struct *wq; - struct work_struct disconn_work; struct nes_cq *nesscq; struct nes_cq *nesrcq; struct nes_pd *nespd; void *cm_node; /* handle of the node this QP is associated with */ - struct ietf_mpa_frame *ietf_frame; + void *ietf_frame; + u8 ietf_frame_size; dma_addr_t ietf_frame_pbase; - wait_queue_head_t state_waitq; - unsigned long socket; + struct ib_mr *lsmm_mr; struct nes_hw_qp hwqp; struct work_struct work; - struct work_struct ae_work; enum ib_qp_state ibqp_state; u32 iwarp_state; u32 hte_index; @@ -148,22 +155,35 @@ struct nes_qp { u32 mmap_sq_db_index; u32 mmap_rq_db_index; spinlock_t lock; + spinlock_t pau_lock; struct nes_qp_context *nesqp_context; dma_addr_t nesqp_context_pbase; void *pbl_vbase; dma_addr_t pbl_pbase; struct page *page; - wait_queue_head_t kick_waitq; - u16 in_disconnect; + struct timer_list terminate_timer; + enum ib_event_type terminate_eventtype; + struct sk_buff_head pau_list; + u32 pau_rcv_nxt; + u16 active_conn:1; + u16 skip_lsmm:1; + u16 user_mode:1; + u16 hte_added:1; + u16 flush_issued:1; + u16 destroyed:1; + u16 sig_all:1; + u16 pau_mode:1; + u16 rsvd:8; u16 private_data_len; - u8 active_conn; - u8 skip_lsmm; - u8 user_mode; - u8 hte_added; + u16 term_sq_flush_code; + u16 term_rq_flush_code; u8 hw_iwarp_state; - u8 flush_issued; u8 hw_tcp_state; - u8 disconn_pending; - u8 destroyed; + u8 term_flags; + u8 sq_kmapped; + u8 pau_busy; + u8 pau_pending; + u8 pau_state; + __u64 nesuqp_addr; }; #endif /* NES_VERBS_H */ diff --git a/drivers/infiniband/hw/ocrdma/Kconfig b/drivers/infiniband/hw/ocrdma/Kconfig new file mode 100644 index 00000000000..c0cddc0192d --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/Kconfig @@ -0,0 +1,8 @@ +config INFINIBAND_OCRDMA + tristate "Emulex One Connect HCA support" + depends on ETHERNET && NETDEVICES && PCI && INET && (IPV6 || IPV6=n) + select NET_VENDOR_EMULEX + select BE2NET + ---help--- + This driver provides low-level InfiniBand over Ethernet + support for Emulex One Connect host channel adapters (HCAs). diff --git a/drivers/infiniband/hw/ocrdma/Makefile b/drivers/infiniband/hw/ocrdma/Makefile new file mode 100644 index 00000000000..d1bfd4f4cdd --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/Makefile @@ -0,0 +1,5 @@ +ccflags-y := -Idrivers/net/ethernet/emulex/benet + +obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma.o + +ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o ocrdma_stats.o diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h new file mode 100644 index 00000000000..19011dbb930 --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -0,0 +1,521 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. 
* + * Copyright (C) 2008-2012 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#ifndef __OCRDMA_H__ +#define __OCRDMA_H__ + +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/pci.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_addr.h> + +#include <be_roce.h> +#include "ocrdma_sli.h" + +#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u" + +#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" +#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" + +#define OC_NAME_SH OCRDMA_NODE_DESC "(Skyhawk)" +#define OC_NAME_UNKNOWN OCRDMA_NODE_DESC "(Unknown)" + +#define OC_SKH_DEVICE_PF 0x720 +#define OC_SKH_DEVICE_VF 0x728 +#define OCRDMA_MAX_AH 512 + +#define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME) + +#define convert_to_64bit(lo, hi) ((u64)hi << 32 | (u64)lo) + +struct ocrdma_dev_attr { + u8 fw_ver[32]; + u32 vendor_id; + u32 device_id; + u16 max_pd; + u16 max_cq; + u16 max_cqe; + u16 max_qp; + u16 max_wqe; + u16 max_rqe; + u16 max_srq; + u32 max_inline_data; + int max_send_sge; + int max_recv_sge; + int max_srq_sge; + int max_rdma_sge; + int max_mr; + u64 max_mr_size; + u32 max_num_mr_pbl; + int max_mw; + int max_fmr; + int max_map_per_fmr; + int max_pages_per_frmr; + u16 max_ord_per_qp; + u16 max_ird_per_qp; + + int device_cap_flags; + u8 cq_overflow_detect; + u8 srq_supported; + + u32 wqe_size; + u32 rqe_size; + u32 ird_page_size; + u8 local_ca_ack_delay; + u8 ird; + u8 num_ird_pages; +}; + +struct ocrdma_dma_mem { + void *va; + dma_addr_t pa; + u32 size; +}; + +struct ocrdma_pbl { + void *va; + dma_addr_t pa; +}; + +struct ocrdma_queue_info { + void *va; + dma_addr_t dma; + u32 size; + u16 len; + u16 entry_size; /* Size of an element in the queue */ + u16 id; /* qid, where to ring the doorbell. 
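+ * The doorbell write encodes this id; see the ocrdma_ring_*_db() + * helpers in ocrdma_hw.c.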
*/ + u16 head, tail; + bool created; +}; + +struct ocrdma_eq { + struct ocrdma_queue_info q; + u32 vector; + int cq_cnt; + struct ocrdma_dev *dev; + char irq_name[32]; +}; + +struct ocrdma_mq { + struct ocrdma_queue_info sq; + struct ocrdma_queue_info cq; + bool rearm_cq; +}; + +struct mqe_ctx { + struct mutex lock; /* for serializing mailbox commands on MQ */ + wait_queue_head_t cmd_wait; + u32 tag; + u16 cqe_status; + u16 ext_status; + bool cmd_done; +}; + +struct ocrdma_hw_mr { + u32 lkey; + u8 fr_mr; + u8 remote_atomic; + u8 remote_rd; + u8 remote_wr; + u8 local_rd; + u8 local_wr; + u8 mw_bind; + u8 rsvd; + u64 len; + struct ocrdma_pbl *pbl_table; + u32 num_pbls; + u32 num_pbes; + u32 pbl_size; + u32 pbe_size; + u64 fbo; + u64 va; +}; + +struct ocrdma_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct ocrdma_hw_mr hwmr; +}; + +struct ocrdma_stats { + u8 type; + struct ocrdma_dev *dev; +}; + +struct stats_mem { + struct ocrdma_mqe mqe; + void *va; + dma_addr_t pa; + u32 size; + char *debugfs_mem; +}; + +struct phy_info { + u16 auto_speeds_supported; + u16 fixed_speeds_supported; + u16 phy_type; + u16 interface_type; +}; + +struct ocrdma_dev { + struct ib_device ibdev; + struct ocrdma_dev_attr attr; + + struct mutex dev_lock; /* provides synchronized access to device data */ + spinlock_t flush_q_lock ____cacheline_aligned; + + struct ocrdma_cq **cq_tbl; + struct ocrdma_qp **qp_tbl; + + struct ocrdma_eq *eq_tbl; + int eq_cnt; + u16 base_eqid; + u16 max_eq; + + union ib_gid *sgid_tbl; + /* provides synchronization to the sgid table for + * updating gid entries triggered by the notifier. + */ + spinlock_t sgid_lock; + + int gsi_qp_created; + struct ocrdma_cq *gsi_sqcq; + struct ocrdma_cq *gsi_rqcq; + + struct { + struct ocrdma_av *va; + dma_addr_t pa; + u32 size; + u32 num_ah; + /* provides synchronization for av + * entry allocations. + */ + spinlock_t lock; + u32 ahid; + struct ocrdma_pbl pbl; + } av_tbl; + + void *mbx_cmd; + struct ocrdma_mq mq; + struct mqe_ctx mqe_ctx; + + struct be_dev_info nic_info; + struct phy_info phy; + char model_number[32]; + u32 hba_port_num; + + struct list_head entry; + struct rcu_head rcu; + int id; + u64 stag_arr[OCRDMA_MAX_STAG]; + u16 pvid; + u32 asic_id; + + ulong last_stats_time; + struct mutex stats_lock; /* provides synchronization for debugfs operations */ + struct stats_mem stats_mem; + struct ocrdma_stats rsrc_stats; + struct ocrdma_stats rx_stats; + struct ocrdma_stats wqe_stats; + struct ocrdma_stats tx_stats; + struct ocrdma_stats db_err_stats; + struct ocrdma_stats tx_qp_err_stats; + struct ocrdma_stats rx_qp_err_stats; + struct ocrdma_stats tx_dbg_stats; + struct ocrdma_stats rx_dbg_stats; + struct dentry *dir; +}; + +struct ocrdma_cq { + struct ib_cq ibcq; + struct ocrdma_cqe *va; + u32 phase; + u32 getp; /* pointer to pending wrs to + * return to stack, wraps around + * at max_hw_cqe + */ + u32 max_hw_cqe; + bool phase_change; + bool deferred_arm, deferred_sol; + bool first_arm; + + spinlock_t cq_lock ____cacheline_aligned; /* provides synchronization + * for cq polling + */ + /* synchronizes cq completion handlers invoked from multiple contexts */ + spinlock_t comp_handler_lock ____cacheline_aligned; + u16 id; + u16 eqn; + + struct ocrdma_ucontext *ucontext; + dma_addr_t pa; + u32 len; + u32 cqe_cnt; + + /* head of all qp's sq and rq for which cqes need to be flushed + * by the software.
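+ * QPs link themselves onto these lists via their sq_entry/rq_entry + * members, and the lists are walked under flush_q_lock (see + * ocrdma_qp_buddy_cq_handler()).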
+ */ + struct list_head sq_head, rq_head; +}; + +struct ocrdma_pd { + struct ib_pd ibpd; + struct ocrdma_ucontext *uctx; + u32 id; + int num_dpp_qp; + u32 dpp_page; + bool dpp_enabled; +}; + +struct ocrdma_ah { + struct ib_ah ibah; + struct ocrdma_av *av; + u16 sgid_index; + u32 id; +}; + +struct ocrdma_qp_hwq_info { + u8 *va; /* virtual address */ + u32 max_sges; + u32 head, tail; + u32 entry_size; + u32 max_cnt; + u32 max_wqe_idx; + u16 dbid; /* qid, where to ring the doorbell. */ + u32 len; + dma_addr_t pa; +}; + +struct ocrdma_srq { + struct ib_srq ibsrq; + u8 __iomem *db; + struct ocrdma_qp_hwq_info rq; + u64 *rqe_wr_id_tbl; + u32 *idx_bit_fields; + u32 bit_fields_len; + + /* provide synchronization to multiple context(s) posting rqe */ + spinlock_t q_lock ____cacheline_aligned; + + struct ocrdma_pd *pd; + u32 id; +}; + +struct ocrdma_qp { + struct ib_qp ibqp; + struct ocrdma_dev *dev; + + u8 __iomem *sq_db; + struct ocrdma_qp_hwq_info sq; + struct { + uint64_t wrid; + uint16_t dpp_wqe_idx; + uint16_t dpp_wqe; + uint8_t signaled; + uint8_t rsvd[3]; + } *wqe_wr_id_tbl; + u32 max_inline_data; + + /* provide synchronization to multiple context(s) posting wqe, rqe */ + spinlock_t q_lock ____cacheline_aligned; + struct ocrdma_cq *sq_cq; + /* list maintained per CQ to flush SQ errors */ + struct list_head sq_entry; + + u8 __iomem *rq_db; + struct ocrdma_qp_hwq_info rq; + u64 *rqe_wr_id_tbl; + struct ocrdma_cq *rq_cq; + struct ocrdma_srq *srq; + /* list maintained per CQ to flush RQ errors */ + struct list_head rq_entry; + + enum ocrdma_qp_state state; /* QP state */ + int cap_flags; + u32 max_ord, max_ird; + + u32 id; + struct ocrdma_pd *pd; + + enum ib_qp_type qp_type; + + int sgid_idx; + u32 qkey; + bool dpp_enabled; + u8 *ird_q_va; + bool signaled; +}; + +struct ocrdma_ucontext { + struct ib_ucontext ibucontext; + + struct list_head mm_head; + struct mutex mm_list_lock; /* protects list entries of mm type */ + struct ocrdma_pd *cntxt_pd; + int pd_in_use; + + struct { + u32 *va; + dma_addr_t pa; + u32 len; + } ah_tbl; +}; + +struct ocrdma_mm { + struct { + u64 phy_addr; + unsigned long len; + } key; + struct list_head entry; +}; + +static inline struct ocrdma_dev *get_ocrdma_dev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct ocrdma_dev, ibdev); +} + +static inline struct ocrdma_ucontext *get_ocrdma_ucontext(struct ib_ucontext + *ibucontext) +{ + return container_of(ibucontext, struct ocrdma_ucontext, ibucontext); +} + +static inline struct ocrdma_pd *get_ocrdma_pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct ocrdma_pd, ibpd); +} + +static inline struct ocrdma_cq *get_ocrdma_cq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct ocrdma_cq, ibcq); +} + +static inline struct ocrdma_qp *get_ocrdma_qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct ocrdma_qp, ibqp); +} + +static inline struct ocrdma_mr *get_ocrdma_mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct ocrdma_mr, ibmr); +} + +static inline struct ocrdma_ah *get_ocrdma_ah(struct ib_ah *ibah) +{ + return container_of(ibah, struct ocrdma_ah, ibah); +} + +static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct ocrdma_srq, ibsrq); +} + +static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe) +{ + int cqe_valid; + cqe_valid = le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID; + return (cqe_valid == cq->phase); +} + +static inline int is_cqe_for_sq(struct ocrdma_cqe *cqe) +{ + return 
(le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_QTYPE) ? 0 : 1; +} + +static inline int is_cqe_invalidated(struct ocrdma_cqe *cqe) +{ + return (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_INVALIDATE) ? 1 : 0; +} + +static inline int is_cqe_imm(struct ocrdma_cqe *cqe) +{ + return (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_IMM) ? 1 : 0; +} + +static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe) +{ + return (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_WRITE_IMM) ? 1 : 0; +} + +static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev, + struct ib_ah_attr *ah_attr, u8 *mac_addr) +{ + struct in6_addr in6; + + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); + if (rdma_is_multicast_addr(&in6)) + rdma_get_mcast_mac(&in6, mac_addr); + else + memcpy(mac_addr, ah_attr->dmac, ETH_ALEN); + return 0; +} + +static inline char *hca_name(struct ocrdma_dev *dev) +{ + switch (dev->nic_info.pdev->device) { + case OC_SKH_DEVICE_PF: + case OC_SKH_DEVICE_VF: + return OC_NAME_SH; + default: + return OC_NAME_UNKNOWN; + } +} + +static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev, + int eqid) +{ + int indx; + + for (indx = 0; indx < dev->eq_cnt; indx++) { + if (dev->eq_tbl[indx].q.id == eqid) + return indx; + } + + return -EINVAL; +} + +static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev) +{ + if (dev->nic_info.dev_family == 0xF && !dev->asic_id) { + pci_read_config_dword( + dev->nic_info.pdev, + OCRDMA_SLI_ASIC_ID_OFFSET, &dev->asic_id); + } + + return (dev->asic_id & OCRDMA_SLI_ASIC_GEN_NUM_MASK) >> + OCRDMA_SLI_ASIC_GEN_NUM_SHIFT; +} + +#endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h new file mode 100644 index 00000000000..1554cca5712 --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -0,0 +1,134 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2012 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#ifndef __OCRDMA_ABI_H__ +#define __OCRDMA_ABI_H__ + +#define OCRDMA_ABI_VERSION 2 +#define OCRDMA_BE_ROCE_ABI_VERSION 1 +/* user kernel communication data structures. 
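+ * They are versioned through OCRDMA_ABI_VERSION above; fields carry + * explicit padding and the rsvd members reserve room for new features.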
*/ + +struct ocrdma_alloc_ucontext_resp { + u32 dev_id; + u32 wqe_size; + u32 max_inline_data; + u32 dpp_wqe_size; + u64 ah_tbl_page; + u32 ah_tbl_len; + u32 rqe_size; + u8 fw_ver[32]; + /* for future use/new features in progress */ + u64 rsvd1; + u64 rsvd2; +}; + +struct ocrdma_alloc_pd_ureq { + u64 rsvd1; +}; + +struct ocrdma_alloc_pd_uresp { + u32 id; + u32 dpp_enabled; + u32 dpp_page_addr_hi; + u32 dpp_page_addr_lo; + u64 rsvd1; +}; + +struct ocrdma_create_cq_ureq { + u32 dpp_cq; + u32 rsvd; /* pad */ +}; + +#define MAX_CQ_PAGES 8 +struct ocrdma_create_cq_uresp { + u32 cq_id; + u32 page_size; + u32 num_pages; + u32 max_hw_cqe; + u64 page_addr[MAX_CQ_PAGES]; + u64 db_page_addr; + u32 db_page_size; + u32 phase_change; + /* for future use/new features in progress */ + u64 rsvd1; + u64 rsvd2; +}; + +#define MAX_QP_PAGES 8 +#define MAX_UD_AV_PAGES 8 + +struct ocrdma_create_qp_ureq { + u8 enable_dpp_cq; + u8 rsvd; + u16 dpp_cq_id; + u32 rsvd1; /* pad */ +}; + +struct ocrdma_create_qp_uresp { + u16 qp_id; + u16 sq_dbid; + u16 rq_dbid; + u16 resv0; /* pad */ + u32 sq_page_size; + u32 rq_page_size; + u32 num_sq_pages; + u32 num_rq_pages; + u64 sq_page_addr[MAX_QP_PAGES]; + u64 rq_page_addr[MAX_QP_PAGES]; + u64 db_page_addr; + u32 db_page_size; + u32 dpp_credit; + u32 dpp_offset; + u32 num_wqe_allocated; + u32 num_rqe_allocated; + u32 db_sq_offset; + u32 db_rq_offset; + u32 db_shift; + u64 rsvd[11]; +} __packed; + +struct ocrdma_create_srq_uresp { + u16 rq_dbid; + u16 resv0; /* pad */ + u32 resv1; + + u32 rq_page_size; + u32 num_rq_pages; + + u64 rq_page_addr[MAX_QP_PAGES]; + u64 db_page_addr; + + u32 db_page_size; + u32 num_rqe_allocated; + u32 db_rq_offset; + u32 db_shift; + + u64 rsvd2; + u64 rsvd3; +}; + +#endif /* __OCRDMA_ABI_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c new file mode 100644 index 00000000000..d4cc01f10c0 --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -0,0 +1,175 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2012 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. 
* + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#include <net/neighbour.h> +#include <net/netevent.h> + +#include <rdma/ib_addr.h> + +#include "ocrdma.h" +#include "ocrdma_verbs.h" +#include "ocrdma_ah.h" +#include "ocrdma_hw.h" + +static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, + struct ib_ah_attr *attr, int pdid) +{ + int status = 0; + u16 vlan_tag; bool vlan_enabled = false; + struct ocrdma_eth_vlan eth; + struct ocrdma_grh grh; + int eth_sz; + + memset(ð, 0, sizeof(eth)); + memset(&grh, 0, sizeof(grh)); + + ah->sgid_index = attr->grh.sgid_index; + + vlan_tag = attr->vlan_id; + if (!vlan_tag || (vlan_tag > 0xFFF)) + vlan_tag = dev->pvid; + if (vlan_tag && (vlan_tag < 0x1000)) { + eth.eth_type = cpu_to_be16(0x8100); + eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); + vlan_tag |= (attr->sl & 7) << 13; + eth.vlan_tag = cpu_to_be16(vlan_tag); + eth_sz = sizeof(struct ocrdma_eth_vlan); + vlan_enabled = true; + } else { + eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); + eth_sz = sizeof(struct ocrdma_eth_basic); + } + memcpy(ð.smac[0], &dev->nic_info.mac_addr[0], ETH_ALEN); + memcpy(ð.dmac[0], attr->dmac, ETH_ALEN); + status = ocrdma_resolve_dmac(dev, attr, ð.dmac[0]); + if (status) + return status; + status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, + (union ib_gid *)&grh.sgid[0]); + if (status) + return status; + + grh.tclass_flow = cpu_to_be32((6 << 28) | + (attr->grh.traffic_class << 24) | + attr->grh.flow_label); + /* 0x1b is next header value in GRH */ + grh.pdid_hoplimit = cpu_to_be32((pdid << 16) | + (0x1b << 8) | attr->grh.hop_limit); + + memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw)); + memcpy(&ah->av->eth_hdr, ð, eth_sz); + memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh)); + if (vlan_enabled) + ah->av->valid |= OCRDMA_AV_VLAN_VALID; + ah->av->valid = cpu_to_le32(ah->av->valid); + return status; +} + +struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +{ + u32 *ahid_addr; + int status; + struct ocrdma_ah *ah; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + + if (!(attr->ah_flags & IB_AH_GRH)) + return ERR_PTR(-EINVAL); + + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + status = ocrdma_alloc_av(dev, ah); + if (status) + goto av_err; + status = set_av_attr(dev, ah, attr, pd->id); + if (status) + goto av_conf_err; + + /* if pd is for the user process, pass the ah_id to user space */ + if ((pd->uctx) && (pd->uctx->ah_tbl.va)) { + ahid_addr = pd->uctx->ah_tbl.va + attr->dlid; + *ahid_addr = ah->id; + } + return &ah->ibah; + +av_conf_err: + ocrdma_free_av(dev, ah); +av_err: + kfree(ah); + return ERR_PTR(status); +} + +int ocrdma_destroy_ah(struct ib_ah *ibah) +{ + struct ocrdma_ah *ah = get_ocrdma_ah(ibah); + struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); + + ocrdma_free_av(dev, ah); + kfree(ah); + return 0; +} + +int ocrdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) +{ + struct ocrdma_ah *ah = get_ocrdma_ah(ibah); + struct ocrdma_av *av = ah->av; + struct ocrdma_grh *grh; + attr->ah_flags |= IB_AH_GRH; + if (ah->av->valid & Bit(1)) { + grh = (struct ocrdma_grh *)((u8 *)ah->av + + sizeof(struct ocrdma_eth_vlan)); + attr->sl = be16_to_cpu(av->eth_hdr.vlan_tag) >> 13; + } else { + grh = (struct ocrdma_grh *)((u8 *)ah->av + + 
sizeof(struct ocrdma_eth_basic)); + attr->sl = 0; + } + memcpy(&attr->grh.dgid.raw[0], &grh->dgid[0], sizeof(grh->dgid)); + attr->grh.sgid_index = ah->sgid_index; + attr->grh.hop_limit = be32_to_cpu(grh->pdid_hoplimit) & 0xff; + attr->grh.traffic_class = be32_to_cpu(grh->tclass_flow) >> 24; + attr->grh.flow_label = be32_to_cpu(grh->tclass_flow) & 0x00ffffff; + return 0; +} + +int ocrdma_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr) +{ + /* modify_ah is unsupported */ + return -ENOSYS; +} + +int ocrdma_process_mad(struct ib_device *ibdev, + int process_mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + return IB_MAD_RESULT_SUCCESS; +} diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h new file mode 100644 index 00000000000..8ac49e7f96d --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -0,0 +1,42 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2012 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#ifndef __OCRDMA_AH_H__ +#define __OCRDMA_AH_H__ + +struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *); +int ocrdma_destroy_ah(struct ib_ah *); +int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *); +int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *); + +int ocrdma_process_mad(struct ib_device *, + int process_mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad); +#endif /* __OCRDMA_AH_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c new file mode 100644 index 00000000000..3bbf2010a82 --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -0,0 +1,2737 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) CNA Adapters. * + * Copyright (C) 2008-2012 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful.
* + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/log2.h> +#include <linux/dma-mapping.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> + +#include "ocrdma.h" +#include "ocrdma_hw.h" +#include "ocrdma_verbs.h" +#include "ocrdma_ah.h" + +enum mbx_status { + OCRDMA_MBX_STATUS_FAILED = 1, + OCRDMA_MBX_STATUS_ILLEGAL_FIELD = 3, + OCRDMA_MBX_STATUS_OOR = 100, + OCRDMA_MBX_STATUS_INVALID_PD = 101, + OCRDMA_MBX_STATUS_PD_INUSE = 102, + OCRDMA_MBX_STATUS_INVALID_CQ = 103, + OCRDMA_MBX_STATUS_INVALID_QP = 104, + OCRDMA_MBX_STATUS_INVALID_LKEY = 105, + OCRDMA_MBX_STATUS_ORD_EXCEEDS = 106, + OCRDMA_MBX_STATUS_IRD_EXCEEDS = 107, + OCRDMA_MBX_STATUS_SENDQ_WQE_EXCEEDS = 108, + OCRDMA_MBX_STATUS_RECVQ_RQE_EXCEEDS = 109, + OCRDMA_MBX_STATUS_SGE_SEND_EXCEEDS = 110, + OCRDMA_MBX_STATUS_SGE_WRITE_EXCEEDS = 111, + OCRDMA_MBX_STATUS_SGE_RECV_EXCEEDS = 112, + OCRDMA_MBX_STATUS_INVALID_STATE_CHANGE = 113, + OCRDMA_MBX_STATUS_MW_BOUND = 114, + OCRDMA_MBX_STATUS_INVALID_VA = 115, + OCRDMA_MBX_STATUS_INVALID_LENGTH = 116, + OCRDMA_MBX_STATUS_INVALID_FBO = 117, + OCRDMA_MBX_STATUS_INVALID_ACC_RIGHTS = 118, + OCRDMA_MBX_STATUS_INVALID_PBE_SIZE = 119, + OCRDMA_MBX_STATUS_INVALID_PBL_ENTRY = 120, + OCRDMA_MBX_STATUS_INVALID_PBL_SHIFT = 121, + OCRDMA_MBX_STATUS_INVALID_SRQ_ID = 129, + OCRDMA_MBX_STATUS_SRQ_ERROR = 133, + OCRDMA_MBX_STATUS_RQE_EXCEEDS = 134, + OCRDMA_MBX_STATUS_MTU_EXCEEDS = 135, + OCRDMA_MBX_STATUS_MAX_QP_EXCEEDS = 136, + OCRDMA_MBX_STATUS_SRQ_LIMIT_EXCEEDS = 137, + OCRDMA_MBX_STATUS_SRQ_SIZE_UNDERUNS = 138, + OCRDMA_MBX_STATUS_QP_BOUND = 130, + OCRDMA_MBX_STATUS_INVALID_CHANGE = 139, + OCRDMA_MBX_STATUS_ATOMIC_OPS_UNSUP = 140, + OCRDMA_MBX_STATUS_INVALID_RNR_NAK_TIMER = 141, + OCRDMA_MBX_STATUS_MW_STILL_BOUND = 142, + OCRDMA_MBX_STATUS_PKEY_INDEX_INVALID = 143, + OCRDMA_MBX_STATUS_PKEY_INDEX_EXCEEDS = 144 +}; + +enum additional_status { + OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES = 22 +}; + +enum cqe_status { + OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_PRIVILEDGES = 1, + OCRDMA_MBX_CQE_STATUS_INVALID_PARAMETER = 2, + OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_RESOURCES = 3, + OCRDMA_MBX_CQE_STATUS_QUEUE_FLUSHING = 4, + OCRDMA_MBX_CQE_STATUS_DMA_FAILED = 5 +}; + +static inline void *ocrdma_get_eqe(struct ocrdma_eq *eq) +{ + return eq->q.va + (eq->q.tail * sizeof(struct ocrdma_eqe)); +} + +static inline void ocrdma_eq_inc_tail(struct ocrdma_eq *eq) +{ + eq->q.tail = (eq->q.tail + 1) & (OCRDMA_EQ_LEN - 1); +} + +static inline void *ocrdma_get_mcqe(struct ocrdma_dev *dev) +{ + struct ocrdma_mcqe *cqe = (struct ocrdma_mcqe *) + (dev->mq.cq.va + (dev->mq.cq.tail * sizeof(struct ocrdma_mcqe))); + + if (!(le32_to_cpu(cqe->valid_ae_cmpl_cons) & OCRDMA_MCQE_VALID_MASK)) + return NULL; + return cqe; +} + +static inline void ocrdma_mcq_inc_tail(struct ocrdma_dev *dev) +{ + dev->mq.cq.tail = (dev->mq.cq.tail + 1) & (OCRDMA_MQ_CQ_LEN - 1); +} + +static inline struct 
ocrdma_mqe *ocrdma_get_mqe(struct ocrdma_dev *dev) +{ + return dev->mq.sq.va + (dev->mq.sq.head * sizeof(struct ocrdma_mqe)); +} + +static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev) +{ + dev->mq.sq.head = (dev->mq.sq.head + 1) & (OCRDMA_MQ_LEN - 1); +} + +static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev) +{ + return dev->mq.sq.va + (dev->mqe_ctx.tag * sizeof(struct ocrdma_mqe)); +} + +enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps) +{ + switch (qps) { + case OCRDMA_QPS_RST: + return IB_QPS_RESET; + case OCRDMA_QPS_INIT: + return IB_QPS_INIT; + case OCRDMA_QPS_RTR: + return IB_QPS_RTR; + case OCRDMA_QPS_RTS: + return IB_QPS_RTS; + case OCRDMA_QPS_SQD: + case OCRDMA_QPS_SQ_DRAINING: + return IB_QPS_SQD; + case OCRDMA_QPS_SQE: + return IB_QPS_SQE; + case OCRDMA_QPS_ERR: + return IB_QPS_ERR; + } + return IB_QPS_ERR; +} + +static enum ocrdma_qp_state get_ocrdma_qp_state(enum ib_qp_state qps) +{ + switch (qps) { + case IB_QPS_RESET: + return OCRDMA_QPS_RST; + case IB_QPS_INIT: + return OCRDMA_QPS_INIT; + case IB_QPS_RTR: + return OCRDMA_QPS_RTR; + case IB_QPS_RTS: + return OCRDMA_QPS_RTS; + case IB_QPS_SQD: + return OCRDMA_QPS_SQD; + case IB_QPS_SQE: + return OCRDMA_QPS_SQE; + case IB_QPS_ERR: + return OCRDMA_QPS_ERR; + } + return OCRDMA_QPS_ERR; +} + +static int ocrdma_get_mbx_errno(u32 status) +{ + int err_num; + u8 mbox_status = (status & OCRDMA_MBX_RSP_STATUS_MASK) >> + OCRDMA_MBX_RSP_STATUS_SHIFT; + u8 add_status = (status & OCRDMA_MBX_RSP_ASTATUS_MASK) >> + OCRDMA_MBX_RSP_ASTATUS_SHIFT; + + switch (mbox_status) { + case OCRDMA_MBX_STATUS_OOR: + case OCRDMA_MBX_STATUS_MAX_QP_EXCEEDS: + err_num = -EAGAIN; + break; + + case OCRDMA_MBX_STATUS_INVALID_PD: + case OCRDMA_MBX_STATUS_INVALID_CQ: + case OCRDMA_MBX_STATUS_INVALID_SRQ_ID: + case OCRDMA_MBX_STATUS_INVALID_QP: + case OCRDMA_MBX_STATUS_INVALID_CHANGE: + case OCRDMA_MBX_STATUS_MTU_EXCEEDS: + case OCRDMA_MBX_STATUS_INVALID_RNR_NAK_TIMER: + case OCRDMA_MBX_STATUS_PKEY_INDEX_INVALID: + case OCRDMA_MBX_STATUS_PKEY_INDEX_EXCEEDS: + case OCRDMA_MBX_STATUS_ILLEGAL_FIELD: + case OCRDMA_MBX_STATUS_INVALID_PBL_ENTRY: + case OCRDMA_MBX_STATUS_INVALID_LKEY: + case OCRDMA_MBX_STATUS_INVALID_VA: + case OCRDMA_MBX_STATUS_INVALID_LENGTH: + case OCRDMA_MBX_STATUS_INVALID_FBO: + case OCRDMA_MBX_STATUS_INVALID_ACC_RIGHTS: + case OCRDMA_MBX_STATUS_INVALID_PBE_SIZE: + case OCRDMA_MBX_STATUS_ATOMIC_OPS_UNSUP: + case OCRDMA_MBX_STATUS_SRQ_ERROR: + case OCRDMA_MBX_STATUS_SRQ_SIZE_UNDERUNS: + err_num = -EINVAL; + break; + + case OCRDMA_MBX_STATUS_PD_INUSE: + case OCRDMA_MBX_STATUS_QP_BOUND: + case OCRDMA_MBX_STATUS_MW_STILL_BOUND: + case OCRDMA_MBX_STATUS_MW_BOUND: + err_num = -EBUSY; + break; + + case OCRDMA_MBX_STATUS_RECVQ_RQE_EXCEEDS: + case OCRDMA_MBX_STATUS_SGE_RECV_EXCEEDS: + case OCRDMA_MBX_STATUS_RQE_EXCEEDS: + case OCRDMA_MBX_STATUS_SRQ_LIMIT_EXCEEDS: + case OCRDMA_MBX_STATUS_ORD_EXCEEDS: + case OCRDMA_MBX_STATUS_IRD_EXCEEDS: + case OCRDMA_MBX_STATUS_SENDQ_WQE_EXCEEDS: + case OCRDMA_MBX_STATUS_SGE_SEND_EXCEEDS: + case OCRDMA_MBX_STATUS_SGE_WRITE_EXCEEDS: + err_num = -ENOBUFS; + break; + + case OCRDMA_MBX_STATUS_FAILED: + switch (add_status) { + case OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES: + err_num = -EAGAIN; + break; + default: + err_num = -EFAULT; + break; + } + break; + default: + err_num = -EFAULT; + } + return err_num; +} + +char *port_speed_string(struct ocrdma_dev *dev) +{ + char *str = ""; + u16 speeds_supported; + + speeds_supported = dev->phy.fixed_speeds_supported | + dev->phy.auto_speeds_supported; + if (speeds_supported & OCRDMA_PHY_SPEED_40GBPS) +
str = "40Gbps "; + else if (speeds_supported & OCRDMA_PHY_SPEED_10GBPS) + str = "10Gbps "; + else if (speeds_supported & OCRDMA_PHY_SPEED_1GBPS) + str = "1Gbps "; + + return str; +} + +static int ocrdma_get_mbx_cqe_errno(u16 cqe_status) +{ + int err_num = -EINVAL; + + switch (cqe_status) { + case OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_PRIVILEDGES: + err_num = -EPERM; + break; + case OCRDMA_MBX_CQE_STATUS_INVALID_PARAMETER: + err_num = -EINVAL; + break; + case OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_RESOURCES: + case OCRDMA_MBX_CQE_STATUS_QUEUE_FLUSHING: + err_num = -EINVAL; + break; + case OCRDMA_MBX_CQE_STATUS_DMA_FAILED: + default: + err_num = -EINVAL; + break; + } + return err_num; +} + +void ocrdma_ring_cq_db(struct ocrdma_dev *dev, u16 cq_id, bool armed, + bool solicited, u16 cqe_popped) +{ + u32 val = cq_id & OCRDMA_DB_CQ_RING_ID_MASK; + + val |= ((cq_id & OCRDMA_DB_CQ_RING_ID_EXT_MASK) << + OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT); + + if (armed) + val |= (1 << OCRDMA_DB_CQ_REARM_SHIFT); + if (solicited) + val |= (1 << OCRDMA_DB_CQ_SOLICIT_SHIFT); + val |= (cqe_popped << OCRDMA_DB_CQ_NUM_POPPED_SHIFT); + iowrite32(val, dev->nic_info.db + OCRDMA_DB_CQ_OFFSET); +} + +static void ocrdma_ring_mq_db(struct ocrdma_dev *dev) +{ + u32 val = 0; + + val |= dev->mq.sq.id & OCRDMA_MQ_ID_MASK; + val |= 1 << OCRDMA_MQ_NUM_MQE_SHIFT; + iowrite32(val, dev->nic_info.db + OCRDMA_DB_MQ_OFFSET); +} + +static void ocrdma_ring_eq_db(struct ocrdma_dev *dev, u16 eq_id, + bool arm, bool clear_int, u16 num_eqe) +{ + u32 val = 0; + + val |= eq_id & OCRDMA_EQ_ID_MASK; + val |= ((eq_id & OCRDMA_EQ_ID_EXT_MASK) << OCRDMA_EQ_ID_EXT_MASK_SHIFT); + if (arm) + val |= (1 << OCRDMA_REARM_SHIFT); + if (clear_int) + val |= (1 << OCRDMA_EQ_CLR_SHIFT); + val |= (1 << OCRDMA_EQ_TYPE_SHIFT); + val |= (num_eqe << OCRDMA_NUM_EQE_SHIFT); + iowrite32(val, dev->nic_info.db + OCRDMA_DB_EQ_OFFSET); +} + +static void ocrdma_init_mch(struct ocrdma_mbx_hdr *cmd_hdr, + u8 opcode, u8 subsys, u32 cmd_len) +{ + cmd_hdr->subsys_op = (opcode | (subsys << OCRDMA_MCH_SUBSYS_SHIFT)); + cmd_hdr->timeout = 20; /* seconds */ + cmd_hdr->cmd_len = cmd_len - sizeof(struct ocrdma_mbx_hdr); +} + +static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len) +{ + struct ocrdma_mqe *mqe; + + mqe = kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL); + if (!mqe) + return NULL; + mqe->hdr.spcl_sge_cnt_emb |= + (OCRDMA_MQE_EMBEDDED << OCRDMA_MQE_HDR_EMB_SHIFT) & + OCRDMA_MQE_HDR_EMB_MASK; + mqe->hdr.pyld_len = cmd_len - sizeof(struct ocrdma_mqe_hdr); + + ocrdma_init_mch(&mqe->u.emb_req.mch, opcode, OCRDMA_SUBSYS_ROCE, + mqe->hdr.pyld_len); + return mqe; +} + +static void *ocrdma_alloc_mqe(void) +{ + return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL); +} + +static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q) +{ + dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma); +} + +static int ocrdma_alloc_q(struct ocrdma_dev *dev, + struct ocrdma_queue_info *q, u16 len, u16 entry_size) +{ + memset(q, 0, sizeof(*q)); + q->len = len; + q->entry_size = entry_size; + q->size = len * entry_size; + q->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, q->size, + &q->dma, GFP_KERNEL); + if (!q->va) + return -ENOMEM; + memset(q->va, 0, q->size); + return 0; +} + +static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt, + dma_addr_t host_pa, int hw_page_size) +{ + int i; + + for (i = 0; i < cnt; i++) { + q_pa[i].lo = (u32) (host_pa & 0xffffffff); + q_pa[i].hi = (u32) upper_32_bits(host_pa); + host_pa += hw_page_size; + } +} + +static int 
ocrdma_mbx_delete_q(struct ocrdma_dev *dev, + struct ocrdma_queue_info *q, int queue_type) +{ + u8 opcode = 0; + int status; + struct ocrdma_delete_q_req *cmd = dev->mbx_cmd; + + switch (queue_type) { + case QTYPE_MCCQ: + opcode = OCRDMA_CMD_DELETE_MQ; + break; + case QTYPE_CQ: + opcode = OCRDMA_CMD_DELETE_CQ; + break; + case QTYPE_EQ: + opcode = OCRDMA_CMD_DELETE_EQ; + break; + default: + BUG(); + } + memset(cmd, 0, sizeof(*cmd)); + ocrdma_init_mch(&cmd->req, opcode, OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); + cmd->id = q->id; + + status = be_roce_mcc_cmd(dev->nic_info.netdev, + cmd, sizeof(*cmd), NULL, NULL); + if (!status) + q->created = false; + return status; +} + +static int ocrdma_mbx_create_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) +{ + int status; + struct ocrdma_create_eq_req *cmd = dev->mbx_cmd; + struct ocrdma_create_eq_rsp *rsp = dev->mbx_cmd; + + memset(cmd, 0, sizeof(*cmd)); + ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_EQ, OCRDMA_SUBSYS_COMMON, + sizeof(*cmd)); + + cmd->req.rsvd_version = 2; + cmd->num_pages = 4; + cmd->valid = OCRDMA_CREATE_EQ_VALID; + cmd->cnt = 4 << OCRDMA_CREATE_EQ_CNT_SHIFT; + + ocrdma_build_q_pages(&cmd->pa[0], cmd->num_pages, eq->q.dma, + PAGE_SIZE_4K); + status = be_roce_mcc_cmd(dev->nic_info.netdev, cmd, sizeof(*cmd), NULL, + NULL); + if (!status) { + eq->q.id = rsp->vector_eqid & 0xffff; + eq->vector = (rsp->vector_eqid >> 16) & 0xffff; + eq->q.created = true; + } + return status; +} + +static int ocrdma_create_eq(struct ocrdma_dev *dev, + struct ocrdma_eq *eq, u16 q_len) +{ + int status; + + status = ocrdma_alloc_q(dev, &eq->q, OCRDMA_EQ_LEN, + sizeof(struct ocrdma_eqe)); + if (status) + return status; + + status = ocrdma_mbx_create_eq(dev, eq); + if (status) + goto mbx_err; + eq->dev = dev; + ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0); + + return 0; +mbx_err: + ocrdma_free_q(dev, &eq->q); + return status; +} + +int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) +{ + int irq; + + if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) + irq = dev->nic_info.pdev->irq; + else + irq = dev->nic_info.msix.vector_list[eq->vector]; + return irq; +} + +static void _ocrdma_destroy_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) +{ + if (eq->q.created) { + ocrdma_mbx_delete_q(dev, &eq->q, QTYPE_EQ); + ocrdma_free_q(dev, &eq->q); + } +} + +static void ocrdma_destroy_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) +{ + int irq; + + /* disarm EQ so that interrupts are not generated + * during freeing and EQ delete is in progress. 
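+ * (ocrdma_ring_eq_db() is called with arm=false and clear_int=false, + * leaving the EQ silent while it is torn down.)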
+ */ + ocrdma_ring_eq_db(dev, eq->q.id, false, false, 0); + + irq = ocrdma_get_irq(dev, eq); + free_irq(irq, eq); + _ocrdma_destroy_eq(dev, eq); +} + +static void ocrdma_destroy_eqs(struct ocrdma_dev *dev) +{ + int i; + + for (i = 0; i < dev->eq_cnt; i++) + ocrdma_destroy_eq(dev, &dev->eq_tbl[i]); +} + +static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev, + struct ocrdma_queue_info *cq, + struct ocrdma_queue_info *eq) +{ + struct ocrdma_create_cq_cmd *cmd = dev->mbx_cmd; + struct ocrdma_create_cq_cmd_rsp *rsp = dev->mbx_cmd; + int status; + + memset(cmd, 0, sizeof(*cmd)); + ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_CQ, + OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); + + cmd->req.rsvd_version = OCRDMA_CREATE_CQ_VER2; + cmd->pgsz_pgcnt = (cq->size / OCRDMA_MIN_Q_PAGE_SIZE) << + OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT; + cmd->pgsz_pgcnt |= PAGES_4K_SPANNED(cq->va, cq->size); + + cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS; + cmd->eqn = eq->id; + cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe); + + ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE, + cq->dma, PAGE_SIZE_4K); + status = be_roce_mcc_cmd(dev->nic_info.netdev, + cmd, sizeof(*cmd), NULL, NULL); + if (!status) { + cq->id = (u16) (rsp->cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK); + cq->created = true; + } + return status; +} + +static u32 ocrdma_encoded_q_len(int q_len) +{ + u32 len_encoded = fls(q_len); /* log2(len) + 1 */ + + if (len_encoded == 16) + len_encoded = 0; + return len_encoded; +} + +static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev, + struct ocrdma_queue_info *mq, + struct ocrdma_queue_info *cq) +{ + int num_pages, status; + struct ocrdma_create_mq_req *cmd = dev->mbx_cmd; + struct ocrdma_create_mq_rsp *rsp = dev->mbx_cmd; + struct ocrdma_pa *pa; + + memset(cmd, 0, sizeof(*cmd)); + num_pages = PAGES_4K_SPANNED(mq->va, mq->size); + + ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT, + OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); + cmd->req.rsvd_version = 1; + cmd->cqid_pages = num_pages; + cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT); + cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID; + + cmd->async_event_bitmap = Bit(OCRDMA_ASYNC_GRP5_EVE_CODE); + cmd->async_event_bitmap |= Bit(OCRDMA_ASYNC_RDMA_EVE_CODE); + + cmd->async_cqid_ringsize = cq->id; + cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << + OCRDMA_CREATE_MQ_RING_SIZE_SHIFT); + cmd->valid = OCRDMA_CREATE_MQ_VALID; + pa = &cmd->pa[0]; + + ocrdma_build_q_pages(pa, num_pages, mq->dma, PAGE_SIZE_4K); + status = be_roce_mcc_cmd(dev->nic_info.netdev, + cmd, sizeof(*cmd), NULL, NULL); + if (!status) { + mq->id = rsp->id; + mq->created = true; + } + return status; +} + +static int ocrdma_create_mq(struct ocrdma_dev *dev) +{ + int status; + + /* Alloc completion queue for Mailbox queue */ + status = ocrdma_alloc_q(dev, &dev->mq.cq, OCRDMA_MQ_CQ_LEN, + sizeof(struct ocrdma_mcqe)); + if (status) + goto alloc_err; + + dev->eq_tbl[0].cq_cnt++; + status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->eq_tbl[0].q); + if (status) + goto mbx_cq_free; + + memset(&dev->mqe_ctx, 0, sizeof(dev->mqe_ctx)); + init_waitqueue_head(&dev->mqe_ctx.cmd_wait); + mutex_init(&dev->mqe_ctx.lock); + + /* Alloc Mailbox queue */ + status = ocrdma_alloc_q(dev, &dev->mq.sq, OCRDMA_MQ_LEN, + sizeof(struct ocrdma_mqe)); + if (status) + goto mbx_cq_destroy; + status = ocrdma_mbx_create_mq(dev, &dev->mq.sq, &dev->mq.cq); + if (status) + goto mbx_q_free; + ocrdma_ring_cq_db(dev, dev->mq.cq.id, true, false, 0); + return 0; + +mbx_q_free: + 
ocrdma_free_q(dev, &dev->mq.sq); +mbx_cq_destroy: + ocrdma_mbx_delete_q(dev, &dev->mq.cq, QTYPE_CQ); +mbx_cq_free: + ocrdma_free_q(dev, &dev->mq.cq); +alloc_err: + return status; +} + +static void ocrdma_destroy_mq(struct ocrdma_dev *dev) +{ + struct ocrdma_queue_info *mbxq, *cq; + + /* mqe_ctx lock synchronizes with any other pending cmds. */ + mutex_lock(&dev->mqe_ctx.lock); + mbxq = &dev->mq.sq; + if (mbxq->created) { + ocrdma_mbx_delete_q(dev, mbxq, QTYPE_MCCQ); + ocrdma_free_q(dev, mbxq); + } + mutex_unlock(&dev->mqe_ctx.lock); + + cq = &dev->mq.cq; + if (cq->created) { + ocrdma_mbx_delete_q(dev, cq, QTYPE_CQ); + ocrdma_free_q(dev, cq); + } +} + +static void ocrdma_process_qpcat_error(struct ocrdma_dev *dev, + struct ocrdma_qp *qp) +{ + enum ib_qp_state new_ib_qps = IB_QPS_ERR; + enum ib_qp_state old_ib_qps; + + if (qp == NULL) + BUG(); + ocrdma_qp_state_change(qp, new_ib_qps, &old_ib_qps); +} + +static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, + struct ocrdma_ae_mcqe *cqe) +{ + struct ocrdma_qp *qp = NULL; + struct ocrdma_cq *cq = NULL; + struct ib_event ib_evt = { 0 }; + int cq_event = 0; + int qp_event = 1; + int srq_event = 0; + int dev_event = 0; + int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >> + OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT; + + if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID) + qp = dev->qp_tbl[cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK]; + if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) + cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK]; + + ib_evt.device = &dev->ibdev; + + switch (type) { + case OCRDMA_CQ_ERROR: + ib_evt.element.cq = &cq->ibcq; + ib_evt.event = IB_EVENT_CQ_ERR; + cq_event = 1; + qp_event = 0; + break; + case OCRDMA_CQ_OVERRUN_ERROR: + ib_evt.element.cq = &cq->ibcq; + ib_evt.event = IB_EVENT_CQ_ERR; + cq_event = 1; + qp_event = 0; + break; + case OCRDMA_CQ_QPCAT_ERROR: + ib_evt.element.qp = &qp->ibqp; + ib_evt.event = IB_EVENT_QP_FATAL; + ocrdma_process_qpcat_error(dev, qp); + break; + case OCRDMA_QP_ACCESS_ERROR: + ib_evt.element.qp = &qp->ibqp; + ib_evt.event = IB_EVENT_QP_ACCESS_ERR; + break; + case OCRDMA_QP_COMM_EST_EVENT: + ib_evt.element.qp = &qp->ibqp; + ib_evt.event = IB_EVENT_COMM_EST; + break; + case OCRDMA_SQ_DRAINED_EVENT: + ib_evt.element.qp = &qp->ibqp; + ib_evt.event = IB_EVENT_SQ_DRAINED; + break; + case OCRDMA_DEVICE_FATAL_EVENT: + ib_evt.element.port_num = 1; + ib_evt.event = IB_EVENT_DEVICE_FATAL; + qp_event = 0; + dev_event = 1; + break; + case OCRDMA_SRQCAT_ERROR: + ib_evt.element.srq = &qp->srq->ibsrq; + ib_evt.event = IB_EVENT_SRQ_ERR; + srq_event = 1; + qp_event = 0; + break; + case OCRDMA_SRQ_LIMIT_EVENT: + ib_evt.element.srq = &qp->srq->ibsrq; + ib_evt.event = IB_EVENT_SRQ_LIMIT_REACHED; + srq_event = 1; + qp_event = 0; + break; + case OCRDMA_QP_LAST_WQE_EVENT: + ib_evt.element.qp = &qp->ibqp; + ib_evt.event = IB_EVENT_QP_LAST_WQE_REACHED; + break; + default: + cq_event = 0; + qp_event = 0; + srq_event = 0; + dev_event = 0; + pr_err("%s() unknown type=0x%x\n", __func__, type); + break; + } + + if (qp_event) { + if (qp->ibqp.event_handler) + qp->ibqp.event_handler(&ib_evt, qp->ibqp.qp_context); + } else if (cq_event) { + if (cq->ibcq.event_handler) + cq->ibcq.event_handler(&ib_evt, cq->ibcq.cq_context); + } else if (srq_event) { + if (qp->srq->ibsrq.event_handler) + qp->srq->ibsrq.event_handler(&ib_evt, + qp->srq->ibsrq. 
+						    srq_context);
+	} else if (dev_event) {
+		pr_err("%s: Fatal event received\n", dev->ibdev.name);
+		ib_dispatch_event(&ib_evt);
+	}
+}
+
+static void ocrdma_process_grp5_async(struct ocrdma_dev *dev,
+				      struct ocrdma_ae_mcqe *cqe)
+{
+	struct ocrdma_ae_pvid_mcqe *evt;
+	int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >>
+			OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT;
+
+	switch (type) {
+	case OCRDMA_ASYNC_EVENT_PVID_STATE:
+		evt = (struct ocrdma_ae_pvid_mcqe *)cqe;
+		if ((evt->tag_enabled & OCRDMA_AE_PVID_MCQE_ENABLED_MASK) >>
+			OCRDMA_AE_PVID_MCQE_ENABLED_SHIFT)
+			dev->pvid = ((evt->tag_enabled &
+					OCRDMA_AE_PVID_MCQE_TAG_MASK) >>
+					OCRDMA_AE_PVID_MCQE_TAG_SHIFT);
+		break;
+	default:
+		/* Not an event of interest. */
+		break;
+	}
+}
+
+static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
+{
+	/* async CQE processing */
+	struct ocrdma_ae_mcqe *cqe = ae_cqe;
+	u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
+			OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
+
+	if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE)
+		ocrdma_dispatch_ibevent(dev, cqe);
+	else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE)
+		ocrdma_process_grp5_async(dev, cqe);
+	else
+		pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
+		       dev->id, evt_code);
+}
+
+static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
+{
+	if (dev->mqe_ctx.tag == cqe->tag_lo && dev->mqe_ctx.cmd_done == false) {
+		dev->mqe_ctx.cqe_status = (cqe->status &
+			OCRDMA_MCQE_STATUS_MASK) >> OCRDMA_MCQE_STATUS_SHIFT;
+		dev->mqe_ctx.ext_status =
+			(cqe->status & OCRDMA_MCQE_ESTATUS_MASK)
+				>> OCRDMA_MCQE_ESTATUS_SHIFT;
+		dev->mqe_ctx.cmd_done = true;
+		wake_up(&dev->mqe_ctx.cmd_wait);
+	} else
+		pr_err("%s() cqe for invalid tag=0x%x, expected=0x%x\n",
+		       __func__, cqe->tag_lo, dev->mqe_ctx.tag);
+}
+
+static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
+{
+	u16 cqe_popped = 0;
+	struct ocrdma_mcqe *cqe;
+
+	while (1) {
+		cqe = ocrdma_get_mcqe(dev);
+		if (cqe == NULL)
+			break;
+		ocrdma_le32_to_cpu(cqe, sizeof(*cqe));
+		cqe_popped += 1;
+		if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_AE_MASK)
+			ocrdma_process_acqe(dev, cqe);
+		else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK)
+			ocrdma_process_mcqe(dev, cqe);
+		memset(cqe, 0, sizeof(struct ocrdma_mcqe));
+		ocrdma_mcq_inc_tail(dev);
+	}
+	ocrdma_ring_cq_db(dev, dev->mq.cq.id, true, false, cqe_popped);
+	return 0;
+}
+
+static void ocrdma_qp_buddy_cq_handler(struct ocrdma_dev *dev,
+				       struct ocrdma_cq *cq)
+{
+	unsigned long flags;
+	struct ocrdma_qp *qp;
+	bool buddy_cq_found = false;
+	/* Go through the list of QPs in error state that are using this CQ
+	 * and invoke their callback handlers to trigger CQE processing for
+	 * error/flushed CQEs. It is rare to find more than a few entries in
+	 * this list, as most consumers stop after getting an error CQE.
+	 * The list is traversed only once, until a matching buddy CQ is
+	 * found for a QP.
+	 */
+	spin_lock_irqsave(&dev->flush_q_lock, flags);
+	list_for_each_entry(qp, &cq->sq_head, sq_entry) {
+		if (qp->srq)
+			continue;
+		/* if wq and rq share the same cq, then the comp_handler
+		 * is already invoked.
+		 */
+		if (qp->sq_cq == qp->rq_cq)
+			continue;
+		/* if completion came on sq, rq's cq is buddy cq.
+		 * if completion came on rq, sq's cq is buddy cq.
+		 */
+		if (qp->sq_cq == cq)
+			cq = qp->rq_cq;
+		else
+			cq = qp->sq_cq;
+		buddy_cq_found = true;
+		break;
+	}
+	spin_unlock_irqrestore(&dev->flush_q_lock, flags);
+	if (buddy_cq_found == false)
+		return;
+	if (cq->ibcq.comp_handler) {
+		spin_lock_irqsave(&cq->comp_handler_lock, flags);
+		(*cq->ibcq.comp_handler)(&cq->ibcq, cq->ibcq.cq_context);
+		spin_unlock_irqrestore(&cq->comp_handler_lock, flags);
+	}
+}
+
+static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx)
+{
+	unsigned long flags;
+	struct ocrdma_cq *cq;
+
+	if (cq_idx >= OCRDMA_MAX_CQ)
+		BUG();
+
+	cq = dev->cq_tbl[cq_idx];
+	if (cq == NULL)
+		return;
+
+	if (cq->ibcq.comp_handler) {
+		spin_lock_irqsave(&cq->comp_handler_lock, flags);
+		(*cq->ibcq.comp_handler)(&cq->ibcq, cq->ibcq.cq_context);
+		spin_unlock_irqrestore(&cq->comp_handler_lock, flags);
+	}
+	ocrdma_qp_buddy_cq_handler(dev, cq);
+}
+
+static void ocrdma_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
+{
+	/* process the MQ-CQE. */
+	if (cq_id == dev->mq.cq.id)
+		ocrdma_mq_cq_handler(dev, cq_id);
+	else
+		ocrdma_qp_cq_handler(dev, cq_id);
+}
+
+static irqreturn_t ocrdma_irq_handler(int irq, void *handle)
+{
+	struct ocrdma_eq *eq = handle;
+	struct ocrdma_dev *dev = eq->dev;
+	struct ocrdma_eqe eqe;
+	struct ocrdma_eqe *ptr;
+	u16 cq_id;
+	int budget = eq->cq_cnt;
+
+	do {
+		ptr = ocrdma_get_eqe(eq);
+		eqe = *ptr;
+		ocrdma_le32_to_cpu(&eqe, sizeof(eqe));
+		if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0)
+			break;
+
+		ptr->id_valid = 0;
+		/* ring the eq doorbell as soon as the eqe is consumed. */
+		ocrdma_ring_eq_db(dev, eq->q.id, false, true, 1);
+		/* check whether it is a CQE or not. */
+		if ((eqe.id_valid & OCRDMA_EQE_FOR_CQE_MASK) == 0) {
+			cq_id = eqe.id_valid >> OCRDMA_EQE_RESOURCE_ID_SHIFT;
+			ocrdma_cq_handler(dev, cq_id);
+		}
+		ocrdma_eq_inc_tail(eq);
+
+		/* There can be a stale EQE after the last bound CQ is
+		 * destroyed. EQE valid and budget == 0 implies this.
+ */ + if (budget) + budget--; + + } while (budget); + + ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0); + return IRQ_HANDLED; +} + +static void ocrdma_post_mqe(struct ocrdma_dev *dev, struct ocrdma_mqe *cmd) +{ + struct ocrdma_mqe *mqe; + + dev->mqe_ctx.tag = dev->mq.sq.head; + dev->mqe_ctx.cmd_done = false; + mqe = ocrdma_get_mqe(dev); + cmd->hdr.tag_lo = dev->mq.sq.head; + ocrdma_copy_cpu_to_le32(mqe, cmd, sizeof(*mqe)); + /* make sure descriptor is written before ringing doorbell */ + wmb(); + ocrdma_mq_inc_head(dev); + ocrdma_ring_mq_db(dev); +} + +static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev) +{ + long status; + /* 30 sec timeout */ + status = wait_event_timeout(dev->mqe_ctx.cmd_wait, + (dev->mqe_ctx.cmd_done != false), + msecs_to_jiffies(30000)); + if (status) + return 0; + else + return -1; +} + +/* issue a mailbox command on the MQ */ +static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) +{ + int status = 0; + u16 cqe_status, ext_status; + struct ocrdma_mqe *rsp_mqe; + struct ocrdma_mbx_rsp *rsp = NULL; + + mutex_lock(&dev->mqe_ctx.lock); + ocrdma_post_mqe(dev, mqe); + status = ocrdma_wait_mqe_cmpl(dev); + if (status) + goto mbx_err; + cqe_status = dev->mqe_ctx.cqe_status; + ext_status = dev->mqe_ctx.ext_status; + rsp_mqe = ocrdma_get_mqe_rsp(dev); + ocrdma_copy_le32_to_cpu(mqe, rsp_mqe, (sizeof(*mqe))); + if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >> + OCRDMA_MQE_HDR_EMB_SHIFT) + rsp = &mqe->u.rsp; + + if (cqe_status || ext_status) { + pr_err("%s() cqe_status=0x%x, ext_status=0x%x,", + __func__, cqe_status, ext_status); + if (rsp) { + /* This is for embedded cmds. */ + pr_err("opcode=0x%x, subsystem=0x%x\n", + (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> + OCRDMA_MBX_RSP_OPCODE_SHIFT, + (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >> + OCRDMA_MBX_RSP_SUBSYS_SHIFT); + } + status = ocrdma_get_mbx_cqe_errno(cqe_status); + goto mbx_err; + } + /* For non embedded, rsp errors are handled in ocrdma_nonemb_mbx_cmd */ + if (rsp && (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK)) + status = ocrdma_get_mbx_errno(mqe->u.rsp.status); +mbx_err: + mutex_unlock(&dev->mqe_ctx.lock); + return status; +} + +static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe, + void *payload_va) +{ + int status = 0; + struct ocrdma_mbx_rsp *rsp = payload_va; + + if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >> + OCRDMA_MQE_HDR_EMB_SHIFT) + BUG(); + + status = ocrdma_mbx_cmd(dev, mqe); + if (!status) + /* For non embedded, only CQE failures are handled in + * ocrdma_mbx_cmd. We need to check for RSP errors. 
+ */ + if (rsp->status & OCRDMA_MBX_RSP_STATUS_MASK) + status = ocrdma_get_mbx_errno(rsp->status); + + if (status) + pr_err("opcode=0x%x, subsystem=0x%x\n", + (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> + OCRDMA_MBX_RSP_OPCODE_SHIFT, + (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >> + OCRDMA_MBX_RSP_SUBSYS_SHIFT); + return status; +} + +static void ocrdma_get_attr(struct ocrdma_dev *dev, + struct ocrdma_dev_attr *attr, + struct ocrdma_mbx_query_config *rsp) +{ + attr->max_pd = + (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT; + attr->max_qp = + (rsp->qp_srq_cq_ird_ord & OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT; + attr->max_srq = + (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; + attr->max_send_sge = ((rsp->max_write_send_sge & + OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT); + attr->max_recv_sge = (rsp->max_write_send_sge & + OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT; + attr->max_srq_sge = (rsp->max_srq_rqe_sge & + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET; + attr->max_rdma_sge = (rsp->max_write_send_sge & + OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT; + attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp & + OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT; + attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp & + OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT; + attr->cq_overflow_detect = (rsp->qp_srq_cq_ird_ord & + OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK) >> + OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT; + attr->srq_supported = (rsp->qp_srq_cq_ird_ord & + OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK) >> + OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT; + attr->local_ca_ack_delay = (rsp->max_pd_ca_ack_delay & + OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK) >> + OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT; + attr->max_mw = rsp->max_mw; + attr->max_mr = rsp->max_mr; + attr->max_mr_size = ~0ull; + attr->max_fmr = 0; + attr->max_pages_per_frmr = rsp->max_pages_per_frmr; + attr->max_num_mr_pbl = rsp->max_num_mr_pbl; + attr->max_cqe = rsp->max_cq_cqes_per_cq & + OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_MASK; + attr->max_cq = (rsp->max_cq_cqes_per_cq & + OCRDMA_MBX_QUERY_CFG_MAX_CQ_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET; + attr->wqe_size = ((rsp->wqe_rqe_stride_max_dpp_cqs & + OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET) * + OCRDMA_WQE_STRIDE; + attr->rqe_size = ((rsp->wqe_rqe_stride_max_dpp_cqs & + OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_OFFSET) * + OCRDMA_WQE_STRIDE; + attr->max_inline_data = + attr->wqe_size - (sizeof(struct ocrdma_hdr_wqe) + + sizeof(struct ocrdma_sge)); + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { + attr->ird = 1; + attr->ird_page_size = OCRDMA_MIN_Q_PAGE_SIZE; + attr->num_ird_pages = MAX_OCRDMA_IRD_PAGES; + } + dev->attr.max_wqe = rsp->max_wqes_rqes_per_q >> + OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_OFFSET; + dev->attr.max_rqe = rsp->max_wqes_rqes_per_q & + OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_MASK; +} + +static int ocrdma_check_fw_config(struct ocrdma_dev *dev, + struct ocrdma_fw_conf_rsp *conf) +{ + u32 fn_mode; + + fn_mode = conf->fn_mode & OCRDMA_FN_MODE_RDMA; + if (fn_mode != OCRDMA_FN_MODE_RDMA) + return -EINVAL; + 
dev->base_eqid = conf->base_eqid; + dev->max_eq = conf->max_eq; + return 0; +} + +/* can be issued only during init time. */ +static int ocrdma_mbx_query_fw_ver(struct ocrdma_dev *dev) +{ + int status = -ENOMEM; + struct ocrdma_mqe *cmd; + struct ocrdma_fw_ver_rsp *rsp; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_GET_FW_VER, sizeof(*cmd)); + if (!cmd) + return -ENOMEM; + ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0], + OCRDMA_CMD_GET_FW_VER, + OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); + + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + rsp = (struct ocrdma_fw_ver_rsp *)cmd; + memset(&dev->attr.fw_ver[0], 0, sizeof(dev->attr.fw_ver)); + memcpy(&dev->attr.fw_ver[0], &rsp->running_ver[0], + sizeof(rsp->running_ver)); + ocrdma_le32_to_cpu(dev->attr.fw_ver, sizeof(rsp->running_ver)); +mbx_err: + kfree(cmd); + return status; +} + +/* can be issued only during init time. */ +static int ocrdma_mbx_query_fw_config(struct ocrdma_dev *dev) +{ + int status = -ENOMEM; + struct ocrdma_mqe *cmd; + struct ocrdma_fw_conf_rsp *rsp; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_GET_FW_CONFIG, sizeof(*cmd)); + if (!cmd) + return -ENOMEM; + ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0], + OCRDMA_CMD_GET_FW_CONFIG, + OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + rsp = (struct ocrdma_fw_conf_rsp *)cmd; + status = ocrdma_check_fw_config(dev, rsp); +mbx_err: + kfree(cmd); + return status; +} + +int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset) +{ + struct ocrdma_rdma_stats_req *req = dev->stats_mem.va; + struct ocrdma_mqe *mqe = &dev->stats_mem.mqe; + struct ocrdma_rdma_stats_resp *old_stats = NULL; + int status; + + old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL); + if (old_stats == NULL) + return -ENOMEM; + + memset(mqe, 0, sizeof(*mqe)); + mqe->hdr.pyld_len = dev->stats_mem.size; + mqe->hdr.spcl_sge_cnt_emb |= + (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) & + OCRDMA_MQE_HDR_SGE_CNT_MASK; + mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dev->stats_mem.pa & 0xffffffff); + mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dev->stats_mem.pa); + mqe->u.nonemb_req.sge[0].len = dev->stats_mem.size; + + /* Cache the old stats */ + memcpy(old_stats, req, sizeof(struct ocrdma_rdma_stats_resp)); + memset(req, 0, dev->stats_mem.size); + + ocrdma_init_mch((struct ocrdma_mbx_hdr *)req, + OCRDMA_CMD_GET_RDMA_STATS, + OCRDMA_SUBSYS_ROCE, + dev->stats_mem.size); + if (reset) + req->reset_stats = reset; + + status = ocrdma_nonemb_mbx_cmd(dev, mqe, dev->stats_mem.va); + if (status) + /* Copy from cache, if mbox fails */ + memcpy(req, old_stats, sizeof(struct ocrdma_rdma_stats_resp)); + else + ocrdma_le32_to_cpu(req, dev->stats_mem.size); + + kfree(old_stats); + return status; +} + +static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) +{ + int status = -ENOMEM; + struct ocrdma_dma_mem dma; + struct ocrdma_mqe *mqe; + struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp; + struct mgmt_hba_attribs *hba_attribs; + + mqe = ocrdma_alloc_mqe(); + if (!mqe) + return status; + memset(mqe, 0, sizeof(*mqe)); + + dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp); + dma.va = dma_alloc_coherent(&dev->nic_info.pdev->dev, + dma.size, &dma.pa, GFP_KERNEL); + if (!dma.va) + goto free_mqe; + + mqe->hdr.pyld_len = dma.size; + mqe->hdr.spcl_sge_cnt_emb |= + (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) & + OCRDMA_MQE_HDR_SGE_CNT_MASK; + mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dma.pa & 0xffffffff); + 
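+	/* non-embedded command: the response buffer is passed by SGE, its
+	 * 64-bit DMA address carried as two 32-bit halves.
+	 */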
mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dma.pa); + mqe->u.nonemb_req.sge[0].len = dma.size; + + memset(dma.va, 0, dma.size); + ocrdma_init_mch((struct ocrdma_mbx_hdr *)dma.va, + OCRDMA_CMD_GET_CTRL_ATTRIBUTES, + OCRDMA_SUBSYS_COMMON, + dma.size); + + status = ocrdma_nonemb_mbx_cmd(dev, mqe, dma.va); + if (!status) { + ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va; + hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs; + + dev->hba_port_num = hba_attribs->phy_port; + strncpy(dev->model_number, + hba_attribs->controller_model_number, 31); + } + dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa); +free_mqe: + kfree(mqe); + return status; +} + +static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev) +{ + int status = -ENOMEM; + struct ocrdma_mbx_query_config *rsp; + struct ocrdma_mqe *cmd; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_CONFIG, sizeof(*cmd)); + if (!cmd) + return status; + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + rsp = (struct ocrdma_mbx_query_config *)cmd; + ocrdma_get_attr(dev, &dev->attr, rsp); +mbx_err: + kfree(cmd); + return status; +} + +int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed) +{ + int status = -ENOMEM; + struct ocrdma_get_link_speed_rsp *rsp; + struct ocrdma_mqe *cmd; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1, + sizeof(*cmd)); + if (!cmd) + return status; + ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0], + OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1, + OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); + + ((struct ocrdma_mbx_hdr *)cmd->u.cmd)->rsvd_version = 0x1; + + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + + rsp = (struct ocrdma_get_link_speed_rsp *)cmd; + *lnk_speed = rsp->phys_port_speed; + +mbx_err: + kfree(cmd); + return status; +} + +static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev) +{ + int status = -ENOMEM; + struct ocrdma_mqe *cmd; + struct ocrdma_get_phy_info_rsp *rsp; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_PHY_DETAILS, sizeof(*cmd)); + if (!cmd) + return status; + + ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0], + OCRDMA_CMD_PHY_DETAILS, OCRDMA_SUBSYS_COMMON, + sizeof(*cmd)); + + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + + rsp = (struct ocrdma_get_phy_info_rsp *)cmd; + dev->phy.phy_type = le16_to_cpu(rsp->phy_type); + dev->phy.auto_speeds_supported = + le16_to_cpu(rsp->auto_speeds_supported); + dev->phy.fixed_speeds_supported = + le16_to_cpu(rsp->fixed_speeds_supported); +mbx_err: + kfree(cmd); + return status; +} + +int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd) +{ + int status = -ENOMEM; + struct ocrdma_alloc_pd *cmd; + struct ocrdma_alloc_pd_rsp *rsp; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD, sizeof(*cmd)); + if (!cmd) + return status; + if (pd->dpp_enabled) + cmd->enable_dpp_rsvd |= OCRDMA_ALLOC_PD_ENABLE_DPP; + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + rsp = (struct ocrdma_alloc_pd_rsp *)cmd; + pd->id = rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_PDID_MASK; + if (rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_DPP) { + pd->dpp_enabled = true; + pd->dpp_page = rsp->dpp_page_pdid >> + OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT; + } else { + pd->dpp_enabled = false; + pd->num_dpp_qp = 0; + } +mbx_err: + kfree(cmd); + return status; +} + +int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd) +{ + int status = -ENOMEM; + struct 
ocrdma_dealloc_pd *cmd;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DEALLOC_PD, sizeof(*cmd));
+	if (!cmd)
+		return status;
+	cmd->id = pd->id;
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	kfree(cmd);
+	return status;
+}
+
+static int ocrdma_build_q_conf(u32 *num_entries, int entry_size,
+			       int *num_pages, int *page_size)
+{
+	int i;
+	int mem_size;
+
+	*num_entries = roundup_pow_of_two(*num_entries);
+	mem_size = *num_entries * entry_size;
+	/* find the lowest possible multiplier */
+	for (i = 0; i < OCRDMA_MAX_Q_PAGE_SIZE_CNT; i++) {
+		if (mem_size <= (OCRDMA_Q_PAGE_BASE_SIZE << i))
+			break;
+	}
+	if (i >= OCRDMA_MAX_Q_PAGE_SIZE_CNT)
+		return -EINVAL;
+	mem_size = roundup(mem_size,
+		((OCRDMA_Q_PAGE_BASE_SIZE << i) / OCRDMA_MAX_Q_PAGES));
+	*num_pages =
+	    mem_size / ((OCRDMA_Q_PAGE_BASE_SIZE << i) / OCRDMA_MAX_Q_PAGES);
+	*page_size = ((OCRDMA_Q_PAGE_BASE_SIZE << i) / OCRDMA_MAX_Q_PAGES);
+	*num_entries = mem_size / entry_size;
+	return 0;
+}
+
+static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
+{
+	int i;
+	int status = -ENOMEM;
+	int max_ah;
+	struct ocrdma_create_ah_tbl *cmd;
+	struct ocrdma_create_ah_tbl_rsp *rsp;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	dma_addr_t pa;
+	struct ocrdma_pbe *pbes;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_AH_TBL, sizeof(*cmd));
+	if (!cmd)
+		return status;
+
+	max_ah = OCRDMA_MAX_AH;
+	dev->av_tbl.size = sizeof(struct ocrdma_av) * max_ah;
+
+	/* number of PBEs in PBL */
+	cmd->ah_conf = (OCRDMA_AH_TBL_PAGES <<
+				OCRDMA_CREATE_AH_NUM_PAGES_SHIFT) &
+				OCRDMA_CREATE_AH_NUM_PAGES_MASK;
+
+	/* page size */
+	for (i = 0; i < OCRDMA_MAX_Q_PAGE_SIZE_CNT; i++) {
+		if (PAGE_SIZE == (OCRDMA_MIN_Q_PAGE_SIZE << i))
+			break;
+	}
+	cmd->ah_conf |= (i << OCRDMA_CREATE_AH_PAGE_SIZE_SHIFT) &
+				OCRDMA_CREATE_AH_PAGE_SIZE_MASK;
+
+	/* ah_entry size */
+	cmd->ah_conf |= (sizeof(struct ocrdma_av) <<
+				OCRDMA_CREATE_AH_ENTRY_SIZE_SHIFT) &
+				OCRDMA_CREATE_AH_ENTRY_SIZE_MASK;
+
+	dev->av_tbl.pbl.va = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
+						&dev->av_tbl.pbl.pa,
+						GFP_KERNEL);
+	if (dev->av_tbl.pbl.va == NULL)
+		goto mem_err;
+
+	dev->av_tbl.va = dma_alloc_coherent(&pdev->dev, dev->av_tbl.size,
+					    &pa, GFP_KERNEL);
+	if (dev->av_tbl.va == NULL)
+		goto mem_err_ah;
+	dev->av_tbl.pa = pa;
+	dev->av_tbl.num_ah = max_ah;
+	memset(dev->av_tbl.va, 0, dev->av_tbl.size);
+
+	pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va;
+	for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) {
+		pbes[i].pa_lo = (u32) (pa & 0xffffffff);
+		pbes[i].pa_hi = (u32) upper_32_bits(pa);
+		pa += PAGE_SIZE;
+	}
+	cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF);
+	cmd->tbl_addr[0].hi = (u32)upper_32_bits(dev->av_tbl.pbl.pa);
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	rsp = (struct ocrdma_create_ah_tbl_rsp *)cmd;
+	dev->av_tbl.ahid = rsp->ahid & 0xFFFF;
+	kfree(cmd);
+	return 0;
+
+mbx_err:
+	dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
+			  dev->av_tbl.pa);
+	dev->av_tbl.va = NULL;
+mem_err_ah:
+	dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
+			  dev->av_tbl.pbl.pa);
+	dev->av_tbl.pbl.va = NULL;
+	dev->av_tbl.size = 0;
+mem_err:
+	kfree(cmd);
+	return status;
+}
+
+static void ocrdma_mbx_delete_ah_tbl(struct ocrdma_dev *dev)
+{
+	struct ocrdma_delete_ah_tbl *cmd;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+
+	if (dev->av_tbl.va == NULL)
+		return;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_AH_TBL, sizeof(*cmd));
+	if (!cmd)
+		return;
+	cmd->ahid = dev->av_tbl.ahid;
+
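+	/* the command status is ignored here; the AH table memory is
+	 * freed below regardless, as this runs only during teardown.
+	 */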
+	ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
+			  dev->av_tbl.pa);
+	dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
+			  dev->av_tbl.pbl.pa);
+	kfree(cmd);
+}
+
+/* Multiple CQs may share an EQ. This routine returns the least used
+ * EQ to associate with a CQ, distributing interrupt processing and
+ * CPU load across the associated EQs, vectors and hence CPUs.
+ */
+static u16 ocrdma_bind_eq(struct ocrdma_dev *dev)
+{
+	int i, selected_eq = 0, cq_cnt = 0;
+	u16 eq_id;
+
+	mutex_lock(&dev->dev_lock);
+	cq_cnt = dev->eq_tbl[0].cq_cnt;
+	eq_id = dev->eq_tbl[0].q.id;
+	/* find the EQ which has the least number of
+	 * CQs associated with it.
+	 */
+	for (i = 0; i < dev->eq_cnt; i++) {
+		if (dev->eq_tbl[i].cq_cnt < cq_cnt) {
+			cq_cnt = dev->eq_tbl[i].cq_cnt;
+			eq_id = dev->eq_tbl[i].q.id;
+			selected_eq = i;
+		}
+	}
+	dev->eq_tbl[selected_eq].cq_cnt += 1;
+	mutex_unlock(&dev->dev_lock);
+	return eq_id;
+}
+
+static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id)
+{
+	int i;
+
+	mutex_lock(&dev->dev_lock);
+	i = ocrdma_get_eq_table_index(dev, eq_id);
+	if (i == -EINVAL)
+		BUG();
+	dev->eq_tbl[i].cq_cnt -= 1;
+	mutex_unlock(&dev->dev_lock);
+}
+
+int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
+			 int entries, int dpp_cq, u16 pd_id)
+{
+	int status = -ENOMEM;
+	int max_hw_cqe;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	struct ocrdma_create_cq *cmd;
+	struct ocrdma_create_cq_rsp *rsp;
+	u32 hw_pages, cqe_size, page_size, cqe_count;
+
+	if (entries > dev->attr.max_cqe) {
+		pr_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n",
+		       __func__, dev->id, dev->attr.max_cqe, entries);
+		return -EINVAL;
+	}
+	if (dpp_cq && (ocrdma_get_asic_type(dev) != OCRDMA_ASIC_GEN_SKH_R))
+		return -EINVAL;
+
+	if (dpp_cq) {
+		cq->max_hw_cqe = 1;
+		max_hw_cqe = 1;
+		cqe_size = OCRDMA_DPP_CQE_SIZE;
+		hw_pages = 1;
+	} else {
+		cq->max_hw_cqe = dev->attr.max_cqe;
+		max_hw_cqe = dev->attr.max_cqe;
+		cqe_size = sizeof(struct ocrdma_cqe);
+		hw_pages = OCRDMA_CREATE_CQ_MAX_PAGES;
+	}
+
+	cq->len = roundup(max_hw_cqe * cqe_size, OCRDMA_MIN_Q_PAGE_SIZE);
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_CQ, sizeof(*cmd));
+	if (!cmd)
+		return -ENOMEM;
+	ocrdma_init_mch(&cmd->cmd.req, OCRDMA_CMD_CREATE_CQ,
+			OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
+	cq->va = dma_alloc_coherent(&pdev->dev, cq->len, &cq->pa, GFP_KERNEL);
+	if (!cq->va) {
+		status = -ENOMEM;
+		goto mem_err;
+	}
+	memset(cq->va, 0, cq->len);
+	page_size = cq->len / hw_pages;
+	cmd->cmd.pgsz_pgcnt = (page_size / OCRDMA_MIN_Q_PAGE_SIZE) <<
+					OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT;
+	cmd->cmd.pgsz_pgcnt |= hw_pages;
+	cmd->cmd.ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
+
+	cq->eqn = ocrdma_bind_eq(dev);
+	cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3;
+	cqe_count = cq->len / cqe_size;
+	cq->cqe_cnt = cqe_count;
+	if (cqe_count > 1024) {
+		/* Set cnt to 3 to indicate more than 1024 cq entries */
+		cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT);
+	} else {
+		u8 count = 0;
+		switch (cqe_count) {
+		case 256:
+			count = 0;
+			break;
+		case 512:
+			count = 1;
+			break;
+		case 1024:
+			count = 2;
+			break;
+		default:
+			goto mbx_err;
+		}
+		cmd->cmd.ev_cnt_flags |= (count << OCRDMA_CREATE_CQ_CNT_SHIFT);
+	}
+	/* the eq is shared among all the consumer cqs.
*/ + cmd->cmd.eqn = cq->eqn; + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { + if (dpp_cq) + cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP << + OCRDMA_CREATE_CQ_TYPE_SHIFT; + cq->phase_change = false; + cmd->cmd.cqe_count = (cq->len / cqe_size); + } else { + cmd->cmd.cqe_count = (cq->len / cqe_size) - 1; + cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID; + cq->phase_change = true; + } + + cmd->cmd.pd_id = pd_id; /* valid only for v3 */ + ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size); + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + + rsp = (struct ocrdma_create_cq_rsp *)cmd; + cq->id = (u16) (rsp->rsp.cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK); + kfree(cmd); + return 0; +mbx_err: + ocrdma_unbind_eq(dev, cq->eqn); + dma_free_coherent(&pdev->dev, cq->len, cq->va, cq->pa); +mem_err: + kfree(cmd); + return status; +} + +int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq) +{ + int status = -ENOMEM; + struct ocrdma_destroy_cq *cmd; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_CQ, sizeof(*cmd)); + if (!cmd) + return status; + ocrdma_init_mch(&cmd->req, OCRDMA_CMD_DELETE_CQ, + OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); + + cmd->bypass_flush_qid |= + (cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) & + OCRDMA_DESTROY_CQ_QID_MASK; + + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + ocrdma_unbind_eq(dev, cq->eqn); + dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa); + kfree(cmd); + return status; +} + +int ocrdma_mbx_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr, + u32 pdid, int addr_check) +{ + int status = -ENOMEM; + struct ocrdma_alloc_lkey *cmd; + struct ocrdma_alloc_lkey_rsp *rsp; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_LKEY, sizeof(*cmd)); + if (!cmd) + return status; + cmd->pdid = pdid; + cmd->pbl_sz_flags |= addr_check; + cmd->pbl_sz_flags |= (hwmr->fr_mr << OCRDMA_ALLOC_LKEY_FMR_SHIFT); + cmd->pbl_sz_flags |= + (hwmr->remote_wr << OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT); + cmd->pbl_sz_flags |= + (hwmr->remote_rd << OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT); + cmd->pbl_sz_flags |= + (hwmr->local_wr << OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT); + cmd->pbl_sz_flags |= + (hwmr->remote_atomic << OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT); + cmd->pbl_sz_flags |= + (hwmr->num_pbls << OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT); + + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + rsp = (struct ocrdma_alloc_lkey_rsp *)cmd; + hwmr->lkey = rsp->lrkey; +mbx_err: + kfree(cmd); + return status; +} + +int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *dev, int fr_mr, u32 lkey) +{ + int status = -ENOMEM; + struct ocrdma_dealloc_lkey *cmd; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DEALLOC_LKEY, sizeof(*cmd)); + if (!cmd) + return -ENOMEM; + cmd->lkey = lkey; + cmd->rsvd_frmr = fr_mr ? 
1 : 0;
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+mbx_err:
+	kfree(cmd);
+	return status;
+}
+
+static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
+			     u32 pdid, u32 pbl_cnt, u32 pbe_size, u32 last)
+{
+	int status = -ENOMEM;
+	int i;
+	struct ocrdma_reg_nsmr *cmd;
+	struct ocrdma_reg_nsmr_rsp *rsp;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR, sizeof(*cmd));
+	if (!cmd)
+		return -ENOMEM;
+	cmd->num_pbl_pdid =
+	    pdid | (hwmr->num_pbls << OCRDMA_REG_NSMR_NUM_PBL_SHIFT);
+	cmd->fr_mr = hwmr->fr_mr;
+
+	cmd->flags_hpage_pbe_sz |= (hwmr->remote_wr <<
+				    OCRDMA_REG_NSMR_REMOTE_WR_SHIFT);
+	cmd->flags_hpage_pbe_sz |= (hwmr->remote_rd <<
+				    OCRDMA_REG_NSMR_REMOTE_RD_SHIFT);
+	cmd->flags_hpage_pbe_sz |= (hwmr->local_wr <<
+				    OCRDMA_REG_NSMR_LOCAL_WR_SHIFT);
+	cmd->flags_hpage_pbe_sz |= (hwmr->remote_atomic <<
+				    OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT);
+	cmd->flags_hpage_pbe_sz |= (hwmr->mw_bind <<
+				    OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT);
+	cmd->flags_hpage_pbe_sz |= (last << OCRDMA_REG_NSMR_LAST_SHIFT);
+
+	cmd->flags_hpage_pbe_sz |= (hwmr->pbe_size / OCRDMA_MIN_HPAGE_SIZE);
+	cmd->flags_hpage_pbe_sz |= (hwmr->pbl_size / OCRDMA_MIN_HPAGE_SIZE) <<
+					OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT;
+	cmd->totlen_low = hwmr->len;
+	cmd->totlen_high = upper_32_bits(hwmr->len);
+	cmd->fbo_low = (u32) (hwmr->fbo & 0xffffffff);
+	cmd->fbo_high = (u32) upper_32_bits(hwmr->fbo);
+	cmd->va_loaddr = (u32) hwmr->va;
+	cmd->va_hiaddr = (u32) upper_32_bits(hwmr->va);
+
+	for (i = 0; i < pbl_cnt; i++) {
+		cmd->pbl[i].lo = (u32) (hwmr->pbl_table[i].pa & 0xffffffff);
+		cmd->pbl[i].hi = upper_32_bits(hwmr->pbl_table[i].pa);
+	}
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+	rsp = (struct ocrdma_reg_nsmr_rsp *)cmd;
+	hwmr->lkey = rsp->lrkey;
+mbx_err:
+	kfree(cmd);
+	return status;
+}
+
+static int ocrdma_mbx_reg_mr_cont(struct ocrdma_dev *dev,
+				  struct ocrdma_hw_mr *hwmr, u32 pbl_cnt,
+				  u32 pbl_offset, u32 last)
+{
+	int status = -ENOMEM;
+	int i;
+	struct ocrdma_reg_nsmr_cont *cmd;
+
+	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR_CONT, sizeof(*cmd));
+	if (!cmd)
+		return -ENOMEM;
+	cmd->lrkey = hwmr->lkey;
+	cmd->num_pbl_offset = (pbl_cnt << OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT) |
+	    (pbl_offset & OCRDMA_REG_NSMR_CONT_PBL_SHIFT_MASK);
+	cmd->last = last << OCRDMA_REG_NSMR_CONT_LAST_SHIFT;
+
+	for (i = 0; i < pbl_cnt; i++) {
+		cmd->pbl[i].lo =
+		    (u32) (hwmr->pbl_table[i + pbl_offset].pa & 0xffffffff);
+		cmd->pbl[i].hi =
+		    upper_32_bits(hwmr->pbl_table[i + pbl_offset].pa);
+	}
+	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+	if (status)
+		goto mbx_err;
+mbx_err:
+	kfree(cmd);
+	return status;
+}
+
+int ocrdma_reg_mr(struct ocrdma_dev *dev,
+		  struct ocrdma_hw_mr *hwmr, u32 pdid, int acc)
+{
+	int status;
+	u32 last = 0;
+	u32 cur_pbl_cnt, pbl_offset;
+	u32 pending_pbl_cnt = hwmr->num_pbls;
+
+	pbl_offset = 0;
+	cur_pbl_cnt = min(pending_pbl_cnt, MAX_OCRDMA_NSMR_PBL);
+	if (cur_pbl_cnt == pending_pbl_cnt)
+		last = 1;
+
+	status = ocrdma_mbx_reg_mr(dev, hwmr, pdid,
+				   cur_pbl_cnt, hwmr->pbe_size, last);
+	if (status) {
+		pr_err("%s() status=%d\n", __func__, status);
+		return status;
+	}
+	/* if there are no more PBLs to register, then exit.
*/
+	if (last)
+		return 0;
+
+	while (!last) {
+		pbl_offset += cur_pbl_cnt;
+		pending_pbl_cnt -= cur_pbl_cnt;
+		cur_pbl_cnt = min(pending_pbl_cnt, MAX_OCRDMA_NSMR_PBL);
+		/* if we reach the end of the pbls, then we need to set the
+		 * last bit, indicating no more pbls to register for this
+		 * memory key.
+		 */
+		if (cur_pbl_cnt == pending_pbl_cnt)
+			last = 1;
+
+		status = ocrdma_mbx_reg_mr_cont(dev, hwmr, cur_pbl_cnt,
+						pbl_offset, last);
+		if (status)
+			break;
+	}
+	if (status)
+		pr_err("%s() err. status=%d\n", __func__, status);
+
+	return status;
+}
+
+bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *cq, struct ocrdma_qp *qp)
+{
+	struct ocrdma_qp *tmp;
+	bool found = false;
+	list_for_each_entry(tmp, &cq->sq_head, sq_entry) {
+		if (qp == tmp) {
+			found = true;
+			break;
+		}
+	}
+	return found;
+}
+
+bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *cq, struct ocrdma_qp *qp)
+{
+	struct ocrdma_qp *tmp;
+	bool found = false;
+	list_for_each_entry(tmp, &cq->rq_head, rq_entry) {
+		if (qp == tmp) {
+			found = true;
+			break;
+		}
+	}
+	return found;
+}
+
+void ocrdma_flush_qp(struct ocrdma_qp *qp)
+{
+	bool found;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->dev->flush_q_lock, flags);
+	found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
+	if (!found)
+		list_add_tail(&qp->sq_entry, &qp->sq_cq->sq_head);
+	if (!qp->srq) {
+		found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
+		if (!found)
+			list_add_tail(&qp->rq_entry, &qp->rq_cq->rq_head);
+	}
+	spin_unlock_irqrestore(&qp->dev->flush_q_lock, flags);
+}
+
+static void ocrdma_init_hwq_ptr(struct ocrdma_qp *qp)
+{
+	qp->sq.head = 0;
+	qp->sq.tail = 0;
+	qp->rq.head = 0;
+	qp->rq.tail = 0;
+}
+
+int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
+			   enum ib_qp_state *old_ib_state)
+{
+	unsigned long flags;
+	int status = 0;
+	enum ocrdma_qp_state new_state;
+	new_state = get_ocrdma_qp_state(new_ib_state);
+
+	/* sync with wqe and rqe posting */
+	spin_lock_irqsave(&qp->q_lock, flags);
+
+	if (old_ib_state)
+		*old_ib_state = get_ibqp_state(qp->state);
+	if (new_state == qp->state) {
+		spin_unlock_irqrestore(&qp->q_lock, flags);
+		return 1;
+	}
+
+	if (new_state == OCRDMA_QPS_INIT) {
+		ocrdma_init_hwq_ptr(qp);
+		ocrdma_del_flush_qp(qp);
+	} else if (new_state == OCRDMA_QPS_ERR) {
+		ocrdma_flush_qp(qp);
+	}
+
+	qp->state = new_state;
+
+	spin_unlock_irqrestore(&qp->q_lock, flags);
+	return status;
+}
+
+static u32 ocrdma_set_create_qp_mbx_access_flags(struct ocrdma_qp *qp)
+{
+	u32 flags = 0;
+	if (qp->cap_flags & OCRDMA_QP_INB_RD)
+		flags |= OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK;
+	if (qp->cap_flags & OCRDMA_QP_INB_WR)
+		flags |= OCRDMA_CREATE_QP_REQ_INB_WREN_MASK;
+	if (qp->cap_flags & OCRDMA_QP_MW_BIND)
+		flags |= OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK;
+	if (qp->cap_flags & OCRDMA_QP_LKEY0)
+		flags |= OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK;
+	if (qp->cap_flags & OCRDMA_QP_FAST_REG)
+		flags |= OCRDMA_CREATE_QP_REQ_FMR_EN_MASK;
+	return flags;
+}
+
+static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
+				       struct ib_qp_init_attr *attrs,
+				       struct ocrdma_qp *qp)
+{
+	int status;
+	u32 len, hw_pages, hw_page_size;
+	dma_addr_t pa;
+	struct ocrdma_dev *dev = qp->dev;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	u32 max_wqe_allocated;
+	u32 max_sges = attrs->cap.max_send_sge;
+
+	/* QP1 may exceed 127 */
+	max_wqe_allocated = min_t(u32, attrs->cap.max_send_wr + 1,
+				  dev->attr.max_wqe);
+
+	status = ocrdma_build_q_conf(&max_wqe_allocated,
+				     dev->attr.wqe_size, &hw_pages,
+				     &hw_page_size);
+	if (status) {
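+		/* the requested WQE depth does not fit any supported
+		 * page-size/page-count combination.
+		 */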
pr_err("%s() req. max_send_wr=0x%x\n", __func__, + max_wqe_allocated); + return -EINVAL; + } + qp->sq.max_cnt = max_wqe_allocated; + len = (hw_pages * hw_page_size); + + qp->sq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL); + if (!qp->sq.va) + return -EINVAL; + memset(qp->sq.va, 0, len); + qp->sq.len = len; + qp->sq.pa = pa; + qp->sq.entry_size = dev->attr.wqe_size; + ocrdma_build_q_pages(&cmd->wq_addr[0], hw_pages, pa, hw_page_size); + + cmd->type_pgsz_pdn |= (ilog2(hw_page_size / OCRDMA_MIN_Q_PAGE_SIZE) + << OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT); + cmd->num_wq_rq_pages |= (hw_pages << + OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_SHIFT) & + OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_MASK; + cmd->max_sge_send_write |= (max_sges << + OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT) & + OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_MASK; + cmd->max_sge_send_write |= (max_sges << + OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_SHIFT) & + OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_MASK; + cmd->max_wqe_rqe |= (ilog2(qp->sq.max_cnt) << + OCRDMA_CREATE_QP_REQ_MAX_WQE_SHIFT) & + OCRDMA_CREATE_QP_REQ_MAX_WQE_MASK; + cmd->wqe_rqe_size |= (dev->attr.wqe_size << + OCRDMA_CREATE_QP_REQ_WQE_SIZE_SHIFT) & + OCRDMA_CREATE_QP_REQ_WQE_SIZE_MASK; + return 0; +} + +static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd, + struct ib_qp_init_attr *attrs, + struct ocrdma_qp *qp) +{ + int status; + u32 len, hw_pages, hw_page_size; + dma_addr_t pa = 0; + struct ocrdma_dev *dev = qp->dev; + struct pci_dev *pdev = dev->nic_info.pdev; + u32 max_rqe_allocated = attrs->cap.max_recv_wr + 1; + + status = ocrdma_build_q_conf(&max_rqe_allocated, dev->attr.rqe_size, + &hw_pages, &hw_page_size); + if (status) { + pr_err("%s() req. max_recv_wr=0x%x\n", __func__, + attrs->cap.max_recv_wr + 1); + return status; + } + qp->rq.max_cnt = max_rqe_allocated; + len = (hw_pages * hw_page_size); + + qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL); + if (!qp->rq.va) + return -ENOMEM; + memset(qp->rq.va, 0, len); + qp->rq.pa = pa; + qp->rq.len = len; + qp->rq.entry_size = dev->attr.rqe_size; + + ocrdma_build_q_pages(&cmd->rq_addr[0], hw_pages, pa, hw_page_size); + cmd->type_pgsz_pdn |= (ilog2(hw_page_size / OCRDMA_MIN_Q_PAGE_SIZE) << + OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT); + cmd->num_wq_rq_pages |= + (hw_pages << OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_SHIFT) & + OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_MASK; + cmd->max_sge_recv_flags |= (attrs->cap.max_recv_sge << + OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT) & + OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK; + cmd->max_wqe_rqe |= (ilog2(qp->rq.max_cnt) << + OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT) & + OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK; + cmd->wqe_rqe_size |= (dev->attr.rqe_size << + OCRDMA_CREATE_QP_REQ_RQE_SIZE_SHIFT) & + OCRDMA_CREATE_QP_REQ_RQE_SIZE_MASK; + return 0; +} + +static void ocrdma_set_create_qp_dpp_cmd(struct ocrdma_create_qp_req *cmd, + struct ocrdma_pd *pd, + struct ocrdma_qp *qp, + u8 enable_dpp_cq, u16 dpp_cq_id) +{ + pd->num_dpp_qp--; + qp->dpp_enabled = true; + cmd->max_sge_recv_flags |= OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK; + if (!enable_dpp_cq) + return; + cmd->max_sge_recv_flags |= OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK; + cmd->dpp_credits_cqid = dpp_cq_id; + cmd->dpp_credits_cqid |= OCRDMA_CREATE_QP_REQ_DPP_CREDIT_LIMIT << + OCRDMA_CREATE_QP_REQ_DPP_CREDIT_SHIFT; +} + +static int ocrdma_set_create_qp_ird_cmd(struct ocrdma_create_qp_req *cmd, + struct ocrdma_qp *qp) +{ + struct ocrdma_dev *dev = qp->dev; + struct pci_dev *pdev = dev->nic_info.pdev; + dma_addr_t pa = 0; + int ird_page_size = 
dev->attr.ird_page_size; + int ird_q_len = dev->attr.num_ird_pages * ird_page_size; + struct ocrdma_hdr_wqe *rqe; + int i = 0; + + if (dev->attr.ird == 0) + return 0; + + qp->ird_q_va = dma_alloc_coherent(&pdev->dev, ird_q_len, + &pa, GFP_KERNEL); + if (!qp->ird_q_va) + return -ENOMEM; + memset(qp->ird_q_va, 0, ird_q_len); + ocrdma_build_q_pages(&cmd->ird_addr[0], dev->attr.num_ird_pages, + pa, ird_page_size); + for (; i < ird_q_len / dev->attr.rqe_size; i++) { + rqe = (struct ocrdma_hdr_wqe *)(qp->ird_q_va + + (i * dev->attr.rqe_size)); + rqe->cw = 0; + rqe->cw |= 2; + rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); + rqe->cw |= (8 << OCRDMA_WQE_SIZE_SHIFT); + rqe->cw |= (8 << OCRDMA_WQE_NXT_WQE_SIZE_SHIFT); + } + return 0; +} + +static void ocrdma_get_create_qp_rsp(struct ocrdma_create_qp_rsp *rsp, + struct ocrdma_qp *qp, + struct ib_qp_init_attr *attrs, + u16 *dpp_offset, u16 *dpp_credit_lmt) +{ + u32 max_wqe_allocated, max_rqe_allocated; + qp->id = rsp->qp_id & OCRDMA_CREATE_QP_RSP_QP_ID_MASK; + qp->rq.dbid = rsp->sq_rq_id & OCRDMA_CREATE_QP_RSP_RQ_ID_MASK; + qp->sq.dbid = rsp->sq_rq_id >> OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT; + qp->max_ird = rsp->max_ord_ird & OCRDMA_CREATE_QP_RSP_MAX_IRD_MASK; + qp->max_ord = (rsp->max_ord_ird >> OCRDMA_CREATE_QP_RSP_MAX_ORD_SHIFT); + qp->dpp_enabled = false; + if (rsp->dpp_response & OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK) { + qp->dpp_enabled = true; + *dpp_credit_lmt = (rsp->dpp_response & + OCRDMA_CREATE_QP_RSP_DPP_CREDITS_MASK) >> + OCRDMA_CREATE_QP_RSP_DPP_CREDITS_SHIFT; + *dpp_offset = (rsp->dpp_response & + OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK) >> + OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT; + } + max_wqe_allocated = + rsp->max_wqe_rqe >> OCRDMA_CREATE_QP_RSP_MAX_WQE_SHIFT; + max_wqe_allocated = 1 << max_wqe_allocated; + max_rqe_allocated = 1 << ((u16)rsp->max_wqe_rqe); + + qp->sq.max_cnt = max_wqe_allocated; + qp->sq.max_wqe_idx = max_wqe_allocated - 1; + + if (!attrs->srq) { + qp->rq.max_cnt = max_rqe_allocated; + qp->rq.max_wqe_idx = max_rqe_allocated - 1; + } +} + +int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs, + u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset, + u16 *dpp_credit_lmt) +{ + int status = -ENOMEM; + u32 flags = 0; + struct ocrdma_dev *dev = qp->dev; + struct ocrdma_pd *pd = qp->pd; + struct pci_dev *pdev = dev->nic_info.pdev; + struct ocrdma_cq *cq; + struct ocrdma_create_qp_req *cmd; + struct ocrdma_create_qp_rsp *rsp; + int qptype; + + switch (attrs->qp_type) { + case IB_QPT_GSI: + qptype = OCRDMA_QPT_GSI; + break; + case IB_QPT_RC: + qptype = OCRDMA_QPT_RC; + break; + case IB_QPT_UD: + qptype = OCRDMA_QPT_UD; + break; + default: + return -EINVAL; + } + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_QP, sizeof(*cmd)); + if (!cmd) + return status; + cmd->type_pgsz_pdn |= (qptype << OCRDMA_CREATE_QP_REQ_QPT_SHIFT) & + OCRDMA_CREATE_QP_REQ_QPT_MASK; + status = ocrdma_set_create_qp_sq_cmd(cmd, attrs, qp); + if (status) + goto sq_err; + + if (attrs->srq) { + struct ocrdma_srq *srq = get_ocrdma_srq(attrs->srq); + cmd->max_sge_recv_flags |= OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK; + cmd->rq_addr[0].lo = srq->id; + qp->srq = srq; + } else { + status = ocrdma_set_create_qp_rq_cmd(cmd, attrs, qp); + if (status) + goto rq_err; + } + + status = ocrdma_set_create_qp_ird_cmd(cmd, qp); + if (status) + goto mbx_err; + + cmd->type_pgsz_pdn |= (pd->id << OCRDMA_CREATE_QP_REQ_PD_ID_SHIFT) & + OCRDMA_CREATE_QP_REQ_PD_ID_MASK; + + flags = ocrdma_set_create_qp_mbx_access_flags(qp); + + cmd->max_sge_recv_flags |= 
flags; + cmd->max_ord_ird |= (dev->attr.max_ord_per_qp << + OCRDMA_CREATE_QP_REQ_MAX_ORD_SHIFT) & + OCRDMA_CREATE_QP_REQ_MAX_ORD_MASK; + cmd->max_ord_ird |= (dev->attr.max_ird_per_qp << + OCRDMA_CREATE_QP_REQ_MAX_IRD_SHIFT) & + OCRDMA_CREATE_QP_REQ_MAX_IRD_MASK; + cq = get_ocrdma_cq(attrs->send_cq); + cmd->wq_rq_cqid |= (cq->id << OCRDMA_CREATE_QP_REQ_WQ_CQID_SHIFT) & + OCRDMA_CREATE_QP_REQ_WQ_CQID_MASK; + qp->sq_cq = cq; + cq = get_ocrdma_cq(attrs->recv_cq); + cmd->wq_rq_cqid |= (cq->id << OCRDMA_CREATE_QP_REQ_RQ_CQID_SHIFT) & + OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK; + qp->rq_cq = cq; + + if (pd->dpp_enabled && pd->num_dpp_qp) { + ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq, + dpp_cq_id); + } + + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + rsp = (struct ocrdma_create_qp_rsp *)cmd; + ocrdma_get_create_qp_rsp(rsp, qp, attrs, dpp_offset, dpp_credit_lmt); + qp->state = OCRDMA_QPS_RST; + kfree(cmd); + return 0; +mbx_err: + if (qp->rq.va) + dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa); +rq_err: + pr_err("%s(%d) rq_err\n", __func__, dev->id); + dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa); +sq_err: + pr_err("%s(%d) sq_err\n", __func__, dev->id); + kfree(cmd); + return status; +} + +int ocrdma_mbx_query_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp, + struct ocrdma_qp_params *param) +{ + int status = -ENOMEM; + struct ocrdma_query_qp *cmd; + struct ocrdma_query_qp_rsp *rsp; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_QP, sizeof(*cmd)); + if (!cmd) + return status; + cmd->qp_id = qp->id; + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + rsp = (struct ocrdma_query_qp_rsp *)cmd; + memcpy(param, &rsp->params, sizeof(struct ocrdma_qp_params)); +mbx_err: + kfree(cmd); + return status; +} + +static int ocrdma_set_av_params(struct ocrdma_qp *qp, + struct ocrdma_modify_qp *cmd, + struct ib_qp_attr *attrs) +{ + int status; + struct ib_ah_attr *ah_attr = &attrs->ah_attr; + union ib_gid sgid, zgid; + u32 vlan_id; + u8 mac_addr[6]; + + if ((ah_attr->ah_flags & IB_AH_GRH) == 0) + return -EINVAL; + cmd->params.tclass_sq_psn |= + (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT); + cmd->params.rnt_rc_sl_fl |= + (ah_attr->grh.flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK); + cmd->params.rnt_rc_sl_fl |= (ah_attr->sl << OCRDMA_QP_PARAMS_SL_SHIFT); + cmd->params.hop_lmt_rq_psn |= + (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT); + cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID; + memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], + sizeof(cmd->params.dgid)); + status = ocrdma_query_gid(&qp->dev->ibdev, 1, + ah_attr->grh.sgid_index, &sgid); + if (status) + return status; + + memset(&zgid, 0, sizeof(zgid)); + if (!memcmp(&sgid, &zgid, sizeof(zgid))) + return -EINVAL; + + qp->sgid_idx = ah_attr->grh.sgid_index; + memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid)); + ocrdma_resolve_dmac(qp->dev, ah_attr, &mac_addr[0]); + cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) | + (mac_addr[2] << 16) | (mac_addr[3] << 24); + /* convert them to LE format. 
*/ + ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid)); + ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); + cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8); + vlan_id = ah_attr->vlan_id; + if (vlan_id && (vlan_id < 0x1000)) { + cmd->params.vlan_dmac_b4_to_b5 |= + vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; + cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; + } + return 0; +} + +static int ocrdma_set_qp_params(struct ocrdma_qp *qp, + struct ocrdma_modify_qp *cmd, + struct ib_qp_attr *attrs, int attr_mask) +{ + int status = 0; + + if (attr_mask & IB_QP_PKEY_INDEX) { + cmd->params.path_mtu_pkey_indx |= (attrs->pkey_index & + OCRDMA_QP_PARAMS_PKEY_INDEX_MASK); + cmd->flags |= OCRDMA_QP_PARA_PKEY_VALID; + } + if (attr_mask & IB_QP_QKEY) { + qp->qkey = attrs->qkey; + cmd->params.qkey = attrs->qkey; + cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID; + } + if (attr_mask & IB_QP_AV) { + status = ocrdma_set_av_params(qp, cmd, attrs); + if (status) + return status; + } else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) { + /* set the default mac address for UD, GSI QPs */ + cmd->params.dmac_b0_to_b3 = qp->dev->nic_info.mac_addr[0] | + (qp->dev->nic_info.mac_addr[1] << 8) | + (qp->dev->nic_info.mac_addr[2] << 16) | + (qp->dev->nic_info.mac_addr[3] << 24); + cmd->params.vlan_dmac_b4_to_b5 = qp->dev->nic_info.mac_addr[4] | + (qp->dev->nic_info.mac_addr[5] << 8); + } + if ((attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) && + attrs->en_sqd_async_notify) { + cmd->params.max_sge_recv_flags |= + OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC; + cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID; + } + if (attr_mask & IB_QP_DEST_QPN) { + cmd->params.ack_to_rnr_rtc_dest_qpn |= (attrs->dest_qp_num & + OCRDMA_QP_PARAMS_DEST_QPN_MASK); + cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID; + } + if (attr_mask & IB_QP_PATH_MTU) { + if (attrs->path_mtu < IB_MTU_256 || + attrs->path_mtu > IB_MTU_4096) { + status = -EINVAL; + goto pmtu_err; + } + cmd->params.path_mtu_pkey_indx |= + (ib_mtu_enum_to_int(attrs->path_mtu) << + OCRDMA_QP_PARAMS_PATH_MTU_SHIFT) & + OCRDMA_QP_PARAMS_PATH_MTU_MASK; + cmd->flags |= OCRDMA_QP_PARA_PMTU_VALID; + } + if (attr_mask & IB_QP_TIMEOUT) { + cmd->params.ack_to_rnr_rtc_dest_qpn |= attrs->timeout << + OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT; + cmd->flags |= OCRDMA_QP_PARA_ACK_TO_VALID; + } + if (attr_mask & IB_QP_RETRY_CNT) { + cmd->params.rnt_rc_sl_fl |= (attrs->retry_cnt << + OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT) & + OCRDMA_QP_PARAMS_RETRY_CNT_MASK; + cmd->flags |= OCRDMA_QP_PARA_RETRY_CNT_VALID; + } + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + cmd->params.rnt_rc_sl_fl |= (attrs->min_rnr_timer << + OCRDMA_QP_PARAMS_RNR_NAK_TIMER_SHIFT) & + OCRDMA_QP_PARAMS_RNR_NAK_TIMER_MASK; + cmd->flags |= OCRDMA_QP_PARA_RNT_VALID; + } + if (attr_mask & IB_QP_RNR_RETRY) { + cmd->params.ack_to_rnr_rtc_dest_qpn |= (attrs->rnr_retry << + OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT) + & OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK; + cmd->flags |= OCRDMA_QP_PARA_RRC_VALID; + } + if (attr_mask & IB_QP_SQ_PSN) { + cmd->params.tclass_sq_psn |= (attrs->sq_psn & 0x00ffffff); + cmd->flags |= OCRDMA_QP_PARA_SQPSN_VALID; + } + if (attr_mask & IB_QP_RQ_PSN) { + cmd->params.hop_lmt_rq_psn |= (attrs->rq_psn & 0x00ffffff); + cmd->flags |= OCRDMA_QP_PARA_RQPSN_VALID; + } + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attrs->max_rd_atomic > qp->dev->attr.max_ord_per_qp) { + status = -EINVAL; + goto pmtu_err; + } + qp->max_ord = attrs->max_rd_atomic; + cmd->flags |= OCRDMA_QP_PARA_MAX_ORD_VALID; + } + if (attr_mask & 
IB_QP_MAX_DEST_RD_ATOMIC) { + if (attrs->max_dest_rd_atomic > qp->dev->attr.max_ird_per_qp) { + status = -EINVAL; + goto pmtu_err; + } + qp->max_ird = attrs->max_dest_rd_atomic; + cmd->flags |= OCRDMA_QP_PARA_MAX_IRD_VALID; + } + cmd->params.max_ord_ird = (qp->max_ord << + OCRDMA_QP_PARAMS_MAX_ORD_SHIFT) | + (qp->max_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK); +pmtu_err: + return status; +} + +int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp, + struct ib_qp_attr *attrs, int attr_mask) +{ + int status = -ENOMEM; + struct ocrdma_modify_qp *cmd; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_QP, sizeof(*cmd)); + if (!cmd) + return status; + + cmd->params.id = qp->id; + cmd->flags = 0; + if (attr_mask & IB_QP_STATE) { + cmd->params.max_sge_recv_flags |= + (get_ocrdma_qp_state(attrs->qp_state) << + OCRDMA_QP_PARAMS_STATE_SHIFT) & + OCRDMA_QP_PARAMS_STATE_MASK; + cmd->flags |= OCRDMA_QP_PARA_QPS_VALID; + } else { + cmd->params.max_sge_recv_flags |= + (qp->state << OCRDMA_QP_PARAMS_STATE_SHIFT) & + OCRDMA_QP_PARAMS_STATE_MASK; + } + + status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask); + if (status) + goto mbx_err; + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + +mbx_err: + kfree(cmd); + return status; +} + +int ocrdma_mbx_destroy_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp) +{ + int status = -ENOMEM; + struct ocrdma_destroy_qp *cmd; + struct pci_dev *pdev = dev->nic_info.pdev; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_QP, sizeof(*cmd)); + if (!cmd) + return status; + cmd->qp_id = qp->id; + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + +mbx_err: + kfree(cmd); + if (qp->sq.va) + dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa); + if (!qp->srq && qp->rq.va) + dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa); + if (qp->dpp_enabled) + qp->pd->num_dpp_qp++; + return status; +} + +int ocrdma_mbx_create_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq, + struct ib_srq_init_attr *srq_attr, + struct ocrdma_pd *pd) +{ + int status = -ENOMEM; + int hw_pages, hw_page_size; + int len; + struct ocrdma_create_srq_rsp *rsp; + struct ocrdma_create_srq *cmd; + dma_addr_t pa; + struct pci_dev *pdev = dev->nic_info.pdev; + u32 max_rqe_allocated; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd)); + if (!cmd) + return status; + + cmd->pgsz_pdid = pd->id & OCRDMA_CREATE_SRQ_PD_ID_MASK; + max_rqe_allocated = srq_attr->attr.max_wr + 1; + status = ocrdma_build_q_conf(&max_rqe_allocated, + dev->attr.rqe_size, + &hw_pages, &hw_page_size); + if (status) { + pr_err("%s() req. 
max_wr=0x%x\n", __func__, + srq_attr->attr.max_wr); + status = -EINVAL; + goto ret; + } + len = hw_pages * hw_page_size; + srq->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL); + if (!srq->rq.va) { + status = -ENOMEM; + goto ret; + } + ocrdma_build_q_pages(&cmd->rq_addr[0], hw_pages, pa, hw_page_size); + + srq->rq.entry_size = dev->attr.rqe_size; + srq->rq.pa = pa; + srq->rq.len = len; + srq->rq.max_cnt = max_rqe_allocated; + + cmd->max_sge_rqe = ilog2(max_rqe_allocated); + cmd->max_sge_rqe |= srq_attr->attr.max_sge << + OCRDMA_CREATE_SRQ_MAX_SGE_RECV_SHIFT; + + cmd->pgsz_pdid |= (ilog2(hw_page_size / OCRDMA_MIN_Q_PAGE_SIZE) + << OCRDMA_CREATE_SRQ_PG_SZ_SHIFT); + cmd->pages_rqe_sz |= (dev->attr.rqe_size + << OCRDMA_CREATE_SRQ_RQE_SIZE_SHIFT) + & OCRDMA_CREATE_SRQ_RQE_SIZE_MASK; + cmd->pages_rqe_sz |= hw_pages << OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_SHIFT; + + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + rsp = (struct ocrdma_create_srq_rsp *)cmd; + srq->id = rsp->id; + srq->rq.dbid = rsp->id; + max_rqe_allocated = ((rsp->max_sge_rqe_allocated & + OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_MASK) >> + OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_SHIFT); + max_rqe_allocated = (1 << max_rqe_allocated); + srq->rq.max_cnt = max_rqe_allocated; + srq->rq.max_wqe_idx = max_rqe_allocated - 1; + srq->rq.max_sges = (rsp->max_sge_rqe_allocated & + OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_MASK) >> + OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_SHIFT; + goto ret; +mbx_err: + dma_free_coherent(&pdev->dev, srq->rq.len, srq->rq.va, pa); +ret: + kfree(cmd); + return status; +} + +int ocrdma_mbx_modify_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr) +{ + int status = -ENOMEM; + struct ocrdma_modify_srq *cmd; + struct ocrdma_pd *pd = srq->pd; + struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_SRQ, sizeof(*cmd)); + if (!cmd) + return status; + cmd->id = srq->id; + cmd->limit_max_rqe |= srq_attr->srq_limit << + OCRDMA_MODIFY_SRQ_LIMIT_SHIFT; + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + kfree(cmd); + return status; +} + +int ocrdma_mbx_query_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr) +{ + int status = -ENOMEM; + struct ocrdma_query_srq *cmd; + struct ocrdma_dev *dev = get_ocrdma_dev(srq->ibsrq.device); + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_SRQ, sizeof(*cmd)); + if (!cmd) + return status; + cmd->id = srq->rq.dbid; + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status == 0) { + struct ocrdma_query_srq_rsp *rsp = + (struct ocrdma_query_srq_rsp *)cmd; + srq_attr->max_sge = + rsp->srq_lmt_max_sge & + OCRDMA_QUERY_SRQ_RSP_MAX_SGE_RECV_MASK; + srq_attr->max_wr = + rsp->max_rqe_pdid >> OCRDMA_QUERY_SRQ_RSP_MAX_RQE_SHIFT; + srq_attr->srq_limit = rsp->srq_lmt_max_sge >> + OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_SHIFT; + } + kfree(cmd); + return status; +} + +int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq) +{ + int status = -ENOMEM; + struct ocrdma_destroy_srq *cmd; + struct pci_dev *pdev = dev->nic_info.pdev; + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_SRQ, sizeof(*cmd)); + if (!cmd) + return status; + cmd->id = srq->id; + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (srq->rq.va) + dma_free_coherent(&pdev->dev, srq->rq.len, + srq->rq.va, srq->rq.pa); + kfree(cmd); + return status; +} + +int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) +{ + int i; + int status = -EINVAL; + struct ocrdma_av *av; + unsigned 
long flags; + + av = dev->av_tbl.va; + spin_lock_irqsave(&dev->av_tbl.lock, flags); + for (i = 0; i < dev->av_tbl.num_ah; i++) { + if (av->valid == 0) { + av->valid = OCRDMA_AV_VALID; + ah->av = av; + ah->id = i; + status = 0; + break; + } + av++; + } + if (i == dev->av_tbl.num_ah) + status = -EAGAIN; + spin_unlock_irqrestore(&dev->av_tbl.lock, flags); + return status; +} + +int ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) +{ + unsigned long flags; + spin_lock_irqsave(&dev->av_tbl.lock, flags); + ah->av->valid = 0; + spin_unlock_irqrestore(&dev->av_tbl.lock, flags); + return 0; +} + +static int ocrdma_create_eqs(struct ocrdma_dev *dev) +{ + int num_eq, i, status = 0; + int irq; + unsigned long flags = 0; + + num_eq = dev->nic_info.msix.num_vectors - + dev->nic_info.msix.start_vector; + if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) { + num_eq = 1; + flags = IRQF_SHARED; + } else { + num_eq = min_t(u32, num_eq, num_online_cpus()); + } + + if (!num_eq) + return -EINVAL; + + dev->eq_tbl = kzalloc(sizeof(struct ocrdma_eq) * num_eq, GFP_KERNEL); + if (!dev->eq_tbl) + return -ENOMEM; + + for (i = 0; i < num_eq; i++) { + status = ocrdma_create_eq(dev, &dev->eq_tbl[i], + OCRDMA_EQ_LEN); + if (status) { + status = -EINVAL; + break; + } + sprintf(dev->eq_tbl[i].irq_name, "ocrdma%d-%d", + dev->id, i); + irq = ocrdma_get_irq(dev, &dev->eq_tbl[i]); + status = request_irq(irq, ocrdma_irq_handler, flags, + dev->eq_tbl[i].irq_name, + &dev->eq_tbl[i]); + if (status) + goto done; + dev->eq_cnt += 1; + } + /* one eq is sufficient for data path to work */ + return 0; +done: + ocrdma_destroy_eqs(dev); + return status; +} + +int ocrdma_init_hw(struct ocrdma_dev *dev) +{ + int status; + + /* create the eqs */ + status = ocrdma_create_eqs(dev); + if (status) + goto qpeq_err; + status = ocrdma_create_mq(dev); + if (status) + goto mq_err; + status = ocrdma_mbx_query_fw_config(dev); + if (status) + goto conf_err; + status = ocrdma_mbx_query_dev(dev); + if (status) + goto conf_err; + status = ocrdma_mbx_query_fw_ver(dev); + if (status) + goto conf_err; + status = ocrdma_mbx_create_ah_tbl(dev); + if (status) + goto conf_err; + status = ocrdma_mbx_get_phy_info(dev); + if (status) + goto conf_err; + status = ocrdma_mbx_get_ctrl_attribs(dev); + if (status) + goto conf_err; + + return 0; + +conf_err: + ocrdma_destroy_mq(dev); +mq_err: + ocrdma_destroy_eqs(dev); +qpeq_err: + pr_err("%s() status=%d\n", __func__, status); + return status; +} + +void ocrdma_cleanup_hw(struct ocrdma_dev *dev) +{ + ocrdma_mbx_delete_ah_tbl(dev); + + /* cleanup the eqs */ + ocrdma_destroy_eqs(dev); + + /* cleanup the control path */ + ocrdma_destroy_mq(dev); +} diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h new file mode 100644 index 00000000000..e513f729314 --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -0,0 +1,138 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) CNA Adapters. * + * Copyright (C) 2008-2012 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. 
* + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#ifndef __OCRDMA_HW_H__ +#define __OCRDMA_HW_H__ + +#include "ocrdma_sli.h" + +static inline void ocrdma_cpu_to_le32(void *dst, u32 len) +{ +#ifdef __BIG_ENDIAN + int i = 0; + u32 *src_ptr = dst; + u32 *dst_ptr = dst; + for (; i < (len / 4); i++) + *(dst_ptr + i) = cpu_to_le32p(src_ptr + i); +#endif +} + +static inline void ocrdma_le32_to_cpu(void *dst, u32 len) +{ +#ifdef __BIG_ENDIAN + int i = 0; + u32 *src_ptr = dst; + u32 *dst_ptr = dst; + for (; i < (len / sizeof(u32)); i++) + *(dst_ptr + i) = le32_to_cpu(*(src_ptr + i)); +#endif +} + +static inline void ocrdma_copy_cpu_to_le32(void *dst, void *src, u32 len) +{ +#ifdef __BIG_ENDIAN + int i = 0; + u32 *src_ptr = src; + u32 *dst_ptr = dst; + for (; i < (len / sizeof(u32)); i++) + *(dst_ptr + i) = cpu_to_le32p(src_ptr + i); +#else + memcpy(dst, src, len); +#endif +} + +static inline void ocrdma_copy_le32_to_cpu(void *dst, void *src, u32 len) +{ +#ifdef __BIG_ENDIAN + int i = 0; + u32 *src_ptr = src; + u32 *dst_ptr = dst; + for (; i < len / sizeof(u32); i++) + *(dst_ptr + i) = le32_to_cpu(*(src_ptr + i)); +#else + memcpy(dst, src, len); +#endif +} + +static inline u64 ocrdma_get_db_addr(struct ocrdma_dev *dev, u32 pdid) +{ + return dev->nic_info.unmapped_db + (pdid * dev->nic_info.db_page_size); +} + +int ocrdma_init_hw(struct ocrdma_dev *); +void ocrdma_cleanup_hw(struct ocrdma_dev *); + +enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps); +void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed, + bool solicited, u16 cqe_popped); + +/* verbs specific mailbox commands */ +int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed); +int ocrdma_query_config(struct ocrdma_dev *, + struct ocrdma_mbx_query_config *config); + +int ocrdma_mbx_alloc_pd(struct ocrdma_dev *, struct ocrdma_pd *); +int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *, struct ocrdma_pd *); + +int ocrdma_mbx_alloc_lkey(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr, + u32 pd_id, int addr_check); +int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *, int fmr, u32 lkey); + +int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr, + u32 pd_id, int acc); +int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *, + int entries, int dpp_cq, u16 pd_id); +int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *); + +int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs, + u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset, + u16 *dpp_credit_lmt); +int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *, + struct ib_qp_attr *attrs, int attr_mask); +int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *, + struct ocrdma_qp_params *param); +int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *); +int ocrdma_mbx_create_srq(struct ocrdma_dev *, struct ocrdma_srq *, + struct ib_srq_init_attr *, + struct ocrdma_pd *); +int ocrdma_mbx_modify_srq(struct ocrdma_srq *, struct 
ib_srq_attr *); +int ocrdma_mbx_query_srq(struct ocrdma_srq *, struct ib_srq_attr *); +int ocrdma_mbx_destroy_srq(struct ocrdma_dev *, struct ocrdma_srq *); + +int ocrdma_alloc_av(struct ocrdma_dev *, struct ocrdma_ah *); +int ocrdma_free_av(struct ocrdma_dev *, struct ocrdma_ah *); + +int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state, + enum ib_qp_state *old_ib_state); +bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *); +bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *); +void ocrdma_flush_qp(struct ocrdma_qp *); +int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq); + +int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset); +char *port_speed_string(struct ocrdma_dev *dev); +#endif /* __OCRDMA_HW_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c new file mode 100644 index 00000000000..7c504e07974 --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -0,0 +1,585 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2012 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. 
* + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#include <linux/module.h> +#include <linux/idr.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_addr.h> + +#include <linux/netdevice.h> +#include <net/addrconf.h> + +#include "ocrdma.h" +#include "ocrdma_verbs.h" +#include "ocrdma_ah.h" +#include "be_roce.h" +#include "ocrdma_hw.h" +#include "ocrdma_stats.h" +#include "ocrdma_abi.h" + +MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION); +MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); +MODULE_AUTHOR("Emulex Corporation"); +MODULE_LICENSE("GPL"); + +static LIST_HEAD(ocrdma_dev_list); +static DEFINE_SPINLOCK(ocrdma_devlist_lock); +static DEFINE_IDR(ocrdma_dev_id); + +static union ib_gid ocrdma_zero_sgid; + +void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid) +{ + u8 mac_addr[6]; + + memcpy(&mac_addr[0], &dev->nic_info.mac_addr[0], ETH_ALEN); + guid[0] = mac_addr[0] ^ 2; + guid[1] = mac_addr[1]; + guid[2] = mac_addr[2]; + guid[3] = 0xff; + guid[4] = 0xfe; + guid[5] = mac_addr[3]; + guid[6] = mac_addr[4]; + guid[7] = mac_addr[5]; +} + +static bool ocrdma_add_sgid(struct ocrdma_dev *dev, union ib_gid *new_sgid) +{ + int i; + unsigned long flags; + + memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid)); + + + spin_lock_irqsave(&dev->sgid_lock, flags); + for (i = 0; i < OCRDMA_MAX_SGID; i++) { + if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid, + sizeof(union ib_gid))) { + /* found free entry */ + memcpy(&dev->sgid_tbl[i], new_sgid, + sizeof(union ib_gid)); + spin_unlock_irqrestore(&dev->sgid_lock, flags); + return true; + } else if (!memcmp(&dev->sgid_tbl[i], new_sgid, + sizeof(union ib_gid))) { + /* entry already present, no addition is required. */ + spin_unlock_irqrestore(&dev->sgid_lock, flags); + return false; + } + } + spin_unlock_irqrestore(&dev->sgid_lock, flags); + return false; +} + +static bool ocrdma_del_sgid(struct ocrdma_dev *dev, union ib_gid *sgid) +{ + int found = false; + int i; + unsigned long flags; + + + spin_lock_irqsave(&dev->sgid_lock, flags); + /* first is default sgid, which cannot be deleted. 
*/ + for (i = 1; i < OCRDMA_MAX_SGID; i++) { + if (!memcmp(&dev->sgid_tbl[i], sgid, sizeof(union ib_gid))) { + /* found matching entry */ + memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid)); + found = true; + break; + } + } + spin_unlock_irqrestore(&dev->sgid_lock, flags); + return found; +} + +static int ocrdma_addr_event(unsigned long event, struct net_device *netdev, + union ib_gid *gid) +{ + struct ib_event gid_event; + struct ocrdma_dev *dev; + bool found = false; + bool updated = false; + bool is_vlan = false; + + is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN; + if (is_vlan) + netdev = rdma_vlan_dev_real_dev(netdev); + + rcu_read_lock(); + list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) { + if (dev->nic_info.netdev == netdev) { + found = true; + break; + } + } + rcu_read_unlock(); + + if (!found) + return NOTIFY_DONE; + + mutex_lock(&dev->dev_lock); + switch (event) { + case NETDEV_UP: + updated = ocrdma_add_sgid(dev, gid); + break; + case NETDEV_DOWN: + updated = ocrdma_del_sgid(dev, gid); + break; + default: + break; + } + if (updated) { + /* GID table updated, notify the consumers about it */ + gid_event.device = &dev->ibdev; + gid_event.element.port_num = 1; + gid_event.event = IB_EVENT_GID_CHANGE; + ib_dispatch_event(&gid_event); + } + mutex_unlock(&dev->dev_lock); + return NOTIFY_OK; +} + +static int ocrdma_inetaddr_event(struct notifier_block *notifier, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = ptr; + union ib_gid gid; + struct net_device *netdev = ifa->ifa_dev->dev; + + ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); + return ocrdma_addr_event(event, netdev, &gid); +} + +static struct notifier_block ocrdma_inetaddr_notifier = { + .notifier_call = ocrdma_inetaddr_event +}; + +#if IS_ENABLED(CONFIG_IPV6) + +static int ocrdma_inet6addr_event(struct notifier_block *notifier, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; + union ib_gid *gid = (union ib_gid *)&ifa->addr; + struct net_device *netdev = ifa->idev->dev; + return ocrdma_addr_event(event, netdev, gid); +} + +static struct notifier_block ocrdma_inet6addr_notifier = { + .notifier_call = ocrdma_inet6addr_event +}; + +#endif /* IPV6 and VLAN */ + +static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device, + u8 port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +static int ocrdma_register_device(struct ocrdma_dev *dev) +{ + strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX); + ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); + memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, + sizeof(OCRDMA_NODE_DESC)); + dev->ibdev.owner = THIS_MODULE; + dev->ibdev.uverbs_abi_ver = OCRDMA_ABI_VERSION; + dev->ibdev.uverbs_cmd_mask = + OCRDMA_UVERBS(GET_CONTEXT) | + OCRDMA_UVERBS(QUERY_DEVICE) | + OCRDMA_UVERBS(QUERY_PORT) | + OCRDMA_UVERBS(ALLOC_PD) | + OCRDMA_UVERBS(DEALLOC_PD) | + OCRDMA_UVERBS(REG_MR) | + OCRDMA_UVERBS(DEREG_MR) | + OCRDMA_UVERBS(CREATE_COMP_CHANNEL) | + OCRDMA_UVERBS(CREATE_CQ) | + OCRDMA_UVERBS(RESIZE_CQ) | + OCRDMA_UVERBS(DESTROY_CQ) | + OCRDMA_UVERBS(REQ_NOTIFY_CQ) | + OCRDMA_UVERBS(CREATE_QP) | + OCRDMA_UVERBS(MODIFY_QP) | + OCRDMA_UVERBS(QUERY_QP) | + OCRDMA_UVERBS(DESTROY_QP) | + OCRDMA_UVERBS(POLL_CQ) | + OCRDMA_UVERBS(POST_SEND) | + OCRDMA_UVERBS(POST_RECV); + + dev->ibdev.uverbs_cmd_mask |= + OCRDMA_UVERBS(CREATE_AH) | + OCRDMA_UVERBS(MODIFY_AH) | + OCRDMA_UVERBS(QUERY_AH) | + OCRDMA_UVERBS(DESTROY_AH); + + dev->ibdev.node_type = RDMA_NODE_IB_CA; + dev->ibdev.phys_port_cnt = 1; + 
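/* Only one completion vector is advertised to the IB core here, even when ocrdma_create_eqs() has set up one EQ per online CPU. */ +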
dev->ibdev.num_comp_vectors = 1; + + /* mandatory verbs. */ + dev->ibdev.query_device = ocrdma_query_device; + dev->ibdev.query_port = ocrdma_query_port; + dev->ibdev.modify_port = ocrdma_modify_port; + dev->ibdev.query_gid = ocrdma_query_gid; + dev->ibdev.get_link_layer = ocrdma_link_layer; + dev->ibdev.alloc_pd = ocrdma_alloc_pd; + dev->ibdev.dealloc_pd = ocrdma_dealloc_pd; + + dev->ibdev.create_cq = ocrdma_create_cq; + dev->ibdev.destroy_cq = ocrdma_destroy_cq; + dev->ibdev.resize_cq = ocrdma_resize_cq; + + dev->ibdev.create_qp = ocrdma_create_qp; + dev->ibdev.modify_qp = ocrdma_modify_qp; + dev->ibdev.query_qp = ocrdma_query_qp; + dev->ibdev.destroy_qp = ocrdma_destroy_qp; + + dev->ibdev.query_pkey = ocrdma_query_pkey; + dev->ibdev.create_ah = ocrdma_create_ah; + dev->ibdev.destroy_ah = ocrdma_destroy_ah; + dev->ibdev.query_ah = ocrdma_query_ah; + dev->ibdev.modify_ah = ocrdma_modify_ah; + + dev->ibdev.poll_cq = ocrdma_poll_cq; + dev->ibdev.post_send = ocrdma_post_send; + dev->ibdev.post_recv = ocrdma_post_recv; + dev->ibdev.req_notify_cq = ocrdma_arm_cq; + + dev->ibdev.get_dma_mr = ocrdma_get_dma_mr; + dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr; + dev->ibdev.dereg_mr = ocrdma_dereg_mr; + dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; + + dev->ibdev.alloc_fast_reg_mr = ocrdma_alloc_frmr; + dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list; + dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list; + + /* mandatory to support user space verbs consumer. */ + dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext; + dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext; + dev->ibdev.mmap = ocrdma_mmap; + dev->ibdev.dma_device = &dev->nic_info.pdev->dev; + + dev->ibdev.process_mad = ocrdma_process_mad; + + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { + dev->ibdev.uverbs_cmd_mask |= + OCRDMA_UVERBS(CREATE_SRQ) | + OCRDMA_UVERBS(MODIFY_SRQ) | + OCRDMA_UVERBS(QUERY_SRQ) | + OCRDMA_UVERBS(DESTROY_SRQ) | + OCRDMA_UVERBS(POST_SRQ_RECV); + + dev->ibdev.create_srq = ocrdma_create_srq; + dev->ibdev.modify_srq = ocrdma_modify_srq; + dev->ibdev.query_srq = ocrdma_query_srq; + dev->ibdev.destroy_srq = ocrdma_destroy_srq; + dev->ibdev.post_srq_recv = ocrdma_post_srq_recv; + } + return ib_register_device(&dev->ibdev, NULL); +} + +static int ocrdma_alloc_resources(struct ocrdma_dev *dev) +{ + mutex_init(&dev->dev_lock); + dev->sgid_tbl = kzalloc(sizeof(union ib_gid) * + OCRDMA_MAX_SGID, GFP_KERNEL); + if (!dev->sgid_tbl) + goto alloc_err; + spin_lock_init(&dev->sgid_lock); + + dev->cq_tbl = kzalloc(sizeof(struct ocrdma_cq *) * + OCRDMA_MAX_CQ, GFP_KERNEL); + if (!dev->cq_tbl) + goto alloc_err; + + if (dev->attr.max_qp) { + dev->qp_tbl = kzalloc(sizeof(struct ocrdma_qp *) * + OCRDMA_MAX_QP, GFP_KERNEL); + if (!dev->qp_tbl) + goto alloc_err; + } + spin_lock_init(&dev->av_tbl.lock); + spin_lock_init(&dev->flush_q_lock); + return 0; +alloc_err: + pr_err("%s(%d) error.\n", __func__, dev->id); + return -ENOMEM; +} + +static void ocrdma_free_resources(struct ocrdma_dev *dev) +{ + kfree(dev->qp_tbl); + kfree(dev->cq_tbl); + kfree(dev->sgid_tbl); +} + +/* OCRDMA sysfs interface */ +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ocrdma_dev *dev = dev_get_drvdata(device); + + return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); +} + +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ocrdma_dev *dev = dev_get_drvdata(device); + + return scnprintf(buf, 
PAGE_SIZE, "%s", &dev->attr.fw_ver[0]); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); + +static struct device_attribute *ocrdma_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver +}; + +static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) + device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]); +} + +static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) +{ + int status = 0, i; + struct ocrdma_dev *dev; + + dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); + if (!dev) { + pr_err("Unable to allocate ib device\n"); + return NULL; + } + dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL); + if (!dev->mbx_cmd) + goto idr_err; + + memcpy(&dev->nic_info, dev_info, sizeof(*dev_info)); + dev->id = idr_alloc(&ocrdma_dev_id, NULL, 0, 0, GFP_KERNEL); + if (dev->id < 0) + goto idr_err; + + status = ocrdma_init_hw(dev); + if (status) + goto init_err; + + status = ocrdma_alloc_resources(dev); + if (status) + goto alloc_err; + + status = ocrdma_register_device(dev); + if (status) + goto alloc_err; + + for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) + if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i])) + goto sysfs_err; + spin_lock(&ocrdma_devlist_lock); + list_add_tail_rcu(&dev->entry, &ocrdma_dev_list); + spin_unlock(&ocrdma_devlist_lock); + /* Init stats */ + ocrdma_add_port_stats(dev); + + pr_info("%s %s: %s \"%s\" port %d\n", + dev_name(&dev->nic_info.pdev->dev), hca_name(dev), + port_speed_string(dev), dev->model_number, + dev->hba_port_num); + pr_info("%s ocrdma%d driver loaded successfully\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); + return dev; + +sysfs_err: + ocrdma_remove_sysfiles(dev); +alloc_err: + ocrdma_free_resources(dev); + ocrdma_cleanup_hw(dev); +init_err: + idr_remove(&ocrdma_dev_id, dev->id); +idr_err: + kfree(dev->mbx_cmd); + ib_dealloc_device(&dev->ibdev); + pr_err("%s() leaving. ret=%d\n", __func__, status); + return NULL; +} + +static void ocrdma_remove_free(struct rcu_head *rcu) +{ + struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu); + + idr_remove(&ocrdma_dev_id, dev->id); + kfree(dev->mbx_cmd); + ib_dealloc_device(&dev->ibdev); +} + +static void ocrdma_remove(struct ocrdma_dev *dev) +{ + /* first unregister with stack to stop all the active traffic + * of the registered clients. 
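+ * Teardown order mirrors ocrdma_add() in reverse: stats and sysfs go first, then ib_unregister_device(); the final free is deferred via call_rcu() so RCU list walkers in ocrdma_addr_event() can still dereference the device.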
+ */ + ocrdma_rem_port_stats(dev); + ocrdma_remove_sysfiles(dev); + + ib_unregister_device(&dev->ibdev); + + spin_lock(&ocrdma_devlist_lock); + list_del_rcu(&dev->entry); + spin_unlock(&ocrdma_devlist_lock); + + ocrdma_free_resources(dev); + ocrdma_cleanup_hw(dev); + + call_rcu(&dev->rcu, ocrdma_remove_free); +} + +static int ocrdma_open(struct ocrdma_dev *dev) +{ + struct ib_event port_event; + + port_event.event = IB_EVENT_PORT_ACTIVE; + port_event.element.port_num = 1; + port_event.device = &dev->ibdev; + ib_dispatch_event(&port_event); + return 0; +} + +static int ocrdma_close(struct ocrdma_dev *dev) +{ + int i; + struct ocrdma_qp *qp, **cur_qp; + struct ib_event err_event; + struct ib_qp_attr attrs; + int attr_mask = IB_QP_STATE; + + attrs.qp_state = IB_QPS_ERR; + mutex_lock(&dev->dev_lock); + if (dev->qp_tbl) { + cur_qp = dev->qp_tbl; + for (i = 0; i < OCRDMA_MAX_QP; i++) { + qp = cur_qp[i]; + if (qp && qp->ibqp.qp_type != IB_QPT_GSI) { + /* change the QP state to ERROR */ + _ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask); + + err_event.event = IB_EVENT_QP_FATAL; + err_event.element.qp = &qp->ibqp; + err_event.device = &dev->ibdev; + ib_dispatch_event(&err_event); + } + } + } + mutex_unlock(&dev->dev_lock); + + err_event.event = IB_EVENT_PORT_ERR; + err_event.element.port_num = 1; + err_event.device = &dev->ibdev; + ib_dispatch_event(&err_event); + return 0; +} + +/* event handling via NIC driver ensures that all the NIC specific + * initialization done before RoCE driver notifies + * event to stack. + */ +static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event) +{ + switch (event) { + case BE_DEV_UP: + ocrdma_open(dev); + break; + case BE_DEV_DOWN: + ocrdma_close(dev); + break; + } +} + +static struct ocrdma_driver ocrdma_drv = { + .name = "ocrdma_driver", + .add = ocrdma_add, + .remove = ocrdma_remove, + .state_change_handler = ocrdma_event_handler, + .be_abi_version = OCRDMA_BE_ROCE_ABI_VERSION, +}; + +static void ocrdma_unregister_inet6addr_notifier(void) +{ +#if IS_ENABLED(CONFIG_IPV6) + unregister_inet6addr_notifier(&ocrdma_inet6addr_notifier); +#endif +} + +static void ocrdma_unregister_inetaddr_notifier(void) +{ + unregister_inetaddr_notifier(&ocrdma_inetaddr_notifier); +} + +static int __init ocrdma_init_module(void) +{ + int status; + + ocrdma_init_debugfs(); + + status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier); + if (status) + return status; + +#if IS_ENABLED(CONFIG_IPV6) + status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier); + if (status) + goto err_notifier6; +#endif + + status = be_roce_register_driver(&ocrdma_drv); + if (status) + goto err_be_reg; + + return 0; + +err_be_reg: + ocrdma_unregister_inet6addr_notifier(); +err_notifier6: + ocrdma_unregister_inetaddr_notifier(); + return status; +} + +static void __exit ocrdma_exit_module(void) +{ + be_roce_unregister_driver(&ocrdma_drv); + ocrdma_unregister_inet6addr_notifier(); + ocrdma_unregister_inetaddr_notifier(); + ocrdma_rem_debugfs(); +} + +module_init(ocrdma_init_module); +module_exit(ocrdma_exit_module); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h new file mode 100644 index 00000000000..96c9ee602ba --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -0,0 +1,1953 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2012 Emulex. All rights reserved. 
* + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#ifndef __OCRDMA_SLI_H__ +#define __OCRDMA_SLI_H__ + +#define Bit(_b) (1 << (_b)) + +enum { + OCRDMA_ASIC_GEN_SKH_R = 0x04, + OCRDMA_ASIC_GEN_LANCER = 0x0B +}; + +enum { + OCRDMA_ASIC_REV_A0 = 0x00, + OCRDMA_ASIC_REV_B0 = 0x10, + OCRDMA_ASIC_REV_C0 = 0x20 +}; + +#define OCRDMA_SUBSYS_ROCE 10 +enum { + OCRDMA_CMD_QUERY_CONFIG = 1, + OCRDMA_CMD_ALLOC_PD, + OCRDMA_CMD_DEALLOC_PD, + + OCRDMA_CMD_CREATE_AH_TBL, + OCRDMA_CMD_DELETE_AH_TBL, + + OCRDMA_CMD_CREATE_QP, + OCRDMA_CMD_QUERY_QP, + OCRDMA_CMD_MODIFY_QP, + OCRDMA_CMD_DELETE_QP, + + OCRDMA_CMD_RSVD1, + OCRDMA_CMD_ALLOC_LKEY, + OCRDMA_CMD_DEALLOC_LKEY, + OCRDMA_CMD_REGISTER_NSMR, + OCRDMA_CMD_REREGISTER_NSMR, + OCRDMA_CMD_REGISTER_NSMR_CONT, + OCRDMA_CMD_QUERY_NSMR, + OCRDMA_CMD_ALLOC_MW, + OCRDMA_CMD_QUERY_MW, + + OCRDMA_CMD_CREATE_SRQ, + OCRDMA_CMD_QUERY_SRQ, + OCRDMA_CMD_MODIFY_SRQ, + OCRDMA_CMD_DELETE_SRQ, + + OCRDMA_CMD_ATTACH_MCAST, + OCRDMA_CMD_DETACH_MCAST, + OCRDMA_CMD_GET_RDMA_STATS, + + OCRDMA_CMD_MAX +}; + +#define OCRDMA_SUBSYS_COMMON 1 +enum { + OCRDMA_CMD_QUERY_NTWK_LINK_CONFIG_V1 = 5, + OCRDMA_CMD_CREATE_CQ = 12, + OCRDMA_CMD_CREATE_EQ = 13, + OCRDMA_CMD_CREATE_MQ = 21, + OCRDMA_CMD_GET_CTRL_ATTRIBUTES = 32, + OCRDMA_CMD_GET_FW_VER = 35, + OCRDMA_CMD_DELETE_MQ = 53, + OCRDMA_CMD_DELETE_CQ = 54, + OCRDMA_CMD_DELETE_EQ = 55, + OCRDMA_CMD_GET_FW_CONFIG = 58, + OCRDMA_CMD_CREATE_MQ_EXT = 90, + OCRDMA_CMD_PHY_DETAILS = 102 +}; + +enum { + QTYPE_EQ = 1, + QTYPE_CQ = 2, + QTYPE_MCCQ = 3 +}; + +#define OCRDMA_MAX_SGID (8) + +#define OCRDMA_MAX_QP 2048 +#define OCRDMA_MAX_CQ 2048 +#define OCRDMA_MAX_STAG 8192 + +enum { + OCRDMA_DB_RQ_OFFSET = 0xE0, + OCRDMA_DB_GEN2_RQ_OFFSET = 0x100, + OCRDMA_DB_SQ_OFFSET = 0x60, + OCRDMA_DB_GEN2_SQ_OFFSET = 0x1C0, + OCRDMA_DB_SRQ_OFFSET = OCRDMA_DB_RQ_OFFSET, + OCRDMA_DB_GEN2_SRQ_OFFSET = OCRDMA_DB_GEN2_RQ_OFFSET, + OCRDMA_DB_CQ_OFFSET = 0x120, + OCRDMA_DB_EQ_OFFSET = OCRDMA_DB_CQ_OFFSET, + OCRDMA_DB_MQ_OFFSET = 0x140, + + OCRDMA_DB_SQ_SHIFT = 16, + OCRDMA_DB_RQ_SHIFT = 24 +}; + +#define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */ +#define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */ +/* qid #2 msbits at 12-11 */ +#define OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT 0x1 +#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT (16) /* bits 16 - 28 */ +/* Rearm bit */ +#define OCRDMA_DB_CQ_REARM_SHIFT (29) /* bit 29 */ +/* solicited bit */ +#define OCRDMA_DB_CQ_SOLICIT_SHIFT (31) /* bit 31 */ + +#define OCRDMA_EQ_ID_MASK 0x1FF /* bits 0 - 8 */ +#define OCRDMA_EQ_ID_EXT_MASK 0x3e00 /* bits 9-13 */ +#define OCRDMA_EQ_ID_EXT_MASK_SHIFT (2) /* 
qid bits 9-13 at 11-15 */ + +/* Clear the interrupt for this eq */ +#define OCRDMA_EQ_CLR_SHIFT (9) /* bit 9 */ +/* Must be 1 */ +#define OCRDMA_EQ_TYPE_SHIFT (10) /* bit 10 */ +/* Number of event entries processed */ +#define OCRDMA_NUM_EQE_SHIFT (16) /* bits 16 - 28 */ +/* Rearm bit */ +#define OCRDMA_REARM_SHIFT (29) /* bit 29 */ + +#define OCRDMA_MQ_ID_MASK 0x7FF /* bits 0 - 10 */ +/* Number of entries posted */ +#define OCRDMA_MQ_NUM_MQE_SHIFT (16) /* bits 16 - 29 */ + +#define OCRDMA_MIN_HPAGE_SIZE (4096) + +#define OCRDMA_MIN_Q_PAGE_SIZE (4096) +#define OCRDMA_MAX_Q_PAGES (8) + +#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C +#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF +#define OCRDMA_SLI_ASIC_GEN_NUM_MASK 0x0000FF00 +#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT 0x08 +/* +# 0: 4K Bytes +# 1: 8K Bytes +# 2: 16K Bytes +# 3: 32K Bytes +# 4: 64K Bytes +# 5: 128K Bytes +# 6: 256K Bytes +# 7: 512K Bytes +*/ +#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (8) +#define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES) + +#define MAX_OCRDMA_QP_PAGES (8) +#define OCRDMA_MAX_WQE_MEM_SIZE (MAX_OCRDMA_QP_PAGES * OCRDMA_MIN_HQ_PAGE_SIZE) + +#define OCRDMA_CREATE_CQ_MAX_PAGES (4) +#define OCRDMA_DPP_CQE_SIZE (4) + +#define OCRDMA_GEN2_MAX_CQE 1024 +#define OCRDMA_GEN2_CQ_PAGE_SIZE 4096 +#define OCRDMA_GEN2_WQE_SIZE 256 +#define OCRDMA_MAX_CQE 4095 +#define OCRDMA_CQ_PAGE_SIZE 16384 +#define OCRDMA_WQE_SIZE 128 +#define OCRDMA_WQE_STRIDE 8 +#define OCRDMA_WQE_ALIGN_BYTES 16 + +#define MAX_OCRDMA_SRQ_PAGES MAX_OCRDMA_QP_PAGES + +enum { + OCRDMA_MCH_OPCODE_SHIFT = 0, + OCRDMA_MCH_OPCODE_MASK = 0xFF, + OCRDMA_MCH_SUBSYS_SHIFT = 8, + OCRDMA_MCH_SUBSYS_MASK = 0xFF00 +}; + +/* mailbox cmd header */ +struct ocrdma_mbx_hdr { + u32 subsys_op; + u32 timeout; /* in seconds */ + u32 cmd_len; + u32 rsvd_version; +}; + +enum { + OCRDMA_MBX_RSP_OPCODE_SHIFT = 0, + OCRDMA_MBX_RSP_OPCODE_MASK = 0xFF, + OCRDMA_MBX_RSP_SUBSYS_SHIFT = 8, + OCRDMA_MBX_RSP_SUBSYS_MASK = 0xFF << OCRDMA_MBX_RSP_SUBSYS_SHIFT, + + OCRDMA_MBX_RSP_STATUS_SHIFT = 0, + OCRDMA_MBX_RSP_STATUS_MASK = 0xFF, + OCRDMA_MBX_RSP_ASTATUS_SHIFT = 8, + OCRDMA_MBX_RSP_ASTATUS_MASK = 0xFF << OCRDMA_MBX_RSP_ASTATUS_SHIFT +}; + +/* mailbox cmd response */ +struct ocrdma_mbx_rsp { + u32 subsys_op; + u32 status; + u32 rsp_len; + u32 add_rsp_len; +}; + +enum { + OCRDMA_MQE_EMBEDDED = 1, + OCRDMA_MQE_NONEMBEDDED = 0 +}; + +struct ocrdma_mqe_sge { + u32 pa_lo; + u32 pa_hi; + u32 len; +}; + +enum { + OCRDMA_MQE_HDR_EMB_SHIFT = 0, + OCRDMA_MQE_HDR_EMB_MASK = Bit(0), + OCRDMA_MQE_HDR_SGE_CNT_SHIFT = 3, + OCRDMA_MQE_HDR_SGE_CNT_MASK = 0x1F << OCRDMA_MQE_HDR_SGE_CNT_SHIFT, + OCRDMA_MQE_HDR_SPECIAL_SHIFT = 24, + OCRDMA_MQE_HDR_SPECIAL_MASK = 0xFF << OCRDMA_MQE_HDR_SPECIAL_SHIFT +}; + +struct ocrdma_mqe_hdr { + u32 spcl_sge_cnt_emb; + u32 pyld_len; + u32 tag_lo; + u32 tag_hi; + u32 rsvd3; +}; + +struct ocrdma_mqe_emb_cmd { + struct ocrdma_mbx_hdr mch; + u8 pyld[220]; +}; + +struct ocrdma_mqe { + struct ocrdma_mqe_hdr hdr; + union { + struct ocrdma_mqe_emb_cmd emb_req; + struct { + struct ocrdma_mqe_sge sge[19]; + } nonemb_req; + u8 cmd[236]; + struct ocrdma_mbx_rsp rsp; + } u; +}; + +#define OCRDMA_EQ_LEN 4096 +#define OCRDMA_MQ_CQ_LEN 256 +#define OCRDMA_MQ_LEN 128 + +#define PAGE_SHIFT_4K 12 +#define PAGE_SIZE_4K (1 << PAGE_SHIFT_4K) + +/* Returns number of pages spanned by the data starting at the given addr */ +#define PAGES_4K_SPANNED(_address, size) \ + ((u32)((((size_t)(_address) & (PAGE_SIZE_4K - 1)) + \ + (size) + (PAGE_SIZE_4K - 1)) >> PAGE_SHIFT_4K)) + +struct 
ocrdma_delete_q_req { + struct ocrdma_mbx_hdr req; + u32 id; +}; + +struct ocrdma_pa { + u32 lo; + u32 hi; +}; + +#define MAX_OCRDMA_EQ_PAGES (8) +struct ocrdma_create_eq_req { + struct ocrdma_mbx_hdr req; + u32 num_pages; + u32 valid; + u32 cnt; + u32 delay; + u32 rsvd; + struct ocrdma_pa pa[MAX_OCRDMA_EQ_PAGES]; +}; + +enum { + OCRDMA_CREATE_EQ_VALID = Bit(29), + OCRDMA_CREATE_EQ_CNT_SHIFT = 26, + OCRDMA_CREATE_CQ_DELAY_SHIFT = 13, +}; + +struct ocrdma_create_eq_rsp { + struct ocrdma_mbx_rsp rsp; + u32 vector_eqid; +}; + +#define OCRDMA_EQ_MINOR_OTHER (0x1) + +enum { + OCRDMA_MCQE_STATUS_SHIFT = 0, + OCRDMA_MCQE_STATUS_MASK = 0xFFFF, + OCRDMA_MCQE_ESTATUS_SHIFT = 16, + OCRDMA_MCQE_ESTATUS_MASK = 0xFFFF << OCRDMA_MCQE_ESTATUS_SHIFT, + OCRDMA_MCQE_CONS_SHIFT = 27, + OCRDMA_MCQE_CONS_MASK = Bit(27), + OCRDMA_MCQE_CMPL_SHIFT = 28, + OCRDMA_MCQE_CMPL_MASK = Bit(28), + OCRDMA_MCQE_AE_SHIFT = 30, + OCRDMA_MCQE_AE_MASK = Bit(30), + OCRDMA_MCQE_VALID_SHIFT = 31, + OCRDMA_MCQE_VALID_MASK = Bit(31) +}; + +struct ocrdma_mcqe { + u32 status; + u32 tag_lo; + u32 tag_hi; + u32 valid_ae_cmpl_cons; +}; + +enum { + OCRDMA_AE_MCQE_QPVALID = Bit(31), + OCRDMA_AE_MCQE_QPID_MASK = 0xFFFF, + + OCRDMA_AE_MCQE_CQVALID = Bit(31), + OCRDMA_AE_MCQE_CQID_MASK = 0xFFFF, + OCRDMA_AE_MCQE_VALID = Bit(31), + OCRDMA_AE_MCQE_AE = Bit(30), + OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT = 16, + OCRDMA_AE_MCQE_EVENT_TYPE_MASK = + 0xFF << OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT, + OCRDMA_AE_MCQE_EVENT_CODE_SHIFT = 8, + OCRDMA_AE_MCQE_EVENT_CODE_MASK = + 0xFF << OCRDMA_AE_MCQE_EVENT_CODE_SHIFT +}; +struct ocrdma_ae_mcqe { + u32 qpvalid_qpid; + u32 cqvalid_cqid; + u32 evt_tag; + u32 valid_ae_event; +}; + +enum { + OCRDMA_AE_PVID_MCQE_ENABLED_SHIFT = 0, + OCRDMA_AE_PVID_MCQE_ENABLED_MASK = 0xFF, + OCRDMA_AE_PVID_MCQE_TAG_SHIFT = 16, + OCRDMA_AE_PVID_MCQE_TAG_MASK = 0xFFFF << OCRDMA_AE_PVID_MCQE_TAG_SHIFT +}; + +struct ocrdma_ae_pvid_mcqe { + u32 tag_enabled; + u32 event_tag; + u32 rsvd1; + u32 rsvd2; +}; + +enum { + OCRDMA_AE_MPA_MCQE_REQ_ID_SHIFT = 16, + OCRDMA_AE_MPA_MCQE_REQ_ID_MASK = 0xFFFF << + OCRDMA_AE_MPA_MCQE_REQ_ID_SHIFT, + + OCRDMA_AE_MPA_MCQE_EVENT_CODE_SHIFT = 8, + OCRDMA_AE_MPA_MCQE_EVENT_CODE_MASK = 0xFF << + OCRDMA_AE_MPA_MCQE_EVENT_CODE_SHIFT, + OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT = 16, + OCRDMA_AE_MPA_MCQE_EVENT_TYPE_MASK = 0xFF << + OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT, + OCRDMA_AE_MPA_MCQE_EVENT_AE_SHIFT = 30, + OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = Bit(30), + OCRDMA_AE_MPA_MCQE_EVENT_VALID_SHIFT = 31, + OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = Bit(31) +}; + +struct ocrdma_ae_mpa_mcqe { + u32 req_id; + u32 w1; + u32 w2; + u32 valid_ae_event; +}; + +enum { + OCRDMA_AE_QP_MCQE_NEW_QP_STATE_SHIFT = 0, + OCRDMA_AE_QP_MCQE_NEW_QP_STATE_MASK = 0xFFFF, + OCRDMA_AE_QP_MCQE_QP_ID_SHIFT = 16, + OCRDMA_AE_QP_MCQE_QP_ID_MASK = 0xFFFF << + OCRDMA_AE_QP_MCQE_QP_ID_SHIFT, + + OCRDMA_AE_QP_MCQE_EVENT_CODE_SHIFT = 8, + OCRDMA_AE_QP_MCQE_EVENT_CODE_MASK = 0xFF << + OCRDMA_AE_QP_MCQE_EVENT_CODE_SHIFT, + OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT = 16, + OCRDMA_AE_QP_MCQE_EVENT_TYPE_MASK = 0xFF << + OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT, + OCRDMA_AE_QP_MCQE_EVENT_AE_SHIFT = 30, + OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = Bit(30), + OCRDMA_AE_QP_MCQE_EVENT_VALID_SHIFT = 31, + OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = Bit(31) +}; + +struct ocrdma_ae_qp_mcqe { + u32 qp_id_state; + u32 w1; + u32 w2; + u32 valid_ae_event; +}; + +#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14 +#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5 +#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3 + +enum OCRDMA_ASYNC_EVENT_TYPE { + 
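/* Event types reported in the OCRDMA_AE_MCQE_EVENT_TYPE field of an async MCQE when the event code is OCRDMA_ASYNC_RDMA_EVE_CODE. */ +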
OCRDMA_CQ_ERROR = 0x00, + OCRDMA_CQ_OVERRUN_ERROR = 0x01, + OCRDMA_CQ_QPCAT_ERROR = 0x02, + OCRDMA_QP_ACCESS_ERROR = 0x03, + OCRDMA_QP_COMM_EST_EVENT = 0x04, + OCRDMA_SQ_DRAINED_EVENT = 0x05, + OCRDMA_DEVICE_FATAL_EVENT = 0x08, + OCRDMA_SRQCAT_ERROR = 0x0E, + OCRDMA_SRQ_LIMIT_EVENT = 0x0F, + OCRDMA_QP_LAST_WQE_EVENT = 0x10 +}; + +/* mailbox command request and responses */ +enum { + OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2, + OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = Bit(2), + OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT = 3, + OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = Bit(3), + OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT = 8, + OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK = 0xFFFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT, + + OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT = 16, + OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT, + OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT = 8, + OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK = 0xFF << + OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT, + + OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0, + OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF, + OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16, + OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT, + + OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT = 0, + OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK = 0xFFFF, + OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT = 16, + OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT, + + OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET = 24, + OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_MASK = 0xFF << + OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET, + OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_OFFSET = 16, + OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_MASK = 0xFF << + OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_OFFSET, + OCRDMA_MBX_QUERY_CFG_MAX_DPP_CQES_OFFSET = 0, + OCRDMA_MBX_QUERY_CFG_MAX_DPP_CQES_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_DPP_CQES_OFFSET, + + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET = 16, + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET, + OCRDMA_MBX_QUERY_CFG_MAX_RPIR_QPS_OFFSET = 0, + OCRDMA_MBX_QUERY_CFG_MAX_RPIR_QPS_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_RPIR_QPS_OFFSET, + + OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET = 16, + OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET, + OCRDMA_MBX_QUERY_CFG_MAX_DPP_CREDITS_OFFSET = 0, + OCRDMA_MBX_QUERY_CFG_MAX_DPP_CREDITS_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_DPP_CREDITS_OFFSET, + + OCRDMA_MBX_QUERY_CFG_MAX_DPP_QPS_OFFSET = 0, + OCRDMA_MBX_QUERY_CFG_MAX_DPP_QPS_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_DPP_QPS_OFFSET, + + OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_OFFSET = 16, + OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_OFFSET, + OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_OFFSET = 0, + OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_OFFSET, + + OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET = 16, + OCRDMA_MBX_QUERY_CFG_MAX_CQ_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET, + OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_OFFSET = 0, + OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_OFFSET, + + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_RQE_OFFSET = 16, + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_RQE_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_RQE_OFFSET, + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET = 0, + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET, +}; + +struct ocrdma_mbx_query_config { 
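+ /* Response to OCRDMA_CMD_QUERY_CONFIG: each u32 below packs two or more device limits, decoded with the OCRDMA_MBX_QUERY_CFG_* shift/mask pairs defined above. */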
+ struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + u32 qp_srq_cq_ird_ord; + u32 max_pd_ca_ack_delay; + u32 max_write_send_sge; + u32 max_ird_ord_per_qp; + u32 max_shared_ird_ord; + u32 max_mr; + u32 max_mr_size_lo; + u32 max_mr_size_hi; + u32 max_num_mr_pbl; + u32 max_mw; + u32 max_fmr; + u32 max_pages_per_frmr; + u32 max_mcast_group; + u32 max_mcast_qp_attach; + u32 max_total_mcast_qp_attach; + u32 wqe_rqe_stride_max_dpp_cqs; + u32 max_srq_rpir_qps; + u32 max_dpp_pds_credits; + u32 max_dpp_credits_pds_per_pd; + u32 max_wqes_rqes_per_q; + u32 max_cq_cqes_per_cq; + u32 max_srq_rqe_sge; +}; + +struct ocrdma_fw_ver_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u8 running_ver[32]; +}; + +struct ocrdma_fw_conf_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u32 config_num; + u32 asic_revision; + u32 phy_port; + u32 fn_mode; + struct { + u32 mode; + u32 nic_wqid_base; + u32 nic_wq_tot; + u32 prot_wqid_base; + u32 prot_wq_tot; + u32 prot_rqid_base; + u32 prot_rqid_tot; + u32 rsvd[6]; + } ulp[2]; + u32 fn_capabilities; + u32 rsvd1; + u32 rsvd2; + u32 base_eqid; + u32 max_eq; + +}; + +enum { + OCRDMA_FN_MODE_RDMA = 0x4 +}; + +struct ocrdma_get_phy_info_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u16 phy_type; + u16 interface_type; + u32 misc_params; + u16 ext_phy_details; + u16 rsvd; + u16 auto_speeds_supported; + u16 fixed_speeds_supported; + u32 future_use[2]; +}; + +enum { + OCRDMA_PHY_SPEED_ZERO = 0x0, + OCRDMA_PHY_SPEED_10MBPS = 0x1, + OCRDMA_PHY_SPEED_100MBPS = 0x2, + OCRDMA_PHY_SPEED_1GBPS = 0x4, + OCRDMA_PHY_SPEED_10GBPS = 0x8, + OCRDMA_PHY_SPEED_40GBPS = 0x20 +}; + + +struct ocrdma_get_link_speed_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u8 pt_port_num; + u8 link_duplex; + u8 phys_port_speed; + u8 phys_port_fault; + u16 rsvd1; + u16 qos_lnk_speed; + u8 logical_lnk_status; + u8 rsvd2[3]; +}; + +enum { + OCRDMA_PHYS_LINK_SPEED_ZERO = 0x0, + OCRDMA_PHYS_LINK_SPEED_10MBPS = 0x1, + OCRDMA_PHYS_LINK_SPEED_100MBPS = 0x2, + OCRDMA_PHYS_LINK_SPEED_1GBPS = 0x3, + OCRDMA_PHYS_LINK_SPEED_10GBPS = 0x4, + OCRDMA_PHYS_LINK_SPEED_20GBPS = 0x5, + OCRDMA_PHYS_LINK_SPEED_25GBPS = 0x6, + OCRDMA_PHYS_LINK_SPEED_40GBPS = 0x7, + OCRDMA_PHYS_LINK_SPEED_100GBPS = 0x8 +}; + +enum { + OCRDMA_CREATE_CQ_VER2 = 2, + OCRDMA_CREATE_CQ_VER3 = 3, + + OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF, + OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16, + OCRDMA_CREATE_CQ_PAGE_SIZE_MASK = 0xFF, + + OCRDMA_CREATE_CQ_COALESCWM_SHIFT = 12, + OCRDMA_CREATE_CQ_COALESCWM_MASK = Bit(13) | Bit(12), + OCRDMA_CREATE_CQ_FLAGS_NODELAY = Bit(14), + OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = Bit(15), + + OCRDMA_CREATE_CQ_EQ_ID_MASK = 0xFFFF, + OCRDMA_CREATE_CQ_CQE_COUNT_MASK = 0xFFFF +}; + +enum { + OCRDMA_CREATE_CQ_VER0 = 0, + OCRDMA_CREATE_CQ_DPP = 1, + OCRDMA_CREATE_CQ_TYPE_SHIFT = 24, + OCRDMA_CREATE_CQ_EQID_SHIFT = 22, + + OCRDMA_CREATE_CQ_CNT_SHIFT = 27, + OCRDMA_CREATE_CQ_FLAGS_VALID = Bit(29), + OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = Bit(31), + OCRDMA_CREATE_CQ_DEF_FLAGS = OCRDMA_CREATE_CQ_FLAGS_VALID | + OCRDMA_CREATE_CQ_FLAGS_EVENTABLE | + OCRDMA_CREATE_CQ_FLAGS_NODELAY +}; + +struct ocrdma_create_cq_cmd { + struct ocrdma_mbx_hdr req; + u32 pgsz_pgcnt; + u32 ev_cnt_flags; + u32 eqn; + u16 cqe_count; + u16 pd_id; + u32 rsvd6; + struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES]; +}; + +struct ocrdma_create_cq { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_create_cq_cmd cmd; +}; + +enum { + OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF +}; + +struct 
ocrdma_create_cq_cmd_rsp { + struct ocrdma_mbx_rsp rsp; + u32 cq_id; +}; + +struct ocrdma_create_cq_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_create_cq_cmd_rsp rsp; +}; + +enum { + OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT = 22, + OCRDMA_CREATE_MQ_CQ_ID_SHIFT = 16, + OCRDMA_CREATE_MQ_RING_SIZE_SHIFT = 16, + OCRDMA_CREATE_MQ_VALID = Bit(31), + OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0) +}; + +struct ocrdma_create_mq_req { + struct ocrdma_mbx_hdr req; + u32 cqid_pages; + u32 async_event_bitmap; + u32 async_cqid_ringsize; + u32 valid; + u32 async_cqid_valid; + u32 rsvd; + struct ocrdma_pa pa[8]; +}; + +struct ocrdma_create_mq_rsp { + struct ocrdma_mbx_rsp rsp; + u32 id; +}; + +enum { + OCRDMA_DESTROY_CQ_QID_SHIFT = 0, + OCRDMA_DESTROY_CQ_QID_MASK = 0xFFFF, + OCRDMA_DESTROY_CQ_QID_BYPASS_FLUSH_SHIFT = 16, + OCRDMA_DESTROY_CQ_QID_BYPASS_FLUSH_MASK = 0xFFFF << + OCRDMA_DESTROY_CQ_QID_BYPASS_FLUSH_SHIFT +}; + +struct ocrdma_destroy_cq { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + + u32 bypass_flush_qid; +}; + +struct ocrdma_destroy_cq_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; +}; + +enum { + OCRDMA_QPT_GSI = 1, + OCRDMA_QPT_RC = 2, + OCRDMA_QPT_UD = 4, +}; + +enum { + OCRDMA_CREATE_QP_REQ_PD_ID_SHIFT = 0, + OCRDMA_CREATE_QP_REQ_PD_ID_MASK = 0xFFFF, + OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT = 16, + OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT = 19, + OCRDMA_CREATE_QP_REQ_QPT_SHIFT = 29, + OCRDMA_CREATE_QP_REQ_QPT_MASK = Bit(31) | Bit(30) | Bit(29), + + OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT = 0, + OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK = 0xFFFF, + OCRDMA_CREATE_QP_REQ_MAX_WQE_SHIFT = 16, + OCRDMA_CREATE_QP_REQ_MAX_WQE_MASK = 0xFFFF << + OCRDMA_CREATE_QP_REQ_MAX_WQE_SHIFT, + + OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_SHIFT = 0, + OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_MASK = 0xFFFF, + OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT = 16, + OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_MASK = 0xFFFF << + OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT, + + OCRDMA_CREATE_QP_REQ_FMR_EN_SHIFT = 0, + OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = Bit(0), + OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_SHIFT = 1, + OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = Bit(1), + OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_SHIFT = 2, + OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = Bit(2), + OCRDMA_CREATE_QP_REQ_INB_WREN_SHIFT = 3, + OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = Bit(3), + OCRDMA_CREATE_QP_REQ_INB_RDEN_SHIFT = 4, + OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = Bit(4), + OCRDMA_CREATE_QP_REQ_USE_SRQ_SHIFT = 5, + OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = Bit(5), + OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_SHIFT = 6, + OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = Bit(6), + OCRDMA_CREATE_QP_REQ_ENABLE_DPP_SHIFT = 7, + OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = Bit(7), + OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_SHIFT = 8, + OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = Bit(8), + OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT = 16, + OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK = 0xFFFF << + OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT, + + OCRDMA_CREATE_QP_REQ_MAX_IRD_SHIFT = 0, + OCRDMA_CREATE_QP_REQ_MAX_IRD_MASK = 0xFFFF, + OCRDMA_CREATE_QP_REQ_MAX_ORD_SHIFT = 16, + OCRDMA_CREATE_QP_REQ_MAX_ORD_MASK = 0xFFFF << + OCRDMA_CREATE_QP_REQ_MAX_ORD_SHIFT, + + OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_SHIFT = 0, + OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_MASK = 0xFFFF, + OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_SHIFT = 16, + OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_MASK = 0xFFFF << + OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_SHIFT, + + OCRDMA_CREATE_QP_REQ_RQE_SIZE_SHIFT = 0, + OCRDMA_CREATE_QP_REQ_RQE_SIZE_MASK = 0xFFFF, + OCRDMA_CREATE_QP_REQ_WQE_SIZE_SHIFT = 16, + 
OCRDMA_CREATE_QP_REQ_WQE_SIZE_MASK = 0xFFFF << + OCRDMA_CREATE_QP_REQ_WQE_SIZE_SHIFT, + + OCRDMA_CREATE_QP_REQ_RQ_CQID_SHIFT = 0, + OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK = 0xFFFF, + OCRDMA_CREATE_QP_REQ_WQ_CQID_SHIFT = 16, + OCRDMA_CREATE_QP_REQ_WQ_CQID_MASK = 0xFFFF << + OCRDMA_CREATE_QP_REQ_WQ_CQID_SHIFT, + + OCRDMA_CREATE_QP_REQ_DPP_CQPID_SHIFT = 0, + OCRDMA_CREATE_QP_REQ_DPP_CQPID_MASK = 0xFFFF, + OCRDMA_CREATE_QP_REQ_DPP_CREDIT_SHIFT = 16, + OCRDMA_CREATE_QP_REQ_DPP_CREDIT_MASK = 0xFFFF << + OCRDMA_CREATE_QP_REQ_DPP_CREDIT_SHIFT +}; + +enum { + OCRDMA_CREATE_QP_REQ_DPP_CREDIT_LIMIT = 16, + OCRDMA_CREATE_QP_RSP_DPP_PAGE_SHIFT = 1 +}; + +#define MAX_OCRDMA_IRD_PAGES 4 + +enum ocrdma_qp_flags { + OCRDMA_QP_MW_BIND = 1, + OCRDMA_QP_LKEY0 = (1 << 1), + OCRDMA_QP_FAST_REG = (1 << 2), + OCRDMA_QP_INB_RD = (1 << 6), + OCRDMA_QP_INB_WR = (1 << 7), +}; + +enum ocrdma_qp_state { + OCRDMA_QPS_RST = 0, + OCRDMA_QPS_INIT = 1, + OCRDMA_QPS_RTR = 2, + OCRDMA_QPS_RTS = 3, + OCRDMA_QPS_SQE = 4, + OCRDMA_QPS_SQ_DRAINING = 5, + OCRDMA_QPS_ERR = 6, + OCRDMA_QPS_SQD = 7 +}; + +struct ocrdma_create_qp_req { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + + u32 type_pgsz_pdn; + u32 max_wqe_rqe; + u32 max_sge_send_write; + u32 max_sge_recv_flags; + u32 max_ord_ird; + u32 num_wq_rq_pages; + u32 wqe_rqe_size; + u32 wq_rq_cqid; + struct ocrdma_pa wq_addr[MAX_OCRDMA_QP_PAGES]; + struct ocrdma_pa rq_addr[MAX_OCRDMA_QP_PAGES]; + u32 dpp_credits_cqid; + u32 rpir_lkey; + struct ocrdma_pa ird_addr[MAX_OCRDMA_IRD_PAGES]; +}; + +enum { + OCRDMA_CREATE_QP_RSP_QP_ID_SHIFT = 0, + OCRDMA_CREATE_QP_RSP_QP_ID_MASK = 0xFFFF, + + OCRDMA_CREATE_QP_RSP_MAX_RQE_SHIFT = 0, + OCRDMA_CREATE_QP_RSP_MAX_RQE_MASK = 0xFFFF, + OCRDMA_CREATE_QP_RSP_MAX_WQE_SHIFT = 16, + OCRDMA_CREATE_QP_RSP_MAX_WQE_MASK = 0xFFFF << + OCRDMA_CREATE_QP_RSP_MAX_WQE_SHIFT, + + OCRDMA_CREATE_QP_RSP_MAX_SGE_WRITE_SHIFT = 0, + OCRDMA_CREATE_QP_RSP_MAX_SGE_WRITE_MASK = 0xFFFF, + OCRDMA_CREATE_QP_RSP_MAX_SGE_SEND_SHIFT = 16, + OCRDMA_CREATE_QP_RSP_MAX_SGE_SEND_MASK = 0xFFFF << + OCRDMA_CREATE_QP_RSP_MAX_SGE_SEND_SHIFT, + + OCRDMA_CREATE_QP_RSP_MAX_SGE_RECV_SHIFT = 16, + OCRDMA_CREATE_QP_RSP_MAX_SGE_RECV_MASK = 0xFFFF << + OCRDMA_CREATE_QP_RSP_MAX_SGE_RECV_SHIFT, + + OCRDMA_CREATE_QP_RSP_MAX_IRD_SHIFT = 0, + OCRDMA_CREATE_QP_RSP_MAX_IRD_MASK = 0xFFFF, + OCRDMA_CREATE_QP_RSP_MAX_ORD_SHIFT = 16, + OCRDMA_CREATE_QP_RSP_MAX_ORD_MASK = 0xFFFF << + OCRDMA_CREATE_QP_RSP_MAX_ORD_SHIFT, + + OCRDMA_CREATE_QP_RSP_RQ_ID_SHIFT = 0, + OCRDMA_CREATE_QP_RSP_RQ_ID_MASK = 0xFFFF, + OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT = 16, + OCRDMA_CREATE_QP_RSP_SQ_ID_MASK = 0xFFFF << + OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT, + + OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = Bit(0), + OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT = 1, + OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK = 0x7FFF << + OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT, + OCRDMA_CREATE_QP_RSP_DPP_CREDITS_SHIFT = 16, + OCRDMA_CREATE_QP_RSP_DPP_CREDITS_MASK = 0xFFFF << + OCRDMA_CREATE_QP_RSP_DPP_CREDITS_SHIFT, +}; + +struct ocrdma_create_qp_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u32 qp_id; + u32 max_wqe_rqe; + u32 max_sge_send_write; + u32 max_sge_recv; + u32 max_ord_ird; + u32 sq_rq_id; + u32 dpp_response; +}; + +struct ocrdma_destroy_qp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + u32 qp_id; +}; + +struct ocrdma_destroy_qp_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; +}; + +enum { + OCRDMA_MODIFY_QP_ID_SHIFT = 0, + OCRDMA_MODIFY_QP_ID_MASK = 0xFFFF, + + OCRDMA_QP_PARA_QPS_VALID = 
Bit(0), + OCRDMA_QP_PARA_SQD_ASYNC_VALID = Bit(1), + OCRDMA_QP_PARA_PKEY_VALID = Bit(2), + OCRDMA_QP_PARA_QKEY_VALID = Bit(3), + OCRDMA_QP_PARA_PMTU_VALID = Bit(4), + OCRDMA_QP_PARA_ACK_TO_VALID = Bit(5), + OCRDMA_QP_PARA_RETRY_CNT_VALID = Bit(6), + OCRDMA_QP_PARA_RRC_VALID = Bit(7), + OCRDMA_QP_PARA_RQPSN_VALID = Bit(8), + OCRDMA_QP_PARA_MAX_IRD_VALID = Bit(9), + OCRDMA_QP_PARA_MAX_ORD_VALID = Bit(10), + OCRDMA_QP_PARA_RNT_VALID = Bit(11), + OCRDMA_QP_PARA_SQPSN_VALID = Bit(12), + OCRDMA_QP_PARA_DST_QPN_VALID = Bit(13), + OCRDMA_QP_PARA_MAX_WQE_VALID = Bit(14), + OCRDMA_QP_PARA_MAX_RQE_VALID = Bit(15), + OCRDMA_QP_PARA_SGE_SEND_VALID = Bit(16), + OCRDMA_QP_PARA_SGE_RECV_VALID = Bit(17), + OCRDMA_QP_PARA_SGE_WR_VALID = Bit(18), + OCRDMA_QP_PARA_INB_RDEN_VALID = Bit(19), + OCRDMA_QP_PARA_INB_WREN_VALID = Bit(20), + OCRDMA_QP_PARA_FLOW_LBL_VALID = Bit(21), + OCRDMA_QP_PARA_BIND_EN_VALID = Bit(22), + OCRDMA_QP_PARA_ZLKEY_EN_VALID = Bit(23), + OCRDMA_QP_PARA_FMR_EN_VALID = Bit(24), + OCRDMA_QP_PARA_INBAT_EN_VALID = Bit(25), + OCRDMA_QP_PARA_VLAN_EN_VALID = Bit(26), + + OCRDMA_MODIFY_QP_FLAGS_RD = Bit(0), + OCRDMA_MODIFY_QP_FLAGS_WR = Bit(1), + OCRDMA_MODIFY_QP_FLAGS_SEND = Bit(2), + OCRDMA_MODIFY_QP_FLAGS_ATOMIC = Bit(3) +}; + +enum { + OCRDMA_QP_PARAMS_SRQ_ID_SHIFT = 0, + OCRDMA_QP_PARAMS_SRQ_ID_MASK = 0xFFFF, + + OCRDMA_QP_PARAMS_MAX_RQE_SHIFT = 0, + OCRDMA_QP_PARAMS_MAX_RQE_MASK = 0xFFFF, + OCRDMA_QP_PARAMS_MAX_WQE_SHIFT = 16, + OCRDMA_QP_PARAMS_MAX_WQE_MASK = 0xFFFF << + OCRDMA_QP_PARAMS_MAX_WQE_SHIFT, + + OCRDMA_QP_PARAMS_MAX_SGE_WRITE_SHIFT = 0, + OCRDMA_QP_PARAMS_MAX_SGE_WRITE_MASK = 0xFFFF, + OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT = 16, + OCRDMA_QP_PARAMS_MAX_SGE_SEND_MASK = 0xFFFF << + OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT, + + OCRDMA_QP_PARAMS_FLAGS_FMR_EN = Bit(0), + OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = Bit(1), + OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = Bit(2), + OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = Bit(3), + OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = Bit(4), + OCRDMA_QP_PARAMS_STATE_SHIFT = 5, + OCRDMA_QP_PARAMS_STATE_MASK = Bit(5) | Bit(6) | Bit(7), + OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = Bit(8), + OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = Bit(9), + OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16, + OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF << + OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT, + + OCRDMA_QP_PARAMS_MAX_IRD_SHIFT = 0, + OCRDMA_QP_PARAMS_MAX_IRD_MASK = 0xFFFF, + OCRDMA_QP_PARAMS_MAX_ORD_SHIFT = 16, + OCRDMA_QP_PARAMS_MAX_ORD_MASK = 0xFFFF << + OCRDMA_QP_PARAMS_MAX_ORD_SHIFT, + + OCRDMA_QP_PARAMS_RQ_CQID_SHIFT = 0, + OCRDMA_QP_PARAMS_RQ_CQID_MASK = 0xFFFF, + OCRDMA_QP_PARAMS_WQ_CQID_SHIFT = 16, + OCRDMA_QP_PARAMS_WQ_CQID_MASK = 0xFFFF << + OCRDMA_QP_PARAMS_WQ_CQID_SHIFT, + + OCRDMA_QP_PARAMS_RQ_PSN_SHIFT = 0, + OCRDMA_QP_PARAMS_RQ_PSN_MASK = 0xFFFFFF, + OCRDMA_QP_PARAMS_HOP_LMT_SHIFT = 24, + OCRDMA_QP_PARAMS_HOP_LMT_MASK = 0xFF << + OCRDMA_QP_PARAMS_HOP_LMT_SHIFT, + + OCRDMA_QP_PARAMS_SQ_PSN_SHIFT = 0, + OCRDMA_QP_PARAMS_SQ_PSN_MASK = 0xFFFFFF, + OCRDMA_QP_PARAMS_TCLASS_SHIFT = 24, + OCRDMA_QP_PARAMS_TCLASS_MASK = 0xFF << + OCRDMA_QP_PARAMS_TCLASS_SHIFT, + + OCRDMA_QP_PARAMS_DEST_QPN_SHIFT = 0, + OCRDMA_QP_PARAMS_DEST_QPN_MASK = 0xFFFFFF, + OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT = 24, + OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK = 0x7 << + OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT, + OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT = 27, + OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK = 0x1F << + OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT, + + OCRDMA_QP_PARAMS_PKEY_INDEX_SHIFT = 0, + OCRDMA_QP_PARAMS_PKEY_INDEX_MASK = 0xFFFF, + OCRDMA_QP_PARAMS_PATH_MTU_SHIFT = 18, 
+ OCRDMA_QP_PARAMS_PATH_MTU_MASK = 0x3FFF << + OCRDMA_QP_PARAMS_PATH_MTU_SHIFT, + + OCRDMA_QP_PARAMS_FLOW_LABEL_SHIFT = 0, + OCRDMA_QP_PARAMS_FLOW_LABEL_MASK = 0xFFFFF, + OCRDMA_QP_PARAMS_SL_SHIFT = 20, + OCRDMA_QP_PARAMS_SL_MASK = 0xF << + OCRDMA_QP_PARAMS_SL_SHIFT, + OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT = 24, + OCRDMA_QP_PARAMS_RETRY_CNT_MASK = 0x7 << + OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT, + OCRDMA_QP_PARAMS_RNR_NAK_TIMER_SHIFT = 27, + OCRDMA_QP_PARAMS_RNR_NAK_TIMER_MASK = 0x1F << + OCRDMA_QP_PARAMS_RNR_NAK_TIMER_SHIFT, + + OCRDMA_QP_PARAMS_DMAC_B4_TO_B5_SHIFT = 0, + OCRDMA_QP_PARAMS_DMAC_B4_TO_B5_MASK = 0xFFFF, + OCRDMA_QP_PARAMS_VLAN_SHIFT = 16, + OCRDMA_QP_PARAMS_VLAN_MASK = 0xFFFF << + OCRDMA_QP_PARAMS_VLAN_SHIFT +}; + +struct ocrdma_qp_params { + u32 id; + u32 max_wqe_rqe; + u32 max_sge_send_write; + u32 max_sge_recv_flags; + u32 max_ord_ird; + u32 wq_rq_cqid; + u32 hop_lmt_rq_psn; + u32 tclass_sq_psn; + u32 ack_to_rnr_rtc_dest_qpn; + u32 path_mtu_pkey_indx; + u32 rnt_rc_sl_fl; + u8 sgid[16]; + u8 dgid[16]; + u32 dmac_b0_to_b3; + u32 vlan_dmac_b4_to_b5; + u32 qkey; +}; + + +struct ocrdma_modify_qp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + + struct ocrdma_qp_params params; + u32 flags; + u32 rdma_flags; + u32 num_outstanding_atomic_rd; +}; + +enum { + OCRDMA_MODIFY_QP_RSP_MAX_RQE_SHIFT = 0, + OCRDMA_MODIFY_QP_RSP_MAX_RQE_MASK = 0xFFFF, + OCRDMA_MODIFY_QP_RSP_MAX_WQE_SHIFT = 16, + OCRDMA_MODIFY_QP_RSP_MAX_WQE_MASK = 0xFFFF << + OCRDMA_MODIFY_QP_RSP_MAX_WQE_SHIFT, + + OCRDMA_MODIFY_QP_RSP_MAX_IRD_SHIFT = 0, + OCRDMA_MODIFY_QP_RSP_MAX_IRD_MASK = 0xFFFF, + OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT = 16, + OCRDMA_MODIFY_QP_RSP_MAX_ORD_MASK = 0xFFFF << + OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT +}; + +struct ocrdma_modify_qp_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u32 max_wqe_rqe; + u32 max_ord_ird; +}; + +struct ocrdma_query_qp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + +#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0 +#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF + u32 qp_id; +}; + +struct ocrdma_query_qp_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + struct ocrdma_qp_params params; +}; + +enum { + OCRDMA_CREATE_SRQ_PD_ID_SHIFT = 0, + OCRDMA_CREATE_SRQ_PD_ID_MASK = 0xFFFF, + OCRDMA_CREATE_SRQ_PG_SZ_SHIFT = 16, + OCRDMA_CREATE_SRQ_PG_SZ_MASK = 0x3 << + OCRDMA_CREATE_SRQ_PG_SZ_SHIFT, + + OCRDMA_CREATE_SRQ_MAX_RQE_SHIFT = 0, + OCRDMA_CREATE_SRQ_MAX_SGE_RECV_SHIFT = 16, + OCRDMA_CREATE_SRQ_MAX_SGE_RECV_MASK = 0xFFFF << + OCRDMA_CREATE_SRQ_MAX_SGE_RECV_SHIFT, + + OCRDMA_CREATE_SRQ_RQE_SIZE_SHIFT = 0, + OCRDMA_CREATE_SRQ_RQE_SIZE_MASK = 0xFFFF, + OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_SHIFT = 16, + OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_MASK = 0xFFFF << + OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_SHIFT +}; + +struct ocrdma_create_srq { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + + u32 pgsz_pdid; + u32 max_sge_rqe; + u32 pages_rqe_sz; + struct ocrdma_pa rq_addr[MAX_OCRDMA_SRQ_PAGES]; +}; + +enum { + OCRDMA_CREATE_SRQ_RSP_SRQ_ID_SHIFT = 0, + OCRDMA_CREATE_SRQ_RSP_SRQ_ID_MASK = 0xFFFFFF, + + OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_SHIFT = 0, + OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_MASK = 0xFFFF, + OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_SHIFT = 16, + OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_MASK = 0xFFFF << + OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_SHIFT +}; + +struct ocrdma_create_srq_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u32 id; + u32 max_sge_rqe_allocated; +}; + +enum { + OCRDMA_MODIFY_SRQ_ID_SHIFT = 0, + 
OCRDMA_MODIFY_SRQ_ID_MASK = 0xFFFFFF, + + OCRDMA_MODIFY_SRQ_MAX_RQE_SHIFT = 0, + OCRDMA_MODIFY_SRQ_MAX_RQE_MASK = 0xFFFF, + OCRDMA_MODIFY_SRQ_LIMIT_SHIFT = 16, + OCRDMA_MODIFY_SRQ_LIMIT_MASK = 0xFFFF << + OCRDMA_MODIFY_SRQ_LIMIT_SHIFT +}; + +struct ocrdma_modify_srq { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rep; + + u32 id; + u32 limit_max_rqe; +}; + +enum { + OCRDMA_QUERY_SRQ_ID_SHIFT = 0, + OCRDMA_QUERY_SRQ_ID_MASK = 0xFFFFFF +}; + +struct ocrdma_query_srq { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp req; + + u32 id; +}; + +enum { + OCRDMA_QUERY_SRQ_RSP_PD_ID_SHIFT = 0, + OCRDMA_QUERY_SRQ_RSP_PD_ID_MASK = 0xFFFF, + OCRDMA_QUERY_SRQ_RSP_MAX_RQE_SHIFT = 16, + OCRDMA_QUERY_SRQ_RSP_MAX_RQE_MASK = 0xFFFF << + OCRDMA_QUERY_SRQ_RSP_MAX_RQE_SHIFT, + + OCRDMA_QUERY_SRQ_RSP_MAX_SGE_RECV_SHIFT = 0, + OCRDMA_QUERY_SRQ_RSP_MAX_SGE_RECV_MASK = 0xFFFF, + OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_SHIFT = 16, + OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_MASK = 0xFFFF << + OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_SHIFT +}; + +struct ocrdma_query_srq_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp req; + + u32 max_rqe_pdid; + u32 srq_lmt_max_sge; +}; + +enum { + OCRDMA_DESTROY_SRQ_ID_SHIFT = 0, + OCRDMA_DESTROY_SRQ_ID_MASK = 0xFFFFFF +}; + +struct ocrdma_destroy_srq { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp req; + + u32 id; +}; + +enum { + OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16), + OCRDMA_PD_MAX_DPP_ENABLED_QP = 8, + OCRDMA_DPP_PAGE_SIZE = 4096 +}; + +struct ocrdma_alloc_pd { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + u32 enable_dpp_rsvd; +}; + +enum { + OCRDMA_ALLOC_PD_RSP_DPP = Bit(16), + OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT = 20, + OCRDMA_ALLOC_PD_RSP_PDID_MASK = 0xFFFF, +}; + +struct ocrdma_alloc_pd_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + u32 dpp_page_pdid; +}; + +struct ocrdma_dealloc_pd { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + u32 id; +}; + +struct ocrdma_dealloc_pd_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; +}; + +enum { + OCRDMA_ADDR_CHECK_ENABLE = 1, + OCRDMA_ADDR_CHECK_DISABLE = 0 +}; + +enum { + OCRDMA_ALLOC_LKEY_PD_ID_SHIFT = 0, + OCRDMA_ALLOC_LKEY_PD_ID_MASK = 0xFFFF, + + OCRDMA_ALLOC_LKEY_ADDR_CHECK_SHIFT = 0, + OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = Bit(0), + OCRDMA_ALLOC_LKEY_FMR_SHIFT = 1, + OCRDMA_ALLOC_LKEY_FMR_MASK = Bit(1), + OCRDMA_ALLOC_LKEY_REMOTE_INV_SHIFT = 2, + OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = Bit(2), + OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT = 3, + OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = Bit(3), + OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT = 4, + OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = Bit(4), + OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT = 5, + OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = Bit(5), + OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = Bit(6), + OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT = 6, + OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT = 16, + OCRDMA_ALLOC_LKEY_PBL_SIZE_MASK = 0xFFFF << + OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT +}; + +struct ocrdma_alloc_lkey { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + + u32 pdid; + u32 pbl_sz_flags; +}; + +struct ocrdma_alloc_lkey_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u32 lrkey; + u32 num_pbl_rsvd; +}; + +struct ocrdma_dealloc_lkey { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + + u32 lkey; + u32 rsvd_frmr; +}; + +struct ocrdma_dealloc_lkey_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; +}; + +#define MAX_OCRDMA_NSMR_PBL (u32)22 +#define MAX_OCRDMA_PBL_SIZE 65536 +#define MAX_OCRDMA_PBL_PER_LKEY 32767 + +enum { + 
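/* REGISTER_NSMR field encodings: the returned lrkey splits into a 24-bit key index and an 8-bit key, and flags_hpage_pbe_sz in the request packs the PBE size, host page size, and access flags. */ +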
OCRDMA_REG_NSMR_LRKEY_INDEX_SHIFT = 0, + OCRDMA_REG_NSMR_LRKEY_INDEX_MASK = 0xFFFFFF, + OCRDMA_REG_NSMR_LRKEY_SHIFT = 24, + OCRDMA_REG_NSMR_LRKEY_MASK = 0xFF << + OCRDMA_REG_NSMR_LRKEY_SHIFT, + + OCRDMA_REG_NSMR_PD_ID_SHIFT = 0, + OCRDMA_REG_NSMR_PD_ID_MASK = 0xFFFF, + OCRDMA_REG_NSMR_NUM_PBL_SHIFT = 16, + OCRDMA_REG_NSMR_NUM_PBL_MASK = 0xFFFF << + OCRDMA_REG_NSMR_NUM_PBL_SHIFT, + + OCRDMA_REG_NSMR_PBE_SIZE_SHIFT = 0, + OCRDMA_REG_NSMR_PBE_SIZE_MASK = 0xFFFF, + OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT = 16, + OCRDMA_REG_NSMR_HPAGE_SIZE_MASK = 0xFF << + OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT, + OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT = 24, + OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = Bit(24), + OCRDMA_REG_NSMR_ZB_SHIFT = 25, + OCRDMA_REG_NSMR_ZB_SHIFT_MASK = Bit(25), + OCRDMA_REG_NSMR_REMOTE_INV_SHIFT = 26, + OCRDMA_REG_NSMR_REMOTE_INV_MASK = Bit(26), + OCRDMA_REG_NSMR_REMOTE_WR_SHIFT = 27, + OCRDMA_REG_NSMR_REMOTE_WR_MASK = Bit(27), + OCRDMA_REG_NSMR_REMOTE_RD_SHIFT = 28, + OCRDMA_REG_NSMR_REMOTE_RD_MASK = Bit(28), + OCRDMA_REG_NSMR_LOCAL_WR_SHIFT = 29, + OCRDMA_REG_NSMR_LOCAL_WR_MASK = Bit(29), + OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT = 30, + OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = Bit(30), + OCRDMA_REG_NSMR_LAST_SHIFT = 31, + OCRDMA_REG_NSMR_LAST_MASK = Bit(31) +}; + +struct ocrdma_reg_nsmr { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr cmd; + + u32 fr_mr; + u32 num_pbl_pdid; + u32 flags_hpage_pbe_sz; + u32 totlen_low; + u32 totlen_high; + u32 fbo_low; + u32 fbo_high; + u32 va_loaddr; + u32 va_hiaddr; + struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL]; +}; + +enum { + OCRDMA_REG_NSMR_CONT_PBL_SHIFT = 0, + OCRDMA_REG_NSMR_CONT_PBL_SHIFT_MASK = 0xFFFF, + OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT = 16, + OCRDMA_REG_NSMR_CONT_NUM_PBL_MASK = 0xFFFF << + OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT, + + OCRDMA_REG_NSMR_CONT_LAST_SHIFT = 31, + OCRDMA_REG_NSMR_CONT_LAST_MASK = Bit(31) +}; + +struct ocrdma_reg_nsmr_cont { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr cmd; + + u32 lrkey; + u32 num_pbl_offset; + u32 last; + + struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL]; +}; + +struct ocrdma_pbe { + u32 pa_hi; + u32 pa_lo; +}; + +enum { + OCRDMA_REG_NSMR_RSP_NUM_PBL_SHIFT = 16, + OCRDMA_REG_NSMR_RSP_NUM_PBL_MASK = 0xFFFF0000 +}; +struct ocrdma_reg_nsmr_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u32 lrkey; + u32 num_pbl; +}; + +enum { + OCRDMA_REG_NSMR_CONT_RSP_LRKEY_INDEX_SHIFT = 0, + OCRDMA_REG_NSMR_CONT_RSP_LRKEY_INDEX_MASK = 0xFFFFFF, + OCRDMA_REG_NSMR_CONT_RSP_LRKEY_SHIFT = 24, + OCRDMA_REG_NSMR_CONT_RSP_LRKEY_MASK = 0xFF << + OCRDMA_REG_NSMR_CONT_RSP_LRKEY_SHIFT, + + OCRDMA_REG_NSMR_CONT_RSP_NUM_PBL_SHIFT = 16, + OCRDMA_REG_NSMR_CONT_RSP_NUM_PBL_MASK = 0xFFFF << + OCRDMA_REG_NSMR_CONT_RSP_NUM_PBL_SHIFT +}; + +struct ocrdma_reg_nsmr_cont_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u32 lrkey_key_index; + u32 num_pbl; +}; + +enum { + OCRDMA_ALLOC_MW_PD_ID_SHIFT = 0, + OCRDMA_ALLOC_MW_PD_ID_MASK = 0xFFFF +}; + +struct ocrdma_alloc_mw { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + + u32 pdid; +}; + +enum { + OCRDMA_ALLOC_MW_RSP_LRKEY_INDEX_SHIFT = 0, + OCRDMA_ALLOC_MW_RSP_LRKEY_INDEX_MASK = 0xFFFFFF +}; + +struct ocrdma_alloc_mw_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u32 lrkey_index; +}; + +struct ocrdma_attach_mcast { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + u32 qp_id; + u8 mgid[16]; + u32 mac_b0_to_b3; + u32 vlan_mac_b4_to_b5; +}; + +struct ocrdma_attach_mcast_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; +}; + 
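The ATTACH_MCAST request above splits the multicast MAC across mac_b0_to_b3 and vlan_mac_b4_to_b5, the same split used by dmac_b0_to_b3/vlan_dmac_b4_to_b5 in ocrdma_qp_params. A minimal sketch of filling such a command, assuming the VLAN tag occupies the upper half of the second word as it does for the analogous QP-params field; the helper name and the exact byte order of mac_b0_to_b3 are illustrative assumptions, not taken from this patch:

/* Illustrative sketch only; assumes the kernel types and memcpy()
 * from <linux/string.h> available in this driver's context.
 */
static void ocrdma_fill_attach_mcast(struct ocrdma_attach_mcast *cmd,
				     u32 qp_id, const u8 mgid[16],
				     const u8 mac[6], u16 vlan_id)
{
	cmd->qp_id = qp_id;
	memcpy(cmd->mgid, mgid, 16);
	/* assumed packing: MAC bytes 0..3 fill mac_b0_to_b3 in order */
	memcpy(&cmd->mac_b0_to_b3, mac, 4);
	/* bytes 4..5 in bits 15:0; VLAN tag in bits 31:16, mirroring
	 * OCRDMA_QP_PARAMS_VLAN_SHIFT for the corresponding QP field
	 */
	cmd->vlan_mac_b4_to_b5 = mac[4] | (mac[5] << 8) |
				 ((u32)vlan_id << 16);
}

As elsewhere in these SLI definitions, fields are written with or-equals against pre-shifted values, so a zeroed command buffer (the ocrdma_init_emb_mqe() pattern used by the mailbox routines) is assumed before packing.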
+struct ocrdma_detach_mcast { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + u32 qp_id; + u8 mgid[16]; + u32 mac_b0_to_b3; + u32 vlan_mac_b4_to_b5; +}; + +struct ocrdma_detach_mcast_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; +}; + +enum { + OCRDMA_CREATE_AH_NUM_PAGES_SHIFT = 19, + OCRDMA_CREATE_AH_NUM_PAGES_MASK = 0xF << + OCRDMA_CREATE_AH_NUM_PAGES_SHIFT, + + OCRDMA_CREATE_AH_PAGE_SIZE_SHIFT = 16, + OCRDMA_CREATE_AH_PAGE_SIZE_MASK = 0x7 << + OCRDMA_CREATE_AH_PAGE_SIZE_SHIFT, + + OCRDMA_CREATE_AH_ENTRY_SIZE_SHIFT = 23, + OCRDMA_CREATE_AH_ENTRY_SIZE_MASK = 0x1FF << + OCRDMA_CREATE_AH_ENTRY_SIZE_SHIFT, +}; + +#define OCRDMA_AH_TBL_PAGES 8 + +struct ocrdma_create_ah_tbl { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + + u32 ah_conf; + struct ocrdma_pa tbl_addr[8]; +}; + +struct ocrdma_create_ah_tbl_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + u32 ahid; +}; + +struct ocrdma_delete_ah_tbl { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_hdr req; + u32 ahid; +}; + +struct ocrdma_delete_ah_tbl_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; +}; + +enum { + OCRDMA_EQE_VALID_SHIFT = 0, + OCRDMA_EQE_VALID_MASK = Bit(0), + OCRDMA_EQE_FOR_CQE_MASK = 0xFFFE, + OCRDMA_EQE_RESOURCE_ID_SHIFT = 16, + OCRDMA_EQE_RESOURCE_ID_MASK = 0xFFFF << + OCRDMA_EQE_RESOURCE_ID_SHIFT, +}; + +struct ocrdma_eqe { + u32 id_valid; +}; + +enum OCRDMA_CQE_STATUS { + OCRDMA_CQE_SUCCESS = 0, + OCRDMA_CQE_LOC_LEN_ERR, + OCRDMA_CQE_LOC_QP_OP_ERR, + OCRDMA_CQE_LOC_EEC_OP_ERR, + OCRDMA_CQE_LOC_PROT_ERR, + OCRDMA_CQE_WR_FLUSH_ERR, + OCRDMA_CQE_MW_BIND_ERR, + OCRDMA_CQE_BAD_RESP_ERR, + OCRDMA_CQE_LOC_ACCESS_ERR, + OCRDMA_CQE_REM_INV_REQ_ERR, + OCRDMA_CQE_REM_ACCESS_ERR, + OCRDMA_CQE_REM_OP_ERR, + OCRDMA_CQE_RETRY_EXC_ERR, + OCRDMA_CQE_RNR_RETRY_EXC_ERR, + OCRDMA_CQE_LOC_RDD_VIOL_ERR, + OCRDMA_CQE_REM_INV_RD_REQ_ERR, + OCRDMA_CQE_REM_ABORT_ERR, + OCRDMA_CQE_INV_EECN_ERR, + OCRDMA_CQE_INV_EEC_STATE_ERR, + OCRDMA_CQE_FATAL_ERR, + OCRDMA_CQE_RESP_TIMEOUT_ERR, + OCRDMA_CQE_GENERAL_ERR +}; + +enum { + /* w0 */ + OCRDMA_CQE_WQEIDX_SHIFT = 0, + OCRDMA_CQE_WQEIDX_MASK = 0xFFFF, + + /* w1 */ + OCRDMA_CQE_UD_XFER_LEN_SHIFT = 16, + OCRDMA_CQE_PKEY_SHIFT = 0, + OCRDMA_CQE_PKEY_MASK = 0xFFFF, + + /* w2 */ + OCRDMA_CQE_QPN_SHIFT = 0, + OCRDMA_CQE_QPN_MASK = 0x0000FFFF, + + OCRDMA_CQE_BUFTAG_SHIFT = 16, + OCRDMA_CQE_BUFTAG_MASK = 0xFFFF << OCRDMA_CQE_BUFTAG_SHIFT, + + /* w3 */ + OCRDMA_CQE_UD_STATUS_SHIFT = 24, + OCRDMA_CQE_UD_STATUS_MASK = 0x7 << OCRDMA_CQE_UD_STATUS_SHIFT, + OCRDMA_CQE_STATUS_SHIFT = 16, + OCRDMA_CQE_STATUS_MASK = 0xFF << OCRDMA_CQE_STATUS_SHIFT, + OCRDMA_CQE_VALID = Bit(31), + OCRDMA_CQE_INVALIDATE = Bit(30), + OCRDMA_CQE_QTYPE = Bit(29), + OCRDMA_CQE_IMM = Bit(28), + OCRDMA_CQE_WRITE_IMM = Bit(27), + OCRDMA_CQE_QTYPE_SQ = 0, + OCRDMA_CQE_QTYPE_RQ = 1, + OCRDMA_CQE_SRCQP_MASK = 0xFFFFFF +}; + +struct ocrdma_cqe { + union { + /* w0 to w2 */ + struct { + u32 wqeidx; + u32 bytes_xfered; + u32 qpn; + } wq; + struct { + u32 lkey_immdt; + u32 rxlen; + u32 buftag_qpn; + } rq; + struct { + u32 lkey_immdt; + u32 rxlen_pkey; + u32 buftag_qpn; + } ud; + struct { + u32 word_0; + u32 word_1; + u32 qpn; + } cmn; + }; + u32 flags_status_srcqpn; /* w3 */ +}; + +struct ocrdma_sge { + u32 addr_hi; + u32 addr_lo; + u32 lrkey; + u32 len; +}; + +enum { + OCRDMA_FLAG_SIG = 0x1, + OCRDMA_FLAG_INV = 0x2, + OCRDMA_FLAG_FENCE_L = 0x4, + OCRDMA_FLAG_FENCE_R = 0x8, + OCRDMA_FLAG_SOLICIT = 0x10, + OCRDMA_FLAG_IMM = 0x20, + + /* Stag flags */ + OCRDMA_LKEY_FLAG_LOCAL_WR = 
0x1, + OCRDMA_LKEY_FLAG_REMOTE_RD = 0x2, + OCRDMA_LKEY_FLAG_REMOTE_WR = 0x4, + OCRDMA_LKEY_FLAG_VATO = 0x8, +}; + +enum OCRDMA_WQE_OPCODE { + OCRDMA_WRITE = 0x06, + OCRDMA_READ = 0x0C, + OCRDMA_RESV0 = 0x02, + OCRDMA_SEND = 0x00, + OCRDMA_CMP_SWP = 0x14, + OCRDMA_BIND_MW = 0x10, + OCRDMA_FR_MR = 0x11, + OCRDMA_RESV1 = 0x0A, + OCRDMA_LKEY_INV = 0x15, + OCRDMA_FETCH_ADD = 0x13, + OCRDMA_POST_RQ = 0x12 +}; + +enum { + OCRDMA_TYPE_INLINE = 0x0, + OCRDMA_TYPE_LKEY = 0x1, +}; + +enum { + OCRDMA_WQE_OPCODE_SHIFT = 0, + OCRDMA_WQE_OPCODE_MASK = 0x0000001F, + OCRDMA_WQE_FLAGS_SHIFT = 5, + OCRDMA_WQE_TYPE_SHIFT = 16, + OCRDMA_WQE_TYPE_MASK = 0x00030000, + OCRDMA_WQE_SIZE_SHIFT = 18, + OCRDMA_WQE_SIZE_MASK = 0xFF, + OCRDMA_WQE_NXT_WQE_SIZE_SHIFT = 25, + + OCRDMA_WQE_LKEY_FLAGS_SHIFT = 0, + OCRDMA_WQE_LKEY_FLAGS_MASK = 0xF +}; + +/* header WQE for all the SQ and RQ operations */ +struct ocrdma_hdr_wqe { + u32 cw; + union { + u32 rsvd_tag; + u32 rsvd_lkey_flags; + }; + union { + u32 immdt; + u32 lkey; + }; + u32 total_len; +}; + +struct ocrdma_ewqe_ud_hdr { + u32 rsvd_dest_qpn; + u32 qkey; + u32 rsvd_ahid; + u32 rsvd; +}; + +/* extended wqe followed by hdr_wqe for Fast Memory register */ +struct ocrdma_ewqe_fr { + u32 va_hi; + u32 va_lo; + u32 fbo_hi; + u32 fbo_lo; + u32 size_sge; + u32 num_sges; + u32 rsvd; + u32 rsvd2; +}; + +struct ocrdma_eth_basic { + u8 dmac[6]; + u8 smac[6]; + __be16 eth_type; +} __packed; + +struct ocrdma_eth_vlan { + u8 dmac[6]; + u8 smac[6]; + __be16 eth_type; + __be16 vlan_tag; +#define OCRDMA_ROCE_ETH_TYPE 0x8915 + __be16 roce_eth_type; +} __packed; + +struct ocrdma_grh { + __be32 tclass_flow; + __be32 pdid_hoplimit; + u8 sgid[16]; + u8 dgid[16]; + u16 rsvd; +} __packed; + +#define OCRDMA_AV_VALID Bit(7) +#define OCRDMA_AV_VLAN_VALID Bit(1) + +struct ocrdma_av { + struct ocrdma_eth_vlan eth_hdr; + struct ocrdma_grh grh; + u32 valid; +} __packed; + +struct ocrdma_rsrc_stats { + u32 dpp_pds; + u32 non_dpp_pds; + u32 rc_dpp_qps; + u32 uc_dpp_qps; + u32 ud_dpp_qps; + u32 rc_non_dpp_qps; + u32 rsvd; + u32 uc_non_dpp_qps; + u32 ud_non_dpp_qps; + u32 rsvd1; + u32 srqs; + u32 rbqs; + u32 r64K_nsmr; + u32 r64K_to_2M_nsmr; + u32 r2M_to_44M_nsmr; + u32 r44M_to_1G_nsmr; + u32 r1G_to_4G_nsmr; + u32 nsmr_count_4G_to_32G; + u32 r32G_to_64G_nsmr; + u32 r64G_to_128G_nsmr; + u32 r128G_to_higher_nsmr; + u32 embedded_nsmr; + u32 frmr; + u32 prefetch_qps; + u32 ondemand_qps; + u32 phy_mr; + u32 mw; + u32 rsvd2[7]; +}; + +struct ocrdma_db_err_stats { + u32 sq_doorbell_errors; + u32 cq_doorbell_errors; + u32 rq_srq_doorbell_errors; + u32 cq_overflow_errors; + u32 rsvd[4]; +}; + +struct ocrdma_wqe_stats { + u32 large_send_rc_wqes_lo; + u32 large_send_rc_wqes_hi; + u32 large_write_rc_wqes_lo; + u32 large_write_rc_wqes_hi; + u32 rsvd[4]; + u32 read_wqes_lo; + u32 read_wqes_hi; + u32 frmr_wqes_lo; + u32 frmr_wqes_hi; + u32 mw_bind_wqes_lo; + u32 mw_bind_wqes_hi; + u32 invalidate_wqes_lo; + u32 invalidate_wqes_hi; + u32 rsvd1[2]; + u32 dpp_wqe_drops; + u32 rsvd2[5]; +}; + +struct ocrdma_tx_stats { + u32 send_pkts_lo; + u32 send_pkts_hi; + u32 write_pkts_lo; + u32 write_pkts_hi; + u32 read_pkts_lo; + u32 read_pkts_hi; + u32 read_rsp_pkts_lo; + u32 read_rsp_pkts_hi; + u32 ack_pkts_lo; + u32 ack_pkts_hi; + u32 send_bytes_lo; + u32 send_bytes_hi; + u32 write_bytes_lo; + u32 write_bytes_hi; + u32 read_req_bytes_lo; + u32 read_req_bytes_hi; + u32 read_rsp_bytes_lo; + u32 read_rsp_bytes_hi; + u32 ack_timeouts; + u32 rsvd[5]; +}; + + +struct ocrdma_tx_qp_err_stats { + u32 local_length_errors; + u32 
local_protection_errors; + u32 local_qp_operation_errors; + u32 retry_count_exceeded_errors; + u32 rnr_retry_count_exceeded_errors; + u32 rsvd[3]; +}; + +struct ocrdma_rx_stats { + u32 roce_frame_bytes_lo; + u32 roce_frame_bytes_hi; + u32 roce_frame_icrc_drops; + u32 roce_frame_payload_len_drops; + u32 ud_drops; + u32 qp1_drops; + u32 psn_error_request_packets; + u32 psn_error_resp_packets; + u32 rnr_nak_timeouts; + u32 rnr_nak_receives; + u32 roce_frame_rxmt_drops; + u32 nak_count_psn_sequence_errors; + u32 rc_drop_count_lookup_errors; + u32 rq_rnr_naks; + u32 srq_rnr_naks; + u32 roce_frames_lo; + u32 roce_frames_hi; + u32 rsvd; +}; + +struct ocrdma_rx_qp_err_stats { + u32 nak_invalid_request_errors; + u32 nak_remote_operation_errors; + u32 nak_count_remote_access_errors; + u32 local_length_errors; + u32 local_protection_errors; + u32 local_qp_operation_errors; + u32 rsvd[2]; +}; + +struct ocrdma_tx_dbg_stats { + u32 data[100]; +}; + +struct ocrdma_rx_dbg_stats { + u32 data[200]; +}; + +struct ocrdma_rdma_stats_req { + struct ocrdma_mbx_hdr hdr; + u8 reset_stats; + u8 rsvd[3]; +} __packed; + +struct ocrdma_rdma_stats_resp { + struct ocrdma_mbx_hdr hdr; + struct ocrdma_rsrc_stats act_rsrc_stats; + struct ocrdma_rsrc_stats th_rsrc_stats; + struct ocrdma_db_err_stats db_err_stats; + struct ocrdma_wqe_stats wqe_stats; + struct ocrdma_tx_stats tx_stats; + struct ocrdma_tx_qp_err_stats tx_qp_err_stats; + struct ocrdma_rx_stats rx_stats; + struct ocrdma_rx_qp_err_stats rx_qp_err_stats; + struct ocrdma_tx_dbg_stats tx_dbg_stats; + struct ocrdma_rx_dbg_stats rx_dbg_stats; +} __packed; + + +struct mgmt_hba_attribs { + u8 flashrom_version_string[32]; + u8 manufacturer_name[32]; + u32 supported_modes; + u32 rsvd0[3]; + u8 ncsi_ver_string[12]; + u32 default_extended_timeout; + u8 controller_model_number[32]; + u8 controller_description[64]; + u8 controller_serial_number[32]; + u8 ip_version_string[32]; + u8 firmware_version_string[32]; + u8 bios_version_string[32]; + u8 redboot_version_string[32]; + u8 driver_version_string[32]; + u8 fw_on_flash_version_string[32]; + u32 functionalities_supported; + u16 max_cdblength; + u8 asic_revision; + u8 generational_guid[16]; + u8 hba_port_count; + u16 default_link_down_timeout; + u8 iscsi_ver_min_max; + u8 multifunction_device; + u8 cache_valid; + u8 hba_status; + u8 max_domains_supported; + u8 phy_port; + u32 firmware_post_status; + u32 hba_mtu[8]; + u32 rsvd1[4]; +}; + +struct mgmt_controller_attrib { + struct mgmt_hba_attribs hba_attribs; + u16 pci_vendor_id; + u16 pci_device_id; + u16 pci_sub_vendor_id; + u16 pci_sub_system_id; + u8 pci_bus_number; + u8 pci_device_number; + u8 pci_function_number; + u8 interface_type; + u64 unique_identifier; + u32 rsvd0[5]; +}; + +struct ocrdma_get_ctrl_attribs_rsp { + struct ocrdma_mbx_hdr hdr; + struct mgmt_controller_attrib ctrl_attribs; +}; + + +#endif /* __OCRDMA_SLI_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c new file mode 100644 index 00000000000..41a9aec9998 --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -0,0 +1,616 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2014 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex.
* + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#include <rdma/ib_addr.h> +#include "ocrdma_stats.h" + +static struct dentry *ocrdma_dbgfs_dir; + +static int ocrdma_add_stat(char *start, char *pcur, + char *name, u64 count) +{ + char buff[128] = {0}; + int cpy_len = 0; + + snprintf(buff, 128, "%s: %llu\n", name, count); + cpy_len = strlen(buff); + + if (pcur + cpy_len > start + OCRDMA_MAX_DBGFS_MEM) { + pr_err("%s: No space in stats buff\n", __func__); + return 0; + } + + memcpy(pcur, buff, cpy_len); + return cpy_len; +} + +static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev) +{ + struct stats_mem *mem = &dev->stats_mem; + + /* Alloc mbox command mem*/ + mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req), + sizeof(struct ocrdma_rdma_stats_resp)); + + mem->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, mem->size, + &mem->pa, GFP_KERNEL); + if (!mem->va) { + pr_err("%s: stats mbox allocation failed\n", __func__); + return false; + } + + memset(mem->va, 0, mem->size); + + /* Alloc debugfs mem */ + mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL); + if (!mem->debugfs_mem) { + pr_err("%s: stats debugfs mem allocation failed\n", __func__); + return false; + } + + return true; +} + +static void ocrdma_release_stats_mem(struct ocrdma_dev *dev) +{ + struct stats_mem *mem = &dev->stats_mem; + + if (mem->va) + dma_free_coherent(&dev->nic_info.pdev->dev, mem->size, + mem->va, mem->pa); + kfree(mem->debugfs_mem); +} + +static char *ocrdma_resource_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "active_dpp_pds", + (u64)rsrc_stats->dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "active_non_dpp_pds", + (u64)rsrc_stats->non_dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "active_rc_dpp_qps", + (u64)rsrc_stats->rc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_uc_dpp_qps", + (u64)rsrc_stats->uc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_ud_dpp_qps", + (u64)rsrc_stats->ud_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_rc_non_dpp_qps", + (u64)rsrc_stats->rc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_uc_non_dpp_qps", + (u64)rsrc_stats->uc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_ud_non_dpp_qps", + (u64)rsrc_stats->ud_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_srqs", + (u64)rsrc_stats->srqs); + pcur += 
ocrdma_add_stat(stats, pcur, "active_rbqs", + (u64)rsrc_stats->rbqs); + pcur += ocrdma_add_stat(stats, pcur, "active_64K_nsmr", + (u64)rsrc_stats->r64K_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_64K_to_2M_nsmr", + (u64)rsrc_stats->r64K_to_2M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_2M_to_44M_nsmr", + (u64)rsrc_stats->r2M_to_44M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_44M_to_1G_nsmr", + (u64)rsrc_stats->r44M_to_1G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_1G_to_4G_nsmr", + (u64)rsrc_stats->r1G_to_4G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_nsmr_count_4G_to_32G", + (u64)rsrc_stats->nsmr_count_4G_to_32G); + pcur += ocrdma_add_stat(stats, pcur, "active_32G_to_64G_nsmr", + (u64)rsrc_stats->r32G_to_64G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_64G_to_128G_nsmr", + (u64)rsrc_stats->r64G_to_128G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_128G_to_higher_nsmr", + (u64)rsrc_stats->r128G_to_higher_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_embedded_nsmr", + (u64)rsrc_stats->embedded_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_frmr", + (u64)rsrc_stats->frmr); + pcur += ocrdma_add_stat(stats, pcur, "active_prefetch_qps", + (u64)rsrc_stats->prefetch_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_ondemand_qps", + (u64)rsrc_stats->ondemand_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_phy_mr", + (u64)rsrc_stats->phy_mr); + pcur += ocrdma_add_stat(stats, pcur, "active_mw", + (u64)rsrc_stats->mw); + + /* Print the threshold stats */ + rsrc_stats = &rdma_stats->th_rsrc_stats; + + pcur += ocrdma_add_stat(stats, pcur, "threshold_dpp_pds", + (u64)rsrc_stats->dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "threshold_non_dpp_pds", + (u64)rsrc_stats->non_dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_dpp_qps", + (u64)rsrc_stats->rc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_dpp_qps", + (u64)rsrc_stats->uc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_dpp_qps", + (u64)rsrc_stats->ud_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_non_dpp_qps", + (u64)rsrc_stats->rc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_non_dpp_qps", + (u64)rsrc_stats->uc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_non_dpp_qps", + (u64)rsrc_stats->ud_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_srqs", + (u64)rsrc_stats->srqs); + pcur += ocrdma_add_stat(stats, pcur, "threshold_rbqs", + (u64)rsrc_stats->rbqs); + pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_nsmr", + (u64)rsrc_stats->r64K_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_to_2M_nsmr", + (u64)rsrc_stats->r64K_to_2M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_2M_to_44M_nsmr", + (u64)rsrc_stats->r2M_to_44M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_44M_to_1G_nsmr", + (u64)rsrc_stats->r44M_to_1G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_1G_to_4G_nsmr", + (u64)rsrc_stats->r1G_to_4G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_nsmr_count_4G_to_32G", + (u64)rsrc_stats->nsmr_count_4G_to_32G); + pcur += ocrdma_add_stat(stats, pcur, "threshold_32G_to_64G_nsmr", + (u64)rsrc_stats->r32G_to_64G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_64G_to_128G_nsmr", + (u64)rsrc_stats->r64G_to_128G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_128G_to_higher_nsmr", + (u64)rsrc_stats->r128G_to_higher_nsmr); + pcur += ocrdma_add_stat(stats, pcur, 
"threshold_embedded_nsmr", + (u64)rsrc_stats->embedded_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_frmr", + (u64)rsrc_stats->frmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_prefetch_qps", + (u64)rsrc_stats->prefetch_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_ondemand_qps", + (u64)rsrc_stats->ondemand_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_phy_mr", + (u64)rsrc_stats->phy_mr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_mw", + (u64)rsrc_stats->mw); + return stats; +} + +static char *ocrdma_rx_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rx_stats *rx_stats = &rdma_stats->rx_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat + (stats, pcur, "roce_frame_bytes", + convert_to_64bit(rx_stats->roce_frame_bytes_lo, + rx_stats->roce_frame_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "roce_frame_icrc_drops", + (u64)rx_stats->roce_frame_icrc_drops); + pcur += ocrdma_add_stat(stats, pcur, "roce_frame_payload_len_drops", + (u64)rx_stats->roce_frame_payload_len_drops); + pcur += ocrdma_add_stat(stats, pcur, "ud_drops", + (u64)rx_stats->ud_drops); + pcur += ocrdma_add_stat(stats, pcur, "qp1_drops", + (u64)rx_stats->qp1_drops); + pcur += ocrdma_add_stat(stats, pcur, "psn_error_request_packets", + (u64)rx_stats->psn_error_request_packets); + pcur += ocrdma_add_stat(stats, pcur, "psn_error_resp_packets", + (u64)rx_stats->psn_error_resp_packets); + pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_timeouts", + (u64)rx_stats->rnr_nak_timeouts); + pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_receives", + (u64)rx_stats->rnr_nak_receives); + pcur += ocrdma_add_stat(stats, pcur, "roce_frame_rxmt_drops", + (u64)rx_stats->roce_frame_rxmt_drops); + pcur += ocrdma_add_stat(stats, pcur, "nak_count_psn_sequence_errors", + (u64)rx_stats->nak_count_psn_sequence_errors); + pcur += ocrdma_add_stat(stats, pcur, "rc_drop_count_lookup_errors", + (u64)rx_stats->rc_drop_count_lookup_errors); + pcur += ocrdma_add_stat(stats, pcur, "rq_rnr_naks", + (u64)rx_stats->rq_rnr_naks); + pcur += ocrdma_add_stat(stats, pcur, "srq_rnr_naks", + (u64)rx_stats->srq_rnr_naks); + pcur += ocrdma_add_stat(stats, pcur, "roce_frames", + convert_to_64bit(rx_stats->roce_frames_lo, + rx_stats->roce_frames_hi)); + + return stats; +} + +static char *ocrdma_tx_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_tx_stats *tx_stats = &rdma_stats->tx_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "send_pkts", + convert_to_64bit(tx_stats->send_pkts_lo, + tx_stats->send_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "write_pkts", + convert_to_64bit(tx_stats->write_pkts_lo, + tx_stats->write_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_pkts", + convert_to_64bit(tx_stats->read_pkts_lo, + tx_stats->read_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_rsp_pkts", + convert_to_64bit(tx_stats->read_rsp_pkts_lo, + tx_stats->read_rsp_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "ack_pkts", + convert_to_64bit(tx_stats->ack_pkts_lo, + tx_stats->ack_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "send_bytes", + convert_to_64bit(tx_stats->send_bytes_lo, + tx_stats->send_bytes_hi)); 
+ pcur += ocrdma_add_stat(stats, pcur, "write_bytes", + convert_to_64bit(tx_stats->write_bytes_lo, + tx_stats->write_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_req_bytes", + convert_to_64bit(tx_stats->read_req_bytes_lo, + tx_stats->read_req_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_rsp_bytes", + convert_to_64bit(tx_stats->read_rsp_bytes_lo, + tx_stats->read_rsp_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "ack_timeouts", + (u64)tx_stats->ack_timeouts); + + return stats; +} + +static char *ocrdma_wqe_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_wqe_stats *wqe_stats = &rdma_stats->wqe_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "large_send_rc_wqes", + convert_to_64bit(wqe_stats->large_send_rc_wqes_lo, + wqe_stats->large_send_rc_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "large_write_rc_wqes", + convert_to_64bit(wqe_stats->large_write_rc_wqes_lo, + wqe_stats->large_write_rc_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_wqes", + convert_to_64bit(wqe_stats->read_wqes_lo, + wqe_stats->read_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "frmr_wqes", + convert_to_64bit(wqe_stats->frmr_wqes_lo, + wqe_stats->frmr_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "mw_bind_wqes", + convert_to_64bit(wqe_stats->mw_bind_wqes_lo, + wqe_stats->mw_bind_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "invalidate_wqes", + convert_to_64bit(wqe_stats->invalidate_wqes_lo, + wqe_stats->invalidate_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "dpp_wqe_drops", + (u64)wqe_stats->dpp_wqe_drops); + return stats; +} + +static char *ocrdma_db_errstats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_db_err_stats *db_err_stats = &rdma_stats->db_err_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "sq_doorbell_errors", + (u64)db_err_stats->sq_doorbell_errors); + pcur += ocrdma_add_stat(stats, pcur, "cq_doorbell_errors", + (u64)db_err_stats->cq_doorbell_errors); + pcur += ocrdma_add_stat(stats, pcur, "rq_srq_doorbell_errors", + (u64)db_err_stats->rq_srq_doorbell_errors); + pcur += ocrdma_add_stat(stats, pcur, "cq_overflow_errors", + (u64)db_err_stats->cq_overflow_errors); + return stats; +} + +static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rx_qp_err_stats *rx_qp_err_stats = + &rdma_stats->rx_qp_err_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_request_errors", + (u64)rx_qp_err_stats->nak_invalid_request_errors); + pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors", + (u64)rx_qp_err_stats->nak_remote_operation_errors); + pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors", + (u64)rx_qp_err_stats->nak_count_remote_access_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_length_errors", + (u64)rx_qp_err_stats->local_length_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors", + (u64)rx_qp_err_stats->local_protection_errors); + pcur +=
ocrdma_add_stat(stats, pcur, "local_qp_operation_errors", + (u64)rx_qp_err_stats->local_qp_operation_errors); + return stats; +} + +static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_tx_qp_err_stats *tx_qp_err_stats = + &rdma_stats->tx_qp_err_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "local_length_errors", + (u64)tx_qp_err_stats->local_length_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors", + (u64)tx_qp_err_stats->local_protection_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors", + (u64)tx_qp_err_stats->local_qp_operation_errors); + pcur += ocrdma_add_stat(stats, pcur, "retry_count_exceeded_errors", + (u64)tx_qp_err_stats->retry_count_exceeded_errors); + pcur += ocrdma_add_stat(stats, pcur, "rnr_retry_count_exceeded_errors", + (u64)tx_qp_err_stats->rnr_retry_count_exceeded_errors); + return stats; +} + +static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev) +{ + int i; + char *pstats = dev->stats_mem.debugfs_mem; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_tx_dbg_stats *tx_dbg_stats = + &rdma_stats->tx_dbg_stats; + + memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + for (i = 0; i < 100; i++) + pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i, + tx_dbg_stats->data[i]); + + return dev->stats_mem.debugfs_mem; +} + +static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev) +{ + int i; + char *pstats = dev->stats_mem.debugfs_mem; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rx_dbg_stats *rx_dbg_stats = + &rdma_stats->rx_dbg_stats; + + memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + for (i = 0; i < 200; i++) + pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i, + rx_dbg_stats->data[i]); + + return dev->stats_mem.debugfs_mem; +} + +static void ocrdma_update_stats(struct ocrdma_dev *dev) +{ + ulong now = jiffies, secs; + int status = 0; + + secs = jiffies_to_msecs(now - dev->last_stats_time) / 1000U; + if (secs) { + /* update */ + status = ocrdma_mbx_rdma_stats(dev, false); + if (status) + pr_err("%s: stats mbox failed with status = %d\n", + __func__, status); + dev->last_stats_time = jiffies; + } +} + +static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer, + size_t usr_buf_len, loff_t *ppos) +{ + struct ocrdma_stats *pstats = filp->private_data; + struct ocrdma_dev *dev = pstats->dev; + ssize_t status = 0; + char *data = NULL; + + /* No partial reads */ + if (*ppos != 0) + return 0; + + mutex_lock(&dev->stats_lock); + + ocrdma_update_stats(dev); + + switch (pstats->type) { + case OCRDMA_RSRC_STATS: + data = ocrdma_resource_stats(dev); + break; + case OCRDMA_RXSTATS: + data = ocrdma_rx_stats(dev); + break; + case OCRDMA_WQESTATS: + data = ocrdma_wqe_stats(dev); + break; + case OCRDMA_TXSTATS: + data = ocrdma_tx_stats(dev); + break; + case OCRDMA_DB_ERRSTATS: + data = ocrdma_db_errstats(dev); + break; + case OCRDMA_RXQP_ERRSTATS: + data = ocrdma_rxqp_errstats(dev); + break; + case OCRDMA_TXQP_ERRSTATS: + data = ocrdma_txqp_errstats(dev); + break; + case OCRDMA_TX_DBG_STATS: + data = ocrdma_tx_dbg_stats(dev); + break; + case OCRDMA_RX_DBG_STATS: + data = ocrdma_rx_dbg_stats(dev); + break; + + default: + status = -EFAULT; + goto exit; + } + + if 
(usr_buf_len < strlen(data)) { + status = -ENOSPC; + goto exit; + } + + status = simple_read_from_buffer(buffer, usr_buf_len, ppos, data, + strlen(data)); +exit: + mutex_unlock(&dev->stats_lock); + return status; +} + +static const struct file_operations ocrdma_dbg_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = ocrdma_dbgfs_ops_read, +}; + +void ocrdma_add_port_stats(struct ocrdma_dev *dev) +{ + if (!ocrdma_dbgfs_dir) + return; + + /* Create post stats base dir */ + dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir); + if (!dev->dir) + goto err; + + dev->rsrc_stats.type = OCRDMA_RSRC_STATS; + dev->rsrc_stats.dev = dev; + if (!debugfs_create_file("resource_stats", S_IRUSR, dev->dir, + &dev->rsrc_stats, &ocrdma_dbg_ops)) + goto err; + + dev->rx_stats.type = OCRDMA_RXSTATS; + dev->rx_stats.dev = dev; + if (!debugfs_create_file("rx_stats", S_IRUSR, dev->dir, + &dev->rx_stats, &ocrdma_dbg_ops)) + goto err; + + dev->wqe_stats.type = OCRDMA_WQESTATS; + dev->wqe_stats.dev = dev; + if (!debugfs_create_file("wqe_stats", S_IRUSR, dev->dir, + &dev->wqe_stats, &ocrdma_dbg_ops)) + goto err; + + dev->tx_stats.type = OCRDMA_TXSTATS; + dev->tx_stats.dev = dev; + if (!debugfs_create_file("tx_stats", S_IRUSR, dev->dir, + &dev->tx_stats, &ocrdma_dbg_ops)) + goto err; + + dev->db_err_stats.type = OCRDMA_DB_ERRSTATS; + dev->db_err_stats.dev = dev; + if (!debugfs_create_file("db_err_stats", S_IRUSR, dev->dir, + &dev->db_err_stats, &ocrdma_dbg_ops)) + goto err; + + + dev->tx_qp_err_stats.type = OCRDMA_TXQP_ERRSTATS; + dev->tx_qp_err_stats.dev = dev; + if (!debugfs_create_file("tx_qp_err_stats", S_IRUSR, dev->dir, + &dev->tx_qp_err_stats, &ocrdma_dbg_ops)) + goto err; + + dev->rx_qp_err_stats.type = OCRDMA_RXQP_ERRSTATS; + dev->rx_qp_err_stats.dev = dev; + if (!debugfs_create_file("rx_qp_err_stats", S_IRUSR, dev->dir, + &dev->rx_qp_err_stats, &ocrdma_dbg_ops)) + goto err; + + + dev->tx_dbg_stats.type = OCRDMA_TX_DBG_STATS; + dev->tx_dbg_stats.dev = dev; + if (!debugfs_create_file("tx_dbg_stats", S_IRUSR, dev->dir, + &dev->tx_dbg_stats, &ocrdma_dbg_ops)) + goto err; + + dev->rx_dbg_stats.type = OCRDMA_RX_DBG_STATS; + dev->rx_dbg_stats.dev = dev; + if (!debugfs_create_file("rx_dbg_stats", S_IRUSR, dev->dir, + &dev->rx_dbg_stats, &ocrdma_dbg_ops)) + goto err; + + /* Now create dma_mem for stats mbx command */ + if (!ocrdma_alloc_stats_mem(dev)) + goto err; + + mutex_init(&dev->stats_lock); + + return; +err: + ocrdma_release_stats_mem(dev); + debugfs_remove_recursive(dev->dir); + dev->dir = NULL; +} + +void ocrdma_rem_port_stats(struct ocrdma_dev *dev) +{ + if (!dev->dir) + return; + mutex_destroy(&dev->stats_lock); + ocrdma_release_stats_mem(dev); + debugfs_remove(dev->dir); +} + +void ocrdma_init_debugfs(void) +{ + /* Create base dir in debugfs root dir */ + ocrdma_dbgfs_dir = debugfs_create_dir("ocrdma", NULL); +} + +void ocrdma_rem_debugfs(void) +{ + debugfs_remove_recursive(ocrdma_dbgfs_dir); +} diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h new file mode 100644 index 00000000000..5f5e20c46d7 --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h @@ -0,0 +1,54 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2014 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. 
* + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#ifndef __OCRDMA_STATS_H__ +#define __OCRDMA_STATS_H__ + +#include <linux/debugfs.h> +#include "ocrdma.h" +#include "ocrdma_hw.h" + +#define OCRDMA_MAX_DBGFS_MEM 4096 + +enum OCRDMA_STATS_TYPE { + OCRDMA_RSRC_STATS, + OCRDMA_RXSTATS, + OCRDMA_WQESTATS, + OCRDMA_TXSTATS, + OCRDMA_DB_ERRSTATS, + OCRDMA_RXQP_ERRSTATS, + OCRDMA_TXQP_ERRSTATS, + OCRDMA_TX_DBG_STATS, + OCRDMA_RX_DBG_STATS +}; + +void ocrdma_rem_debugfs(void); +void ocrdma_init_debugfs(void); +void ocrdma_rem_port_stats(struct ocrdma_dev *dev); +void ocrdma_add_port_stats(struct ocrdma_dev *dev); + +#endif /* __OCRDMA_STATS_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c new file mode 100644 index 00000000000..edf6211d84b --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -0,0 +1,3054 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2012 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. 
* + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#include <linux/dma-mapping.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/iw_cm.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_addr.h> + +#include "ocrdma.h" +#include "ocrdma_hw.h" +#include "ocrdma_verbs.h" +#include "ocrdma_abi.h" + +int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + if (index > 1) + return -EINVAL; + + *pkey = 0xffff; + return 0; +} + +int ocrdma_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *sgid) +{ + struct ocrdma_dev *dev; + + dev = get_ocrdma_dev(ibdev); + memset(sgid, 0, sizeof(*sgid)); + if (index > OCRDMA_MAX_SGID) + return -EINVAL; + + memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid)); + + return 0; +} + +int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) +{ + struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); + + memset(attr, 0, sizeof *attr); + memcpy(&attr->fw_ver, &dev->attr.fw_ver[0], + min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver))); + ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid); + attr->max_mr_size = ~0ull; + attr->page_size_cap = 0xffff000; + attr->vendor_id = dev->nic_info.pdev->vendor; + attr->vendor_part_id = dev->nic_info.pdev->device; + attr->hw_ver = 0; + attr->max_qp = dev->attr.max_qp; + attr->max_ah = OCRDMA_MAX_AH; + attr->max_qp_wr = dev->attr.max_wqe; + + attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD | + IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_SHUTDOWN_PORT | + IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_LOCAL_DMA_LKEY | + IB_DEVICE_MEM_MGT_EXTENSIONS; + attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge); + attr->max_sge_rd = 0; + attr->max_cq = dev->attr.max_cq; + attr->max_cqe = dev->attr.max_cqe; + attr->max_mr = dev->attr.max_mr; + attr->max_mw = dev->attr.max_mw; + attr->max_pd = dev->attr.max_pd; + attr->atomic_cap = 0; + attr->max_fmr = 0; + attr->max_map_per_fmr = 0; + attr->max_qp_rd_atom = + min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp); + attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp; + attr->max_srq = dev->attr.max_srq; + attr->max_srq_sge = dev->attr.max_srq_sge; + attr->max_srq_wr = dev->attr.max_rqe; + attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay; + attr->max_fast_reg_page_list_len = 0; + attr->max_pkeys = 1; + return 0; +} + +static inline void get_link_speed_and_width(struct ocrdma_dev *dev, + u8 *ib_speed, u8 *ib_width) +{ + int status; + u8 speed; + + status = ocrdma_mbx_get_link_speed(dev, &speed); + if (status) + speed = OCRDMA_PHYS_LINK_SPEED_ZERO; + + switch (speed) { + case OCRDMA_PHYS_LINK_SPEED_1GBPS: + *ib_speed = IB_SPEED_SDR; + *ib_width = IB_WIDTH_1X; + break; + + case OCRDMA_PHYS_LINK_SPEED_10GBPS: + *ib_speed = IB_SPEED_QDR; + *ib_width = IB_WIDTH_1X; + break; + + case OCRDMA_PHYS_LINK_SPEED_20GBPS: + *ib_speed = IB_SPEED_DDR; + *ib_width = IB_WIDTH_4X; + break; + + case OCRDMA_PHYS_LINK_SPEED_40GBPS: + *ib_speed = IB_SPEED_QDR; + *ib_width = IB_WIDTH_4X; + break; + + default: + /* Unsupported */ + *ib_speed = IB_SPEED_SDR; + *ib_width = IB_WIDTH_1X; + } +} + +int ocrdma_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + enum ib_port_state port_state; + struct ocrdma_dev *dev; + struct net_device *netdev; + + dev = get_ocrdma_dev(ibdev); + if (port > 1) { + pr_err("%s(%d) invalid_port=0x%x\n", __func__, + dev->id, 
port); + return -EINVAL; + } + netdev = dev->nic_info.netdev; + if (netif_running(netdev) && netif_oper_up(netdev)) { + port_state = IB_PORT_ACTIVE; + props->phys_state = 5; + } else { + port_state = IB_PORT_DOWN; + props->phys_state = 3; + } + props->max_mtu = IB_MTU_4096; + props->active_mtu = iboe_get_mtu(netdev->mtu); + props->lid = 0; + props->lmc = 0; + props->sm_lid = 0; + props->sm_sl = 0; + props->state = port_state; + props->port_cap_flags = + IB_PORT_CM_SUP | + IB_PORT_REINIT_SUP | + IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_IP_BASED_GIDS; + props->gid_tbl_len = OCRDMA_MAX_SGID; + props->pkey_tbl_len = 1; + props->bad_pkey_cntr = 0; + props->qkey_viol_cntr = 0; + get_link_speed_and_width(dev, &props->active_speed, + &props->active_width); + props->max_msg_sz = 0x80000000; + props->max_vl_num = 4; + return 0; +} + +int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, + struct ib_port_modify *props) +{ + struct ocrdma_dev *dev; + + dev = get_ocrdma_dev(ibdev); + if (port > 1) { + pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port); + return -EINVAL; + } + return 0; +} + +static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, + unsigned long len) +{ + struct ocrdma_mm *mm; + + mm = kzalloc(sizeof(*mm), GFP_KERNEL); + if (mm == NULL) + return -ENOMEM; + mm->key.phy_addr = phy_addr; + mm->key.len = len; + INIT_LIST_HEAD(&mm->entry); + + mutex_lock(&uctx->mm_list_lock); + list_add_tail(&mm->entry, &uctx->mm_head); + mutex_unlock(&uctx->mm_list_lock); + return 0; +} + +static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, + unsigned long len) +{ + struct ocrdma_mm *mm, *tmp; + + mutex_lock(&uctx->mm_list_lock); + list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { + if (len != mm->key.len && phy_addr != mm->key.phy_addr) + continue; + + list_del(&mm->entry); + kfree(mm); + break; + } + mutex_unlock(&uctx->mm_list_lock); +} + +static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr, + unsigned long len) +{ + bool found = false; + struct ocrdma_mm *mm; + + mutex_lock(&uctx->mm_list_lock); + list_for_each_entry(mm, &uctx->mm_head, entry) { + if (len != mm->key.len && phy_addr != mm->key.phy_addr) + continue; + + found = true; + break; + } + mutex_unlock(&uctx->mm_list_lock); + return found; +} + +static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, + struct ocrdma_ucontext *uctx, + struct ib_udata *udata) +{ + struct ocrdma_pd *pd = NULL; + int status = 0; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + if (udata && uctx) { + pd->dpp_enabled = + ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; + pd->num_dpp_qp = + pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; + } + +retry: + status = ocrdma_mbx_alloc_pd(dev, pd); + if (status) { + if (pd->dpp_enabled) { + pd->dpp_enabled = false; + pd->num_dpp_qp = 0; + goto retry; + } else { + kfree(pd); + return ERR_PTR(status); + } + } + + return pd; +} + +static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx, + struct ocrdma_pd *pd) +{ + return (uctx->cntxt_pd == pd ? 
true : false); +} + +static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev, + struct ocrdma_pd *pd) +{ + int status = 0; + + status = ocrdma_mbx_dealloc_pd(dev, pd); + kfree(pd); + return status; +} + +static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev, + struct ocrdma_ucontext *uctx, + struct ib_udata *udata) +{ + int status = 0; + + uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata); + if (IS_ERR(uctx->cntxt_pd)) { + status = PTR_ERR(uctx->cntxt_pd); + uctx->cntxt_pd = NULL; + goto err; + } + + uctx->cntxt_pd->uctx = uctx; + uctx->cntxt_pd->ibpd.device = &dev->ibdev; +err: + return status; +} + +static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) +{ + int status = 0; + struct ocrdma_pd *pd = uctx->cntxt_pd; + struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); + + BUG_ON(uctx->pd_in_use); + uctx->cntxt_pd = NULL; + status = _ocrdma_dealloc_pd(dev, pd); + return status; +} + +static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx) +{ + struct ocrdma_pd *pd = NULL; + + mutex_lock(&uctx->mm_list_lock); + if (!uctx->pd_in_use) { + uctx->pd_in_use = true; + pd = uctx->cntxt_pd; + } + mutex_unlock(&uctx->mm_list_lock); + + return pd; +} + +static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx) +{ + mutex_lock(&uctx->mm_list_lock); + uctx->pd_in_use = false; + mutex_unlock(&uctx->mm_list_lock); +} + +struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + int status; + struct ocrdma_ucontext *ctx; + struct ocrdma_alloc_ucontext_resp resp; + struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); + struct pci_dev *pdev = dev->nic_info.pdev; + u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE); + + if (!udata) + return ERR_PTR(-EFAULT); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&ctx->mm_head); + mutex_init(&ctx->mm_list_lock); + + ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len, + &ctx->ah_tbl.pa, GFP_KERNEL); + if (!ctx->ah_tbl.va) { + kfree(ctx); + return ERR_PTR(-ENOMEM); + } + memset(ctx->ah_tbl.va, 0, map_len); + ctx->ah_tbl.len = map_len; + + memset(&resp, 0, sizeof(resp)); + resp.ah_tbl_len = ctx->ah_tbl.len; + resp.ah_tbl_page = ctx->ah_tbl.pa; + + status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len); + if (status) + goto map_err; + + status = ocrdma_alloc_ucontext_pd(dev, ctx, udata); + if (status) + goto pd_err; + + resp.dev_id = dev->id; + resp.max_inline_data = dev->attr.max_inline_data; + resp.wqe_size = dev->attr.wqe_size; + resp.rqe_size = dev->attr.rqe_size; + resp.dpp_wqe_size = dev->attr.wqe_size; + + memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver)); + status = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (status) + goto cpy_err; + return &ctx->ibucontext; + +cpy_err: +pd_err: + ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len); +map_err: + dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va, + ctx->ah_tbl.pa); + kfree(ctx); + return ERR_PTR(status); +} + +int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx) +{ + int status = 0; + struct ocrdma_mm *mm, *tmp; + struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx); + struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device); + struct pci_dev *pdev = dev->nic_info.pdev; + + status = ocrdma_dealloc_ucontext_pd(uctx); + + ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len); + dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va, + uctx->ah_tbl.pa); + + list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) 
{ + list_del(&mm->entry); + kfree(mm); + } + kfree(uctx); + return status; +} + +int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context); + struct ocrdma_dev *dev = get_ocrdma_dev(context->device); + unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; + u64 unmapped_db = (u64) dev->nic_info.unmapped_db; + unsigned long len = (vma->vm_end - vma->vm_start); + int status = 0; + bool found; + + if (vma->vm_start & (PAGE_SIZE - 1)) + return -EINVAL; + found = ocrdma_search_mmap(ucontext, vma->vm_pgoff << PAGE_SHIFT, len); + if (!found) + return -EINVAL; + + if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db + + dev->nic_info.db_total_size)) && + (len <= dev->nic_info.db_page_size)) { + if (vma->vm_flags & VM_READ) + return -EPERM; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + len, vma->vm_page_prot); + } else if (dev->nic_info.dpp_unmapped_len && + (vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) && + (vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr + + dev->nic_info.dpp_unmapped_len)) && + (len <= dev->nic_info.dpp_unmapped_len)) { + if (vma->vm_flags & VM_READ) + return -EPERM; + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + len, vma->vm_page_prot); + } else { + status = remap_pfn_range(vma, vma->vm_start, + vma->vm_pgoff, len, vma->vm_page_prot); + } + return status; +} + +static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata) +{ + int status; + u64 db_page_addr; + u64 dpp_page_addr = 0; + u32 db_page_size; + struct ocrdma_alloc_pd_uresp rsp; + struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx); + + memset(&rsp, 0, sizeof(rsp)); + rsp.id = pd->id; + rsp.dpp_enabled = pd->dpp_enabled; + db_page_addr = ocrdma_get_db_addr(dev, pd->id); + db_page_size = dev->nic_info.db_page_size; + + status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size); + if (status) + return status; + + if (pd->dpp_enabled) { + dpp_page_addr = dev->nic_info.dpp_unmapped_addr + + (pd->id * PAGE_SIZE); + status = ocrdma_add_mmap(uctx, dpp_page_addr, + PAGE_SIZE); + if (status) + goto dpp_map_err; + rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr); + rsp.dpp_page_addr_lo = dpp_page_addr; + } + + status = ib_copy_to_udata(udata, &rsp, sizeof(rsp)); + if (status) + goto ucopy_err; + + pd->uctx = uctx; + return 0; + +ucopy_err: + if (pd->dpp_enabled) + ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE); +dpp_map_err: + ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size); + return status; +} + +struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); + struct ocrdma_pd *pd; + struct ocrdma_ucontext *uctx = NULL; + int status; + u8 is_uctx_pd = false; + + if (udata && context) { + uctx = get_ocrdma_ucontext(context); + pd = ocrdma_get_ucontext_pd(uctx); + if (pd) { + is_uctx_pd = true; + goto pd_mapping; + } + } + + pd = _ocrdma_alloc_pd(dev, uctx, udata); + if (IS_ERR(pd)) { + status = PTR_ERR(pd); + goto exit; + } + +pd_mapping: + if (udata && context) { + status = ocrdma_copy_pd_uresp(dev, pd, context, udata); + if (status) + goto err; + } + return &pd->ibpd; + +err: + if (is_uctx_pd) { + ocrdma_release_ucontext_pd(uctx); + } else { + status = ocrdma_mbx_dealloc_pd(dev, 
pd); + kfree(pd); + } +exit: + return ERR_PTR(status); +} + +int ocrdma_dealloc_pd(struct ib_pd *ibpd) +{ + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + struct ocrdma_ucontext *uctx = NULL; + int status = 0; + u64 usr_db; + + uctx = pd->uctx; + if (uctx) { + u64 dpp_db = dev->nic_info.dpp_unmapped_addr + + (pd->id * PAGE_SIZE); + if (pd->dpp_enabled) + ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE); + usr_db = ocrdma_get_db_addr(dev, pd->id); + ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size); + + if (is_ucontext_pd(uctx, pd)) { + ocrdma_release_ucontext_pd(uctx); + return status; + } + } + status = _ocrdma_dealloc_pd(dev, pd); + return status; +} + +static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr, + u32 pdid, int acc, u32 num_pbls, u32 addr_check) +{ + int status; + + mr->hwmr.fr_mr = 0; + mr->hwmr.local_rd = 1; + mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0; + mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hwmr.num_pbls = num_pbls; + + status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check); + if (status) + return status; + + mr->ibmr.lkey = mr->hwmr.lkey; + if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) + mr->ibmr.rkey = mr->hwmr.lkey; + return 0; +} + +struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc) +{ + int status; + struct ocrdma_mr *mr; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + + if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) { + pr_err("%s err, invalid access rights\n", __func__); + return ERR_PTR(-EINVAL); + } + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0, + OCRDMA_ADDR_CHECK_DISABLE); + if (status) { + kfree(mr); + return ERR_PTR(status); + } + + return &mr->ibmr; +} + +static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev, + struct ocrdma_hw_mr *mr) +{ + struct pci_dev *pdev = dev->nic_info.pdev; + int i = 0; + + if (mr->pbl_table) { + for (i = 0; i < mr->num_pbls; i++) { + if (!mr->pbl_table[i].va) + continue; + dma_free_coherent(&pdev->dev, mr->pbl_size, + mr->pbl_table[i].va, + mr->pbl_table[i].pa); + } + kfree(mr->pbl_table); + mr->pbl_table = NULL; + } +} + +static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr, + u32 num_pbes) +{ + u32 num_pbls = 0; + u32 idx = 0; + int status = 0; + u32 pbl_size; + + do { + pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx); + if (pbl_size > MAX_OCRDMA_PBL_SIZE) { + status = -EFAULT; + break; + } + num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64))); + num_pbls = num_pbls / (pbl_size / sizeof(u64)); + idx++; + } while (num_pbls >= dev->attr.max_num_mr_pbl); + + mr->hwmr.num_pbes = num_pbes; + mr->hwmr.num_pbls = num_pbls; + mr->hwmr.pbl_size = pbl_size; + return status; +} + +static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr) +{ + int status = 0; + int i; + u32 dma_len = mr->pbl_size; + struct pci_dev *pdev = dev->nic_info.pdev; + void *va; + dma_addr_t pa; + + mr->pbl_table = kzalloc(sizeof(struct ocrdma_pbl) * + mr->num_pbls, GFP_KERNEL); + + if (!mr->pbl_table) + return -ENOMEM; + + for (i = 0; i < mr->num_pbls; i++) { + va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL); 
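		/*
		 * One PBL per iteration: a DMA-coherent buffer of
		 * mr->pbl_size bytes. build_user_pbes() later fills each
		 * with 8-byte PBEs (page addresses), pbl_size / sizeof(u64)
		 * entries per PBL, so the num_pbls buffers sized by
		 * ocrdma_get_pbl_info() cover all num_pbes pages of the
		 * region.
		 */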
+ if (!va) { + ocrdma_free_mr_pbl_tbl(dev, mr); + status = -ENOMEM; + break; + } + memset(va, 0, dma_len); + mr->pbl_table[i].va = va; + mr->pbl_table[i].pa = pa; + } + return status; +} + +static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, + u32 num_pbes) +{ + struct ocrdma_pbe *pbe; + struct scatterlist *sg; + struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; + struct ib_umem *umem = mr->umem; + int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0; + + if (!mr->hwmr.num_pbes) + return; + + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + pbe_cnt = 0; + + shift = ilog2(umem->page_size); + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + pages = sg_dma_len(sg) >> shift; + for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { + /* store the page address in pbe */ + pbe->pa_lo = + cpu_to_le32(sg_dma_address + (sg) + + (umem->page_size * pg_cnt)); + pbe->pa_hi = + cpu_to_le32(upper_32_bits + ((sg_dma_address + (sg) + + umem->page_size * pg_cnt))); + pbe_cnt += 1; + total_num_pbes += 1; + pbe++; + + /* if done building pbes, issue the mbx cmd. */ + if (total_num_pbes == num_pbes) + return; + + /* if the given pbl is full storing the pbes, + * move to next pbl. + */ + if (pbe_cnt == + (mr->hwmr.pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + pbe_cnt = 0; + } + + } + } +} + +struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, + u64 usr_addr, int acc, struct ib_udata *udata) +{ + int status = -ENOMEM; + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + struct ocrdma_mr *mr; + struct ocrdma_pd *pd; + u32 num_pbes; + + pd = get_ocrdma_pd(ibpd); + + if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(status); + mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0); + if (IS_ERR(mr->umem)) { + status = -EFAULT; + goto umem_err; + } + num_pbes = ib_umem_page_count(mr->umem); + status = ocrdma_get_pbl_info(dev, mr, num_pbes); + if (status) + goto umem_err; + + mr->hwmr.pbe_size = mr->umem->page_size; + mr->hwmr.fbo = mr->umem->offset; + mr->hwmr.va = usr_addr; + mr->hwmr.len = len; + mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hwmr.local_rd = 1; + mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + status = ocrdma_build_pbl_tbl(dev, &mr->hwmr); + if (status) + goto umem_err; + build_user_pbes(dev, mr, num_pbes); + status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc); + if (status) + goto mbx_err; + mr->ibmr.lkey = mr->hwmr.lkey; + if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) + mr->ibmr.rkey = mr->hwmr.lkey; + + return &mr->ibmr; + +mbx_err: + ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); +umem_err: + kfree(mr); + return ERR_PTR(status); +} + +int ocrdma_dereg_mr(struct ib_mr *ib_mr) +{ + struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr); + struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device); + int status; + + status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey); + + ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); + + /* it could be user registered memory. 
*/ + if (mr->umem) + ib_umem_release(mr->umem); + kfree(mr); + return status; +} + +static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq, + struct ib_udata *udata, + struct ib_ucontext *ib_ctx) +{ + int status; + struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx); + struct ocrdma_create_cq_uresp uresp; + + memset(&uresp, 0, sizeof(uresp)); + uresp.cq_id = cq->id; + uresp.page_size = PAGE_ALIGN(cq->len); + uresp.num_pages = 1; + uresp.max_hw_cqe = cq->max_hw_cqe; + uresp.page_addr[0] = cq->pa; + uresp.db_page_addr = ocrdma_get_db_addr(dev, uctx->cntxt_pd->id); + uresp.db_page_size = dev->nic_info.db_page_size; + uresp.phase_change = cq->phase_change ? 1 : 0; + status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (status) { + pr_err("%s(%d) copy error cqid=0x%x.\n", + __func__, dev->id, cq->id); + goto err; + } + status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size); + if (status) + goto err; + status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size); + if (status) { + ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size); + goto err; + } + cq->ucontext = uctx; +err: + return status; +} + +struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata) +{ + struct ocrdma_cq *cq; + struct ocrdma_dev *dev = get_ocrdma_dev(ibdev); + struct ocrdma_ucontext *uctx = NULL; + u16 pd_id = 0; + int status; + struct ocrdma_create_cq_ureq ureq; + + if (udata) { + if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) + return ERR_PTR(-EFAULT); + } else + ureq.dpp_cq = 0; + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&cq->cq_lock); + spin_lock_init(&cq->comp_handler_lock); + INIT_LIST_HEAD(&cq->sq_head); + INIT_LIST_HEAD(&cq->rq_head); + cq->first_arm = true; + + if (ib_ctx) { + uctx = get_ocrdma_ucontext(ib_ctx); + pd_id = uctx->cntxt_pd->id; + } + + status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id); + if (status) { + kfree(cq); + return ERR_PTR(status); + } + if (ib_ctx) { + status = ocrdma_copy_cq_uresp(dev, cq, udata, ib_ctx); + if (status) + goto ctx_err; + } + cq->phase = OCRDMA_CQE_VALID; + dev->cq_tbl[cq->id] = cq; + return &cq->ibcq; + +ctx_err: + ocrdma_mbx_destroy_cq(dev, cq); + kfree(cq); + return ERR_PTR(status); +} + +int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt, + struct ib_udata *udata) +{ + int status = 0; + struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); + + if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) { + status = -EINVAL; + return status; + } + ibcq->cqe = new_cnt; + return status; +} + +static void ocrdma_flush_cq(struct ocrdma_cq *cq) +{ + int cqe_cnt; + int valid_count = 0; + unsigned long flags; + + struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device); + struct ocrdma_cqe *cqe = NULL; + + cqe = cq->va; + cqe_cnt = cq->cqe_cnt; + + /* Last irq might have scheduled a polling thread + * sync-up with it before hard flushing. 
+ */ + spin_lock_irqsave(&cq->cq_lock, flags); + while (cqe_cnt) { + if (is_cqe_valid(cq, cqe)) + valid_count++; + cqe++; + cqe_cnt--; + } + ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count); + spin_unlock_irqrestore(&cq->cq_lock, flags); +} + +int ocrdma_destroy_cq(struct ib_cq *ibcq) +{ + int status; + struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); + struct ocrdma_eq *eq = NULL; + struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); + int pdid = 0; + u32 irq, indx; + + dev->cq_tbl[cq->id] = NULL; + indx = ocrdma_get_eq_table_index(dev, cq->eqn); + if (indx == -EINVAL) + BUG(); + + eq = &dev->eq_tbl[indx]; + irq = ocrdma_get_irq(dev, eq); + synchronize_irq(irq); + ocrdma_flush_cq(cq); + + status = ocrdma_mbx_destroy_cq(dev, cq); + if (cq->ucontext) { + pdid = cq->ucontext->cntxt_pd->id; + ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, + PAGE_ALIGN(cq->len)); + ocrdma_del_mmap(cq->ucontext, + ocrdma_get_db_addr(dev, pdid), + dev->nic_info.db_page_size); + } + + kfree(cq); + return status; +} + +static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp) +{ + int status = -EINVAL; + + if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) { + dev->qp_tbl[qp->id] = qp; + status = 0; + } + return status; +} + +static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp) +{ + dev->qp_tbl[qp->id] = NULL; +} + +static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, + struct ib_qp_init_attr *attrs) +{ + if ((attrs->qp_type != IB_QPT_GSI) && + (attrs->qp_type != IB_QPT_RC) && + (attrs->qp_type != IB_QPT_UC) && + (attrs->qp_type != IB_QPT_UD)) { + pr_err("%s(%d) unsupported qp type=0x%x requested\n", + __func__, dev->id, attrs->qp_type); + return -EINVAL; + } + /* Skip the check for QP1 to support CM size of 128 */ + if ((attrs->qp_type != IB_QPT_GSI) && + (attrs->cap.max_send_wr > dev->attr.max_wqe)) { + pr_err("%s(%d) unsupported send_wr=0x%x requested\n", + __func__, dev->id, attrs->cap.max_send_wr); + pr_err("%s(%d) supported send_wr=0x%x\n", + __func__, dev->id, dev->attr.max_wqe); + return -EINVAL; + } + if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) { + pr_err("%s(%d) unsupported recv_wr=0x%x requested\n", + __func__, dev->id, attrs->cap.max_recv_wr); + pr_err("%s(%d) supported recv_wr=0x%x\n", + __func__, dev->id, dev->attr.max_rqe); + return -EINVAL; + } + if (attrs->cap.max_inline_data > dev->attr.max_inline_data) { + pr_err("%s(%d) unsupported inline data size=0x%x requested\n", + __func__, dev->id, attrs->cap.max_inline_data); + pr_err("%s(%d) supported inline data size=0x%x\n", + __func__, dev->id, dev->attr.max_inline_data); + return -EINVAL; + } + if (attrs->cap.max_send_sge > dev->attr.max_send_sge) { + pr_err("%s(%d) unsupported send_sge=0x%x requested\n", + __func__, dev->id, attrs->cap.max_send_sge); + pr_err("%s(%d) supported send_sge=0x%x\n", + __func__, dev->id, dev->attr.max_send_sge); + return -EINVAL; + } + if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) { + pr_err("%s(%d) unsupported recv_sge=0x%x requested\n", + __func__, dev->id, attrs->cap.max_recv_sge); + pr_err("%s(%d) supported recv_sge=0x%x\n", + __func__, dev->id, dev->attr.max_recv_sge); + return -EINVAL; + } + /* unprivileged user space cannot create special QP */ + if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { + pr_err + ("%s(%d) Userspace can't create special QPs of type=0x%x\n", + __func__, dev->id, attrs->qp_type); + return -EINVAL; + } + /* allow creating only one GSI type of QP */ + if (attrs->qp_type == IB_QPT_GSI 
&& dev->gsi_qp_created) { + pr_err("%s(%d) GSI special QPs already created.\n", + __func__, dev->id); + return -EINVAL; + } + /* verify consumer QPs are not trying to use GSI QP's CQ */ + if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) { + if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) || + (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) { + pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n", + __func__, dev->id); + return -EINVAL; + } + } + return 0; +} + +static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, + struct ib_udata *udata, int dpp_offset, + int dpp_credit_lmt, int srq) +{ + int status = 0; + u64 usr_db; + struct ocrdma_create_qp_uresp uresp; + struct ocrdma_dev *dev = qp->dev; + struct ocrdma_pd *pd = qp->pd; + + memset(&uresp, 0, sizeof(uresp)); + usr_db = dev->nic_info.unmapped_db + + (pd->id * dev->nic_info.db_page_size); + uresp.qp_id = qp->id; + uresp.sq_dbid = qp->sq.dbid; + uresp.num_sq_pages = 1; + uresp.sq_page_size = PAGE_ALIGN(qp->sq.len); + uresp.sq_page_addr[0] = qp->sq.pa; + uresp.num_wqe_allocated = qp->sq.max_cnt; + if (!srq) { + uresp.rq_dbid = qp->rq.dbid; + uresp.num_rq_pages = 1; + uresp.rq_page_size = PAGE_ALIGN(qp->rq.len); + uresp.rq_page_addr[0] = qp->rq.pa; + uresp.num_rqe_allocated = qp->rq.max_cnt; + } + uresp.db_page_addr = usr_db; + uresp.db_page_size = dev->nic_info.db_page_size; + uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET; + uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; + uresp.db_shift = OCRDMA_DB_RQ_SHIFT; + + if (qp->dpp_enabled) { + uresp.dpp_credit = dpp_credit_lmt; + uresp.dpp_offset = dpp_offset; + } + status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (status) { + pr_err("%s(%d) user copy error.\n", __func__, dev->id); + goto err; + } + status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0], + uresp.sq_page_size); + if (status) + goto err; + + if (!srq) { + status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0], + uresp.rq_page_size); + if (status) + goto rq_map_err; + } + return status; +rq_map_err: + ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size); +err: + return status; +} + +static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp, + struct ocrdma_pd *pd) +{ + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { + qp->sq_db = dev->nic_info.db + + (pd->id * dev->nic_info.db_page_size) + + OCRDMA_DB_GEN2_SQ_OFFSET; + qp->rq_db = dev->nic_info.db + + (pd->id * dev->nic_info.db_page_size) + + OCRDMA_DB_GEN2_RQ_OFFSET; + } else { + qp->sq_db = dev->nic_info.db + + (pd->id * dev->nic_info.db_page_size) + + OCRDMA_DB_SQ_OFFSET; + qp->rq_db = dev->nic_info.db + + (pd->id * dev->nic_info.db_page_size) + + OCRDMA_DB_RQ_OFFSET; + } +} + +static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp) +{ + qp->wqe_wr_id_tbl = + kzalloc(sizeof(*(qp->wqe_wr_id_tbl)) * qp->sq.max_cnt, + GFP_KERNEL); + if (qp->wqe_wr_id_tbl == NULL) + return -ENOMEM; + qp->rqe_wr_id_tbl = + kzalloc(sizeof(u64) * qp->rq.max_cnt, GFP_KERNEL); + if (qp->rqe_wr_id_tbl == NULL) + return -ENOMEM; + + return 0; +} + +static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp, + struct ocrdma_pd *pd, + struct ib_qp_init_attr *attrs) +{ + qp->pd = pd; + spin_lock_init(&qp->q_lock); + INIT_LIST_HEAD(&qp->sq_entry); + INIT_LIST_HEAD(&qp->rq_entry); + + qp->qp_type = attrs->qp_type; + qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR; + qp->max_inline_data = attrs->cap.max_inline_data; + qp->sq.max_sges = attrs->cap.max_send_sge; + qp->rq.max_sges = attrs->cap.max_recv_sge; + qp->state = OCRDMA_QPS_RST; 
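+ /* cache whether the consumer asked for a CQE on every work request */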
+ qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; +} + +static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev, + struct ib_qp_init_attr *attrs) +{ + if (attrs->qp_type == IB_QPT_GSI) { + dev->gsi_qp_created = 1; + dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq); + dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq); + } +} + +struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + int status; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_qp *qp; + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + struct ocrdma_create_qp_ureq ureq; + u16 dpp_credit_lmt, dpp_offset; + + status = ocrdma_check_qp_params(ibpd, dev, attrs); + if (status) + goto gen_err; + + memset(&ureq, 0, sizeof(ureq)); + if (udata) { + if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) + return ERR_PTR(-EFAULT); + } + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + status = -ENOMEM; + goto gen_err; + } + qp->dev = dev; + ocrdma_set_qp_init_params(qp, pd, attrs); + if (udata == NULL) + qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 | + OCRDMA_QP_FAST_REG); + + mutex_lock(&dev->dev_lock); + status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq, + ureq.dpp_cq_id, + &dpp_offset, &dpp_credit_lmt); + if (status) + goto mbx_err; + + /* user space QP's wr_id table are managed in library */ + if (udata == NULL) { + status = ocrdma_alloc_wr_id_tbl(qp); + if (status) + goto map_err; + } + + status = ocrdma_add_qpn_map(dev, qp); + if (status) + goto map_err; + ocrdma_set_qp_db(dev, qp, pd); + if (udata) { + status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset, + dpp_credit_lmt, + (attrs->srq != NULL)); + if (status) + goto cpy_err; + } + ocrdma_store_gsi_qp_cq(dev, attrs); + qp->ibqp.qp_num = qp->id; + mutex_unlock(&dev->dev_lock); + return &qp->ibqp; + +cpy_err: + ocrdma_del_qpn_map(dev, qp); +map_err: + ocrdma_mbx_destroy_qp(dev, qp); +mbx_err: + mutex_unlock(&dev->dev_lock); + kfree(qp->wqe_wr_id_tbl); + kfree(qp->rqe_wr_id_tbl); + kfree(qp); + pr_err("%s(%d) error=%d\n", __func__, dev->id, status); +gen_err: + return ERR_PTR(status); +} + +int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask) +{ + int status = 0; + struct ocrdma_qp *qp; + struct ocrdma_dev *dev; + enum ib_qp_state old_qps; + + qp = get_ocrdma_qp(ibqp); + dev = qp->dev; + if (attr_mask & IB_QP_STATE) + status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps); + /* if new and previous states are same hw doesn't need to + * know about it. 
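+ * A negative return from the state-change helper aborts below before + * the mailbox command is issued.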
*/ + if (status < 0) + return status; + status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); + + return status; +} + +int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + unsigned long flags; + int status = -EINVAL; + struct ocrdma_qp *qp; + struct ocrdma_dev *dev; + enum ib_qp_state old_qps, new_qps; + + qp = get_ocrdma_qp(ibqp); + dev = qp->dev; + + /* synchronize with multiple contexts trying to change/retrieve qps */ + mutex_lock(&dev->dev_lock); + /* synchronize with wqe, rqe posting and cqe processing contexts */ + spin_lock_irqsave(&qp->q_lock, flags); + old_qps = get_ibqp_state(qp->state); + if (attr_mask & IB_QP_STATE) + new_qps = attr->qp_state; + else + new_qps = old_qps; + spin_unlock_irqrestore(&qp->q_lock, flags); + + if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask, + IB_LINK_LAYER_ETHERNET)) { + pr_err("%s(%d) invalid attribute mask=0x%x specified for\n" + "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n", + __func__, dev->id, attr_mask, qp->id, ibqp->qp_type, + old_qps, new_qps); + goto param_err; + } + + status = _ocrdma_modify_qp(ibqp, attr, attr_mask); + if (status > 0) + status = 0; +param_err: + mutex_unlock(&dev->dev_lock); + return status; +} + +static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu) +{ + switch (mtu) { + case 256: + return IB_MTU_256; + case 512: + return IB_MTU_512; + case 1024: + return IB_MTU_1024; + case 2048: + return IB_MTU_2048; + case 4096: + return IB_MTU_4096; + default: + return IB_MTU_1024; + } +} + +static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags) +{ + int ib_qp_acc_flags = 0; + + if (qp_cap_flags & OCRDMA_QP_INB_WR) + ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE; + if (qp_cap_flags & OCRDMA_QP_INB_RD) + ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE; + return ib_qp_acc_flags; +} + +int ocrdma_query_qp(struct ib_qp *ibqp, + struct ib_qp_attr *qp_attr, + int attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + int status; + u32 qp_state; + struct ocrdma_qp_params params; + struct ocrdma_qp *qp = get_ocrdma_qp(ibqp); + struct ocrdma_dev *dev = qp->dev; + + memset(&params, 0, sizeof(params)); + mutex_lock(&dev->dev_lock); + status = ocrdma_mbx_query_qp(dev, qp, &params); + mutex_unlock(&dev->dev_lock); + if (status) + goto mbx_err; + qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT); + qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT); + qp_attr->path_mtu = + ocrdma_mtu_int_to_enum((params.path_mtu_pkey_indx & + OCRDMA_QP_PARAMS_PATH_MTU_MASK) >> + OCRDMA_QP_PARAMS_PATH_MTU_SHIFT); + qp_attr->path_mig_state = IB_MIG_MIGRATED; + qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK; + qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK; + qp_attr->dest_qp_num = + params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK; + + qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags); + qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1; + qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1; + qp_attr->cap.max_send_sge = qp->sq.max_sges; + qp_attr->cap.max_recv_sge = qp->rq.max_sges; + qp_attr->cap.max_inline_data = qp->max_inline_data; + qp_init_attr->cap = qp_attr->cap; + memcpy(&qp_attr->ah_attr.grh.dgid, &params.dgid[0], + sizeof(params.dgid)); + qp_attr->ah_attr.grh.flow_label = params.rnt_rc_sl_fl & + OCRDMA_QP_PARAMS_FLOW_LABEL_MASK; + qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx; + qp_attr->ah_attr.grh.hop_limit = (params.hop_lmt_rq_psn & + OCRDMA_QP_PARAMS_HOP_LMT_MASK) >> + OCRDMA_QP_PARAMS_HOP_LMT_SHIFT; +
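+ /* unpack the remaining address vector and timer fields from the packed hw words */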
qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn & + OCRDMA_QP_PARAMS_TCLASS_MASK) >> + OCRDMA_QP_PARAMS_TCLASS_SHIFT; + + qp_attr->ah_attr.ah_flags = IB_AH_GRH; + qp_attr->ah_attr.port_num = 1; + qp_attr->ah_attr.sl = (params.rnt_rc_sl_fl & + OCRDMA_QP_PARAMS_SL_MASK) >> + OCRDMA_QP_PARAMS_SL_SHIFT; + qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn & + OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >> + OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT; + qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn & + OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >> + OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT; + qp_attr->retry_cnt = + (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >> + OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT; + qp_attr->min_rnr_timer = 0; + qp_attr->pkey_index = 0; + qp_attr->port_num = 1; + qp_attr->ah_attr.src_path_bits = 0; + qp_attr->ah_attr.static_rate = 0; + qp_attr->alt_pkey_index = 0; + qp_attr->alt_port_num = 0; + qp_attr->alt_timeout = 0; + memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr)); + qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >> + OCRDMA_QP_PARAMS_STATE_SHIFT; + qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0; + qp_attr->max_dest_rd_atomic = + params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT; + qp_attr->max_rd_atomic = + params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK; + qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags & + OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0; +mbx_err: + return status; +} + +static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, int idx) +{ + int i = idx / 32; + unsigned int mask = (1 << (idx % 32)); + + if (srq->idx_bit_fields[i] & mask) + srq->idx_bit_fields[i] &= ~mask; + else + srq->idx_bit_fields[i] |= mask; +} + +static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q) +{ + return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt; +} + +static int is_hw_sq_empty(struct ocrdma_qp *qp) +{ + return (qp->sq.tail == qp->sq.head); +} + +static int is_hw_rq_empty(struct ocrdma_qp *qp) +{ + return (qp->rq.tail == qp->rq.head); +} + +static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q) +{ + return q->va + (q->head * q->entry_size); +} + +static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q, + u32 idx) +{ + return q->va + (idx * q->entry_size); +} + +static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q) +{ + q->head = (q->head + 1) & q->max_wqe_idx; +} + +static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q) +{ + q->tail = (q->tail + 1) & q->max_wqe_idx; +} + +/* discard the cqe for a given QP */ +static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq) +{ + unsigned long cq_flags; + unsigned long flags; + int discard_cnt = 0; + u32 cur_getp, stop_getp; + struct ocrdma_cqe *cqe; + u32 qpn = 0, wqe_idx = 0; + + spin_lock_irqsave(&cq->cq_lock, cq_flags); + + /* traverse through the CQEs in the hw CQ, + * find the matching CQE for a given qp, + * mark the matching one discarded by clearing qpn. + * ring the doorbell in the poll_cq() as + * we don't complete out of order cqe. + */ + + cur_getp = cq->getp; + /* find upto when do we reap the cq. */ + stop_getp = cur_getp; + do { + if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp))) + break; + + cqe = cq->va + cur_getp; + /* if (a) done reaping whole hw cq, or + * (b) qp_xq becomes empty. + * then exit + */ + qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK; + /* if previously discarded cqe found, skip that too. 
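+ * (a discarded cqe already has its qpn cleared to 0, so the qpn + * check below skips it).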
*/ + /* check for matching qp */ + if (qpn == 0 || qpn != qp->id) + goto skip_cqe; + + if (is_cqe_for_sq(cqe)) { + ocrdma_hwq_inc_tail(&qp->sq); + } else { + if (qp->srq) { + wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >> + OCRDMA_CQE_BUFTAG_SHIFT) & + qp->srq->rq.max_wqe_idx; + if (wqe_idx < 1) + BUG(); + spin_lock_irqsave(&qp->srq->q_lock, flags); + ocrdma_hwq_inc_tail(&qp->srq->rq); + ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1); + spin_unlock_irqrestore(&qp->srq->q_lock, flags); + + } else { + ocrdma_hwq_inc_tail(&qp->rq); + } + } + /* mark cqe discarded so that it is not picked up later + * in the poll_cq(). + */ + discard_cnt += 1; + cqe->cmn.qpn = 0; +skip_cqe: + cur_getp = (cur_getp + 1) % cq->max_hw_cqe; + } while (cur_getp != stop_getp); + spin_unlock_irqrestore(&cq->cq_lock, cq_flags); +} + +void ocrdma_del_flush_qp(struct ocrdma_qp *qp) +{ + int found = false; + unsigned long flags; + struct ocrdma_dev *dev = qp->dev; + /* sync with any active CQ poll */ + + spin_lock_irqsave(&dev->flush_q_lock, flags); + found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp); + if (found) + list_del(&qp->sq_entry); + if (!qp->srq) { + found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp); + if (found) + list_del(&qp->rq_entry); + } + spin_unlock_irqrestore(&dev->flush_q_lock, flags); +} + +int ocrdma_destroy_qp(struct ib_qp *ibqp) +{ + int status; + struct ocrdma_pd *pd; + struct ocrdma_qp *qp; + struct ocrdma_dev *dev; + struct ib_qp_attr attrs; + int attr_mask = IB_QP_STATE; + unsigned long flags; + + qp = get_ocrdma_qp(ibqp); + dev = qp->dev; + + attrs.qp_state = IB_QPS_ERR; + pd = qp->pd; + + /* change the QP state to ERROR */ + _ocrdma_modify_qp(ibqp, &attrs, attr_mask); + + /* ensure that CQEs for a newly created QP (whose id may be the same + * as that of the QP just being destroyed) don't get + * discarded until the old QP's CQEs are all discarded. + */ + mutex_lock(&dev->dev_lock); + status = ocrdma_mbx_destroy_qp(dev, qp); + + /* + * acquire CQ lock while destroy is in progress, in order to + * protect against processing in-flight CQEs for this QP.
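+ * Both CQ locks are taken when SQ and RQ use distinct CQs, and the + * qpn map entry is cleared under them, so a concurrent poll cannot + * look this QP up while it is being torn down.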
+ */ + spin_lock_irqsave(&qp->sq_cq->cq_lock, flags); + if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) + spin_lock(&qp->rq_cq->cq_lock); + + ocrdma_del_qpn_map(dev, qp); + + if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) + spin_unlock(&qp->rq_cq->cq_lock); + spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags); + + if (!pd->uctx) { + ocrdma_discard_cqes(qp, qp->sq_cq); + ocrdma_discard_cqes(qp, qp->rq_cq); + } + mutex_unlock(&dev->dev_lock); + + if (pd->uctx) { + ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa, + PAGE_ALIGN(qp->sq.len)); + if (!qp->srq) + ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa, + PAGE_ALIGN(qp->rq.len)); + } + + ocrdma_del_flush_qp(qp); + + kfree(qp->wqe_wr_id_tbl); + kfree(qp->rqe_wr_id_tbl); + kfree(qp); + return status; +} + +static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq, + struct ib_udata *udata) +{ + int status; + struct ocrdma_create_srq_uresp uresp; + + memset(&uresp, 0, sizeof(uresp)); + uresp.rq_dbid = srq->rq.dbid; + uresp.num_rq_pages = 1; + uresp.rq_page_addr[0] = srq->rq.pa; + uresp.rq_page_size = srq->rq.len; + uresp.db_page_addr = dev->nic_info.unmapped_db + + (srq->pd->id * dev->nic_info.db_page_size); + uresp.db_page_size = dev->nic_info.db_page_size; + uresp.num_rqe_allocated = srq->rq.max_cnt; + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { + uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; + uresp.db_shift = 24; + } else { + uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET; + uresp.db_shift = 16; + } + + status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (status) + return status; + status = ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0], + uresp.rq_page_size); + if (status) + return status; + return status; +} + +struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + int status = -ENOMEM; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + struct ocrdma_srq *srq; + + if (init_attr->attr.max_sge > dev->attr.max_recv_sge) + return ERR_PTR(-EINVAL); + if (init_attr->attr.max_wr > dev->attr.max_rqe) + return ERR_PTR(-EINVAL); + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(status); + + spin_lock_init(&srq->q_lock); + srq->pd = pd; + srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size); + status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd); + if (status) + goto err; + + if (udata == NULL) { + srq->rqe_wr_id_tbl = kzalloc(sizeof(u64) * srq->rq.max_cnt, + GFP_KERNEL); + if (srq->rqe_wr_id_tbl == NULL) + goto arm_err; + + srq->bit_fields_len = (srq->rq.max_cnt / 32) + + (srq->rq.max_cnt % 32 ? 
1 : 0); + srq->idx_bit_fields = + kmalloc(srq->bit_fields_len * sizeof(u32), GFP_KERNEL); + if (srq->idx_bit_fields == NULL) + goto arm_err; + memset(srq->idx_bit_fields, 0xff, + srq->bit_fields_len * sizeof(u32)); + } + + if (init_attr->attr.srq_limit) { + status = ocrdma_mbx_modify_srq(srq, &init_attr->attr); + if (status) + goto arm_err; + } + + if (udata) { + status = ocrdma_copy_srq_uresp(dev, srq, udata); + if (status) + goto arm_err; + } + + return &srq->ibsrq; + +arm_err: + ocrdma_mbx_destroy_srq(dev, srq); +err: + kfree(srq->rqe_wr_id_tbl); + kfree(srq->idx_bit_fields); + kfree(srq); + return ERR_PTR(status); +} + +int ocrdma_modify_srq(struct ib_srq *ibsrq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata) +{ + int status = 0; + struct ocrdma_srq *srq; + + srq = get_ocrdma_srq(ibsrq); + if (srq_attr_mask & IB_SRQ_MAX_WR) + status = -EINVAL; + else + status = ocrdma_mbx_modify_srq(srq, srq_attr); + return status; +} + +int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) +{ + int status; + struct ocrdma_srq *srq; + + srq = get_ocrdma_srq(ibsrq); + status = ocrdma_mbx_query_srq(srq, srq_attr); + return status; +} + +int ocrdma_destroy_srq(struct ib_srq *ibsrq) +{ + int status; + struct ocrdma_srq *srq; + struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); + + srq = get_ocrdma_srq(ibsrq); + + status = ocrdma_mbx_destroy_srq(dev, srq); + + if (srq->pd->uctx) + ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, + PAGE_ALIGN(srq->rq.len)); + + kfree(srq->idx_bit_fields); + kfree(srq->rqe_wr_id_tbl); + kfree(srq); + return status; +} + +/* unprivileged verbs and their support functions. */ +static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp, + struct ocrdma_hdr_wqe *hdr, + struct ib_send_wr *wr) +{ + struct ocrdma_ewqe_ud_hdr *ud_hdr = + (struct ocrdma_ewqe_ud_hdr *)(hdr + 1); + struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah); + + ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn; + if (qp->qp_type == IB_QPT_GSI) + ud_hdr->qkey = qp->qkey; + else + ud_hdr->qkey = wr->wr.ud.remote_qkey; + ud_hdr->rsvd_ahid = ah->id; +} + +static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr, + struct ocrdma_sge *sge, int num_sge, + struct ib_sge *sg_list) +{ + int i; + + for (i = 0; i < num_sge; i++) { + sge[i].lrkey = sg_list[i].lkey; + sge[i].addr_lo = sg_list[i].addr; + sge[i].addr_hi = upper_32_bits(sg_list[i].addr); + sge[i].len = sg_list[i].length; + hdr->total_len += sg_list[i].length; + } + if (num_sge == 0) + memset(sge, 0, sizeof(*sge)); +} + +static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge) +{ + uint32_t total_len = 0, i; + + for (i = 0; i < num_sge; i++) + total_len += sg_list[i].length; + return total_len; +} + + +static int ocrdma_build_inline_sges(struct ocrdma_qp *qp, + struct ocrdma_hdr_wqe *hdr, + struct ocrdma_sge *sge, + struct ib_send_wr *wr, u32 wqe_size) +{ + int i; + char *dpp_addr; + + if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) { + hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge); + if (unlikely(hdr->total_len > qp->max_inline_data)) { + pr_err("%s() supported_len=0x%x,\n" + " unsupported len req=0x%x\n", __func__, + qp->max_inline_data, hdr->total_len); + return -EINVAL; + } + dpp_addr = (char *)sge; + for (i = 0; i < wr->num_sge; i++) { + memcpy(dpp_addr, + (void *)(unsigned long)wr->sg_list[i].addr, + wr->sg_list[i].length); + dpp_addr += wr->sg_list[i].length; + } + + wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES); + if (0 ==
hdr->total_len) + wqe_size += sizeof(struct ocrdma_sge); + hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT); + } else { + ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list); + if (wr->num_sge) + wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge)); + else + wqe_size += sizeof(struct ocrdma_sge); + hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); + } + hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); + return 0; +} + +static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, + struct ib_send_wr *wr) +{ + int status; + struct ocrdma_sge *sge; + u32 wqe_size = sizeof(*hdr); + + if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) { + ocrdma_build_ud_hdr(qp, hdr, wr); + sge = (struct ocrdma_sge *)(hdr + 2); + wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr); + } else { + sge = (struct ocrdma_sge *)(hdr + 1); + } + + status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); + return status; +} + +static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, + struct ib_send_wr *wr) +{ + int status; + struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1); + struct ocrdma_sge *sge = ext_rw + 1; + u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw); + + status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size); + if (status) + return status; + ext_rw->addr_lo = wr->wr.rdma.remote_addr; + ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr); + ext_rw->lrkey = wr->wr.rdma.rkey; + ext_rw->len = hdr->total_len; + return 0; +} + +static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, + struct ib_send_wr *wr) +{ + struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1); + struct ocrdma_sge *sge = ext_rw + 1; + u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) + + sizeof(struct ocrdma_hdr_wqe); + + ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list); + hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); + hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT); + hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); + + ext_rw->addr_lo = wr->wr.rdma.remote_addr; + ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr); + ext_rw->lrkey = wr->wr.rdma.rkey; + ext_rw->len = hdr->total_len; +} + +static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl, + struct ocrdma_hw_mr *hwmr) +{ + int i; + u64 buf_addr = 0; + int num_pbes; + struct ocrdma_pbe *pbe; + + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + num_pbes = 0; + + /* go through the OS phy regions & fill hw pbe entries into pbls. */ + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + /* number of pbes can be more for one OS buf, when + * buffers are of different sizes. + * split the ib_buf to one or more pbes. + */ + buf_addr = wr->wr.fast_reg.page_list->page_list[i]; + pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK)); + pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr)); + num_pbes += 1; + pbe++; + + /* if the pbl is full storing the pbes, + * move to next pbl. 
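+ * Each PBL page holds pbl_size / sizeof(u64) entries; a 4K PBL + * therefore holds 512 PBEs.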
*/ + if (num_pbes == (hwmr->pbl_size/sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + } + } + return; +} + +static int get_encoded_page_size(int pg_sz) +{ + /* Max size is 256M, i.e. 4096 << 16 */ + int i = 0; + for (; i < 17; i++) + if (pg_sz == (4096 << i)) + break; + return i; +} + + +static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, + struct ib_send_wr *wr) +{ + u64 fbo; + struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1); + struct ocrdma_mr *mr; + u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr); + + wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES); + + if (wr->wr.fast_reg.page_list_len > qp->dev->attr.max_pages_per_frmr) + return -EINVAL; + + hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT); + hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT); + + if (wr->wr.fast_reg.page_list_len == 0) + BUG(); + if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE) + hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR; + if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE) + hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR; + if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ) + hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD; + hdr->lkey = wr->wr.fast_reg.rkey; + hdr->total_len = wr->wr.fast_reg.length; + + fbo = wr->wr.fast_reg.iova_start - + (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK); + + fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start); + fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff); + fast_reg->fbo_hi = upper_32_bits(fbo); + fast_reg->fbo_lo = (u32) fbo & 0xffffffff; + fast_reg->num_sges = wr->wr.fast_reg.page_list_len; + fast_reg->size_sge = + get_encoded_page_size(1 << wr->wr.fast_reg.page_shift); + mr = (struct ocrdma_mr *) (unsigned long) + qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)]; + build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); + return 0; +} + +static void ocrdma_ring_sq_db(struct ocrdma_qp *qp) +{ + u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT); + + iowrite32(val, qp->sq_db); +} + +int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + int status = 0; + struct ocrdma_qp *qp = get_ocrdma_qp(ibqp); + struct ocrdma_hdr_wqe *hdr; + unsigned long flags; + + spin_lock_irqsave(&qp->q_lock, flags); + if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) { + spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; + return -EINVAL; + } + + while (wr) { + if (ocrdma_hwq_free_cnt(&qp->sq) == 0 || + wr->num_sge > qp->sq.max_sges) { + *bad_wr = wr; + status = -ENOMEM; + break; + } + hdr = ocrdma_hwq_head(&qp->sq); + hdr->cw = 0; + if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled) + hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT); + if (wr->send_flags & IB_SEND_FENCE) + hdr->cw |= + (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT); + if (wr->send_flags & IB_SEND_SOLICITED) + hdr->cw |= + (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT); + hdr->total_len = 0; + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT); + hdr->immdt = ntohl(wr->ex.imm_data); + case IB_WR_SEND: + hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT); + status = ocrdma_build_send(qp, hdr, wr); + break; + case IB_WR_SEND_WITH_INV: + hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT); + hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT); + hdr->lkey = wr->ex.invalidate_rkey; + status = ocrdma_build_send(qp, hdr, wr); + break; + case
IB_WR_RDMA_WRITE_WITH_IMM: + hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT); + hdr->immdt = ntohl(wr->ex.imm_data); + case IB_WR_RDMA_WRITE: + hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT); + status = ocrdma_build_write(qp, hdr, wr); + break; + case IB_WR_RDMA_READ_WITH_INV: + hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT); + case IB_WR_RDMA_READ: + ocrdma_build_read(qp, hdr, wr); + break; + case IB_WR_LOCAL_INV: + hdr->cw |= + (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT); + hdr->cw |= ((sizeof(struct ocrdma_hdr_wqe) + + sizeof(struct ocrdma_sge)) / + OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT; + hdr->lkey = wr->ex.invalidate_rkey; + break; + case IB_WR_FAST_REG_MR: + status = ocrdma_build_fr(qp, hdr, wr); + break; + default: + status = -EINVAL; + break; + } + if (status) { + *bad_wr = wr; + break; + } + if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled) + qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1; + else + qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0; + qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id; + ocrdma_cpu_to_le32(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) & + OCRDMA_WQE_SIZE_MASK) * OCRDMA_WQE_STRIDE); + /* make sure wqe is written before adapter can access it */ + wmb(); + /* inform hw to start processing it */ + ocrdma_ring_sq_db(qp); + + /* update pointer, counter for next wr */ + ocrdma_hwq_inc_head(&qp->sq); + wr = wr->next; + } + spin_unlock_irqrestore(&qp->q_lock, flags); + return status; +} + +static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) +{ + u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT); + + iowrite32(val, qp->rq_db); +} + +static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr, + u16 tag) +{ + u32 wqe_size = 0; + struct ocrdma_sge *sge; + if (wr->num_sge) + wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe); + else + wqe_size = sizeof(*sge) + sizeof(*rqe); + + rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) << + OCRDMA_WQE_SIZE_SHIFT); + rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT); + rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT); + rqe->total_len = 0; + rqe->rsvd_tag = tag; + sge = (struct ocrdma_sge *)(rqe + 1); + ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list); + ocrdma_cpu_to_le32(rqe, wqe_size); +} + +int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + int status = 0; + unsigned long flags; + struct ocrdma_qp *qp = get_ocrdma_qp(ibqp); + struct ocrdma_hdr_wqe *rqe; + + spin_lock_irqsave(&qp->q_lock, flags); + if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) { + spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; + return -EINVAL; + } + while (wr) { + if (ocrdma_hwq_free_cnt(&qp->rq) == 0 || + wr->num_sge > qp->rq.max_sges) { + *bad_wr = wr; + status = -ENOMEM; + break; + } + rqe = ocrdma_hwq_head(&qp->rq); + ocrdma_build_rqe(rqe, wr, 0); + + qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id; + /* make sure rqe is written before adapter can access it */ + wmb(); + + /* inform hw to start processing it */ + ocrdma_ring_rq_db(qp); + + /* update pointer, counter for next wr */ + ocrdma_hwq_inc_head(&qp->rq); + wr = wr->next; + } + spin_unlock_irqrestore(&qp->q_lock, flags); + return status; +} + +/* cqe for srq's rqe can potentially arrive out of order. + * index gives the entry in the shadow table where to store + * the wr_id. tag/index is returned in cqe to reference back + * for a given rqe. 
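+ * A set bit in idx_bit_fields marks a free slot (the table starts + * out as all 0xff); the bit is toggled once when the index is handed + * out in post_srq_recv() and again when the completion releases it.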
+ */ +static int ocrdma_srq_get_idx(struct ocrdma_srq *srq) +{ + int row = 0; + int indx = 0; + + for (row = 0; row < srq->bit_fields_len; row++) { + if (srq->idx_bit_fields[row]) { + indx = ffs(srq->idx_bit_fields[row]); + indx = (row * 32) + (indx - 1); + if (indx >= srq->rq.max_cnt) + BUG(); + ocrdma_srq_toggle_bit(srq, indx); + break; + } + } + + if (row == srq->bit_fields_len) + BUG(); + return indx + 1; /* Use from index 1 */ +} + +static void ocrdma_ring_srq_db(struct ocrdma_srq *srq) +{ + u32 val = srq->rq.dbid | (1 << 16); + + iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET); +} + +int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + int status = 0; + unsigned long flags; + struct ocrdma_srq *srq; + struct ocrdma_hdr_wqe *rqe; + u16 tag; + + srq = get_ocrdma_srq(ibsrq); + + spin_lock_irqsave(&srq->q_lock, flags); + while (wr) { + if (ocrdma_hwq_free_cnt(&srq->rq) == 0 || + wr->num_sge > srq->rq.max_sges) { + status = -ENOMEM; + *bad_wr = wr; + break; + } + tag = ocrdma_srq_get_idx(srq); + rqe = ocrdma_hwq_head(&srq->rq); + ocrdma_build_rqe(rqe, wr, tag); + + srq->rqe_wr_id_tbl[tag] = wr->wr_id; + /* make sure rqe is written before adapter can perform DMA */ + wmb(); + /* inform hw to start processing it */ + ocrdma_ring_srq_db(srq); + /* update pointer, counter for next wr */ + ocrdma_hwq_inc_head(&srq->rq); + wr = wr->next; + } + spin_unlock_irqrestore(&srq->q_lock, flags); + return status; +} + +static enum ib_wc_status ocrdma_to_ibwc_err(u16 status) +{ + enum ib_wc_status ibwc_status; + + switch (status) { + case OCRDMA_CQE_GENERAL_ERR: + ibwc_status = IB_WC_GENERAL_ERR; + break; + case OCRDMA_CQE_LOC_LEN_ERR: + ibwc_status = IB_WC_LOC_LEN_ERR; + break; + case OCRDMA_CQE_LOC_QP_OP_ERR: + ibwc_status = IB_WC_LOC_QP_OP_ERR; + break; + case OCRDMA_CQE_LOC_EEC_OP_ERR: + ibwc_status = IB_WC_LOC_EEC_OP_ERR; + break; + case OCRDMA_CQE_LOC_PROT_ERR: + ibwc_status = IB_WC_LOC_PROT_ERR; + break; + case OCRDMA_CQE_WR_FLUSH_ERR: + ibwc_status = IB_WC_WR_FLUSH_ERR; + break; + case OCRDMA_CQE_MW_BIND_ERR: + ibwc_status = IB_WC_MW_BIND_ERR; + break; + case OCRDMA_CQE_BAD_RESP_ERR: + ibwc_status = IB_WC_BAD_RESP_ERR; + break; + case OCRDMA_CQE_LOC_ACCESS_ERR: + ibwc_status = IB_WC_LOC_ACCESS_ERR; + break; + case OCRDMA_CQE_REM_INV_REQ_ERR: + ibwc_status = IB_WC_REM_INV_REQ_ERR; + break; + case OCRDMA_CQE_REM_ACCESS_ERR: + ibwc_status = IB_WC_REM_ACCESS_ERR; + break; + case OCRDMA_CQE_REM_OP_ERR: + ibwc_status = IB_WC_REM_OP_ERR; + break; + case OCRDMA_CQE_RETRY_EXC_ERR: + ibwc_status = IB_WC_RETRY_EXC_ERR; + break; + case OCRDMA_CQE_RNR_RETRY_EXC_ERR: + ibwc_status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case OCRDMA_CQE_LOC_RDD_VIOL_ERR: + ibwc_status = IB_WC_LOC_RDD_VIOL_ERR; + break; + case OCRDMA_CQE_REM_INV_RD_REQ_ERR: + ibwc_status = IB_WC_REM_INV_RD_REQ_ERR; + break; + case OCRDMA_CQE_REM_ABORT_ERR: + ibwc_status = IB_WC_REM_ABORT_ERR; + break; + case OCRDMA_CQE_INV_EECN_ERR: + ibwc_status = IB_WC_INV_EECN_ERR; + break; + case OCRDMA_CQE_INV_EEC_STATE_ERR: + ibwc_status = IB_WC_INV_EEC_STATE_ERR; + break; + case OCRDMA_CQE_FATAL_ERR: + ibwc_status = IB_WC_FATAL_ERR; + break; + case OCRDMA_CQE_RESP_TIMEOUT_ERR: + ibwc_status = IB_WC_RESP_TIMEOUT_ERR; + break; + default: + ibwc_status = IB_WC_GENERAL_ERR; + break; + } + return ibwc_status; +} + +static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc, + u32 wqe_idx) +{ + struct ocrdma_hdr_wqe *hdr; + struct ocrdma_sge *rw; + int opcode; + + hdr = 
ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx); + + ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid; + /* Undo the hdr->cw swap */ + opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK; + switch (opcode) { + case OCRDMA_WRITE: + ibwc->opcode = IB_WC_RDMA_WRITE; + break; + case OCRDMA_READ: + rw = (struct ocrdma_sge *)(hdr + 1); + ibwc->opcode = IB_WC_RDMA_READ; + ibwc->byte_len = rw->len; + break; + case OCRDMA_SEND: + ibwc->opcode = IB_WC_SEND; + break; + case OCRDMA_FR_MR: + ibwc->opcode = IB_WC_FAST_REG_MR; + break; + case OCRDMA_LKEY_INV: + ibwc->opcode = IB_WC_LOCAL_INV; + break; + default: + ibwc->status = IB_WC_GENERAL_ERR; + pr_err("%s() invalid opcode received = 0x%x\n", + __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK); + break; + } +} + +static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp, + struct ocrdma_cqe *cqe) +{ + if (is_cqe_for_sq(cqe)) { + cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu( + cqe->flags_status_srcqpn) & + ~OCRDMA_CQE_STATUS_MASK); + cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu( + cqe->flags_status_srcqpn) | + (OCRDMA_CQE_WR_FLUSH_ERR << + OCRDMA_CQE_STATUS_SHIFT)); + } else { + if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) { + cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu( + cqe->flags_status_srcqpn) & + ~OCRDMA_CQE_UD_STATUS_MASK); + cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu( + cqe->flags_status_srcqpn) | + (OCRDMA_CQE_WR_FLUSH_ERR << + OCRDMA_CQE_UD_STATUS_SHIFT)); + } else { + cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu( + cqe->flags_status_srcqpn) & + ~OCRDMA_CQE_STATUS_MASK); + cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu( + cqe->flags_status_srcqpn) | + (OCRDMA_CQE_WR_FLUSH_ERR << + OCRDMA_CQE_STATUS_SHIFT)); + } + } +} + +static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe, + struct ocrdma_qp *qp, int status) +{ + bool expand = false; + + ibwc->byte_len = 0; + ibwc->qp = &qp->ibqp; + ibwc->status = ocrdma_to_ibwc_err(status); + + ocrdma_flush_qp(qp); + ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL); + + /* if wqe/rqe pending for which cqe needs to be returned, + * trigger inflating it. + */ + if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) { + expand = true; + ocrdma_set_cqe_status_flushed(qp, cqe); + } + return expand; +} + +static int ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe, + struct ocrdma_qp *qp, int status) +{ + ibwc->opcode = IB_WC_RECV; + ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail]; + ocrdma_hwq_inc_tail(&qp->rq); + + return ocrdma_update_err_cqe(ibwc, cqe, qp, status); +} + +static int ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe, + struct ocrdma_qp *qp, int status) +{ + ocrdma_update_wc(qp, ibwc, qp->sq.tail); + ocrdma_hwq_inc_tail(&qp->sq); + + return ocrdma_update_err_cqe(ibwc, cqe, qp, status); +} + + +static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp, + struct ocrdma_cqe *cqe, struct ib_wc *ibwc, + bool *polled, bool *stop) +{ + bool expand; + int status = (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT; + + /* when hw sq is empty, but rq is not empty, so we continue + * to keep the cqe in order to get the cq event again. + */ + if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) { + /* when cq for rq and sq is same, it is safe to return + * flush cqe for RQEs. 
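+ * (the caller already holds that CQ's lock, so the RQ can be flushed + * from here without taking a second lock).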
+ */ + if (!qp->srq && (qp->sq_cq == qp->rq_cq)) { + *polled = true; + status = OCRDMA_CQE_WR_FLUSH_ERR; + expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status); + } else { + /* stop processing further cqe as this cqe is used for + * triggering cq event on buddy cq of RQ. + * When QP is destroyed, this cqe will be removed + * from the cq's hardware q. + */ + *polled = false; + *stop = true; + expand = false; + } + } else { + *polled = true; + expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status); + } + return expand; +} + +static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp, + struct ocrdma_cqe *cqe, + struct ib_wc *ibwc, bool *polled) +{ + bool expand = false; + int tail = qp->sq.tail; + u32 wqe_idx; + + if (!qp->wqe_wr_id_tbl[tail].signaled) { + *polled = false; /* WC cannot be consumed yet */ + } else { + ibwc->status = IB_WC_SUCCESS; + ibwc->wc_flags = 0; + ibwc->qp = &qp->ibqp; + ocrdma_update_wc(qp, ibwc, tail); + *polled = true; + } + wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) & + OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx; + if (tail != wqe_idx) + expand = true; /* Coalesced CQE can't be consumed yet */ + + ocrdma_hwq_inc_tail(&qp->sq); + return expand; +} + +static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, + struct ib_wc *ibwc, bool *polled, bool *stop) +{ + int status; + bool expand; + + status = (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT; + + if (status == OCRDMA_CQE_SUCCESS) + expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled); + else + expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop); + return expand; +} + +static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe) +{ + int status; + + status = (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT; + ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_SRCQP_MASK; + ibwc->pkey_index = le32_to_cpu(cqe->ud.rxlen_pkey) & + OCRDMA_CQE_PKEY_MASK; + ibwc->wc_flags = IB_WC_GRH; + ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >> + OCRDMA_CQE_UD_XFER_LEN_SHIFT); + return status; +} + +static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc, + struct ocrdma_cqe *cqe, + struct ocrdma_qp *qp) +{ + unsigned long flags; + struct ocrdma_srq *srq; + u32 wqe_idx; + + srq = get_ocrdma_srq(qp->ibqp.srq); + wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >> + OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx; + if (wqe_idx < 1) + BUG(); + + ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx]; + spin_lock_irqsave(&srq->q_lock, flags); + ocrdma_srq_toggle_bit(srq, wqe_idx - 1); + spin_unlock_irqrestore(&srq->q_lock, flags); + ocrdma_hwq_inc_tail(&srq->rq); +} + +static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, + struct ib_wc *ibwc, bool *polled, bool *stop, + int status) +{ + bool expand; + + /* when hw_rq is empty, but wq is not empty, so continue + * to keep the cqe to get the cq event again. 
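+ * This is the mirror image of ocrdma_poll_err_scqe() with the roles + * of the SQ and RQ exchanged.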
+ */ + if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) { + if (!qp->srq && (qp->sq_cq == qp->rq_cq)) { + *polled = true; + status = OCRDMA_CQE_WR_FLUSH_ERR; + expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status); + } else { + *polled = false; + *stop = true; + expand = false; + } + } else { + *polled = true; + expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status); + } + return expand; +} + +static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp, + struct ocrdma_cqe *cqe, struct ib_wc *ibwc) +{ + ibwc->opcode = IB_WC_RECV; + ibwc->qp = &qp->ibqp; + ibwc->status = IB_WC_SUCCESS; + + if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) + ocrdma_update_ud_rcqe(ibwc, cqe); + else + ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen); + + if (is_cqe_imm(cqe)) { + ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt)); + ibwc->wc_flags |= IB_WC_WITH_IMM; + } else if (is_cqe_wr_imm(cqe)) { + ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt)); + ibwc->wc_flags |= IB_WC_WITH_IMM; + } else if (is_cqe_invalidated(cqe)) { + ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt); + ibwc->wc_flags |= IB_WC_WITH_INVALIDATE; + } + if (qp->ibqp.srq) { + ocrdma_update_free_srq_cqe(ibwc, cqe, qp); + } else { + ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail]; + ocrdma_hwq_inc_tail(&qp->rq); + } +} + +static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, + struct ib_wc *ibwc, bool *polled, bool *stop) +{ + int status; + bool expand = false; + + ibwc->wc_flags = 0; + if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) { + status = (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_UD_STATUS_MASK) >> + OCRDMA_CQE_UD_STATUS_SHIFT; + } else { + status = (le32_to_cpu(cqe->flags_status_srcqpn) & + OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT; + } + + if (status == OCRDMA_CQE_SUCCESS) { + *polled = true; + ocrdma_poll_success_rcqe(qp, cqe, ibwc); + } else { + expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop, + status); + } + return expand; +} + +static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe, + u16 cur_getp) +{ + if (cq->phase_change) { + if (cur_getp == 0) + cq->phase = (~cq->phase & OCRDMA_CQE_VALID); + } else { + /* clear valid bit */ + cqe->flags_status_srcqpn = 0; + } +} + +static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries, + struct ib_wc *ibwc) +{ + u16 qpn = 0; + int i = 0; + bool expand = false; + int polled_hw_cqes = 0; + struct ocrdma_qp *qp = NULL; + struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device); + struct ocrdma_cqe *cqe; + u16 cur_getp; bool polled = false; bool stop = false; + + cur_getp = cq->getp; + while (num_entries) { + cqe = cq->va + cur_getp; + /* check whether valid cqe or not */ + if (!is_cqe_valid(cq, cqe)) + break; + qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK); + /* ignore discarded cqe */ + if (qpn == 0) + goto skip_cqe; + qp = dev->qp_tbl[qpn]; + BUG_ON(qp == NULL); + + if (is_cqe_for_sq(cqe)) { + expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled, + &stop); + } else { + expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled, + &stop); + } + if (expand) + goto expand_cqe; + if (stop) + goto stop_cqe; + /* clear qpn to avoid duplicate processing by discard_cqe() */ + cqe->cmn.qpn = 0; +skip_cqe: + polled_hw_cqes += 1; + cur_getp = (cur_getp + 1) % cq->max_hw_cqe; + ocrdma_change_cq_phase(cq, cqe, cur_getp); +expand_cqe: + if (polled) { + num_entries -= 1; + i += 1; + ibwc = ibwc + 1; + polled = false; + } + } +stop_cqe: + 
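+ /* write back the sw consumer index and release the polled CQEs to hw */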
cq->getp = cur_getp; + if (cq->deferred_arm) { + ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol, + polled_hw_cqes); + cq->deferred_arm = false; + cq->deferred_sol = false; + } else { + /* We need to pop the CQE. No need to arm */ + ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol, + polled_hw_cqes); + cq->deferred_sol = false; + } + + return i; +} + +/* insert error cqe if the QP's SQ or RQ's CQ matches the CQ under poll. */ +static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries, + struct ocrdma_qp *qp, struct ib_wc *ibwc) +{ + int err_cqes = 0; + + while (num_entries) { + if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp)) + break; + if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) { + ocrdma_update_wc(qp, ibwc, qp->sq.tail); + ocrdma_hwq_inc_tail(&qp->sq); + } else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) { + ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail]; + ocrdma_hwq_inc_tail(&qp->rq); + } else { + return err_cqes; + } + ibwc->byte_len = 0; + ibwc->status = IB_WC_WR_FLUSH_ERR; + ibwc = ibwc + 1; + err_cqes += 1; + num_entries -= 1; + } + return err_cqes; +} + +int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + int cqes_to_poll = num_entries; + struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); + struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); + int num_os_cqe = 0, err_cqes = 0; + struct ocrdma_qp *qp; + unsigned long flags; + + /* poll cqes from adapter CQ */ + spin_lock_irqsave(&cq->cq_lock, flags); + num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc); + spin_unlock_irqrestore(&cq->cq_lock, flags); + cqes_to_poll -= num_os_cqe; + + if (cqes_to_poll) { + wc = wc + num_os_cqe; + /* adapter returns single error cqe when qp moves to + * error state. So insert error cqes with wc_status as + * FLUSHED for pending WQEs and RQEs of QP's SQ and RQ + * respectively which uses this CQ. 
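+ * The QPs to flush are found on this CQ's sq_head/rq_head flush + * lists, which are walked under flush_q_lock below.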
*/ + spin_lock_irqsave(&dev->flush_q_lock, flags); + list_for_each_entry(qp, &cq->sq_head, sq_entry) { + if (cqes_to_poll == 0) + break; + err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc); + cqes_to_poll -= err_cqes; + num_os_cqe += err_cqes; + wc = wc + err_cqes; + } + spin_unlock_irqrestore(&dev->flush_q_lock, flags); + } + return num_os_cqe; +} + +int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) +{ + struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); + struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); + u16 cq_id; + unsigned long flags; + bool arm_needed = false, sol_needed = false; + + cq_id = cq->id; + + spin_lock_irqsave(&cq->cq_lock, flags); + if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED) + arm_needed = true; + if (cq_flags & IB_CQ_SOLICITED) + sol_needed = true; + + if (cq->first_arm) { + ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0); + cq->first_arm = false; + goto skip_defer; + } + cq->deferred_arm = true; + +skip_defer: + cq->deferred_sol = sol_needed; + spin_unlock_irqrestore(&cq->cq_lock, flags); + + return 0; +} + +struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len) +{ + int status; + struct ocrdma_mr *mr; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + + if (max_page_list_len > dev->attr.max_pages_per_frmr) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + status = ocrdma_get_pbl_info(dev, mr, max_page_list_len); + if (status) + goto pbl_err; + mr->hwmr.fr_mr = 1; + mr->hwmr.remote_rd = 0; + mr->hwmr.remote_wr = 0; + mr->hwmr.local_rd = 0; + mr->hwmr.local_wr = 0; + mr->hwmr.mw_bind = 0; + status = ocrdma_build_pbl_tbl(dev, &mr->hwmr); + if (status) + goto pbl_err; + status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0); + if (status) + goto mbx_err; + mr->ibmr.rkey = mr->hwmr.lkey; + mr->ibmr.lkey = mr->hwmr.lkey; + dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = + (unsigned long) mr; + return &mr->ibmr; +mbx_err: + ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); +pbl_err: + kfree(mr); + return ERR_PTR(status); +} + +struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device + *ibdev, + int page_list_len) +{ + struct ib_fast_reg_page_list *frmr_list; + int size; + + size = sizeof(*frmr_list) + (page_list_len * sizeof(u64)); + frmr_list = kzalloc(size, GFP_KERNEL); + if (!frmr_list) + return ERR_PTR(-ENOMEM); + frmr_list->page_list = (u64 *)(frmr_list + 1); + return frmr_list; +} + +void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list) +{ + kfree(page_list); +} + +#define MAX_KERNEL_PBE_SIZE 65536 +static inline int count_kernel_pbes(struct ib_phys_buf *buf_list, + int buf_cnt, u32 *pbe_size) +{ + u64 total_size = 0; + u64 buf_size = 0; + int i; + *pbe_size = roundup(buf_list[0].size, PAGE_SIZE); + *pbe_size = roundup_pow_of_two(*pbe_size); + + /* find the smallest PBE size that we can have */ + for (i = 0; i < buf_cnt; i++) { + /* first addr may not be page aligned, so ignore checking */ + if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) || + (buf_list[i].size & ~PAGE_MASK))) { + return 0; + } + + /* if configured PBE size is greater than the chosen one, + * reduce the PBE size.
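+ * *pbe_size thus converges to the smallest rounded-up buffer size, + * capped at MAX_KERNEL_PBE_SIZE (64K).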
+ */ + buf_size = roundup(buf_list[i].size, PAGE_SIZE); + /* pbe_size has to be even multiple of 4K 1,2,4,8...*/ + buf_size = roundup_pow_of_two(buf_size); + if (*pbe_size > buf_size) + *pbe_size = buf_size; + + total_size += buf_size; + } + *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ? + (MAX_KERNEL_PBE_SIZE) : (*pbe_size); + + /* num_pbes = total_size / (*pbe_size); this is implemented below. */ + + return total_size >> ilog2(*pbe_size); +} + +static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt, + u32 pbe_size, struct ocrdma_pbl *pbl_tbl, + struct ocrdma_hw_mr *hwmr) +{ + int i; + int idx; + int pbes_per_buf = 0; + u64 buf_addr = 0; + int num_pbes; + struct ocrdma_pbe *pbe; + int total_num_pbes = 0; + + if (!hwmr->num_pbes) + return; + + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + num_pbes = 0; + + /* go through the OS phy regions & fill hw pbe entries into pbls. */ + for (i = 0; i < ib_buf_cnt; i++) { + buf_addr = buf_list[i].addr; + pbes_per_buf = + roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) / + pbe_size; + hwmr->len += buf_list[i].size; + /* number of pbes can be more for one OS buf, when + * buffers are of different sizes. + * split the ib_buf to one or more pbes. + */ + for (idx = 0; idx < pbes_per_buf; idx++) { + /* we program always page aligned addresses, + * first unaligned address is taken care by fbo. + */ + if (i == 0) { + /* for non zero fbo, assign the + * start of the page. + */ + pbe->pa_lo = + cpu_to_le32((u32) (buf_addr & PAGE_MASK)); + pbe->pa_hi = + cpu_to_le32((u32) upper_32_bits(buf_addr)); + } else { + pbe->pa_lo = + cpu_to_le32((u32) (buf_addr & 0xffffffff)); + pbe->pa_hi = + cpu_to_le32((u32) upper_32_bits(buf_addr)); + } + buf_addr += pbe_size; + num_pbes += 1; + total_num_pbes += 1; + pbe++; + + if (total_num_pbes == hwmr->num_pbes) + goto mr_tbl_done; + /* if the pbl is full storing the pbes, + * move to next pbl. + */ + if (num_pbes == (hwmr->pbl_size/sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + num_pbes = 0; + } + } + } +mr_tbl_done: + return; +} + +struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd, + struct ib_phys_buf *buf_list, + int buf_cnt, int acc, u64 *iova_start) +{ + int status = -ENOMEM; + struct ocrdma_mr *mr; + struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); + struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); + u32 num_pbes; + u32 pbe_size = 0; + + if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE)) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(status); + + num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size); + if (num_pbes == 0) { + status = -EINVAL; + goto pbl_err; + } + status = ocrdma_get_pbl_info(dev, mr, num_pbes); + if (status) + goto pbl_err; + + mr->hwmr.pbe_size = pbe_size; + mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK); + mr->hwmr.va = *iova_start; + mr->hwmr.local_rd = 1; + mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 
1 : 0; + + status = ocrdma_build_pbl_tbl(dev, &mr->hwmr); + if (status) + goto pbl_err; + build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table, + &mr->hwmr); + status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc); + if (status) + goto mbx_err; + + mr->ibmr.lkey = mr->hwmr.lkey; + if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) + mr->ibmr.rkey = mr->hwmr.lkey; + return &mr->ibmr; + +mbx_err: + ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); +pbl_err: + kfree(mr); + return ERR_PTR(status); +} diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h new file mode 100644 index 00000000000..b8f7853fd36 --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -0,0 +1,99 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2012 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#ifndef __OCRDMA_VERBS_H__ +#define __OCRDMA_VERBS_H__ + +int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *, + struct ib_send_wr **bad_wr); +int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *, + struct ib_recv_wr **bad_wr); + +int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); +int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags); + +int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props); +int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); +int ocrdma_modify_port(struct ib_device *, u8 port, int mask, + struct ib_port_modify *props); + +void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid); +int ocrdma_query_gid(struct ib_device *, u8 port, + int index, union ib_gid *gid); +int ocrdma_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); + +struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *, + struct ib_udata *); +int ocrdma_dealloc_ucontext(struct ib_ucontext *); + +int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma); + +struct ib_pd *ocrdma_alloc_pd(struct ib_device *, + struct ib_ucontext *, struct ib_udata *); +int ocrdma_dealloc_pd(struct ib_pd *pd); + +struct ib_cq *ocrdma_create_cq(struct ib_device *, int entries, int vector, + struct ib_ucontext *, struct ib_udata *); +int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); +int ocrdma_destroy_cq(struct ib_cq *); + +struct ib_qp *ocrdma_create_qp(struct ib_pd *, + struct ib_qp_init_attr *attrs, + struct ib_udata *); +int _ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr, + int 
attr_mask); +int ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int ocrdma_query_qp(struct ib_qp *, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *); +int ocrdma_destroy_qp(struct ib_qp *); +void ocrdma_del_flush_qp(struct ocrdma_qp *qp); + +struct ib_srq *ocrdma_create_srq(struct ib_pd *, struct ib_srq_init_attr *, + struct ib_udata *); +int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *, + enum ib_srq_attr_mask, struct ib_udata *); +int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *); +int ocrdma_destroy_srq(struct ib_srq *); +int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *, + struct ib_recv_wr **bad_recv_wr); + +int ocrdma_dereg_mr(struct ib_mr *); +struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc); +struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *, + struct ib_phys_buf *buffer_list, + int num_phys_buf, int acc, u64 *iova_start); +struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length, + u64 virt, int acc, struct ib_udata *); +struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *pd, int max_page_list_len); +struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device + *ibdev, + int page_list_len); +void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list); + +#endif /* __OCRDMA_VERBS_H__ */ diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig new file mode 100644 index 00000000000..495be09781b --- /dev/null +++ b/drivers/infiniband/hw/qib/Kconfig @@ -0,0 +1,15 @@ +config INFINIBAND_QIB + tristate "Intel PCIe HCA support" + depends on 64BIT + ---help--- + This is a low-level driver for Intel PCIe QLE InfiniBand host + channel adapters. This driver does not support the Intel + HyperTransport card (model QHT7140). + +config INFINIBAND_QIB_DCA + bool "QIB DCA support" + depends on INFINIBAND_QIB && DCA && SMP && !(INFINIBAND_QIB=y && DCA=m) + default y + ---help--- + Setting this enables DCA support on some Intel chip sets + with the iba7322 HCA. diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile new file mode 100644 index 00000000000..57f8103e51f --- /dev/null +++ b/drivers/infiniband/hw/qib/Makefile @@ -0,0 +1,16 @@ +obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o + +ib_qib-y := qib_cq.o qib_diag.o qib_dma.o qib_driver.o qib_eeprom.o \ + qib_file_ops.o qib_fs.o qib_init.o qib_intr.o qib_keys.o \ + qib_mad.o qib_mmap.o qib_mr.o qib_pcie.o qib_pio_copy.o \ + qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \ + qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \ + qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \ + qib_sd7220.o qib_iba7322.o qib_verbs.o + +# 6120 has no fallback if no MSI interrupts, others can do INTx +ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o + +ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o +ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o +ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h new file mode 100644 index 00000000000..c00ae093b6f --- /dev/null +++ b/drivers/infiniband/hw/qib/qib.h @@ -0,0 +1,1548 @@ +#ifndef _QIB_KERNEL_H +#define _QIB_KERNEL_H +/* + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials
+ *   provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This header file is the base header file for qlogic_ib kernel code;
+ * qib_user.h serves a similar purpose for user code.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/fs.h>
+#include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/sched.h>
+#include <linux/kthread.h>
+
+#include "qib_common.h"
+#include "qib_verbs.h"
+
+/* only s/w major version of QLogic_IB we can handle */
+#define QIB_CHIP_VERS_MAJ 2U
+
+/* don't care about this except printing */
+#define QIB_CHIP_VERS_MIN 0U
+
+/* The Organization Unique Identifier (Mfg code), and its position in GUID */
+#define QIB_OUI 0x001175
+#define QIB_OUI_LSB 40
+
+/*
+ * per driver stats, either not device- or port-specific, or
+ * summed over all of the devices and ports.
+ * They are described by name via ipathfs filesystem, so layout
+ * and number of elements can change without breaking compatibility.
+ * If members are added or deleted, qib_statnames[] in qib_fs.c must
+ * change to match.
+ */
+struct qlogic_ib_stats {
+	__u64 sps_ints; /* number of interrupts handled */
+	__u64 sps_errints; /* number of error interrupts */
+	__u64 sps_txerrs; /* tx-related packet errors */
+	__u64 sps_rcverrs; /* non-crc rcv packet errors */
+	__u64 sps_hwerrs; /* hardware errors reported (parity, etc.) */
+	__u64 sps_nopiobufs; /* no pio bufs avail from kernel */
+	__u64 sps_ctxts; /* number of contexts currently open */
+	__u64 sps_lenerrs; /* number of kernel packets where RHF != LRH len */
+	__u64 sps_buffull;
+	__u64 sps_hdrfull;
+};
+
+extern struct qlogic_ib_stats qib_stats;
+extern const struct pci_error_handlers qib_pci_err_handler;
+
+#define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ
+/*
+ * First-cut criterion for "device is active" is
+ * two thousand dwords combined Tx, Rx traffic per
+ * 5-second interval. SMA packets are 64 dwords,
+ * and occur "a few per second", presumably each way.
+ */
+#define QIB_TRAFFIC_ACTIVE_THRESHOLD (2000)
+
+/*
+ * Struct used to indicate which errors are logged in each of the
+ * error-counters that are logged to EEPROM.
A counter is incremented
+ * _once_ (saturating at 255) for each event with any bits set in
+ * the error or hwerror register masks below.
+ */
+#define QIB_EEP_LOG_CNT (4)
+struct qib_eep_log_mask {
+	u64 errs_to_log;
+	u64 hwerrs_to_log;
+};
+
+/*
+ * Below contains all data related to a single context (formerly called port).
+ */
+
+#ifdef CONFIG_DEBUG_FS
+struct qib_opcode_stats_perctx;
+#endif
+
+struct qib_ctxtdata {
+	void **rcvegrbuf;
+	dma_addr_t *rcvegrbuf_phys;
+	/* rcvhdrq base, needs mmap before useful */
+	void *rcvhdrq;
+	/* kernel virtual address where hdrqtail is updated */
+	void *rcvhdrtail_kvaddr;
+	/*
+	 * temp buffer for expected send setup, allocated at open, instead
+	 * of each setup call
+	 */
+	void *tid_pg_list;
+	/*
+	 * Shared page for kernel to signal user processes that send buffers
+	 * need disarming.  The process should call QIB_CMD_DISARM_BUFS
+	 * or QIB_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set.
+	 */
+	unsigned long *user_event_mask;
+	/* when waiting for rcv or pioavail */
+	wait_queue_head_t wait;
+	/*
+	 * rcvegr bufs base, physical; must fit in 44 bits
+	 * (so that 32 bit programs using mmap64 work)
+	 */
+	dma_addr_t rcvegr_phys;
+	/* mmap of hdrq, must fit in 44 bits */
+	dma_addr_t rcvhdrq_phys;
+	dma_addr_t rcvhdrqtailaddr_phys;
+
+	/*
+	 * number of opens (including slave sub-contexts) on this instance
+	 * (ignoring forks, dup, etc. for now)
+	 */
+	int cnt;
+	/*
+	 * how much space to leave at start of eager TID entries for
+	 * protocol use, on each TID
+	 */
+	/* ctxt number, stored here instead of calculating it */
+	unsigned ctxt;
+	/* local node of context */
+	int node_id;
+	/* non-zero if ctxt is being shared. */
+	u16 subctxt_cnt;
+	/* non-zero if ctxt is being shared. */
+	u16 subctxt_id;
+	/* number of eager TID entries. */
+	u16 rcvegrcnt;
+	/* index of first eager TID entry.
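+	 *
+	 * Illustrative note (hypothetical usage, not from this patch): with
+	 * rcvegrbufs_perchunk below a power of two and
+	 * rcvegrbufs_perchunk_shift its ilog2, an eager buffer index
+	 * resolves to a chunk and an offset:
+	 *
+	 *	chunk = idx >> rcd->rcvegrbufs_perchunk_shift;
+	 *	off   = idx & (rcd->rcvegrbufs_perchunk - 1);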
*/ + u16 rcvegr_tid_base; + /* number of pio bufs for this ctxt (all procs, if shared) */ + u32 piocnt; + /* first pio buffer for this ctxt */ + u32 pio_base; + /* chip offset of PIO buffers for this ctxt */ + u32 piobufs; + /* how many alloc_pages() chunks in rcvegrbuf_pages */ + u32 rcvegrbuf_chunks; + /* how many egrbufs per chunk */ + u16 rcvegrbufs_perchunk; + /* ilog2 of above */ + u16 rcvegrbufs_perchunk_shift; + /* order for rcvegrbuf_pages */ + size_t rcvegrbuf_size; + /* rcvhdrq size (for freeing) */ + size_t rcvhdrq_size; + /* per-context flags for fileops/intr communication */ + unsigned long flag; + /* next expected TID to check when looking for free */ + u32 tidcursor; + /* WAIT_RCV that timed out, no interrupt */ + u32 rcvwait_to; + /* WAIT_PIO that timed out, no interrupt */ + u32 piowait_to; + /* WAIT_RCV already happened, no wait */ + u32 rcvnowait; + /* WAIT_PIO already happened, no wait */ + u32 pionowait; + /* total number of polled urgent packets */ + u32 urgent; + /* saved total number of polled urgent packets for poll edge trigger */ + u32 urgent_poll; + /* pid of process using this ctxt */ + pid_t pid; + pid_t subpid[QLOGIC_IB_MAX_SUBCTXT]; + /* same size as task_struct .comm[], command that opened context */ + char comm[16]; + /* pkeys set by this use of this ctxt */ + u16 pkeys[4]; + /* so file ops can get at unit */ + struct qib_devdata *dd; + /* so funcs that need physical port can get it easily */ + struct qib_pportdata *ppd; + /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ + void *subctxt_uregbase; + /* An array of pages for the eager receive buffers * N */ + void *subctxt_rcvegrbuf; + /* An array of pages for the eager header queue entries * N */ + void *subctxt_rcvhdr_base; + /* The version of the library which opened this ctxt */ + u32 userversion; + /* Bitmask of active slaves */ + u32 active_slaves; + /* Type of packets or conditions we want to poll for */ + u16 poll_type; + /* receive packet sequence counter */ + u8 seq_cnt; + u8 redirect_seq_cnt; + /* ctxt rcvhdrq head offset */ + u32 head; + /* lookaside fields */ + struct qib_qp *lookaside_qp; + u32 lookaside_qpn; + /* QPs waiting for context processing */ + struct list_head qp_wait_list; +#ifdef CONFIG_DEBUG_FS + /* verbs stats per CTX */ + struct qib_opcode_stats_perctx *opstats; +#endif +}; + +struct qib_sge_state; + +struct qib_sdma_txreq { + int flags; + int sg_count; + dma_addr_t addr; + void (*callback)(struct qib_sdma_txreq *, int); + u16 start_idx; /* sdma private */ + u16 next_descq_idx; /* sdma private */ + struct list_head list; /* sdma private */ +}; + +struct qib_sdma_desc { + __le64 qw[2]; +}; + +struct qib_verbs_txreq { + struct qib_sdma_txreq txreq; + struct qib_qp *qp; + struct qib_swqe *wqe; + u32 dwords; + u16 hdr_dwords; + u16 hdr_inx; + struct qib_pio_header *align_buf; + struct qib_mregion *mr; + struct qib_sge_state *ss; +}; + +#define QIB_SDMA_TXREQ_F_USELARGEBUF 0x1 +#define QIB_SDMA_TXREQ_F_HEADTOHOST 0x2 +#define QIB_SDMA_TXREQ_F_INTREQ 0x4 +#define QIB_SDMA_TXREQ_F_FREEBUF 0x8 +#define QIB_SDMA_TXREQ_F_FREEDESC 0x10 + +#define QIB_SDMA_TXREQ_S_OK 0 +#define QIB_SDMA_TXREQ_S_SENDERROR 1 +#define QIB_SDMA_TXREQ_S_ABORTED 2 +#define QIB_SDMA_TXREQ_S_SHUTDOWN 3 + +/* + * Get/Set IB link-level config parameters for f_get/set_ib_cfg() + * Mostly for MADs that set or query link parameters, also ipath + * config interfaces + */ +#define QIB_IB_CFG_LIDLMC 0 /* LID (LS16b) and Mask (MS16b) */ +#define QIB_IB_CFG_LWID_ENB 2 /* allowed Link-width */ +#define 
QIB_IB_CFG_LWID 3 /* currently active Link-width */
+#define QIB_IB_CFG_SPD_ENB 4 /* allowed Link speeds */
+#define QIB_IB_CFG_SPD 5 /* current Link speed */
+#define QIB_IB_CFG_RXPOL_ENB 6 /* Auto-RX-polarity enable */
+#define QIB_IB_CFG_LREV_ENB 7 /* Auto-Lane-reversal enable */
+#define QIB_IB_CFG_LINKLATENCY 8 /* Link Latency (IB1.2 only) */
+#define QIB_IB_CFG_HRTBT 9 /* IB heartbeat off/enable/auto; DDR/QDR only */
+#define QIB_IB_CFG_OP_VLS 10 /* operational VLs */
+#define QIB_IB_CFG_VL_HIGH_CAP 11 /* num of VL high priority weights */
+#define QIB_IB_CFG_VL_LOW_CAP 12 /* num of VL low priority weights */
+#define QIB_IB_CFG_OVERRUN_THRESH 13 /* IB overrun threshold */
+#define QIB_IB_CFG_PHYERR_THRESH 14 /* IB PHY error threshold */
+#define QIB_IB_CFG_LINKDEFAULT 15 /* IB link default (sleep/poll) */
+#define QIB_IB_CFG_PKEYS 16 /* update partition keys */
+#define QIB_IB_CFG_MTU 17 /* update MTU in IBC */
+#define QIB_IB_CFG_LSTATE 18 /* update linkcmd and linkinitcmd in IBC */
+#define QIB_IB_CFG_VL_HIGH_LIMIT 19
+#define QIB_IB_CFG_PMA_TICKS 20 /* PMA sample tick resolution */
+#define QIB_IB_CFG_PORT 21 /* switch port we are connected to */
+
+/*
+ * for CFG_LSTATE: LINKCMD in upper 16 bits, LINKINITCMD in lower 16.
+ * IB_LINKINITCMD_POLL and SLEEP are also used as set/get values for
+ * QIB_IB_CFG_LINKDEFAULT cmd
+ */
+#define IB_LINKCMD_DOWN (0 << 16)
+#define IB_LINKCMD_ARMED (1 << 16)
+#define IB_LINKCMD_ACTIVE (2 << 16)
+#define IB_LINKINITCMD_NOP 0
+#define IB_LINKINITCMD_POLL 1
+#define IB_LINKINITCMD_SLEEP 2
+#define IB_LINKINITCMD_DISABLE 3
+
+/*
+ * valid states passed to qib_set_linkstate() user call
+ */
+#define QIB_IB_LINKDOWN 0
+#define QIB_IB_LINKARM 1
+#define QIB_IB_LINKACTIVE 2
+#define QIB_IB_LINKDOWN_ONLY 3
+#define QIB_IB_LINKDOWN_SLEEP 4
+#define QIB_IB_LINKDOWN_DISABLE 5
+
+/*
+ * These 7 values (SDR, DDR, and QDR may be ORed for auto-speed
+ * negotiation) are used for the 3rd argument to path_f_set_ib_cfg
+ * with cmd QIB_IB_CFG_SPD_ENB, by direct calls or via sysfs.  They
+ * are also the possible values for qib_link_speed_enabled and active.
+ * The values were chosen to match values used within the IB spec.
+ */
+#define QIB_IB_SDR 1
+#define QIB_IB_DDR 2
+#define QIB_IB_QDR 4
+
+#define QIB_DEFAULT_MTU 4096
+
+/* max number of IB ports supported per HCA */
+#define QIB_MAX_IB_PORTS 2
+
+/*
+ * Possible IB config parameters for f_get/set_ib_table()
+ */
+#define QIB_IB_TBL_VL_HIGH_ARB 1 /* Get/set VL high priority weights */
+#define QIB_IB_TBL_VL_LOW_ARB 2 /* Get/set VL low priority weights */
+
+/*
+ * Possible "operations" for f_rcvctrl(ppd, op, ctxt)
+ * these are bits so they can be combined, e.g.
+ * QIB_RCVCTRL_INTRAVAIL_ENB | QIB_RCVCTRL_CTXT_ENB
+ */
+#define QIB_RCVCTRL_TAILUPD_ENB 0x01
+#define QIB_RCVCTRL_TAILUPD_DIS 0x02
+#define QIB_RCVCTRL_CTXT_ENB 0x04
+#define QIB_RCVCTRL_CTXT_DIS 0x08
+#define QIB_RCVCTRL_INTRAVAIL_ENB 0x10
+#define QIB_RCVCTRL_INTRAVAIL_DIS 0x20
+#define QIB_RCVCTRL_PKEY_ENB 0x40 /* Note, default is enabled */
+#define QIB_RCVCTRL_PKEY_DIS 0x80
+#define QIB_RCVCTRL_BP_ENB 0x0100
+#define QIB_RCVCTRL_BP_DIS 0x0200
+#define QIB_RCVCTRL_TIDFLOW_ENB 0x0400
+#define QIB_RCVCTRL_TIDFLOW_DIS 0x0800
+
+/*
+ * Possible "operations" for f_sendctrl(ppd, op, var)
+ * these are bits so they can be combined, e.g.
+ * QIB_SENDCTRL_BUFAVAIL_ENB | QIB_SENDCTRL_ENB
+ * Some operations (e.g. DISARM, ABORT) are known to
+ * be "one-shot", so do not modify shadow.
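+ *
+ * Illustrative only (hypothetical call sites, not from this patch): the
+ * op bits are passed through the per-chip hooks in struct qib_devdata:
+ *
+ *	dd->f_rcvctrl(ppd, QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_INTRAVAIL_ENB,
+ *		      ctxt);
+ *	dd->f_sendctrl(ppd, QIB_SENDCTRL_AVAIL_ENB);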
+ */ +#define QIB_SENDCTRL_DISARM (0x1000) +#define QIB_SENDCTRL_DISARM_BUF(bufn) ((bufn) | QIB_SENDCTRL_DISARM) + /* available (0x2000) */ +#define QIB_SENDCTRL_AVAIL_DIS (0x4000) +#define QIB_SENDCTRL_AVAIL_ENB (0x8000) +#define QIB_SENDCTRL_AVAIL_BLIP (0x10000) +#define QIB_SENDCTRL_SEND_DIS (0x20000) +#define QIB_SENDCTRL_SEND_ENB (0x40000) +#define QIB_SENDCTRL_FLUSH (0x80000) +#define QIB_SENDCTRL_CLEAR (0x100000) +#define QIB_SENDCTRL_DISARM_ALL (0x200000) + +/* + * These are the generic indices for requesting per-port + * counter values via the f_portcntr function. They + * are always returned as 64 bit values, although most + * are 32 bit counters. + */ +/* send-related counters */ +#define QIBPORTCNTR_PKTSEND 0U +#define QIBPORTCNTR_WORDSEND 1U +#define QIBPORTCNTR_PSXMITDATA 2U +#define QIBPORTCNTR_PSXMITPKTS 3U +#define QIBPORTCNTR_PSXMITWAIT 4U +#define QIBPORTCNTR_SENDSTALL 5U +/* receive-related counters */ +#define QIBPORTCNTR_PKTRCV 6U +#define QIBPORTCNTR_PSRCVDATA 7U +#define QIBPORTCNTR_PSRCVPKTS 8U +#define QIBPORTCNTR_RCVEBP 9U +#define QIBPORTCNTR_RCVOVFL 10U +#define QIBPORTCNTR_WORDRCV 11U +/* IB link related error counters */ +#define QIBPORTCNTR_RXLOCALPHYERR 12U +#define QIBPORTCNTR_RXVLERR 13U +#define QIBPORTCNTR_ERRICRC 14U +#define QIBPORTCNTR_ERRVCRC 15U +#define QIBPORTCNTR_ERRLPCRC 16U +#define QIBPORTCNTR_BADFORMAT 17U +#define QIBPORTCNTR_ERR_RLEN 18U +#define QIBPORTCNTR_IBSYMBOLERR 19U +#define QIBPORTCNTR_INVALIDRLEN 20U +#define QIBPORTCNTR_UNSUPVL 21U +#define QIBPORTCNTR_EXCESSBUFOVFL 22U +#define QIBPORTCNTR_ERRLINK 23U +#define QIBPORTCNTR_IBLINKDOWN 24U +#define QIBPORTCNTR_IBLINKERRRECOV 25U +#define QIBPORTCNTR_LLI 26U +/* other error counters */ +#define QIBPORTCNTR_RXDROPPKT 27U +#define QIBPORTCNTR_VL15PKTDROP 28U +#define QIBPORTCNTR_ERRPKEY 29U +#define QIBPORTCNTR_KHDROVFL 30U +/* sampling counters (these are actually control registers) */ +#define QIBPORTCNTR_PSINTERVAL 31U +#define QIBPORTCNTR_PSSTART 32U +#define QIBPORTCNTR_PSSTAT 33U + +/* how often we check for packet activity for "power on hours (in seconds) */ +#define ACTIVITY_TIMER 5 + +#define MAX_NAME_SIZE 64 + +#ifdef CONFIG_INFINIBAND_QIB_DCA +struct qib_irq_notify; +#endif + +struct qib_msix_entry { + struct msix_entry msix; + void *arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + int dca; + int rcv; + struct qib_irq_notify *notifier; +#endif + char name[MAX_NAME_SIZE]; + cpumask_var_t mask; +}; + +/* Below is an opaque struct. Each chip (device) can maintain + * private data needed for its operation, but not germane to the + * rest of the driver. 
For convenience, we define another that + * is chip-specific, per-port + */ +struct qib_chip_specific; +struct qib_chipport_specific; + +enum qib_sdma_states { + qib_sdma_state_s00_hw_down, + qib_sdma_state_s10_hw_start_up_wait, + qib_sdma_state_s20_idle, + qib_sdma_state_s30_sw_clean_up_wait, + qib_sdma_state_s40_hw_clean_up_wait, + qib_sdma_state_s50_hw_halt_wait, + qib_sdma_state_s99_running, +}; + +enum qib_sdma_events { + qib_sdma_event_e00_go_hw_down, + qib_sdma_event_e10_go_hw_start, + qib_sdma_event_e20_hw_started, + qib_sdma_event_e30_go_running, + qib_sdma_event_e40_sw_cleaned, + qib_sdma_event_e50_hw_cleaned, + qib_sdma_event_e60_hw_halted, + qib_sdma_event_e70_go_idle, + qib_sdma_event_e7220_err_halted, + qib_sdma_event_e7322_err_halted, + qib_sdma_event_e90_timer_tick, +}; + +extern char *qib_sdma_state_names[]; +extern char *qib_sdma_event_names[]; + +struct sdma_set_state_action { + unsigned op_enable:1; + unsigned op_intenable:1; + unsigned op_halt:1; + unsigned op_drain:1; + unsigned go_s99_running_tofalse:1; + unsigned go_s99_running_totrue:1; +}; + +struct qib_sdma_state { + struct kref kref; + struct completion comp; + enum qib_sdma_states current_state; + struct sdma_set_state_action *set_state_action; + unsigned current_op; + unsigned go_s99_running; + unsigned first_sendbuf; + unsigned last_sendbuf; /* really last +1 */ + /* debugging/devel */ + enum qib_sdma_states previous_state; + unsigned previous_op; + enum qib_sdma_events last_event; +}; + +struct xmit_wait { + struct timer_list timer; + u64 counter; + u8 flags; + struct cache { + u64 psxmitdata; + u64 psrcvdata; + u64 psxmitpkts; + u64 psrcvpkts; + u64 psxmitwait; + } counter_cache; +}; + +/* + * The structure below encapsulates data relevant to a physical IB Port. + * Current chips support only one such port, but the separation + * clarifies things a bit. Note that to conform to IB conventions, + * port-numbers are one-based. The first or only port is port1. + */ +struct qib_pportdata { + struct qib_ibport ibport_data; + + struct qib_devdata *dd; + struct qib_chippport_specific *cpspec; /* chip-specific per-port */ + struct kobject pport_kobj; + struct kobject pport_cc_kobj; + struct kobject sl2vl_kobj; + struct kobject diagc_kobj; + + /* GUID for this interface, in network order */ + __be64 guid; + + /* QIB_POLL, etc. link-state specific flags, per port */ + u32 lflags; + /* qib_lflags driver is waiting for */ + u32 state_wanted; + spinlock_t lflags_lock; + + /* ref count for each pkey */ + atomic_t pkeyrefs[4]; + + /* + * this address is mapped readonly into user processes so they can + * get status cheaply, whenever they want. 
One qword of status per port
+	 */
+	u64 *statusp;
+
+	/* SendDMA related entries */
+
+	/* read mostly */
+	struct qib_sdma_desc *sdma_descq;
+	struct workqueue_struct *qib_wq;
+	struct qib_sdma_state sdma_state;
+	dma_addr_t sdma_descq_phys;
+	volatile __le64 *sdma_head_dma; /* DMA'ed by chip */
+	dma_addr_t sdma_head_phys;
+	u16 sdma_descq_cnt;
+
+	/* read/write using lock */
+	spinlock_t sdma_lock ____cacheline_aligned_in_smp;
+	struct list_head sdma_activelist;
+	struct list_head sdma_userpending;
+	u64 sdma_descq_added;
+	u64 sdma_descq_removed;
+	u16 sdma_descq_tail;
+	u16 sdma_descq_head;
+	u8 sdma_generation;
+	u8 sdma_intrequest;
+
+	struct tasklet_struct sdma_sw_clean_up_task
+		____cacheline_aligned_in_smp;
+
+	wait_queue_head_t state_wait; /* for state_wanted */
+
+	/* HoL blocking for SMP replies */
+	unsigned hol_state;
+	struct timer_list hol_timer;
+
+	/*
+	 * Shadow copies of registers; size indicates read access size.
+	 * Most of them are readonly, but some are write-only registers,
+	 * where we manipulate the bits in the shadow copy, and then write
+	 * the shadow copy to qlogic_ib.
+	 *
+	 * We deliberately make most of these 32 bits, since they have
+	 * restricted range.  For any that we read, we want to generate 32
+	 * bit accesses, since Opteron will generate 2 separate 32 bit HT
+	 * transactions for a 64 bit read, and we want to avoid unnecessary
+	 * bus transactions.
+	 */
+
+	/* This is the 64 bit group */
+	/* last ibcstatus.  opaque outside chip-specific code */
+	u64 lastibcstat;
+
+	/* these are the "32 bit" regs */
+
+	/*
+	 * the following two are 32-bit bitmasks, but {test,clear,set}_bit
+	 * all expect bit fields to be "unsigned long"
+	 */
+	unsigned long p_rcvctrl; /* shadow per-port rcvctrl */
+	unsigned long p_sendctrl; /* shadow per-port sendctrl */
+
+	u32 ibmtu; /* The MTU programmed for this unit */
+	/*
+	 * Current max size IB packet (in bytes) including IB headers, that
+	 * we can send.  Changes when ibmtu changes.
+	 */
+	u32 ibmaxlen;
+	/*
+	 * ibmaxlen at init time, limited by chip and by receive buffer
+	 * size.  Not changed after init.
+	 */
+	u32 init_ibmaxlen;
+	/* LID programmed for this instance */
+	u16 lid;
+	/* list of pkeys programmed; 0 if not set */
+	u16 pkeys[4];
+	/* LID mask control */
+	u8 lmc;
+	u8 link_width_supported;
+	u8 link_speed_supported;
+	u8 link_width_enabled;
+	u8 link_speed_enabled;
+	u8 link_width_active;
+	u8 link_speed_active;
+	u8 vls_supported;
+	u8 vls_operational;
+	/* Rx Polarity inversion (compensate for ~tx on partner) */
+	u8 rx_pol_inv;
+
+	u8 hw_pidx; /* physical port index */
+	u8 port;    /* IB port number and index into dd->pports - 1 */
+
+	u8 delay_mult;
+
+	/* used to override LED behavior */
+	u8 led_override; /* Substituted for normal value, if non-zero */
+	u16 led_override_timeoff; /* delta to next timer event */
+	u8 led_override_vals[2]; /* Alternates per blink-frame */
+	u8 led_override_phase; /* Just counts, LSB picks from vals[] */
+	atomic_t led_override_timer_active;
+	/* Used to flash LEDs in override mode */
+	struct timer_list led_override_timer;
+	struct xmit_wait cong_stats;
+	struct timer_list symerr_clear_timer;
+
+	/* Synchronize access between driver writes and sysfs reads */
+	spinlock_t cc_shadow_lock
+		____cacheline_aligned_in_smp;
+
+	/* Shadow copy of the congestion control table */
+	struct cc_table_shadow *ccti_entries_shadow;
+
+	/* Shadow copy of the congestion control entries */
+	struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow;
+
+	/* List of congestion control table entries */
+	struct ib_cc_table_entry_shadow *ccti_entries;
+
+	/* 16 congestion entries with each entry corresponding to a SL */
+	struct ib_cc_congestion_entry_shadow *congestion_entries;
+
+	/* Maximum number of congestion control entries that the agent expects
+	 * the manager to send.
+	 */
+	u16 cc_supported_table_entries;
+
+	/* Total number of congestion control table entries */
+	u16 total_cct_entry;
+
+	/* Bit map identifying service level */
+	u16 cc_sl_control_map;
+
+	/* maximum congestion control table index */
+	u16 ccti_limit;
+
+	/* CA's max number of 64 entry units in the congestion control table */
+	u8 cc_max_table_entries;
+};
+
+/* Observers. Not to be taken lightly, possibly not to ship. */
+/*
+ * If a diag read or write is to (bottom <= offset <= top),
+ * the "hook" is called, allowing, e.g., shadows to be
+ * updated in sync with the driver. struct diag_observer
+ * is the "visible" part.
+ */
+struct diag_observer;
+
+typedef int (*diag_hook) (struct qib_devdata *dd,
+	const struct diag_observer *op,
+	u32 offs, u64 *data, u64 mask, int only_32);
+
+struct diag_observer {
+	diag_hook hook;
+	u32 bottom;
+	u32 top;
+};
+
+extern int qib_register_observer(struct qib_devdata *dd,
+	const struct diag_observer *op);
+
+/* Only declared here, not defined.  Private to diags */
+struct diag_observer_list_elt;
+
+/* device data struct now contains only "general per-device" info.
+ * fields related to a physical IB port are in a qib_pportdata struct,
+ * described above, while fields only used by a particular chip-type are in
+ * a qib_chipdata struct, whose contents are opaque to this file.
+ */
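+/*
+ * Illustrative only (hypothetical usage, not a real call site): the
+ * structures connect through the navigation helpers defined later in
+ * this header:
+ *
+ *	struct qib_pportdata *ppd = &dd->pport[port - 1];  (ports are 1-based)
+ *	struct qib_devdata *back  = ppd->dd;               (back-pointer)
+ *	struct qib_ibport *ibp    = &ppd->ibport_data;
+ *	ppd == ppd_from_ibp(ibp);                          (container_of())
+ */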
+struct qib_devdata {
+	struct qib_ibdev verbs_dev; /* must be first */
+	struct list_head list;
+	/* pointers to related structs for this device */
+	/* pci access data structure */
+	struct pci_dev *pcidev;
+	struct cdev *user_cdev;
+	struct cdev *diag_cdev;
+	struct device *user_device;
+	struct device *diag_device;
+
+	/* mem-mapped pointer to base of chip regs */
+	u64 __iomem *kregbase;
+	/* end of mem-mapped chip space excluding sendbuf and user regs */
+	u64 __iomem *kregend;
+	/* physical address of chip for io_remap, etc. */
+	resource_size_t physaddr;
+	/* qib_cfgctxts pointers */
+	struct qib_ctxtdata **rcd; /* Receive Context Data */
+
+	/* qib_pportdata, points to array of (physical) port-specific
+	 * data structs, indexed by pidx (0..n-1)
+	 */
+	struct qib_pportdata *pport;
+	struct qib_chip_specific *cspec; /* chip-specific */
+
+	/* kvirt address of 1st 2k pio buffer */
+	void __iomem *pio2kbase;
+	/* kvirt address of 1st 4k pio buffer */
+	void __iomem *pio4kbase;
+	/* mem-mapped pointer to base of PIO buffers (if using WC PAT) */
+	void __iomem *piobase;
+	/* mem-mapped pointer to base of user chip regs (if using WC PAT) */
+	u64 __iomem *userbase;
+	void __iomem *piovl15base; /* base of VL15 buffers, if not WC */
+	/*
+	 * points to area where PIOavail registers will be DMA'ed.
+	 * Has to be on a page of its own, because the page will be
+	 * mapped into user program space.  This copy is *ONLY* ever
+	 * written by DMA, not by the driver!  Need a copy per device
+	 * when we get to multiple devices
+	 */
+	volatile __le64 *pioavailregs_dma; /* DMA'ed by chip */
+	/* physical address where updates occur */
+	dma_addr_t pioavailregs_phys;
+
+	/* device-specific implementations of functions needed by
+	 * common code.  Contrary to previous consensus, we can't
+	 * really just point to a device-specific table, because we
+	 * may need to "bend", e.g.
*_f_put_tid
+	 */
+	/* fallback to alternate interrupt type if possible */
+	int (*f_intr_fallback)(struct qib_devdata *);
+	/* hard reset chip */
+	int (*f_reset)(struct qib_devdata *);
+	void (*f_quiet_serdes)(struct qib_pportdata *);
+	int (*f_bringup_serdes)(struct qib_pportdata *);
+	int (*f_early_init)(struct qib_devdata *);
+	void (*f_clear_tids)(struct qib_devdata *, struct qib_ctxtdata *);
+	void (*f_put_tid)(struct qib_devdata *, u64 __iomem*,
+				u32, unsigned long);
+	void (*f_cleanup)(struct qib_devdata *);
+	void (*f_setextled)(struct qib_pportdata *, u32);
+	/* fill out chip-specific fields */
+	int (*f_get_base_info)(struct qib_ctxtdata *, struct qib_base_info *);
+	/* free irq */
+	void (*f_free_irq)(struct qib_devdata *);
+	struct qib_message_header *(*f_get_msgheader)
+			(struct qib_devdata *, __le32 *);
+	void (*f_config_ctxts)(struct qib_devdata *);
+	int (*f_get_ib_cfg)(struct qib_pportdata *, int);
+	int (*f_set_ib_cfg)(struct qib_pportdata *, int, u32);
+	int (*f_set_ib_loopback)(struct qib_pportdata *, const char *);
+	int (*f_get_ib_table)(struct qib_pportdata *, int, void *);
+	int (*f_set_ib_table)(struct qib_pportdata *, int, void *);
+	u32 (*f_iblink_state)(u64);
+	u8 (*f_ibphys_portstate)(u64);
+	void (*f_xgxs_reset)(struct qib_pportdata *);
+	/* per chip actions needed for IB Link up/down changes */
+	int (*f_ib_updown)(struct qib_pportdata *, int, u64);
+	u32 __iomem *(*f_getsendbuf)(struct qib_pportdata *, u64, u32 *);
+	/* Read/modify/write of GPIO pins (potentially chip-specific) */
+	int (*f_gpio_mod)(struct qib_devdata *dd, u32 out, u32 dir,
+		u32 mask);
+	/* Enable writes to config EEPROM (if supported) */
+	int (*f_eeprom_wen)(struct qib_devdata *dd, int wen);
+	/*
+	 * modify rcvctrl shadow[s] and write to appropriate chip-regs.
+	 * see above QIB_RCVCTRL_xxx_ENB/DIS for operations.
+	 * (ctxt == -1) means "all contexts", only meaningful for
+	 * clearing. Could remove if chip_spec shutdown properly done.
+	 */
+	void (*f_rcvctrl)(struct qib_pportdata *, unsigned int op,
+		int ctxt);
+	/* Read/modify/write sendctrl appropriately for op and port.
*/ + void (*f_sendctrl)(struct qib_pportdata *, u32 op); + void (*f_set_intr_state)(struct qib_devdata *, u32); + void (*f_set_armlaunch)(struct qib_devdata *, u32); + void (*f_wantpiobuf_intr)(struct qib_devdata *, u32); + int (*f_late_initreg)(struct qib_devdata *); + int (*f_init_sdma_regs)(struct qib_pportdata *); + u16 (*f_sdma_gethead)(struct qib_pportdata *); + int (*f_sdma_busy)(struct qib_pportdata *); + void (*f_sdma_update_tail)(struct qib_pportdata *, u16); + void (*f_sdma_set_desc_cnt)(struct qib_pportdata *, unsigned); + void (*f_sdma_sendctrl)(struct qib_pportdata *, unsigned); + void (*f_sdma_hw_clean_up)(struct qib_pportdata *); + void (*f_sdma_hw_start_up)(struct qib_pportdata *); + void (*f_sdma_init_early)(struct qib_pportdata *); + void (*f_set_cntr_sample)(struct qib_pportdata *, u32, u32); + void (*f_update_usrhead)(struct qib_ctxtdata *, u64, u32, u32, u32); + u32 (*f_hdrqempty)(struct qib_ctxtdata *); + u64 (*f_portcntr)(struct qib_pportdata *, u32); + u32 (*f_read_cntrs)(struct qib_devdata *, loff_t, char **, + u64 **); + u32 (*f_read_portcntrs)(struct qib_devdata *, loff_t, u32, + char **, u64 **); + u32 (*f_setpbc_control)(struct qib_pportdata *, u32, u8, u8); + void (*f_initvl15_bufs)(struct qib_devdata *); + void (*f_init_ctxt)(struct qib_ctxtdata *); + void (*f_txchk_change)(struct qib_devdata *, u32, u32, u32, + struct qib_ctxtdata *); + void (*f_writescratch)(struct qib_devdata *, u32); + int (*f_tempsense_rd)(struct qib_devdata *, int regnum); +#ifdef CONFIG_INFINIBAND_QIB_DCA + int (*f_notify_dca)(struct qib_devdata *, unsigned long event); +#endif + + char *boardname; /* human readable board info */ + + /* template for writing TIDs */ + u64 tidtemplate; + /* value to write to free TIDs */ + u64 tidinvalid; + + /* number of registers used for pioavail */ + u32 pioavregs; + /* device (not port) flags, basically device capabilities */ + u32 flags; + /* last buffer for user use */ + u32 lastctxt_piobuf; + + /* reset value */ + u64 z_int_counter; + /* percpu intcounter */ + u64 __percpu *int_counter; + + /* pio bufs allocated per ctxt */ + u32 pbufsctxt; + /* if remainder on bufs/ctxt, ctxts < extrabuf get 1 extra */ + u32 ctxts_extrabuf; + /* + * number of ctxts configured as max; zero is set to number chip + * supports, less gives more pio bufs/ctxt, etc. 
+	 */
+	u32 cfgctxts;
+	/*
+	 * number of ctxts available for PSM open
+	 */
+	u32 freectxts;
+
+	/*
+	 * hint that we should update pioavailshadow before
+	 * looking for a PIO buffer
+	 */
+	u32 upd_pio_shadow;
+
+	/* internal debugging stats */
+	u32 maxpkts_call;
+	u32 avgpkts_call;
+	u64 nopiobufs;
+
+	/* PCI Vendor ID (here for NodeInfo) */
+	u16 vendorid;
+	/* PCI Device ID (here for NodeInfo) */
+	u16 deviceid;
+	/* for write combining settings */
+	unsigned long wc_cookie;
+	unsigned long wc_base;
+	unsigned long wc_len;
+
+	/* shadow copy of struct page *'s for exp tid pages */
+	struct page **pageshadow;
+	/* shadow copy of dma handles for exp tid pages */
+	dma_addr_t *physshadow;
+	u64 __iomem *egrtidbase;
+	spinlock_t sendctrl_lock; /* protect changes to sendctrl shadow */
+	/* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */
+	spinlock_t uctxt_lock; /* rcd and user context changes */
+	/*
+	 * per unit status, see also portdata statusp
+	 * mapped readonly into user processes so they can get unit and
+	 * IB link status cheaply
+	 */
+	u64 *devstatusp;
+	char *freezemsg; /* freeze msg if hw error put chip in freeze */
+	u32 freezelen; /* max length of freezemsg */
+	/* timer used to prevent stats overflow, error throttling, etc. */
+	struct timer_list stats_timer;
+
+	/* timer to verify interrupts work, and fallback if possible */
+	struct timer_list intrchk_timer;
+	unsigned long ureg_align; /* user register alignment */
+
+	/*
+	 * Protects pioavailshadow, pioavailkernel, pio_need_disarm, and
+	 * pio_writing.
+	 */
+	spinlock_t pioavail_lock;
+	/*
+	 * index of last buffer to optimize search for next
+	 */
+	u32 last_pio;
+	/*
+	 * min kernel pio buffer to optimize search
+	 */
+	u32 min_kernel_pio;
+	/*
+	 * Shadow copies of registers; size indicates read access size.
+	 * Most of them are readonly, but some are write-only registers,
+	 * where we manipulate the bits in the shadow copy, and then write
+	 * the shadow copy to qlogic_ib.
+	 *
+	 * We deliberately make most of these 32 bits, since they have
+	 * restricted range.  For any that we read, we want to generate 32
+	 * bit accesses, since Opteron will generate 2 separate 32 bit HT
+	 * transactions for a 64 bit read, and we want to avoid unnecessary
+	 * bus transactions.
+	 */
+
+	/* This is the 64 bit group */
+
+	unsigned long pioavailshadow[6];
+	/* bitmap of send buffers available for the kernel to use with PIO. */
+	unsigned long pioavailkernel[6];
+	/* bitmap of send buffers which need to be disarmed. */
+	unsigned long pio_need_disarm[3];
+	/* bitmap of send buffers which are being written to.
+	 */
+	unsigned long pio_writing[3];
+	/* kr_revision shadow */
+	u64 revision;
+	/* Base GUID for device (from eeprom, network order) */
+	__be64 base_guid;
+
+	/*
+	 * kr_sendpiobufbase value (chip offset of pio buffers), and the
+	 * base of the 2KB buffers (user processes only use 2K)
+	 */
+	u64 piobufbase;
+	u32 pio2k_bufbase;
+
+	/* these are the "32 bit" regs */
+
+	/* number of GUIDs in the flash for this interface */
+	u32 nguid;
+	/*
+	 * the following two are 32-bit bitmasks, but {test,clear,set}_bit
+	 * all expect bit fields to be "unsigned long"
+	 */
+	unsigned long rcvctrl; /* shadow per device rcvctrl */
+	unsigned long sendctrl; /* shadow per device sendctrl */
+
+	/* value we put in kr_rcvhdrcnt */
+	u32 rcvhdrcnt;
+	/* value we put in kr_rcvhdrsize */
+	u32 rcvhdrsize;
+	/* value we put in kr_rcvhdrentsize */
+	u32 rcvhdrentsize;
+	/* kr_ctxtcnt value */
+	u32 ctxtcnt;
+	/* kr_pagealign value */
+	u32 palign;
+	/* number of "2KB" PIO buffers */
+	u32 piobcnt2k;
+	/* size in bytes of "2KB" PIO buffers */
+	u32 piosize2k;
+	/* max usable size in dwords of a "2KB" PIO buffer before going "4KB" */
+	u32 piosize2kmax_dwords;
+	/* number of "4KB" PIO buffers */
+	u32 piobcnt4k;
+	/* size in bytes of "4KB" PIO buffers */
+	u32 piosize4k;
+	/* kr_rcvegrbase value */
+	u32 rcvegrbase;
+	/* kr_rcvtidbase value */
+	u32 rcvtidbase;
+	/* kr_rcvtidcnt value */
+	u32 rcvtidcnt;
+	/* kr_userregbase */
+	u32 uregbase;
+	/* shadow the control register contents */
+	u32 control;
+
+	/* chip address space used by 4k pio buffers */
+	u32 align4k;
+	/* size of each rcvegrbuffer */
+	u16 rcvegrbufsize;
+	/* log2 of above */
+	u16 rcvegrbufsize_shift;
+	/* localbus width (1, 2, 4, 8, 16, 32) from config space */
+	u32 lbus_width;
+	/* localbus speed in MHz */
+	u32 lbus_speed;
+	int unit; /* unit # of this chip */
+
+	/* start of CHIP_SPEC move to chipspec, but need code changes */
+	/* low and high portions of MSI capability/vector */
+	u32 msi_lo;
+	/* saved after PCIe init for restore after reset */
+	u32 msi_hi;
+	/* MSI data (vector) saved for restore */
+	u16 msi_data;
+	/* so we can rewrite it after a chip reset */
+	u32 pcibar0;
+	/* so we can rewrite it after a chip reset */
+	u32 pcibar1;
+	u64 rhdrhead_intr_off;
+
+	/*
+	 * ASCII serial number, from flash, large enough for original
+	 * all digit strings, and longer QLogic serial number format
+	 */
+	u8 serial[16];
+	/* human readable board version */
+	u8 boardversion[96];
+	u8 lbus_info[32]; /* human readable localbus info */
+	/* chip major rev, from qib_revision */
+	u8 majrev;
+	/* chip minor rev, from qib_revision */
+	u8 minrev;
+
+	/* Misc small ints */
+	/* Number of physical ports available */
+	u8 num_pports;
+	/* Lowest context number which can be used by user processes */
+	u8 first_user_ctxt;
+	u8 n_krcv_queues;
+	u8 qpn_mask;
+	u8 skip_kctxt_mask;
+
+	u16 rhf_offset; /* offset of RHF within receive header entry */
+
+	/*
+	 * GPIO pins for twsi-connected devices, and device code for eeprom
+	 */
+	u8 gpio_sda_num;
+	u8 gpio_scl_num;
+	u8 twsi_eeprom_dev;
+	u8 board_atten;
+
+	/* Support (including locks) for EEPROM logging of errors and time */
+	/* control access to actual counters, timer */
+	spinlock_t eep_st_lock;
+	/* control high-level access to EEPROM */
+	struct mutex eep_lock;
+	uint64_t traffic_wds;
+	/* active time is kept in seconds, but logged in hours */
+	atomic_t active_time;
+	/* Below are nominal shadow of EEPROM, new since last EEPROM update */
+	uint8_t eep_st_errs[QIB_EEP_LOG_CNT];
+	uint8_t
eep_st_new_errs[QIB_EEP_LOG_CNT];
+	uint16_t eep_hrs;
+	/*
+	 * masks for which bits of errs, hwerrs that cause
+	 * each of the counters to increment.
+	 */
+	struct qib_eep_log_mask eep_st_masks[QIB_EEP_LOG_CNT];
+	struct qib_diag_client *diag_client;
+	spinlock_t qib_diag_trans_lock; /* protect diag observer ops */
+	struct diag_observer_list_elt *diag_observer_list;
+
+	u8 psxmitwait_supported;
+	/* cycle length of PS* counters in HW (in picoseconds) */
+	u16 psxmitwait_check_rate;
+	/* high-volume overflow errors deferred to tasklet */
+	struct tasklet_struct error_tasklet;
+	/* per device cq worker */
+	struct kthread_worker *worker;
+
+	int assigned_node_id; /* NUMA node closest to HCA */
+};
+
+/* hol_state values */
+#define QIB_HOL_UP 0
+#define QIB_HOL_INIT 1
+
+#define QIB_SDMA_SENDCTRL_OP_ENABLE (1U << 0)
+#define QIB_SDMA_SENDCTRL_OP_INTENABLE (1U << 1)
+#define QIB_SDMA_SENDCTRL_OP_HALT (1U << 2)
+#define QIB_SDMA_SENDCTRL_OP_CLEANUP (1U << 3)
+#define QIB_SDMA_SENDCTRL_OP_DRAIN (1U << 4)
+
+/* operation types for f_txchk_change() */
+#define TXCHK_CHG_TYPE_DIS1 3
+#define TXCHK_CHG_TYPE_ENAB1 2
+#define TXCHK_CHG_TYPE_KERN 1
+#define TXCHK_CHG_TYPE_USER 0
+
+#define QIB_CHASE_TIME msecs_to_jiffies(145)
+#define QIB_CHASE_DIS_TIME msecs_to_jiffies(160)
+
+/* Private data for file operations */
+struct qib_filedata {
+	struct qib_ctxtdata *rcd;
+	unsigned subctxt;
+	unsigned tidcursor;
+	struct qib_user_sdma_queue *pq;
+	int rec_cpu_num; /* for cpu affinity; -1 if none */
+};
+
+extern struct list_head qib_dev_list;
+extern spinlock_t qib_devs_lock;
+extern struct qib_devdata *qib_lookup(int unit);
+extern u32 qib_cpulist_count;
+extern unsigned long *qib_cpulist;
+
+extern unsigned qib_wc_pat;
+extern unsigned qib_cc_table_size;
+int qib_init(struct qib_devdata *, int);
+int init_chip_wc_pat(struct qib_devdata *dd, u32);
+int qib_enable_wc(struct qib_devdata *dd);
+void qib_disable_wc(struct qib_devdata *dd);
+int qib_count_units(int *npresentp, int *nupp);
+int qib_count_active_units(void);
+
+int qib_cdev_init(int minor, const char *name,
+		  const struct file_operations *fops,
+		  struct cdev **cdevp, struct device **devp);
+void qib_cdev_cleanup(struct cdev **cdevp, struct device **devp);
+int qib_dev_init(void);
+void qib_dev_cleanup(void);
+
+int qib_diag_add(struct qib_devdata *);
+void qib_diag_remove(struct qib_devdata *);
+void qib_handle_e_ibstatuschanged(struct qib_pportdata *, u64);
+void qib_sdma_update_tail(struct qib_pportdata *, u16); /* hold sdma_lock */
+
+int qib_decode_err(struct qib_devdata *dd, char *buf, size_t blen, u64 err);
+void qib_bad_intrstatus(struct qib_devdata *);
+void qib_handle_urcv(struct qib_devdata *, u64);
+
+/* clean up any per-chip chip-specific stuff */
+void qib_chip_cleanup(struct qib_devdata *);
+/* clean up any chip type-specific stuff */
+void qib_chip_done(void);
+
+/* check to see if we have to force ordering for write combining */
+int qib_unordered_wc(void);
+void qib_pio_copy(void __iomem *to, const void *from, size_t count);
+
+void qib_disarm_piobufs(struct qib_devdata *, unsigned, unsigned);
+int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *);
+void qib_disarm_piobufs_set(struct qib_devdata *, unsigned long *, unsigned);
+void qib_cancel_sends(struct qib_pportdata *);
+
+int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *);
+int qib_setup_eagerbufs(struct qib_ctxtdata *);
+void qib_set_ctxtcnt(struct qib_devdata *);
+int qib_create_ctxts(struct qib_devdata *dd);
+struct qib_ctxtdata
*qib_create_ctxtdata(struct qib_pportdata *, u32, int); +int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); +void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); + +u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *); +int qib_reset_device(int); +int qib_wait_linkstate(struct qib_pportdata *, u32, int); +int qib_set_linkstate(struct qib_pportdata *, u8); +int qib_set_mtu(struct qib_pportdata *, u16); +int qib_set_lid(struct qib_pportdata *, u32, u8); +void qib_hol_down(struct qib_pportdata *); +void qib_hol_init(struct qib_pportdata *); +void qib_hol_up(struct qib_pportdata *); +void qib_hol_event(unsigned long); +void qib_disable_after_error(struct qib_devdata *); +int qib_set_uevent_bits(struct qib_pportdata *, const int); + +/* for use in system calls, where we want to know device type, etc. */ +#define ctxt_fp(fp) \ + (((struct qib_filedata *)(fp)->private_data)->rcd) +#define subctxt_fp(fp) \ + (((struct qib_filedata *)(fp)->private_data)->subctxt) +#define tidcursor_fp(fp) \ + (((struct qib_filedata *)(fp)->private_data)->tidcursor) +#define user_sdma_queue_fp(fp) \ + (((struct qib_filedata *)(fp)->private_data)->pq) + +static inline struct qib_devdata *dd_from_ppd(struct qib_pportdata *ppd) +{ + return ppd->dd; +} + +static inline struct qib_devdata *dd_from_dev(struct qib_ibdev *dev) +{ + return container_of(dev, struct qib_devdata, verbs_dev); +} + +static inline struct qib_devdata *dd_from_ibdev(struct ib_device *ibdev) +{ + return dd_from_dev(to_idev(ibdev)); +} + +static inline struct qib_pportdata *ppd_from_ibp(struct qib_ibport *ibp) +{ + return container_of(ibp, struct qib_pportdata, ibport_data); +} + +static inline struct qib_ibport *to_iport(struct ib_device *ibdev, u8 port) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */ + + WARN_ON(pidx >= dd->num_pports); + return &dd->pport[pidx].ibport_data; +} + +/* + * values for dd->flags (_device_ related flags) and + */ +#define QIB_HAS_LINK_LATENCY 0x1 /* supports link latency (IB 1.2) */ +#define QIB_INITTED 0x2 /* chip and driver up and initted */ +#define QIB_DOING_RESET 0x4 /* in the middle of doing chip reset */ +#define QIB_PRESENT 0x8 /* chip accesses can be done */ +#define QIB_PIO_FLUSH_WC 0x10 /* Needs Write combining flush for PIO */ +#define QIB_HAS_THRESH_UPDATE 0x40 +#define QIB_HAS_SDMA_TIMEOUT 0x80 +#define QIB_USE_SPCL_TRIG 0x100 /* SpecialTrigger launch enabled */ +#define QIB_NODMA_RTAIL 0x200 /* rcvhdrtail register DMA enabled */ +#define QIB_HAS_INTX 0x800 /* Supports INTx interrupts */ +#define QIB_HAS_SEND_DMA 0x1000 /* Supports Send DMA */ +#define QIB_HAS_VLSUPP 0x2000 /* Supports multiple VLs; PBC different */ +#define QIB_HAS_HDRSUPP 0x4000 /* Supports header suppression */ +#define QIB_BADINTR 0x8000 /* severe interrupt problems */ +#define QIB_DCA_ENABLED 0x10000 /* Direct Cache Access enabled */ +#define QIB_HAS_QSFP 0x20000 /* device (card instance) has QSFP */ + +/* + * values for ppd->lflags (_ib_port_ related flags) + */ +#define QIBL_LINKV 0x1 /* IB link state valid */ +#define QIBL_LINKDOWN 0x8 /* IB link is down */ +#define QIBL_LINKINIT 0x10 /* IB link level is up */ +#define QIBL_LINKARMED 0x20 /* IB link is ARMED */ +#define QIBL_LINKACTIVE 0x40 /* IB link is ACTIVE */ +/* leave a gap for more IB-link state */ +#define QIBL_IB_AUTONEG_INPROG 0x1000 /* non-IBTA DDR/QDR neg active */ +#define QIBL_IB_AUTONEG_FAILED 0x2000 /* non-IBTA DDR/QDR neg failed */ +#define 
QIBL_IB_LINK_DISABLED 0x4000 /* Linkdown-disable forced, + * Do not try to bring up */ +#define QIBL_IB_FORCE_NOTIFY 0x8000 /* force notify on next ib change */ + +/* IB dword length mask in PBC (lower 11 bits); same for all chips */ +#define QIB_PBC_LENGTH_MASK ((1 << 11) - 1) + + +/* ctxt_flag bit offsets */ + /* waiting for a packet to arrive */ +#define QIB_CTXT_WAITING_RCV 2 + /* master has not finished initializing */ +#define QIB_CTXT_MASTER_UNINIT 4 + /* waiting for an urgent packet to arrive */ +#define QIB_CTXT_WAITING_URG 5 + +/* free up any allocated data at closes */ +void qib_free_data(struct qib_ctxtdata *dd); +void qib_chg_pioavailkernel(struct qib_devdata *, unsigned, unsigned, + u32, struct qib_ctxtdata *); +struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *, + const struct pci_device_id *); +struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *, + const struct pci_device_id *); +struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *, + const struct pci_device_id *); +void qib_free_devdata(struct qib_devdata *); +struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra); + +#define QIB_TWSI_NO_DEV 0xFF +/* Below qib_twsi_ functions must be called with eep_lock held */ +int qib_twsi_reset(struct qib_devdata *dd); +int qib_twsi_blk_rd(struct qib_devdata *dd, int dev, int addr, void *buffer, + int len); +int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr, + const void *buffer, int len); +void qib_get_eeprom_info(struct qib_devdata *); +int qib_update_eeprom_log(struct qib_devdata *dd); +void qib_inc_eeprom_err(struct qib_devdata *dd, u32 eidx, u32 incr); +void qib_dump_lookup_output_queue(struct qib_devdata *); +void qib_force_pio_avail_update(struct qib_devdata *); +void qib_clear_symerror_on_linkup(unsigned long opaque); + +/* + * Set LED override, only the two LSBs have "public" meaning, but + * any non-zero value substitutes them for the Link and LinkTrain + * LED states. + */ +#define QIB_LED_PHYS 1 /* Physical (linktraining) GREEN LED */ +#define QIB_LED_LOG 2 /* Logical (link) YELLOW LED */ +void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val); + +/* send dma routines */ +int qib_setup_sdma(struct qib_pportdata *); +void qib_teardown_sdma(struct qib_pportdata *); +void __qib_sdma_intr(struct qib_pportdata *); +void qib_sdma_intr(struct qib_pportdata *); +void qib_user_sdma_send_desc(struct qib_pportdata *dd, + struct list_head *pktlist); +int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *, + u32, struct qib_verbs_txreq *); +/* ppd->sdma_lock should be locked before calling this. 
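+ *
+ * Illustrative note: the ring helpers below rely on sdma_descq_added and
+ * sdma_descq_removed being free-running u64 counters, so occupancy is
+ * simply (added - removed) and wraparound needs no special casing; one
+ * slot is kept unused so a full ring is distinguishable from an empty one:
+ *
+ *	used = ppd->sdma_descq_added - ppd->sdma_descq_removed;
+ *	free = ppd->sdma_descq_cnt - used - 1;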
*/ +int qib_sdma_make_progress(struct qib_pportdata *dd); + +static inline int qib_sdma_empty(const struct qib_pportdata *ppd) +{ + return ppd->sdma_descq_added == ppd->sdma_descq_removed; +} + +/* must be called under qib_sdma_lock */ +static inline u16 qib_sdma_descq_freecnt(const struct qib_pportdata *ppd) +{ + return ppd->sdma_descq_cnt - + (ppd->sdma_descq_added - ppd->sdma_descq_removed) - 1; +} + +static inline int __qib_sdma_running(struct qib_pportdata *ppd) +{ + return ppd->sdma_state.current_state == qib_sdma_state_s99_running; +} +int qib_sdma_running(struct qib_pportdata *); +void dump_sdma_state(struct qib_pportdata *ppd); +void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); +void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); + +/* + * number of words used for protocol header if not set by qib_userinit(); + */ +#define QIB_DFLT_RCVHDRSIZE 9 + +/* + * We need to be able to handle an IB header of at least 24 dwords. + * We need the rcvhdrq large enough to handle largest IB header, but + * still have room for a 2KB MTU standard IB packet. + * Additionally, some processor/memory controller combinations + * benefit quite strongly from having the DMA'ed data be cacheline + * aligned and a cacheline multiple, so we set the size to 32 dwords + * (2 64-byte primary cachelines for pretty much all processors of + * interest). The alignment hurts nothing, other than using somewhat + * more memory. + */ +#define QIB_RCVHDR_ENTSIZE 32 + +int qib_get_user_pages(unsigned long, size_t, struct page **); +void qib_release_user_pages(struct page **, size_t); +int qib_eeprom_read(struct qib_devdata *, u8, void *, int); +int qib_eeprom_write(struct qib_devdata *, u8, const void *, int); +u32 __iomem *qib_getsendbuf_range(struct qib_devdata *, u32 *, u32, u32); +void qib_sendbuf_done(struct qib_devdata *, unsigned); + +static inline void qib_clear_rcvhdrtail(const struct qib_ctxtdata *rcd) +{ + *((u64 *) rcd->rcvhdrtail_kvaddr) = 0ULL; +} + +static inline u32 qib_get_rcvhdrtail(const struct qib_ctxtdata *rcd) +{ + /* + * volatile because it's a DMA target from the chip, routine is + * inlined, and don't want register caching or reordering. + */ + return (u32) le64_to_cpu( + *((volatile __le64 *)rcd->rcvhdrtail_kvaddr)); /* DMA'ed */ +} + +static inline u32 qib_get_hdrqtail(const struct qib_ctxtdata *rcd) +{ + const struct qib_devdata *dd = rcd->dd; + u32 hdrqtail; + + if (dd->flags & QIB_NODMA_RTAIL) { + __le32 *rhf_addr; + u32 seq; + + rhf_addr = (__le32 *) rcd->rcvhdrq + + rcd->head + dd->rhf_offset; + seq = qib_hdrget_seq(rhf_addr); + hdrqtail = rcd->head; + if (seq == rcd->seq_cnt) + hdrqtail++; + } else + hdrqtail = qib_get_rcvhdrtail(rcd); + + return hdrqtail; +} + +/* + * sysfs interface. 
+ */ + +extern const char ib_qib_version[]; + +int qib_device_create(struct qib_devdata *); +void qib_device_remove(struct qib_devdata *); + +int qib_create_port_files(struct ib_device *ibdev, u8 port_num, + struct kobject *kobj); +int qib_verbs_register_sysfs(struct qib_devdata *); +void qib_verbs_unregister_sysfs(struct qib_devdata *); +/* Hook for sysfs read of QSFP */ +extern int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len); + +int __init qib_init_qibfs(void); +int __exit qib_exit_qibfs(void); + +int qibfs_add(struct qib_devdata *); +int qibfs_remove(struct qib_devdata *); + +int qib_pcie_init(struct pci_dev *, const struct pci_device_id *); +int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *, + const struct pci_device_id *); +void qib_pcie_ddcleanup(struct qib_devdata *); +int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *); +int qib_reinit_intr(struct qib_devdata *); +void qib_enable_intx(struct pci_dev *); +void qib_nomsi(struct qib_devdata *); +void qib_nomsix(struct qib_devdata *); +void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *); +void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8); +/* interrupts for device */ +u64 qib_int_counter(struct qib_devdata *); +/* interrupt for all devices */ +u64 qib_sps_ints(void); + +/* + * dma_addr wrappers - all 0's invalid for hw + */ +dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long, + size_t, int); +const char *qib_get_unit_name(int unit); + +/* + * Flush write combining store buffers (if present) and perform a write + * barrier. + */ +#if defined(CONFIG_X86_64) +#define qib_flush_wc() asm volatile("sfence" : : : "memory") +#else +#define qib_flush_wc() wmb() /* no reorder around wc flush */ +#endif + +/* global module parameter variables */ +extern unsigned qib_ibmtu; +extern ushort qib_cfgctxts; +extern ushort qib_num_cfg_vls; +extern ushort qib_mini_init; /* If set, do few (ideally 0) writes to chip */ +extern unsigned qib_n_krcv_queues; +extern unsigned qib_sdma_fetch_arb; +extern unsigned qib_compat_ddr_negotiate; +extern int qib_special_trigger; +extern unsigned qib_numa_aware; + +extern struct mutex qib_mutex; + +/* Number of seconds before our card status check... */ +#define STATUS_TIMEOUT 60 + +#define QIB_DRV_NAME "ib_qib" +#define QIB_USER_MINOR_BASE 0 +#define QIB_TRACE_MINOR 127 +#define QIB_DIAGPKT_MINOR 128 +#define QIB_DIAG_MINOR_BASE 129 +#define QIB_NMINORS 255 + +#define PCI_VENDOR_ID_PATHSCALE 0x1fc1 +#define PCI_VENDOR_ID_QLOGIC 0x1077 +#define PCI_DEVICE_ID_QLOGIC_IB_6120 0x10 +#define PCI_DEVICE_ID_QLOGIC_IB_7220 0x7220 +#define PCI_DEVICE_ID_QLOGIC_IB_7322 0x7322 + +/* + * qib_early_err is used (only!) to print early errors before devdata is + * allocated, or when dd->pcidev may not be valid, and at the tail end of + * cleanup when devdata may have been freed, etc. qib_dev_porterr is + * the same as qib_dev_err, but is used when the message really needs + * the IB port# to be definitive as to what's happening.. + * All of these go to the trace log, and the trace log entry is done + * first to avoid possible serial port delays from printk. + */ +#define qib_early_err(dev, fmt, ...) \ + dev_err(dev, fmt, ##__VA_ARGS__) + +#define qib_dev_err(dd, fmt, ...) \ + dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) + +#define qib_dev_warn(dd, fmt, ...) \ + dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \ + qib_get_unit_name((dd)->unit), ##__VA_ARGS__) + +#define qib_dev_porterr(dd, port, fmt, ...) 
\ + dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ + qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ + ##__VA_ARGS__) + +#define qib_devinfo(pcidev, fmt, ...) \ + dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__) + +/* + * this is used for formatting hw error messages... + */ +struct qib_hwerror_msgs { + u64 mask; + const char *msg; + size_t sz; +}; + +#define QLOGIC_IB_HWE_MSG(a, b) { .mask = a, .msg = b } + +/* in qib_intr.c... */ +void qib_format_hwerrors(u64 hwerrs, + const struct qib_hwerror_msgs *hwerrmsgs, + size_t nhwerrmsgs, char *msg, size_t lmsg); +#endif /* _QIB_KERNEL_H */ diff --git a/drivers/infiniband/hw/qib/qib_6120_regs.h b/drivers/infiniband/hw/qib/qib_6120_regs.h new file mode 100644 index 00000000000..e16cb6f7de2 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_6120_regs.h @@ -0,0 +1,977 @@ +/* + * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* This file is mechanically generated from RTL. Any hand-edits will be lost! 
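+ *
+ * Illustrative note (hypothetical helper, not part of the generated file):
+ * each field below is described by an _LSB (bit position) and a
+ * right-justified _RMASK, so extraction always follows the same pattern:
+ *
+ *	static inline u64 qib_6120_get(u64 reg, u32 lsb, u64 rmask)
+ *	{
+ *		return (reg >> lsb) & rmask;
+ *	}
+ *
+ *	major = qib_6120_get(rev, QIB_6120_Revision_R_ChipRevMajor_LSB,
+ *			     QIB_6120_Revision_R_ChipRevMajor_RMASK);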
*/ + +#define QIB_6120_Revision_OFFS 0x0 +#define QIB_6120_Revision_R_Simulator_LSB 0x3F +#define QIB_6120_Revision_R_Simulator_RMASK 0x1 +#define QIB_6120_Revision_Reserved_LSB 0x28 +#define QIB_6120_Revision_Reserved_RMASK 0x7FFFFF +#define QIB_6120_Revision_BoardID_LSB 0x20 +#define QIB_6120_Revision_BoardID_RMASK 0xFF +#define QIB_6120_Revision_R_SW_LSB 0x18 +#define QIB_6120_Revision_R_SW_RMASK 0xFF +#define QIB_6120_Revision_R_Arch_LSB 0x10 +#define QIB_6120_Revision_R_Arch_RMASK 0xFF +#define QIB_6120_Revision_R_ChipRevMajor_LSB 0x8 +#define QIB_6120_Revision_R_ChipRevMajor_RMASK 0xFF +#define QIB_6120_Revision_R_ChipRevMinor_LSB 0x0 +#define QIB_6120_Revision_R_ChipRevMinor_RMASK 0xFF + +#define QIB_6120_Control_OFFS 0x8 +#define QIB_6120_Control_TxLatency_LSB 0x4 +#define QIB_6120_Control_TxLatency_RMASK 0x1 +#define QIB_6120_Control_PCIERetryBufDiagEn_LSB 0x3 +#define QIB_6120_Control_PCIERetryBufDiagEn_RMASK 0x1 +#define QIB_6120_Control_LinkEn_LSB 0x2 +#define QIB_6120_Control_LinkEn_RMASK 0x1 +#define QIB_6120_Control_FreezeMode_LSB 0x1 +#define QIB_6120_Control_FreezeMode_RMASK 0x1 +#define QIB_6120_Control_SyncReset_LSB 0x0 +#define QIB_6120_Control_SyncReset_RMASK 0x1 + +#define QIB_6120_PageAlign_OFFS 0x10 + +#define QIB_6120_PortCnt_OFFS 0x18 + +#define QIB_6120_SendRegBase_OFFS 0x30 + +#define QIB_6120_UserRegBase_OFFS 0x38 + +#define QIB_6120_CntrRegBase_OFFS 0x40 + +#define QIB_6120_Scratch_OFFS 0x48 +#define QIB_6120_Scratch_TopHalf_LSB 0x20 +#define QIB_6120_Scratch_TopHalf_RMASK 0xFFFFFFFF +#define QIB_6120_Scratch_BottomHalf_LSB 0x0 +#define QIB_6120_Scratch_BottomHalf_RMASK 0xFFFFFFFF + +#define QIB_6120_IntBlocked_OFFS 0x60 +#define QIB_6120_IntBlocked_ErrorIntBlocked_LSB 0x1F +#define QIB_6120_IntBlocked_ErrorIntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_PioSetIntBlocked_LSB 0x1E +#define QIB_6120_IntBlocked_PioSetIntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_PioBufAvailIntBlocked_LSB 0x1D +#define QIB_6120_IntBlocked_PioBufAvailIntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_assertGPIOIntBlocked_LSB 0x1C +#define QIB_6120_IntBlocked_assertGPIOIntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_Reserved_LSB 0xF +#define QIB_6120_IntBlocked_Reserved_RMASK 0x1FFF +#define QIB_6120_IntBlocked_RcvAvail4IntBlocked_LSB 0x10 +#define QIB_6120_IntBlocked_RcvAvail4IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvAvail3IntBlocked_LSB 0xF +#define QIB_6120_IntBlocked_RcvAvail3IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvAvail2IntBlocked_LSB 0xE +#define QIB_6120_IntBlocked_RcvAvail2IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvAvail1IntBlocked_LSB 0xD +#define QIB_6120_IntBlocked_RcvAvail1IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvAvail0IntBlocked_LSB 0xC +#define QIB_6120_IntBlocked_RcvAvail0IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_Reserved1_LSB 0x5 +#define QIB_6120_IntBlocked_Reserved1_RMASK 0x7F +#define QIB_6120_IntBlocked_RcvUrg4IntBlocked_LSB 0x4 +#define QIB_6120_IntBlocked_RcvUrg4IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvUrg3IntBlocked_LSB 0x3 +#define QIB_6120_IntBlocked_RcvUrg3IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvUrg2IntBlocked_LSB 0x2 +#define QIB_6120_IntBlocked_RcvUrg2IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvUrg1IntBlocked_LSB 0x1 +#define QIB_6120_IntBlocked_RcvUrg1IntBlocked_RMASK 0x1 +#define QIB_6120_IntBlocked_RcvUrg0IntBlocked_LSB 0x0 +#define QIB_6120_IntBlocked_RcvUrg0IntBlocked_RMASK 0x1 + +#define QIB_6120_IntMask_OFFS 0x68 +#define QIB_6120_IntMask_ErrorIntMask_LSB 
0x1F +#define QIB_6120_IntMask_ErrorIntMask_RMASK 0x1 +#define QIB_6120_IntMask_PioSetIntMask_LSB 0x1E +#define QIB_6120_IntMask_PioSetIntMask_RMASK 0x1 +#define QIB_6120_IntMask_PioBufAvailIntMask_LSB 0x1D +#define QIB_6120_IntMask_PioBufAvailIntMask_RMASK 0x1 +#define QIB_6120_IntMask_assertGPIOIntMask_LSB 0x1C +#define QIB_6120_IntMask_assertGPIOIntMask_RMASK 0x1 +#define QIB_6120_IntMask_Reserved_LSB 0x11 +#define QIB_6120_IntMask_Reserved_RMASK 0x7FF +#define QIB_6120_IntMask_RcvAvail4IntMask_LSB 0x10 +#define QIB_6120_IntMask_RcvAvail4IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvAvail3IntMask_LSB 0xF +#define QIB_6120_IntMask_RcvAvail3IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvAvail2IntMask_LSB 0xE +#define QIB_6120_IntMask_RcvAvail2IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvAvail1IntMask_LSB 0xD +#define QIB_6120_IntMask_RcvAvail1IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvAvail0IntMask_LSB 0xC +#define QIB_6120_IntMask_RcvAvail0IntMask_RMASK 0x1 +#define QIB_6120_IntMask_Reserved1_LSB 0x5 +#define QIB_6120_IntMask_Reserved1_RMASK 0x7F +#define QIB_6120_IntMask_RcvUrg4IntMask_LSB 0x4 +#define QIB_6120_IntMask_RcvUrg4IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvUrg3IntMask_LSB 0x3 +#define QIB_6120_IntMask_RcvUrg3IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvUrg2IntMask_LSB 0x2 +#define QIB_6120_IntMask_RcvUrg2IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvUrg1IntMask_LSB 0x1 +#define QIB_6120_IntMask_RcvUrg1IntMask_RMASK 0x1 +#define QIB_6120_IntMask_RcvUrg0IntMask_LSB 0x0 +#define QIB_6120_IntMask_RcvUrg0IntMask_RMASK 0x1 + +#define QIB_6120_IntStatus_OFFS 0x70 +#define QIB_6120_IntStatus_Error_LSB 0x1F +#define QIB_6120_IntStatus_Error_RMASK 0x1 +#define QIB_6120_IntStatus_PioSent_LSB 0x1E +#define QIB_6120_IntStatus_PioSent_RMASK 0x1 +#define QIB_6120_IntStatus_PioBufAvail_LSB 0x1D +#define QIB_6120_IntStatus_PioBufAvail_RMASK 0x1 +#define QIB_6120_IntStatus_assertGPIO_LSB 0x1C +#define QIB_6120_IntStatus_assertGPIO_RMASK 0x1 +#define QIB_6120_IntStatus_Reserved_LSB 0xF +#define QIB_6120_IntStatus_Reserved_RMASK 0x1FFF +#define QIB_6120_IntStatus_RcvAvail4_LSB 0x10 +#define QIB_6120_IntStatus_RcvAvail4_RMASK 0x1 +#define QIB_6120_IntStatus_RcvAvail3_LSB 0xF +#define QIB_6120_IntStatus_RcvAvail3_RMASK 0x1 +#define QIB_6120_IntStatus_RcvAvail2_LSB 0xE +#define QIB_6120_IntStatus_RcvAvail2_RMASK 0x1 +#define QIB_6120_IntStatus_RcvAvail1_LSB 0xD +#define QIB_6120_IntStatus_RcvAvail1_RMASK 0x1 +#define QIB_6120_IntStatus_RcvAvail0_LSB 0xC +#define QIB_6120_IntStatus_RcvAvail0_RMASK 0x1 +#define QIB_6120_IntStatus_Reserved1_LSB 0x5 +#define QIB_6120_IntStatus_Reserved1_RMASK 0x7F +#define QIB_6120_IntStatus_RcvUrg4_LSB 0x4 +#define QIB_6120_IntStatus_RcvUrg4_RMASK 0x1 +#define QIB_6120_IntStatus_RcvUrg3_LSB 0x3 +#define QIB_6120_IntStatus_RcvUrg3_RMASK 0x1 +#define QIB_6120_IntStatus_RcvUrg2_LSB 0x2 +#define QIB_6120_IntStatus_RcvUrg2_RMASK 0x1 +#define QIB_6120_IntStatus_RcvUrg1_LSB 0x1 +#define QIB_6120_IntStatus_RcvUrg1_RMASK 0x1 +#define QIB_6120_IntStatus_RcvUrg0_LSB 0x0 +#define QIB_6120_IntStatus_RcvUrg0_RMASK 0x1 + +#define QIB_6120_IntClear_OFFS 0x78 +#define QIB_6120_IntClear_ErrorIntClear_LSB 0x1F +#define QIB_6120_IntClear_ErrorIntClear_RMASK 0x1 +#define QIB_6120_IntClear_PioSetIntClear_LSB 0x1E +#define QIB_6120_IntClear_PioSetIntClear_RMASK 0x1 +#define QIB_6120_IntClear_PioBufAvailIntClear_LSB 0x1D +#define QIB_6120_IntClear_PioBufAvailIntClear_RMASK 0x1 +#define QIB_6120_IntClear_assertGPIOIntClear_LSB 0x1C +#define 
QIB_6120_IntClear_assertGPIOIntClear_RMASK 0x1 +#define QIB_6120_IntClear_Reserved_LSB 0xF +#define QIB_6120_IntClear_Reserved_RMASK 0x1FFF +#define QIB_6120_IntClear_RcvAvail4IntClear_LSB 0x10 +#define QIB_6120_IntClear_RcvAvail4IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvAvail3IntClear_LSB 0xF +#define QIB_6120_IntClear_RcvAvail3IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvAvail2IntClear_LSB 0xE +#define QIB_6120_IntClear_RcvAvail2IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvAvail1IntClear_LSB 0xD +#define QIB_6120_IntClear_RcvAvail1IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvAvail0IntClear_LSB 0xC +#define QIB_6120_IntClear_RcvAvail0IntClear_RMASK 0x1 +#define QIB_6120_IntClear_Reserved1_LSB 0x5 +#define QIB_6120_IntClear_Reserved1_RMASK 0x7F +#define QIB_6120_IntClear_RcvUrg4IntClear_LSB 0x4 +#define QIB_6120_IntClear_RcvUrg4IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvUrg3IntClear_LSB 0x3 +#define QIB_6120_IntClear_RcvUrg3IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvUrg2IntClear_LSB 0x2 +#define QIB_6120_IntClear_RcvUrg2IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvUrg1IntClear_LSB 0x1 +#define QIB_6120_IntClear_RcvUrg1IntClear_RMASK 0x1 +#define QIB_6120_IntClear_RcvUrg0IntClear_LSB 0x0 +#define QIB_6120_IntClear_RcvUrg0IntClear_RMASK 0x1 + +#define QIB_6120_ErrMask_OFFS 0x80 +#define QIB_6120_ErrMask_Reserved_LSB 0x34 +#define QIB_6120_ErrMask_Reserved_RMASK 0xFFF +#define QIB_6120_ErrMask_HardwareErrMask_LSB 0x33 +#define QIB_6120_ErrMask_HardwareErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_ResetNegatedMask_LSB 0x32 +#define QIB_6120_ErrMask_ResetNegatedMask_RMASK 0x1 +#define QIB_6120_ErrMask_InvalidAddrErrMask_LSB 0x31 +#define QIB_6120_ErrMask_InvalidAddrErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_IBStatusChangedMask_LSB 0x30 +#define QIB_6120_ErrMask_IBStatusChangedMask_RMASK 0x1 +#define QIB_6120_ErrMask_Reserved1_LSB 0x26 +#define QIB_6120_ErrMask_Reserved1_RMASK 0x3FF +#define QIB_6120_ErrMask_SendUnsupportedVLErrMask_LSB 0x25 +#define QIB_6120_ErrMask_SendUnsupportedVLErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendUnexpectedPktNumErrMask_LSB 0x24 +#define QIB_6120_ErrMask_SendUnexpectedPktNumErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendPioArmLaunchErrMask_LSB 0x23 +#define QIB_6120_ErrMask_SendPioArmLaunchErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendDroppedDataPktErrMask_LSB 0x22 +#define QIB_6120_ErrMask_SendDroppedDataPktErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendDroppedSmpPktErrMask_LSB 0x21 +#define QIB_6120_ErrMask_SendDroppedSmpPktErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendPktLenErrMask_LSB 0x20 +#define QIB_6120_ErrMask_SendPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendUnderRunErrMask_LSB 0x1F +#define QIB_6120_ErrMask_SendUnderRunErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendMaxPktLenErrMask_LSB 0x1E +#define QIB_6120_ErrMask_SendMaxPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_SendMinPktLenErrMask_LSB 0x1D +#define QIB_6120_ErrMask_SendMinPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_Reserved2_LSB 0x12 +#define QIB_6120_ErrMask_Reserved2_RMASK 0x7FF +#define QIB_6120_ErrMask_RcvIBLostLinkErrMask_LSB 0x11 +#define QIB_6120_ErrMask_RcvIBLostLinkErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvHdrErrMask_LSB 0x10 +#define QIB_6120_ErrMask_RcvHdrErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvHdrLenErrMask_LSB 0xF +#define QIB_6120_ErrMask_RcvHdrLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvBadTidErrMask_LSB 0xE +#define QIB_6120_ErrMask_RcvBadTidErrMask_RMASK 0x1 +#define 
QIB_6120_ErrMask_RcvHdrFullErrMask_LSB 0xD +#define QIB_6120_ErrMask_RcvHdrFullErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvEgrFullErrMask_LSB 0xC +#define QIB_6120_ErrMask_RcvEgrFullErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvBadVersionErrMask_LSB 0xB +#define QIB_6120_ErrMask_RcvBadVersionErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvIBFlowErrMask_LSB 0xA +#define QIB_6120_ErrMask_RcvIBFlowErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvEBPErrMask_LSB 0x9 +#define QIB_6120_ErrMask_RcvEBPErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvUnsupportedVLErrMask_LSB 0x8 +#define QIB_6120_ErrMask_RcvUnsupportedVLErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvUnexpectedCharErrMask_LSB 0x7 +#define QIB_6120_ErrMask_RcvUnexpectedCharErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvShortPktLenErrMask_LSB 0x6 +#define QIB_6120_ErrMask_RcvShortPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvLongPktLenErrMask_LSB 0x5 +#define QIB_6120_ErrMask_RcvLongPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvMaxPktLenErrMask_LSB 0x4 +#define QIB_6120_ErrMask_RcvMaxPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvMinPktLenErrMask_LSB 0x3 +#define QIB_6120_ErrMask_RcvMinPktLenErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvICRCErrMask_LSB 0x2 +#define QIB_6120_ErrMask_RcvICRCErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvVCRCErrMask_LSB 0x1 +#define QIB_6120_ErrMask_RcvVCRCErrMask_RMASK 0x1 +#define QIB_6120_ErrMask_RcvFormatErrMask_LSB 0x0 +#define QIB_6120_ErrMask_RcvFormatErrMask_RMASK 0x1 + +#define QIB_6120_ErrStatus_OFFS 0x88 +#define QIB_6120_ErrStatus_Reserved_LSB 0x34 +#define QIB_6120_ErrStatus_Reserved_RMASK 0xFFF +#define QIB_6120_ErrStatus_HardwareErr_LSB 0x33 +#define QIB_6120_ErrStatus_HardwareErr_RMASK 0x1 +#define QIB_6120_ErrStatus_ResetNegated_LSB 0x32 +#define QIB_6120_ErrStatus_ResetNegated_RMASK 0x1 +#define QIB_6120_ErrStatus_InvalidAddrErr_LSB 0x31 +#define QIB_6120_ErrStatus_InvalidAddrErr_RMASK 0x1 +#define QIB_6120_ErrStatus_IBStatusChanged_LSB 0x30 +#define QIB_6120_ErrStatus_IBStatusChanged_RMASK 0x1 +#define QIB_6120_ErrStatus_Reserved1_LSB 0x26 +#define QIB_6120_ErrStatus_Reserved1_RMASK 0x3FF +#define QIB_6120_ErrStatus_SendUnsupportedVLErr_LSB 0x25 +#define QIB_6120_ErrStatus_SendUnsupportedVLErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendUnexpectedPktNumErr_LSB 0x24 +#define QIB_6120_ErrStatus_SendUnexpectedPktNumErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendPioArmLaunchErr_LSB 0x23 +#define QIB_6120_ErrStatus_SendPioArmLaunchErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendDroppedDataPktErr_LSB 0x22 +#define QIB_6120_ErrStatus_SendDroppedDataPktErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendDroppedSmpPktErr_LSB 0x21 +#define QIB_6120_ErrStatus_SendDroppedSmpPktErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendPktLenErr_LSB 0x20 +#define QIB_6120_ErrStatus_SendPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendUnderRunErr_LSB 0x1F +#define QIB_6120_ErrStatus_SendUnderRunErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendMaxPktLenErr_LSB 0x1E +#define QIB_6120_ErrStatus_SendMaxPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_SendMinPktLenErr_LSB 0x1D +#define QIB_6120_ErrStatus_SendMinPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_Reserved2_LSB 0x12 +#define QIB_6120_ErrStatus_Reserved2_RMASK 0x7FF +#define QIB_6120_ErrStatus_RcvIBLostLinkErr_LSB 0x11 +#define QIB_6120_ErrStatus_RcvIBLostLinkErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvHdrErr_LSB 0x10 +#define QIB_6120_ErrStatus_RcvHdrErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvHdrLenErr_LSB 0xF +#define 
QIB_6120_ErrStatus_RcvHdrLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvBadTidErr_LSB 0xE +#define QIB_6120_ErrStatus_RcvBadTidErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvHdrFullErr_LSB 0xD +#define QIB_6120_ErrStatus_RcvHdrFullErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvEgrFullErr_LSB 0xC +#define QIB_6120_ErrStatus_RcvEgrFullErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvBadVersionErr_LSB 0xB +#define QIB_6120_ErrStatus_RcvBadVersionErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvIBFlowErr_LSB 0xA +#define QIB_6120_ErrStatus_RcvIBFlowErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvEBPErr_LSB 0x9 +#define QIB_6120_ErrStatus_RcvEBPErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvUnsupportedVLErr_LSB 0x8 +#define QIB_6120_ErrStatus_RcvUnsupportedVLErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvUnexpectedCharErr_LSB 0x7 +#define QIB_6120_ErrStatus_RcvUnexpectedCharErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvShortPktLenErr_LSB 0x6 +#define QIB_6120_ErrStatus_RcvShortPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvLongPktLenErr_LSB 0x5 +#define QIB_6120_ErrStatus_RcvLongPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvMaxPktLenErr_LSB 0x4 +#define QIB_6120_ErrStatus_RcvMaxPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvMinPktLenErr_LSB 0x3 +#define QIB_6120_ErrStatus_RcvMinPktLenErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvICRCErr_LSB 0x2 +#define QIB_6120_ErrStatus_RcvICRCErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvVCRCErr_LSB 0x1 +#define QIB_6120_ErrStatus_RcvVCRCErr_RMASK 0x1 +#define QIB_6120_ErrStatus_RcvFormatErr_LSB 0x0 +#define QIB_6120_ErrStatus_RcvFormatErr_RMASK 0x1 + +#define QIB_6120_ErrClear_OFFS 0x90 +#define QIB_6120_ErrClear_Reserved_LSB 0x34 +#define QIB_6120_ErrClear_Reserved_RMASK 0xFFF +#define QIB_6120_ErrClear_HardwareErrClear_LSB 0x33 +#define QIB_6120_ErrClear_HardwareErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_ResetNegatedClear_LSB 0x32 +#define QIB_6120_ErrClear_ResetNegatedClear_RMASK 0x1 +#define QIB_6120_ErrClear_InvalidAddrErrClear_LSB 0x31 +#define QIB_6120_ErrClear_InvalidAddrErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_IBStatusChangedClear_LSB 0x30 +#define QIB_6120_ErrClear_IBStatusChangedClear_RMASK 0x1 +#define QIB_6120_ErrClear_Reserved1_LSB 0x26 +#define QIB_6120_ErrClear_Reserved1_RMASK 0x3FF +#define QIB_6120_ErrClear_SendUnsupportedVLErrClear_LSB 0x25 +#define QIB_6120_ErrClear_SendUnsupportedVLErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendUnexpectedPktNumErrClear_LSB 0x24 +#define QIB_6120_ErrClear_SendUnexpectedPktNumErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendPioArmLaunchErrClear_LSB 0x23 +#define QIB_6120_ErrClear_SendPioArmLaunchErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendDroppedDataPktErrClear_LSB 0x22 +#define QIB_6120_ErrClear_SendDroppedDataPktErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendDroppedSmpPktErrClear_LSB 0x21 +#define QIB_6120_ErrClear_SendDroppedSmpPktErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendPktLenErrClear_LSB 0x20 +#define QIB_6120_ErrClear_SendPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendUnderRunErrClear_LSB 0x1F +#define QIB_6120_ErrClear_SendUnderRunErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendMaxPktLenErrClear_LSB 0x1E +#define QIB_6120_ErrClear_SendMaxPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_SendMinPktLenErrClear_LSB 0x1D +#define QIB_6120_ErrClear_SendMinPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_Reserved2_LSB 0x12 +#define QIB_6120_ErrClear_Reserved2_RMASK 0x7FF +#define QIB_6120_ErrClear_RcvIBLostLinkErrClear_LSB 0x11 +#define 
QIB_6120_ErrClear_RcvIBLostLinkErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvHdrErrClear_LSB 0x10 +#define QIB_6120_ErrClear_RcvHdrErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvHdrLenErrClear_LSB 0xF +#define QIB_6120_ErrClear_RcvHdrLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvBadTidErrClear_LSB 0xE +#define QIB_6120_ErrClear_RcvBadTidErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvHdrFullErrClear_LSB 0xD +#define QIB_6120_ErrClear_RcvHdrFullErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvEgrFullErrClear_LSB 0xC +#define QIB_6120_ErrClear_RcvEgrFullErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvBadVersionErrClear_LSB 0xB +#define QIB_6120_ErrClear_RcvBadVersionErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvIBFlowErrClear_LSB 0xA +#define QIB_6120_ErrClear_RcvIBFlowErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvEBPErrClear_LSB 0x9 +#define QIB_6120_ErrClear_RcvEBPErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvUnsupportedVLErrClear_LSB 0x8 +#define QIB_6120_ErrClear_RcvUnsupportedVLErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvUnexpectedCharErrClear_LSB 0x7 +#define QIB_6120_ErrClear_RcvUnexpectedCharErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvShortPktLenErrClear_LSB 0x6 +#define QIB_6120_ErrClear_RcvShortPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvLongPktLenErrClear_LSB 0x5 +#define QIB_6120_ErrClear_RcvLongPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvMaxPktLenErrClear_LSB 0x4 +#define QIB_6120_ErrClear_RcvMaxPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvMinPktLenErrClear_LSB 0x3 +#define QIB_6120_ErrClear_RcvMinPktLenErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvICRCErrClear_LSB 0x2 +#define QIB_6120_ErrClear_RcvICRCErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvVCRCErrClear_LSB 0x1 +#define QIB_6120_ErrClear_RcvVCRCErrClear_RMASK 0x1 +#define QIB_6120_ErrClear_RcvFormatErrClear_LSB 0x0 +#define QIB_6120_ErrClear_RcvFormatErrClear_RMASK 0x1 + +#define QIB_6120_HwErrMask_OFFS 0x98 +#define QIB_6120_HwErrMask_IBCBusFromSPCParityErrMask_LSB 0x3F +#define QIB_6120_HwErrMask_IBCBusFromSPCParityErrMask_RMASK 0x1 +#define QIB_6120_HwErrMask_IBCBusToSPCParityErrMask_LSB 0x3E +#define QIB_6120_HwErrMask_IBCBusToSPCParityErrMask_RMASK 0x1 +#define QIB_6120_HwErrMask_Reserved_LSB 0x3D +#define QIB_6120_HwErrMask_Reserved_RMASK 0x1 +#define QIB_6120_HwErrMask_IBSerdesPClkNotDetectMask_LSB 0x3C +#define QIB_6120_HwErrMask_IBSerdesPClkNotDetectMask_RMASK 0x1 +#define QIB_6120_HwErrMask_PCIESerdesQ0PClkNotDetectMask_LSB 0x3B +#define QIB_6120_HwErrMask_PCIESerdesQ0PClkNotDetectMask_RMASK 0x1 +#define QIB_6120_HwErrMask_PCIESerdesQ1PClkNotDetectMask_LSB 0x3A +#define QIB_6120_HwErrMask_PCIESerdesQ1PClkNotDetectMask_RMASK 0x1 +#define QIB_6120_HwErrMask_Reserved1_LSB 0x39 +#define QIB_6120_HwErrMask_Reserved1_RMASK 0x1 +#define QIB_6120_HwErrMask_IBPLLrfSlipMask_LSB 0x38 +#define QIB_6120_HwErrMask_IBPLLrfSlipMask_RMASK 0x1 +#define QIB_6120_HwErrMask_IBPLLfbSlipMask_LSB 0x37 +#define QIB_6120_HwErrMask_IBPLLfbSlipMask_RMASK 0x1 +#define QIB_6120_HwErrMask_PowerOnBISTFailedMask_LSB 0x36 +#define QIB_6120_HwErrMask_PowerOnBISTFailedMask_RMASK 0x1 +#define QIB_6120_HwErrMask_Reserved2_LSB 0x33 +#define QIB_6120_HwErrMask_Reserved2_RMASK 0x7 +#define QIB_6120_HwErrMask_RXEMemParityErrMask_LSB 0x2C +#define QIB_6120_HwErrMask_RXEMemParityErrMask_RMASK 0x7F +#define QIB_6120_HwErrMask_TXEMemParityErrMask_LSB 0x28 +#define QIB_6120_HwErrMask_TXEMemParityErrMask_RMASK 0xF +#define QIB_6120_HwErrMask_Reserved3_LSB 0x22 +#define 
QIB_6120_HwErrMask_Reserved3_RMASK 0x3F +#define QIB_6120_HwErrMask_PCIeBusParityErrMask_LSB 0x1F +#define QIB_6120_HwErrMask_PCIeBusParityErrMask_RMASK 0x7 +#define QIB_6120_HwErrMask_PcieCplTimeoutMask_LSB 0x1E +#define QIB_6120_HwErrMask_PcieCplTimeoutMask_RMASK 0x1 +#define QIB_6120_HwErrMask_PoisonedTLPMask_LSB 0x1D +#define QIB_6120_HwErrMask_PoisonedTLPMask_RMASK 0x1 +#define QIB_6120_HwErrMask_Reserved4_LSB 0x6 +#define QIB_6120_HwErrMask_Reserved4_RMASK 0x7FFFFF +#define QIB_6120_HwErrMask_PCIeMemParityErrMask_LSB 0x0 +#define QIB_6120_HwErrMask_PCIeMemParityErrMask_RMASK 0x3F + +#define QIB_6120_HwErrStatus_OFFS 0xA0 +#define QIB_6120_HwErrStatus_IBCBusFromSPCParityErr_LSB 0x3F +#define QIB_6120_HwErrStatus_IBCBusFromSPCParityErr_RMASK 0x1 +#define QIB_6120_HwErrStatus_IBCBusToSPCParityErr_LSB 0x3E +#define QIB_6120_HwErrStatus_IBCBusToSPCParityErr_RMASK 0x1 +#define QIB_6120_HwErrStatus_Reserved_LSB 0x3D +#define QIB_6120_HwErrStatus_Reserved_RMASK 0x1 +#define QIB_6120_HwErrStatus_IBSerdesPClkNotDetect_LSB 0x3C +#define QIB_6120_HwErrStatus_IBSerdesPClkNotDetect_RMASK 0x1 +#define QIB_6120_HwErrStatus_PCIESerdesQ0PClkNotDetect_LSB 0x3B +#define QIB_6120_HwErrStatus_PCIESerdesQ0PClkNotDetect_RMASK 0x1 +#define QIB_6120_HwErrStatus_PCIESerdesQ1PClkNotDetect_LSB 0x3A +#define QIB_6120_HwErrStatus_PCIESerdesQ1PClkNotDetect_RMASK 0x1 +#define QIB_6120_HwErrStatus_Reserved1_LSB 0x39 +#define QIB_6120_HwErrStatus_Reserved1_RMASK 0x1 +#define QIB_6120_HwErrStatus_IBPLLrfSlip_LSB 0x38 +#define QIB_6120_HwErrStatus_IBPLLrfSlip_RMASK 0x1 +#define QIB_6120_HwErrStatus_IBPLLfbSlip_LSB 0x37 +#define QIB_6120_HwErrStatus_IBPLLfbSlip_RMASK 0x1 +#define QIB_6120_HwErrStatus_PowerOnBISTFailed_LSB 0x36 +#define QIB_6120_HwErrStatus_PowerOnBISTFailed_RMASK 0x1 +#define QIB_6120_HwErrStatus_Reserved2_LSB 0x33 +#define QIB_6120_HwErrStatus_Reserved2_RMASK 0x7 +#define QIB_6120_HwErrStatus_RXEMemParity_LSB 0x2C +#define QIB_6120_HwErrStatus_RXEMemParity_RMASK 0x7F +#define QIB_6120_HwErrStatus_TXEMemParity_LSB 0x28 +#define QIB_6120_HwErrStatus_TXEMemParity_RMASK 0xF +#define QIB_6120_HwErrStatus_Reserved3_LSB 0x22 +#define QIB_6120_HwErrStatus_Reserved3_RMASK 0x3F +#define QIB_6120_HwErrStatus_PCIeBusParity_LSB 0x1F +#define QIB_6120_HwErrStatus_PCIeBusParity_RMASK 0x7 +#define QIB_6120_HwErrStatus_PcieCplTimeout_LSB 0x1E +#define QIB_6120_HwErrStatus_PcieCplTimeout_RMASK 0x1 +#define QIB_6120_HwErrStatus_PoisenedTLP_LSB 0x1D +#define QIB_6120_HwErrStatus_PoisenedTLP_RMASK 0x1 +#define QIB_6120_HwErrStatus_Reserved4_LSB 0x6 +#define QIB_6120_HwErrStatus_Reserved4_RMASK 0x7FFFFF +#define QIB_6120_HwErrStatus_PCIeMemParity_LSB 0x0 +#define QIB_6120_HwErrStatus_PCIeMemParity_RMASK 0x3F + +#define QIB_6120_HwErrClear_OFFS 0xA8 +#define QIB_6120_HwErrClear_IBCBusFromSPCParityErrClear_LSB 0x3F +#define QIB_6120_HwErrClear_IBCBusFromSPCParityErrClear_RMASK 0x1 +#define QIB_6120_HwErrClear_IBCBusToSPCparityErrClear_LSB 0x3E +#define QIB_6120_HwErrClear_IBCBusToSPCparityErrClear_RMASK 0x1 +#define QIB_6120_HwErrClear_Reserved_LSB 0x3D +#define QIB_6120_HwErrClear_Reserved_RMASK 0x1 +#define QIB_6120_HwErrClear_IBSerdesPClkNotDetectClear_LSB 0x3C +#define QIB_6120_HwErrClear_IBSerdesPClkNotDetectClear_RMASK 0x1 +#define QIB_6120_HwErrClear_PCIESerdesQ0PClkNotDetectClear_LSB 0x3B +#define QIB_6120_HwErrClear_PCIESerdesQ0PClkNotDetectClear_RMASK 0x1 +#define QIB_6120_HwErrClear_PCIESerdesQ1PClkNotDetectClear_LSB 0x3A +#define QIB_6120_HwErrClear_PCIESerdesQ1PClkNotDetectClear_RMASK 0x1 +#define 
QIB_6120_HwErrClear_Reserved1_LSB 0x39 +#define QIB_6120_HwErrClear_Reserved1_RMASK 0x1 +#define QIB_6120_HwErrClear_IBPLLrfSlipClear_LSB 0x38 +#define QIB_6120_HwErrClear_IBPLLrfSlipClear_RMASK 0x1 +#define QIB_6120_HwErrClear_IBPLLfbSlipClear_LSB 0x37 +#define QIB_6120_HwErrClear_IBPLLfbSlipClear_RMASK 0x1 +#define QIB_6120_HwErrClear_PowerOnBISTFailedClear_LSB 0x36 +#define QIB_6120_HwErrClear_PowerOnBISTFailedClear_RMASK 0x1 +#define QIB_6120_HwErrClear_Reserved2_LSB 0x33 +#define QIB_6120_HwErrClear_Reserved2_RMASK 0x7 +#define QIB_6120_HwErrClear_RXEMemParityClear_LSB 0x2C +#define QIB_6120_HwErrClear_RXEMemParityClear_RMASK 0x7F +#define QIB_6120_HwErrClear_TXEMemParityClear_LSB 0x28 +#define QIB_6120_HwErrClear_TXEMemParityClear_RMASK 0xF +#define QIB_6120_HwErrClear_Reserved3_LSB 0x22 +#define QIB_6120_HwErrClear_Reserved3_RMASK 0x3F +#define QIB_6120_HwErrClear_PCIeBusParityClr_LSB 0x1F +#define QIB_6120_HwErrClear_PCIeBusParityClr_RMASK 0x7 +#define QIB_6120_HwErrClear_PcieCplTimeoutClear_LSB 0x1E +#define QIB_6120_HwErrClear_PcieCplTimeoutClear_RMASK 0x1 +#define QIB_6120_HwErrClear_PoisonedTLPClear_LSB 0x1D +#define QIB_6120_HwErrClear_PoisonedTLPClear_RMASK 0x1 +#define QIB_6120_HwErrClear_Reserved4_LSB 0x6 +#define QIB_6120_HwErrClear_Reserved4_RMASK 0x7FFFFF +#define QIB_6120_HwErrClear_PCIeMemParityClr_LSB 0x0 +#define QIB_6120_HwErrClear_PCIeMemParityClr_RMASK 0x3F + +#define QIB_6120_HwDiagCtrl_OFFS 0xB0 +#define QIB_6120_HwDiagCtrl_ForceIBCBusFromSPCParityErr_LSB 0x3F +#define QIB_6120_HwDiagCtrl_ForceIBCBusFromSPCParityErr_RMASK 0x1 +#define QIB_6120_HwDiagCtrl_ForceIBCBusToSPCParityErr_LSB 0x3E +#define QIB_6120_HwDiagCtrl_ForceIBCBusToSPCParityErr_RMASK 0x1 +#define QIB_6120_HwDiagCtrl_CounterWrEnable_LSB 0x3D +#define QIB_6120_HwDiagCtrl_CounterWrEnable_RMASK 0x1 +#define QIB_6120_HwDiagCtrl_CounterDisable_LSB 0x3C +#define QIB_6120_HwDiagCtrl_CounterDisable_RMASK 0x1 +#define QIB_6120_HwDiagCtrl_Reserved_LSB 0x33 +#define QIB_6120_HwDiagCtrl_Reserved_RMASK 0x1FF +#define QIB_6120_HwDiagCtrl_ForceRxMemParityErr_LSB 0x2C +#define QIB_6120_HwDiagCtrl_ForceRxMemParityErr_RMASK 0x7F +#define QIB_6120_HwDiagCtrl_ForceTxMemparityErr_LSB 0x28 +#define QIB_6120_HwDiagCtrl_ForceTxMemparityErr_RMASK 0xF +#define QIB_6120_HwDiagCtrl_Reserved1_LSB 0x23 +#define QIB_6120_HwDiagCtrl_Reserved1_RMASK 0x1F +#define QIB_6120_HwDiagCtrl_forcePCIeBusParity_LSB 0x1F +#define QIB_6120_HwDiagCtrl_forcePCIeBusParity_RMASK 0xF +#define QIB_6120_HwDiagCtrl_Reserved2_LSB 0x6 +#define QIB_6120_HwDiagCtrl_Reserved2_RMASK 0x1FFFFFF +#define QIB_6120_HwDiagCtrl_forcePCIeMemParity_LSB 0x0 +#define QIB_6120_HwDiagCtrl_forcePCIeMemParity_RMASK 0x3F + +#define QIB_6120_IBCStatus_OFFS 0xC0 +#define QIB_6120_IBCStatus_TxCreditOk_LSB 0x1F +#define QIB_6120_IBCStatus_TxCreditOk_RMASK 0x1 +#define QIB_6120_IBCStatus_TxReady_LSB 0x1E +#define QIB_6120_IBCStatus_TxReady_RMASK 0x1 +#define QIB_6120_IBCStatus_Reserved_LSB 0x7 +#define QIB_6120_IBCStatus_Reserved_RMASK 0x7FFFFF +#define QIB_6120_IBCStatus_LinkState_LSB 0x4 +#define QIB_6120_IBCStatus_LinkState_RMASK 0x7 +#define QIB_6120_IBCStatus_LinkTrainingState_LSB 0x0 +#define QIB_6120_IBCStatus_LinkTrainingState_RMASK 0xF + +#define QIB_6120_IBCCtrl_OFFS 0xC8 +#define QIB_6120_IBCCtrl_Loopback_LSB 0x3F +#define QIB_6120_IBCCtrl_Loopback_RMASK 0x1 +#define QIB_6120_IBCCtrl_LinkDownDefaultState_LSB 0x3E +#define QIB_6120_IBCCtrl_LinkDownDefaultState_RMASK 0x1 +#define QIB_6120_IBCCtrl_Reserved_LSB 0x2B +#define QIB_6120_IBCCtrl_Reserved_RMASK 0x7FFFF 
+#define QIB_6120_IBCCtrl_CreditScale_LSB 0x28 +#define QIB_6120_IBCCtrl_CreditScale_RMASK 0x7 +#define QIB_6120_IBCCtrl_OverrunThreshold_LSB 0x24 +#define QIB_6120_IBCCtrl_OverrunThreshold_RMASK 0xF +#define QIB_6120_IBCCtrl_PhyerrThreshold_LSB 0x20 +#define QIB_6120_IBCCtrl_PhyerrThreshold_RMASK 0xF +#define QIB_6120_IBCCtrl_Reserved1_LSB 0x1F +#define QIB_6120_IBCCtrl_Reserved1_RMASK 0x1 +#define QIB_6120_IBCCtrl_MaxPktLen_LSB 0x14 +#define QIB_6120_IBCCtrl_MaxPktLen_RMASK 0x7FF +#define QIB_6120_IBCCtrl_LinkCmd_LSB 0x12 +#define QIB_6120_IBCCtrl_LinkCmd_RMASK 0x3 +#define QIB_6120_IBCCtrl_LinkInitCmd_LSB 0x10 +#define QIB_6120_IBCCtrl_LinkInitCmd_RMASK 0x3 +#define QIB_6120_IBCCtrl_FlowCtrlWaterMark_LSB 0x8 +#define QIB_6120_IBCCtrl_FlowCtrlWaterMark_RMASK 0xFF +#define QIB_6120_IBCCtrl_FlowCtrlPeriod_LSB 0x0 +#define QIB_6120_IBCCtrl_FlowCtrlPeriod_RMASK 0xFF + +#define QIB_6120_EXTStatus_OFFS 0xD0 +#define QIB_6120_EXTStatus_GPIOIn_LSB 0x30 +#define QIB_6120_EXTStatus_GPIOIn_RMASK 0xFFFF +#define QIB_6120_EXTStatus_Reserved_LSB 0x20 +#define QIB_6120_EXTStatus_Reserved_RMASK 0xFFFF +#define QIB_6120_EXTStatus_Reserved1_LSB 0x10 +#define QIB_6120_EXTStatus_Reserved1_RMASK 0xFFFF +#define QIB_6120_EXTStatus_MemBISTFoundErr_LSB 0xF +#define QIB_6120_EXTStatus_MemBISTFoundErr_RMASK 0x1 +#define QIB_6120_EXTStatus_MemBISTEndTest_LSB 0xE +#define QIB_6120_EXTStatus_MemBISTEndTest_RMASK 0x1 +#define QIB_6120_EXTStatus_Reserved2_LSB 0x0 +#define QIB_6120_EXTStatus_Reserved2_RMASK 0x3FFF + +#define QIB_6120_EXTCtrl_OFFS 0xD8 +#define QIB_6120_EXTCtrl_GPIOOe_LSB 0x30 +#define QIB_6120_EXTCtrl_GPIOOe_RMASK 0xFFFF +#define QIB_6120_EXTCtrl_GPIOInvert_LSB 0x20 +#define QIB_6120_EXTCtrl_GPIOInvert_RMASK 0xFFFF +#define QIB_6120_EXTCtrl_Reserved_LSB 0x4 +#define QIB_6120_EXTCtrl_Reserved_RMASK 0xFFFFFFF +#define QIB_6120_EXTCtrl_LEDPriPortGreenOn_LSB 0x3 +#define QIB_6120_EXTCtrl_LEDPriPortGreenOn_RMASK 0x1 +#define QIB_6120_EXTCtrl_LEDPriPortYellowOn_LSB 0x2 +#define QIB_6120_EXTCtrl_LEDPriPortYellowOn_RMASK 0x1 +#define QIB_6120_EXTCtrl_LEDGblOkGreenOn_LSB 0x1 +#define QIB_6120_EXTCtrl_LEDGblOkGreenOn_RMASK 0x1 +#define QIB_6120_EXTCtrl_LEDGblErrRedOff_LSB 0x0 +#define QIB_6120_EXTCtrl_LEDGblErrRedOff_RMASK 0x1 + +#define QIB_6120_GPIOOut_OFFS 0xE0 + +#define QIB_6120_GPIOMask_OFFS 0xE8 + +#define QIB_6120_GPIOStatus_OFFS 0xF0 + +#define QIB_6120_GPIOClear_OFFS 0xF8 + +#define QIB_6120_RcvCtrl_OFFS 0x100 +#define QIB_6120_RcvCtrl_TailUpd_LSB 0x1F +#define QIB_6120_RcvCtrl_TailUpd_RMASK 0x1 +#define QIB_6120_RcvCtrl_RcvPartitionKeyDisable_LSB 0x1E +#define QIB_6120_RcvCtrl_RcvPartitionKeyDisable_RMASK 0x1 +#define QIB_6120_RcvCtrl_Reserved_LSB 0x15 +#define QIB_6120_RcvCtrl_Reserved_RMASK 0x1FF +#define QIB_6120_RcvCtrl_IntrAvail_LSB 0x10 +#define QIB_6120_RcvCtrl_IntrAvail_RMASK 0x1F +#define QIB_6120_RcvCtrl_Reserved1_LSB 0x9 +#define QIB_6120_RcvCtrl_Reserved1_RMASK 0x7F +#define QIB_6120_RcvCtrl_Reserved2_LSB 0x5 +#define QIB_6120_RcvCtrl_Reserved2_RMASK 0xF +#define QIB_6120_RcvCtrl_PortEnable_LSB 0x0 +#define QIB_6120_RcvCtrl_PortEnable_RMASK 0x1F + +#define QIB_6120_RcvBTHQP_OFFS 0x108 +#define QIB_6120_RcvBTHQP_BTHQP_Mask_LSB 0x1E +#define QIB_6120_RcvBTHQP_BTHQP_Mask_RMASK 0x3 +#define QIB_6120_RcvBTHQP_Reserved_LSB 0x18 +#define QIB_6120_RcvBTHQP_Reserved_RMASK 0x3F +#define QIB_6120_RcvBTHQP_RcvBTHQP_LSB 0x0 +#define QIB_6120_RcvBTHQP_RcvBTHQP_RMASK 0xFFFFFF + +#define QIB_6120_RcvHdrSize_OFFS 0x110 + +#define QIB_6120_RcvHdrCnt_OFFS 0x118 + +#define QIB_6120_RcvHdrEntSize_OFFS 0x120 + 
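(The convention throughout these generated headers: each register has a byte offset, _OFFS, and each bit field within it is described by a starting bit, _LSB, plus a right-justified mask, _RMASK. A field is read by shifting the 64-bit register value right by the LSB and masking with the RMASK; writing is the inverse. A minimal standalone sketch of that arithmetic, using the IBCCtrl MaxPktLen field defined above; the qib_field_get/qib_field_set helpers here are illustrative only, not part of this patch:)

#include <stdint.h>
#include <stdio.h>

/* Values mirrored from the generated definitions above. */
#define QIB_6120_IBCCtrl_MaxPktLen_LSB   0x14
#define QIB_6120_IBCCtrl_MaxPktLen_RMASK 0x7FF

/* Extract a field: shift it down to bit 0, then mask off its width. */
static uint64_t qib_field_get(uint64_t regval, unsigned lsb, uint64_t rmask)
{
	return (regval >> lsb) & rmask;
}

/* Replace a field: clear its bits, then OR in the new (masked) value. */
static uint64_t qib_field_set(uint64_t regval, unsigned lsb, uint64_t rmask,
			      uint64_t val)
{
	return (regval & ~(rmask << lsb)) | ((val & rmask) << lsb);
}

int main(void)
{
	uint64_t ibcctrl = 0;

	ibcctrl = qib_field_set(ibcctrl, QIB_6120_IBCCtrl_MaxPktLen_LSB,
				QIB_6120_IBCCtrl_MaxPktLen_RMASK, 0x100);
	printf("MaxPktLen = 0x%llx\n",
	       (unsigned long long)qib_field_get(ibcctrl,
			QIB_6120_IBCCtrl_MaxPktLen_LSB,
			QIB_6120_IBCCtrl_MaxPktLen_RMASK));
	return 0;
}

(The same get/set pattern applies to every _LSB/_RMASK pair in this file and in the 7220/7322 equivalents below.)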
+#define QIB_6120_RcvTIDBase_OFFS 0x128 + +#define QIB_6120_RcvTIDCnt_OFFS 0x130 + +#define QIB_6120_RcvEgrBase_OFFS 0x138 + +#define QIB_6120_RcvEgrCnt_OFFS 0x140 + +#define QIB_6120_RcvBufBase_OFFS 0x148 + +#define QIB_6120_RcvBufSize_OFFS 0x150 + +#define QIB_6120_RxIntMemBase_OFFS 0x158 + +#define QIB_6120_RxIntMemSize_OFFS 0x160 + +#define QIB_6120_RcvPartitionKey_OFFS 0x168 + +#define QIB_6120_RcvPktLEDCnt_OFFS 0x178 +#define QIB_6120_RcvPktLEDCnt_ONperiod_LSB 0x20 +#define QIB_6120_RcvPktLEDCnt_ONperiod_RMASK 0xFFFFFFFF +#define QIB_6120_RcvPktLEDCnt_OFFperiod_LSB 0x0 +#define QIB_6120_RcvPktLEDCnt_OFFperiod_RMASK 0xFFFFFFFF + +#define QIB_6120_SendCtrl_OFFS 0x1C0 +#define QIB_6120_SendCtrl_Disarm_LSB 0x1F +#define QIB_6120_SendCtrl_Disarm_RMASK 0x1 +#define QIB_6120_SendCtrl_Reserved_LSB 0x17 +#define QIB_6120_SendCtrl_Reserved_RMASK 0xFF +#define QIB_6120_SendCtrl_DisarmPIOBuf_LSB 0x10 +#define QIB_6120_SendCtrl_DisarmPIOBuf_RMASK 0x7F +#define QIB_6120_SendCtrl_Reserved1_LSB 0x4 +#define QIB_6120_SendCtrl_Reserved1_RMASK 0xFFF +#define QIB_6120_SendCtrl_PIOEnable_LSB 0x3 +#define QIB_6120_SendCtrl_PIOEnable_RMASK 0x1 +#define QIB_6120_SendCtrl_PIOBufAvailUpd_LSB 0x2 +#define QIB_6120_SendCtrl_PIOBufAvailUpd_RMASK 0x1 +#define QIB_6120_SendCtrl_PIOIntBufAvail_LSB 0x1 +#define QIB_6120_SendCtrl_PIOIntBufAvail_RMASK 0x1 +#define QIB_6120_SendCtrl_Abort_LSB 0x0 +#define QIB_6120_SendCtrl_Abort_RMASK 0x1 + +#define QIB_6120_SendPIOBufBase_OFFS 0x1C8 +#define QIB_6120_SendPIOBufBase_Reserved_LSB 0x35 +#define QIB_6120_SendPIOBufBase_Reserved_RMASK 0x7FF +#define QIB_6120_SendPIOBufBase_BaseAddr_LargePIO_LSB 0x20 +#define QIB_6120_SendPIOBufBase_BaseAddr_LargePIO_RMASK 0x1FFFFF +#define QIB_6120_SendPIOBufBase_Reserved1_LSB 0x15 +#define QIB_6120_SendPIOBufBase_Reserved1_RMASK 0x7FF +#define QIB_6120_SendPIOBufBase_BaseAddr_SmallPIO_LSB 0x0 +#define QIB_6120_SendPIOBufBase_BaseAddr_SmallPIO_RMASK 0x1FFFFF + +#define QIB_6120_SendPIOSize_OFFS 0x1D0 +#define QIB_6120_SendPIOSize_Reserved_LSB 0x2D +#define QIB_6120_SendPIOSize_Reserved_RMASK 0xFFFFF +#define QIB_6120_SendPIOSize_Size_LargePIO_LSB 0x20 +#define QIB_6120_SendPIOSize_Size_LargePIO_RMASK 0x1FFF +#define QIB_6120_SendPIOSize_Reserved1_LSB 0xC +#define QIB_6120_SendPIOSize_Reserved1_RMASK 0xFFFFF +#define QIB_6120_SendPIOSize_Size_SmallPIO_LSB 0x0 +#define QIB_6120_SendPIOSize_Size_SmallPIO_RMASK 0xFFF + +#define QIB_6120_SendPIOBufCnt_OFFS 0x1D8 +#define QIB_6120_SendPIOBufCnt_Reserved_LSB 0x24 +#define QIB_6120_SendPIOBufCnt_Reserved_RMASK 0xFFFFFFF +#define QIB_6120_SendPIOBufCnt_Num_LargePIO_LSB 0x20 +#define QIB_6120_SendPIOBufCnt_Num_LargePIO_RMASK 0xF +#define QIB_6120_SendPIOBufCnt_Reserved1_LSB 0x9 +#define QIB_6120_SendPIOBufCnt_Reserved1_RMASK 0x7FFFFF +#define QIB_6120_SendPIOBufCnt_Num_SmallPIO_LSB 0x0 +#define QIB_6120_SendPIOBufCnt_Num_SmallPIO_RMASK 0x1FF + +#define QIB_6120_SendPIOAvailAddr_OFFS 0x1E0 +#define QIB_6120_SendPIOAvailAddr_SendPIOAvailAddr_LSB 0x6 +#define QIB_6120_SendPIOAvailAddr_SendPIOAvailAddr_RMASK 0x3FFFFFFFF +#define QIB_6120_SendPIOAvailAddr_Reserved_LSB 0x0 +#define QIB_6120_SendPIOAvailAddr_Reserved_RMASK 0x3F + +#define QIB_6120_SendBufErr0_OFFS 0x240 +#define QIB_6120_SendBufErr0_SendBufErrPIO_63_0_LSB 0x0 +#define QIB_6120_SendBufErr0_SendBufErrPIO_63_0_RMASK 0x0 + +#define QIB_6120_RcvHdrAddr0_OFFS 0x280 +#define QIB_6120_RcvHdrAddr0_RcvHdrAddr0_LSB 0x2 +#define QIB_6120_RcvHdrAddr0_RcvHdrAddr0_RMASK 0x3FFFFFFFFF +#define QIB_6120_RcvHdrAddr0_Reserved_LSB 0x0 +#define 
QIB_6120_RcvHdrAddr0_Reserved_RMASK 0x3 + +#define QIB_6120_RcvHdrTailAddr0_OFFS 0x300 +#define QIB_6120_RcvHdrTailAddr0_RcvHdrTailAddr0_LSB 0x2 +#define QIB_6120_RcvHdrTailAddr0_RcvHdrTailAddr0_RMASK 0x3FFFFFFFFF +#define QIB_6120_RcvHdrTailAddr0_Reserved_LSB 0x0 +#define QIB_6120_RcvHdrTailAddr0_Reserved_RMASK 0x3 + +#define QIB_6120_SerdesCfg0_OFFS 0x3C0 +#define QIB_6120_SerdesCfg0_DisableIBTxIdleDetect_LSB 0x3F +#define QIB_6120_SerdesCfg0_DisableIBTxIdleDetect_RMASK 0x1 +#define QIB_6120_SerdesCfg0_Reserved_LSB 0x38 +#define QIB_6120_SerdesCfg0_Reserved_RMASK 0x7F +#define QIB_6120_SerdesCfg0_RxEqCtl_LSB 0x36 +#define QIB_6120_SerdesCfg0_RxEqCtl_RMASK 0x3 +#define QIB_6120_SerdesCfg0_TxTermAdj_LSB 0x34 +#define QIB_6120_SerdesCfg0_TxTermAdj_RMASK 0x3 +#define QIB_6120_SerdesCfg0_RxTermAdj_LSB 0x32 +#define QIB_6120_SerdesCfg0_RxTermAdj_RMASK 0x3 +#define QIB_6120_SerdesCfg0_TermAdj1_LSB 0x31 +#define QIB_6120_SerdesCfg0_TermAdj1_RMASK 0x1 +#define QIB_6120_SerdesCfg0_TermAdj0_LSB 0x30 +#define QIB_6120_SerdesCfg0_TermAdj0_RMASK 0x1 +#define QIB_6120_SerdesCfg0_LPBKA_LSB 0x2F +#define QIB_6120_SerdesCfg0_LPBKA_RMASK 0x1 +#define QIB_6120_SerdesCfg0_LPBKB_LSB 0x2E +#define QIB_6120_SerdesCfg0_LPBKB_RMASK 0x1 +#define QIB_6120_SerdesCfg0_LPBKC_LSB 0x2D +#define QIB_6120_SerdesCfg0_LPBKC_RMASK 0x1 +#define QIB_6120_SerdesCfg0_LPBKD_LSB 0x2C +#define QIB_6120_SerdesCfg0_LPBKD_RMASK 0x1 +#define QIB_6120_SerdesCfg0_PW_LSB 0x2B +#define QIB_6120_SerdesCfg0_PW_RMASK 0x1 +#define QIB_6120_SerdesCfg0_RefSel_LSB 0x29 +#define QIB_6120_SerdesCfg0_RefSel_RMASK 0x3 +#define QIB_6120_SerdesCfg0_ParReset_LSB 0x28 +#define QIB_6120_SerdesCfg0_ParReset_RMASK 0x1 +#define QIB_6120_SerdesCfg0_ParLPBK_LSB 0x27 +#define QIB_6120_SerdesCfg0_ParLPBK_RMASK 0x1 +#define QIB_6120_SerdesCfg0_OffsetEn_LSB 0x26 +#define QIB_6120_SerdesCfg0_OffsetEn_RMASK 0x1 +#define QIB_6120_SerdesCfg0_Offset_LSB 0x1E +#define QIB_6120_SerdesCfg0_Offset_RMASK 0xFF +#define QIB_6120_SerdesCfg0_L2PwrDn_LSB 0x1D +#define QIB_6120_SerdesCfg0_L2PwrDn_RMASK 0x1 +#define QIB_6120_SerdesCfg0_ResetPLL_LSB 0x1C +#define QIB_6120_SerdesCfg0_ResetPLL_RMASK 0x1 +#define QIB_6120_SerdesCfg0_RxTermEnX_LSB 0x18 +#define QIB_6120_SerdesCfg0_RxTermEnX_RMASK 0xF +#define QIB_6120_SerdesCfg0_BeaconTxEnX_LSB 0x14 +#define QIB_6120_SerdesCfg0_BeaconTxEnX_RMASK 0xF +#define QIB_6120_SerdesCfg0_RxDetEnX_LSB 0x10 +#define QIB_6120_SerdesCfg0_RxDetEnX_RMASK 0xF +#define QIB_6120_SerdesCfg0_TxIdeEnX_LSB 0xC +#define QIB_6120_SerdesCfg0_TxIdeEnX_RMASK 0xF +#define QIB_6120_SerdesCfg0_RxIdleEnX_LSB 0x8 +#define QIB_6120_SerdesCfg0_RxIdleEnX_RMASK 0xF +#define QIB_6120_SerdesCfg0_L1PwrDnA_LSB 0x7 +#define QIB_6120_SerdesCfg0_L1PwrDnA_RMASK 0x1 +#define QIB_6120_SerdesCfg0_L1PwrDnB_LSB 0x6 +#define QIB_6120_SerdesCfg0_L1PwrDnB_RMASK 0x1 +#define QIB_6120_SerdesCfg0_L1PwrDnC_LSB 0x5 +#define QIB_6120_SerdesCfg0_L1PwrDnC_RMASK 0x1 +#define QIB_6120_SerdesCfg0_L1PwrDnD_LSB 0x4 +#define QIB_6120_SerdesCfg0_L1PwrDnD_RMASK 0x1 +#define QIB_6120_SerdesCfg0_ResetA_LSB 0x3 +#define QIB_6120_SerdesCfg0_ResetA_RMASK 0x1 +#define QIB_6120_SerdesCfg0_ResetB_LSB 0x2 +#define QIB_6120_SerdesCfg0_ResetB_RMASK 0x1 +#define QIB_6120_SerdesCfg0_ResetC_LSB 0x1 +#define QIB_6120_SerdesCfg0_ResetC_RMASK 0x1 +#define QIB_6120_SerdesCfg0_ResetD_LSB 0x0 +#define QIB_6120_SerdesCfg0_ResetD_RMASK 0x1 + +#define QIB_6120_SerdesStat_OFFS 0x3D0 +#define QIB_6120_SerdesStat_Reserved_LSB 0xC +#define QIB_6120_SerdesStat_Reserved_RMASK 0xFFFFFFFFFFFFF +#define 
QIB_6120_SerdesStat_BeaconDetA_LSB 0xB +#define QIB_6120_SerdesStat_BeaconDetA_RMASK 0x1 +#define QIB_6120_SerdesStat_BeaconDetB_LSB 0xA +#define QIB_6120_SerdesStat_BeaconDetB_RMASK 0x1 +#define QIB_6120_SerdesStat_BeaconDetC_LSB 0x9 +#define QIB_6120_SerdesStat_BeaconDetC_RMASK 0x1 +#define QIB_6120_SerdesStat_BeaconDetD_LSB 0x8 +#define QIB_6120_SerdesStat_BeaconDetD_RMASK 0x1 +#define QIB_6120_SerdesStat_RxDetA_LSB 0x7 +#define QIB_6120_SerdesStat_RxDetA_RMASK 0x1 +#define QIB_6120_SerdesStat_RxDetB_LSB 0x6 +#define QIB_6120_SerdesStat_RxDetB_RMASK 0x1 +#define QIB_6120_SerdesStat_RxDetC_LSB 0x5 +#define QIB_6120_SerdesStat_RxDetC_RMASK 0x1 +#define QIB_6120_SerdesStat_RxDetD_LSB 0x4 +#define QIB_6120_SerdesStat_RxDetD_RMASK 0x1 +#define QIB_6120_SerdesStat_TxIdleDetA_LSB 0x3 +#define QIB_6120_SerdesStat_TxIdleDetA_RMASK 0x1 +#define QIB_6120_SerdesStat_TxIdleDetB_LSB 0x2 +#define QIB_6120_SerdesStat_TxIdleDetB_RMASK 0x1 +#define QIB_6120_SerdesStat_TxIdleDetC_LSB 0x1 +#define QIB_6120_SerdesStat_TxIdleDetC_RMASK 0x1 +#define QIB_6120_SerdesStat_TxIdleDetD_LSB 0x0 +#define QIB_6120_SerdesStat_TxIdleDetD_RMASK 0x1 + +#define QIB_6120_XGXSCfg_OFFS 0x3D8 +#define QIB_6120_XGXSCfg_ArmLaunchErrorDisable_LSB 0x3F +#define QIB_6120_XGXSCfg_ArmLaunchErrorDisable_RMASK 0x1 +#define QIB_6120_XGXSCfg_Reserved_LSB 0x17 +#define QIB_6120_XGXSCfg_Reserved_RMASK 0xFFFFFFFFFF +#define QIB_6120_XGXSCfg_polarity_inv_LSB 0x13 +#define QIB_6120_XGXSCfg_polarity_inv_RMASK 0xF +#define QIB_6120_XGXSCfg_link_sync_mask_LSB 0x9 +#define QIB_6120_XGXSCfg_link_sync_mask_RMASK 0x3FF +#define QIB_6120_XGXSCfg_port_addr_LSB 0x4 +#define QIB_6120_XGXSCfg_port_addr_RMASK 0x1F +#define QIB_6120_XGXSCfg_mdd_30_LSB 0x3 +#define QIB_6120_XGXSCfg_mdd_30_RMASK 0x1 +#define QIB_6120_XGXSCfg_xcv_resetn_LSB 0x2 +#define QIB_6120_XGXSCfg_xcv_resetn_RMASK 0x1 +#define QIB_6120_XGXSCfg_Reserved1_LSB 0x1 +#define QIB_6120_XGXSCfg_Reserved1_RMASK 0x1 +#define QIB_6120_XGXSCfg_tx_rx_resetn_LSB 0x0 +#define QIB_6120_XGXSCfg_tx_rx_resetn_RMASK 0x1 + +#define QIB_6120_LBIntCnt_OFFS 0x12000 + +#define QIB_6120_LBFlowStallCnt_OFFS 0x12008 + +#define QIB_6120_TxUnsupVLErrCnt_OFFS 0x12018 + +#define QIB_6120_TxDataPktCnt_OFFS 0x12020 + +#define QIB_6120_TxFlowPktCnt_OFFS 0x12028 + +#define QIB_6120_TxDwordCnt_OFFS 0x12030 + +#define QIB_6120_TxLenErrCnt_OFFS 0x12038 + +#define QIB_6120_TxMaxMinLenErrCnt_OFFS 0x12040 + +#define QIB_6120_TxUnderrunCnt_OFFS 0x12048 + +#define QIB_6120_TxFlowStallCnt_OFFS 0x12050 + +#define QIB_6120_TxDroppedPktCnt_OFFS 0x12058 + +#define QIB_6120_RxDroppedPktCnt_OFFS 0x12060 + +#define QIB_6120_RxDataPktCnt_OFFS 0x12068 + +#define QIB_6120_RxFlowPktCnt_OFFS 0x12070 + +#define QIB_6120_RxDwordCnt_OFFS 0x12078 + +#define QIB_6120_RxLenErrCnt_OFFS 0x12080 + +#define QIB_6120_RxMaxMinLenErrCnt_OFFS 0x12088 + +#define QIB_6120_RxICRCErrCnt_OFFS 0x12090 + +#define QIB_6120_RxVCRCErrCnt_OFFS 0x12098 + +#define QIB_6120_RxFlowCtrlErrCnt_OFFS 0x120A0 + +#define QIB_6120_RxBadFormatCnt_OFFS 0x120A8 + +#define QIB_6120_RxLinkProblemCnt_OFFS 0x120B0 + +#define QIB_6120_RxEBPCnt_OFFS 0x120B8 + +#define QIB_6120_RxLPCRCErrCnt_OFFS 0x120C0 + +#define QIB_6120_RxBufOvflCnt_OFFS 0x120C8 + +#define QIB_6120_RxTIDFullErrCnt_OFFS 0x120D0 + +#define QIB_6120_RxTIDValidErrCnt_OFFS 0x120D8 + +#define QIB_6120_RxPKeyMismatchCnt_OFFS 0x120E0 + +#define QIB_6120_RxP0HdrEgrOvflCnt_OFFS 0x120E8 + +#define QIB_6120_IBStatusChangeCnt_OFFS 0x12140 + +#define QIB_6120_IBLinkErrRecoveryCnt_OFFS 0x12148 + +#define 
QIB_6120_IBLinkDownedCnt_OFFS 0x12150 + +#define QIB_6120_IBSymbolErrCnt_OFFS 0x12158 + +#define QIB_6120_PcieRetryBufDiagQwordCnt_OFFS 0x12170 + +#define QIB_6120_RcvEgrArray0_OFFS 0x14000 + +#define QIB_6120_RcvTIDArray0_OFFS 0x54000 + +#define QIB_6120_PIOLaunchFIFO_OFFS 0x64000 + +#define QIB_6120_SendPIOpbcCache_OFFS 0x64800 + +#define QIB_6120_RcvBuf1_OFFS 0x72000 + +#define QIB_6120_RcvBuf2_OFFS 0x75000 + +#define QIB_6120_RcvFlags_OFFS 0x77000 + +#define QIB_6120_RcvLookupBuf1_OFFS 0x79000 + +#define QIB_6120_RcvDMABuf_OFFS 0x7B000 + +#define QIB_6120_MiscRXEIntMem_OFFS 0x7C000 + +#define QIB_6120_PCIERcvBuf_OFFS 0x80000 + +#define QIB_6120_PCIERetryBuf_OFFS 0x82000 + +#define QIB_6120_PCIERcvBufRdToWrAddr_OFFS 0x84000 + +#define QIB_6120_PIOBuf0_MA_OFFS 0x100000 diff --git a/drivers/infiniband/hw/qib/qib_7220.h b/drivers/infiniband/hw/qib/qib_7220.h new file mode 100644 index 00000000000..a5356cb4252 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_7220.h @@ -0,0 +1,149 @@ +#ifndef _QIB_7220_H +#define _QIB_7220_H +/* + * Copyright (c) 2007, 2009, 2010 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* grab register-defs auto-generated by HW */ +#include "qib_7220_regs.h" + +/* The number of eager receive TIDs for context zero. */ +#define IBA7220_KRCVEGRCNT 2048U + +#define IB_7220_LT_STATE_CFGRCVFCFG 0x09 +#define IB_7220_LT_STATE_CFGWAITRMT 0x0a +#define IB_7220_LT_STATE_TXREVLANES 0x0d +#define IB_7220_LT_STATE_CFGENH 0x10 + +struct qib_chip_specific { + u64 __iomem *cregbase; + u64 *cntrs; + u64 *portcntrs; + spinlock_t sdepb_lock; /* serdes EPB bus */ + spinlock_t rcvmod_lock; /* protect rcvctrl shadow changes */ + spinlock_t gpio_lock; /* RMW of shadows/regs for ExtCtrl and GPIO */ + u64 hwerrmask; + u64 errormask; + u64 gpio_out; /* shadow of kr_gpio_out, for rmw ops */ + u64 gpio_mask; /* shadow the gpio mask register */ + u64 extctrl; /* shadow the gpio output enable, etc... 
 */ + u32 ncntrs; + u32 nportcntrs; + u32 cntrnamelen; + u32 portcntrnamelen; + u32 numctxts; + u32 rcvegrcnt; + u32 autoneg_tries; + u32 serdes_first_init_done; + u32 sdmabufcnt; + u32 lastbuf_for_pio; + u32 updthresh; /* current AvailUpdThld */ + u32 updthresh_dflt; /* default AvailUpdThld */ + int irq; + u8 presets_needed; + u8 relock_timer_active; + char emsgbuf[128]; + char sdmamsgbuf[192]; + char bitsmsgbuf[64]; + struct timer_list relock_timer; + unsigned int relock_interval; /* in jiffies */ +}; + +struct qib_chippport_specific { + struct qib_pportdata pportdata; + wait_queue_head_t autoneg_wait; + struct delayed_work autoneg_work; + struct timer_list chase_timer; + /* + * These 5 fields are used to establish deltas for IB symbol + * errors and link recovery errors. They can be reported on + * some chips during link negotiation prior to INIT, and with + * DDR when faking DDR negotiations with non-IBTA switches. + * The chip counters are adjusted at driver unload if there is + * a non-zero delta. + */ + u64 ibdeltainprog; + u64 ibsymdelta; + u64 ibsymsnap; + u64 iblnkerrdelta; + u64 iblnkerrsnap; + u64 ibcctrl; /* kr_ibcctrl shadow */ + u64 ibcddrctrl; /* kr_ibcddrctrl shadow */ + unsigned long chase_end; + u32 last_delay_mult; +}; + +/* + * This header file provides the declarations and common definitions + * for (mostly) manipulation of the SerDes blocks within the IBA7220. + * The functions declared should only be called from within other + * 7220-related files such as qib_iba7220.c or qib_sd7220.c. + */ +int qib_sd7220_presets(struct qib_devdata *dd); +int qib_sd7220_init(struct qib_devdata *dd); +void qib_sd7220_clr_ibpar(struct qib_devdata *); +/* + * Below used for sdnum parameter, selecting one of the two sections + * used for PCIe, or the single SerDes used for IB, which is the + * only one currently used. + */ +#define IB_7220_SERDES 2 + +static inline u32 qib_read_kreg32(const struct qib_devdata *dd, + const u16 regno) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return -1; + return readl((u32 __iomem *)&dd->kregbase[regno]); +} + +static inline u64 qib_read_kreg64(const struct qib_devdata *dd, + const u16 regno) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return -1; + + return readq(&dd->kregbase[regno]); +} + +static inline void qib_write_kreg(const struct qib_devdata *dd, + const u16 regno, u64 value) +{ + if (dd->kregbase) + writeq(value, &dd->kregbase[regno]); +} + +void set_7220_relock_poll(struct qib_devdata *, int); +void shutdown_7220_relock_poll(struct qib_devdata *); +void toggle_7220_rclkrls(struct qib_devdata *); + + +#endif /* _QIB_7220_H */ diff --git a/drivers/infiniband/hw/qib/qib_7220_regs.h b/drivers/infiniband/hw/qib/qib_7220_regs.h new file mode 100644 index 00000000000..0da5bb750e5 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_7220_regs.h @@ -0,0 +1,1496 @@ +/* + * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * + * + * This software is available to you under a choice of one of two + * licenses.
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +/* This file is mechanically generated from RTL. Any hand-edits will be lost! */ + +#define QIB_7220_Revision_OFFS 0x0 +#define QIB_7220_Revision_R_Simulator_LSB 0x3F +#define QIB_7220_Revision_R_Simulator_RMASK 0x1 +#define QIB_7220_Revision_R_Emulation_LSB 0x3E +#define QIB_7220_Revision_R_Emulation_RMASK 0x1 +#define QIB_7220_Revision_R_Emulation_Revcode_LSB 0x28 +#define QIB_7220_Revision_R_Emulation_Revcode_RMASK 0x3FFFFF +#define QIB_7220_Revision_BoardID_LSB 0x20 +#define QIB_7220_Revision_BoardID_RMASK 0xFF +#define QIB_7220_Revision_R_SW_LSB 0x18 +#define QIB_7220_Revision_R_SW_RMASK 0xFF +#define QIB_7220_Revision_R_Arch_LSB 0x10 +#define QIB_7220_Revision_R_Arch_RMASK 0xFF +#define QIB_7220_Revision_R_ChipRevMajor_LSB 0x8 +#define QIB_7220_Revision_R_ChipRevMajor_RMASK 0xFF +#define QIB_7220_Revision_R_ChipRevMinor_LSB 0x0 +#define QIB_7220_Revision_R_ChipRevMinor_RMASK 0xFF + +#define QIB_7220_Control_OFFS 0x8 +#define QIB_7220_Control_SyncResetExceptPcieIRAMRST_LSB 0x7 +#define QIB_7220_Control_SyncResetExceptPcieIRAMRST_RMASK 0x1 +#define QIB_7220_Control_PCIECplQDiagEn_LSB 0x6 +#define QIB_7220_Control_PCIECplQDiagEn_RMASK 0x1 +#define QIB_7220_Control_Reserved_LSB 0x5 +#define QIB_7220_Control_Reserved_RMASK 0x1 +#define QIB_7220_Control_TxLatency_LSB 0x4 +#define QIB_7220_Control_TxLatency_RMASK 0x1 +#define QIB_7220_Control_PCIERetryBufDiagEn_LSB 0x3 +#define QIB_7220_Control_PCIERetryBufDiagEn_RMASK 0x1 +#define QIB_7220_Control_LinkEn_LSB 0x2 +#define QIB_7220_Control_LinkEn_RMASK 0x1 +#define QIB_7220_Control_FreezeMode_LSB 0x1 +#define QIB_7220_Control_FreezeMode_RMASK 0x1 +#define QIB_7220_Control_SyncReset_LSB 0x0 +#define QIB_7220_Control_SyncReset_RMASK 0x1 + +#define QIB_7220_PageAlign_OFFS 0x10 + +#define QIB_7220_PortCnt_OFFS 0x18 + +#define QIB_7220_SendRegBase_OFFS 0x30 + +#define QIB_7220_UserRegBase_OFFS 0x38 + +#define QIB_7220_CntrRegBase_OFFS 0x40 + +#define QIB_7220_Scratch_OFFS 0x48 + +#define QIB_7220_IntMask_OFFS 0x68 +#define QIB_7220_IntMask_SDmaIntMask_LSB 0x3F +#define QIB_7220_IntMask_SDmaIntMask_RMASK 0x1 +#define QIB_7220_IntMask_SDmaDisabledMasked_LSB 0x3E +#define QIB_7220_IntMask_SDmaDisabledMasked_RMASK 0x1 +#define QIB_7220_IntMask_Reserved_LSB 0x31 +#define QIB_7220_IntMask_Reserved_RMASK 0x1FFF +#define 
QIB_7220_IntMask_RcvUrg16IntMask_LSB 0x30 +#define QIB_7220_IntMask_RcvUrg16IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg15IntMask_LSB 0x2F +#define QIB_7220_IntMask_RcvUrg15IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg14IntMask_LSB 0x2E +#define QIB_7220_IntMask_RcvUrg14IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg13IntMask_LSB 0x2D +#define QIB_7220_IntMask_RcvUrg13IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg12IntMask_LSB 0x2C +#define QIB_7220_IntMask_RcvUrg12IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg11IntMask_LSB 0x2B +#define QIB_7220_IntMask_RcvUrg11IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg10IntMask_LSB 0x2A +#define QIB_7220_IntMask_RcvUrg10IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg9IntMask_LSB 0x29 +#define QIB_7220_IntMask_RcvUrg9IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg8IntMask_LSB 0x28 +#define QIB_7220_IntMask_RcvUrg8IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg7IntMask_LSB 0x27 +#define QIB_7220_IntMask_RcvUrg7IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg6IntMask_LSB 0x26 +#define QIB_7220_IntMask_RcvUrg6IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg5IntMask_LSB 0x25 +#define QIB_7220_IntMask_RcvUrg5IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg4IntMask_LSB 0x24 +#define QIB_7220_IntMask_RcvUrg4IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg3IntMask_LSB 0x23 +#define QIB_7220_IntMask_RcvUrg3IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg2IntMask_LSB 0x22 +#define QIB_7220_IntMask_RcvUrg2IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg1IntMask_LSB 0x21 +#define QIB_7220_IntMask_RcvUrg1IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvUrg0IntMask_LSB 0x20 +#define QIB_7220_IntMask_RcvUrg0IntMask_RMASK 0x1 +#define QIB_7220_IntMask_ErrorIntMask_LSB 0x1F +#define QIB_7220_IntMask_ErrorIntMask_RMASK 0x1 +#define QIB_7220_IntMask_PioSetIntMask_LSB 0x1E +#define QIB_7220_IntMask_PioSetIntMask_RMASK 0x1 +#define QIB_7220_IntMask_PioBufAvailIntMask_LSB 0x1D +#define QIB_7220_IntMask_PioBufAvailIntMask_RMASK 0x1 +#define QIB_7220_IntMask_assertGPIOIntMask_LSB 0x1C +#define QIB_7220_IntMask_assertGPIOIntMask_RMASK 0x1 +#define QIB_7220_IntMask_IBSerdesTrimDoneIntMask_LSB 0x1B +#define QIB_7220_IntMask_IBSerdesTrimDoneIntMask_RMASK 0x1 +#define QIB_7220_IntMask_JIntMask_LSB 0x1A +#define QIB_7220_IntMask_JIntMask_RMASK 0x1 +#define QIB_7220_IntMask_Reserved1_LSB 0x11 +#define QIB_7220_IntMask_Reserved1_RMASK 0x1FF +#define QIB_7220_IntMask_RcvAvail16IntMask_LSB 0x10 +#define QIB_7220_IntMask_RcvAvail16IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail15IntMask_LSB 0xF +#define QIB_7220_IntMask_RcvAvail15IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail14IntMask_LSB 0xE +#define QIB_7220_IntMask_RcvAvail14IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail13IntMask_LSB 0xD +#define QIB_7220_IntMask_RcvAvail13IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail12IntMask_LSB 0xC +#define QIB_7220_IntMask_RcvAvail12IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail11IntMask_LSB 0xB +#define QIB_7220_IntMask_RcvAvail11IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail10IntMask_LSB 0xA +#define QIB_7220_IntMask_RcvAvail10IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail9IntMask_LSB 0x9 +#define QIB_7220_IntMask_RcvAvail9IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail8IntMask_LSB 0x8 +#define QIB_7220_IntMask_RcvAvail8IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail7IntMask_LSB 0x7 +#define QIB_7220_IntMask_RcvAvail7IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail6IntMask_LSB 0x6 +#define 
QIB_7220_IntMask_RcvAvail6IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail5IntMask_LSB 0x5 +#define QIB_7220_IntMask_RcvAvail5IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail4IntMask_LSB 0x4 +#define QIB_7220_IntMask_RcvAvail4IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail3IntMask_LSB 0x3 +#define QIB_7220_IntMask_RcvAvail3IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail2IntMask_LSB 0x2 +#define QIB_7220_IntMask_RcvAvail2IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail1IntMask_LSB 0x1 +#define QIB_7220_IntMask_RcvAvail1IntMask_RMASK 0x1 +#define QIB_7220_IntMask_RcvAvail0IntMask_LSB 0x0 +#define QIB_7220_IntMask_RcvAvail0IntMask_RMASK 0x1 + +#define QIB_7220_IntStatus_OFFS 0x70 +#define QIB_7220_IntStatus_SDmaInt_LSB 0x3F +#define QIB_7220_IntStatus_SDmaInt_RMASK 0x1 +#define QIB_7220_IntStatus_SDmaDisabled_LSB 0x3E +#define QIB_7220_IntStatus_SDmaDisabled_RMASK 0x1 +#define QIB_7220_IntStatus_Reserved_LSB 0x31 +#define QIB_7220_IntStatus_Reserved_RMASK 0x1FFF +#define QIB_7220_IntStatus_RcvUrg16_LSB 0x30 +#define QIB_7220_IntStatus_RcvUrg16_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg15_LSB 0x2F +#define QIB_7220_IntStatus_RcvUrg15_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg14_LSB 0x2E +#define QIB_7220_IntStatus_RcvUrg14_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg13_LSB 0x2D +#define QIB_7220_IntStatus_RcvUrg13_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg12_LSB 0x2C +#define QIB_7220_IntStatus_RcvUrg12_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg11_LSB 0x2B +#define QIB_7220_IntStatus_RcvUrg11_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg10_LSB 0x2A +#define QIB_7220_IntStatus_RcvUrg10_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg9_LSB 0x29 +#define QIB_7220_IntStatus_RcvUrg9_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg8_LSB 0x28 +#define QIB_7220_IntStatus_RcvUrg8_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg7_LSB 0x27 +#define QIB_7220_IntStatus_RcvUrg7_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg6_LSB 0x26 +#define QIB_7220_IntStatus_RcvUrg6_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg5_LSB 0x25 +#define QIB_7220_IntStatus_RcvUrg5_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg4_LSB 0x24 +#define QIB_7220_IntStatus_RcvUrg4_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg3_LSB 0x23 +#define QIB_7220_IntStatus_RcvUrg3_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg2_LSB 0x22 +#define QIB_7220_IntStatus_RcvUrg2_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg1_LSB 0x21 +#define QIB_7220_IntStatus_RcvUrg1_RMASK 0x1 +#define QIB_7220_IntStatus_RcvUrg0_LSB 0x20 +#define QIB_7220_IntStatus_RcvUrg0_RMASK 0x1 +#define QIB_7220_IntStatus_Error_LSB 0x1F +#define QIB_7220_IntStatus_Error_RMASK 0x1 +#define QIB_7220_IntStatus_PioSent_LSB 0x1E +#define QIB_7220_IntStatus_PioSent_RMASK 0x1 +#define QIB_7220_IntStatus_PioBufAvail_LSB 0x1D +#define QIB_7220_IntStatus_PioBufAvail_RMASK 0x1 +#define QIB_7220_IntStatus_assertGPIO_LSB 0x1C +#define QIB_7220_IntStatus_assertGPIO_RMASK 0x1 +#define QIB_7220_IntStatus_IBSerdesTrimDone_LSB 0x1B +#define QIB_7220_IntStatus_IBSerdesTrimDone_RMASK 0x1 +#define QIB_7220_IntStatus_JInt_LSB 0x1A +#define QIB_7220_IntStatus_JInt_RMASK 0x1 +#define QIB_7220_IntStatus_Reserved1_LSB 0x11 +#define QIB_7220_IntStatus_Reserved1_RMASK 0x1FF +#define QIB_7220_IntStatus_RcvAvail16_LSB 0x10 +#define QIB_7220_IntStatus_RcvAvail16_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail15_LSB 0xF +#define QIB_7220_IntStatus_RcvAvail15_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail14_LSB 0xE +#define QIB_7220_IntStatus_RcvAvail14_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail13_LSB 
0xD +#define QIB_7220_IntStatus_RcvAvail13_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail12_LSB 0xC +#define QIB_7220_IntStatus_RcvAvail12_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail11_LSB 0xB +#define QIB_7220_IntStatus_RcvAvail11_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail10_LSB 0xA +#define QIB_7220_IntStatus_RcvAvail10_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail9_LSB 0x9 +#define QIB_7220_IntStatus_RcvAvail9_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail8_LSB 0x8 +#define QIB_7220_IntStatus_RcvAvail8_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail7_LSB 0x7 +#define QIB_7220_IntStatus_RcvAvail7_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail6_LSB 0x6 +#define QIB_7220_IntStatus_RcvAvail6_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail5_LSB 0x5 +#define QIB_7220_IntStatus_RcvAvail5_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail4_LSB 0x4 +#define QIB_7220_IntStatus_RcvAvail4_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail3_LSB 0x3 +#define QIB_7220_IntStatus_RcvAvail3_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail2_LSB 0x2 +#define QIB_7220_IntStatus_RcvAvail2_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail1_LSB 0x1 +#define QIB_7220_IntStatus_RcvAvail1_RMASK 0x1 +#define QIB_7220_IntStatus_RcvAvail0_LSB 0x0 +#define QIB_7220_IntStatus_RcvAvail0_RMASK 0x1 + +#define QIB_7220_IntClear_OFFS 0x78 +#define QIB_7220_IntClear_SDmaIntClear_LSB 0x3F +#define QIB_7220_IntClear_SDmaIntClear_RMASK 0x1 +#define QIB_7220_IntClear_SDmaDisabledClear_LSB 0x3E +#define QIB_7220_IntClear_SDmaDisabledClear_RMASK 0x1 +#define QIB_7220_IntClear_Reserved_LSB 0x31 +#define QIB_7220_IntClear_Reserved_RMASK 0x1FFF +#define QIB_7220_IntClear_RcvUrg16IntClear_LSB 0x30 +#define QIB_7220_IntClear_RcvUrg16IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg15IntClear_LSB 0x2F +#define QIB_7220_IntClear_RcvUrg15IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg14IntClear_LSB 0x2E +#define QIB_7220_IntClear_RcvUrg14IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg13IntClear_LSB 0x2D +#define QIB_7220_IntClear_RcvUrg13IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg12IntClear_LSB 0x2C +#define QIB_7220_IntClear_RcvUrg12IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg11IntClear_LSB 0x2B +#define QIB_7220_IntClear_RcvUrg11IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg10IntClear_LSB 0x2A +#define QIB_7220_IntClear_RcvUrg10IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg9IntClear_LSB 0x29 +#define QIB_7220_IntClear_RcvUrg9IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg8IntClear_LSB 0x28 +#define QIB_7220_IntClear_RcvUrg8IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg7IntClear_LSB 0x27 +#define QIB_7220_IntClear_RcvUrg7IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg6IntClear_LSB 0x26 +#define QIB_7220_IntClear_RcvUrg6IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg5IntClear_LSB 0x25 +#define QIB_7220_IntClear_RcvUrg5IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg4IntClear_LSB 0x24 +#define QIB_7220_IntClear_RcvUrg4IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg3IntClear_LSB 0x23 +#define QIB_7220_IntClear_RcvUrg3IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg2IntClear_LSB 0x22 +#define QIB_7220_IntClear_RcvUrg2IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg1IntClear_LSB 0x21 +#define QIB_7220_IntClear_RcvUrg1IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvUrg0IntClear_LSB 0x20 +#define QIB_7220_IntClear_RcvUrg0IntClear_RMASK 0x1 +#define QIB_7220_IntClear_ErrorIntClear_LSB 0x1F +#define QIB_7220_IntClear_ErrorIntClear_RMASK 0x1 +#define 
QIB_7220_IntClear_PioSetIntClear_LSB 0x1E +#define QIB_7220_IntClear_PioSetIntClear_RMASK 0x1 +#define QIB_7220_IntClear_PioBufAvailIntClear_LSB 0x1D +#define QIB_7220_IntClear_PioBufAvailIntClear_RMASK 0x1 +#define QIB_7220_IntClear_assertGPIOIntClear_LSB 0x1C +#define QIB_7220_IntClear_assertGPIOIntClear_RMASK 0x1 +#define QIB_7220_IntClear_IBSerdesTrimDoneClear_LSB 0x1B +#define QIB_7220_IntClear_IBSerdesTrimDoneClear_RMASK 0x1 +#define QIB_7220_IntClear_JIntClear_LSB 0x1A +#define QIB_7220_IntClear_JIntClear_RMASK 0x1 +#define QIB_7220_IntClear_Reserved1_LSB 0x11 +#define QIB_7220_IntClear_Reserved1_RMASK 0x1FF +#define QIB_7220_IntClear_RcvAvail16IntClear_LSB 0x10 +#define QIB_7220_IntClear_RcvAvail16IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail15IntClear_LSB 0xF +#define QIB_7220_IntClear_RcvAvail15IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail14IntClear_LSB 0xE +#define QIB_7220_IntClear_RcvAvail14IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail13IntClear_LSB 0xD +#define QIB_7220_IntClear_RcvAvail13IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail12IntClear_LSB 0xC +#define QIB_7220_IntClear_RcvAvail12IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail11IntClear_LSB 0xB +#define QIB_7220_IntClear_RcvAvail11IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail10IntClear_LSB 0xA +#define QIB_7220_IntClear_RcvAvail10IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail9IntClear_LSB 0x9 +#define QIB_7220_IntClear_RcvAvail9IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail8IntClear_LSB 0x8 +#define QIB_7220_IntClear_RcvAvail8IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail7IntClear_LSB 0x7 +#define QIB_7220_IntClear_RcvAvail7IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail6IntClear_LSB 0x6 +#define QIB_7220_IntClear_RcvAvail6IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail5IntClear_LSB 0x5 +#define QIB_7220_IntClear_RcvAvail5IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail4IntClear_LSB 0x4 +#define QIB_7220_IntClear_RcvAvail4IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail3IntClear_LSB 0x3 +#define QIB_7220_IntClear_RcvAvail3IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail2IntClear_LSB 0x2 +#define QIB_7220_IntClear_RcvAvail2IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail1IntClear_LSB 0x1 +#define QIB_7220_IntClear_RcvAvail1IntClear_RMASK 0x1 +#define QIB_7220_IntClear_RcvAvail0IntClear_LSB 0x0 +#define QIB_7220_IntClear_RcvAvail0IntClear_RMASK 0x1 + +#define QIB_7220_ErrMask_OFFS 0x80 +#define QIB_7220_ErrMask_Reserved_LSB 0x36 +#define QIB_7220_ErrMask_Reserved_RMASK 0x3FF +#define QIB_7220_ErrMask_InvalidEEPCmdMask_LSB 0x35 +#define QIB_7220_ErrMask_InvalidEEPCmdMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaDescAddrMisalignErrMask_LSB 0x34 +#define QIB_7220_ErrMask_SDmaDescAddrMisalignErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_HardwareErrMask_LSB 0x33 +#define QIB_7220_ErrMask_HardwareErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_ResetNegatedMask_LSB 0x32 +#define QIB_7220_ErrMask_ResetNegatedMask_RMASK 0x1 +#define QIB_7220_ErrMask_InvalidAddrErrMask_LSB 0x31 +#define QIB_7220_ErrMask_InvalidAddrErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_IBStatusChangedMask_LSB 0x30 +#define QIB_7220_ErrMask_IBStatusChangedMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaUnexpDataErrMask_LSB 0x2F +#define QIB_7220_ErrMask_SDmaUnexpDataErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaMissingDwErrMask_LSB 0x2E +#define QIB_7220_ErrMask_SDmaMissingDwErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaDwEnErrMask_LSB 0x2D +#define 
QIB_7220_ErrMask_SDmaDwEnErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaRpyTagErrMask_LSB 0x2C +#define QIB_7220_ErrMask_SDmaRpyTagErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDma1stDescErrMask_LSB 0x2B +#define QIB_7220_ErrMask_SDma1stDescErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaBaseErrMask_LSB 0x2A +#define QIB_7220_ErrMask_SDmaBaseErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaTailOutOfBoundErrMask_LSB 0x29 +#define QIB_7220_ErrMask_SDmaTailOutOfBoundErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaOutOfBoundErrMask_LSB 0x28 +#define QIB_7220_ErrMask_SDmaOutOfBoundErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaGenMismatchErrMask_LSB 0x27 +#define QIB_7220_ErrMask_SDmaGenMismatchErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendBufMisuseErrMask_LSB 0x26 +#define QIB_7220_ErrMask_SendBufMisuseErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendUnsupportedVLErrMask_LSB 0x25 +#define QIB_7220_ErrMask_SendUnsupportedVLErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendUnexpectedPktNumErrMask_LSB 0x24 +#define QIB_7220_ErrMask_SendUnexpectedPktNumErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendPioArmLaunchErrMask_LSB 0x23 +#define QIB_7220_ErrMask_SendPioArmLaunchErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendDroppedDataPktErrMask_LSB 0x22 +#define QIB_7220_ErrMask_SendDroppedDataPktErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendDroppedSmpPktErrMask_LSB 0x21 +#define QIB_7220_ErrMask_SendDroppedSmpPktErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendPktLenErrMask_LSB 0x20 +#define QIB_7220_ErrMask_SendPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendUnderRunErrMask_LSB 0x1F +#define QIB_7220_ErrMask_SendUnderRunErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendMaxPktLenErrMask_LSB 0x1E +#define QIB_7220_ErrMask_SendMaxPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendMinPktLenErrMask_LSB 0x1D +#define QIB_7220_ErrMask_SendMinPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SDmaDisabledErrMask_LSB 0x1C +#define QIB_7220_ErrMask_SDmaDisabledErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_SendSpecialTriggerErrMask_LSB 0x1B +#define QIB_7220_ErrMask_SendSpecialTriggerErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_Reserved1_LSB 0x12 +#define QIB_7220_ErrMask_Reserved1_RMASK 0x1FF +#define QIB_7220_ErrMask_RcvIBLostLinkErrMask_LSB 0x11 +#define QIB_7220_ErrMask_RcvIBLostLinkErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvHdrErrMask_LSB 0x10 +#define QIB_7220_ErrMask_RcvHdrErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvHdrLenErrMask_LSB 0xF +#define QIB_7220_ErrMask_RcvHdrLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvBadTidErrMask_LSB 0xE +#define QIB_7220_ErrMask_RcvBadTidErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvHdrFullErrMask_LSB 0xD +#define QIB_7220_ErrMask_RcvHdrFullErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvEgrFullErrMask_LSB 0xC +#define QIB_7220_ErrMask_RcvEgrFullErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvBadVersionErrMask_LSB 0xB +#define QIB_7220_ErrMask_RcvBadVersionErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvIBFlowErrMask_LSB 0xA +#define QIB_7220_ErrMask_RcvIBFlowErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvEBPErrMask_LSB 0x9 +#define QIB_7220_ErrMask_RcvEBPErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvUnsupportedVLErrMask_LSB 0x8 +#define QIB_7220_ErrMask_RcvUnsupportedVLErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvUnexpectedCharErrMask_LSB 0x7 +#define QIB_7220_ErrMask_RcvUnexpectedCharErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvShortPktLenErrMask_LSB 0x6 +#define QIB_7220_ErrMask_RcvShortPktLenErrMask_RMASK 0x1 +#define 
QIB_7220_ErrMask_RcvLongPktLenErrMask_LSB 0x5 +#define QIB_7220_ErrMask_RcvLongPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvMaxPktLenErrMask_LSB 0x4 +#define QIB_7220_ErrMask_RcvMaxPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvMinPktLenErrMask_LSB 0x3 +#define QIB_7220_ErrMask_RcvMinPktLenErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvICRCErrMask_LSB 0x2 +#define QIB_7220_ErrMask_RcvICRCErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvVCRCErrMask_LSB 0x1 +#define QIB_7220_ErrMask_RcvVCRCErrMask_RMASK 0x1 +#define QIB_7220_ErrMask_RcvFormatErrMask_LSB 0x0 +#define QIB_7220_ErrMask_RcvFormatErrMask_RMASK 0x1 + +#define QIB_7220_ErrStatus_OFFS 0x88 +#define QIB_7220_ErrStatus_Reserved_LSB 0x36 +#define QIB_7220_ErrStatus_Reserved_RMASK 0x3FF +#define QIB_7220_ErrStatus_InvalidEEPCmdErr_LSB 0x35 +#define QIB_7220_ErrStatus_InvalidEEPCmdErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaDescAddrMisalignErr_LSB 0x34 +#define QIB_7220_ErrStatus_SDmaDescAddrMisalignErr_RMASK 0x1 +#define QIB_7220_ErrStatus_HardwareErr_LSB 0x33 +#define QIB_7220_ErrStatus_HardwareErr_RMASK 0x1 +#define QIB_7220_ErrStatus_ResetNegated_LSB 0x32 +#define QIB_7220_ErrStatus_ResetNegated_RMASK 0x1 +#define QIB_7220_ErrStatus_InvalidAddrErr_LSB 0x31 +#define QIB_7220_ErrStatus_InvalidAddrErr_RMASK 0x1 +#define QIB_7220_ErrStatus_IBStatusChanged_LSB 0x30 +#define QIB_7220_ErrStatus_IBStatusChanged_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaUnexpDataErr_LSB 0x2F +#define QIB_7220_ErrStatus_SDmaUnexpDataErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaMissingDwErr_LSB 0x2E +#define QIB_7220_ErrStatus_SDmaMissingDwErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaDwEnErr_LSB 0x2D +#define QIB_7220_ErrStatus_SDmaDwEnErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaRpyTagErr_LSB 0x2C +#define QIB_7220_ErrStatus_SDmaRpyTagErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDma1stDescErr_LSB 0x2B +#define QIB_7220_ErrStatus_SDma1stDescErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaBaseErr_LSB 0x2A +#define QIB_7220_ErrStatus_SDmaBaseErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaTailOutOfBoundErr_LSB 0x29 +#define QIB_7220_ErrStatus_SDmaTailOutOfBoundErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaOutOfBoundErr_LSB 0x28 +#define QIB_7220_ErrStatus_SDmaOutOfBoundErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaGenMismatchErr_LSB 0x27 +#define QIB_7220_ErrStatus_SDmaGenMismatchErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendBufMisuseErr_LSB 0x26 +#define QIB_7220_ErrStatus_SendBufMisuseErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendUnsupportedVLErr_LSB 0x25 +#define QIB_7220_ErrStatus_SendUnsupportedVLErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendUnexpectedPktNumErr_LSB 0x24 +#define QIB_7220_ErrStatus_SendUnexpectedPktNumErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendPioArmLaunchErr_LSB 0x23 +#define QIB_7220_ErrStatus_SendPioArmLaunchErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendDroppedDataPktErr_LSB 0x22 +#define QIB_7220_ErrStatus_SendDroppedDataPktErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendDroppedSmpPktErr_LSB 0x21 +#define QIB_7220_ErrStatus_SendDroppedSmpPktErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendPktLenErr_LSB 0x20 +#define QIB_7220_ErrStatus_SendPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendUnderRunErr_LSB 0x1F +#define QIB_7220_ErrStatus_SendUnderRunErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendMaxPktLenErr_LSB 0x1E +#define QIB_7220_ErrStatus_SendMaxPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendMinPktLenErr_LSB 0x1D +#define QIB_7220_ErrStatus_SendMinPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SDmaDisabledErr_LSB 0x1C 
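
These generated names all follow one convention: every field gets an `_LSB` bit position and a right-justified `_RMASK`, and each latched bit in ErrStatus (0x88) has a matching enable at the same position in ErrMask (0x80) and a write-1-to-clear bit in ErrClear (0x90). A minimal sketch of how the pairs are meant to be combined, assuming hypothetical 64-bit MMIO accessors (`read_kreg64`, `write_kreg64`, and the `base` pointer are illustrative stand-ins, not the driver's real helpers):

```c
#include <stdint.h>

/* Hypothetical 64-bit register accessors; the real driver has its own. */
static inline uint64_t read_kreg64(const volatile uint64_t *base, uint64_t offs)
{
	return base[offs / sizeof(uint64_t)];
}

static inline void write_kreg64(volatile uint64_t *base, uint64_t offs,
				uint64_t val)
{
	base[offs / sizeof(uint64_t)] = val;
}

/* Extract a right-justified field using a generated _LSB/_RMASK pair. */
#define FIELD_GET64(regval, reg, field) \
	(((regval) >> reg##_##field##_LSB) & reg##_##field##_RMASK)

static void ack_sdma_disabled(volatile uint64_t *base)
{
	uint64_t errs = read_kreg64(base, QIB_7220_ErrStatus_OFFS);

	if (FIELD_GET64(errs, QIB_7220_ErrStatus, SDmaDisabledErr))
		write_kreg64(base, QIB_7220_ErrClear_OFFS,
			     1ULL << QIB_7220_ErrClear_SDmaDisabledErrClear_LSB);
}
```
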
+#define QIB_7220_ErrStatus_SDmaDisabledErr_RMASK 0x1 +#define QIB_7220_ErrStatus_SendSpecialTriggerErr_LSB 0x1B +#define QIB_7220_ErrStatus_SendSpecialTriggerErr_RMASK 0x1 +#define QIB_7220_ErrStatus_Reserved1_LSB 0x12 +#define QIB_7220_ErrStatus_Reserved1_RMASK 0x1FF +#define QIB_7220_ErrStatus_RcvIBLostLinkErr_LSB 0x11 +#define QIB_7220_ErrStatus_RcvIBLostLinkErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvHdrErr_LSB 0x10 +#define QIB_7220_ErrStatus_RcvHdrErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvHdrLenErr_LSB 0xF +#define QIB_7220_ErrStatus_RcvHdrLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvBadTidErr_LSB 0xE +#define QIB_7220_ErrStatus_RcvBadTidErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvHdrFullErr_LSB 0xD +#define QIB_7220_ErrStatus_RcvHdrFullErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvEgrFullErr_LSB 0xC +#define QIB_7220_ErrStatus_RcvEgrFullErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvBadVersionErr_LSB 0xB +#define QIB_7220_ErrStatus_RcvBadVersionErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvIBFlowErr_LSB 0xA +#define QIB_7220_ErrStatus_RcvIBFlowErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvEBPErr_LSB 0x9 +#define QIB_7220_ErrStatus_RcvEBPErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvUnsupportedVLErr_LSB 0x8 +#define QIB_7220_ErrStatus_RcvUnsupportedVLErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvUnexpectedCharErr_LSB 0x7 +#define QIB_7220_ErrStatus_RcvUnexpectedCharErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvShortPktLenErr_LSB 0x6 +#define QIB_7220_ErrStatus_RcvShortPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvLongPktLenErr_LSB 0x5 +#define QIB_7220_ErrStatus_RcvLongPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvMaxPktLenErr_LSB 0x4 +#define QIB_7220_ErrStatus_RcvMaxPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvMinPktLenErr_LSB 0x3 +#define QIB_7220_ErrStatus_RcvMinPktLenErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvICRCErr_LSB 0x2 +#define QIB_7220_ErrStatus_RcvICRCErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvVCRCErr_LSB 0x1 +#define QIB_7220_ErrStatus_RcvVCRCErr_RMASK 0x1 +#define QIB_7220_ErrStatus_RcvFormatErr_LSB 0x0 +#define QIB_7220_ErrStatus_RcvFormatErr_RMASK 0x1 + +#define QIB_7220_ErrClear_OFFS 0x90 +#define QIB_7220_ErrClear_Reserved_LSB 0x36 +#define QIB_7220_ErrClear_Reserved_RMASK 0x3FF +#define QIB_7220_ErrClear_InvalidEEPCmdErrClear_LSB 0x35 +#define QIB_7220_ErrClear_InvalidEEPCmdErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaDescAddrMisalignErrClear_LSB 0x34 +#define QIB_7220_ErrClear_SDmaDescAddrMisalignErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_HardwareErrClear_LSB 0x33 +#define QIB_7220_ErrClear_HardwareErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_ResetNegatedClear_LSB 0x32 +#define QIB_7220_ErrClear_ResetNegatedClear_RMASK 0x1 +#define QIB_7220_ErrClear_InvalidAddrErrClear_LSB 0x31 +#define QIB_7220_ErrClear_InvalidAddrErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_IBStatusChangedClear_LSB 0x30 +#define QIB_7220_ErrClear_IBStatusChangedClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaUnexpDataErrClear_LSB 0x2F +#define QIB_7220_ErrClear_SDmaUnexpDataErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaMissingDwErrClear_LSB 0x2E +#define QIB_7220_ErrClear_SDmaMissingDwErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaDwEnErrClear_LSB 0x2D +#define QIB_7220_ErrClear_SDmaDwEnErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaRpyTagErrClear_LSB 0x2C +#define QIB_7220_ErrClear_SDmaRpyTagErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDma1stDescErrClear_LSB 0x2B +#define QIB_7220_ErrClear_SDma1stDescErrClear_RMASK 0x1 +#define 
QIB_7220_ErrClear_SDmaBaseErrClear_LSB 0x2A +#define QIB_7220_ErrClear_SDmaBaseErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaTailOutOfBoundErrClear_LSB 0x29 +#define QIB_7220_ErrClear_SDmaTailOutOfBoundErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaOutOfBoundErrClear_LSB 0x28 +#define QIB_7220_ErrClear_SDmaOutOfBoundErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaGenMismatchErrClear_LSB 0x27 +#define QIB_7220_ErrClear_SDmaGenMismatchErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendBufMisuseErrClear_LSB 0x26 +#define QIB_7220_ErrClear_SendBufMisuseErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendUnsupportedVLErrClear_LSB 0x25 +#define QIB_7220_ErrClear_SendUnsupportedVLErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendUnexpectedPktNumErrClear_LSB 0x24 +#define QIB_7220_ErrClear_SendUnexpectedPktNumErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendPioArmLaunchErrClear_LSB 0x23 +#define QIB_7220_ErrClear_SendPioArmLaunchErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendDroppedDataPktErrClear_LSB 0x22 +#define QIB_7220_ErrClear_SendDroppedDataPktErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendDroppedSmpPktErrClear_LSB 0x21 +#define QIB_7220_ErrClear_SendDroppedSmpPktErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendPktLenErrClear_LSB 0x20 +#define QIB_7220_ErrClear_SendPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendUnderRunErrClear_LSB 0x1F +#define QIB_7220_ErrClear_SendUnderRunErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendMaxPktLenErrClear_LSB 0x1E +#define QIB_7220_ErrClear_SendMaxPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendMinPktLenErrClear_LSB 0x1D +#define QIB_7220_ErrClear_SendMinPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SDmaDisabledErrClear_LSB 0x1C +#define QIB_7220_ErrClear_SDmaDisabledErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_SendSpecialTriggerErrClear_LSB 0x1B +#define QIB_7220_ErrClear_SendSpecialTriggerErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_Reserved1_LSB 0x12 +#define QIB_7220_ErrClear_Reserved1_RMASK 0x1FF +#define QIB_7220_ErrClear_RcvIBLostLinkErrClear_LSB 0x11 +#define QIB_7220_ErrClear_RcvIBLostLinkErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvHdrErrClear_LSB 0x10 +#define QIB_7220_ErrClear_RcvHdrErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvHdrLenErrClear_LSB 0xF +#define QIB_7220_ErrClear_RcvHdrLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvBadTidErrClear_LSB 0xE +#define QIB_7220_ErrClear_RcvBadTidErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvHdrFullErrClear_LSB 0xD +#define QIB_7220_ErrClear_RcvHdrFullErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvEgrFullErrClear_LSB 0xC +#define QIB_7220_ErrClear_RcvEgrFullErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvBadVersionErrClear_LSB 0xB +#define QIB_7220_ErrClear_RcvBadVersionErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvIBFlowErrClear_LSB 0xA +#define QIB_7220_ErrClear_RcvIBFlowErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvEBPErrClear_LSB 0x9 +#define QIB_7220_ErrClear_RcvEBPErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvUnsupportedVLErrClear_LSB 0x8 +#define QIB_7220_ErrClear_RcvUnsupportedVLErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvUnexpectedCharErrClear_LSB 0x7 +#define QIB_7220_ErrClear_RcvUnexpectedCharErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvShortPktLenErrClear_LSB 0x6 +#define QIB_7220_ErrClear_RcvShortPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvLongPktLenErrClear_LSB 0x5 +#define QIB_7220_ErrClear_RcvLongPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvMaxPktLenErrClear_LSB 0x4 +#define 
QIB_7220_ErrClear_RcvMaxPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvMinPktLenErrClear_LSB 0x3 +#define QIB_7220_ErrClear_RcvMinPktLenErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvICRCErrClear_LSB 0x2 +#define QIB_7220_ErrClear_RcvICRCErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvVCRCErrClear_LSB 0x1 +#define QIB_7220_ErrClear_RcvVCRCErrClear_RMASK 0x1 +#define QIB_7220_ErrClear_RcvFormatErrClear_LSB 0x0 +#define QIB_7220_ErrClear_RcvFormatErrClear_RMASK 0x1 + +#define QIB_7220_HwErrMask_OFFS 0x98 +#define QIB_7220_HwErrMask_IBCBusFromSPCParityErrMask_LSB 0x3F +#define QIB_7220_HwErrMask_IBCBusFromSPCParityErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_IBCBusToSPCParityErrMask_LSB 0x3E +#define QIB_7220_HwErrMask_IBCBusToSPCParityErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_Clk_uC_PLLNotLockedMask_LSB 0x3D +#define QIB_7220_HwErrMask_Clk_uC_PLLNotLockedMask_RMASK 0x1 +#define QIB_7220_HwErrMask_IBSerdesPClkNotDetectMask_LSB 0x3C +#define QIB_7220_HwErrMask_IBSerdesPClkNotDetectMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PCIESerdesQ3PClkNotDetectMask_LSB 0x3B +#define QIB_7220_HwErrMask_PCIESerdesQ3PClkNotDetectMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PCIESerdesQ2PClkNotDetectMask_LSB 0x3A +#define QIB_7220_HwErrMask_PCIESerdesQ2PClkNotDetectMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PCIESerdesQ1PClkNotDetectMask_LSB 0x39 +#define QIB_7220_HwErrMask_PCIESerdesQ1PClkNotDetectMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PCIESerdesQ0PClkNotDetectMask_LSB 0x38 +#define QIB_7220_HwErrMask_PCIESerdesQ0PClkNotDetectMask_RMASK 0x1 +#define QIB_7220_HwErrMask_Reserved_LSB 0x37 +#define QIB_7220_HwErrMask_Reserved_RMASK 0x1 +#define QIB_7220_HwErrMask_PowerOnBISTFailedMask_LSB 0x36 +#define QIB_7220_HwErrMask_PowerOnBISTFailedMask_RMASK 0x1 +#define QIB_7220_HwErrMask_Reserved1_LSB 0x33 +#define QIB_7220_HwErrMask_Reserved1_RMASK 0x7 +#define QIB_7220_HwErrMask_RXEMemParityErrMask_LSB 0x2C +#define QIB_7220_HwErrMask_RXEMemParityErrMask_RMASK 0x7F +#define QIB_7220_HwErrMask_TXEMemParityErrMask_LSB 0x28 +#define QIB_7220_HwErrMask_TXEMemParityErrMask_RMASK 0xF +#define QIB_7220_HwErrMask_DDSRXEQMemoryParityErrMask_LSB 0x27 +#define QIB_7220_HwErrMask_DDSRXEQMemoryParityErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_IB_uC_MemoryParityErrMask_LSB 0x26 +#define QIB_7220_HwErrMask_IB_uC_MemoryParityErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PCIEOct1_uC_MemoryParityErrMask_LSB 0x25 +#define QIB_7220_HwErrMask_PCIEOct1_uC_MemoryParityErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PCIEOct0_uC_MemoryParityErrMask_LSB 0x24 +#define QIB_7220_HwErrMask_PCIEOct0_uC_MemoryParityErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_Reserved2_LSB 0x22 +#define QIB_7220_HwErrMask_Reserved2_RMASK 0x3 +#define QIB_7220_HwErrMask_PCIeBusParityErrMask_LSB 0x1F +#define QIB_7220_HwErrMask_PCIeBusParityErrMask_RMASK 0x7 +#define QIB_7220_HwErrMask_PcieCplTimeoutMask_LSB 0x1E +#define QIB_7220_HwErrMask_PcieCplTimeoutMask_RMASK 0x1 +#define QIB_7220_HwErrMask_PoisonedTLPMask_LSB 0x1D +#define QIB_7220_HwErrMask_PoisonedTLPMask_RMASK 0x1 +#define QIB_7220_HwErrMask_SDmaMemReadErrMask_LSB 0x1C +#define QIB_7220_HwErrMask_SDmaMemReadErrMask_RMASK 0x1 +#define QIB_7220_HwErrMask_Reserved3_LSB 0x8 +#define QIB_7220_HwErrMask_Reserved3_RMASK 0xFFFFF +#define QIB_7220_HwErrMask_PCIeMemParityErrMask_LSB 0x0 +#define QIB_7220_HwErrMask_PCIeMemParityErrMask_RMASK 0xFF + +#define QIB_7220_HwErrStatus_OFFS 0xA0 +#define QIB_7220_HwErrStatus_IBCBusFromSPCParityErr_LSB 0x3F +#define 
QIB_7220_HwErrStatus_IBCBusFromSPCParityErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_IBCBusToSPCParityErr_LSB 0x3E +#define QIB_7220_HwErrStatus_IBCBusToSPCParityErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_Clk_uC_PLLNotLocked_LSB 0x3D +#define QIB_7220_HwErrStatus_Clk_uC_PLLNotLocked_RMASK 0x1 +#define QIB_7220_HwErrStatus_IBSerdesPClkNotDetect_LSB 0x3C +#define QIB_7220_HwErrStatus_IBSerdesPClkNotDetect_RMASK 0x1 +#define QIB_7220_HwErrStatus_PCIESerdesQ3PClkNotDetect_LSB 0x3B +#define QIB_7220_HwErrStatus_PCIESerdesQ3PClkNotDetect_RMASK 0x1 +#define QIB_7220_HwErrStatus_PCIESerdesQ2PClkNotDetect_LSB 0x3A +#define QIB_7220_HwErrStatus_PCIESerdesQ2PClkNotDetect_RMASK 0x1 +#define QIB_7220_HwErrStatus_PCIESerdesQ1PClkNotDetect_LSB 0x39 +#define QIB_7220_HwErrStatus_PCIESerdesQ1PClkNotDetect_RMASK 0x1 +#define QIB_7220_HwErrStatus_PCIESerdesQ0PClkNotDetect_LSB 0x38 +#define QIB_7220_HwErrStatus_PCIESerdesQ0PClkNotDetect_RMASK 0x1 +#define QIB_7220_HwErrStatus_Reserved_LSB 0x37 +#define QIB_7220_HwErrStatus_Reserved_RMASK 0x1 +#define QIB_7220_HwErrStatus_PowerOnBISTFailed_LSB 0x36 +#define QIB_7220_HwErrStatus_PowerOnBISTFailed_RMASK 0x1 +#define QIB_7220_HwErrStatus_Reserved1_LSB 0x33 +#define QIB_7220_HwErrStatus_Reserved1_RMASK 0x7 +#define QIB_7220_HwErrStatus_RXEMemParity_LSB 0x2C +#define QIB_7220_HwErrStatus_RXEMemParity_RMASK 0x7F +#define QIB_7220_HwErrStatus_TXEMemParity_LSB 0x28 +#define QIB_7220_HwErrStatus_TXEMemParity_RMASK 0xF +#define QIB_7220_HwErrStatus_DDSRXEQMemoryParityErr_LSB 0x27 +#define QIB_7220_HwErrStatus_DDSRXEQMemoryParityErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_IB_uC_MemoryParityErr_LSB 0x26 +#define QIB_7220_HwErrStatus_IB_uC_MemoryParityErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_PCIE_uC_Oct1MemoryParityErr_LSB 0x25 +#define QIB_7220_HwErrStatus_PCIE_uC_Oct1MemoryParityErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_PCIE_uC_Oct0MemoryParityErr_LSB 0x24 +#define QIB_7220_HwErrStatus_PCIE_uC_Oct0MemoryParityErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_Reserved2_LSB 0x22 +#define QIB_7220_HwErrStatus_Reserved2_RMASK 0x3 +#define QIB_7220_HwErrStatus_PCIeBusParity_LSB 0x1F +#define QIB_7220_HwErrStatus_PCIeBusParity_RMASK 0x7 +#define QIB_7220_HwErrStatus_PcieCplTimeout_LSB 0x1E +#define QIB_7220_HwErrStatus_PcieCplTimeout_RMASK 0x1 +#define QIB_7220_HwErrStatus_PoisenedTLP_LSB 0x1D +#define QIB_7220_HwErrStatus_PoisenedTLP_RMASK 0x1 +#define QIB_7220_HwErrStatus_SDmaMemReadErr_LSB 0x1C +#define QIB_7220_HwErrStatus_SDmaMemReadErr_RMASK 0x1 +#define QIB_7220_HwErrStatus_Reserved3_LSB 0x8 +#define QIB_7220_HwErrStatus_Reserved3_RMASK 0xFFFFF +#define QIB_7220_HwErrStatus_PCIeMemParity_LSB 0x0 +#define QIB_7220_HwErrStatus_PCIeMemParity_RMASK 0xFF + +#define QIB_7220_HwErrClear_OFFS 0xA8 +#define QIB_7220_HwErrClear_IBCBusFromSPCParityErrClear_LSB 0x3F +#define QIB_7220_HwErrClear_IBCBusFromSPCParityErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_IBCBusToSPCparityErrClear_LSB 0x3E +#define QIB_7220_HwErrClear_IBCBusToSPCparityErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_Clk_uC_PLLNotLockedClear_LSB 0x3D +#define QIB_7220_HwErrClear_Clk_uC_PLLNotLockedClear_RMASK 0x1 +#define QIB_7220_HwErrClear_IBSerdesPClkNotDetectClear_LSB 0x3C +#define QIB_7220_HwErrClear_IBSerdesPClkNotDetectClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PCIESerdesQ3PClkNotDetectClear_LSB 0x3B +#define QIB_7220_HwErrClear_PCIESerdesQ3PClkNotDetectClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PCIESerdesQ2PClkNotDetectClear_LSB 0x3A +#define QIB_7220_HwErrClear_PCIESerdesQ2PClkNotDetectClear_RMASK 
0x1 +#define QIB_7220_HwErrClear_PCIESerdesQ1PClkNotDetectClear_LSB 0x39 +#define QIB_7220_HwErrClear_PCIESerdesQ1PClkNotDetectClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PCIESerdesQ0PClkNotDetectClear_LSB 0x38 +#define QIB_7220_HwErrClear_PCIESerdesQ0PClkNotDetectClear_RMASK 0x1 +#define QIB_7220_HwErrClear_Reserved_LSB 0x37 +#define QIB_7220_HwErrClear_Reserved_RMASK 0x1 +#define QIB_7220_HwErrClear_PowerOnBISTFailedClear_LSB 0x36 +#define QIB_7220_HwErrClear_PowerOnBISTFailedClear_RMASK 0x1 +#define QIB_7220_HwErrClear_Reserved1_LSB 0x33 +#define QIB_7220_HwErrClear_Reserved1_RMASK 0x7 +#define QIB_7220_HwErrClear_RXEMemParityClear_LSB 0x2C +#define QIB_7220_HwErrClear_RXEMemParityClear_RMASK 0x7F +#define QIB_7220_HwErrClear_TXEMemParityClear_LSB 0x28 +#define QIB_7220_HwErrClear_TXEMemParityClear_RMASK 0xF +#define QIB_7220_HwErrClear_DDSRXEQMemoryParityErrClear_LSB 0x27 +#define QIB_7220_HwErrClear_DDSRXEQMemoryParityErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_IB_uC_MemoryParityErrClear_LSB 0x26 +#define QIB_7220_HwErrClear_IB_uC_MemoryParityErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PCIE_uC_Oct1MemoryParityErrClear_LSB 0x25 +#define QIB_7220_HwErrClear_PCIE_uC_Oct1MemoryParityErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PCIE_uC_Oct0MemoryParityErrClear_LSB 0x24 +#define QIB_7220_HwErrClear_PCIE_uC_Oct0MemoryParityErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_Reserved2_LSB 0x22 +#define QIB_7220_HwErrClear_Reserved2_RMASK 0x3 +#define QIB_7220_HwErrClear_PCIeBusParityClr_LSB 0x1F +#define QIB_7220_HwErrClear_PCIeBusParityClr_RMASK 0x7 +#define QIB_7220_HwErrClear_PcieCplTimeoutClear_LSB 0x1E +#define QIB_7220_HwErrClear_PcieCplTimeoutClear_RMASK 0x1 +#define QIB_7220_HwErrClear_PoisonedTLPClear_LSB 0x1D +#define QIB_7220_HwErrClear_PoisonedTLPClear_RMASK 0x1 +#define QIB_7220_HwErrClear_SDmaMemReadErrClear_LSB 0x1C +#define QIB_7220_HwErrClear_SDmaMemReadErrClear_RMASK 0x1 +#define QIB_7220_HwErrClear_Reserved3_LSB 0x8 +#define QIB_7220_HwErrClear_Reserved3_RMASK 0xFFFFF +#define QIB_7220_HwErrClear_PCIeMemParityClr_LSB 0x0 +#define QIB_7220_HwErrClear_PCIeMemParityClr_RMASK 0xFF + +#define QIB_7220_HwDiagCtrl_OFFS 0xB0 +#define QIB_7220_HwDiagCtrl_ForceIBCBusFromSPCParityErr_LSB 0x3F +#define QIB_7220_HwDiagCtrl_ForceIBCBusFromSPCParityErr_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_ForceIBCBusToSPCParityErr_LSB 0x3E +#define QIB_7220_HwDiagCtrl_ForceIBCBusToSPCParityErr_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_CounterWrEnable_LSB 0x3D +#define QIB_7220_HwDiagCtrl_CounterWrEnable_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_CounterDisable_LSB 0x3C +#define QIB_7220_HwDiagCtrl_CounterDisable_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_Reserved_LSB 0x33 +#define QIB_7220_HwDiagCtrl_Reserved_RMASK 0x1FF +#define QIB_7220_HwDiagCtrl_ForceRxMemParityErr_LSB 0x2C +#define QIB_7220_HwDiagCtrl_ForceRxMemParityErr_RMASK 0x7F +#define QIB_7220_HwDiagCtrl_ForceTxMemparityErr_LSB 0x28 +#define QIB_7220_HwDiagCtrl_ForceTxMemparityErr_RMASK 0xF +#define QIB_7220_HwDiagCtrl_ForceDDSRXEQMemoryParityErr_LSB 0x27 +#define QIB_7220_HwDiagCtrl_ForceDDSRXEQMemoryParityErr_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_ForceIB_uC_MemoryParityErr_LSB 0x26 +#define QIB_7220_HwDiagCtrl_ForceIB_uC_MemoryParityErr_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct1MemoryParityErr_LSB 0x25 +#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct1MemoryParityErr_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct0MemoryParityErr_LSB 0x24 +#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct0MemoryParityErr_RMASK 0x1 +#define 
QIB_7220_HwDiagCtrl_Reserved1_LSB 0x23 +#define QIB_7220_HwDiagCtrl_Reserved1_RMASK 0x1 +#define QIB_7220_HwDiagCtrl_forcePCIeBusParity_LSB 0x1F +#define QIB_7220_HwDiagCtrl_forcePCIeBusParity_RMASK 0xF +#define QIB_7220_HwDiagCtrl_Reserved2_LSB 0x8 +#define QIB_7220_HwDiagCtrl_Reserved2_RMASK 0x7FFFFF +#define QIB_7220_HwDiagCtrl_forcePCIeMemParity_LSB 0x0 +#define QIB_7220_HwDiagCtrl_forcePCIeMemParity_RMASK 0xFF + +#define QIB_7220_REG_0000B8_OFFS 0xB8 + +#define QIB_7220_IBCStatus_OFFS 0xC0 +#define QIB_7220_IBCStatus_TxCreditOk_LSB 0x1F +#define QIB_7220_IBCStatus_TxCreditOk_RMASK 0x1 +#define QIB_7220_IBCStatus_TxReady_LSB 0x1E +#define QIB_7220_IBCStatus_TxReady_RMASK 0x1 +#define QIB_7220_IBCStatus_Reserved_LSB 0xE +#define QIB_7220_IBCStatus_Reserved_RMASK 0xFFFF +#define QIB_7220_IBCStatus_IBTxLaneReversed_LSB 0xD +#define QIB_7220_IBCStatus_IBTxLaneReversed_RMASK 0x1 +#define QIB_7220_IBCStatus_IBRxLaneReversed_LSB 0xC +#define QIB_7220_IBCStatus_IBRxLaneReversed_RMASK 0x1 +#define QIB_7220_IBCStatus_IB_SERDES_TRIM_DONE_LSB 0xB +#define QIB_7220_IBCStatus_IB_SERDES_TRIM_DONE_RMASK 0x1 +#define QIB_7220_IBCStatus_DDS_RXEQ_FAIL_LSB 0xA +#define QIB_7220_IBCStatus_DDS_RXEQ_FAIL_RMASK 0x1 +#define QIB_7220_IBCStatus_LinkWidthActive_LSB 0x9 +#define QIB_7220_IBCStatus_LinkWidthActive_RMASK 0x1 +#define QIB_7220_IBCStatus_LinkSpeedActive_LSB 0x8 +#define QIB_7220_IBCStatus_LinkSpeedActive_RMASK 0x1 +#define QIB_7220_IBCStatus_LinkState_LSB 0x5 +#define QIB_7220_IBCStatus_LinkState_RMASK 0x7 +#define QIB_7220_IBCStatus_LinkTrainingState_LSB 0x0 +#define QIB_7220_IBCStatus_LinkTrainingState_RMASK 0x1F + +#define QIB_7220_IBCCtrl_OFFS 0xC8 +#define QIB_7220_IBCCtrl_Loopback_LSB 0x3F +#define QIB_7220_IBCCtrl_Loopback_RMASK 0x1 +#define QIB_7220_IBCCtrl_LinkDownDefaultState_LSB 0x3E +#define QIB_7220_IBCCtrl_LinkDownDefaultState_RMASK 0x1 +#define QIB_7220_IBCCtrl_Reserved_LSB 0x2B +#define QIB_7220_IBCCtrl_Reserved_RMASK 0x7FFFF +#define QIB_7220_IBCCtrl_CreditScale_LSB 0x28 +#define QIB_7220_IBCCtrl_CreditScale_RMASK 0x7 +#define QIB_7220_IBCCtrl_OverrunThreshold_LSB 0x24 +#define QIB_7220_IBCCtrl_OverrunThreshold_RMASK 0xF +#define QIB_7220_IBCCtrl_PhyerrThreshold_LSB 0x20 +#define QIB_7220_IBCCtrl_PhyerrThreshold_RMASK 0xF +#define QIB_7220_IBCCtrl_MaxPktLen_LSB 0x15 +#define QIB_7220_IBCCtrl_MaxPktLen_RMASK 0x7FF +#define QIB_7220_IBCCtrl_LinkCmd_LSB 0x13 +#define QIB_7220_IBCCtrl_LinkCmd_RMASK 0x3 +#define QIB_7220_IBCCtrl_LinkInitCmd_LSB 0x10 +#define QIB_7220_IBCCtrl_LinkInitCmd_RMASK 0x7 +#define QIB_7220_IBCCtrl_FlowCtrlWaterMark_LSB 0x8 +#define QIB_7220_IBCCtrl_FlowCtrlWaterMark_RMASK 0xFF +#define QIB_7220_IBCCtrl_FlowCtrlPeriod_LSB 0x0 +#define QIB_7220_IBCCtrl_FlowCtrlPeriod_RMASK 0xFF + +#define QIB_7220_EXTStatus_OFFS 0xD0 +#define QIB_7220_EXTStatus_GPIOIn_LSB 0x30 +#define QIB_7220_EXTStatus_GPIOIn_RMASK 0xFFFF +#define QIB_7220_EXTStatus_Reserved_LSB 0x20 +#define QIB_7220_EXTStatus_Reserved_RMASK 0xFFFF +#define QIB_7220_EXTStatus_Reserved1_LSB 0x10 +#define QIB_7220_EXTStatus_Reserved1_RMASK 0xFFFF +#define QIB_7220_EXTStatus_MemBISTDisabled_LSB 0xF +#define QIB_7220_EXTStatus_MemBISTDisabled_RMASK 0x1 +#define QIB_7220_EXTStatus_MemBISTEndTest_LSB 0xE +#define QIB_7220_EXTStatus_MemBISTEndTest_RMASK 0x1 +#define QIB_7220_EXTStatus_Reserved2_LSB 0x0 +#define QIB_7220_EXTStatus_Reserved2_RMASK 0x3FFF + +#define QIB_7220_EXTCtrl_OFFS 0xD8 +#define QIB_7220_EXTCtrl_GPIOOe_LSB 0x30 +#define QIB_7220_EXTCtrl_GPIOOe_RMASK 0xFFFF +#define QIB_7220_EXTCtrl_GPIOInvert_LSB 
0x20 +#define QIB_7220_EXTCtrl_GPIOInvert_RMASK 0xFFFF +#define QIB_7220_EXTCtrl_Reserved_LSB 0x4 +#define QIB_7220_EXTCtrl_Reserved_RMASK 0xFFFFFFF +#define QIB_7220_EXTCtrl_LEDPriPortGreenOn_LSB 0x3 +#define QIB_7220_EXTCtrl_LEDPriPortGreenOn_RMASK 0x1 +#define QIB_7220_EXTCtrl_LEDPriPortYellowOn_LSB 0x2 +#define QIB_7220_EXTCtrl_LEDPriPortYellowOn_RMASK 0x1 +#define QIB_7220_EXTCtrl_LEDGblOkGreenOn_LSB 0x1 +#define QIB_7220_EXTCtrl_LEDGblOkGreenOn_RMASK 0x1 +#define QIB_7220_EXTCtrl_LEDGblErrRedOff_LSB 0x0 +#define QIB_7220_EXTCtrl_LEDGblErrRedOff_RMASK 0x1 + +#define QIB_7220_GPIOOut_OFFS 0xE0 + +#define QIB_7220_GPIOMask_OFFS 0xE8 + +#define QIB_7220_GPIOStatus_OFFS 0xF0 + +#define QIB_7220_GPIOClear_OFFS 0xF8 + +#define QIB_7220_RcvCtrl_OFFS 0x100 +#define QIB_7220_RcvCtrl_Reserved_LSB 0x27 +#define QIB_7220_RcvCtrl_Reserved_RMASK 0x1FFFFFF +#define QIB_7220_RcvCtrl_RcvQPMapEnable_LSB 0x26 +#define QIB_7220_RcvCtrl_RcvQPMapEnable_RMASK 0x1 +#define QIB_7220_RcvCtrl_PortCfg_LSB 0x24 +#define QIB_7220_RcvCtrl_PortCfg_RMASK 0x3 +#define QIB_7220_RcvCtrl_TailUpd_LSB 0x23 +#define QIB_7220_RcvCtrl_TailUpd_RMASK 0x1 +#define QIB_7220_RcvCtrl_RcvPartitionKeyDisable_LSB 0x22 +#define QIB_7220_RcvCtrl_RcvPartitionKeyDisable_RMASK 0x1 +#define QIB_7220_RcvCtrl_IntrAvail_LSB 0x11 +#define QIB_7220_RcvCtrl_IntrAvail_RMASK 0x1FFFF +#define QIB_7220_RcvCtrl_PortEnable_LSB 0x0 +#define QIB_7220_RcvCtrl_PortEnable_RMASK 0x1FFFF + +#define QIB_7220_RcvBTHQP_OFFS 0x108 +#define QIB_7220_RcvBTHQP_Reserved_LSB 0x18 +#define QIB_7220_RcvBTHQP_Reserved_RMASK 0xFF +#define QIB_7220_RcvBTHQP_RcvBTHQP_LSB 0x0 +#define QIB_7220_RcvBTHQP_RcvBTHQP_RMASK 0xFFFFFF + +#define QIB_7220_RcvHdrSize_OFFS 0x110 + +#define QIB_7220_RcvHdrCnt_OFFS 0x118 + +#define QIB_7220_RcvHdrEntSize_OFFS 0x120 + +#define QIB_7220_RcvTIDBase_OFFS 0x128 + +#define QIB_7220_RcvTIDCnt_OFFS 0x130 + +#define QIB_7220_RcvEgrBase_OFFS 0x138 + +#define QIB_7220_RcvEgrCnt_OFFS 0x140 + +#define QIB_7220_RcvBufBase_OFFS 0x148 + +#define QIB_7220_RcvBufSize_OFFS 0x150 + +#define QIB_7220_RxIntMemBase_OFFS 0x158 + +#define QIB_7220_RxIntMemSize_OFFS 0x160 + +#define QIB_7220_RcvPartitionKey_OFFS 0x168 + +#define QIB_7220_RcvQPMulticastPort_OFFS 0x170 +#define QIB_7220_RcvQPMulticastPort_Reserved_LSB 0x5 +#define QIB_7220_RcvQPMulticastPort_Reserved_RMASK 0x7FFFFFFFFFFFFFF +#define QIB_7220_RcvQPMulticastPort_RcvQpMcPort_LSB 0x0 +#define QIB_7220_RcvQPMulticastPort_RcvQpMcPort_RMASK 0x1F + +#define QIB_7220_RcvPktLEDCnt_OFFS 0x178 +#define QIB_7220_RcvPktLEDCnt_ONperiod_LSB 0x20 +#define QIB_7220_RcvPktLEDCnt_ONperiod_RMASK 0xFFFFFFFF +#define QIB_7220_RcvPktLEDCnt_OFFperiod_LSB 0x0 +#define QIB_7220_RcvPktLEDCnt_OFFperiod_RMASK 0xFFFFFFFF + +#define QIB_7220_IBCDDRCtrl_OFFS 0x180 +#define QIB_7220_IBCDDRCtrl_IB_DLID_MASK_LSB 0x30 +#define QIB_7220_IBCDDRCtrl_IB_DLID_MASK_RMASK 0xFFFF +#define QIB_7220_IBCDDRCtrl_IB_DLID_LSB 0x20 +#define QIB_7220_IBCDDRCtrl_IB_DLID_RMASK 0xFFFF +#define QIB_7220_IBCDDRCtrl_Reserved_LSB 0x1B +#define QIB_7220_IBCDDRCtrl_Reserved_RMASK 0x1F +#define QIB_7220_IBCDDRCtrl_HRTBT_REQ_LSB 0x1A +#define QIB_7220_IBCDDRCtrl_HRTBT_REQ_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_HRTBT_PORT_LSB 0x12 +#define QIB_7220_IBCDDRCtrl_HRTBT_PORT_RMASK 0xFF +#define QIB_7220_IBCDDRCtrl_HRTBT_AUTO_LSB 0x11 +#define QIB_7220_IBCDDRCtrl_HRTBT_AUTO_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_HRTBT_ENB_LSB 0x10 +#define QIB_7220_IBCDDRCtrl_HRTBT_ENB_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_SD_DDS_LSB 0xC +#define QIB_7220_IBCDDRCtrl_SD_DDS_RMASK 0xF 
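
Writable control registers such as IBCCtrl (0xC8) are built the same way in reverse: each field value is masked to its `_RMASK` width and shifted up to its `_LSB` position. A sketch of a generic insert helper over the generated names (the `FIELD_PUT64` macro and the example function are illustrative, not part of the generated header):

```c
#include <stdint.h>

/* Place a field at its bit position; out-of-range bits are masked off. */
#define FIELD_PUT64(reg, field, val) \
	(((uint64_t)((val) & reg##_##field##_RMASK)) << reg##_##field##_LSB)

/* Compose an IBCCtrl value from two of its fields, leaving the rest zero.
 * MaxPktLen is an 11-bit count (RMASK 0x7FF at bit 0x15); LinkInitCmd is
 * a 3-bit command (RMASK 0x7 at bit 0x10). */
static uint64_t make_ibcctrl(uint64_t max_pkt_len, uint64_t link_init_cmd)
{
	return FIELD_PUT64(QIB_7220_IBCCtrl, MaxPktLen, max_pkt_len) |
	       FIELD_PUT64(QIB_7220_IBCCtrl, LinkInitCmd, link_init_cmd);
}
```
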
+#define QIB_7220_IBCDDRCtrl_SD_DDSV_LSB 0xB +#define QIB_7220_IBCDDRCtrl_SD_DDSV_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_SD_ADD_ENB_LSB 0xA +#define QIB_7220_IBCDDRCtrl_SD_ADD_ENB_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_SD_RX_EQUAL_ENABLE_LSB 0x9 +#define QIB_7220_IBCDDRCtrl_SD_RX_EQUAL_ENABLE_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_IB_LANE_REV_SUPPORTED_LSB 0x8 +#define QIB_7220_IBCDDRCtrl_IB_LANE_REV_SUPPORTED_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_IB_POLARITY_REV_SUPP_LSB 0x7 +#define QIB_7220_IBCDDRCtrl_IB_POLARITY_REV_SUPP_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_IB_NUM_CHANNELS_LSB 0x5 +#define QIB_7220_IBCDDRCtrl_IB_NUM_CHANNELS_RMASK 0x3 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_QDR_LSB 0x4 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_QDR_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_DDR_LSB 0x3 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_DDR_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_SDR_LSB 0x2 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_SDR_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_LSB 0x1 +#define QIB_7220_IBCDDRCtrl_SD_SPEED_RMASK 0x1 +#define QIB_7220_IBCDDRCtrl_IB_ENHANCED_MODE_LSB 0x0 +#define QIB_7220_IBCDDRCtrl_IB_ENHANCED_MODE_RMASK 0x1 + +#define QIB_7220_HRTBT_GUID_OFFS 0x188 + +#define QIB_7220_IBCDDRCtrl2_OFFS 0x1A0 +#define QIB_7220_IBCDDRCtrl2_IB_BACK_PORCH_LSB 0x5 +#define QIB_7220_IBCDDRCtrl2_IB_BACK_PORCH_RMASK 0x1F +#define QIB_7220_IBCDDRCtrl2_IB_FRONT_PORCH_LSB 0x0 +#define QIB_7220_IBCDDRCtrl2_IB_FRONT_PORCH_RMASK 0x1F + +#define QIB_7220_IBCDDRStatus_OFFS 0x1A8 +#define QIB_7220_IBCDDRStatus_heartbeat_timed_out_LSB 0x24 +#define QIB_7220_IBCDDRStatus_heartbeat_timed_out_RMASK 0x1 +#define QIB_7220_IBCDDRStatus_heartbeat_crosstalk_LSB 0x20 +#define QIB_7220_IBCDDRStatus_heartbeat_crosstalk_RMASK 0xF +#define QIB_7220_IBCDDRStatus_RxEqLocalDevice_LSB 0x1E +#define QIB_7220_IBCDDRStatus_RxEqLocalDevice_RMASK 0x3 +#define QIB_7220_IBCDDRStatus_ReqDDSLocalFromRmt_LSB 0x1A +#define QIB_7220_IBCDDRStatus_ReqDDSLocalFromRmt_RMASK 0xF +#define QIB_7220_IBCDDRStatus_LinkRoundTripLatency_LSB 0x0 +#define QIB_7220_IBCDDRStatus_LinkRoundTripLatency_RMASK 0x3FFFFFF + +#define QIB_7220_JIntReload_OFFS 0x1B0 +#define QIB_7220_JIntReload_J_limit_reload_LSB 0x10 +#define QIB_7220_JIntReload_J_limit_reload_RMASK 0xFFFF +#define QIB_7220_JIntReload_J_reload_LSB 0x0 +#define QIB_7220_JIntReload_J_reload_RMASK 0xFFFF + +#define QIB_7220_IBNCModeCtrl_OFFS 0x1B8 +#define QIB_7220_IBNCModeCtrl_Reserved_LSB 0x1A +#define QIB_7220_IBNCModeCtrl_Reserved_RMASK 0x3FFFFFFFFF +#define QIB_7220_IBNCModeCtrl_TSMCode_TS2_LSB 0x11 +#define QIB_7220_IBNCModeCtrl_TSMCode_TS2_RMASK 0x1FF +#define QIB_7220_IBNCModeCtrl_TSMCode_TS1_LSB 0x8 +#define QIB_7220_IBNCModeCtrl_TSMCode_TS1_RMASK 0x1FF +#define QIB_7220_IBNCModeCtrl_Reserved1_LSB 0x3 +#define QIB_7220_IBNCModeCtrl_Reserved1_RMASK 0x1F +#define QIB_7220_IBNCModeCtrl_TSMEnable_ignore_TSM_on_rx_LSB 0x2 +#define QIB_7220_IBNCModeCtrl_TSMEnable_ignore_TSM_on_rx_RMASK 0x1 +#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS2_LSB 0x1 +#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS2_RMASK 0x1 +#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS1_LSB 0x0 +#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS1_RMASK 0x1 + +#define QIB_7220_SendCtrl_OFFS 0x1C0 +#define QIB_7220_SendCtrl_Disarm_LSB 0x1F +#define QIB_7220_SendCtrl_Disarm_RMASK 0x1 +#define QIB_7220_SendCtrl_Reserved_LSB 0x1D +#define QIB_7220_SendCtrl_Reserved_RMASK 0x3 +#define QIB_7220_SendCtrl_AvailUpdThld_LSB 0x18 +#define QIB_7220_SendCtrl_AvailUpdThld_RMASK 0x1F +#define QIB_7220_SendCtrl_DisarmPIOBuf_LSB 
0x10 +#define QIB_7220_SendCtrl_DisarmPIOBuf_RMASK 0xFF +#define QIB_7220_SendCtrl_Reserved1_LSB 0xD +#define QIB_7220_SendCtrl_Reserved1_RMASK 0x7 +#define QIB_7220_SendCtrl_SDmaHalt_LSB 0xC +#define QIB_7220_SendCtrl_SDmaHalt_RMASK 0x1 +#define QIB_7220_SendCtrl_SDmaEnable_LSB 0xB +#define QIB_7220_SendCtrl_SDmaEnable_RMASK 0x1 +#define QIB_7220_SendCtrl_SDmaSingleDescriptor_LSB 0xA +#define QIB_7220_SendCtrl_SDmaSingleDescriptor_RMASK 0x1 +#define QIB_7220_SendCtrl_SDmaIntEnable_LSB 0x9 +#define QIB_7220_SendCtrl_SDmaIntEnable_RMASK 0x1 +#define QIB_7220_SendCtrl_Reserved2_LSB 0x5 +#define QIB_7220_SendCtrl_Reserved2_RMASK 0xF +#define QIB_7220_SendCtrl_SSpecialTriggerEn_LSB 0x4 +#define QIB_7220_SendCtrl_SSpecialTriggerEn_RMASK 0x1 +#define QIB_7220_SendCtrl_SPioEnable_LSB 0x3 +#define QIB_7220_SendCtrl_SPioEnable_RMASK 0x1 +#define QIB_7220_SendCtrl_SendBufAvailUpd_LSB 0x2 +#define QIB_7220_SendCtrl_SendBufAvailUpd_RMASK 0x1 +#define QIB_7220_SendCtrl_SendIntBufAvail_LSB 0x1 +#define QIB_7220_SendCtrl_SendIntBufAvail_RMASK 0x1 +#define QIB_7220_SendCtrl_Abort_LSB 0x0 +#define QIB_7220_SendCtrl_Abort_RMASK 0x1 + +#define QIB_7220_SendBufBase_OFFS 0x1C8 +#define QIB_7220_SendBufBase_Reserved_LSB 0x35 +#define QIB_7220_SendBufBase_Reserved_RMASK 0x7FF +#define QIB_7220_SendBufBase_BaseAddr_LargePIO_LSB 0x20 +#define QIB_7220_SendBufBase_BaseAddr_LargePIO_RMASK 0x1FFFFF +#define QIB_7220_SendBufBase_Reserved1_LSB 0x15 +#define QIB_7220_SendBufBase_Reserved1_RMASK 0x7FF +#define QIB_7220_SendBufBase_BaseAddr_SmallPIO_LSB 0x0 +#define QIB_7220_SendBufBase_BaseAddr_SmallPIO_RMASK 0x1FFFFF + +#define QIB_7220_SendBufSize_OFFS 0x1D0 +#define QIB_7220_SendBufSize_Reserved_LSB 0x2D +#define QIB_7220_SendBufSize_Reserved_RMASK 0xFFFFF +#define QIB_7220_SendBufSize_Size_LargePIO_LSB 0x20 +#define QIB_7220_SendBufSize_Size_LargePIO_RMASK 0x1FFF +#define QIB_7220_SendBufSize_Reserved1_LSB 0xC +#define QIB_7220_SendBufSize_Reserved1_RMASK 0xFFFFF +#define QIB_7220_SendBufSize_Size_SmallPIO_LSB 0x0 +#define QIB_7220_SendBufSize_Size_SmallPIO_RMASK 0xFFF + +#define QIB_7220_SendBufCnt_OFFS 0x1D8 +#define QIB_7220_SendBufCnt_Reserved_LSB 0x24 +#define QIB_7220_SendBufCnt_Reserved_RMASK 0xFFFFFFF +#define QIB_7220_SendBufCnt_Num_LargeBuffers_LSB 0x20 +#define QIB_7220_SendBufCnt_Num_LargeBuffers_RMASK 0xF +#define QIB_7220_SendBufCnt_Reserved1_LSB 0x9 +#define QIB_7220_SendBufCnt_Reserved1_RMASK 0x7FFFFF +#define QIB_7220_SendBufCnt_Num_SmallBuffers_LSB 0x0 +#define QIB_7220_SendBufCnt_Num_SmallBuffers_RMASK 0x1FF + +#define QIB_7220_SendBufAvailAddr_OFFS 0x1E0 +#define QIB_7220_SendBufAvailAddr_SendBufAvailAddr_LSB 0x6 +#define QIB_7220_SendBufAvailAddr_SendBufAvailAddr_RMASK 0x3FFFFFFFF +#define QIB_7220_SendBufAvailAddr_Reserved_LSB 0x0 +#define QIB_7220_SendBufAvailAddr_Reserved_RMASK 0x3F + +#define QIB_7220_TxIntMemBase_OFFS 0x1E8 + +#define QIB_7220_TxIntMemSize_OFFS 0x1F0 + +#define QIB_7220_SendDmaBase_OFFS 0x1F8 +#define QIB_7220_SendDmaBase_Reserved_LSB 0x30 +#define QIB_7220_SendDmaBase_Reserved_RMASK 0xFFFF +#define QIB_7220_SendDmaBase_SendDmaBase_LSB 0x0 +#define QIB_7220_SendDmaBase_SendDmaBase_RMASK 0xFFFFFFFFFFFF + +#define QIB_7220_SendDmaLenGen_OFFS 0x200 +#define QIB_7220_SendDmaLenGen_Reserved_LSB 0x13 +#define QIB_7220_SendDmaLenGen_Reserved_RMASK 0x1FFFFFFFFFFF +#define QIB_7220_SendDmaLenGen_Generation_LSB 0x10 +#define QIB_7220_SendDmaLenGen_Generation_MSB 0x12 +#define QIB_7220_SendDmaLenGen_Generation_RMASK 0x7 +#define QIB_7220_SendDmaLenGen_Length_LSB 0x0 +#define 
QIB_7220_SendDmaLenGen_Length_RMASK 0xFFFF + +#define QIB_7220_SendDmaTail_OFFS 0x208 +#define QIB_7220_SendDmaTail_Reserved_LSB 0x10 +#define QIB_7220_SendDmaTail_Reserved_RMASK 0xFFFFFFFFFFFF +#define QIB_7220_SendDmaTail_SendDmaTail_LSB 0x0 +#define QIB_7220_SendDmaTail_SendDmaTail_RMASK 0xFFFF + +#define QIB_7220_SendDmaHead_OFFS 0x210 +#define QIB_7220_SendDmaHead_Reserved_LSB 0x30 +#define QIB_7220_SendDmaHead_Reserved_RMASK 0xFFFF +#define QIB_7220_SendDmaHead_InternalSendDmaHead_LSB 0x20 +#define QIB_7220_SendDmaHead_InternalSendDmaHead_RMASK 0xFFFF +#define QIB_7220_SendDmaHead_Reserved1_LSB 0x10 +#define QIB_7220_SendDmaHead_Reserved1_RMASK 0xFFFF +#define QIB_7220_SendDmaHead_SendDmaHead_LSB 0x0 +#define QIB_7220_SendDmaHead_SendDmaHead_RMASK 0xFFFF + +#define QIB_7220_SendDmaHeadAddr_OFFS 0x218 +#define QIB_7220_SendDmaHeadAddr_Reserved_LSB 0x30 +#define QIB_7220_SendDmaHeadAddr_Reserved_RMASK 0xFFFF +#define QIB_7220_SendDmaHeadAddr_SendDmaHeadAddr_LSB 0x0 +#define QIB_7220_SendDmaHeadAddr_SendDmaHeadAddr_RMASK 0xFFFFFFFFFFFF + +#define QIB_7220_SendDmaBufMask0_OFFS 0x220 +#define QIB_7220_SendDmaBufMask0_BufMask_63_0_LSB 0x0 +#define QIB_7220_SendDmaBufMask0_BufMask_63_0_RMASK 0x0 + +#define QIB_7220_SendDmaStatus_OFFS 0x238 +#define QIB_7220_SendDmaStatus_ScoreBoardDrainInProg_LSB 0x3F +#define QIB_7220_SendDmaStatus_ScoreBoardDrainInProg_RMASK 0x1 +#define QIB_7220_SendDmaStatus_AbortInProg_LSB 0x3E +#define QIB_7220_SendDmaStatus_AbortInProg_RMASK 0x1 +#define QIB_7220_SendDmaStatus_InternalSDmaEnable_LSB 0x3D +#define QIB_7220_SendDmaStatus_InternalSDmaEnable_RMASK 0x1 +#define QIB_7220_SendDmaStatus_ScbDescIndex_13_0_LSB 0x2F +#define QIB_7220_SendDmaStatus_ScbDescIndex_13_0_RMASK 0x3FFF +#define QIB_7220_SendDmaStatus_RpyLowAddr_6_0_LSB 0x28 +#define QIB_7220_SendDmaStatus_RpyLowAddr_6_0_RMASK 0x7F +#define QIB_7220_SendDmaStatus_RpyTag_7_0_LSB 0x20 +#define QIB_7220_SendDmaStatus_RpyTag_7_0_RMASK 0xFF +#define QIB_7220_SendDmaStatus_ScbFull_LSB 0x1F +#define QIB_7220_SendDmaStatus_ScbFull_RMASK 0x1 +#define QIB_7220_SendDmaStatus_ScbEmpty_LSB 0x1E +#define QIB_7220_SendDmaStatus_ScbEmpty_RMASK 0x1 +#define QIB_7220_SendDmaStatus_ScbEntryValid_LSB 0x1D +#define QIB_7220_SendDmaStatus_ScbEntryValid_RMASK 0x1 +#define QIB_7220_SendDmaStatus_ScbFetchDescFlag_LSB 0x1C +#define QIB_7220_SendDmaStatus_ScbFetchDescFlag_RMASK 0x1 +#define QIB_7220_SendDmaStatus_SplFifoReadyToGo_LSB 0x1B +#define QIB_7220_SendDmaStatus_SplFifoReadyToGo_RMASK 0x1 +#define QIB_7220_SendDmaStatus_SplFifoDisarmed_LSB 0x1A +#define QIB_7220_SendDmaStatus_SplFifoDisarmed_RMASK 0x1 +#define QIB_7220_SendDmaStatus_SplFifoEmpty_LSB 0x19 +#define QIB_7220_SendDmaStatus_SplFifoEmpty_RMASK 0x1 +#define QIB_7220_SendDmaStatus_SplFifoFull_LSB 0x18 +#define QIB_7220_SendDmaStatus_SplFifoFull_RMASK 0x1 +#define QIB_7220_SendDmaStatus_SplFifoBufNum_LSB 0x10 +#define QIB_7220_SendDmaStatus_SplFifoBufNum_RMASK 0xFF +#define QIB_7220_SendDmaStatus_SplFifoDescIndex_LSB 0x0 +#define QIB_7220_SendDmaStatus_SplFifoDescIndex_RMASK 0xFFFF + +#define QIB_7220_SendBufErr0_OFFS 0x240 +#define QIB_7220_SendBufErr0_SendBufErr_63_0_LSB 0x0 +#define QIB_7220_SendBufErr0_SendBufErr_63_0_RMASK 0x0 + +#define QIB_7220_RcvHdrAddr0_OFFS 0x270 +#define QIB_7220_RcvHdrAddr0_RcvHdrAddr0_LSB 0x2 +#define QIB_7220_RcvHdrAddr0_RcvHdrAddr0_RMASK 0x3FFFFFFFFF +#define QIB_7220_RcvHdrAddr0_Reserved_LSB 0x0 +#define QIB_7220_RcvHdrAddr0_Reserved_RMASK 0x3 + +#define QIB_7220_RcvHdrTailAddr0_OFFS 0x300 +#define 
QIB_7220_RcvHdrTailAddr0_RcvHdrTailAddr0_LSB 0x2 +#define QIB_7220_RcvHdrTailAddr0_RcvHdrTailAddr0_RMASK 0x3FFFFFFFFF +#define QIB_7220_RcvHdrTailAddr0_Reserved_LSB 0x0 +#define QIB_7220_RcvHdrTailAddr0_Reserved_RMASK 0x3 + +#define QIB_7220_ibsd_epb_access_ctrl_OFFS 0x3C0 +#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_granted_LSB 0x8 +#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_granted_RMASK 0x1 +#define QIB_7220_ibsd_epb_access_ctrl_Reserved_LSB 0x1 +#define QIB_7220_ibsd_epb_access_ctrl_Reserved_RMASK 0x7F +#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_LSB 0x0 +#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_RMASK 0x1 + +#define QIB_7220_ibsd_epb_transaction_reg_OFFS 0x3C8 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_rdy_LSB 0x1F +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_rdy_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_req_error_LSB 0x1E +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_req_error_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_Reserved_LSB 0x1D +#define QIB_7220_ibsd_epb_transaction_reg_Reserved_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_mem_data_parity_LSB 0x1C +#define QIB_7220_ibsd_epb_transaction_reg_mem_data_parity_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_Reserved1_LSB 0x1B +#define QIB_7220_ibsd_epb_transaction_reg_Reserved1_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_cs_LSB 0x19 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_cs_RMASK 0x3 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_read_write_LSB 0x18 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_read_write_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_Reserved2_LSB 0x17 +#define QIB_7220_ibsd_epb_transaction_reg_Reserved2_RMASK 0x1 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_address_LSB 0x8 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_address_RMASK 0x7FFF +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_data_LSB 0x0 +#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_data_RMASK 0xFF + +#define QIB_7220_XGXSCfg_OFFS 0x3D8 +#define QIB_7220_XGXSCfg_sel_link_down_for_fctrl_lane_sync_reset_LSB 0x3F +#define QIB_7220_XGXSCfg_sel_link_down_for_fctrl_lane_sync_reset_RMASK 0x1 +#define QIB_7220_XGXSCfg_Reserved_LSB 0x13 +#define QIB_7220_XGXSCfg_Reserved_RMASK 0xFFFFFFFFFFF +#define QIB_7220_XGXSCfg_link_sync_mask_LSB 0x9 +#define QIB_7220_XGXSCfg_link_sync_mask_RMASK 0x3FF +#define QIB_7220_XGXSCfg_Reserved1_LSB 0x3 +#define QIB_7220_XGXSCfg_Reserved1_RMASK 0x3F +#define QIB_7220_XGXSCfg_xcv_reset_LSB 0x2 +#define QIB_7220_XGXSCfg_xcv_reset_RMASK 0x1 +#define QIB_7220_XGXSCfg_Reserved2_LSB 0x1 +#define QIB_7220_XGXSCfg_Reserved2_RMASK 0x1 +#define QIB_7220_XGXSCfg_tx_rx_reset_LSB 0x0 +#define QIB_7220_XGXSCfg_tx_rx_reset_RMASK 0x1 + +#define QIB_7220_IBSerDesCtrl_OFFS 0x3E0 +#define QIB_7220_IBSerDesCtrl_Reserved_LSB 0x2D +#define QIB_7220_IBSerDesCtrl_Reserved_RMASK 0x7FFFF +#define QIB_7220_IBSerDesCtrl_INT_uC_LSB 0x2C +#define QIB_7220_IBSerDesCtrl_INT_uC_RMASK 0x1 +#define QIB_7220_IBSerDesCtrl_CKSEL_uC_LSB 0x2A +#define QIB_7220_IBSerDesCtrl_CKSEL_uC_RMASK 0x3 +#define QIB_7220_IBSerDesCtrl_PLLN_LSB 0x28 +#define QIB_7220_IBSerDesCtrl_PLLN_RMASK 0x3 +#define QIB_7220_IBSerDesCtrl_PLLM_LSB 0x25 +#define QIB_7220_IBSerDesCtrl_PLLM_RMASK 0x7 +#define QIB_7220_IBSerDesCtrl_TXOBPD_LSB 0x24 +#define QIB_7220_IBSerDesCtrl_TXOBPD_RMASK 0x1 +#define QIB_7220_IBSerDesCtrl_TWC_LSB 0x23 +#define QIB_7220_IBSerDesCtrl_TWC_RMASK 0x1 +#define QIB_7220_IBSerDesCtrl_RXIDLE_LSB 0x22 +#define 
QIB_7220_IBSerDesCtrl_RXIDLE_RMASK 0x1 +#define QIB_7220_IBSerDesCtrl_RXINV_LSB 0x21 +#define QIB_7220_IBSerDesCtrl_RXINV_RMASK 0x1 +#define QIB_7220_IBSerDesCtrl_TXINV_LSB 0x20 +#define QIB_7220_IBSerDesCtrl_TXINV_RMASK 0x1 +#define QIB_7220_IBSerDesCtrl_Reserved1_LSB 0x12 +#define QIB_7220_IBSerDesCtrl_Reserved1_RMASK 0x3FFF +#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForRXEQ_LSB 0xD +#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForRXEQ_RMASK 0x1F +#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForDDS_LSB 0x8 +#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForDDS_RMASK 0x1F +#define QIB_7220_IBSerDesCtrl_Reserved2_LSB 0x1 +#define QIB_7220_IBSerDesCtrl_Reserved2_RMASK 0x7F +#define QIB_7220_IBSerDesCtrl_ResetIB_uC_Core_LSB 0x0 +#define QIB_7220_IBSerDesCtrl_ResetIB_uC_Core_RMASK 0x1 + +#define QIB_7220_pciesd_epb_access_ctrl_OFFS 0x400 +#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_granted_LSB 0x8 +#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_granted_RMASK 0x1 +#define QIB_7220_pciesd_epb_access_ctrl_Reserved_LSB 0x3 +#define QIB_7220_pciesd_epb_access_ctrl_Reserved_RMASK 0x1F +#define QIB_7220_pciesd_epb_access_ctrl_sw_pcieepb_star_en_LSB 0x1 +#define QIB_7220_pciesd_epb_access_ctrl_sw_pcieepb_star_en_RMASK 0x3 +#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_LSB 0x0 +#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_RMASK 0x1 + +#define QIB_7220_pciesd_epb_transaction_reg_OFFS 0x408 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_rdy_LSB 0x1F +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_rdy_RMASK 0x1 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_req_error_LSB 0x1E +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_req_error_RMASK 0x1 +#define QIB_7220_pciesd_epb_transaction_reg_Reserved_LSB 0x1D +#define QIB_7220_pciesd_epb_transaction_reg_Reserved_RMASK 0x1 +#define QIB_7220_pciesd_epb_transaction_reg_mem_data_parity_LSB 0x1C +#define QIB_7220_pciesd_epb_transaction_reg_mem_data_parity_RMASK 0x1 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_cs_LSB 0x19 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_cs_RMASK 0x7 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_read_write_LSB 0x18 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_read_write_RMASK 0x1 +#define QIB_7220_pciesd_epb_transaction_reg_Reserved1_LSB 0x17 +#define QIB_7220_pciesd_epb_transaction_reg_Reserved1_RMASK 0x1 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_address_LSB 0x8 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_address_RMASK 0x7FFF +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_data_LSB 0x0 +#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_data_RMASK 0xFF + +#define QIB_7220_SerDes_DDSRXEQ0_OFFS 0x500 +#define QIB_7220_SerDes_DDSRXEQ0_reg_addr_LSB 0x4 +#define QIB_7220_SerDes_DDSRXEQ0_reg_addr_RMASK 0x3F +#define QIB_7220_SerDes_DDSRXEQ0_element_num_LSB 0x0 +#define QIB_7220_SerDes_DDSRXEQ0_element_num_RMASK 0xF + +#define QIB_7220_LBIntCnt_OFFS 0x13000 + +#define QIB_7220_LBFlowStallCnt_OFFS 0x13008 + +#define QIB_7220_TxSDmaDescCnt_OFFS 0x13010 + +#define QIB_7220_TxUnsupVLErrCnt_OFFS 0x13018 + +#define QIB_7220_TxDataPktCnt_OFFS 0x13020 + +#define QIB_7220_TxFlowPktCnt_OFFS 0x13028 + +#define QIB_7220_TxDwordCnt_OFFS 0x13030 + +#define QIB_7220_TxLenErrCnt_OFFS 0x13038 + +#define QIB_7220_TxMaxMinLenErrCnt_OFFS 0x13040 + +#define QIB_7220_TxUnderrunCnt_OFFS 0x13048 + +#define QIB_7220_TxFlowStallCnt_OFFS 0x13050 + +#define QIB_7220_TxDroppedPktCnt_OFFS 0x13058 + +#define 
QIB_7220_RxDroppedPktCnt_OFFS 0x13060 + +#define QIB_7220_RxDataPktCnt_OFFS 0x13068 + +#define QIB_7220_RxFlowPktCnt_OFFS 0x13070 + +#define QIB_7220_RxDwordCnt_OFFS 0x13078 + +#define QIB_7220_RxLenErrCnt_OFFS 0x13080 + +#define QIB_7220_RxMaxMinLenErrCnt_OFFS 0x13088 + +#define QIB_7220_RxICRCErrCnt_OFFS 0x13090 + +#define QIB_7220_RxVCRCErrCnt_OFFS 0x13098 + +#define QIB_7220_RxFlowCtrlViolCnt_OFFS 0x130A0 + +#define QIB_7220_RxVersionErrCnt_OFFS 0x130A8 + +#define QIB_7220_RxLinkMalformCnt_OFFS 0x130B0 + +#define QIB_7220_RxEBPCnt_OFFS 0x130B8 + +#define QIB_7220_RxLPCRCErrCnt_OFFS 0x130C0 + +#define QIB_7220_RxBufOvflCnt_OFFS 0x130C8 + +#define QIB_7220_RxTIDFullErrCnt_OFFS 0x130D0 + +#define QIB_7220_RxTIDValidErrCnt_OFFS 0x130D8 + +#define QIB_7220_RxPKeyMismatchCnt_OFFS 0x130E0 + +#define QIB_7220_RxP0HdrEgrOvflCnt_OFFS 0x130E8 + +#define QIB_7220_IBStatusChangeCnt_OFFS 0x13170 + +#define QIB_7220_IBLinkErrRecoveryCnt_OFFS 0x13178 + +#define QIB_7220_IBLinkDownedCnt_OFFS 0x13180 + +#define QIB_7220_IBSymbolErrCnt_OFFS 0x13188 + +#define QIB_7220_RxVL15DroppedPktCnt_OFFS 0x13190 + +#define QIB_7220_RxOtherLocalPhyErrCnt_OFFS 0x13198 + +#define QIB_7220_PcieRetryBufDiagQwordCnt_OFFS 0x131A0 + +#define QIB_7220_ExcessBufferOvflCnt_OFFS 0x131A8 + +#define QIB_7220_LocalLinkIntegrityErrCnt_OFFS 0x131B0 + +#define QIB_7220_RxVlErrCnt_OFFS 0x131B8 + +#define QIB_7220_RxDlidFltrCnt_OFFS 0x131C0 + +#define QIB_7220_CNT_0131C8_OFFS 0x131C8 + +#define QIB_7220_PSStat_OFFS 0x13200 + +#define QIB_7220_PSStart_OFFS 0x13208 + +#define QIB_7220_PSInterval_OFFS 0x13210 + +#define QIB_7220_PSRcvDataCount_OFFS 0x13218 + +#define QIB_7220_PSRcvPktsCount_OFFS 0x13220 + +#define QIB_7220_PSXmitDataCount_OFFS 0x13228 + +#define QIB_7220_PSXmitPktsCount_OFFS 0x13230 + +#define QIB_7220_PSXmitWaitCount_OFFS 0x13238 + +#define QIB_7220_CNT_013240_OFFS 0x13240 + +#define QIB_7220_RcvEgrArray_OFFS 0x14000 + +#define QIB_7220_MEM_038000_OFFS 0x38000 + +#define QIB_7220_RcvTIDArray0_OFFS 0x53000 + +#define QIB_7220_PIOLaunchFIFO_OFFS 0x64000 + +#define QIB_7220_MEM_064480_OFFS 0x64480 + +#define QIB_7220_SendPIOpbcCache_OFFS 0x64800 + +#define QIB_7220_MEM_064C80_OFFS 0x64C80 + +#define QIB_7220_PreLaunchFIFO_OFFS 0x65000 + +#define QIB_7220_MEM_065080_OFFS 0x65080 + +#define QIB_7220_ScoreBoard_OFFS 0x65400 + +#define QIB_7220_MEM_065440_OFFS 0x65440 + +#define QIB_7220_DescriptorFIFO_OFFS 0x65800 + +#define QIB_7220_MEM_065880_OFFS 0x65880 + +#define QIB_7220_RcvBuf1_OFFS 0x72000 + +#define QIB_7220_MEM_074800_OFFS 0x74800 + +#define QIB_7220_RcvBuf2_OFFS 0x75000 + +#define QIB_7220_MEM_076400_OFFS 0x76400 + +#define QIB_7220_RcvFlags_OFFS 0x77000 + +#define QIB_7220_MEM_078400_OFFS 0x78400 + +#define QIB_7220_RcvLookupBuf1_OFFS 0x79000 + +#define QIB_7220_MEM_07A400_OFFS 0x7A400 + +#define QIB_7220_RcvDMADatBuf_OFFS 0x7B000 + +#define QIB_7220_RcvDMAHdrBuf_OFFS 0x7B800 + +#define QIB_7220_MiscRXEIntMem_OFFS 0x7C000 + +#define QIB_7220_MEM_07D400_OFFS 0x7D400 + +#define QIB_7220_PCIERcvBuf_OFFS 0x80000 + +#define QIB_7220_PCIERetryBuf_OFFS 0x84000 + +#define QIB_7220_PCIERcvBufRdToWrAddr_OFFS 0x88000 + +#define QIB_7220_PCIECplBuf_OFFS 0x90000 + +#define QIB_7220_IBSerDesMappTable_OFFS 0x94000 + +#define QIB_7220_MEM_095000_OFFS 0x95000 + +#define QIB_7220_SendBuf0_MA_OFFS 0x100000 + +#define QIB_7220_MEM_1A0000_OFFS 0x1A0000 diff --git a/drivers/infiniband/hw/qib/qib_7322_regs.h b/drivers/infiniband/hw/qib/qib_7322_regs.h new file mode 100644 index 00000000000..32dc81ff8d4 --- /dev/null +++ 
b/drivers/infiniband/hw/qib/qib_7322_regs.h @@ -0,0 +1,3163 @@ +/* + * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* This file is mechanically generated from RTL. Any hand-edits will be lost! */ + +#define QIB_7322_Revision_OFFS 0x0 +#define QIB_7322_Revision_DEF 0x0000000002010601 +#define QIB_7322_Revision_R_Simulator_LSB 0x3F +#define QIB_7322_Revision_R_Simulator_MSB 0x3F +#define QIB_7322_Revision_R_Simulator_RMASK 0x1 +#define QIB_7322_Revision_R_Emulation_LSB 0x3E +#define QIB_7322_Revision_R_Emulation_MSB 0x3E +#define QIB_7322_Revision_R_Emulation_RMASK 0x1 +#define QIB_7322_Revision_R_Emulation_Revcode_LSB 0x28 +#define QIB_7322_Revision_R_Emulation_Revcode_MSB 0x3D +#define QIB_7322_Revision_R_Emulation_Revcode_RMASK 0x3FFFFF +#define QIB_7322_Revision_BoardID_LSB 0x20 +#define QIB_7322_Revision_BoardID_MSB 0x27 +#define QIB_7322_Revision_BoardID_RMASK 0xFF +#define QIB_7322_Revision_R_SW_LSB 0x18 +#define QIB_7322_Revision_R_SW_MSB 0x1F +#define QIB_7322_Revision_R_SW_RMASK 0xFF +#define QIB_7322_Revision_R_Arch_LSB 0x10 +#define QIB_7322_Revision_R_Arch_MSB 0x17 +#define QIB_7322_Revision_R_Arch_RMASK 0xFF +#define QIB_7322_Revision_R_ChipRevMajor_LSB 0x8 +#define QIB_7322_Revision_R_ChipRevMajor_MSB 0xF +#define QIB_7322_Revision_R_ChipRevMajor_RMASK 0xFF +#define QIB_7322_Revision_R_ChipRevMinor_LSB 0x0 +#define QIB_7322_Revision_R_ChipRevMinor_MSB 0x7 +#define QIB_7322_Revision_R_ChipRevMinor_RMASK 0xFF + +#define QIB_7322_Control_OFFS 0x8 +#define QIB_7322_Control_DEF 0x0000000000000000 +#define QIB_7322_Control_PCIECplQDiagEn_LSB 0x6 +#define QIB_7322_Control_PCIECplQDiagEn_MSB 0x6 +#define QIB_7322_Control_PCIECplQDiagEn_RMASK 0x1 +#define QIB_7322_Control_PCIEPostQDiagEn_LSB 0x5 +#define QIB_7322_Control_PCIEPostQDiagEn_MSB 0x5 +#define QIB_7322_Control_PCIEPostQDiagEn_RMASK 0x1 +#define QIB_7322_Control_SDmaDescFetchPriorityEn_LSB 0x4 +#define QIB_7322_Control_SDmaDescFetchPriorityEn_MSB 0x4 +#define QIB_7322_Control_SDmaDescFetchPriorityEn_RMASK 0x1 +#define QIB_7322_Control_PCIERetryBufDiagEn_LSB 0x3 +#define QIB_7322_Control_PCIERetryBufDiagEn_MSB 0x3 +#define QIB_7322_Control_PCIERetryBufDiagEn_RMASK 0x1 +#define 
QIB_7322_Control_FreezeMode_LSB 0x1 +#define QIB_7322_Control_FreezeMode_MSB 0x1 +#define QIB_7322_Control_FreezeMode_RMASK 0x1 +#define QIB_7322_Control_SyncReset_LSB 0x0 +#define QIB_7322_Control_SyncReset_MSB 0x0 +#define QIB_7322_Control_SyncReset_RMASK 0x1 + +#define QIB_7322_PageAlign_OFFS 0x10 +#define QIB_7322_PageAlign_DEF 0x0000000000001000 + +#define QIB_7322_ContextCnt_OFFS 0x18 +#define QIB_7322_ContextCnt_DEF 0x0000000000000012 + +#define QIB_7322_Scratch_OFFS 0x20 +#define QIB_7322_Scratch_DEF 0x0000000000000000 + +#define QIB_7322_CntrRegBase_OFFS 0x28 +#define QIB_7322_CntrRegBase_DEF 0x0000000000011000 + +#define QIB_7322_SendRegBase_OFFS 0x30 +#define QIB_7322_SendRegBase_DEF 0x0000000000003000 + +#define QIB_7322_UserRegBase_OFFS 0x38 +#define QIB_7322_UserRegBase_DEF 0x0000000000200000 + +#define QIB_7322_IntMask_OFFS 0x68 +#define QIB_7322_IntMask_DEF 0x0000000000000000 +#define QIB_7322_IntMask_SDmaIntMask_1_LSB 0x3F +#define QIB_7322_IntMask_SDmaIntMask_1_MSB 0x3F +#define QIB_7322_IntMask_SDmaIntMask_1_RMASK 0x1 +#define QIB_7322_IntMask_SDmaIntMask_0_LSB 0x3E +#define QIB_7322_IntMask_SDmaIntMask_0_MSB 0x3E +#define QIB_7322_IntMask_SDmaIntMask_0_RMASK 0x1 +#define QIB_7322_IntMask_SDmaProgressIntMask_1_LSB 0x3D +#define QIB_7322_IntMask_SDmaProgressIntMask_1_MSB 0x3D +#define QIB_7322_IntMask_SDmaProgressIntMask_1_RMASK 0x1 +#define QIB_7322_IntMask_SDmaProgressIntMask_0_LSB 0x3C +#define QIB_7322_IntMask_SDmaProgressIntMask_0_MSB 0x3C +#define QIB_7322_IntMask_SDmaProgressIntMask_0_RMASK 0x1 +#define QIB_7322_IntMask_SDmaIdleIntMask_1_LSB 0x3B +#define QIB_7322_IntMask_SDmaIdleIntMask_1_MSB 0x3B +#define QIB_7322_IntMask_SDmaIdleIntMask_1_RMASK 0x1 +#define QIB_7322_IntMask_SDmaIdleIntMask_0_LSB 0x3A +#define QIB_7322_IntMask_SDmaIdleIntMask_0_MSB 0x3A +#define QIB_7322_IntMask_SDmaIdleIntMask_0_RMASK 0x1 +#define QIB_7322_IntMask_SDmaCleanupDoneMask_1_LSB 0x39 +#define QIB_7322_IntMask_SDmaCleanupDoneMask_1_MSB 0x39 +#define QIB_7322_IntMask_SDmaCleanupDoneMask_1_RMASK 0x1 +#define QIB_7322_IntMask_SDmaCleanupDoneMask_0_LSB 0x38 +#define QIB_7322_IntMask_SDmaCleanupDoneMask_0_MSB 0x38 +#define QIB_7322_IntMask_SDmaCleanupDoneMask_0_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg17IntMask_LSB 0x31 +#define QIB_7322_IntMask_RcvUrg17IntMask_MSB 0x31 +#define QIB_7322_IntMask_RcvUrg17IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg16IntMask_LSB 0x30 +#define QIB_7322_IntMask_RcvUrg16IntMask_MSB 0x30 +#define QIB_7322_IntMask_RcvUrg16IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg15IntMask_LSB 0x2F +#define QIB_7322_IntMask_RcvUrg15IntMask_MSB 0x2F +#define QIB_7322_IntMask_RcvUrg15IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg14IntMask_LSB 0x2E +#define QIB_7322_IntMask_RcvUrg14IntMask_MSB 0x2E +#define QIB_7322_IntMask_RcvUrg14IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg13IntMask_LSB 0x2D +#define QIB_7322_IntMask_RcvUrg13IntMask_MSB 0x2D +#define QIB_7322_IntMask_RcvUrg13IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg12IntMask_LSB 0x2C +#define QIB_7322_IntMask_RcvUrg12IntMask_MSB 0x2C +#define QIB_7322_IntMask_RcvUrg12IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg11IntMask_LSB 0x2B +#define QIB_7322_IntMask_RcvUrg11IntMask_MSB 0x2B +#define QIB_7322_IntMask_RcvUrg11IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg10IntMask_LSB 0x2A +#define QIB_7322_IntMask_RcvUrg10IntMask_MSB 0x2A +#define QIB_7322_IntMask_RcvUrg10IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg9IntMask_LSB 0x29 +#define QIB_7322_IntMask_RcvUrg9IntMask_MSB 0x29 +#define 
QIB_7322_IntMask_RcvUrg9IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg8IntMask_LSB 0x28 +#define QIB_7322_IntMask_RcvUrg8IntMask_MSB 0x28 +#define QIB_7322_IntMask_RcvUrg8IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg7IntMask_LSB 0x27 +#define QIB_7322_IntMask_RcvUrg7IntMask_MSB 0x27 +#define QIB_7322_IntMask_RcvUrg7IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg6IntMask_LSB 0x26 +#define QIB_7322_IntMask_RcvUrg6IntMask_MSB 0x26 +#define QIB_7322_IntMask_RcvUrg6IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg5IntMask_LSB 0x25 +#define QIB_7322_IntMask_RcvUrg5IntMask_MSB 0x25 +#define QIB_7322_IntMask_RcvUrg5IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg4IntMask_LSB 0x24 +#define QIB_7322_IntMask_RcvUrg4IntMask_MSB 0x24 +#define QIB_7322_IntMask_RcvUrg4IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg3IntMask_LSB 0x23 +#define QIB_7322_IntMask_RcvUrg3IntMask_MSB 0x23 +#define QIB_7322_IntMask_RcvUrg3IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg2IntMask_LSB 0x22 +#define QIB_7322_IntMask_RcvUrg2IntMask_MSB 0x22 +#define QIB_7322_IntMask_RcvUrg2IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg1IntMask_LSB 0x21 +#define QIB_7322_IntMask_RcvUrg1IntMask_MSB 0x21 +#define QIB_7322_IntMask_RcvUrg1IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvUrg0IntMask_LSB 0x20 +#define QIB_7322_IntMask_RcvUrg0IntMask_MSB 0x20 +#define QIB_7322_IntMask_RcvUrg0IntMask_RMASK 0x1 +#define QIB_7322_IntMask_ErrIntMask_1_LSB 0x1F +#define QIB_7322_IntMask_ErrIntMask_1_MSB 0x1F +#define QIB_7322_IntMask_ErrIntMask_1_RMASK 0x1 +#define QIB_7322_IntMask_ErrIntMask_0_LSB 0x1E +#define QIB_7322_IntMask_ErrIntMask_0_MSB 0x1E +#define QIB_7322_IntMask_ErrIntMask_0_RMASK 0x1 +#define QIB_7322_IntMask_ErrIntMask_LSB 0x1D +#define QIB_7322_IntMask_ErrIntMask_MSB 0x1D +#define QIB_7322_IntMask_ErrIntMask_RMASK 0x1 +#define QIB_7322_IntMask_AssertGPIOIntMask_LSB 0x1C +#define QIB_7322_IntMask_AssertGPIOIntMask_MSB 0x1C +#define QIB_7322_IntMask_AssertGPIOIntMask_RMASK 0x1 +#define QIB_7322_IntMask_SendDoneIntMask_1_LSB 0x19 +#define QIB_7322_IntMask_SendDoneIntMask_1_MSB 0x19 +#define QIB_7322_IntMask_SendDoneIntMask_1_RMASK 0x1 +#define QIB_7322_IntMask_SendDoneIntMask_0_LSB 0x18 +#define QIB_7322_IntMask_SendDoneIntMask_0_MSB 0x18 +#define QIB_7322_IntMask_SendDoneIntMask_0_RMASK 0x1 +#define QIB_7322_IntMask_SendBufAvailIntMask_LSB 0x17 +#define QIB_7322_IntMask_SendBufAvailIntMask_MSB 0x17 +#define QIB_7322_IntMask_SendBufAvailIntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail17IntMask_LSB 0x11 +#define QIB_7322_IntMask_RcvAvail17IntMask_MSB 0x11 +#define QIB_7322_IntMask_RcvAvail17IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail16IntMask_LSB 0x10 +#define QIB_7322_IntMask_RcvAvail16IntMask_MSB 0x10 +#define QIB_7322_IntMask_RcvAvail16IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail15IntMask_LSB 0xF +#define QIB_7322_IntMask_RcvAvail15IntMask_MSB 0xF +#define QIB_7322_IntMask_RcvAvail15IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail14IntMask_LSB 0xE +#define QIB_7322_IntMask_RcvAvail14IntMask_MSB 0xE +#define QIB_7322_IntMask_RcvAvail14IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail13IntMask_LSB 0xD +#define QIB_7322_IntMask_RcvAvail13IntMask_MSB 0xD +#define QIB_7322_IntMask_RcvAvail13IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail12IntMask_LSB 0xC +#define QIB_7322_IntMask_RcvAvail12IntMask_MSB 0xC +#define QIB_7322_IntMask_RcvAvail12IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail11IntMask_LSB 0xB +#define QIB_7322_IntMask_RcvAvail11IntMask_MSB 0xB +#define 
QIB_7322_IntMask_RcvAvail11IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail10IntMask_LSB 0xA +#define QIB_7322_IntMask_RcvAvail10IntMask_MSB 0xA +#define QIB_7322_IntMask_RcvAvail10IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail9IntMask_LSB 0x9 +#define QIB_7322_IntMask_RcvAvail9IntMask_MSB 0x9 +#define QIB_7322_IntMask_RcvAvail9IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail8IntMask_LSB 0x8 +#define QIB_7322_IntMask_RcvAvail8IntMask_MSB 0x8 +#define QIB_7322_IntMask_RcvAvail8IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail7IntMask_LSB 0x7 +#define QIB_7322_IntMask_RcvAvail7IntMask_MSB 0x7 +#define QIB_7322_IntMask_RcvAvail7IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail6IntMask_LSB 0x6 +#define QIB_7322_IntMask_RcvAvail6IntMask_MSB 0x6 +#define QIB_7322_IntMask_RcvAvail6IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail5IntMask_LSB 0x5 +#define QIB_7322_IntMask_RcvAvail5IntMask_MSB 0x5 +#define QIB_7322_IntMask_RcvAvail5IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail4IntMask_LSB 0x4 +#define QIB_7322_IntMask_RcvAvail4IntMask_MSB 0x4 +#define QIB_7322_IntMask_RcvAvail4IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail3IntMask_LSB 0x3 +#define QIB_7322_IntMask_RcvAvail3IntMask_MSB 0x3 +#define QIB_7322_IntMask_RcvAvail3IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail2IntMask_LSB 0x2 +#define QIB_7322_IntMask_RcvAvail2IntMask_MSB 0x2 +#define QIB_7322_IntMask_RcvAvail2IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail1IntMask_LSB 0x1 +#define QIB_7322_IntMask_RcvAvail1IntMask_MSB 0x1 +#define QIB_7322_IntMask_RcvAvail1IntMask_RMASK 0x1 +#define QIB_7322_IntMask_RcvAvail0IntMask_LSB 0x0 +#define QIB_7322_IntMask_RcvAvail0IntMask_MSB 0x0 +#define QIB_7322_IntMask_RcvAvail0IntMask_RMASK 0x1 + +#define QIB_7322_IntStatus_OFFS 0x70 +#define QIB_7322_IntStatus_DEF 0x0000000000000000 +#define QIB_7322_IntStatus_SDmaInt_1_LSB 0x3F +#define QIB_7322_IntStatus_SDmaInt_1_MSB 0x3F +#define QIB_7322_IntStatus_SDmaInt_1_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaInt_0_LSB 0x3E +#define QIB_7322_IntStatus_SDmaInt_0_MSB 0x3E +#define QIB_7322_IntStatus_SDmaInt_0_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaProgressInt_1_LSB 0x3D +#define QIB_7322_IntStatus_SDmaProgressInt_1_MSB 0x3D +#define QIB_7322_IntStatus_SDmaProgressInt_1_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaProgressInt_0_LSB 0x3C +#define QIB_7322_IntStatus_SDmaProgressInt_0_MSB 0x3C +#define QIB_7322_IntStatus_SDmaProgressInt_0_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaIdleInt_1_LSB 0x3B +#define QIB_7322_IntStatus_SDmaIdleInt_1_MSB 0x3B +#define QIB_7322_IntStatus_SDmaIdleInt_1_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaIdleInt_0_LSB 0x3A +#define QIB_7322_IntStatus_SDmaIdleInt_0_MSB 0x3A +#define QIB_7322_IntStatus_SDmaIdleInt_0_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaCleanupDone_1_LSB 0x39 +#define QIB_7322_IntStatus_SDmaCleanupDone_1_MSB 0x39 +#define QIB_7322_IntStatus_SDmaCleanupDone_1_RMASK 0x1 +#define QIB_7322_IntStatus_SDmaCleanupDone_0_LSB 0x38 +#define QIB_7322_IntStatus_SDmaCleanupDone_0_MSB 0x38 +#define QIB_7322_IntStatus_SDmaCleanupDone_0_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg17_LSB 0x31 +#define QIB_7322_IntStatus_RcvUrg17_MSB 0x31 +#define QIB_7322_IntStatus_RcvUrg17_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg16_LSB 0x30 +#define QIB_7322_IntStatus_RcvUrg16_MSB 0x30 +#define QIB_7322_IntStatus_RcvUrg16_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg15_LSB 0x2F +#define QIB_7322_IntStatus_RcvUrg15_MSB 0x2F +#define QIB_7322_IntStatus_RcvUrg15_RMASK 0x1 +#define 
QIB_7322_IntStatus_RcvUrg14_LSB 0x2E +#define QIB_7322_IntStatus_RcvUrg14_MSB 0x2E +#define QIB_7322_IntStatus_RcvUrg14_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg13_LSB 0x2D +#define QIB_7322_IntStatus_RcvUrg13_MSB 0x2D +#define QIB_7322_IntStatus_RcvUrg13_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg12_LSB 0x2C +#define QIB_7322_IntStatus_RcvUrg12_MSB 0x2C +#define QIB_7322_IntStatus_RcvUrg12_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg11_LSB 0x2B +#define QIB_7322_IntStatus_RcvUrg11_MSB 0x2B +#define QIB_7322_IntStatus_RcvUrg11_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg10_LSB 0x2A +#define QIB_7322_IntStatus_RcvUrg10_MSB 0x2A +#define QIB_7322_IntStatus_RcvUrg10_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg9_LSB 0x29 +#define QIB_7322_IntStatus_RcvUrg9_MSB 0x29 +#define QIB_7322_IntStatus_RcvUrg9_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg8_LSB 0x28 +#define QIB_7322_IntStatus_RcvUrg8_MSB 0x28 +#define QIB_7322_IntStatus_RcvUrg8_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg7_LSB 0x27 +#define QIB_7322_IntStatus_RcvUrg7_MSB 0x27 +#define QIB_7322_IntStatus_RcvUrg7_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg6_LSB 0x26 +#define QIB_7322_IntStatus_RcvUrg6_MSB 0x26 +#define QIB_7322_IntStatus_RcvUrg6_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg5_LSB 0x25 +#define QIB_7322_IntStatus_RcvUrg5_MSB 0x25 +#define QIB_7322_IntStatus_RcvUrg5_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg4_LSB 0x24 +#define QIB_7322_IntStatus_RcvUrg4_MSB 0x24 +#define QIB_7322_IntStatus_RcvUrg4_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg3_LSB 0x23 +#define QIB_7322_IntStatus_RcvUrg3_MSB 0x23 +#define QIB_7322_IntStatus_RcvUrg3_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg2_LSB 0x22 +#define QIB_7322_IntStatus_RcvUrg2_MSB 0x22 +#define QIB_7322_IntStatus_RcvUrg2_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg1_LSB 0x21 +#define QIB_7322_IntStatus_RcvUrg1_MSB 0x21 +#define QIB_7322_IntStatus_RcvUrg1_RMASK 0x1 +#define QIB_7322_IntStatus_RcvUrg0_LSB 0x20 +#define QIB_7322_IntStatus_RcvUrg0_MSB 0x20 +#define QIB_7322_IntStatus_RcvUrg0_RMASK 0x1 +#define QIB_7322_IntStatus_Err_1_LSB 0x1F +#define QIB_7322_IntStatus_Err_1_MSB 0x1F +#define QIB_7322_IntStatus_Err_1_RMASK 0x1 +#define QIB_7322_IntStatus_Err_0_LSB 0x1E +#define QIB_7322_IntStatus_Err_0_MSB 0x1E +#define QIB_7322_IntStatus_Err_0_RMASK 0x1 +#define QIB_7322_IntStatus_Err_LSB 0x1D +#define QIB_7322_IntStatus_Err_MSB 0x1D +#define QIB_7322_IntStatus_Err_RMASK 0x1 +#define QIB_7322_IntStatus_AssertGPIO_LSB 0x1C +#define QIB_7322_IntStatus_AssertGPIO_MSB 0x1C +#define QIB_7322_IntStatus_AssertGPIO_RMASK 0x1 +#define QIB_7322_IntStatus_SendDone_1_LSB 0x19 +#define QIB_7322_IntStatus_SendDone_1_MSB 0x19 +#define QIB_7322_IntStatus_SendDone_1_RMASK 0x1 +#define QIB_7322_IntStatus_SendDone_0_LSB 0x18 +#define QIB_7322_IntStatus_SendDone_0_MSB 0x18 +#define QIB_7322_IntStatus_SendDone_0_RMASK 0x1 +#define QIB_7322_IntStatus_SendBufAvail_LSB 0x17 +#define QIB_7322_IntStatus_SendBufAvail_MSB 0x17 +#define QIB_7322_IntStatus_SendBufAvail_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail17_LSB 0x11 +#define QIB_7322_IntStatus_RcvAvail17_MSB 0x11 +#define QIB_7322_IntStatus_RcvAvail17_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail16_LSB 0x10 +#define QIB_7322_IntStatus_RcvAvail16_MSB 0x10 +#define QIB_7322_IntStatus_RcvAvail16_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail15_LSB 0xF +#define QIB_7322_IntStatus_RcvAvail15_MSB 0xF +#define QIB_7322_IntStatus_RcvAvail15_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail14_LSB 0xE +#define QIB_7322_IntStatus_RcvAvail14_MSB 0xE +#define 
QIB_7322_IntStatus_RcvAvail14_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail13_LSB 0xD +#define QIB_7322_IntStatus_RcvAvail13_MSB 0xD +#define QIB_7322_IntStatus_RcvAvail13_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail12_LSB 0xC +#define QIB_7322_IntStatus_RcvAvail12_MSB 0xC +#define QIB_7322_IntStatus_RcvAvail12_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail11_LSB 0xB +#define QIB_7322_IntStatus_RcvAvail11_MSB 0xB +#define QIB_7322_IntStatus_RcvAvail11_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail10_LSB 0xA +#define QIB_7322_IntStatus_RcvAvail10_MSB 0xA +#define QIB_7322_IntStatus_RcvAvail10_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail9_LSB 0x9 +#define QIB_7322_IntStatus_RcvAvail9_MSB 0x9 +#define QIB_7322_IntStatus_RcvAvail9_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail8_LSB 0x8 +#define QIB_7322_IntStatus_RcvAvail8_MSB 0x8 +#define QIB_7322_IntStatus_RcvAvail8_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail7_LSB 0x7 +#define QIB_7322_IntStatus_RcvAvail7_MSB 0x7 +#define QIB_7322_IntStatus_RcvAvail7_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail6_LSB 0x6 +#define QIB_7322_IntStatus_RcvAvail6_MSB 0x6 +#define QIB_7322_IntStatus_RcvAvail6_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail5_LSB 0x5 +#define QIB_7322_IntStatus_RcvAvail5_MSB 0x5 +#define QIB_7322_IntStatus_RcvAvail5_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail4_LSB 0x4 +#define QIB_7322_IntStatus_RcvAvail4_MSB 0x4 +#define QIB_7322_IntStatus_RcvAvail4_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail3_LSB 0x3 +#define QIB_7322_IntStatus_RcvAvail3_MSB 0x3 +#define QIB_7322_IntStatus_RcvAvail3_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail2_LSB 0x2 +#define QIB_7322_IntStatus_RcvAvail2_MSB 0x2 +#define QIB_7322_IntStatus_RcvAvail2_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail1_LSB 0x1 +#define QIB_7322_IntStatus_RcvAvail1_MSB 0x1 +#define QIB_7322_IntStatus_RcvAvail1_RMASK 0x1 +#define QIB_7322_IntStatus_RcvAvail0_LSB 0x0 +#define QIB_7322_IntStatus_RcvAvail0_MSB 0x0 +#define QIB_7322_IntStatus_RcvAvail0_RMASK 0x1 + +#define QIB_7322_IntClear_OFFS 0x78 +#define QIB_7322_IntClear_DEF 0x0000000000000000 +#define QIB_7322_IntClear_SDmaIntClear_1_LSB 0x3F +#define QIB_7322_IntClear_SDmaIntClear_1_MSB 0x3F +#define QIB_7322_IntClear_SDmaIntClear_1_RMASK 0x1 +#define QIB_7322_IntClear_SDmaIntClear_0_LSB 0x3E +#define QIB_7322_IntClear_SDmaIntClear_0_MSB 0x3E +#define QIB_7322_IntClear_SDmaIntClear_0_RMASK 0x1 +#define QIB_7322_IntClear_SDmaProgressIntClear_1_LSB 0x3D +#define QIB_7322_IntClear_SDmaProgressIntClear_1_MSB 0x3D +#define QIB_7322_IntClear_SDmaProgressIntClear_1_RMASK 0x1 +#define QIB_7322_IntClear_SDmaProgressIntClear_0_LSB 0x3C +#define QIB_7322_IntClear_SDmaProgressIntClear_0_MSB 0x3C +#define QIB_7322_IntClear_SDmaProgressIntClear_0_RMASK 0x1 +#define QIB_7322_IntClear_SDmaIdleIntClear_1_LSB 0x3B +#define QIB_7322_IntClear_SDmaIdleIntClear_1_MSB 0x3B +#define QIB_7322_IntClear_SDmaIdleIntClear_1_RMASK 0x1 +#define QIB_7322_IntClear_SDmaIdleIntClear_0_LSB 0x3A +#define QIB_7322_IntClear_SDmaIdleIntClear_0_MSB 0x3A +#define QIB_7322_IntClear_SDmaIdleIntClear_0_RMASK 0x1 +#define QIB_7322_IntClear_SDmaCleanupDoneClear_1_LSB 0x39 +#define QIB_7322_IntClear_SDmaCleanupDoneClear_1_MSB 0x39 +#define QIB_7322_IntClear_SDmaCleanupDoneClear_1_RMASK 0x1 +#define QIB_7322_IntClear_SDmaCleanupDoneClear_0_LSB 0x38 +#define QIB_7322_IntClear_SDmaCleanupDoneClear_0_MSB 0x38 +#define QIB_7322_IntClear_SDmaCleanupDoneClear_0_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg17IntClear_LSB 0x31 +#define QIB_7322_IntClear_RcvUrg17IntClear_MSB 
0x31 +#define QIB_7322_IntClear_RcvUrg17IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg16IntClear_LSB 0x30 +#define QIB_7322_IntClear_RcvUrg16IntClear_MSB 0x30 +#define QIB_7322_IntClear_RcvUrg16IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg15IntClear_LSB 0x2F +#define QIB_7322_IntClear_RcvUrg15IntClear_MSB 0x2F +#define QIB_7322_IntClear_RcvUrg15IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg14IntClear_LSB 0x2E +#define QIB_7322_IntClear_RcvUrg14IntClear_MSB 0x2E +#define QIB_7322_IntClear_RcvUrg14IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg13IntClear_LSB 0x2D +#define QIB_7322_IntClear_RcvUrg13IntClear_MSB 0x2D +#define QIB_7322_IntClear_RcvUrg13IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg12IntClear_LSB 0x2C +#define QIB_7322_IntClear_RcvUrg12IntClear_MSB 0x2C +#define QIB_7322_IntClear_RcvUrg12IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg11IntClear_LSB 0x2B +#define QIB_7322_IntClear_RcvUrg11IntClear_MSB 0x2B +#define QIB_7322_IntClear_RcvUrg11IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg10IntClear_LSB 0x2A +#define QIB_7322_IntClear_RcvUrg10IntClear_MSB 0x2A +#define QIB_7322_IntClear_RcvUrg10IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg9IntClear_LSB 0x29 +#define QIB_7322_IntClear_RcvUrg9IntClear_MSB 0x29 +#define QIB_7322_IntClear_RcvUrg9IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg8IntClear_LSB 0x28 +#define QIB_7322_IntClear_RcvUrg8IntClear_MSB 0x28 +#define QIB_7322_IntClear_RcvUrg8IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg7IntClear_LSB 0x27 +#define QIB_7322_IntClear_RcvUrg7IntClear_MSB 0x27 +#define QIB_7322_IntClear_RcvUrg7IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg6IntClear_LSB 0x26 +#define QIB_7322_IntClear_RcvUrg6IntClear_MSB 0x26 +#define QIB_7322_IntClear_RcvUrg6IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg5IntClear_LSB 0x25 +#define QIB_7322_IntClear_RcvUrg5IntClear_MSB 0x25 +#define QIB_7322_IntClear_RcvUrg5IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg4IntClear_LSB 0x24 +#define QIB_7322_IntClear_RcvUrg4IntClear_MSB 0x24 +#define QIB_7322_IntClear_RcvUrg4IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg3IntClear_LSB 0x23 +#define QIB_7322_IntClear_RcvUrg3IntClear_MSB 0x23 +#define QIB_7322_IntClear_RcvUrg3IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg2IntClear_LSB 0x22 +#define QIB_7322_IntClear_RcvUrg2IntClear_MSB 0x22 +#define QIB_7322_IntClear_RcvUrg2IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg1IntClear_LSB 0x21 +#define QIB_7322_IntClear_RcvUrg1IntClear_MSB 0x21 +#define QIB_7322_IntClear_RcvUrg1IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvUrg0IntClear_LSB 0x20 +#define QIB_7322_IntClear_RcvUrg0IntClear_MSB 0x20 +#define QIB_7322_IntClear_RcvUrg0IntClear_RMASK 0x1 +#define QIB_7322_IntClear_ErrIntClear_1_LSB 0x1F +#define QIB_7322_IntClear_ErrIntClear_1_MSB 0x1F +#define QIB_7322_IntClear_ErrIntClear_1_RMASK 0x1 +#define QIB_7322_IntClear_ErrIntClear_0_LSB 0x1E +#define QIB_7322_IntClear_ErrIntClear_0_MSB 0x1E +#define QIB_7322_IntClear_ErrIntClear_0_RMASK 0x1 +#define QIB_7322_IntClear_ErrIntClear_LSB 0x1D +#define QIB_7322_IntClear_ErrIntClear_MSB 0x1D +#define QIB_7322_IntClear_ErrIntClear_RMASK 0x1 +#define QIB_7322_IntClear_AssertGPIOIntClear_LSB 0x1C +#define QIB_7322_IntClear_AssertGPIOIntClear_MSB 0x1C +#define QIB_7322_IntClear_AssertGPIOIntClear_RMASK 0x1 +#define QIB_7322_IntClear_SendDoneIntClear_1_LSB 0x19 +#define QIB_7322_IntClear_SendDoneIntClear_1_MSB 0x19 +#define QIB_7322_IntClear_SendDoneIntClear_1_RMASK 0x1 +#define 
QIB_7322_IntClear_SendDoneIntClear_0_LSB 0x18 +#define QIB_7322_IntClear_SendDoneIntClear_0_MSB 0x18 +#define QIB_7322_IntClear_SendDoneIntClear_0_RMASK 0x1 +#define QIB_7322_IntClear_SendBufAvailIntClear_LSB 0x17 +#define QIB_7322_IntClear_SendBufAvailIntClear_MSB 0x17 +#define QIB_7322_IntClear_SendBufAvailIntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail17IntClear_LSB 0x11 +#define QIB_7322_IntClear_RcvAvail17IntClear_MSB 0x11 +#define QIB_7322_IntClear_RcvAvail17IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail16IntClear_LSB 0x10 +#define QIB_7322_IntClear_RcvAvail16IntClear_MSB 0x10 +#define QIB_7322_IntClear_RcvAvail16IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail15IntClear_LSB 0xF +#define QIB_7322_IntClear_RcvAvail15IntClear_MSB 0xF +#define QIB_7322_IntClear_RcvAvail15IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail14IntClear_LSB 0xE +#define QIB_7322_IntClear_RcvAvail14IntClear_MSB 0xE +#define QIB_7322_IntClear_RcvAvail14IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail13IntClear_LSB 0xD +#define QIB_7322_IntClear_RcvAvail13IntClear_MSB 0xD +#define QIB_7322_IntClear_RcvAvail13IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail12IntClear_LSB 0xC +#define QIB_7322_IntClear_RcvAvail12IntClear_MSB 0xC +#define QIB_7322_IntClear_RcvAvail12IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail11IntClear_LSB 0xB +#define QIB_7322_IntClear_RcvAvail11IntClear_MSB 0xB +#define QIB_7322_IntClear_RcvAvail11IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail10IntClear_LSB 0xA +#define QIB_7322_IntClear_RcvAvail10IntClear_MSB 0xA +#define QIB_7322_IntClear_RcvAvail10IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail9IntClear_LSB 0x9 +#define QIB_7322_IntClear_RcvAvail9IntClear_MSB 0x9 +#define QIB_7322_IntClear_RcvAvail9IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail8IntClear_LSB 0x8 +#define QIB_7322_IntClear_RcvAvail8IntClear_MSB 0x8 +#define QIB_7322_IntClear_RcvAvail8IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail7IntClear_LSB 0x7 +#define QIB_7322_IntClear_RcvAvail7IntClear_MSB 0x7 +#define QIB_7322_IntClear_RcvAvail7IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail6IntClear_LSB 0x6 +#define QIB_7322_IntClear_RcvAvail6IntClear_MSB 0x6 +#define QIB_7322_IntClear_RcvAvail6IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail5IntClear_LSB 0x5 +#define QIB_7322_IntClear_RcvAvail5IntClear_MSB 0x5 +#define QIB_7322_IntClear_RcvAvail5IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail4IntClear_LSB 0x4 +#define QIB_7322_IntClear_RcvAvail4IntClear_MSB 0x4 +#define QIB_7322_IntClear_RcvAvail4IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail3IntClear_LSB 0x3 +#define QIB_7322_IntClear_RcvAvail3IntClear_MSB 0x3 +#define QIB_7322_IntClear_RcvAvail3IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail2IntClear_LSB 0x2 +#define QIB_7322_IntClear_RcvAvail2IntClear_MSB 0x2 +#define QIB_7322_IntClear_RcvAvail2IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail1IntClear_LSB 0x1 +#define QIB_7322_IntClear_RcvAvail1IntClear_MSB 0x1 +#define QIB_7322_IntClear_RcvAvail1IntClear_RMASK 0x1 +#define QIB_7322_IntClear_RcvAvail0IntClear_LSB 0x0 +#define QIB_7322_IntClear_RcvAvail0IntClear_MSB 0x0 +#define QIB_7322_IntClear_RcvAvail0IntClear_RMASK 0x1 + +#define QIB_7322_ErrMask_OFFS 0x80 +#define QIB_7322_ErrMask_DEF 0x0000000000000000 +#define QIB_7322_ErrMask_ResetNegatedMask_LSB 0x3F +#define QIB_7322_ErrMask_ResetNegatedMask_MSB 0x3F +#define QIB_7322_ErrMask_ResetNegatedMask_RMASK 0x1 +#define QIB_7322_ErrMask_HardwareErrMask_LSB 0x3E 
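/*
 * Editorial aside, not part of the commit: in these mechanically
 * generated headers each register is described by an _OFFS byte
 * offset, a _DEF power-on default, and per-field _LSB/_MSB/_RMASK
 * triplets, where _RMASK is the field mask that applies *after* the
 * register value has been shifted right by _LSB.  A minimal sketch,
 * assuming a hypothetical read_csr() MMIO accessor and helper names
 * that are illustrative only, not code from this patch:
 *
 *	#include <stdint.h>
 *
 *	static inline uint64_t qib_get_field(uint64_t regval,
 *					     unsigned int lsb,
 *					     uint64_t rmask)
 *	{
 *		return (regval >> lsb) & rmask;	// isolate one bit field
 *	}
 *
 *	// e.g. test the HardwareErr bit of ErrStatus (offset 0x88):
 *	//	uint64_t v = read_csr(QIB_7322_ErrStatus_OFFS);
 *	//	if (qib_get_field(v, QIB_7322_ErrStatus_HardwareErr_LSB,
 *	//			  QIB_7322_ErrStatus_HardwareErr_RMASK))
 *	//		handle_hardware_error();
 */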
+#define QIB_7322_ErrMask_HardwareErrMask_MSB 0x3E +#define QIB_7322_ErrMask_HardwareErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_InvalidAddrErrMask_LSB 0x3D +#define QIB_7322_ErrMask_InvalidAddrErrMask_MSB 0x3D +#define QIB_7322_ErrMask_InvalidAddrErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SDmaVL15ErrMask_LSB 0x38 +#define QIB_7322_ErrMask_SDmaVL15ErrMask_MSB 0x38 +#define QIB_7322_ErrMask_SDmaVL15ErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SBufVL15MisUseErrMask_LSB 0x37 +#define QIB_7322_ErrMask_SBufVL15MisUseErrMask_MSB 0x37 +#define QIB_7322_ErrMask_SBufVL15MisUseErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_InvalidEEPCmdMask_LSB 0x35 +#define QIB_7322_ErrMask_InvalidEEPCmdMask_MSB 0x35 +#define QIB_7322_ErrMask_InvalidEEPCmdMask_RMASK 0x1 +#define QIB_7322_ErrMask_RcvContextShareErrMask_LSB 0x34 +#define QIB_7322_ErrMask_RcvContextShareErrMask_MSB 0x34 +#define QIB_7322_ErrMask_RcvContextShareErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SendVLMismatchErrMask_LSB 0x24 +#define QIB_7322_ErrMask_SendVLMismatchErrMask_MSB 0x24 +#define QIB_7322_ErrMask_SendVLMismatchErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SendArmLaunchErrMask_LSB 0x23 +#define QIB_7322_ErrMask_SendArmLaunchErrMask_MSB 0x23 +#define QIB_7322_ErrMask_SendArmLaunchErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SendSpecialTriggerErrMask_LSB 0x1B +#define QIB_7322_ErrMask_SendSpecialTriggerErrMask_MSB 0x1B +#define QIB_7322_ErrMask_SendSpecialTriggerErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SDmaWrongPortErrMask_LSB 0x1A +#define QIB_7322_ErrMask_SDmaWrongPortErrMask_MSB 0x1A +#define QIB_7322_ErrMask_SDmaWrongPortErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_SDmaBufMaskDuplicateErrMask_LSB 0x19 +#define QIB_7322_ErrMask_SDmaBufMaskDuplicateErrMask_MSB 0x19 +#define QIB_7322_ErrMask_SDmaBufMaskDuplicateErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_RcvHdrFullErrMask_LSB 0xD +#define QIB_7322_ErrMask_RcvHdrFullErrMask_MSB 0xD +#define QIB_7322_ErrMask_RcvHdrFullErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_RcvEgrFullErrMask_LSB 0xC +#define QIB_7322_ErrMask_RcvEgrFullErrMask_MSB 0xC +#define QIB_7322_ErrMask_RcvEgrFullErrMask_RMASK 0x1 + +#define QIB_7322_ErrStatus_OFFS 0x88 +#define QIB_7322_ErrStatus_DEF 0x0000000000000000 +#define QIB_7322_ErrStatus_ResetNegated_LSB 0x3F +#define QIB_7322_ErrStatus_ResetNegated_MSB 0x3F +#define QIB_7322_ErrStatus_ResetNegated_RMASK 0x1 +#define QIB_7322_ErrStatus_HardwareErr_LSB 0x3E +#define QIB_7322_ErrStatus_HardwareErr_MSB 0x3E +#define QIB_7322_ErrStatus_HardwareErr_RMASK 0x1 +#define QIB_7322_ErrStatus_InvalidAddrErr_LSB 0x3D +#define QIB_7322_ErrStatus_InvalidAddrErr_MSB 0x3D +#define QIB_7322_ErrStatus_InvalidAddrErr_RMASK 0x1 +#define QIB_7322_ErrStatus_SDmaVL15Err_LSB 0x38 +#define QIB_7322_ErrStatus_SDmaVL15Err_MSB 0x38 +#define QIB_7322_ErrStatus_SDmaVL15Err_RMASK 0x1 +#define QIB_7322_ErrStatus_SBufVL15MisUseErr_LSB 0x37 +#define QIB_7322_ErrStatus_SBufVL15MisUseErr_MSB 0x37 +#define QIB_7322_ErrStatus_SBufVL15MisUseErr_RMASK 0x1 +#define QIB_7322_ErrStatus_InvalidEEPCmdErr_LSB 0x35 +#define QIB_7322_ErrStatus_InvalidEEPCmdErr_MSB 0x35 +#define QIB_7322_ErrStatus_InvalidEEPCmdErr_RMASK 0x1 +#define QIB_7322_ErrStatus_RcvContextShareErr_LSB 0x34 +#define QIB_7322_ErrStatus_RcvContextShareErr_MSB 0x34 +#define QIB_7322_ErrStatus_RcvContextShareErr_RMASK 0x1 +#define QIB_7322_ErrStatus_SendVLMismatchErr_LSB 0x24 +#define QIB_7322_ErrStatus_SendVLMismatchErr_MSB 0x24 +#define QIB_7322_ErrStatus_SendVLMismatchErr_RMASK 0x1 +#define QIB_7322_ErrStatus_SendArmLaunchErr_LSB 0x23 +#define 
QIB_7322_ErrStatus_SendArmLaunchErr_MSB 0x23 +#define QIB_7322_ErrStatus_SendArmLaunchErr_RMASK 0x1 +#define QIB_7322_ErrStatus_SendSpecialTriggerErr_LSB 0x1B +#define QIB_7322_ErrStatus_SendSpecialTriggerErr_MSB 0x1B +#define QIB_7322_ErrStatus_SendSpecialTriggerErr_RMASK 0x1 +#define QIB_7322_ErrStatus_SDmaWrongPortErr_LSB 0x1A +#define QIB_7322_ErrStatus_SDmaWrongPortErr_MSB 0x1A +#define QIB_7322_ErrStatus_SDmaWrongPortErr_RMASK 0x1 +#define QIB_7322_ErrStatus_SDmaBufMaskDuplicateErr_LSB 0x19 +#define QIB_7322_ErrStatus_SDmaBufMaskDuplicateErr_MSB 0x19 +#define QIB_7322_ErrStatus_SDmaBufMaskDuplicateErr_RMASK 0x1 +#define QIB_7322_ErrStatus_RcvHdrFullErr_LSB 0xD +#define QIB_7322_ErrStatus_RcvHdrFullErr_MSB 0xD +#define QIB_7322_ErrStatus_RcvHdrFullErr_RMASK 0x1 +#define QIB_7322_ErrStatus_RcvEgrFullErr_LSB 0xC +#define QIB_7322_ErrStatus_RcvEgrFullErr_MSB 0xC +#define QIB_7322_ErrStatus_RcvEgrFullErr_RMASK 0x1 + +#define QIB_7322_ErrClear_OFFS 0x90 +#define QIB_7322_ErrClear_DEF 0x0000000000000000 +#define QIB_7322_ErrClear_ResetNegatedClear_LSB 0x3F +#define QIB_7322_ErrClear_ResetNegatedClear_MSB 0x3F +#define QIB_7322_ErrClear_ResetNegatedClear_RMASK 0x1 +#define QIB_7322_ErrClear_HardwareErrClear_LSB 0x3E +#define QIB_7322_ErrClear_HardwareErrClear_MSB 0x3E +#define QIB_7322_ErrClear_HardwareErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_InvalidAddrErrClear_LSB 0x3D +#define QIB_7322_ErrClear_InvalidAddrErrClear_MSB 0x3D +#define QIB_7322_ErrClear_InvalidAddrErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_SDmaVL15ErrClear_LSB 0x38 +#define QIB_7322_ErrClear_SDmaVL15ErrClear_MSB 0x38 +#define QIB_7322_ErrClear_SDmaVL15ErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_SBufVL15MisUseErrClear_LSB 0x37 +#define QIB_7322_ErrClear_SBufVL15MisUseErrClear_MSB 0x37 +#define QIB_7322_ErrClear_SBufVL15MisUseErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_InvalidEEPCmdErrClear_LSB 0x35 +#define QIB_7322_ErrClear_InvalidEEPCmdErrClear_MSB 0x35 +#define QIB_7322_ErrClear_InvalidEEPCmdErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_RcvContextShareErrClear_LSB 0x34 +#define QIB_7322_ErrClear_RcvContextShareErrClear_MSB 0x34 +#define QIB_7322_ErrClear_RcvContextShareErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_SendVLMismatchErrMask_LSB 0x24 +#define QIB_7322_ErrClear_SendVLMismatchErrMask_MSB 0x24 +#define QIB_7322_ErrClear_SendVLMismatchErrMask_RMASK 0x1 +#define QIB_7322_ErrClear_SendArmLaunchErrClear_LSB 0x23 +#define QIB_7322_ErrClear_SendArmLaunchErrClear_MSB 0x23 +#define QIB_7322_ErrClear_SendArmLaunchErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_SendSpecialTriggerErrClear_LSB 0x1B +#define QIB_7322_ErrClear_SendSpecialTriggerErrClear_MSB 0x1B +#define QIB_7322_ErrClear_SendSpecialTriggerErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_SDmaWrongPortErrClear_LSB 0x1A +#define QIB_7322_ErrClear_SDmaWrongPortErrClear_MSB 0x1A +#define QIB_7322_ErrClear_SDmaWrongPortErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_SDmaBufMaskDuplicateErrClear_LSB 0x19 +#define QIB_7322_ErrClear_SDmaBufMaskDuplicateErrClear_MSB 0x19 +#define QIB_7322_ErrClear_SDmaBufMaskDuplicateErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_RcvHdrFullErrClear_LSB 0xD +#define QIB_7322_ErrClear_RcvHdrFullErrClear_MSB 0xD +#define QIB_7322_ErrClear_RcvHdrFullErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_RcvEgrFullErrClear_LSB 0xC +#define QIB_7322_ErrClear_RcvEgrFullErrClear_MSB 0xC +#define QIB_7322_ErrClear_RcvEgrFullErrClear_RMASK 0x1 + +#define QIB_7322_HwErrMask_OFFS 0x98 +#define QIB_7322_HwErrMask_DEF 0x0000000000000000 +#define 
QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_1_LSB 0x3F +#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_1_MSB 0x3F +#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_1_RMASK 0x1 +#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_0_LSB 0x3E +#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_0_MSB 0x3E +#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_0_RMASK 0x1 +#define QIB_7322_HwErrMask_PCIESerdesPClkNotDetectMask_LSB 0x37 +#define QIB_7322_HwErrMask_PCIESerdesPClkNotDetectMask_MSB 0x37 +#define QIB_7322_HwErrMask_PCIESerdesPClkNotDetectMask_RMASK 0x1 +#define QIB_7322_HwErrMask_PowerOnBISTFailedMask_LSB 0x36 +#define QIB_7322_HwErrMask_PowerOnBISTFailedMask_MSB 0x36 +#define QIB_7322_HwErrMask_PowerOnBISTFailedMask_RMASK 0x1 +#define QIB_7322_HwErrMask_TempsenseTholdReachedMask_LSB 0x35 +#define QIB_7322_HwErrMask_TempsenseTholdReachedMask_MSB 0x35 +#define QIB_7322_HwErrMask_TempsenseTholdReachedMask_RMASK 0x1 +#define QIB_7322_HwErrMask_MemoryErrMask_LSB 0x30 +#define QIB_7322_HwErrMask_MemoryErrMask_MSB 0x30 +#define QIB_7322_HwErrMask_MemoryErrMask_RMASK 0x1 +#define QIB_7322_HwErrMask_pcie_phy_txParityErr_LSB 0x22 +#define QIB_7322_HwErrMask_pcie_phy_txParityErr_MSB 0x22 +#define QIB_7322_HwErrMask_pcie_phy_txParityErr_RMASK 0x1 +#define QIB_7322_HwErrMask_PCIeBusParityErrMask_LSB 0x1F +#define QIB_7322_HwErrMask_PCIeBusParityErrMask_MSB 0x21 +#define QIB_7322_HwErrMask_PCIeBusParityErrMask_RMASK 0x7 +#define QIB_7322_HwErrMask_PcieCplTimeoutMask_LSB 0x1E +#define QIB_7322_HwErrMask_PcieCplTimeoutMask_MSB 0x1E +#define QIB_7322_HwErrMask_PcieCplTimeoutMask_RMASK 0x1 +#define QIB_7322_HwErrMask_PciePoisonedTLPMask_LSB 0x1D +#define QIB_7322_HwErrMask_PciePoisonedTLPMask_MSB 0x1D +#define QIB_7322_HwErrMask_PciePoisonedTLPMask_RMASK 0x1 +#define QIB_7322_HwErrMask_SDmaMemReadErrMask_1_LSB 0x1C +#define QIB_7322_HwErrMask_SDmaMemReadErrMask_1_MSB 0x1C +#define QIB_7322_HwErrMask_SDmaMemReadErrMask_1_RMASK 0x1 +#define QIB_7322_HwErrMask_SDmaMemReadErrMask_0_LSB 0x1B +#define QIB_7322_HwErrMask_SDmaMemReadErrMask_0_MSB 0x1B +#define QIB_7322_HwErrMask_SDmaMemReadErrMask_0_RMASK 0x1 +#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_LSB 0xF +#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_MSB 0xF +#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_RMASK 0x1 +#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_LSB 0xE +#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_MSB 0xE +#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_RMASK 0x1 +#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_LSB 0xD +#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_MSB 0xD +#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_RMASK 0x1 +#define QIB_7322_HwErrMask_statusValidNoEopMask_LSB 0xC +#define QIB_7322_HwErrMask_statusValidNoEopMask_MSB 0xC +#define QIB_7322_HwErrMask_statusValidNoEopMask_RMASK 0x1 +#define QIB_7322_HwErrMask_LATriggeredMask_LSB 0xB +#define QIB_7322_HwErrMask_LATriggeredMask_MSB 0xB +#define QIB_7322_HwErrMask_LATriggeredMask_RMASK 0x1 + +#define QIB_7322_HwErrStatus_OFFS 0xA0 +#define QIB_7322_HwErrStatus_DEF 0x0000000000000000 +#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_1_LSB 0x3F +#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_1_MSB 0x3F +#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_1_RMASK 0x1 +#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_0_LSB 0x3E +#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_0_MSB 0x3E +#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_0_RMASK 0x1 +#define 
QIB_7322_HwErrStatus_PCIESerdesPClkNotDetect_LSB 0x37 +#define QIB_7322_HwErrStatus_PCIESerdesPClkNotDetect_MSB 0x37 +#define QIB_7322_HwErrStatus_PCIESerdesPClkNotDetect_RMASK 0x1 +#define QIB_7322_HwErrStatus_PowerOnBISTFailed_LSB 0x36 +#define QIB_7322_HwErrStatus_PowerOnBISTFailed_MSB 0x36 +#define QIB_7322_HwErrStatus_PowerOnBISTFailed_RMASK 0x1 +#define QIB_7322_HwErrStatus_TempsenseTholdReached_LSB 0x35 +#define QIB_7322_HwErrStatus_TempsenseTholdReached_MSB 0x35 +#define QIB_7322_HwErrStatus_TempsenseTholdReached_RMASK 0x1 +#define QIB_7322_HwErrStatus_MemoryErr_LSB 0x30 +#define QIB_7322_HwErrStatus_MemoryErr_MSB 0x30 +#define QIB_7322_HwErrStatus_MemoryErr_RMASK 0x1 +#define QIB_7322_HwErrStatus_pcie_phy_txParityErr_LSB 0x22 +#define QIB_7322_HwErrStatus_pcie_phy_txParityErr_MSB 0x22 +#define QIB_7322_HwErrStatus_pcie_phy_txParityErr_RMASK 0x1 +#define QIB_7322_HwErrStatus_PCIeBusParity_LSB 0x1F +#define QIB_7322_HwErrStatus_PCIeBusParity_MSB 0x21 +#define QIB_7322_HwErrStatus_PCIeBusParity_RMASK 0x7 +#define QIB_7322_HwErrStatus_PcieCplTimeout_LSB 0x1E +#define QIB_7322_HwErrStatus_PcieCplTimeout_MSB 0x1E +#define QIB_7322_HwErrStatus_PcieCplTimeout_RMASK 0x1 +#define QIB_7322_HwErrStatus_PciePoisonedTLP_LSB 0x1D +#define QIB_7322_HwErrStatus_PciePoisonedTLP_MSB 0x1D +#define QIB_7322_HwErrStatus_PciePoisonedTLP_RMASK 0x1 +#define QIB_7322_HwErrStatus_SDmaMemReadErr_1_LSB 0x1C +#define QIB_7322_HwErrStatus_SDmaMemReadErr_1_MSB 0x1C +#define QIB_7322_HwErrStatus_SDmaMemReadErr_1_RMASK 0x1 +#define QIB_7322_HwErrStatus_SDmaMemReadErr_0_LSB 0x1B +#define QIB_7322_HwErrStatus_SDmaMemReadErr_0_MSB 0x1B +#define QIB_7322_HwErrStatus_SDmaMemReadErr_0_RMASK 0x1 +#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_LSB 0xF +#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_MSB 0xF +#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_RMASK 0x1 +#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_LSB 0xE +#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_MSB 0xE +#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_RMASK 0x1 +#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_LSB 0xD +#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_MSB 0xD +#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_RMASK 0x1 +#define QIB_7322_HwErrStatus_statusValidNoEop_LSB 0xC +#define QIB_7322_HwErrStatus_statusValidNoEop_MSB 0xC +#define QIB_7322_HwErrStatus_statusValidNoEop_RMASK 0x1 +#define QIB_7322_HwErrStatus_LATriggered_LSB 0xB +#define QIB_7322_HwErrStatus_LATriggered_MSB 0xB +#define QIB_7322_HwErrStatus_LATriggered_RMASK 0x1 + +#define QIB_7322_HwErrClear_OFFS 0xA8 +#define QIB_7322_HwErrClear_DEF 0x0000000000000000 +#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_1_LSB 0x3F +#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_1_MSB 0x3F +#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_1_RMASK 0x1 +#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_0_LSB 0x3E +#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_0_MSB 0x3E +#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_0_RMASK 0x1 +#define QIB_7322_HwErrClear_PCIESerdesPClkNotDetectClear_LSB 0x37 +#define QIB_7322_HwErrClear_PCIESerdesPClkNotDetectClear_MSB 0x37 +#define QIB_7322_HwErrClear_PCIESerdesPClkNotDetectClear_RMASK 0x1 +#define QIB_7322_HwErrClear_PowerOnBISTFailedClear_LSB 0x36 +#define QIB_7322_HwErrClear_PowerOnBISTFailedClear_MSB 0x36 +#define QIB_7322_HwErrClear_PowerOnBISTFailedClear_RMASK 0x1 +#define QIB_7322_HwErrClear_TempsenseTholdReachedClear_LSB 0x35 +#define 
QIB_7322_HwErrClear_TempsenseTholdReachedClear_MSB 0x35 +#define QIB_7322_HwErrClear_TempsenseTholdReachedClear_RMASK 0x1 +#define QIB_7322_HwErrClear_MemoryErrClear_LSB 0x30 +#define QIB_7322_HwErrClear_MemoryErrClear_MSB 0x30 +#define QIB_7322_HwErrClear_MemoryErrClear_RMASK 0x1 +#define QIB_7322_HwErrClear_pcie_phy_txParityErr_LSB 0x22 +#define QIB_7322_HwErrClear_pcie_phy_txParityErr_MSB 0x22 +#define QIB_7322_HwErrClear_pcie_phy_txParityErr_RMASK 0x1 +#define QIB_7322_HwErrClear_PCIeBusParityClear_LSB 0x1F +#define QIB_7322_HwErrClear_PCIeBusParityClear_MSB 0x21 +#define QIB_7322_HwErrClear_PCIeBusParityClear_RMASK 0x7 +#define QIB_7322_HwErrClear_PcieCplTimeoutClear_LSB 0x1E +#define QIB_7322_HwErrClear_PcieCplTimeoutClear_MSB 0x1E +#define QIB_7322_HwErrClear_PcieCplTimeoutClear_RMASK 0x1 +#define QIB_7322_HwErrClear_PciePoisonedTLPClear_LSB 0x1D +#define QIB_7322_HwErrClear_PciePoisonedTLPClear_MSB 0x1D +#define QIB_7322_HwErrClear_PciePoisonedTLPClear_RMASK 0x1 +#define QIB_7322_HwErrClear_SDmaMemReadErrClear_1_LSB 0x1C +#define QIB_7322_HwErrClear_SDmaMemReadErrClear_1_MSB 0x1C +#define QIB_7322_HwErrClear_SDmaMemReadErrClear_1_RMASK 0x1 +#define QIB_7322_HwErrClear_SDmaMemReadErrClear_0_LSB 0x1B +#define QIB_7322_HwErrClear_SDmaMemReadErrClear_0_MSB 0x1B +#define QIB_7322_HwErrClear_SDmaMemReadErrClear_0_RMASK 0x1 +#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_LSB 0xF +#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_MSB 0xF +#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_RMASK 0x1 +#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_LSB 0xE +#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_MSB 0xE +#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_RMASK 0x1 +#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_LSB 0xD +#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_MSB 0xD +#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_RMASK 0x1 +#define QIB_7322_HwErrClear_statusValidNoEopClear_LSB 0xC +#define QIB_7322_HwErrClear_statusValidNoEopClear_MSB 0xC +#define QIB_7322_HwErrClear_statusValidNoEopClear_RMASK 0x1 +#define QIB_7322_HwErrClear_LATriggeredClear_LSB 0xB +#define QIB_7322_HwErrClear_LATriggeredClear_MSB 0xB +#define QIB_7322_HwErrClear_LATriggeredClear_RMASK 0x1 + +#define QIB_7322_HwDiagCtrl_OFFS 0xB0 +#define QIB_7322_HwDiagCtrl_DEF 0x0000000000000000 +#define QIB_7322_HwDiagCtrl_Diagnostic_LSB 0x3F +#define QIB_7322_HwDiagCtrl_Diagnostic_MSB 0x3F +#define QIB_7322_HwDiagCtrl_Diagnostic_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_CounterWrEnable_LSB 0x3D +#define QIB_7322_HwDiagCtrl_CounterWrEnable_MSB 0x3D +#define QIB_7322_HwDiagCtrl_CounterWrEnable_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_CounterDisable_LSB 0x3C +#define QIB_7322_HwDiagCtrl_CounterDisable_MSB 0x3C +#define QIB_7322_HwDiagCtrl_CounterDisable_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_forcePCIeBusParity_LSB 0x1F +#define QIB_7322_HwDiagCtrl_forcePCIeBusParity_MSB 0x22 +#define QIB_7322_HwDiagCtrl_forcePCIeBusParity_RMASK 0xF +#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_LSB 0xF +#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_MSB 0xF +#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_LSB 0xE +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_MSB 0xE +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_LSB 0xD +#define 
QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_MSB 0xD +#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_LSB 0xC +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_MSB 0xC +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_RMASK 0x1 + +#define QIB_7322_EXTStatus_OFFS 0xC0 +#define QIB_7322_EXTStatus_DEF 0x000000000000X000 +#define QIB_7322_EXTStatus_GPIOIn_LSB 0x30 +#define QIB_7322_EXTStatus_GPIOIn_MSB 0x3F +#define QIB_7322_EXTStatus_GPIOIn_RMASK 0xFFFF +#define QIB_7322_EXTStatus_MemBISTDisabled_LSB 0xF +#define QIB_7322_EXTStatus_MemBISTDisabled_MSB 0xF +#define QIB_7322_EXTStatus_MemBISTDisabled_RMASK 0x1 +#define QIB_7322_EXTStatus_MemBISTEndTest_LSB 0xE +#define QIB_7322_EXTStatus_MemBISTEndTest_MSB 0xE +#define QIB_7322_EXTStatus_MemBISTEndTest_RMASK 0x1 + +#define QIB_7322_EXTCtrl_OFFS 0xC8 +#define QIB_7322_EXTCtrl_DEF 0x0000000000000000 +#define QIB_7322_EXTCtrl_GPIOOe_LSB 0x30 +#define QIB_7322_EXTCtrl_GPIOOe_MSB 0x3F +#define QIB_7322_EXTCtrl_GPIOOe_RMASK 0xFFFF +#define QIB_7322_EXTCtrl_GPIOInvert_LSB 0x20 +#define QIB_7322_EXTCtrl_GPIOInvert_MSB 0x2F +#define QIB_7322_EXTCtrl_GPIOInvert_RMASK 0xFFFF +#define QIB_7322_EXTCtrl_LEDPort1GreenOn_LSB 0x3 +#define QIB_7322_EXTCtrl_LEDPort1GreenOn_MSB 0x3 +#define QIB_7322_EXTCtrl_LEDPort1GreenOn_RMASK 0x1 +#define QIB_7322_EXTCtrl_LEDPort1YellowOn_LSB 0x2 +#define QIB_7322_EXTCtrl_LEDPort1YellowOn_MSB 0x2 +#define QIB_7322_EXTCtrl_LEDPort1YellowOn_RMASK 0x1 +#define QIB_7322_EXTCtrl_LEDPort0GreenOn_LSB 0x1 +#define QIB_7322_EXTCtrl_LEDPort0GreenOn_MSB 0x1 +#define QIB_7322_EXTCtrl_LEDPort0GreenOn_RMASK 0x1 +#define QIB_7322_EXTCtrl_LEDPort0YellowOn_LSB 0x0 +#define QIB_7322_EXTCtrl_LEDPort0YellowOn_MSB 0x0 +#define QIB_7322_EXTCtrl_LEDPort0YellowOn_RMASK 0x1 + +#define QIB_7322_GPIOOut_OFFS 0xE0 +#define QIB_7322_GPIOOut_DEF 0x0000000000000000 + +#define QIB_7322_GPIOMask_OFFS 0xE8 +#define QIB_7322_GPIOMask_DEF 0x0000000000000000 + +#define QIB_7322_GPIOStatus_OFFS 0xF0 +#define QIB_7322_GPIOStatus_DEF 0x0000000000000000 + +#define QIB_7322_GPIOClear_OFFS 0xF8 +#define QIB_7322_GPIOClear_DEF 0x0000000000000000 + +#define QIB_7322_RcvCtrl_OFFS 0x100 +#define QIB_7322_RcvCtrl_DEF 0x0000000000000000 +#define QIB_7322_RcvCtrl_TidReDirect_LSB 0x30 +#define QIB_7322_RcvCtrl_TidReDirect_MSB 0x3F +#define QIB_7322_RcvCtrl_TidReDirect_RMASK 0xFFFF +#define QIB_7322_RcvCtrl_TailUpd_LSB 0x2F +#define QIB_7322_RcvCtrl_TailUpd_MSB 0x2F +#define QIB_7322_RcvCtrl_TailUpd_RMASK 0x1 +#define QIB_7322_RcvCtrl_XrcTypeCode_LSB 0x2C +#define QIB_7322_RcvCtrl_XrcTypeCode_MSB 0x2E +#define QIB_7322_RcvCtrl_XrcTypeCode_RMASK 0x7 +#define QIB_7322_RcvCtrl_TidFlowEnable_LSB 0x2B +#define QIB_7322_RcvCtrl_TidFlowEnable_MSB 0x2B +#define QIB_7322_RcvCtrl_TidFlowEnable_RMASK 0x1 +#define QIB_7322_RcvCtrl_ContextCfg_LSB 0x29 +#define QIB_7322_RcvCtrl_ContextCfg_MSB 0x2A +#define QIB_7322_RcvCtrl_ContextCfg_RMASK 0x3 +#define QIB_7322_RcvCtrl_IntrAvail_LSB 0x14 +#define QIB_7322_RcvCtrl_IntrAvail_MSB 0x25 +#define QIB_7322_RcvCtrl_IntrAvail_RMASK 0x3FFFF +#define QIB_7322_RcvCtrl_dontDropRHQFull_LSB 0x0 +#define QIB_7322_RcvCtrl_dontDropRHQFull_MSB 0x11 +#define QIB_7322_RcvCtrl_dontDropRHQFull_RMASK 0x3FFFF + +#define QIB_7322_RcvHdrSize_OFFS 0x110 +#define QIB_7322_RcvHdrSize_DEF 0x0000000000000000 + +#define QIB_7322_RcvHdrCnt_OFFS 0x118 +#define QIB_7322_RcvHdrCnt_DEF 0x0000000000000000 + +#define QIB_7322_RcvHdrEntSize_OFFS 0x120 +#define QIB_7322_RcvHdrEntSize_DEF 
0x0000000000000000 + +#define QIB_7322_RcvTIDBase_OFFS 0x128 +#define QIB_7322_RcvTIDBase_DEF 0x0000000000050000 + +#define QIB_7322_RcvTIDCnt_OFFS 0x130 +#define QIB_7322_RcvTIDCnt_DEF 0x0000000000000200 + +#define QIB_7322_RcvEgrBase_OFFS 0x138 +#define QIB_7322_RcvEgrBase_DEF 0x0000000000014000 + +#define QIB_7322_RcvEgrCnt_OFFS 0x140 +#define QIB_7322_RcvEgrCnt_DEF 0x0000000000001000 + +#define QIB_7322_RcvBufBase_OFFS 0x148 +#define QIB_7322_RcvBufBase_DEF 0x0000000000080000 + +#define QIB_7322_RcvBufSize_OFFS 0x150 +#define QIB_7322_RcvBufSize_DEF 0x0000000000005000 + +#define QIB_7322_RxIntMemBase_OFFS 0x158 +#define QIB_7322_RxIntMemBase_DEF 0x0000000000077000 + +#define QIB_7322_RxIntMemSize_OFFS 0x160 +#define QIB_7322_RxIntMemSize_DEF 0x0000000000007000 + +#define QIB_7322_feature_mask_OFFS 0x190 +#define QIB_7322_feature_mask_DEF 0x00000000000000XX + +#define QIB_7322_active_feature_mask_OFFS 0x198 +#define QIB_7322_active_feature_mask_DEF 0x00000000000000XX +#define QIB_7322_active_feature_mask_Port1_QDR_Enabled_LSB 0x5 +#define QIB_7322_active_feature_mask_Port1_QDR_Enabled_MSB 0x5 +#define QIB_7322_active_feature_mask_Port1_QDR_Enabled_RMASK 0x1 +#define QIB_7322_active_feature_mask_Port1_DDR_Enabled_LSB 0x4 +#define QIB_7322_active_feature_mask_Port1_DDR_Enabled_MSB 0x4 +#define QIB_7322_active_feature_mask_Port1_DDR_Enabled_RMASK 0x1 +#define QIB_7322_active_feature_mask_Port1_SDR_Enabled_LSB 0x3 +#define QIB_7322_active_feature_mask_Port1_SDR_Enabled_MSB 0x3 +#define QIB_7322_active_feature_mask_Port1_SDR_Enabled_RMASK 0x1 +#define QIB_7322_active_feature_mask_Port0_QDR_Enabled_LSB 0x2 +#define QIB_7322_active_feature_mask_Port0_QDR_Enabled_MSB 0x2 +#define QIB_7322_active_feature_mask_Port0_QDR_Enabled_RMASK 0x1 +#define QIB_7322_active_feature_mask_Port0_DDR_Enabled_LSB 0x1 +#define QIB_7322_active_feature_mask_Port0_DDR_Enabled_MSB 0x1 +#define QIB_7322_active_feature_mask_Port0_DDR_Enabled_RMASK 0x1 +#define QIB_7322_active_feature_mask_Port0_SDR_Enabled_LSB 0x0 +#define QIB_7322_active_feature_mask_Port0_SDR_Enabled_MSB 0x0 +#define QIB_7322_active_feature_mask_Port0_SDR_Enabled_RMASK 0x1 + +#define QIB_7322_SendCtrl_OFFS 0x1C0 +#define QIB_7322_SendCtrl_DEF 0x0000000000000000 +#define QIB_7322_SendCtrl_Disarm_LSB 0x1F +#define QIB_7322_SendCtrl_Disarm_MSB 0x1F +#define QIB_7322_SendCtrl_Disarm_RMASK 0x1 +#define QIB_7322_SendCtrl_SendBufAvailPad64Byte_LSB 0x1D +#define QIB_7322_SendCtrl_SendBufAvailPad64Byte_MSB 0x1D +#define QIB_7322_SendCtrl_SendBufAvailPad64Byte_RMASK 0x1 +#define QIB_7322_SendCtrl_AvailUpdThld_LSB 0x18 +#define QIB_7322_SendCtrl_AvailUpdThld_MSB 0x1C +#define QIB_7322_SendCtrl_AvailUpdThld_RMASK 0x1F +#define QIB_7322_SendCtrl_DisarmSendBuf_LSB 0x10 +#define QIB_7322_SendCtrl_DisarmSendBuf_MSB 0x17 +#define QIB_7322_SendCtrl_DisarmSendBuf_RMASK 0xFF +#define QIB_7322_SendCtrl_SpecialTriggerEn_LSB 0x4 +#define QIB_7322_SendCtrl_SpecialTriggerEn_MSB 0x4 +#define QIB_7322_SendCtrl_SpecialTriggerEn_RMASK 0x1 +#define QIB_7322_SendCtrl_SendBufAvailUpd_LSB 0x2 +#define QIB_7322_SendCtrl_SendBufAvailUpd_MSB 0x2 +#define QIB_7322_SendCtrl_SendBufAvailUpd_RMASK 0x1 +#define QIB_7322_SendCtrl_SendIntBufAvail_LSB 0x1 +#define QIB_7322_SendCtrl_SendIntBufAvail_MSB 0x1 +#define QIB_7322_SendCtrl_SendIntBufAvail_RMASK 0x1 + +#define QIB_7322_SendBufBase_OFFS 0x1C8 +#define QIB_7322_SendBufBase_DEF 0x0018000000100000 +#define QIB_7322_SendBufBase_BaseAddr_LargePIO_LSB 0x20 +#define QIB_7322_SendBufBase_BaseAddr_LargePIO_MSB 0x34 +#define 
QIB_7322_SendBufBase_BaseAddr_LargePIO_RMASK 0x1FFFFF +#define QIB_7322_SendBufBase_BaseAddr_SmallPIO_LSB 0x0 +#define QIB_7322_SendBufBase_BaseAddr_SmallPIO_MSB 0x14 +#define QIB_7322_SendBufBase_BaseAddr_SmallPIO_RMASK 0x1FFFFF + +#define QIB_7322_SendBufSize_OFFS 0x1D0 +#define QIB_7322_SendBufSize_DEF 0x0000108000000880 +#define QIB_7322_SendBufSize_Size_LargePIO_LSB 0x20 +#define QIB_7322_SendBufSize_Size_LargePIO_MSB 0x2C +#define QIB_7322_SendBufSize_Size_LargePIO_RMASK 0x1FFF +#define QIB_7322_SendBufSize_Size_SmallPIO_LSB 0x0 +#define QIB_7322_SendBufSize_Size_SmallPIO_MSB 0xB +#define QIB_7322_SendBufSize_Size_SmallPIO_RMASK 0xFFF + +#define QIB_7322_SendBufCnt_OFFS 0x1D8 +#define QIB_7322_SendBufCnt_DEF 0x0000002000000080 +#define QIB_7322_SendBufCnt_Num_LargeBuffers_LSB 0x20 +#define QIB_7322_SendBufCnt_Num_LargeBuffers_MSB 0x25 +#define QIB_7322_SendBufCnt_Num_LargeBuffers_RMASK 0x3F +#define QIB_7322_SendBufCnt_Num_SmallBuffers_LSB 0x0 +#define QIB_7322_SendBufCnt_Num_SmallBuffers_MSB 0x8 +#define QIB_7322_SendBufCnt_Num_SmallBuffers_RMASK 0x1FF + +#define QIB_7322_SendBufAvailAddr_OFFS 0x1E0 +#define QIB_7322_SendBufAvailAddr_DEF 0x0000000000000000 +#define QIB_7322_SendBufAvailAddr_SendBufAvailAddr_LSB 0x6 +#define QIB_7322_SendBufAvailAddr_SendBufAvailAddr_MSB 0x27 +#define QIB_7322_SendBufAvailAddr_SendBufAvailAddr_RMASK 0x3FFFFFFFF + +#define QIB_7322_SendBufErr0_OFFS 0x240 +#define QIB_7322_SendBufErr0_DEF 0x0000000000000000 +#define QIB_7322_SendBufErr0_SendBufErr_63_0_LSB 0x0 +#define QIB_7322_SendBufErr0_SendBufErr_63_0_MSB 0x3F +#define QIB_7322_SendBufErr0_SendBufErr_63_0_RMASK 0x0 + +#define QIB_7322_AvailUpdCount_OFFS 0x268 +#define QIB_7322_AvailUpdCount_DEF 0x0000000000000000 +#define QIB_7322_AvailUpdCount_AvailUpdCount_LSB 0x0 +#define QIB_7322_AvailUpdCount_AvailUpdCount_MSB 0x4 +#define QIB_7322_AvailUpdCount_AvailUpdCount_RMASK 0x1F + +#define QIB_7322_RcvHdrAddr0_OFFS 0x280 +#define QIB_7322_RcvHdrAddr0_DEF 0x0000000000000000 +#define QIB_7322_RcvHdrAddr0_RcvHdrAddr_LSB 0x2 +#define QIB_7322_RcvHdrAddr0_RcvHdrAddr_MSB 0x27 +#define QIB_7322_RcvHdrAddr0_RcvHdrAddr_RMASK 0x3FFFFFFFFF + +#define QIB_7322_RcvHdrTailAddr0_OFFS 0x340 +#define QIB_7322_RcvHdrTailAddr0_DEF 0x0000000000000000 +#define QIB_7322_RcvHdrTailAddr0_RcvHdrTailAddr_LSB 0x2 +#define QIB_7322_RcvHdrTailAddr0_RcvHdrTailAddr_MSB 0x27 +#define QIB_7322_RcvHdrTailAddr0_RcvHdrTailAddr_RMASK 0x3FFFFFFFFF + +#define QIB_7322_ahb_access_ctrl_OFFS 0x460 +#define QIB_7322_ahb_access_ctrl_DEF 0x0000000000000000 +#define QIB_7322_ahb_access_ctrl_sw_sel_ahb_trgt_LSB 0x1 +#define QIB_7322_ahb_access_ctrl_sw_sel_ahb_trgt_MSB 0x2 +#define QIB_7322_ahb_access_ctrl_sw_sel_ahb_trgt_RMASK 0x3 +#define QIB_7322_ahb_access_ctrl_sw_ahb_sel_LSB 0x0 +#define QIB_7322_ahb_access_ctrl_sw_ahb_sel_MSB 0x0 +#define QIB_7322_ahb_access_ctrl_sw_ahb_sel_RMASK 0x1 + +#define QIB_7322_ahb_transaction_reg_OFFS 0x468 +#define QIB_7322_ahb_transaction_reg_DEF 0x0000000080000000 +#define QIB_7322_ahb_transaction_reg_ahb_data_LSB 0x20 +#define QIB_7322_ahb_transaction_reg_ahb_data_MSB 0x3F +#define QIB_7322_ahb_transaction_reg_ahb_data_RMASK 0xFFFFFFFF +#define QIB_7322_ahb_transaction_reg_ahb_rdy_LSB 0x1F +#define QIB_7322_ahb_transaction_reg_ahb_rdy_MSB 0x1F +#define QIB_7322_ahb_transaction_reg_ahb_rdy_RMASK 0x1 +#define QIB_7322_ahb_transaction_reg_ahb_req_err_LSB 0x1E +#define QIB_7322_ahb_transaction_reg_ahb_req_err_MSB 0x1E +#define QIB_7322_ahb_transaction_reg_ahb_req_err_RMASK 0x1 +#define 
QIB_7322_ahb_transaction_reg_write_not_read_LSB 0x1B +#define QIB_7322_ahb_transaction_reg_write_not_read_MSB 0x1B +#define QIB_7322_ahb_transaction_reg_write_not_read_RMASK 0x1 +#define QIB_7322_ahb_transaction_reg_ahb_address_LSB 0x10 +#define QIB_7322_ahb_transaction_reg_ahb_address_MSB 0x1A +#define QIB_7322_ahb_transaction_reg_ahb_address_RMASK 0x7FF + +#define QIB_7322_SPC_JTAG_ACCESS_REG_OFFS 0x470 +#define QIB_7322_SPC_JTAG_ACCESS_REG_DEF 0x0000000000000001 +#define QIB_7322_SPC_JTAG_ACCESS_REG_SPC_JTAG_ACCESS_EN_LSB 0xA +#define QIB_7322_SPC_JTAG_ACCESS_REG_SPC_JTAG_ACCESS_EN_MSB 0xA +#define QIB_7322_SPC_JTAG_ACCESS_REG_SPC_JTAG_ACCESS_EN_RMASK 0x1 +#define QIB_7322_SPC_JTAG_ACCESS_REG_bist_en_LSB 0x5 +#define QIB_7322_SPC_JTAG_ACCESS_REG_bist_en_MSB 0x9 +#define QIB_7322_SPC_JTAG_ACCESS_REG_bist_en_RMASK 0x1F +#define QIB_7322_SPC_JTAG_ACCESS_REG_opcode_LSB 0x3 +#define QIB_7322_SPC_JTAG_ACCESS_REG_opcode_MSB 0x4 +#define QIB_7322_SPC_JTAG_ACCESS_REG_opcode_RMASK 0x3 +#define QIB_7322_SPC_JTAG_ACCESS_REG_tdi_LSB 0x2 +#define QIB_7322_SPC_JTAG_ACCESS_REG_tdi_MSB 0x2 +#define QIB_7322_SPC_JTAG_ACCESS_REG_tdi_RMASK 0x1 +#define QIB_7322_SPC_JTAG_ACCESS_REG_tdo_LSB 0x1 +#define QIB_7322_SPC_JTAG_ACCESS_REG_tdo_MSB 0x1 +#define QIB_7322_SPC_JTAG_ACCESS_REG_tdo_RMASK 0x1 +#define QIB_7322_SPC_JTAG_ACCESS_REG_rdy_LSB 0x0 +#define QIB_7322_SPC_JTAG_ACCESS_REG_rdy_MSB 0x0 +#define QIB_7322_SPC_JTAG_ACCESS_REG_rdy_RMASK 0x1 + +#define QIB_7322_SendCheckMask0_OFFS 0x4C0 +#define QIB_7322_SendCheckMask0_DEF 0x0000000000000000 +#define QIB_7322_SendCheckMask0_SendCheckMask_63_32_LSB 0x0 +#define QIB_7322_SendCheckMask0_SendCheckMask_63_32_MSB 0x3F +#define QIB_7322_SendCheckMask0_SendCheckMask_63_32_RMASK 0x0 + +#define QIB_7322_SendGRHCheckMask0_OFFS 0x4E0 +#define QIB_7322_SendGRHCheckMask0_DEF 0x0000000000000000 +#define QIB_7322_SendGRHCheckMask0_SendGRHCheckMask_63_32_LSB 0x0 +#define QIB_7322_SendGRHCheckMask0_SendGRHCheckMask_63_32_MSB 0x3F +#define QIB_7322_SendGRHCheckMask0_SendGRHCheckMask_63_32_RMASK 0x0 + +#define QIB_7322_SendIBPacketMask0_OFFS 0x500 +#define QIB_7322_SendIBPacketMask0_DEF 0x0000000000000000 +#define QIB_7322_SendIBPacketMask0_SendIBPacketMask_63_32_LSB 0x0 +#define QIB_7322_SendIBPacketMask0_SendIBPacketMask_63_32_MSB 0x3F +#define QIB_7322_SendIBPacketMask0_SendIBPacketMask_63_32_RMASK 0x0 + +#define QIB_7322_IntRedirect0_OFFS 0x540 +#define QIB_7322_IntRedirect0_DEF 0x0000000000000000 +#define QIB_7322_IntRedirect0_vec11_LSB 0x37 +#define QIB_7322_IntRedirect0_vec11_MSB 0x3B +#define QIB_7322_IntRedirect0_vec11_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec10_LSB 0x32 +#define QIB_7322_IntRedirect0_vec10_MSB 0x36 +#define QIB_7322_IntRedirect0_vec10_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec9_LSB 0x2D +#define QIB_7322_IntRedirect0_vec9_MSB 0x31 +#define QIB_7322_IntRedirect0_vec9_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec8_LSB 0x28 +#define QIB_7322_IntRedirect0_vec8_MSB 0x2C +#define QIB_7322_IntRedirect0_vec8_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec7_LSB 0x23 +#define QIB_7322_IntRedirect0_vec7_MSB 0x27 +#define QIB_7322_IntRedirect0_vec7_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec6_LSB 0x1E +#define QIB_7322_IntRedirect0_vec6_MSB 0x22 +#define QIB_7322_IntRedirect0_vec6_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec5_LSB 0x19 +#define QIB_7322_IntRedirect0_vec5_MSB 0x1D +#define QIB_7322_IntRedirect0_vec5_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec4_LSB 0x14 +#define QIB_7322_IntRedirect0_vec4_MSB 0x18 +#define 
QIB_7322_IntRedirect0_vec4_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec3_LSB 0xF +#define QIB_7322_IntRedirect0_vec3_MSB 0x13 +#define QIB_7322_IntRedirect0_vec3_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec2_LSB 0xA +#define QIB_7322_IntRedirect0_vec2_MSB 0xE +#define QIB_7322_IntRedirect0_vec2_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec1_LSB 0x5 +#define QIB_7322_IntRedirect0_vec1_MSB 0x9 +#define QIB_7322_IntRedirect0_vec1_RMASK 0x1F +#define QIB_7322_IntRedirect0_vec0_LSB 0x0 +#define QIB_7322_IntRedirect0_vec0_MSB 0x4 +#define QIB_7322_IntRedirect0_vec0_RMASK 0x1F + +#define QIB_7322_Int_Granted_OFFS 0x570 +#define QIB_7322_Int_Granted_DEF 0x0000000000000000 + +#define QIB_7322_vec_clr_without_int_OFFS 0x578 +#define QIB_7322_vec_clr_without_int_DEF 0x0000000000000000 + +#define QIB_7322_DCACtrlA_OFFS 0x580 +#define QIB_7322_DCACtrlA_DEF 0x0000000000000000 +#define QIB_7322_DCACtrlA_SendDMAHead1DCAEnable_LSB 0x4 +#define QIB_7322_DCACtrlA_SendDMAHead1DCAEnable_MSB 0x4 +#define QIB_7322_DCACtrlA_SendDMAHead1DCAEnable_RMASK 0x1 +#define QIB_7322_DCACtrlA_SendDMAHead0DCAEnable_LSB 0x3 +#define QIB_7322_DCACtrlA_SendDMAHead0DCAEnable_MSB 0x3 +#define QIB_7322_DCACtrlA_SendDMAHead0DCAEnable_RMASK 0x1 +#define QIB_7322_DCACtrlA_RcvTailUpdDCAEnable_LSB 0x2 +#define QIB_7322_DCACtrlA_RcvTailUpdDCAEnable_MSB 0x2 +#define QIB_7322_DCACtrlA_RcvTailUpdDCAEnable_RMASK 0x1 +#define QIB_7322_DCACtrlA_EagerDCAEnable_LSB 0x1 +#define QIB_7322_DCACtrlA_EagerDCAEnable_MSB 0x1 +#define QIB_7322_DCACtrlA_EagerDCAEnable_RMASK 0x1 +#define QIB_7322_DCACtrlA_RcvHdrqDCAEnable_LSB 0x0 +#define QIB_7322_DCACtrlA_RcvHdrqDCAEnable_MSB 0x0 +#define QIB_7322_DCACtrlA_RcvHdrqDCAEnable_RMASK 0x1 + +#define QIB_7322_DCACtrlB_OFFS 0x588 +#define QIB_7322_DCACtrlB_DEF 0x0000000000000000 +#define QIB_7322_DCACtrlB_RcvHdrq3DCAXfrCnt_LSB 0x36 +#define QIB_7322_DCACtrlB_RcvHdrq3DCAXfrCnt_MSB 0x3B +#define QIB_7322_DCACtrlB_RcvHdrq3DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlB_RcvHdrq3DCAOPH_LSB 0x2E +#define QIB_7322_DCACtrlB_RcvHdrq3DCAOPH_MSB 0x35 +#define QIB_7322_DCACtrlB_RcvHdrq3DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlB_RcvHdrq2DCAXfrCnt_LSB 0x28 +#define QIB_7322_DCACtrlB_RcvHdrq2DCAXfrCnt_MSB 0x2D +#define QIB_7322_DCACtrlB_RcvHdrq2DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlB_RcvHdrq2DCAOPH_LSB 0x20 +#define QIB_7322_DCACtrlB_RcvHdrq2DCAOPH_MSB 0x27 +#define QIB_7322_DCACtrlB_RcvHdrq2DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlB_RcvHdrq1DCAXfrCnt_LSB 0x16 +#define QIB_7322_DCACtrlB_RcvHdrq1DCAXfrCnt_MSB 0x1B +#define QIB_7322_DCACtrlB_RcvHdrq1DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlB_RcvHdrq1DCAOPH_LSB 0xE +#define QIB_7322_DCACtrlB_RcvHdrq1DCAOPH_MSB 0x15 +#define QIB_7322_DCACtrlB_RcvHdrq1DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlB_RcvHdrq0DCAXfrCnt_LSB 0x8 +#define QIB_7322_DCACtrlB_RcvHdrq0DCAXfrCnt_MSB 0xD +#define QIB_7322_DCACtrlB_RcvHdrq0DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlB_RcvHdrq0DCAOPH_LSB 0x0 +#define QIB_7322_DCACtrlB_RcvHdrq0DCAOPH_MSB 0x7 +#define QIB_7322_DCACtrlB_RcvHdrq0DCAOPH_RMASK 0xFF + +#define QIB_7322_DCACtrlC_OFFS 0x590 +#define QIB_7322_DCACtrlC_DEF 0x0000000000000000 +#define QIB_7322_DCACtrlC_RcvHdrq7DCAXfrCnt_LSB 0x36 +#define QIB_7322_DCACtrlC_RcvHdrq7DCAXfrCnt_MSB 0x3B +#define QIB_7322_DCACtrlC_RcvHdrq7DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlC_RcvHdrq7DCAOPH_LSB 0x2E +#define QIB_7322_DCACtrlC_RcvHdrq7DCAOPH_MSB 0x35 +#define QIB_7322_DCACtrlC_RcvHdrq7DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlC_RcvHdrq6DCAXfrCnt_LSB 0x28 +#define 
QIB_7322_DCACtrlC_RcvHdrq6DCAXfrCnt_MSB 0x2D +#define QIB_7322_DCACtrlC_RcvHdrq6DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlC_RcvHdrq6DCAOPH_LSB 0x20 +#define QIB_7322_DCACtrlC_RcvHdrq6DCAOPH_MSB 0x27 +#define QIB_7322_DCACtrlC_RcvHdrq6DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlC_RcvHdrq5DCAXfrCnt_LSB 0x16 +#define QIB_7322_DCACtrlC_RcvHdrq5DCAXfrCnt_MSB 0x1B +#define QIB_7322_DCACtrlC_RcvHdrq5DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlC_RcvHdrq5DCAOPH_LSB 0xE +#define QIB_7322_DCACtrlC_RcvHdrq5DCAOPH_MSB 0x15 +#define QIB_7322_DCACtrlC_RcvHdrq5DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlC_RcvHdrq4DCAXfrCnt_LSB 0x8 +#define QIB_7322_DCACtrlC_RcvHdrq4DCAXfrCnt_MSB 0xD +#define QIB_7322_DCACtrlC_RcvHdrq4DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlC_RcvHdrq4DCAOPH_LSB 0x0 +#define QIB_7322_DCACtrlC_RcvHdrq4DCAOPH_MSB 0x7 +#define QIB_7322_DCACtrlC_RcvHdrq4DCAOPH_RMASK 0xFF + +#define QIB_7322_DCACtrlD_OFFS 0x598 +#define QIB_7322_DCACtrlD_DEF 0x0000000000000000 +#define QIB_7322_DCACtrlD_RcvHdrq11DCAXfrCnt_LSB 0x36 +#define QIB_7322_DCACtrlD_RcvHdrq11DCAXfrCnt_MSB 0x3B +#define QIB_7322_DCACtrlD_RcvHdrq11DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlD_RcvHdrq11DCAOPH_LSB 0x2E +#define QIB_7322_DCACtrlD_RcvHdrq11DCAOPH_MSB 0x35 +#define QIB_7322_DCACtrlD_RcvHdrq11DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlD_RcvHdrq10DCAXfrCnt_LSB 0x28 +#define QIB_7322_DCACtrlD_RcvHdrq10DCAXfrCnt_MSB 0x2D +#define QIB_7322_DCACtrlD_RcvHdrq10DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlD_RcvHdrq10DCAOPH_LSB 0x20 +#define QIB_7322_DCACtrlD_RcvHdrq10DCAOPH_MSB 0x27 +#define QIB_7322_DCACtrlD_RcvHdrq10DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlD_RcvHdrq9DCAXfrCnt_LSB 0x16 +#define QIB_7322_DCACtrlD_RcvHdrq9DCAXfrCnt_MSB 0x1B +#define QIB_7322_DCACtrlD_RcvHdrq9DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlD_RcvHdrq9DCAOPH_LSB 0xE +#define QIB_7322_DCACtrlD_RcvHdrq9DCAOPH_MSB 0x15 +#define QIB_7322_DCACtrlD_RcvHdrq9DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlD_RcvHdrq8DCAXfrCnt_LSB 0x8 +#define QIB_7322_DCACtrlD_RcvHdrq8DCAXfrCnt_MSB 0xD +#define QIB_7322_DCACtrlD_RcvHdrq8DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlD_RcvHdrq8DCAOPH_LSB 0x0 +#define QIB_7322_DCACtrlD_RcvHdrq8DCAOPH_MSB 0x7 +#define QIB_7322_DCACtrlD_RcvHdrq8DCAOPH_RMASK 0xFF + +#define QIB_7322_DCACtrlE_OFFS 0x5A0 +#define QIB_7322_DCACtrlE_DEF 0x0000000000000000 +#define QIB_7322_DCACtrlE_RcvHdrq15DCAXfrCnt_LSB 0x36 +#define QIB_7322_DCACtrlE_RcvHdrq15DCAXfrCnt_MSB 0x3B +#define QIB_7322_DCACtrlE_RcvHdrq15DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlE_RcvHdrq15DCAOPH_LSB 0x2E +#define QIB_7322_DCACtrlE_RcvHdrq15DCAOPH_MSB 0x35 +#define QIB_7322_DCACtrlE_RcvHdrq15DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlE_RcvHdrq14DCAXfrCnt_LSB 0x28 +#define QIB_7322_DCACtrlE_RcvHdrq14DCAXfrCnt_MSB 0x2D +#define QIB_7322_DCACtrlE_RcvHdrq14DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlE_RcvHdrq14DCAOPH_LSB 0x20 +#define QIB_7322_DCACtrlE_RcvHdrq14DCAOPH_MSB 0x27 +#define QIB_7322_DCACtrlE_RcvHdrq14DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlE_RcvHdrq13DCAXfrCnt_LSB 0x16 +#define QIB_7322_DCACtrlE_RcvHdrq13DCAXfrCnt_MSB 0x1B +#define QIB_7322_DCACtrlE_RcvHdrq13DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlE_RcvHdrq13DCAOPH_LSB 0xE +#define QIB_7322_DCACtrlE_RcvHdrq13DCAOPH_MSB 0x15 +#define QIB_7322_DCACtrlE_RcvHdrq13DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlE_RcvHdrq12DCAXfrCnt_LSB 0x8 +#define QIB_7322_DCACtrlE_RcvHdrq12DCAXfrCnt_MSB 0xD +#define QIB_7322_DCACtrlE_RcvHdrq12DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlE_RcvHdrq12DCAOPH_LSB 0x0 +#define 
QIB_7322_DCACtrlE_RcvHdrq12DCAOPH_MSB 0x7 +#define QIB_7322_DCACtrlE_RcvHdrq12DCAOPH_RMASK 0xFF + +#define QIB_7322_DCACtrlF_OFFS 0x5A8 +#define QIB_7322_DCACtrlF_DEF 0x0000000000000000 +#define QIB_7322_DCACtrlF_SendDma1DCAOPH_LSB 0x28 +#define QIB_7322_DCACtrlF_SendDma1DCAOPH_MSB 0x2F +#define QIB_7322_DCACtrlF_SendDma1DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlF_SendDma0DCAOPH_LSB 0x20 +#define QIB_7322_DCACtrlF_SendDma0DCAOPH_MSB 0x27 +#define QIB_7322_DCACtrlF_SendDma0DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlF_RcvHdrq17DCAXfrCnt_LSB 0x16 +#define QIB_7322_DCACtrlF_RcvHdrq17DCAXfrCnt_MSB 0x1B +#define QIB_7322_DCACtrlF_RcvHdrq17DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlF_RcvHdrq17DCAOPH_LSB 0xE +#define QIB_7322_DCACtrlF_RcvHdrq17DCAOPH_MSB 0x15 +#define QIB_7322_DCACtrlF_RcvHdrq17DCAOPH_RMASK 0xFF +#define QIB_7322_DCACtrlF_RcvHdrq16DCAXfrCnt_LSB 0x8 +#define QIB_7322_DCACtrlF_RcvHdrq16DCAXfrCnt_MSB 0xD +#define QIB_7322_DCACtrlF_RcvHdrq16DCAXfrCnt_RMASK 0x3F +#define QIB_7322_DCACtrlF_RcvHdrq16DCAOPH_LSB 0x0 +#define QIB_7322_DCACtrlF_RcvHdrq16DCAOPH_MSB 0x7 +#define QIB_7322_DCACtrlF_RcvHdrq16DCAOPH_RMASK 0xFF + +#define QIB_7322_RcvAvailTimeOut0_OFFS 0xC00 +#define QIB_7322_RcvAvailTimeOut0_DEF 0x0000000000000000 +#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOCount_LSB 0x10 +#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOCount_MSB 0x1F +#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOCount_RMASK 0xFFFF +#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOReload_LSB 0x0 +#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOReload_MSB 0xF +#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOReload_RMASK 0xFFFF + +#define QIB_7322_CntrRegBase_0_OFFS 0x1028 +#define QIB_7322_CntrRegBase_0_DEF 0x0000000000012000 + +#define QIB_7322_ErrMask_0_OFFS 0x1080 +#define QIB_7322_ErrMask_0_DEF 0x0000000000000000 +#define QIB_7322_ErrMask_0_IBStatusChangedMask_LSB 0x3A +#define QIB_7322_ErrMask_0_IBStatusChangedMask_MSB 0x3A +#define QIB_7322_ErrMask_0_IBStatusChangedMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SHeadersErrMask_LSB 0x39 +#define QIB_7322_ErrMask_0_SHeadersErrMask_MSB 0x39 +#define QIB_7322_ErrMask_0_SHeadersErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_VL15BufMisuseErrMask_LSB 0x36 +#define QIB_7322_ErrMask_0_VL15BufMisuseErrMask_MSB 0x36 +#define QIB_7322_ErrMask_0_VL15BufMisuseErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaHaltErrMask_LSB 0x31 +#define QIB_7322_ErrMask_0_SDmaHaltErrMask_MSB 0x31 +#define QIB_7322_ErrMask_0_SDmaHaltErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaDescAddrMisalignErrMask_LSB 0x30 +#define QIB_7322_ErrMask_0_SDmaDescAddrMisalignErrMask_MSB 0x30 +#define QIB_7322_ErrMask_0_SDmaDescAddrMisalignErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaUnexpDataErrMask_LSB 0x2F +#define QIB_7322_ErrMask_0_SDmaUnexpDataErrMask_MSB 0x2F +#define QIB_7322_ErrMask_0_SDmaUnexpDataErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaMissingDwErrMask_LSB 0x2E +#define QIB_7322_ErrMask_0_SDmaMissingDwErrMask_MSB 0x2E +#define QIB_7322_ErrMask_0_SDmaMissingDwErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaDwEnErrMask_LSB 0x2D +#define QIB_7322_ErrMask_0_SDmaDwEnErrMask_MSB 0x2D +#define QIB_7322_ErrMask_0_SDmaDwEnErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaRpyTagErrMask_LSB 0x2C +#define QIB_7322_ErrMask_0_SDmaRpyTagErrMask_MSB 0x2C +#define QIB_7322_ErrMask_0_SDmaRpyTagErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDma1stDescErrMask_LSB 0x2B +#define QIB_7322_ErrMask_0_SDma1stDescErrMask_MSB 0x2B +#define QIB_7322_ErrMask_0_SDma1stDescErrMask_RMASK 0x1 +#define 
QIB_7322_ErrMask_0_SDmaBaseErrMask_LSB 0x2A +#define QIB_7322_ErrMask_0_SDmaBaseErrMask_MSB 0x2A +#define QIB_7322_ErrMask_0_SDmaBaseErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaTailOutOfBoundErrMask_LSB 0x29 +#define QIB_7322_ErrMask_0_SDmaTailOutOfBoundErrMask_MSB 0x29 +#define QIB_7322_ErrMask_0_SDmaTailOutOfBoundErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaOutOfBoundErrMask_LSB 0x28 +#define QIB_7322_ErrMask_0_SDmaOutOfBoundErrMask_MSB 0x28 +#define QIB_7322_ErrMask_0_SDmaOutOfBoundErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SDmaGenMismatchErrMask_LSB 0x27 +#define QIB_7322_ErrMask_0_SDmaGenMismatchErrMask_MSB 0x27 +#define QIB_7322_ErrMask_0_SDmaGenMismatchErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendBufMisuseErrMask_LSB 0x26 +#define QIB_7322_ErrMask_0_SendBufMisuseErrMask_MSB 0x26 +#define QIB_7322_ErrMask_0_SendBufMisuseErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendUnsupportedVLErrMask_LSB 0x25 +#define QIB_7322_ErrMask_0_SendUnsupportedVLErrMask_MSB 0x25 +#define QIB_7322_ErrMask_0_SendUnsupportedVLErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendUnexpectedPktNumErrMask_LSB 0x24 +#define QIB_7322_ErrMask_0_SendUnexpectedPktNumErrMask_MSB 0x24 +#define QIB_7322_ErrMask_0_SendUnexpectedPktNumErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendDroppedDataPktErrMask_LSB 0x22 +#define QIB_7322_ErrMask_0_SendDroppedDataPktErrMask_MSB 0x22 +#define QIB_7322_ErrMask_0_SendDroppedDataPktErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendDroppedSmpPktErrMask_LSB 0x21 +#define QIB_7322_ErrMask_0_SendDroppedSmpPktErrMask_MSB 0x21 +#define QIB_7322_ErrMask_0_SendDroppedSmpPktErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendPktLenErrMask_LSB 0x20 +#define QIB_7322_ErrMask_0_SendPktLenErrMask_MSB 0x20 +#define QIB_7322_ErrMask_0_SendPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendUnderRunErrMask_LSB 0x1F +#define QIB_7322_ErrMask_0_SendUnderRunErrMask_MSB 0x1F +#define QIB_7322_ErrMask_0_SendUnderRunErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendMaxPktLenErrMask_LSB 0x1E +#define QIB_7322_ErrMask_0_SendMaxPktLenErrMask_MSB 0x1E +#define QIB_7322_ErrMask_0_SendMaxPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_SendMinPktLenErrMask_LSB 0x1D +#define QIB_7322_ErrMask_0_SendMinPktLenErrMask_MSB 0x1D +#define QIB_7322_ErrMask_0_SendMinPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvIBLostLinkErrMask_LSB 0x11 +#define QIB_7322_ErrMask_0_RcvIBLostLinkErrMask_MSB 0x11 +#define QIB_7322_ErrMask_0_RcvIBLostLinkErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvHdrErrMask_LSB 0x10 +#define QIB_7322_ErrMask_0_RcvHdrErrMask_MSB 0x10 +#define QIB_7322_ErrMask_0_RcvHdrErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvHdrLenErrMask_LSB 0xF +#define QIB_7322_ErrMask_0_RcvHdrLenErrMask_MSB 0xF +#define QIB_7322_ErrMask_0_RcvHdrLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvBadTidErrMask_LSB 0xE +#define QIB_7322_ErrMask_0_RcvBadTidErrMask_MSB 0xE +#define QIB_7322_ErrMask_0_RcvBadTidErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvBadVersionErrMask_LSB 0xB +#define QIB_7322_ErrMask_0_RcvBadVersionErrMask_MSB 0xB +#define QIB_7322_ErrMask_0_RcvBadVersionErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvIBFlowErrMask_LSB 0xA +#define QIB_7322_ErrMask_0_RcvIBFlowErrMask_MSB 0xA +#define QIB_7322_ErrMask_0_RcvIBFlowErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvEBPErrMask_LSB 0x9 +#define QIB_7322_ErrMask_0_RcvEBPErrMask_MSB 0x9 +#define QIB_7322_ErrMask_0_RcvEBPErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvUnsupportedVLErrMask_LSB 0x8 +#define 
QIB_7322_ErrMask_0_RcvUnsupportedVLErrMask_MSB 0x8 +#define QIB_7322_ErrMask_0_RcvUnsupportedVLErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvUnexpectedCharErrMask_LSB 0x7 +#define QIB_7322_ErrMask_0_RcvUnexpectedCharErrMask_MSB 0x7 +#define QIB_7322_ErrMask_0_RcvUnexpectedCharErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvShortPktLenErrMask_LSB 0x6 +#define QIB_7322_ErrMask_0_RcvShortPktLenErrMask_MSB 0x6 +#define QIB_7322_ErrMask_0_RcvShortPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvLongPktLenErrMask_LSB 0x5 +#define QIB_7322_ErrMask_0_RcvLongPktLenErrMask_MSB 0x5 +#define QIB_7322_ErrMask_0_RcvLongPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvMaxPktLenErrMask_LSB 0x4 +#define QIB_7322_ErrMask_0_RcvMaxPktLenErrMask_MSB 0x4 +#define QIB_7322_ErrMask_0_RcvMaxPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvMinPktLenErrMask_LSB 0x3 +#define QIB_7322_ErrMask_0_RcvMinPktLenErrMask_MSB 0x3 +#define QIB_7322_ErrMask_0_RcvMinPktLenErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvICRCErrMask_LSB 0x2 +#define QIB_7322_ErrMask_0_RcvICRCErrMask_MSB 0x2 +#define QIB_7322_ErrMask_0_RcvICRCErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvVCRCErrMask_LSB 0x1 +#define QIB_7322_ErrMask_0_RcvVCRCErrMask_MSB 0x1 +#define QIB_7322_ErrMask_0_RcvVCRCErrMask_RMASK 0x1 +#define QIB_7322_ErrMask_0_RcvFormatErrMask_LSB 0x0 +#define QIB_7322_ErrMask_0_RcvFormatErrMask_MSB 0x0 +#define QIB_7322_ErrMask_0_RcvFormatErrMask_RMASK 0x1 + +#define QIB_7322_ErrStatus_0_OFFS 0x1088 +#define QIB_7322_ErrStatus_0_DEF 0x0000000000000000 +#define QIB_7322_ErrStatus_0_IBStatusChanged_LSB 0x3A +#define QIB_7322_ErrStatus_0_IBStatusChanged_MSB 0x3A +#define QIB_7322_ErrStatus_0_IBStatusChanged_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SHeadersErr_LSB 0x39 +#define QIB_7322_ErrStatus_0_SHeadersErr_MSB 0x39 +#define QIB_7322_ErrStatus_0_SHeadersErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_VL15BufMisuseErr_LSB 0x36 +#define QIB_7322_ErrStatus_0_VL15BufMisuseErr_MSB 0x36 +#define QIB_7322_ErrStatus_0_VL15BufMisuseErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaHaltErr_LSB 0x31 +#define QIB_7322_ErrStatus_0_SDmaHaltErr_MSB 0x31 +#define QIB_7322_ErrStatus_0_SDmaHaltErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaDescAddrMisalignErr_LSB 0x30 +#define QIB_7322_ErrStatus_0_SDmaDescAddrMisalignErr_MSB 0x30 +#define QIB_7322_ErrStatus_0_SDmaDescAddrMisalignErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaUnexpDataErr_LSB 0x2F +#define QIB_7322_ErrStatus_0_SDmaUnexpDataErr_MSB 0x2F +#define QIB_7322_ErrStatus_0_SDmaUnexpDataErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaMissingDwErr_LSB 0x2E +#define QIB_7322_ErrStatus_0_SDmaMissingDwErr_MSB 0x2E +#define QIB_7322_ErrStatus_0_SDmaMissingDwErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaDwEnErr_LSB 0x2D +#define QIB_7322_ErrStatus_0_SDmaDwEnErr_MSB 0x2D +#define QIB_7322_ErrStatus_0_SDmaDwEnErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaRpyTagErr_LSB 0x2C +#define QIB_7322_ErrStatus_0_SDmaRpyTagErr_MSB 0x2C +#define QIB_7322_ErrStatus_0_SDmaRpyTagErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDma1stDescErr_LSB 0x2B +#define QIB_7322_ErrStatus_0_SDma1stDescErr_MSB 0x2B +#define QIB_7322_ErrStatus_0_SDma1stDescErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaBaseErr_LSB 0x2A +#define QIB_7322_ErrStatus_0_SDmaBaseErr_MSB 0x2A +#define QIB_7322_ErrStatus_0_SDmaBaseErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaTailOutOfBoundErr_LSB 0x29 +#define QIB_7322_ErrStatus_0_SDmaTailOutOfBoundErr_MSB 0x29 +#define QIB_7322_ErrStatus_0_SDmaTailOutOfBoundErr_RMASK 0x1 +#define 
QIB_7322_ErrStatus_0_SDmaOutOfBoundErr_LSB 0x28 +#define QIB_7322_ErrStatus_0_SDmaOutOfBoundErr_MSB 0x28 +#define QIB_7322_ErrStatus_0_SDmaOutOfBoundErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SDmaGenMismatchErr_LSB 0x27 +#define QIB_7322_ErrStatus_0_SDmaGenMismatchErr_MSB 0x27 +#define QIB_7322_ErrStatus_0_SDmaGenMismatchErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendBufMisuseErr_LSB 0x26 +#define QIB_7322_ErrStatus_0_SendBufMisuseErr_MSB 0x26 +#define QIB_7322_ErrStatus_0_SendBufMisuseErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendUnsupportedVLErr_LSB 0x25 +#define QIB_7322_ErrStatus_0_SendUnsupportedVLErr_MSB 0x25 +#define QIB_7322_ErrStatus_0_SendUnsupportedVLErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendUnexpectedPktNumErr_LSB 0x24 +#define QIB_7322_ErrStatus_0_SendUnexpectedPktNumErr_MSB 0x24 +#define QIB_7322_ErrStatus_0_SendUnexpectedPktNumErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendDroppedDataPktErr_LSB 0x22 +#define QIB_7322_ErrStatus_0_SendDroppedDataPktErr_MSB 0x22 +#define QIB_7322_ErrStatus_0_SendDroppedDataPktErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendDroppedSmpPktErr_LSB 0x21 +#define QIB_7322_ErrStatus_0_SendDroppedSmpPktErr_MSB 0x21 +#define QIB_7322_ErrStatus_0_SendDroppedSmpPktErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendPktLenErr_LSB 0x20 +#define QIB_7322_ErrStatus_0_SendPktLenErr_MSB 0x20 +#define QIB_7322_ErrStatus_0_SendPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendUnderRunErr_LSB 0x1F +#define QIB_7322_ErrStatus_0_SendUnderRunErr_MSB 0x1F +#define QIB_7322_ErrStatus_0_SendUnderRunErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendMaxPktLenErr_LSB 0x1E +#define QIB_7322_ErrStatus_0_SendMaxPktLenErr_MSB 0x1E +#define QIB_7322_ErrStatus_0_SendMaxPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_SendMinPktLenErr_LSB 0x1D +#define QIB_7322_ErrStatus_0_SendMinPktLenErr_MSB 0x1D +#define QIB_7322_ErrStatus_0_SendMinPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvIBLostLinkErr_LSB 0x11 +#define QIB_7322_ErrStatus_0_RcvIBLostLinkErr_MSB 0x11 +#define QIB_7322_ErrStatus_0_RcvIBLostLinkErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvHdrErr_LSB 0x10 +#define QIB_7322_ErrStatus_0_RcvHdrErr_MSB 0x10 +#define QIB_7322_ErrStatus_0_RcvHdrErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvHdrLenErr_LSB 0xF +#define QIB_7322_ErrStatus_0_RcvHdrLenErr_MSB 0xF +#define QIB_7322_ErrStatus_0_RcvHdrLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvBadTidErr_LSB 0xE +#define QIB_7322_ErrStatus_0_RcvBadTidErr_MSB 0xE +#define QIB_7322_ErrStatus_0_RcvBadTidErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvBadVersionErr_LSB 0xB +#define QIB_7322_ErrStatus_0_RcvBadVersionErr_MSB 0xB +#define QIB_7322_ErrStatus_0_RcvBadVersionErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvIBFlowErr_LSB 0xA +#define QIB_7322_ErrStatus_0_RcvIBFlowErr_MSB 0xA +#define QIB_7322_ErrStatus_0_RcvIBFlowErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvEBPErr_LSB 0x9 +#define QIB_7322_ErrStatus_0_RcvEBPErr_MSB 0x9 +#define QIB_7322_ErrStatus_0_RcvEBPErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvUnsupportedVLErr_LSB 0x8 +#define QIB_7322_ErrStatus_0_RcvUnsupportedVLErr_MSB 0x8 +#define QIB_7322_ErrStatus_0_RcvUnsupportedVLErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvUnexpectedCharErr_LSB 0x7 +#define QIB_7322_ErrStatus_0_RcvUnexpectedCharErr_MSB 0x7 +#define QIB_7322_ErrStatus_0_RcvUnexpectedCharErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvShortPktLenErr_LSB 0x6 +#define QIB_7322_ErrStatus_0_RcvShortPktLenErr_MSB 0x6 +#define QIB_7322_ErrStatus_0_RcvShortPktLenErr_RMASK 0x1 +#define 
QIB_7322_ErrStatus_0_RcvLongPktLenErr_LSB 0x5 +#define QIB_7322_ErrStatus_0_RcvLongPktLenErr_MSB 0x5 +#define QIB_7322_ErrStatus_0_RcvLongPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvMaxPktLenErr_LSB 0x4 +#define QIB_7322_ErrStatus_0_RcvMaxPktLenErr_MSB 0x4 +#define QIB_7322_ErrStatus_0_RcvMaxPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvMinPktLenErr_LSB 0x3 +#define QIB_7322_ErrStatus_0_RcvMinPktLenErr_MSB 0x3 +#define QIB_7322_ErrStatus_0_RcvMinPktLenErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvICRCErr_LSB 0x2 +#define QIB_7322_ErrStatus_0_RcvICRCErr_MSB 0x2 +#define QIB_7322_ErrStatus_0_RcvICRCErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvVCRCErr_LSB 0x1 +#define QIB_7322_ErrStatus_0_RcvVCRCErr_MSB 0x1 +#define QIB_7322_ErrStatus_0_RcvVCRCErr_RMASK 0x1 +#define QIB_7322_ErrStatus_0_RcvFormatErr_LSB 0x0 +#define QIB_7322_ErrStatus_0_RcvFormatErr_MSB 0x0 +#define QIB_7322_ErrStatus_0_RcvFormatErr_RMASK 0x1 + +#define QIB_7322_ErrClear_0_OFFS 0x1090 +#define QIB_7322_ErrClear_0_DEF 0x0000000000000000 +#define QIB_7322_ErrClear_0_IBStatusChangedClear_LSB 0x3A +#define QIB_7322_ErrClear_0_IBStatusChangedClear_MSB 0x3A +#define QIB_7322_ErrClear_0_IBStatusChangedClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SHeadersErrClear_LSB 0x39 +#define QIB_7322_ErrClear_0_SHeadersErrClear_MSB 0x39 +#define QIB_7322_ErrClear_0_SHeadersErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_VL15BufMisuseErrClear_LSB 0x36 +#define QIB_7322_ErrClear_0_VL15BufMisuseErrClear_MSB 0x36 +#define QIB_7322_ErrClear_0_VL15BufMisuseErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaHaltErrClear_LSB 0x31 +#define QIB_7322_ErrClear_0_SDmaHaltErrClear_MSB 0x31 +#define QIB_7322_ErrClear_0_SDmaHaltErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaDescAddrMisalignErrClear_LSB 0x30 +#define QIB_7322_ErrClear_0_SDmaDescAddrMisalignErrClear_MSB 0x30 +#define QIB_7322_ErrClear_0_SDmaDescAddrMisalignErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaUnexpDataErrClear_LSB 0x2F +#define QIB_7322_ErrClear_0_SDmaUnexpDataErrClear_MSB 0x2F +#define QIB_7322_ErrClear_0_SDmaUnexpDataErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaMissingDwErrClear_LSB 0x2E +#define QIB_7322_ErrClear_0_SDmaMissingDwErrClear_MSB 0x2E +#define QIB_7322_ErrClear_0_SDmaMissingDwErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaDwEnErrClear_LSB 0x2D +#define QIB_7322_ErrClear_0_SDmaDwEnErrClear_MSB 0x2D +#define QIB_7322_ErrClear_0_SDmaDwEnErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaRpyTagErrClear_LSB 0x2C +#define QIB_7322_ErrClear_0_SDmaRpyTagErrClear_MSB 0x2C +#define QIB_7322_ErrClear_0_SDmaRpyTagErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDma1stDescErrClear_LSB 0x2B +#define QIB_7322_ErrClear_0_SDma1stDescErrClear_MSB 0x2B +#define QIB_7322_ErrClear_0_SDma1stDescErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaBaseErrClear_LSB 0x2A +#define QIB_7322_ErrClear_0_SDmaBaseErrClear_MSB 0x2A +#define QIB_7322_ErrClear_0_SDmaBaseErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaTailOutOfBoundErrClear_LSB 0x29 +#define QIB_7322_ErrClear_0_SDmaTailOutOfBoundErrClear_MSB 0x29 +#define QIB_7322_ErrClear_0_SDmaTailOutOfBoundErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaOutOfBoundErrClear_LSB 0x28 +#define QIB_7322_ErrClear_0_SDmaOutOfBoundErrClear_MSB 0x28 +#define QIB_7322_ErrClear_0_SDmaOutOfBoundErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SDmaGenMismatchErrClear_LSB 0x27 +#define QIB_7322_ErrClear_0_SDmaGenMismatchErrClear_MSB 0x27 +#define QIB_7322_ErrClear_0_SDmaGenMismatchErrClear_RMASK 0x1 +#define 
QIB_7322_ErrClear_0_SendBufMisuseErrClear_LSB 0x26 +#define QIB_7322_ErrClear_0_SendBufMisuseErrClear_MSB 0x26 +#define QIB_7322_ErrClear_0_SendBufMisuseErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendUnsupportedVLErrClear_LSB 0x25 +#define QIB_7322_ErrClear_0_SendUnsupportedVLErrClear_MSB 0x25 +#define QIB_7322_ErrClear_0_SendUnsupportedVLErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendUnexpectedPktNumErrClear_LSB 0x24 +#define QIB_7322_ErrClear_0_SendUnexpectedPktNumErrClear_MSB 0x24 +#define QIB_7322_ErrClear_0_SendUnexpectedPktNumErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendDroppedDataPktErrClear_LSB 0x22 +#define QIB_7322_ErrClear_0_SendDroppedDataPktErrClear_MSB 0x22 +#define QIB_7322_ErrClear_0_SendDroppedDataPktErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendDroppedSmpPktErrClear_LSB 0x21 +#define QIB_7322_ErrClear_0_SendDroppedSmpPktErrClear_MSB 0x21 +#define QIB_7322_ErrClear_0_SendDroppedSmpPktErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendPktLenErrClear_LSB 0x20 +#define QIB_7322_ErrClear_0_SendPktLenErrClear_MSB 0x20 +#define QIB_7322_ErrClear_0_SendPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendUnderRunErrClear_LSB 0x1F +#define QIB_7322_ErrClear_0_SendUnderRunErrClear_MSB 0x1F +#define QIB_7322_ErrClear_0_SendUnderRunErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendMaxPktLenErrClear_LSB 0x1E +#define QIB_7322_ErrClear_0_SendMaxPktLenErrClear_MSB 0x1E +#define QIB_7322_ErrClear_0_SendMaxPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_SendMinPktLenErrClear_LSB 0x1D +#define QIB_7322_ErrClear_0_SendMinPktLenErrClear_MSB 0x1D +#define QIB_7322_ErrClear_0_SendMinPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvIBLostLinkErrClear_LSB 0x11 +#define QIB_7322_ErrClear_0_RcvIBLostLinkErrClear_MSB 0x11 +#define QIB_7322_ErrClear_0_RcvIBLostLinkErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvHdrErrClear_LSB 0x10 +#define QIB_7322_ErrClear_0_RcvHdrErrClear_MSB 0x10 +#define QIB_7322_ErrClear_0_RcvHdrErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvHdrLenErrClear_LSB 0xF +#define QIB_7322_ErrClear_0_RcvHdrLenErrClear_MSB 0xF +#define QIB_7322_ErrClear_0_RcvHdrLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvBadTidErrClear_LSB 0xE +#define QIB_7322_ErrClear_0_RcvBadTidErrClear_MSB 0xE +#define QIB_7322_ErrClear_0_RcvBadTidErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvBadVersionErrClear_LSB 0xB +#define QIB_7322_ErrClear_0_RcvBadVersionErrClear_MSB 0xB +#define QIB_7322_ErrClear_0_RcvBadVersionErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvIBFlowErrClear_LSB 0xA +#define QIB_7322_ErrClear_0_RcvIBFlowErrClear_MSB 0xA +#define QIB_7322_ErrClear_0_RcvIBFlowErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvEBPErrClear_LSB 0x9 +#define QIB_7322_ErrClear_0_RcvEBPErrClear_MSB 0x9 +#define QIB_7322_ErrClear_0_RcvEBPErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvUnsupportedVLErrClear_LSB 0x8 +#define QIB_7322_ErrClear_0_RcvUnsupportedVLErrClear_MSB 0x8 +#define QIB_7322_ErrClear_0_RcvUnsupportedVLErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvUnexpectedCharErrClear_LSB 0x7 +#define QIB_7322_ErrClear_0_RcvUnexpectedCharErrClear_MSB 0x7 +#define QIB_7322_ErrClear_0_RcvUnexpectedCharErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvShortPktLenErrClear_LSB 0x6 +#define QIB_7322_ErrClear_0_RcvShortPktLenErrClear_MSB 0x6 +#define QIB_7322_ErrClear_0_RcvShortPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvLongPktLenErrClear_LSB 0x5 +#define QIB_7322_ErrClear_0_RcvLongPktLenErrClear_MSB 0x5 +#define 
QIB_7322_ErrClear_0_RcvLongPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvMaxPktLenErrClear_LSB 0x4 +#define QIB_7322_ErrClear_0_RcvMaxPktLenErrClear_MSB 0x4 +#define QIB_7322_ErrClear_0_RcvMaxPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvMinPktLenErrClear_LSB 0x3 +#define QIB_7322_ErrClear_0_RcvMinPktLenErrClear_MSB 0x3 +#define QIB_7322_ErrClear_0_RcvMinPktLenErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvICRCErrClear_LSB 0x2 +#define QIB_7322_ErrClear_0_RcvICRCErrClear_MSB 0x2 +#define QIB_7322_ErrClear_0_RcvICRCErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvVCRCErrClear_LSB 0x1 +#define QIB_7322_ErrClear_0_RcvVCRCErrClear_MSB 0x1 +#define QIB_7322_ErrClear_0_RcvVCRCErrClear_RMASK 0x1 +#define QIB_7322_ErrClear_0_RcvFormatErrClear_LSB 0x0 +#define QIB_7322_ErrClear_0_RcvFormatErrClear_MSB 0x0 +#define QIB_7322_ErrClear_0_RcvFormatErrClear_RMASK 0x1 + +#define QIB_7322_TXEStatus_0_OFFS 0x10B8 +#define QIB_7322_TXEStatus_0_DEF 0x0000000XC00080FF +#define QIB_7322_TXEStatus_0_TXE_IBC_Idle_LSB 0x1F +#define QIB_7322_TXEStatus_0_TXE_IBC_Idle_MSB 0x1F +#define QIB_7322_TXEStatus_0_TXE_IBC_Idle_RMASK 0x1 +#define QIB_7322_TXEStatus_0_RmFifoEmpty_LSB 0x1E +#define QIB_7322_TXEStatus_0_RmFifoEmpty_MSB 0x1E +#define QIB_7322_TXEStatus_0_RmFifoEmpty_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL15_LSB 0xF +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL15_MSB 0xF +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL15_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL7_LSB 0x7 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL7_MSB 0x7 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL7_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL6_LSB 0x6 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL6_MSB 0x6 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL6_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL5_LSB 0x5 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL5_MSB 0x5 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL5_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL4_LSB 0x4 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL4_MSB 0x4 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL4_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL3_LSB 0x3 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL3_MSB 0x3 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL3_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL2_LSB 0x2 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL2_MSB 0x2 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL2_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL1_LSB 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL1_MSB 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL1_RMASK 0x1 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL0_LSB 0x0 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL0_MSB 0x0 +#define QIB_7322_TXEStatus_0_LaFifoEmpty_VL0_RMASK 0x1 + +#define QIB_7322_RcvCtrl_0_OFFS 0x1100 +#define QIB_7322_RcvCtrl_0_DEF 0x0000000000000000 +#define QIB_7322_RcvCtrl_0_RcvResetCredit_LSB 0x2A +#define QIB_7322_RcvCtrl_0_RcvResetCredit_MSB 0x2A +#define QIB_7322_RcvCtrl_0_RcvResetCredit_RMASK 0x1 +#define QIB_7322_RcvCtrl_0_RcvPartitionKeyDisable_LSB 0x29 +#define QIB_7322_RcvCtrl_0_RcvPartitionKeyDisable_MSB 0x29 +#define QIB_7322_RcvCtrl_0_RcvPartitionKeyDisable_RMASK 0x1 +#define QIB_7322_RcvCtrl_0_RcvQPMapEnable_LSB 0x28 +#define QIB_7322_RcvCtrl_0_RcvQPMapEnable_MSB 0x28 +#define QIB_7322_RcvCtrl_0_RcvQPMapEnable_RMASK 0x1 +#define QIB_7322_RcvCtrl_0_RcvIBPortEnable_LSB 0x27 +#define QIB_7322_RcvCtrl_0_RcvIBPortEnable_MSB 0x27 +#define QIB_7322_RcvCtrl_0_RcvIBPortEnable_RMASK 0x1 +#define 
QIB_7322_RcvCtrl_0_ContextEnableUser_LSB 0x2 +#define QIB_7322_RcvCtrl_0_ContextEnableUser_MSB 0x11 +#define QIB_7322_RcvCtrl_0_ContextEnableUser_RMASK 0xFFFF +#define QIB_7322_RcvCtrl_0_ContextEnableKernel_LSB 0x0 +#define QIB_7322_RcvCtrl_0_ContextEnableKernel_MSB 0x0 +#define QIB_7322_RcvCtrl_0_ContextEnableKernel_RMASK 0x1 + +#define QIB_7322_RcvBTHQP_0_OFFS 0x1108 +#define QIB_7322_RcvBTHQP_0_DEF 0x0000000000000000 +#define QIB_7322_RcvBTHQP_0_RcvBTHQP_LSB 0x0 +#define QIB_7322_RcvBTHQP_0_RcvBTHQP_MSB 0x17 +#define QIB_7322_RcvBTHQP_0_RcvBTHQP_RMASK 0xFFFFFF + +#define QIB_7322_RcvQPMapTableA_0_OFFS 0x1110 +#define QIB_7322_RcvQPMapTableA_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext5_LSB 0x19 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext5_MSB 0x1D +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext5_RMASK 0x1F +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext4_LSB 0x14 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext4_MSB 0x18 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext4_RMASK 0x1F +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext3_LSB 0xF +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext3_MSB 0x13 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext3_RMASK 0x1F +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext2_LSB 0xA +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext2_MSB 0xE +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext2_RMASK 0x1F +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext1_LSB 0x5 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext1_MSB 0x9 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext1_RMASK 0x1F +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext0_LSB 0x0 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext0_MSB 0x4 +#define QIB_7322_RcvQPMapTableA_0_RcvQPMapContext0_RMASK 0x1F + +#define QIB_7322_RcvQPMapTableB_0_OFFS 0x1118 +#define QIB_7322_RcvQPMapTableB_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext11_LSB 0x19 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext11_MSB 0x1D +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext11_RMASK 0x1F +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext10_LSB 0x14 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext10_MSB 0x18 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext10_RMASK 0x1F +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext9_LSB 0xF +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext9_MSB 0x13 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext9_RMASK 0x1F +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext8_LSB 0xA +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext8_MSB 0xE +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext8_RMASK 0x1F +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext7_LSB 0x5 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext7_MSB 0x9 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext7_RMASK 0x1F +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext6_LSB 0x0 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext6_MSB 0x4 +#define QIB_7322_RcvQPMapTableB_0_RcvQPMapContext6_RMASK 0x1F + +#define QIB_7322_RcvQPMapTableC_0_OFFS 0x1120 +#define QIB_7322_RcvQPMapTableC_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext17_LSB 0x19 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext17_MSB 0x1D +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext17_RMASK 0x1F +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext16_LSB 0x14 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext16_MSB 0x18 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext16_RMASK 0x1F +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext15_LSB 0xF +#define 
QIB_7322_RcvQPMapTableC_0_RcvQPMapContext15_MSB 0x13 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext15_RMASK 0x1F +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext14_LSB 0xA +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext14_MSB 0xE +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext14_RMASK 0x1F +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext13_LSB 0x5 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext13_MSB 0x9 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext13_RMASK 0x1F +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext12_LSB 0x0 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext12_MSB 0x4 +#define QIB_7322_RcvQPMapTableC_0_RcvQPMapContext12_RMASK 0x1F + +#define QIB_7322_RcvQPMapTableD_0_OFFS 0x1128 +#define QIB_7322_RcvQPMapTableD_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext23_LSB 0x19 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext23_MSB 0x1D +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext23_RMASK 0x1F +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext22_LSB 0x14 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext22_MSB 0x18 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext22_RMASK 0x1F +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext21_LSB 0xF +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext21_MSB 0x13 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext21_RMASK 0x1F +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext20_LSB 0xA +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext20_MSB 0xE +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext20_RMASK 0x1F +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext19_LSB 0x5 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext19_MSB 0x9 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext19_RMASK 0x1F +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext18_LSB 0x0 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext18_MSB 0x4 +#define QIB_7322_RcvQPMapTableD_0_RcvQPMapContext18_RMASK 0x1F + +#define QIB_7322_RcvQPMapTableE_0_OFFS 0x1130 +#define QIB_7322_RcvQPMapTableE_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext29_LSB 0x19 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext29_MSB 0x1D +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext29_RMASK 0x1F +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext28_LSB 0x14 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext28_MSB 0x18 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext28_RMASK 0x1F +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext27_LSB 0xF +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext27_MSB 0x13 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext27_RMASK 0x1F +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext26_LSB 0xA +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext26_MSB 0xE +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext26_RMASK 0x1F +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext25_LSB 0x5 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext25_MSB 0x9 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext25_RMASK 0x1F +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext24_LSB 0x0 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext24_MSB 0x4 +#define QIB_7322_RcvQPMapTableE_0_RcvQPMapContext24_RMASK 0x1F + +#define QIB_7322_RcvQPMapTableF_0_OFFS 0x1138 +#define QIB_7322_RcvQPMapTableF_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext31_LSB 0x5 +#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext31_MSB 0x9 +#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext31_RMASK 0x1F +#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext30_LSB 0x0 +#define QIB_7322_RcvQPMapTableF_0_RcvQPMapContext30_MSB 0x4 +#define 
QIB_7322_RcvQPMapTableF_0_RcvQPMapContext30_RMASK 0x1F + +#define QIB_7322_PSStat_0_OFFS 0x1140 +#define QIB_7322_PSStat_0_DEF 0x0000000000000000 + +#define QIB_7322_PSStart_0_OFFS 0x1148 +#define QIB_7322_PSStart_0_DEF 0x0000000000000000 + +#define QIB_7322_PSInterval_0_OFFS 0x1150 +#define QIB_7322_PSInterval_0_DEF 0x0000000000000000 + +#define QIB_7322_RcvStatus_0_OFFS 0x1160 +#define QIB_7322_RcvStatus_0_DEF 0x0000000000000000 +#define QIB_7322_RcvStatus_0_DmaeqBlockingContext_LSB 0x1 +#define QIB_7322_RcvStatus_0_DmaeqBlockingContext_MSB 0x5 +#define QIB_7322_RcvStatus_0_DmaeqBlockingContext_RMASK 0x1F +#define QIB_7322_RcvStatus_0_RxPktInProgress_LSB 0x0 +#define QIB_7322_RcvStatus_0_RxPktInProgress_MSB 0x0 +#define QIB_7322_RcvStatus_0_RxPktInProgress_RMASK 0x1 + +#define QIB_7322_RcvPartitionKey_0_OFFS 0x1168 +#define QIB_7322_RcvPartitionKey_0_DEF 0x0000000000000000 + +#define QIB_7322_RcvQPMulticastContext_0_OFFS 0x1170 +#define QIB_7322_RcvQPMulticastContext_0_DEF 0x0000000000000000 +#define QIB_7322_RcvQPMulticastContext_0_RcvQpMcContext_LSB 0x0 +#define QIB_7322_RcvQPMulticastContext_0_RcvQpMcContext_MSB 0x4 +#define QIB_7322_RcvQPMulticastContext_0_RcvQpMcContext_RMASK 0x1F + +#define QIB_7322_RcvPktLEDCnt_0_OFFS 0x1178 +#define QIB_7322_RcvPktLEDCnt_0_DEF 0x0000000000000000 +#define QIB_7322_RcvPktLEDCnt_0_ONperiod_LSB 0x20 +#define QIB_7322_RcvPktLEDCnt_0_ONperiod_MSB 0x3F +#define QIB_7322_RcvPktLEDCnt_0_ONperiod_RMASK 0xFFFFFFFF +#define QIB_7322_RcvPktLEDCnt_0_OFFperiod_LSB 0x0 +#define QIB_7322_RcvPktLEDCnt_0_OFFperiod_MSB 0x1F +#define QIB_7322_RcvPktLEDCnt_0_OFFperiod_RMASK 0xFFFFFFFF + +#define QIB_7322_SendDmaIdleCnt_0_OFFS 0x1180 +#define QIB_7322_SendDmaIdleCnt_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaIdleCnt_0_SendDmaIdleCnt_LSB 0x0 +#define QIB_7322_SendDmaIdleCnt_0_SendDmaIdleCnt_MSB 0xF +#define QIB_7322_SendDmaIdleCnt_0_SendDmaIdleCnt_RMASK 0xFFFF + +#define QIB_7322_SendDmaReloadCnt_0_OFFS 0x1188 +#define QIB_7322_SendDmaReloadCnt_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaReloadCnt_0_SendDmaReloadCnt_LSB 0x0 +#define QIB_7322_SendDmaReloadCnt_0_SendDmaReloadCnt_MSB 0xF +#define QIB_7322_SendDmaReloadCnt_0_SendDmaReloadCnt_RMASK 0xFFFF + +#define QIB_7322_SendDmaDescCnt_0_OFFS 0x1190 +#define QIB_7322_SendDmaDescCnt_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaDescCnt_0_SendDmaDescCnt_LSB 0x0 +#define QIB_7322_SendDmaDescCnt_0_SendDmaDescCnt_MSB 0xF +#define QIB_7322_SendDmaDescCnt_0_SendDmaDescCnt_RMASK 0xFFFF + +#define QIB_7322_SendCtrl_0_OFFS 0x11C0 +#define QIB_7322_SendCtrl_0_DEF 0x0000000000000000 +#define QIB_7322_SendCtrl_0_IBVLArbiterEn_LSB 0xF +#define QIB_7322_SendCtrl_0_IBVLArbiterEn_MSB 0xF +#define QIB_7322_SendCtrl_0_IBVLArbiterEn_RMASK 0x1 +#define QIB_7322_SendCtrl_0_TxeDrainRmFifo_LSB 0xE +#define QIB_7322_SendCtrl_0_TxeDrainRmFifo_MSB 0xE +#define QIB_7322_SendCtrl_0_TxeDrainRmFifo_RMASK 0x1 +#define QIB_7322_SendCtrl_0_TxeDrainLaFifo_LSB 0xD +#define QIB_7322_SendCtrl_0_TxeDrainLaFifo_MSB 0xD +#define QIB_7322_SendCtrl_0_TxeDrainLaFifo_RMASK 0x1 +#define QIB_7322_SendCtrl_0_SDmaHalt_LSB 0xC +#define QIB_7322_SendCtrl_0_SDmaHalt_MSB 0xC +#define QIB_7322_SendCtrl_0_SDmaHalt_RMASK 0x1 +#define QIB_7322_SendCtrl_0_SDmaEnable_LSB 0xB +#define QIB_7322_SendCtrl_0_SDmaEnable_MSB 0xB +#define QIB_7322_SendCtrl_0_SDmaEnable_RMASK 0x1 +#define QIB_7322_SendCtrl_0_SDmaSingleDescriptor_LSB 0xA +#define QIB_7322_SendCtrl_0_SDmaSingleDescriptor_MSB 0xA +#define QIB_7322_SendCtrl_0_SDmaSingleDescriptor_RMASK 0x1 +#define 
QIB_7322_SendCtrl_0_SDmaIntEnable_LSB 0x9 +#define QIB_7322_SendCtrl_0_SDmaIntEnable_MSB 0x9 +#define QIB_7322_SendCtrl_0_SDmaIntEnable_RMASK 0x1 +#define QIB_7322_SendCtrl_0_SDmaCleanup_LSB 0x8 +#define QIB_7322_SendCtrl_0_SDmaCleanup_MSB 0x8 +#define QIB_7322_SendCtrl_0_SDmaCleanup_RMASK 0x1 +#define QIB_7322_SendCtrl_0_ForceCreditUpToDate_LSB 0x7 +#define QIB_7322_SendCtrl_0_ForceCreditUpToDate_MSB 0x7 +#define QIB_7322_SendCtrl_0_ForceCreditUpToDate_RMASK 0x1 +#define QIB_7322_SendCtrl_0_SendEnable_LSB 0x3 +#define QIB_7322_SendCtrl_0_SendEnable_MSB 0x3 +#define QIB_7322_SendCtrl_0_SendEnable_RMASK 0x1 +#define QIB_7322_SendCtrl_0_TxeBypassIbc_LSB 0x1 +#define QIB_7322_SendCtrl_0_TxeBypassIbc_MSB 0x1 +#define QIB_7322_SendCtrl_0_TxeBypassIbc_RMASK 0x1 +#define QIB_7322_SendCtrl_0_TxeAbortIbc_LSB 0x0 +#define QIB_7322_SendCtrl_0_TxeAbortIbc_MSB 0x0 +#define QIB_7322_SendCtrl_0_TxeAbortIbc_RMASK 0x1 + +#define QIB_7322_SendDmaBase_0_OFFS 0x11F8 +#define QIB_7322_SendDmaBase_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaBase_0_SendDmaBase_LSB 0x0 +#define QIB_7322_SendDmaBase_0_SendDmaBase_MSB 0x2F +#define QIB_7322_SendDmaBase_0_SendDmaBase_RMASK 0xFFFFFFFFFFFF + +#define QIB_7322_SendDmaLenGen_0_OFFS 0x1200 +#define QIB_7322_SendDmaLenGen_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaLenGen_0_Generation_LSB 0x10 +#define QIB_7322_SendDmaLenGen_0_Generation_MSB 0x12 +#define QIB_7322_SendDmaLenGen_0_Generation_RMASK 0x7 +#define QIB_7322_SendDmaLenGen_0_Length_LSB 0x0 +#define QIB_7322_SendDmaLenGen_0_Length_MSB 0xF +#define QIB_7322_SendDmaLenGen_0_Length_RMASK 0xFFFF + +#define QIB_7322_SendDmaTail_0_OFFS 0x1208 +#define QIB_7322_SendDmaTail_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaTail_0_SendDmaTail_LSB 0x0 +#define QIB_7322_SendDmaTail_0_SendDmaTail_MSB 0xF +#define QIB_7322_SendDmaTail_0_SendDmaTail_RMASK 0xFFFF + +#define QIB_7322_SendDmaHead_0_OFFS 0x1210 +#define QIB_7322_SendDmaHead_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaHead_0_InternalSendDmaHead_LSB 0x20 +#define QIB_7322_SendDmaHead_0_InternalSendDmaHead_MSB 0x2F +#define QIB_7322_SendDmaHead_0_InternalSendDmaHead_RMASK 0xFFFF +#define QIB_7322_SendDmaHead_0_SendDmaHead_LSB 0x0 +#define QIB_7322_SendDmaHead_0_SendDmaHead_MSB 0xF +#define QIB_7322_SendDmaHead_0_SendDmaHead_RMASK 0xFFFF + +#define QIB_7322_SendDmaHeadAddr_0_OFFS 0x1218 +#define QIB_7322_SendDmaHeadAddr_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaHeadAddr_0_SendDmaHeadAddr_LSB 0x0 +#define QIB_7322_SendDmaHeadAddr_0_SendDmaHeadAddr_MSB 0x2F +#define QIB_7322_SendDmaHeadAddr_0_SendDmaHeadAddr_RMASK 0xFFFFFFFFFFFF + +#define QIB_7322_SendDmaBufMask0_0_OFFS 0x1220 +#define QIB_7322_SendDmaBufMask0_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaBufMask0_0_BufMask_63_0_LSB 0x0 +#define QIB_7322_SendDmaBufMask0_0_BufMask_63_0_MSB 0x3F +#define QIB_7322_SendDmaBufMask0_0_BufMask_63_0_RMASK 0x0 + +#define QIB_7322_SendDmaStatus_0_OFFS 0x1238 +#define QIB_7322_SendDmaStatus_0_DEF 0x0000000042000000 +#define QIB_7322_SendDmaStatus_0_ScoreBoardDrainInProg_LSB 0x3F +#define QIB_7322_SendDmaStatus_0_ScoreBoardDrainInProg_MSB 0x3F +#define QIB_7322_SendDmaStatus_0_ScoreBoardDrainInProg_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_HaltInProg_LSB 0x3E +#define QIB_7322_SendDmaStatus_0_HaltInProg_MSB 0x3E +#define QIB_7322_SendDmaStatus_0_HaltInProg_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_InternalSDmaHalt_LSB 0x3D +#define QIB_7322_SendDmaStatus_0_InternalSDmaHalt_MSB 0x3D +#define QIB_7322_SendDmaStatus_0_InternalSDmaHalt_RMASK 0x1 
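A minimal sketch (not part of this patch) of how `_LSB`/`_RMASK` pairs like the SendDma* definitions above are typically consumed: a field is recovered by shifting the 64-bit register value right by its LSB and masking with RMASK. FIELD_GET() and sdma_head_of() are hypothetical illustration helpers, not names from this driver.

	#include <stdint.h>

	/*
	 * Hypothetical helper: extract a named field from a 64-bit register
	 * value using the generated _LSB/_RMASK constants above.
	 */
	#define FIELD_GET(regval, field) \
		(((regval) >> field##_LSB) & field##_RMASK)

	static inline uint64_t sdma_head_of(uint64_t senddmahead)
	{
		/* SendDmaHead_0 packs the hardware head index in bits 15:0 */
		return FIELD_GET(senddmahead, QIB_7322_SendDmaHead_0_SendDmaHead);
	}

Note that full-width fields (e.g. BufMask_63_0 above) are generated with RMASK 0x0, so a shift-and-mask consumer must special-case them rather than masking.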
+#define QIB_7322_SendDmaStatus_0_ScbDescIndex_13_0_LSB 0x2F +#define QIB_7322_SendDmaStatus_0_ScbDescIndex_13_0_MSB 0x3C +#define QIB_7322_SendDmaStatus_0_ScbDescIndex_13_0_RMASK 0x3FFF +#define QIB_7322_SendDmaStatus_0_RpyLowAddr_6_0_LSB 0x28 +#define QIB_7322_SendDmaStatus_0_RpyLowAddr_6_0_MSB 0x2E +#define QIB_7322_SendDmaStatus_0_RpyLowAddr_6_0_RMASK 0x7F +#define QIB_7322_SendDmaStatus_0_RpyTag_7_0_LSB 0x20 +#define QIB_7322_SendDmaStatus_0_RpyTag_7_0_MSB 0x27 +#define QIB_7322_SendDmaStatus_0_RpyTag_7_0_RMASK 0xFF +#define QIB_7322_SendDmaStatus_0_ScbFull_LSB 0x1F +#define QIB_7322_SendDmaStatus_0_ScbFull_MSB 0x1F +#define QIB_7322_SendDmaStatus_0_ScbFull_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_ScbEmpty_LSB 0x1E +#define QIB_7322_SendDmaStatus_0_ScbEmpty_MSB 0x1E +#define QIB_7322_SendDmaStatus_0_ScbEmpty_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_ScbEntryValid_LSB 0x1D +#define QIB_7322_SendDmaStatus_0_ScbEntryValid_MSB 0x1D +#define QIB_7322_SendDmaStatus_0_ScbEntryValid_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_ScbFetchDescFlag_LSB 0x1C +#define QIB_7322_SendDmaStatus_0_ScbFetchDescFlag_MSB 0x1C +#define QIB_7322_SendDmaStatus_0_ScbFetchDescFlag_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_SplFifoReadyToGo_LSB 0x1B +#define QIB_7322_SendDmaStatus_0_SplFifoReadyToGo_MSB 0x1B +#define QIB_7322_SendDmaStatus_0_SplFifoReadyToGo_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_SplFifoDisarmed_LSB 0x1A +#define QIB_7322_SendDmaStatus_0_SplFifoDisarmed_MSB 0x1A +#define QIB_7322_SendDmaStatus_0_SplFifoDisarmed_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_SplFifoEmpty_LSB 0x19 +#define QIB_7322_SendDmaStatus_0_SplFifoEmpty_MSB 0x19 +#define QIB_7322_SendDmaStatus_0_SplFifoEmpty_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_SplFifoFull_LSB 0x18 +#define QIB_7322_SendDmaStatus_0_SplFifoFull_MSB 0x18 +#define QIB_7322_SendDmaStatus_0_SplFifoFull_RMASK 0x1 +#define QIB_7322_SendDmaStatus_0_SplFifoBufNum_LSB 0x10 +#define QIB_7322_SendDmaStatus_0_SplFifoBufNum_MSB 0x17 +#define QIB_7322_SendDmaStatus_0_SplFifoBufNum_RMASK 0xFF +#define QIB_7322_SendDmaStatus_0_SplFifoDescIndex_LSB 0x0 +#define QIB_7322_SendDmaStatus_0_SplFifoDescIndex_MSB 0xF +#define QIB_7322_SendDmaStatus_0_SplFifoDescIndex_RMASK 0xFFFF + +#define QIB_7322_SendDmaPriorityThld_0_OFFS 0x1258 +#define QIB_7322_SendDmaPriorityThld_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaPriorityThld_0_PriorityThreshold_LSB 0x0 +#define QIB_7322_SendDmaPriorityThld_0_PriorityThreshold_MSB 0x3 +#define QIB_7322_SendDmaPriorityThld_0_PriorityThreshold_RMASK 0xF + +#define QIB_7322_SendHdrErrSymptom_0_OFFS 0x1260 +#define QIB_7322_SendHdrErrSymptom_0_DEF 0x0000000000000000 +#define QIB_7322_SendHdrErrSymptom_0_NonKeyPacket_LSB 0x6 +#define QIB_7322_SendHdrErrSymptom_0_NonKeyPacket_MSB 0x6 +#define QIB_7322_SendHdrErrSymptom_0_NonKeyPacket_RMASK 0x1 +#define QIB_7322_SendHdrErrSymptom_0_GRHFail_LSB 0x5 +#define QIB_7322_SendHdrErrSymptom_0_GRHFail_MSB 0x5 +#define QIB_7322_SendHdrErrSymptom_0_GRHFail_RMASK 0x1 +#define QIB_7322_SendHdrErrSymptom_0_PkeyFail_LSB 0x4 +#define QIB_7322_SendHdrErrSymptom_0_PkeyFail_MSB 0x4 +#define QIB_7322_SendHdrErrSymptom_0_PkeyFail_RMASK 0x1 +#define QIB_7322_SendHdrErrSymptom_0_QPFail_LSB 0x3 +#define QIB_7322_SendHdrErrSymptom_0_QPFail_MSB 0x3 +#define QIB_7322_SendHdrErrSymptom_0_QPFail_RMASK 0x1 +#define QIB_7322_SendHdrErrSymptom_0_SLIDFail_LSB 0x2 +#define QIB_7322_SendHdrErrSymptom_0_SLIDFail_MSB 0x2 +#define QIB_7322_SendHdrErrSymptom_0_SLIDFail_RMASK 0x1 +#define 
QIB_7322_SendHdrErrSymptom_0_RawIPV6_LSB 0x1 +#define QIB_7322_SendHdrErrSymptom_0_RawIPV6_MSB 0x1 +#define QIB_7322_SendHdrErrSymptom_0_RawIPV6_RMASK 0x1 +#define QIB_7322_SendHdrErrSymptom_0_PacketTooSmall_LSB 0x0 +#define QIB_7322_SendHdrErrSymptom_0_PacketTooSmall_MSB 0x0 +#define QIB_7322_SendHdrErrSymptom_0_PacketTooSmall_RMASK 0x1 + +#define QIB_7322_RxCreditVL0_0_OFFS 0x1280 +#define QIB_7322_RxCreditVL0_0_DEF 0x0000000000000000 +#define QIB_7322_RxCreditVL0_0_RxBufrConsumedVL_LSB 0x10 +#define QIB_7322_RxCreditVL0_0_RxBufrConsumedVL_MSB 0x1B +#define QIB_7322_RxCreditVL0_0_RxBufrConsumedVL_RMASK 0xFFF +#define QIB_7322_RxCreditVL0_0_RxMaxCreditVL_LSB 0x0 +#define QIB_7322_RxCreditVL0_0_RxMaxCreditVL_MSB 0xB +#define QIB_7322_RxCreditVL0_0_RxMaxCreditVL_RMASK 0xFFF + +#define QIB_7322_SendDmaBufUsed0_0_OFFS 0x1480 +#define QIB_7322_SendDmaBufUsed0_0_DEF 0x0000000000000000 +#define QIB_7322_SendDmaBufUsed0_0_BufUsed_63_0_LSB 0x0 +#define QIB_7322_SendDmaBufUsed0_0_BufUsed_63_0_MSB 0x3F +#define QIB_7322_SendDmaBufUsed0_0_BufUsed_63_0_RMASK 0x0 + +#define QIB_7322_SendCheckControl_0_OFFS 0x14A8 +#define QIB_7322_SendCheckControl_0_DEF 0x0000000000000000 +#define QIB_7322_SendCheckControl_0_PKey_En_LSB 0x4 +#define QIB_7322_SendCheckControl_0_PKey_En_MSB 0x4 +#define QIB_7322_SendCheckControl_0_PKey_En_RMASK 0x1 +#define QIB_7322_SendCheckControl_0_BTHQP_En_LSB 0x3 +#define QIB_7322_SendCheckControl_0_BTHQP_En_MSB 0x3 +#define QIB_7322_SendCheckControl_0_BTHQP_En_RMASK 0x1 +#define QIB_7322_SendCheckControl_0_SLID_En_LSB 0x2 +#define QIB_7322_SendCheckControl_0_SLID_En_MSB 0x2 +#define QIB_7322_SendCheckControl_0_SLID_En_RMASK 0x1 +#define QIB_7322_SendCheckControl_0_RawIPV6_En_LSB 0x1 +#define QIB_7322_SendCheckControl_0_RawIPV6_En_MSB 0x1 +#define QIB_7322_SendCheckControl_0_RawIPV6_En_RMASK 0x1 +#define QIB_7322_SendCheckControl_0_PacketTooSmall_En_LSB 0x0 +#define QIB_7322_SendCheckControl_0_PacketTooSmall_En_MSB 0x0 +#define QIB_7322_SendCheckControl_0_PacketTooSmall_En_RMASK 0x1 + +#define QIB_7322_SendIBSLIDMask_0_OFFS 0x14B0 +#define QIB_7322_SendIBSLIDMask_0_DEF 0x0000000000000000 +#define QIB_7322_SendIBSLIDMask_0_SendIBSLIDMask_15_0_LSB 0x0 +#define QIB_7322_SendIBSLIDMask_0_SendIBSLIDMask_15_0_MSB 0xF +#define QIB_7322_SendIBSLIDMask_0_SendIBSLIDMask_15_0_RMASK 0xFFFF + +#define QIB_7322_SendIBSLIDAssign_0_OFFS 0x14B8 +#define QIB_7322_SendIBSLIDAssign_0_DEF 0x0000000000000000 +#define QIB_7322_SendIBSLIDAssign_0_SendIBSLIDAssign_15_0_LSB 0x0 +#define QIB_7322_SendIBSLIDAssign_0_SendIBSLIDAssign_15_0_MSB 0xF +#define QIB_7322_SendIBSLIDAssign_0_SendIBSLIDAssign_15_0_RMASK 0xFFFF + +#define QIB_7322_IBCStatusA_0_OFFS 0x1540 +#define QIB_7322_IBCStatusA_0_DEF 0x0000000000000X02 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL7_LSB 0x27 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL7_MSB 0x27 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL7_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL6_LSB 0x26 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL6_MSB 0x26 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL6_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL5_LSB 0x25 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL5_MSB 0x25 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL5_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL4_LSB 0x24 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL4_MSB 0x24 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL4_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL3_LSB 0x23 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL3_MSB 0x23 +#define 
QIB_7322_IBCStatusA_0_TxCreditOk_VL3_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL2_LSB 0x22 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL2_MSB 0x22 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL2_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL1_LSB 0x21 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL1_MSB 0x21 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL1_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL0_LSB 0x20 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL0_MSB 0x20 +#define QIB_7322_IBCStatusA_0_TxCreditOk_VL0_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_TxReady_LSB 0x1E +#define QIB_7322_IBCStatusA_0_TxReady_MSB 0x1E +#define QIB_7322_IBCStatusA_0_TxReady_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_LinkSpeedQDR_LSB 0x1D +#define QIB_7322_IBCStatusA_0_LinkSpeedQDR_MSB 0x1D +#define QIB_7322_IBCStatusA_0_LinkSpeedQDR_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_ScrambleCapRemote_LSB 0xF +#define QIB_7322_IBCStatusA_0_ScrambleCapRemote_MSB 0xF +#define QIB_7322_IBCStatusA_0_ScrambleCapRemote_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_ScrambleEn_LSB 0xE +#define QIB_7322_IBCStatusA_0_ScrambleEn_MSB 0xE +#define QIB_7322_IBCStatusA_0_ScrambleEn_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_IBTxLaneReversed_LSB 0xD +#define QIB_7322_IBCStatusA_0_IBTxLaneReversed_MSB 0xD +#define QIB_7322_IBCStatusA_0_IBTxLaneReversed_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_IBRxLaneReversed_LSB 0xC +#define QIB_7322_IBCStatusA_0_IBRxLaneReversed_MSB 0xC +#define QIB_7322_IBCStatusA_0_IBRxLaneReversed_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_DDS_RXEQ_FAIL_LSB 0xA +#define QIB_7322_IBCStatusA_0_DDS_RXEQ_FAIL_MSB 0xA +#define QIB_7322_IBCStatusA_0_DDS_RXEQ_FAIL_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_LinkWidthActive_LSB 0x9 +#define QIB_7322_IBCStatusA_0_LinkWidthActive_MSB 0x9 +#define QIB_7322_IBCStatusA_0_LinkWidthActive_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_LinkSpeedActive_LSB 0x8 +#define QIB_7322_IBCStatusA_0_LinkSpeedActive_MSB 0x8 +#define QIB_7322_IBCStatusA_0_LinkSpeedActive_RMASK 0x1 +#define QIB_7322_IBCStatusA_0_LinkState_LSB 0x5 +#define QIB_7322_IBCStatusA_0_LinkState_MSB 0x7 +#define QIB_7322_IBCStatusA_0_LinkState_RMASK 0x7 +#define QIB_7322_IBCStatusA_0_LinkTrainingState_LSB 0x0 +#define QIB_7322_IBCStatusA_0_LinkTrainingState_MSB 0x4 +#define QIB_7322_IBCStatusA_0_LinkTrainingState_RMASK 0x1F + +#define QIB_7322_IBCStatusB_0_OFFS 0x1548 +#define QIB_7322_IBCStatusB_0_DEF 0x00000000XXXXXXXX +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_debug_LSB 0x27 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_debug_MSB 0x27 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_debug_RMASK 0x1 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_reached_threshold_LSB 0x26 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_reached_threshold_MSB 0x26 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_reached_threshold_RMASK 0x1 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_started_LSB 0x25 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_started_MSB 0x25 +#define QIB_7322_IBCStatusB_0_ibsd_adaptation_timer_started_RMASK 0x1 +#define QIB_7322_IBCStatusB_0_heartbeat_timed_out_LSB 0x24 +#define QIB_7322_IBCStatusB_0_heartbeat_timed_out_MSB 0x24 +#define QIB_7322_IBCStatusB_0_heartbeat_timed_out_RMASK 0x1 +#define QIB_7322_IBCStatusB_0_heartbeat_crosstalk_LSB 0x20 +#define QIB_7322_IBCStatusB_0_heartbeat_crosstalk_MSB 0x23 +#define QIB_7322_IBCStatusB_0_heartbeat_crosstalk_RMASK 0xF +#define QIB_7322_IBCStatusB_0_RxEqLocalDevice_LSB 0x1E +#define 
QIB_7322_IBCStatusB_0_RxEqLocalDevice_MSB 0x1F +#define QIB_7322_IBCStatusB_0_RxEqLocalDevice_RMASK 0x3 +#define QIB_7322_IBCStatusB_0_ReqDDSLocalFromRmt_LSB 0x1A +#define QIB_7322_IBCStatusB_0_ReqDDSLocalFromRmt_MSB 0x1D +#define QIB_7322_IBCStatusB_0_ReqDDSLocalFromRmt_RMASK 0xF +#define QIB_7322_IBCStatusB_0_LinkRoundTripLatency_LSB 0x0 +#define QIB_7322_IBCStatusB_0_LinkRoundTripLatency_MSB 0x19 +#define QIB_7322_IBCStatusB_0_LinkRoundTripLatency_RMASK 0x3FFFFFF + +#define QIB_7322_IBCCtrlA_0_OFFS 0x1560 +#define QIB_7322_IBCCtrlA_0_DEF 0x0000000000000000 +#define QIB_7322_IBCCtrlA_0_Loopback_LSB 0x3F +#define QIB_7322_IBCCtrlA_0_Loopback_MSB 0x3F +#define QIB_7322_IBCCtrlA_0_Loopback_RMASK 0x1 +#define QIB_7322_IBCCtrlA_0_LinkDownDefaultState_LSB 0x3E +#define QIB_7322_IBCCtrlA_0_LinkDownDefaultState_MSB 0x3E +#define QIB_7322_IBCCtrlA_0_LinkDownDefaultState_RMASK 0x1 +#define QIB_7322_IBCCtrlA_0_IBLinkEn_LSB 0x3D +#define QIB_7322_IBCCtrlA_0_IBLinkEn_MSB 0x3D +#define QIB_7322_IBCCtrlA_0_IBLinkEn_RMASK 0x1 +#define QIB_7322_IBCCtrlA_0_IBStatIntReductionEn_LSB 0x3C +#define QIB_7322_IBCCtrlA_0_IBStatIntReductionEn_MSB 0x3C +#define QIB_7322_IBCCtrlA_0_IBStatIntReductionEn_RMASK 0x1 +#define QIB_7322_IBCCtrlA_0_NumVLane_LSB 0x30 +#define QIB_7322_IBCCtrlA_0_NumVLane_MSB 0x32 +#define QIB_7322_IBCCtrlA_0_NumVLane_RMASK 0x7 +#define QIB_7322_IBCCtrlA_0_OverrunThreshold_LSB 0x24 +#define QIB_7322_IBCCtrlA_0_OverrunThreshold_MSB 0x27 +#define QIB_7322_IBCCtrlA_0_OverrunThreshold_RMASK 0xF +#define QIB_7322_IBCCtrlA_0_PhyerrThreshold_LSB 0x20 +#define QIB_7322_IBCCtrlA_0_PhyerrThreshold_MSB 0x23 +#define QIB_7322_IBCCtrlA_0_PhyerrThreshold_RMASK 0xF +#define QIB_7322_IBCCtrlA_0_MaxPktLen_LSB 0x15 +#define QIB_7322_IBCCtrlA_0_MaxPktLen_MSB 0x1F +#define QIB_7322_IBCCtrlA_0_MaxPktLen_RMASK 0x7FF +#define QIB_7322_IBCCtrlA_0_LinkCmd_LSB 0x13 +#define QIB_7322_IBCCtrlA_0_LinkCmd_MSB 0x14 +#define QIB_7322_IBCCtrlA_0_LinkCmd_RMASK 0x3 +#define QIB_7322_IBCCtrlA_0_LinkInitCmd_LSB 0x10 +#define QIB_7322_IBCCtrlA_0_LinkInitCmd_MSB 0x12 +#define QIB_7322_IBCCtrlA_0_LinkInitCmd_RMASK 0x7 +#define QIB_7322_IBCCtrlA_0_FlowCtrlWaterMark_LSB 0x8 +#define QIB_7322_IBCCtrlA_0_FlowCtrlWaterMark_MSB 0xF +#define QIB_7322_IBCCtrlA_0_FlowCtrlWaterMark_RMASK 0xFF +#define QIB_7322_IBCCtrlA_0_FlowCtrlPeriod_LSB 0x0 +#define QIB_7322_IBCCtrlA_0_FlowCtrlPeriod_MSB 0x7 +#define QIB_7322_IBCCtrlA_0_FlowCtrlPeriod_RMASK 0xFF + +#define QIB_7322_IBCCtrlB_0_OFFS 0x1568 +#define QIB_7322_IBCCtrlB_0_DEF 0x00000000000305FF +#define QIB_7322_IBCCtrlB_0_IB_DLID_MASK_LSB 0x30 +#define QIB_7322_IBCCtrlB_0_IB_DLID_MASK_MSB 0x3F +#define QIB_7322_IBCCtrlB_0_IB_DLID_MASK_RMASK 0xFFFF +#define QIB_7322_IBCCtrlB_0_IB_DLID_LSB 0x20 +#define QIB_7322_IBCCtrlB_0_IB_DLID_MSB 0x2F +#define QIB_7322_IBCCtrlB_0_IB_DLID_RMASK 0xFFFF +#define QIB_7322_IBCCtrlB_0_IB_ENABLE_FILT_DPKT_LSB 0x1B +#define QIB_7322_IBCCtrlB_0_IB_ENABLE_FILT_DPKT_MSB 0x1B +#define QIB_7322_IBCCtrlB_0_IB_ENABLE_FILT_DPKT_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_HRTBT_REQ_LSB 0x1A +#define QIB_7322_IBCCtrlB_0_HRTBT_REQ_MSB 0x1A +#define QIB_7322_IBCCtrlB_0_HRTBT_REQ_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_HRTBT_PORT_LSB 0x12 +#define QIB_7322_IBCCtrlB_0_HRTBT_PORT_MSB 0x19 +#define QIB_7322_IBCCtrlB_0_HRTBT_PORT_RMASK 0xFF +#define QIB_7322_IBCCtrlB_0_HRTBT_AUTO_LSB 0x11 +#define QIB_7322_IBCCtrlB_0_HRTBT_AUTO_MSB 0x11 +#define QIB_7322_IBCCtrlB_0_HRTBT_AUTO_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_HRTBT_ENB_LSB 0x10 +#define QIB_7322_IBCCtrlB_0_HRTBT_ENB_MSB 0x10 
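Complementing the read-side sketch earlier, a hedged example of the usual read-modify-write pattern for control fields such as LinkInitCmd in IBCCtrlA_0: clear the field by masking out RMASK shifted to its LSB, then OR in the new value. FIELD_PUT() and set_link_init_cmd() are hypothetical names for illustration, not from this patch.

	#include <stdint.h>

	/*
	 * Hypothetical helper: replace one field in a 64-bit register image,
	 * leaving all other bits untouched.
	 */
	#define FIELD_PUT(regval, field, val)                                \
		(((regval) & ~((uint64_t)field##_RMASK << field##_LSB)) |    \
		 (((uint64_t)(val) & field##_RMASK) << field##_LSB))

	static inline uint64_t set_link_init_cmd(uint64_t ibcctrla, unsigned int cmd)
	{
		/* LinkInitCmd occupies bits 18:16 (LSB 0x10, RMASK 0x7) */
		return FIELD_PUT(ibcctrla, QIB_7322_IBCCtrlA_0_LinkInitCmd, cmd);
	}
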
+#define QIB_7322_IBCCtrlB_0_HRTBT_ENB_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_SD_DDS_LSB 0xC +#define QIB_7322_IBCCtrlB_0_SD_DDS_MSB 0xF +#define QIB_7322_IBCCtrlB_0_SD_DDS_RMASK 0xF +#define QIB_7322_IBCCtrlB_0_SD_DDSV_LSB 0xB +#define QIB_7322_IBCCtrlB_0_SD_DDSV_MSB 0xB +#define QIB_7322_IBCCtrlB_0_SD_DDSV_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_SD_ADD_ENB_LSB 0xA +#define QIB_7322_IBCCtrlB_0_SD_ADD_ENB_MSB 0xA +#define QIB_7322_IBCCtrlB_0_SD_ADD_ENB_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_SD_RX_EQUAL_ENABLE_LSB 0x9 +#define QIB_7322_IBCCtrlB_0_SD_RX_EQUAL_ENABLE_MSB 0x9 +#define QIB_7322_IBCCtrlB_0_SD_RX_EQUAL_ENABLE_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_IB_LANE_REV_SUPPORTED_LSB 0x8 +#define QIB_7322_IBCCtrlB_0_IB_LANE_REV_SUPPORTED_MSB 0x8 +#define QIB_7322_IBCCtrlB_0_IB_LANE_REV_SUPPORTED_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_IB_POLARITY_REV_SUPP_LSB 0x7 +#define QIB_7322_IBCCtrlB_0_IB_POLARITY_REV_SUPP_MSB 0x7 +#define QIB_7322_IBCCtrlB_0_IB_POLARITY_REV_SUPP_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_IB_NUM_CHANNELS_LSB 0x5 +#define QIB_7322_IBCCtrlB_0_IB_NUM_CHANNELS_MSB 0x6 +#define QIB_7322_IBCCtrlB_0_IB_NUM_CHANNELS_RMASK 0x3 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_QDR_LSB 0x4 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_QDR_MSB 0x4 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_QDR_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_DDR_LSB 0x3 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_DDR_MSB 0x3 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_DDR_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_SDR_LSB 0x2 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_SDR_MSB 0x2 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_SDR_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_LSB 0x1 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_MSB 0x1 +#define QIB_7322_IBCCtrlB_0_SD_SPEED_RMASK 0x1 +#define QIB_7322_IBCCtrlB_0_IB_ENHANCED_MODE_LSB 0x0 +#define QIB_7322_IBCCtrlB_0_IB_ENHANCED_MODE_MSB 0x0 +#define QIB_7322_IBCCtrlB_0_IB_ENHANCED_MODE_RMASK 0x1 + +#define QIB_7322_IBCCtrlC_0_OFFS 0x1570 +#define QIB_7322_IBCCtrlC_0_DEF 0x0000000000000301 +#define QIB_7322_IBCCtrlC_0_IB_BACK_PORCH_LSB 0x5 +#define QIB_7322_IBCCtrlC_0_IB_BACK_PORCH_MSB 0x9 +#define QIB_7322_IBCCtrlC_0_IB_BACK_PORCH_RMASK 0x1F +#define QIB_7322_IBCCtrlC_0_IB_FRONT_PORCH_LSB 0x0 +#define QIB_7322_IBCCtrlC_0_IB_FRONT_PORCH_MSB 0x4 +#define QIB_7322_IBCCtrlC_0_IB_FRONT_PORCH_RMASK 0x1F + +#define QIB_7322_HRTBT_GUID_0_OFFS 0x1588 +#define QIB_7322_HRTBT_GUID_0_DEF 0x0000000000000000 + +#define QIB_7322_IB_SDTEST_IF_TX_0_OFFS 0x1590 +#define QIB_7322_IB_SDTEST_IF_TX_0_DEF 0x0000000000000000 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_RX_CFG_LSB 0x30 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_RX_CFG_MSB 0x3F +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_RX_CFG_RMASK 0xFFFF +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_TX_CFG_LSB 0x20 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_TX_CFG_MSB 0x2F +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_TX_CFG_RMASK 0xFFFF +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_SPEED_LSB 0xD +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_SPEED_MSB 0xF +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_SPEED_RMASK 0x7 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_OPCODE_LSB 0xB +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_OPCODE_MSB 0xC +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_TX_OPCODE_RMASK 0x3 +#define QIB_7322_IB_SDTEST_IF_TX_0_CREDIT_CHANGE_LSB 0x4 +#define QIB_7322_IB_SDTEST_IF_TX_0_CREDIT_CHANGE_MSB 0x4 +#define QIB_7322_IB_SDTEST_IF_TX_0_CREDIT_CHANGE_RMASK 0x1 +#define QIB_7322_IB_SDTEST_IF_TX_0_VL_CAP_LSB 0x2 +#define QIB_7322_IB_SDTEST_IF_TX_0_VL_CAP_MSB 0x3 +#define 
QIB_7322_IB_SDTEST_IF_TX_0_VL_CAP_RMASK 0x3 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_3_TX_VALID_LSB 0x1 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_3_TX_VALID_MSB 0x1 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_3_TX_VALID_RMASK 0x1 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_T_TX_VALID_LSB 0x0 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_T_TX_VALID_MSB 0x0 +#define QIB_7322_IB_SDTEST_IF_TX_0_TS_T_TX_VALID_RMASK 0x1 + +#define QIB_7322_IB_SDTEST_IF_RX_0_OFFS 0x1598 +#define QIB_7322_IB_SDTEST_IF_RX_0_DEF 0x0000000000000000 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_RX_CFG_LSB 0x30 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_RX_CFG_MSB 0x3F +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_RX_CFG_RMASK 0xFFFF +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_TX_CFG_LSB 0x20 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_TX_CFG_MSB 0x2F +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_TX_CFG_RMASK 0xFFFF +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_B_LSB 0x18 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_B_MSB 0x1F +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_B_RMASK 0xFF +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_A_LSB 0x10 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_A_MSB 0x17 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_RX_A_RMASK 0xFF +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_3_RX_VALID_LSB 0x1 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_3_RX_VALID_MSB 0x1 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_3_RX_VALID_RMASK 0x1 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_T_RX_VALID_LSB 0x0 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_T_RX_VALID_MSB 0x0 +#define QIB_7322_IB_SDTEST_IF_RX_0_TS_T_RX_VALID_RMASK 0x1 + +#define QIB_7322_IBNCModeCtrl_0_OFFS 0x15B8 +#define QIB_7322_IBNCModeCtrl_0_DEF 0x0000000000000000 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteForce_LSB 0x22 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteForce_MSB 0x22 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteForce_RMASK 0x1 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteMask_LSB 0x21 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteMask_MSB 0x21 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapRemoteMask_RMASK 0x1 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapLocal_LSB 0x20 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapLocal_MSB 0x20 +#define QIB_7322_IBNCModeCtrl_0_ScrambleCapLocal_RMASK 0x1 +#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS2_LSB 0x11 +#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS2_MSB 0x19 +#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS2_RMASK 0x1FF +#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS1_LSB 0x8 +#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS1_MSB 0x10 +#define QIB_7322_IBNCModeCtrl_0_TSMCode_TS1_RMASK 0x1FF +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_ignore_TSM_on_rx_LSB 0x2 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_ignore_TSM_on_rx_MSB 0x2 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_ignore_TSM_on_rx_RMASK 0x1 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS2_LSB 0x1 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS2_MSB 0x1 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS2_RMASK 0x1 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS1_LSB 0x0 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS1_MSB 0x0 +#define QIB_7322_IBNCModeCtrl_0_TSMEnable_send_TS1_RMASK 0x1 + +#define QIB_7322_IBSerdesStatus_0_OFFS 0x15D0 +#define QIB_7322_IBSerdesStatus_0_DEF 0x0000000000000000 + +#define QIB_7322_IBPCSConfig_0_OFFS 0x15D8 +#define QIB_7322_IBPCSConfig_0_DEF 0x0000000000000007 +#define QIB_7322_IBPCSConfig_0_link_sync_mask_LSB 0x9 +#define QIB_7322_IBPCSConfig_0_link_sync_mask_MSB 0x12 +#define QIB_7322_IBPCSConfig_0_link_sync_mask_RMASK 0x3FF +#define QIB_7322_IBPCSConfig_0_xcv_rreset_LSB 0x2 +#define 
QIB_7322_IBPCSConfig_0_xcv_rreset_MSB 0x2 +#define QIB_7322_IBPCSConfig_0_xcv_rreset_RMASK 0x1 +#define QIB_7322_IBPCSConfig_0_xcv_treset_LSB 0x1 +#define QIB_7322_IBPCSConfig_0_xcv_treset_MSB 0x1 +#define QIB_7322_IBPCSConfig_0_xcv_treset_RMASK 0x1 +#define QIB_7322_IBPCSConfig_0_tx_rx_reset_LSB 0x0 +#define QIB_7322_IBPCSConfig_0_tx_rx_reset_MSB 0x0 +#define QIB_7322_IBPCSConfig_0_tx_rx_reset_RMASK 0x1 + +#define QIB_7322_IBSerdesCtrl_0_OFFS 0x15E0 +#define QIB_7322_IBSerdesCtrl_0_DEF 0x0000000000FFA00F +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_QDR_LSB 0x1A +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_QDR_MSB 0x1A +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_QDR_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_DDR_LSB 0x19 +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_DDR_MSB 0x19 +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_DDR_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_SDR_LSB 0x18 +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_SDR_MSB 0x18 +#define QIB_7322_IBSerdesCtrl_0_DISABLE_RXLATOFF_SDR_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_CHANNEL_RESET_N_LSB 0x14 +#define QIB_7322_IBSerdesCtrl_0_CHANNEL_RESET_N_MSB 0x17 +#define QIB_7322_IBSerdesCtrl_0_CHANNEL_RESET_N_RMASK 0xF +#define QIB_7322_IBSerdesCtrl_0_CGMODE_LSB 0x10 +#define QIB_7322_IBSerdesCtrl_0_CGMODE_MSB 0x13 +#define QIB_7322_IBSerdesCtrl_0_CGMODE_RMASK 0xF +#define QIB_7322_IBSerdesCtrl_0_IB_LAT_MODE_LSB 0xF +#define QIB_7322_IBSerdesCtrl_0_IB_LAT_MODE_MSB 0xF +#define QIB_7322_IBSerdesCtrl_0_IB_LAT_MODE_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_RXLOSEN_LSB 0xD +#define QIB_7322_IBSerdesCtrl_0_RXLOSEN_MSB 0xD +#define QIB_7322_IBSerdesCtrl_0_RXLOSEN_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_LPEN_LSB 0xC +#define QIB_7322_IBSerdesCtrl_0_LPEN_MSB 0xC +#define QIB_7322_IBSerdesCtrl_0_LPEN_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_PLLPD_LSB 0xB +#define QIB_7322_IBSerdesCtrl_0_PLLPD_MSB 0xB +#define QIB_7322_IBSerdesCtrl_0_PLLPD_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_TXPD_LSB 0xA +#define QIB_7322_IBSerdesCtrl_0_TXPD_MSB 0xA +#define QIB_7322_IBSerdesCtrl_0_TXPD_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_RXPD_LSB 0x9 +#define QIB_7322_IBSerdesCtrl_0_RXPD_MSB 0x9 +#define QIB_7322_IBSerdesCtrl_0_RXPD_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_TXIDLE_LSB 0x8 +#define QIB_7322_IBSerdesCtrl_0_TXIDLE_MSB 0x8 +#define QIB_7322_IBSerdesCtrl_0_TXIDLE_RMASK 0x1 +#define QIB_7322_IBSerdesCtrl_0_CMODE_LSB 0x0 +#define QIB_7322_IBSerdesCtrl_0_CMODE_MSB 0x6 +#define QIB_7322_IBSerdesCtrl_0_CMODE_RMASK 0x7F + +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_OFFS 0x1600 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_DEF 0x0000000000000000 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_tx_override_deemphasis_select_LSB 0x1F +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_tx_override_deemphasis_select_MSB 0x1F +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_tx_override_deemphasis_select_RMASK 0x1 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_reset_tx_deemphasis_override_LSB 0x1E +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_reset_tx_deemphasis_override_MSB 0x1E +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_reset_tx_deemphasis_override_RMASK 0x1 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txampcntl_d2a_LSB 0xE +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txampcntl_d2a_MSB 0x11 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txampcntl_d2a_RMASK 0xF +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txc0_ena_LSB 0x9 +#define 
QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txc0_ena_MSB 0xD +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txc0_ena_RMASK 0x1F +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcp1_ena_LSB 0x5 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcp1_ena_MSB 0x8 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcp1_ena_RMASK 0xF +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_xtra_emph0_LSB 0x3 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_xtra_emph0_MSB 0x4 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_xtra_emph0_RMASK 0x3 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_ena_LSB 0x0 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_ena_MSB 0x2 +#define QIB_7322_IBSD_TX_DEEMPHASIS_OVERRIDE_0_txcn1_ena_RMASK 0x7 + +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_OFFS 0x1640 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_DEF 0x0000000000000000 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch3_LSB 0x27 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch3_MSB 0x27 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch2_LSB 0x26 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch2_MSB 0x26 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch1_LSB 0x25 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch1_MSB 0x25 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch0_LSB 0x24 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch0_MSB 0x24 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenagain_sdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch3_LSB 0x23 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch3_MSB 0x23 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch2_LSB 0x22 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch2_MSB 0x22 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch1_LSB 0x21 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch1_MSB 0x21 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch0_LSB 0x20 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch0_MSB 0x20 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenale_sdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch3_LSB 0x18 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch3_MSB 0x1F +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch3_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch2_LSB 0x10 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch2_MSB 0x17 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch2_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch1_LSB 0x8 
+#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch1_MSB 0xF +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch1_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch0_LSB 0x0 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch0_MSB 0x7 +#define QIB_7322_ADAPT_DISABLE_STATIC_SDR_0_static_disable_rxenadfe_sdr_ch0_RMASK 0xFF + +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_OFFS 0x1648 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_DEF 0x0000000000000000 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch3_LSB 0x27 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch3_MSB 0x27 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch2_LSB 0x26 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch2_MSB 0x26 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch1_LSB 0x25 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch1_MSB 0x25 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch0_LSB 0x24 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch0_MSB 0x24 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenagain_sdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch3_LSB 0x23 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch3_MSB 0x23 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch2_LSB 0x22 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch2_MSB 0x22 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch1_LSB 0x21 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch1_MSB 0x21 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch0_LSB 0x20 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch0_MSB 0x20 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenale_sdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch3_LSB 0x18 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch3_MSB 0x1F +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch3_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch2_LSB 0x10 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch2_MSB 0x17 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch2_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch1_LSB 0x8 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch1_MSB 0xF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch1_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch0_LSB 0x0 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch0_MSB 0x7 +#define 
QIB_7322_ADAPT_DISABLE_DYNAMIC_SDR_0_dyn_disable_rxenadfe_sdr_ch0_RMASK 0xFF + +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_OFFS 0x1650 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_DEF 0x0000000000000000 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch3_LSB 0x27 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch3_MSB 0x27 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch2_LSB 0x26 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch2_MSB 0x26 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch1_LSB 0x25 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch1_MSB 0x25 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch0_LSB 0x24 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch0_MSB 0x24 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenagain_ddr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch3_LSB 0x23 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch3_MSB 0x23 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch2_LSB 0x22 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch2_MSB 0x22 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch1_LSB 0x21 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch1_MSB 0x21 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch0_LSB 0x20 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch0_MSB 0x20 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenale_ddr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch3_LSB 0x18 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch3_MSB 0x1F +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch3_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch2_LSB 0x10 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch2_MSB 0x17 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch2_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch1_LSB 0x8 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch1_MSB 0xF +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch1_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch0_LSB 0x0 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch0_MSB 0x7 +#define QIB_7322_ADAPT_DISABLE_STATIC_DDR_0_static_disable_rxenadfe_ddr_ch0_RMASK 0xFF + +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_OFFS 0x1658 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_DEF 0x0000000000000000 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch3_LSB 0x27 +#define 
QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch3_MSB 0x27 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch2_LSB 0x26 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch2_MSB 0x26 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch1_LSB 0x25 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch1_MSB 0x25 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch0_LSB 0x24 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch0_MSB 0x24 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenagain_ddr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch3_LSB 0x23 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch3_MSB 0x23 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch2_LSB 0x22 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch2_MSB 0x22 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch1_LSB 0x21 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch1_MSB 0x21 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch0_LSB 0x20 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch0_MSB 0x20 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenale_ddr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch3_LSB 0x18 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch3_MSB 0x1F +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch3_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch2_LSB 0x10 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch2_MSB 0x17 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch2_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch1_LSB 0x8 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch1_MSB 0xF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch1_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch0_LSB 0x0 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch0_MSB 0x7 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_DDR_0_dyn_disable_rxenadfe_ddr_ch0_RMASK 0xFF + +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_OFFS 0x1660 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_DEF 0x0000000000000000 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch3_LSB 0x27 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch3_MSB 0x27 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch2_LSB 0x26 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch2_MSB 0x26 +#define 
QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch1_LSB 0x25 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch1_MSB 0x25 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch0_LSB 0x24 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch0_MSB 0x24 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenagain_qdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch3_LSB 0x23 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch3_MSB 0x23 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch2_LSB 0x22 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch2_MSB 0x22 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch1_LSB 0x21 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch1_MSB 0x21 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch0_LSB 0x20 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch0_MSB 0x20 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenale_qdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch3_LSB 0x18 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch3_MSB 0x1F +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch3_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch2_LSB 0x10 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch2_MSB 0x17 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch2_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch1_LSB 0x8 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch1_MSB 0xF +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch1_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch0_LSB 0x0 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch0_MSB 0x7 +#define QIB_7322_ADAPT_DISABLE_STATIC_QDR_0_static_disable_rxenadfe_qdr_ch0_RMASK 0xFF + +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_OFFS 0x1668 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_DEF 0x0000000000000000 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch3_LSB 0x27 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch3_MSB 0x27 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch2_LSB 0x26 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch2_MSB 0x26 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch1_LSB 0x25 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch1_MSB 0x25 +#define 
QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch0_LSB 0x24 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch0_MSB 0x24 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenagain_qdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch3_LSB 0x23 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch3_MSB 0x23 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch3_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch2_LSB 0x22 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch2_MSB 0x22 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch2_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch1_LSB 0x21 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch1_MSB 0x21 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch1_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch0_LSB 0x20 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch0_MSB 0x20 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenale_qdr_ch0_RMASK 0x1 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch3_LSB 0x18 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch3_MSB 0x1F +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch3_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch2_LSB 0x10 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch2_MSB 0x17 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch2_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch1_LSB 0x8 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch1_MSB 0xF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch1_RMASK 0xFF +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch0_LSB 0x0 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch0_MSB 0x7 +#define QIB_7322_ADAPT_DISABLE_DYNAMIC_QDR_0_dyn_disable_rxenadfe_qdr_ch0_RMASK 0xFF + +#define QIB_7322_ADAPT_DISABLE_TIMER_THRESHOLD_0_OFFS 0x1670 +#define QIB_7322_ADAPT_DISABLE_TIMER_THRESHOLD_0_DEF 0x0000000000000000 + +#define QIB_7322_HighPriorityLimit_0_OFFS 0x1BC0 +#define QIB_7322_HighPriorityLimit_0_DEF 0x0000000000000000 +#define QIB_7322_HighPriorityLimit_0_Limit_LSB 0x0 +#define QIB_7322_HighPriorityLimit_0_Limit_MSB 0x7 +#define QIB_7322_HighPriorityLimit_0_Limit_RMASK 0xFF + +#define QIB_7322_LowPriority0_0_OFFS 0x1C00 +#define QIB_7322_LowPriority0_0_DEF 0x0000000000000000 +#define QIB_7322_LowPriority0_0_VirtualLane_LSB 0x10 +#define QIB_7322_LowPriority0_0_VirtualLane_MSB 0x12 +#define QIB_7322_LowPriority0_0_VirtualLane_RMASK 0x7 +#define QIB_7322_LowPriority0_0_Weight_LSB 0x0 +#define QIB_7322_LowPriority0_0_Weight_MSB 0x7 +#define QIB_7322_LowPriority0_0_Weight_RMASK 0xFF + +#define QIB_7322_HighPriority0_0_OFFS 0x1E00 +#define QIB_7322_HighPriority0_0_DEF 0x0000000000000000 +#define QIB_7322_HighPriority0_0_VirtualLane_LSB 0x10 +#define QIB_7322_HighPriority0_0_VirtualLane_MSB 0x12 +#define QIB_7322_HighPriority0_0_VirtualLane_RMASK 0x7 +#define QIB_7322_HighPriority0_0_Weight_LSB 0x0 +#define QIB_7322_HighPriority0_0_Weight_MSB 0x7 +#define 
QIB_7322_HighPriority0_0_Weight_RMASK 0xFF + +#define QIB_7322_CntrRegBase_1_OFFS 0x2028 +#define QIB_7322_CntrRegBase_1_DEF 0x0000000000013000 + +#define QIB_7322_RcvQPMulticastContext_1_OFFS 0x2170 + +#define QIB_7322_SendCtrl_1_OFFS 0x21C0 + +#define QIB_7322_SendBufAvail0_OFFS 0x3000 +#define QIB_7322_SendBufAvail0_DEF 0x0000000000000000 +#define QIB_7322_SendBufAvail0_SendBuf_31_0_LSB 0x0 +#define QIB_7322_SendBufAvail0_SendBuf_31_0_MSB 0x3F +#define QIB_7322_SendBufAvail0_SendBuf_31_0_RMASK 0x0 + +#define QIB_7322_MsixTable_OFFS 0x8000 +#define QIB_7322_MsixTable_DEF 0x0000000000000000 + +#define QIB_7322_MsixPba_OFFS 0x9000 +#define QIB_7322_MsixPba_DEF 0x0000000000000000 + +#define QIB_7322_LAMemory_OFFS 0xA000 +#define QIB_7322_LAMemory_DEF 0x0000000000000000 + +#define QIB_7322_LBIntCnt_OFFS 0x11000 +#define QIB_7322_LBIntCnt_DEF 0x0000000000000000 + +#define QIB_7322_LBFlowStallCnt_OFFS 0x11008 +#define QIB_7322_LBFlowStallCnt_DEF 0x0000000000000000 + +#define QIB_7322_RxTIDFullErrCnt_OFFS 0x110D0 +#define QIB_7322_RxTIDFullErrCnt_DEF 0x0000000000000000 + +#define QIB_7322_RxTIDValidErrCnt_OFFS 0x110D8 +#define QIB_7322_RxTIDValidErrCnt_DEF 0x0000000000000000 + +#define QIB_7322_RxP0HdrEgrOvflCnt_OFFS 0x110E8 +#define QIB_7322_RxP0HdrEgrOvflCnt_DEF 0x0000000000000000 + +#define QIB_7322_PcieRetryBufDiagQwordCnt_OFFS 0x111A0 +#define QIB_7322_PcieRetryBufDiagQwordCnt_DEF 0x0000000000000000 + +#define QIB_7322_RxTidFlowDropCnt_OFFS 0x111E0 +#define QIB_7322_RxTidFlowDropCnt_DEF 0x0000000000000000 + +#define QIB_7322_LBIntCnt_0_OFFS 0x12000 +#define QIB_7322_LBIntCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxCreditUpToDateTimeOut_0_OFFS 0x12008 +#define QIB_7322_TxCreditUpToDateTimeOut_0_DEF 0x0000000000000000 + +#define QIB_7322_TxSDmaDescCnt_0_OFFS 0x12010 +#define QIB_7322_TxSDmaDescCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxUnsupVLErrCnt_0_OFFS 0x12018 +#define QIB_7322_TxUnsupVLErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxDataPktCnt_0_OFFS 0x12020 +#define QIB_7322_TxDataPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxFlowPktCnt_0_OFFS 0x12028 +#define QIB_7322_TxFlowPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxDwordCnt_0_OFFS 0x12030 +#define QIB_7322_TxDwordCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxLenErrCnt_0_OFFS 0x12038 +#define QIB_7322_TxLenErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxMaxMinLenErrCnt_0_OFFS 0x12040 +#define QIB_7322_TxMaxMinLenErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxUnderrunCnt_0_OFFS 0x12048 +#define QIB_7322_TxUnderrunCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxFlowStallCnt_0_OFFS 0x12050 +#define QIB_7322_TxFlowStallCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxDroppedPktCnt_0_OFFS 0x12058 +#define QIB_7322_TxDroppedPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxDroppedPktCnt_0_OFFS 0x12060 +#define QIB_7322_RxDroppedPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxDataPktCnt_0_OFFS 0x12068 +#define QIB_7322_RxDataPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxFlowPktCnt_0_OFFS 0x12070 +#define QIB_7322_RxFlowPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxDwordCnt_0_OFFS 0x12078 +#define QIB_7322_RxDwordCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxLenErrCnt_0_OFFS 0x12080 +#define QIB_7322_RxLenErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxMaxMinLenErrCnt_0_OFFS 0x12088 +#define QIB_7322_RxMaxMinLenErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxICRCErrCnt_0_OFFS 0x12090 +#define QIB_7322_RxICRCErrCnt_0_DEF 0x0000000000000000 
+ +#define QIB_7322_RxVCRCErrCnt_0_OFFS 0x12098 +#define QIB_7322_RxVCRCErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxFlowCtrlViolCnt_0_OFFS 0x120A0 +#define QIB_7322_RxFlowCtrlViolCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxVersionErrCnt_0_OFFS 0x120A8 +#define QIB_7322_RxVersionErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxLinkMalformCnt_0_OFFS 0x120B0 +#define QIB_7322_RxLinkMalformCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxEBPCnt_0_OFFS 0x120B8 +#define QIB_7322_RxEBPCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxLPCRCErrCnt_0_OFFS 0x120C0 +#define QIB_7322_RxLPCRCErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxBufOvflCnt_0_OFFS 0x120C8 +#define QIB_7322_RxBufOvflCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxLenTruncateCnt_0_OFFS 0x120D0 +#define QIB_7322_RxLenTruncateCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxPKeyMismatchCnt_0_OFFS 0x120E0 +#define QIB_7322_RxPKeyMismatchCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_IBLinkDownedCnt_0_OFFS 0x12180 +#define QIB_7322_IBLinkDownedCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_IBSymbolErrCnt_0_OFFS 0x12188 +#define QIB_7322_IBSymbolErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_IBStatusChangeCnt_0_OFFS 0x12190 +#define QIB_7322_IBStatusChangeCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_IBLinkErrRecoveryCnt_0_OFFS 0x12198 +#define QIB_7322_IBLinkErrRecoveryCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_ExcessBufferOvflCnt_0_OFFS 0x121A8 +#define QIB_7322_ExcessBufferOvflCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_LocalLinkIntegrityErrCnt_0_OFFS 0x121B0 +#define QIB_7322_LocalLinkIntegrityErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxVlErrCnt_0_OFFS 0x121B8 +#define QIB_7322_RxVlErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxDlidFltrCnt_0_OFFS 0x121C0 +#define QIB_7322_RxDlidFltrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxVL15DroppedPktCnt_0_OFFS 0x121C8 +#define QIB_7322_RxVL15DroppedPktCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxOtherLocalPhyErrCnt_0_OFFS 0x121D0 +#define QIB_7322_RxOtherLocalPhyErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_RxQPInvalidContextCnt_0_OFFS 0x121D8 +#define QIB_7322_RxQPInvalidContextCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_TxHeadersErrCnt_0_OFFS 0x121F8 +#define QIB_7322_TxHeadersErrCnt_0_DEF 0x0000000000000000 + +#define QIB_7322_PSRcvDataCount_0_OFFS 0x12218 +#define QIB_7322_PSRcvDataCount_0_DEF 0x0000000000000000 + +#define QIB_7322_PSRcvPktsCount_0_OFFS 0x12220 +#define QIB_7322_PSRcvPktsCount_0_DEF 0x0000000000000000 + +#define QIB_7322_PSXmitDataCount_0_OFFS 0x12228 +#define QIB_7322_PSXmitDataCount_0_DEF 0x0000000000000000 + +#define QIB_7322_PSXmitPktsCount_0_OFFS 0x12230 +#define QIB_7322_PSXmitPktsCount_0_DEF 0x0000000000000000 + +#define QIB_7322_PSXmitWaitCount_0_OFFS 0x12238 +#define QIB_7322_PSXmitWaitCount_0_DEF 0x0000000000000000 + +#define QIB_7322_LBIntCnt_1_OFFS 0x13000 +#define QIB_7322_LBIntCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxCreditUpToDateTimeOut_1_OFFS 0x13008 +#define QIB_7322_TxCreditUpToDateTimeOut_1_DEF 0x0000000000000000 + +#define QIB_7322_TxSDmaDescCnt_1_OFFS 0x13010 +#define QIB_7322_TxSDmaDescCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxUnsupVLErrCnt_1_OFFS 0x13018 +#define QIB_7322_TxUnsupVLErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxDataPktCnt_1_OFFS 0x13020 +#define QIB_7322_TxDataPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxFlowPktCnt_1_OFFS 0x13028 +#define QIB_7322_TxFlowPktCnt_1_DEF 0x0000000000000000 + +#define 
QIB_7322_TxDwordCnt_1_OFFS 0x13030 +#define QIB_7322_TxDwordCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxLenErrCnt_1_OFFS 0x13038 +#define QIB_7322_TxLenErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxMaxMinLenErrCnt_1_OFFS 0x13040 +#define QIB_7322_TxMaxMinLenErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxUnderrunCnt_1_OFFS 0x13048 +#define QIB_7322_TxUnderrunCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxFlowStallCnt_1_OFFS 0x13050 +#define QIB_7322_TxFlowStallCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxDroppedPktCnt_1_OFFS 0x13058 +#define QIB_7322_TxDroppedPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxDroppedPktCnt_1_OFFS 0x13060 +#define QIB_7322_RxDroppedPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxDataPktCnt_1_OFFS 0x13068 +#define QIB_7322_RxDataPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxFlowPktCnt_1_OFFS 0x13070 +#define QIB_7322_RxFlowPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxDwordCnt_1_OFFS 0x13078 +#define QIB_7322_RxDwordCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxLenErrCnt_1_OFFS 0x13080 +#define QIB_7322_RxLenErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxMaxMinLenErrCnt_1_OFFS 0x13088 +#define QIB_7322_RxMaxMinLenErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxICRCErrCnt_1_OFFS 0x13090 +#define QIB_7322_RxICRCErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxVCRCErrCnt_1_OFFS 0x13098 +#define QIB_7322_RxVCRCErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxFlowCtrlViolCnt_1_OFFS 0x130A0 +#define QIB_7322_RxFlowCtrlViolCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxVersionErrCnt_1_OFFS 0x130A8 +#define QIB_7322_RxVersionErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxLinkMalformCnt_1_OFFS 0x130B0 +#define QIB_7322_RxLinkMalformCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxEBPCnt_1_OFFS 0x130B8 +#define QIB_7322_RxEBPCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxLPCRCErrCnt_1_OFFS 0x130C0 +#define QIB_7322_RxLPCRCErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxBufOvflCnt_1_OFFS 0x130C8 +#define QIB_7322_RxBufOvflCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxLenTruncateCnt_1_OFFS 0x130D0 +#define QIB_7322_RxLenTruncateCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxPKeyMismatchCnt_1_OFFS 0x130E0 +#define QIB_7322_RxPKeyMismatchCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_IBLinkDownedCnt_1_OFFS 0x13180 +#define QIB_7322_IBLinkDownedCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_IBSymbolErrCnt_1_OFFS 0x13188 +#define QIB_7322_IBSymbolErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_IBStatusChangeCnt_1_OFFS 0x13190 +#define QIB_7322_IBStatusChangeCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_IBLinkErrRecoveryCnt_1_OFFS 0x13198 +#define QIB_7322_IBLinkErrRecoveryCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_ExcessBufferOvflCnt_1_OFFS 0x131A8 +#define QIB_7322_ExcessBufferOvflCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_LocalLinkIntegrityErrCnt_1_OFFS 0x131B0 +#define QIB_7322_LocalLinkIntegrityErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxVlErrCnt_1_OFFS 0x131B8 +#define QIB_7322_RxVlErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxDlidFltrCnt_1_OFFS 0x131C0 +#define QIB_7322_RxDlidFltrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxVL15DroppedPktCnt_1_OFFS 0x131C8 +#define QIB_7322_RxVL15DroppedPktCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxOtherLocalPhyErrCnt_1_OFFS 0x131D0 +#define QIB_7322_RxOtherLocalPhyErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_RxQPInvalidContextCnt_1_OFFS 0x131D8 +#define 
QIB_7322_RxQPInvalidContextCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_TxHeadersErrCnt_1_OFFS 0x131F8 +#define QIB_7322_TxHeadersErrCnt_1_DEF 0x0000000000000000 + +#define QIB_7322_PSRcvDataCount_1_OFFS 0x13218 +#define QIB_7322_PSRcvDataCount_1_DEF 0x0000000000000000 + +#define QIB_7322_PSRcvPktsCount_1_OFFS 0x13220 +#define QIB_7322_PSRcvPktsCount_1_DEF 0x0000000000000000 + +#define QIB_7322_PSXmitDataCount_1_OFFS 0x13228 +#define QIB_7322_PSXmitDataCount_1_DEF 0x0000000000000000 + +#define QIB_7322_PSXmitPktsCount_1_OFFS 0x13230 +#define QIB_7322_PSXmitPktsCount_1_DEF 0x0000000000000000 + +#define QIB_7322_PSXmitWaitCount_1_OFFS 0x13238 +#define QIB_7322_PSXmitWaitCount_1_DEF 0x0000000000000000 + +#define QIB_7322_RcvEgrArray_OFFS 0x14000 +#define QIB_7322_RcvEgrArray_DEF 0x0000000000000000 +#define QIB_7322_RcvEgrArray_RT_BufSize_LSB 0x25 +#define QIB_7322_RcvEgrArray_RT_BufSize_MSB 0x27 +#define QIB_7322_RcvEgrArray_RT_BufSize_RMASK 0x7 +#define QIB_7322_RcvEgrArray_RT_Addr_LSB 0x0 +#define QIB_7322_RcvEgrArray_RT_Addr_MSB 0x24 +#define QIB_7322_RcvEgrArray_RT_Addr_RMASK 0x1FFFFFFFFF + +#define QIB_7322_RcvTIDArray0_OFFS 0x50000 +#define QIB_7322_RcvTIDArray0_DEF 0x0000000000000000 +#define QIB_7322_RcvTIDArray0_RT_BufSize_LSB 0x25 +#define QIB_7322_RcvTIDArray0_RT_BufSize_MSB 0x27 +#define QIB_7322_RcvTIDArray0_RT_BufSize_RMASK 0x7 +#define QIB_7322_RcvTIDArray0_RT_Addr_LSB 0x0 +#define QIB_7322_RcvTIDArray0_RT_Addr_MSB 0x24 +#define QIB_7322_RcvTIDArray0_RT_Addr_RMASK 0x1FFFFFFFFF + +#define QIB_7322_IBSD_DDS_MAP_TABLE_0_OFFS 0xD0000 +#define QIB_7322_IBSD_DDS_MAP_TABLE_0_DEF 0x0000000000000000 + +#define QIB_7322_RcvHdrTail0_OFFS 0x200000 +#define QIB_7322_RcvHdrTail0_DEF 0x0000000000000000 + +#define QIB_7322_RcvHdrHead0_OFFS 0x200008 +#define QIB_7322_RcvHdrHead0_DEF 0x0000000000000000 +#define QIB_7322_RcvHdrHead0_counter_LSB 0x20 +#define QIB_7322_RcvHdrHead0_counter_MSB 0x2F +#define QIB_7322_RcvHdrHead0_counter_RMASK 0xFFFF +#define QIB_7322_RcvHdrHead0_RcvHeadPointer_LSB 0x0 +#define QIB_7322_RcvHdrHead0_RcvHeadPointer_MSB 0x1F +#define QIB_7322_RcvHdrHead0_RcvHeadPointer_RMASK 0xFFFFFFFF + +#define QIB_7322_RcvEgrIndexTail0_OFFS 0x200010 +#define QIB_7322_RcvEgrIndexTail0_DEF 0x0000000000000000 + +#define QIB_7322_RcvEgrIndexHead0_OFFS 0x200018 +#define QIB_7322_RcvEgrIndexHead0_DEF 0x0000000000000000 + +#define QIB_7322_RcvTIDFlowTable0_OFFS 0x201000 +#define QIB_7322_RcvTIDFlowTable0_DEF 0x0000000000000000 +#define QIB_7322_RcvTIDFlowTable0_GenMismatch_LSB 0x1C +#define QIB_7322_RcvTIDFlowTable0_GenMismatch_MSB 0x1C +#define QIB_7322_RcvTIDFlowTable0_GenMismatch_RMASK 0x1 +#define QIB_7322_RcvTIDFlowTable0_SeqMismatch_LSB 0x1B +#define QIB_7322_RcvTIDFlowTable0_SeqMismatch_MSB 0x1B +#define QIB_7322_RcvTIDFlowTable0_SeqMismatch_RMASK 0x1 +#define QIB_7322_RcvTIDFlowTable0_KeepOnGenErr_LSB 0x16 +#define QIB_7322_RcvTIDFlowTable0_KeepOnGenErr_MSB 0x16 +#define QIB_7322_RcvTIDFlowTable0_KeepOnGenErr_RMASK 0x1 +#define QIB_7322_RcvTIDFlowTable0_KeepAfterSeqErr_LSB 0x15 +#define QIB_7322_RcvTIDFlowTable0_KeepAfterSeqErr_MSB 0x15 +#define QIB_7322_RcvTIDFlowTable0_KeepAfterSeqErr_RMASK 0x1 +#define QIB_7322_RcvTIDFlowTable0_HdrSuppEnabled_LSB 0x14 +#define QIB_7322_RcvTIDFlowTable0_HdrSuppEnabled_MSB 0x14 +#define QIB_7322_RcvTIDFlowTable0_HdrSuppEnabled_RMASK 0x1 +#define QIB_7322_RcvTIDFlowTable0_FlowValid_LSB 0x13 +#define QIB_7322_RcvTIDFlowTable0_FlowValid_MSB 0x13 +#define QIB_7322_RcvTIDFlowTable0_FlowValid_RMASK 0x1 +#define QIB_7322_RcvTIDFlowTable0_GenVal_LSB 0xB 
+#define QIB_7322_RcvTIDFlowTable0_GenVal_MSB 0x12 +#define QIB_7322_RcvTIDFlowTable0_GenVal_RMASK 0xFF +#define QIB_7322_RcvTIDFlowTable0_SeqNum_LSB 0x0 +#define QIB_7322_RcvTIDFlowTable0_SeqNum_MSB 0xA +#define QIB_7322_RcvTIDFlowTable0_SeqNum_RMASK 0x7FF
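Each field in the register header above is described by an _LSB/_MSB/_RMASK triple: the field occupies bits MSB..LSB of the 64-bit register, and RMASK is the mask to apply after shifting the register value right by LSB. A minimal sketch of the access pattern, assuming the header above is included and that regval has already been read from the chip at QIB_7322_IBCStatusA_0_OFFS (the register read itself is hardware-specific and not shown):

#include <stdint.h>

/* LinkState is a 3-bit field at bits 7:5 of IBCStatusA_0 (LSB 0x5, RMASK 0x7). */
static inline unsigned int qib7322_link_state(uint64_t regval)
{
        return (regval >> QIB_7322_IBCStatusA_0_LinkState_LSB) &
                QIB_7322_IBCStatusA_0_LinkState_RMASK;
}

The same shift-and-mask shape works for every field in the header, since the generator emits the mask already right-justified.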
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h new file mode 100644 index 00000000000..5670ace27c6 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_common.h @@ -0,0 +1,812 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _QIB_COMMON_H +#define _QIB_COMMON_H + +/* + * This file contains defines, structures, etc. that are used + * to communicate between kernel and user code. + */ + +/* This is the IEEE-assigned OUI for QLogic Inc. QLogic_IB */ +#define QIB_SRC_OUI_1 0x00 +#define QIB_SRC_OUI_2 0x11 +#define QIB_SRC_OUI_3 0x75 + +/* version of protocol header (known to chip also). In the long run, + * we should be able to generate and accept a range of version numbers; + * for now we only accept one, and it's compiled in. + */ +#define IPS_PROTO_VERSION 2 + +/* + * These are compile time constants that you may want to enable or disable + * if you are trying to debug problems with code or performance. + * QIB_VERBOSE_TRACING define as 1 if you want additional tracing in + * fastpath code + * QIB_TRACE_REGWRITES define as 1 if you want register writes to be + * traced in fastpath code + * _QIB_TRACING define as 0 if you want to remove all tracing in a + * compilation unit + */ + +/* + * The value in the BTH QP field that QLogic_IB uses to differentiate + * a qlogic_ib protocol IB packet from standard IB transport. + * It needs to be even (0x656b78), because the LSB is sometimes + * used for the MSB of context. The change may cause a problem + * interoperating with older software. + */ +#define QIB_KD_QP 0x656b78 + +/* + * These are the status bits readable (in ASCII form, 64bit value) + * from the "status" sysfs file. For binary compatibility, values + * must remain as is; removed states can be reused for different + * purposes. + */ +#define QIB_STATUS_INITTED 0x1 /* basic initialization done */ +/* Chip has been found and initted */ +#define QIB_STATUS_CHIP_PRESENT 0x20 +/* IB link is at ACTIVE, usable for data traffic */ +#define QIB_STATUS_IB_READY 0x40 +/* link is configured, LID, MTU, etc. have been set */ +#define QIB_STATUS_IB_CONF 0x80 +/* A fatal hardware error has occurred. */ +#define QIB_STATUS_HWERROR 0x200 + +/* + * The list of usermode accessible registers. Also see Reg_* later in file. + */ +enum qib_ureg { + /* (RO) DMA RcvHdr to be used next. */ + ur_rcvhdrtail = 0, + /* (RW) RcvHdr entry to be processed next by host. */ + ur_rcvhdrhead = 1, + /* (RO) Index of next Eager index to use. */ + ur_rcvegrindextail = 2, + /* (RW) Eager TID to be processed next */ + ur_rcvegrindexhead = 3, + /* For internal use only; max register number. */ + _QIB_UregMax +};
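The enum encodes the usual producer/consumer ring convention: hardware advances ur_rcvhdrtail as it DMAs in new header entries, and software publishes the next entry it will process through ur_rcvhdrhead. A hedged sketch of the consume loop under that reading; ureg_read, ureg_write and process_rcvhdr_entry are hypothetical helpers over the mapped user registers, not part of this header:

#include <stdint.h>

extern uint64_t ureg_read(int ureg);            /* hypothetical accessor */
extern void ureg_write(int ureg, uint64_t val); /* hypothetical accessor */
extern void process_rcvhdr_entry(uint32_t idx); /* hypothetical consumer */

static void drain_rcvhdrq(uint32_t head, uint32_t nentries)
{
        /* tail is the next slot hardware will fill; stop when we catch up */
        while (head != (uint32_t)ureg_read(ur_rcvhdrtail)) {
                process_rcvhdr_entry(head);
                head = (head + 1) % nentries;
                ureg_write(ur_rcvhdrhead, head); /* return the entry to hw */
        }
}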
+ +/* bit values for spi_runtime_flags */ +#define QIB_RUNTIME_PCIE 0x0002 +#define QIB_RUNTIME_FORCE_WC_ORDER 0x0004 +#define QIB_RUNTIME_RCVHDR_COPY 0x0008 +#define QIB_RUNTIME_MASTER 0x0010 +#define QIB_RUNTIME_RCHK 0x0020 +#define QIB_RUNTIME_NODMA_RTAIL 0x0080 +#define QIB_RUNTIME_SPECIAL_TRIGGER 0x0100 +#define QIB_RUNTIME_SDMA 0x0200 +#define QIB_RUNTIME_FORCE_PIOAVAIL 0x0400 +#define QIB_RUNTIME_PIO_REGSWAPPED 0x0800 +#define QIB_RUNTIME_CTXT_MSB_IN_QP 0x1000 +#define QIB_RUNTIME_CTXT_REDIRECT 0x2000 +#define QIB_RUNTIME_HDRSUPP 0x4000 + +/* + * This structure is returned by qib_userinit() immediately after + * open to get implementation-specific info, and info specific to this + * instance. + * + * This struct must have explicit pad fields where type sizes + * may result in different alignments between 32 and 64 bit + * programs, since the 64 bit kernel requires the user code + * to have matching offsets + */ +struct qib_base_info { + /* version of hardware, for feature checking. */ + __u32 spi_hw_version; + /* version of software, for feature checking. */ + __u32 spi_sw_version; + /* QLogic_IB context assigned, goes into sent packets */ + __u16 spi_ctxt; + __u16 spi_subctxt; + /* + * IB MTU; a packet's IB data must be less than this. + * The MTU is in bytes, and will be a multiple of 4 bytes. + */ + __u32 spi_mtu; + /* + * Size of a PIO buffer. Any given packet's total size must be less + * than this (in words). Included is the starting control word, so + * if 513 is returned, then total pkt size is 512 words or less. + */ + __u32 spi_piosize; + /* size of the TID cache in qlogic_ib, in entries */ + __u32 spi_tidcnt; + /* size of the TID Eager list in qlogic_ib, in entries */ + __u32 spi_tidegrcnt; + /* size of a single receive header queue entry in words. */ + __u32 spi_rcvhdrent_size; + /* + * Count of receive header queue entries allocated. + * This may be less than the spu_rcvhdrcnt passed in! + */ + __u32 spi_rcvhdr_cnt; + + /* per-chip and other runtime features bitmap (QIB_RUNTIME_*) */ + __u32 spi_runtime_flags; + + /* address where hardware receive header queue is mapped */ + __u64 spi_rcvhdr_base; + + /* user program. */ + + /* base address of eager TID receive buffers used by hardware. */ + __u64 spi_rcv_egrbufs; + + /* Allocated by initialization code, not by protocol. */ + + /* + * Size of each TID buffer in host memory, starting at + * spi_rcv_egrbufs. The buffers are virtually contiguous. + */ + __u32 spi_rcv_egrbufsize; + /* + * The special QP (queue pair) value that identifies a qlogic_ib + * protocol packet from standard IB packets. More, probably much + * more, to be added. + */ + __u32 spi_qpair; + + /* + * User register base for init code, not to be used directly by + * protocol or applications. Always points to chip registers, + * for normal or shared context. + */ + __u64 spi_uregbase; + /* + * Maximum buffer size in bytes that can be used in a single TID + * entry (assuming the buffer is aligned to this boundary). This is + * the minimum of what the hardware and software support. Guaranteed + * to be a power of 2. + */ + __u32 spi_tid_maxsize; + /* + * alignment of each pio send buffer (byte count + * to add to spi_piobufbase to get to second buffer) + */ + __u32 spi_pioalign; + /* + * The index of the first pio buffer available to this process; + * needed to do lookup in spi_pioavailaddr; not added to + * spi_piobufbase. + */ + __u32 spi_pioindex; + /* number of buffers mapped for this process */ + __u32 spi_piocnt; + + /* + * Base address of writeonly pio buffers for this process. + * Each buffer has spi_piosize words, and is aligned on spi_pioalign + * boundaries. spi_piocnt buffers are mapped from this address + */ + __u64 spi_piobufbase; + + /* + * Base address of readonly memory copy of the pioavail registers. + * There are 2 bits for each buffer. + */ + __u64 spi_pioavailaddr; + + /* + * Address where driver updates a copy of the interface and driver + * status (QIB_STATUS_*) as a 64 bit value. It's followed by a + * link status qword (formerly combined with driver status), then a + * string indicating hardware error, if there was one. + */ + __u64 spi_status; + + /* number of chip ctxts available to user processes */ + __u32 spi_nctxts; + __u16 spi_unit; /* unit number of chip we are using */ + __u16 spi_port; /* IB port number we are using */ + /* num bufs in each contiguous set */ + __u32 spi_rcv_egrperchunk; + /* size in bytes of each contiguous set */ + __u32 spi_rcv_egrchunksize; + /* total size of mmap to cover full rcvegrbuffers */ + __u32 spi_rcv_egrbuftotlen; + __u32 spi_rhf_offset; /* dword offset in hdrqent for rcvhdr flags */ + /* address of readonly memory copy of the rcvhdrq tail register. */ + __u64 spi_rcvhdr_tailaddr; + + /* + * shared memory pages for subctxts if ctxt is shared; these cover + * all the processes in the group sharing a single context. + * all have enough space for the num_subcontexts value on this job. + */ + __u64 spi_subctxt_uregbase; + __u64 spi_subctxt_rcvegrbuf; + __u64 spi_subctxt_rcvhdr_base; + + /* shared memory page for send buffer disarm status */ + __u64 spi_sendbuf_status; +} __attribute__ ((aligned(8))); + +/* + * This version number is given to the driver by the user code during + * initialization in the spu_userversion field of qib_user_info, so + * the driver can check for compatibility with user code. + * + * The major version changes when data structures + * change in an incompatible way. The driver must be the same or higher + * for initialization to succeed. In some cases, a higher version + * driver will not interoperate with older software, and initialization + * will return an error. + */ +#define QIB_USER_SWMAJOR 1 + +/* + * Minor version differences are always compatible + * within a major version; however, if user software is newer + * than driver software, some new features and/or structure fields + * may not be implemented; the user code must deal with this if it + * cares, or it must abort after initialization reports the difference. + */ +#define QIB_USER_SWMINOR 13 + +#define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR) + +#ifndef QIB_KERN_TYPE +#define QIB_KERN_TYPE 0 +#endif + +/* + * Similarly, this is the kernel version going back to the user. It's + * slightly different, in that we want to tell if the driver was built as + * part of a QLogic release, or from openfabrics.org, kernel.org, or a + * standard distribution, for support reasons. + * The high bit is 0 for non-QLogic and 1 for QLogic-built/supplied. + * + * It's returned by the driver to the user code during initialization in the + * spi_sw_version field of qib_base_info, so the user code can in turn + * check for compatibility with the kernel. +*/ +#define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION)
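Taken together: user code advertises QIB_USER_SWVERSION in spu_userversion, and the driver answers with QIB_KERN_SWVERSION in spi_sw_version. A sketch of the user-side check the comments above describe; masking off the QIB_KERN_TYPE bit (which only records who built the driver) and requiring an exact major match is one reading of those comments, not a quoted implementation:

#include <stdint.h>

static int qib_sw_compatible(uint32_t spi_sw_version)
{
        uint32_t v = spi_sw_version & 0x7fffffff; /* drop the QIB_KERN_TYPE bit */

        /* majors must match; minor differences within a major are compatible */
        return (v >> 16) == QIB_USER_SWMAJOR;
}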
+ +/* + * Define the driver version number. This is something that refers only + * to the driver itself, not the software interfaces it supports. + */ +#define QIB_DRIVER_VERSION_BASE "1.11" + +/* create the final driver version string */ +#ifdef QIB_IDSTR +#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE " " QIB_IDSTR +#else +#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE +#endif + +/* + * If the unit is specified via open, HCA choice is fixed. If port is + * specified, it's also fixed. Otherwise we try to spread contexts + * across ports and HCAs, using different algorithms. WITHIN is + * the old default, prior to this mechanism. + */ +#define QIB_PORT_ALG_ACROSS 0 /* round robin contexts across HCAs, then + * ports; this is the default */ +#define QIB_PORT_ALG_WITHIN 1 /* use all contexts on an HCA (round robin + * active ports within), then next HCA */ +#define QIB_PORT_ALG_COUNT 2 /* number of algorithm choices */ + +/* + * This structure is passed to qib_userinit() to tell the driver where + * user code buffers are, sizes, etc. The offsets and sizes of the + * fields must remain unchanged, for binary compatibility. It can + * be extended if userversion is changed, so user code can tell if needed. + */ +struct qib_user_info { + /* + * version of user software, to detect compatibility issues. + * Should be set to QIB_USER_SWVERSION. + */ + __u32 spu_userversion; + + __u32 _spu_unused2; + + /* size of struct base_info to write to */ + __u32 spu_base_info_size; + + __u32 spu_port_alg; /* which QIB_PORT_ALG_*; unused if user minor < 11 */ + + /* + * If two or more processes wish to share a context, each process + * must set the spu_subctxt_cnt and spu_subctxt_id to the same + * values. The only restriction on the spu_subctxt_id is that + * it be unique for a given node. + */ + __u16 spu_subctxt_cnt; + __u16 spu_subctxt_id; + + __u32 spu_port; /* IB port requested by user if > 0 */ + + /* + * address of struct base_info to write to + */ + __u64 spu_base_info; + +} __attribute__ ((aligned(8))); + +/* User commands.
*/ + +/* 16 available, was: old set up userspace (for old user code) */ +#define QIB_CMD_CTXT_INFO 17 /* find out what resources we got */ +#define QIB_CMD_RECV_CTRL 18 /* control receipt of packets */ +#define QIB_CMD_TID_UPDATE 19 /* update expected TID entries */ +#define QIB_CMD_TID_FREE 20 /* free expected TID entries */ +#define QIB_CMD_SET_PART_KEY 21 /* add partition key */ +/* 22 available, was: return info on slave processes (for old user code) */ +#define QIB_CMD_ASSIGN_CTXT 23 /* allocate HCA and ctxt */ +#define QIB_CMD_USER_INIT 24 /* set up userspace */ +#define QIB_CMD_UNUSED_1 25 +#define QIB_CMD_UNUSED_2 26 +#define QIB_CMD_PIOAVAILUPD 27 /* force an update of PIOAvail reg */ +#define QIB_CMD_POLL_TYPE 28 /* set the kind of polling we want */ +#define QIB_CMD_ARMLAUNCH_CTRL 29 /* armlaunch detection control */ +/* 30 is unused */ +#define QIB_CMD_SDMA_INFLIGHT 31 /* sdma inflight counter request */ +#define QIB_CMD_SDMA_COMPLETE 32 /* sdma completion counter request */ +/* 33 available, was a testing feature */ +#define QIB_CMD_DISARM_BUFS 34 /* disarm send buffers w/ errors */ +#define QIB_CMD_ACK_EVENT 35 /* ack & clear bits */ +#define QIB_CMD_CPUS_LIST 36 /* list of cpus allocated, for pinned + * processes: qib_cpus_list */ + +/* + * QIB_CMD_ACK_EVENT obsoletes QIB_CMD_DISARM_BUFS, but we keep it for + * compatibility with libraries from the previous release. The ACK_EVENT + * will take appropriate driver action (if any, just DISARM for now), + * then clear the bits passed in as part of the mask. These bits are + * in the first 64-bit word at spi_sendbuf_status, and are passed to + * the driver in the event_mask union as well. + */ +#define _QIB_EVENT_DISARM_BUFS_BIT 0 +#define _QIB_EVENT_LINKDOWN_BIT 1 +#define _QIB_EVENT_LID_CHANGE_BIT 2 +#define _QIB_EVENT_LMC_CHANGE_BIT 3 +#define _QIB_EVENT_SL2VL_CHANGE_BIT 4 +#define _QIB_MAX_EVENT_BIT _QIB_EVENT_SL2VL_CHANGE_BIT + +#define QIB_EVENT_DISARM_BUFS_BIT (1UL << _QIB_EVENT_DISARM_BUFS_BIT) +#define QIB_EVENT_LINKDOWN_BIT (1UL << _QIB_EVENT_LINKDOWN_BIT) +#define QIB_EVENT_LID_CHANGE_BIT (1UL << _QIB_EVENT_LID_CHANGE_BIT) +#define QIB_EVENT_LMC_CHANGE_BIT (1UL << _QIB_EVENT_LMC_CHANGE_BIT) +#define QIB_EVENT_SL2VL_CHANGE_BIT (1UL << _QIB_EVENT_SL2VL_CHANGE_BIT) + + +/* + * Poll types + */ +#define QIB_POLL_TYPE_ANYRCV 0x0 +#define QIB_POLL_TYPE_URGENT 0x1 + +struct qib_ctxt_info { + __u16 num_active; /* number of active units */ + __u16 unit; /* unit (chip) assigned to caller */ + __u16 port; /* IB port assigned to caller (1-based) */ + __u16 ctxt; /* ctxt on unit assigned to caller */ + __u16 subctxt; /* subctxt on unit assigned to caller */ + __u16 num_ctxts; /* number of ctxts available on unit */ + __u16 num_subctxts; /* number of subctxts opened on ctxt */ + __u16 rec_cpu; /* cpu # for affinity (ffff if none) */ +}; + +struct qib_tid_info { + __u32 tidcnt; + /* make structure same size in 32 and 64 bit */ + __u32 tid__unused; + /* virtual address of first page in transfer */ + __u64 tidvaddr; + /* pointer (same size 32/64 bit) to __u16 tid array */ + __u64 tidlist; + + /* + * pointer (same size 32/64 bit) to bitmap of TIDs used + * for this call; checked for being large enough at open + */ + __u64 tidmap; +}; + +struct qib_cmd { + __u32 type; /* command type */ + union { + struct qib_tid_info tid_info; + struct qib_user_info user_info; + + /* + * address in userspace where we should put the sdma + * inflight counter + */ + __u64 sdma_inflight; + /* + * address in userspace where we should put the sdma + * completion
counter + */ + __u64 sdma_complete; + /* address in userspace of struct qib_ctxt_info to + write result to */ + __u64 ctxt_info; + /* enable/disable receipt of packets */ + __u32 recv_ctrl; + /* enable/disable armlaunch errors (non-zero to enable) */ + __u32 armlaunch_ctrl; + /* partition key to set */ + __u16 part_key; + /* user address of __u32 bitmask of active slaves */ + __u64 slave_mask_addr; + /* type of polling we want */ + __u16 poll_type; + /* back pressure enable bit for one particular context */ + __u8 ctxt_bp; + /* qib_user_event_ack(), QIB_EVENT_* bits */ + __u64 event_mask; + } cmd; +}; + +struct qib_iovec { + /* Pointer to data, but same size 32 and 64 bit */ + __u64 iov_base; + + /* + * Length of data; don't need 64 bits, but want + * qib_sendpkt to remain same size as before 32 bit changes, so... + */ + __u64 iov_len; +}; + +/* + * Describes a single packet for send. Each packet can have one or more + * buffers, but the total length (exclusive of IB headers) must be less + * than the MTU, and if using the PIO method, entire packet length, + * including IB headers, must be less than the qib_piosize value (words). + * Use of this necessitates including sys/uio.h + */ +struct __qib_sendpkt { + __u32 sps_flags; /* flags for packet (TBD) */ + __u32 sps_cnt; /* number of entries to use in sps_iov */ + /* array of iov's describing packet. TEMPORARY */ + struct qib_iovec sps_iov[4]; +}; + +/* + * Diagnostics can send a packet by "writing" the following + * struct to the diag data special file. + * This allows a custom + * pbc (+ static rate) qword, so that special modes and deliberate + * changes to CRCs can be used. The elements were also re-ordered + * for better alignment and to avoid padding issues. + */ +#define _DIAG_XPKT_VERS 3 +struct qib_diag_xpkt { + __u16 version; + __u16 unit; + __u16 port; + __u16 len; + __u64 data; + __u64 pbc_wd; +}; + +/* + * Data layout in I2C flash (for GUID, etc.) + * All fields are little-endian binary unless otherwise stated + */ +#define QIB_FLASH_VERSION 2 +struct qib_flash { + /* flash layout version (QIB_FLASH_VERSION) */ + __u8 if_fversion; + /* checksum protecting if_length bytes */ + __u8 if_csum; + /* + * valid length (in use, protected by if_csum), including + * if_fversion and if_csum themselves + */ + __u8 if_length; + /* the GUID, in network order */ + __u8 if_guid[8]; + /* number of GUIDs to use, starting from if_guid */ + __u8 if_numguid; + /* the (last 10 characters of) board serial number, in ASCII */ + char if_serial[12]; + /* board mfg date (YYYYMMDD ASCII) */ + char if_mfgdate[8]; + /* last board rework/test date (YYYYMMDD ASCII) */ + char if_testdate[8]; + /* logging of error counts, TBD */ + __u8 if_errcntp[4]; + /* powered on hours, updated at driver unload */ + __u8 if_powerhour[2]; + /* ASCII free-form comment field */ + char if_comment[32]; + /* Backwards compatible prefix for longer QLogic Serial Numbers */ + char if_sprefix[4]; + /* 82 bytes used, min flash size is 128 bytes */ + __u8 if_future[46]; +}; + +/* + * These are the counters implemented in the chip, and are listed in order. + * The InterCaps naming is taken straight from the chip spec.
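+ *
+ * Entries appear to be repurposed in place rather than reordered
+ * (note the "was Reserved1" annotation below), which keeps existing
+ * offsets stable for anything reading this block by offset.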
+ */ +struct qlogic_ib_counters { + __u64 LBIntCnt; + __u64 LBFlowStallCnt; + __u64 TxSDmaDescCnt; /* was Reserved1 */ + __u64 TxUnsupVLErrCnt; + __u64 TxDataPktCnt; + __u64 TxFlowPktCnt; + __u64 TxDwordCnt; + __u64 TxLenErrCnt; + __u64 TxMaxMinLenErrCnt; + __u64 TxUnderrunCnt; + __u64 TxFlowStallCnt; + __u64 TxDroppedPktCnt; + __u64 RxDroppedPktCnt; + __u64 RxDataPktCnt; + __u64 RxFlowPktCnt; + __u64 RxDwordCnt; + __u64 RxLenErrCnt; + __u64 RxMaxMinLenErrCnt; + __u64 RxICRCErrCnt; + __u64 RxVCRCErrCnt; + __u64 RxFlowCtrlErrCnt; + __u64 RxBadFormatCnt; + __u64 RxLinkProblemCnt; + __u64 RxEBPCnt; + __u64 RxLPCRCErrCnt; + __u64 RxBufOvflCnt; + __u64 RxTIDFullErrCnt; + __u64 RxTIDValidErrCnt; + __u64 RxPKeyMismatchCnt; + __u64 RxP0HdrEgrOvflCnt; + __u64 RxP1HdrEgrOvflCnt; + __u64 RxP2HdrEgrOvflCnt; + __u64 RxP3HdrEgrOvflCnt; + __u64 RxP4HdrEgrOvflCnt; + __u64 RxP5HdrEgrOvflCnt; + __u64 RxP6HdrEgrOvflCnt; + __u64 RxP7HdrEgrOvflCnt; + __u64 RxP8HdrEgrOvflCnt; + __u64 RxP9HdrEgrOvflCnt; + __u64 RxP10HdrEgrOvflCnt; + __u64 RxP11HdrEgrOvflCnt; + __u64 RxP12HdrEgrOvflCnt; + __u64 RxP13HdrEgrOvflCnt; + __u64 RxP14HdrEgrOvflCnt; + __u64 RxP15HdrEgrOvflCnt; + __u64 RxP16HdrEgrOvflCnt; + __u64 IBStatusChangeCnt; + __u64 IBLinkErrRecoveryCnt; + __u64 IBLinkDownedCnt; + __u64 IBSymbolErrCnt; + __u64 RxVL15DroppedPktCnt; + __u64 RxOtherLocalPhyErrCnt; + __u64 PcieRetryBufDiagQwordCnt; + __u64 ExcessBufferOvflCnt; + __u64 LocalLinkIntegrityErrCnt; + __u64 RxVlErrCnt; + __u64 RxDlidFltrCnt; +}; + +/* + * The next set of defines are for packet headers, and chip register + * and memory bits that are visible to and/or used by user-mode software. + */ + +/* RcvHdrFlags bits */ +#define QLOGIC_IB_RHF_LENGTH_MASK 0x7FF +#define QLOGIC_IB_RHF_LENGTH_SHIFT 0 +#define QLOGIC_IB_RHF_RCVTYPE_MASK 0x7 +#define QLOGIC_IB_RHF_RCVTYPE_SHIFT 11 +#define QLOGIC_IB_RHF_EGRINDEX_MASK 0xFFF +#define QLOGIC_IB_RHF_EGRINDEX_SHIFT 16 +#define QLOGIC_IB_RHF_SEQ_MASK 0xF +#define QLOGIC_IB_RHF_SEQ_SHIFT 0 +#define QLOGIC_IB_RHF_HDRQ_OFFSET_MASK 0x7FF +#define QLOGIC_IB_RHF_HDRQ_OFFSET_SHIFT 4 +#define QLOGIC_IB_RHF_H_ICRCERR 0x80000000 +#define QLOGIC_IB_RHF_H_VCRCERR 0x40000000 +#define QLOGIC_IB_RHF_H_PARITYERR 0x20000000 +#define QLOGIC_IB_RHF_H_LENERR 0x10000000 +#define QLOGIC_IB_RHF_H_MTUERR 0x08000000 +#define QLOGIC_IB_RHF_H_IHDRERR 0x04000000 +#define QLOGIC_IB_RHF_H_TIDERR 0x02000000 +#define QLOGIC_IB_RHF_H_MKERR 0x01000000 +#define QLOGIC_IB_RHF_H_IBERR 0x00800000 +#define QLOGIC_IB_RHF_H_ERR_MASK 0xFF800000 +#define QLOGIC_IB_RHF_L_USE_EGR 0x80000000 +#define QLOGIC_IB_RHF_L_SWA 0x00008000 +#define QLOGIC_IB_RHF_L_SWB 0x00004000 + +/* qlogic_ib header fields */ +#define QLOGIC_IB_I_VERS_MASK 0xF +#define QLOGIC_IB_I_VERS_SHIFT 28 +#define QLOGIC_IB_I_CTXT_MASK 0xF +#define QLOGIC_IB_I_CTXT_SHIFT 24 +#define QLOGIC_IB_I_TID_MASK 0x7FF +#define QLOGIC_IB_I_TID_SHIFT 13 +#define QLOGIC_IB_I_OFFSET_MASK 0x1FFF +#define QLOGIC_IB_I_OFFSET_SHIFT 0 + +/* K_PktFlags bits */ +#define QLOGIC_IB_KPF_INTR 0x1 +#define QLOGIC_IB_KPF_SUBCTXT_MASK 0x3 +#define QLOGIC_IB_KPF_SUBCTXT_SHIFT 1 + +#define QLOGIC_IB_MAX_SUBCTXT 4 + +/* SendPIO per-buffer control */ +#define QLOGIC_IB_SP_TEST 0x40 +#define QLOGIC_IB_SP_TESTEBP 0x20 +#define QLOGIC_IB_SP_TRIGGER_SHIFT 15 + +/* SendPIOAvail bits */ +#define QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT 1 +#define QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT 0 + +/* qlogic_ib header format */ +struct qib_header { + /* + * Version - 4 bits, Context - 4 bits, TID - 10 bits and Offset - + * 14 bits before ECO change 
~28 Dec 03. After that, Vers 4, + * Context 4, TID 11, offset 13. + */ + __le32 ver_ctxt_tid_offset; + __le16 chksum; + __le16 pkt_flags; +}; + +/* + * qlogic_ib user message header format. + * This structure contains the first 4 fields common to all protocols + * that employ qlogic_ib. + */ +struct qib_message_header { + __be16 lrh[4]; + __be32 bth[3]; + /* fields below this point are in host byte order */ + struct qib_header iph; + /* fields below are simplified, but should match PSM */ + /* some are accessed by driver when packet splitting is needed */ + __u8 sub_opcode; + __u8 flags; + __u16 commidx; + __u32 ack_seq_num; + __u8 flowid; + __u8 hdr_dlen; + __u16 mqhdr; + __u32 uwords[4]; +}; + +/* sequence number bits for message */ +union qib_seqnum { + struct { + __u32 seq:11; + __u32 gen:8; + __u32 flow:5; + }; + struct { + __u32 pkt:16; + __u32 msg:8; + }; + __u32 val; +}; + +/* qib receiving-dma tid-session-member */ +struct qib_tid_session_member { + __u16 tid; + __u16 offset; + __u16 length; +}; + +/* IB - LRH header consts */ +#define QIB_LRH_GRH 0x0003 /* first word of IB LRH - next header: GRH */ +#define QIB_LRH_BTH 0x0002 /* first word of IB LRH - next header: BTH */ + +/* misc. */ +#define SIZE_OF_CRC 1 + +#define QIB_DEFAULT_P_KEY 0xFFFF +#define QIB_PERMISSIVE_LID 0xFFFF +#define QIB_AETH_CREDIT_SHIFT 24 +#define QIB_AETH_CREDIT_MASK 0x1F +#define QIB_AETH_CREDIT_INVAL 0x1F +#define QIB_PSN_MASK 0xFFFFFF +#define QIB_MSN_MASK 0xFFFFFF +#define QIB_QPN_MASK 0xFFFFFF +#define QIB_MULTICAST_LID_BASE 0xC000 +#define QIB_EAGER_TID_ID QLOGIC_IB_I_TID_MASK +#define QIB_MULTICAST_QPN 0xFFFFFF + +/* Receive Header Queue: receive type (from qlogic_ib) */ +#define RCVHQ_RCV_TYPE_EXPECTED 0 +#define RCVHQ_RCV_TYPE_EAGER 1 +#define RCVHQ_RCV_TYPE_NON_KD 2 +#define RCVHQ_RCV_TYPE_ERROR 3 + +#define QIB_HEADER_QUEUE_WORDS 9 + +/* Functions for extracting fields from rcvhdrq entries for the driver. + */ +static inline __u32 qib_hdrget_err_flags(const __le32 *rbuf) +{ + return __le32_to_cpu(rbuf[1]) & QLOGIC_IB_RHF_H_ERR_MASK; +} + +static inline __u32 qib_hdrget_rcv_type(const __le32 *rbuf) +{ + return (__le32_to_cpu(rbuf[0]) >> QLOGIC_IB_RHF_RCVTYPE_SHIFT) & + QLOGIC_IB_RHF_RCVTYPE_MASK; +} + +static inline __u32 qib_hdrget_length_in_bytes(const __le32 *rbuf) +{ + return ((__le32_to_cpu(rbuf[0]) >> QLOGIC_IB_RHF_LENGTH_SHIFT) & + QLOGIC_IB_RHF_LENGTH_MASK) << 2; +} + +static inline __u32 qib_hdrget_index(const __le32 *rbuf) +{ + return (__le32_to_cpu(rbuf[0]) >> QLOGIC_IB_RHF_EGRINDEX_SHIFT) & + QLOGIC_IB_RHF_EGRINDEX_MASK; +} + +static inline __u32 qib_hdrget_seq(const __le32 *rbuf) +{ + return (__le32_to_cpu(rbuf[1]) >> QLOGIC_IB_RHF_SEQ_SHIFT) & + QLOGIC_IB_RHF_SEQ_MASK; +} + +static inline __u32 qib_hdrget_offset(const __le32 *rbuf) +{ + return (__le32_to_cpu(rbuf[1]) >> QLOGIC_IB_RHF_HDRQ_OFFSET_SHIFT) & + QLOGIC_IB_RHF_HDRQ_OFFSET_MASK; +} + +static inline __u32 qib_hdrget_use_egr_buf(const __le32 *rbuf) +{ + return __le32_to_cpu(rbuf[0]) & QLOGIC_IB_RHF_L_USE_EGR; +} + +static inline __u32 qib_hdrget_qib_ver(__le32 hdrword) +{ + return (__le32_to_cpu(hdrword) >> QLOGIC_IB_I_VERS_SHIFT) & + QLOGIC_IB_I_VERS_MASK; +} + +#endif /* _QIB_COMMON_H */ diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c new file mode 100644 index 00000000000..ab4e11cfab1 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_cq.c @@ -0,0 +1,540 @@ +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation.
All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/kthread.h> + +#include "qib_verbs.h" +#include "qib.h" + +/** + * qib_cq_enter - add a new entry to the completion queue + * @cq: completion queue + * @entry: work completion entry to add + * @solicited: true if @entry is a solicited entry + * + * This may be called with qp->s_lock held. + */ +void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited) +{ + struct qib_cq_wc *wc; + unsigned long flags; + u32 head; + u32 next; + + spin_lock_irqsave(&cq->lock, flags); + + /* + * Note that the head pointer might be writable by user processes. + * Take care to verify it is a sane value. + */ + wc = cq->queue; + head = wc->head; + if (head >= (unsigned) cq->ibcq.cqe) { + head = cq->ibcq.cqe; + next = 0; + } else + next = head + 1; + if (unlikely(next == wc->tail)) { + spin_unlock_irqrestore(&cq->lock, flags); + if (cq->ibcq.event_handler) { + struct ib_event ev; + + ev.device = cq->ibcq.device; + ev.element.cq = &cq->ibcq; + ev.event = IB_EVENT_CQ_ERR; + cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); + } + return; + } + if (cq->ip) { + wc->uqueue[head].wr_id = entry->wr_id; + wc->uqueue[head].status = entry->status; + wc->uqueue[head].opcode = entry->opcode; + wc->uqueue[head].vendor_err = entry->vendor_err; + wc->uqueue[head].byte_len = entry->byte_len; + wc->uqueue[head].ex.imm_data = + (__u32 __force)entry->ex.imm_data; + wc->uqueue[head].qp_num = entry->qp->qp_num; + wc->uqueue[head].src_qp = entry->src_qp; + wc->uqueue[head].wc_flags = entry->wc_flags; + wc->uqueue[head].pkey_index = entry->pkey_index; + wc->uqueue[head].slid = entry->slid; + wc->uqueue[head].sl = entry->sl; + wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; + wc->uqueue[head].port_num = entry->port_num; + /* Make sure entry is written before the head index.
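+ * The user queue is mmap()ed into the consumer's address space, so
+ * the write barrier below is what keeps the consumer from observing
+ * the new head before the entry contents; the user-space reader is
+ * expected to pair it with a read barrier of its own.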
*/ + smp_wmb(); + } else + wc->kqueue[head] = *entry; + wc->head = next; + + if (cq->notify == IB_CQ_NEXT_COMP || + (cq->notify == IB_CQ_SOLICITED && + (solicited || entry->status != IB_WC_SUCCESS))) { + struct kthread_worker *worker; + /* + * This will cause send_complete() to be called in + * another thread. + */ + smp_rmb(); + worker = cq->dd->worker; + if (likely(worker)) { + cq->notify = IB_CQ_NONE; + cq->triggered++; + queue_kthread_work(worker, &cq->comptask); + } + } + + spin_unlock_irqrestore(&cq->lock, flags); +} + +/** + * qib_poll_cq - poll for work completion entries + * @ibcq: the completion queue to poll + * @num_entries: the maximum number of entries to return + * @entry: pointer to array where work completions are placed + * + * Returns the number of completion entries polled. + * + * This may be called from interrupt context. Also called by ib_poll_cq() + * in the generic verbs code. + */ +int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) +{ + struct qib_cq *cq = to_icq(ibcq); + struct qib_cq_wc *wc; + unsigned long flags; + int npolled; + u32 tail; + + /* The kernel can only poll a kernel completion queue */ + if (cq->ip) { + npolled = -EINVAL; + goto bail; + } + + spin_lock_irqsave(&cq->lock, flags); + + wc = cq->queue; + tail = wc->tail; + if (tail > (u32) cq->ibcq.cqe) + tail = (u32) cq->ibcq.cqe; + for (npolled = 0; npolled < num_entries; ++npolled, ++entry) { + if (tail == wc->head) + break; + /* The kernel doesn't need a RMB since it has the lock. */ + *entry = wc->kqueue[tail]; + if (tail >= cq->ibcq.cqe) + tail = 0; + else + tail++; + } + wc->tail = tail; + + spin_unlock_irqrestore(&cq->lock, flags); + +bail: + return npolled; +} + +static void send_complete(struct kthread_work *work) +{ + struct qib_cq *cq = container_of(work, struct qib_cq, comptask); + + /* + * The completion handler will most likely rearm the notification + * and poll for all pending entries. If a new completion entry + * is added while we are in this routine, queue_kthread_work() + * won't requeue us until we return, so we check triggered to + * see if we need to call the handler again. + */ + for (;;) { + u8 triggered = cq->triggered; + + /* + * IPoIB connected mode assumes the callback is from a + * soft IRQ. We simulate this by blocking "bottom halves". + * See the implementation for ipoib_cm_handle_tx_wc(), + * netif_tx_lock_bh() and netif_tx_lock(). + */ + local_bh_disable(); + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); + local_bh_enable(); + + if (cq->triggered == triggered) + return; + } +} + +/** + * qib_create_cq - create a completion queue + * @ibdev: the device this completion queue is attached to + * @entries: the minimum size of the completion queue + * @comp_vector: completion vector; unused by this driver + * @context: unused by the QLogic_IB driver + * @udata: user data for libibverbs.so + * + * Returns a pointer to the completion queue or negative errno values + * for failure. + * + * Called by ib_create_cq() in the generic verbs code. + */ +struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, + int comp_vector, struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct qib_ibdev *dev = to_idev(ibdev); + struct qib_cq *cq; + struct qib_cq_wc *wc; + struct ib_cq *ret; + u32 sz; + + if (entries < 1 || entries > ib_qib_max_cqes) { + ret = ERR_PTR(-EINVAL); + goto done; + } + + /* Allocate the completion queue structure.
*/ + cq = kmalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) { + ret = ERR_PTR(-ENOMEM); + goto done; + } + + /* + * Allocate the completion queue entries and head/tail pointers. + * This is allocated separately so that it can be resized and + * also mapped into user space. + * We need to use vmalloc() in order to support mmap and large + * numbers of entries. + */ + sz = sizeof(*wc); + if (udata && udata->outlen >= sizeof(__u64)) + sz += sizeof(struct ib_uverbs_wc) * (entries + 1); + else + sz += sizeof(struct ib_wc) * (entries + 1); + wc = vmalloc_user(sz); + if (!wc) { + ret = ERR_PTR(-ENOMEM); + goto bail_cq; + } + + /* + * Return the address of the WC as the offset to mmap. + * See qib_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + int err; + + cq->ip = qib_create_mmap_info(dev, sz, context, wc); + if (!cq->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wc; + } + + err = ib_copy_to_udata(udata, &cq->ip->offset, + sizeof(cq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else + cq->ip = NULL; + + spin_lock(&dev->n_cqs_lock); + if (dev->n_cqs_allocated == ib_qib_max_cqs) { + spin_unlock(&dev->n_cqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_cqs_allocated++; + spin_unlock(&dev->n_cqs_lock); + + if (cq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + /* + * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe. + * The number of entries should be >= the number requested or return + * an error. + */ + cq->dd = dd_from_dev(dev); + cq->ibcq.cqe = entries; + cq->notify = IB_CQ_NONE; + cq->triggered = 0; + spin_lock_init(&cq->lock); + init_kthread_work(&cq->comptask, send_complete); + wc->head = 0; + wc->tail = 0; + cq->queue = wc; + + ret = &cq->ibcq; + + goto done; + +bail_ip: + kfree(cq->ip); +bail_wc: + vfree(wc); +bail_cq: + kfree(cq); +done: + return ret; +} + +/** + * qib_destroy_cq - destroy a completion queue + * @ibcq: the completion queue to destroy. + * + * Returns 0 for success. + * + * Called by ib_destroy_cq() in the generic verbs code. + */ +int qib_destroy_cq(struct ib_cq *ibcq) +{ + struct qib_ibdev *dev = to_idev(ibcq->device); + struct qib_cq *cq = to_icq(ibcq); + + flush_kthread_work(&cq->comptask); + spin_lock(&dev->n_cqs_lock); + dev->n_cqs_allocated--; + spin_unlock(&dev->n_cqs_lock); + if (cq->ip) + kref_put(&cq->ip->ref, qib_release_mmap_info); + else + vfree(cq->queue); + kfree(cq); + + return 0; +} + +/** + * qib_req_notify_cq - change the notification type for a completion queue + * @ibcq: the completion queue + * @notify_flags: the type of notification to request + * + * Returns 0 for success. + * + * This may be called from interrupt context. Also called by + * ib_req_notify_cq() in the generic verbs code. + */ +int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) +{ + struct qib_cq *cq = to_icq(ibcq); + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&cq->lock, flags); + /* + * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow + * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2). 
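+ * In other words, an armed IB_CQ_NEXT_COMP request is never narrowed
+ * back to IB_CQ_SOLICITED by a later request; the check below only
+ * updates cq->notify when nothing stronger is already armed.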
+ */ + if (cq->notify != IB_CQ_NEXT_COMP) + cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; + + if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && + cq->queue->head != cq->queue->tail) + ret = 1; + + spin_unlock_irqrestore(&cq->lock, flags); + + return ret; +} + +/** + * qib_resize_cq - change the size of the CQ + * @ibcq: the completion queue + * + * Returns 0 for success. + */ +int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) +{ + struct qib_cq *cq = to_icq(ibcq); + struct qib_cq_wc *old_wc; + struct qib_cq_wc *wc; + u32 head, tail, n; + int ret; + u32 sz; + + if (cqe < 1 || cqe > ib_qib_max_cqes) { + ret = -EINVAL; + goto bail; + } + + /* + * Need to use vmalloc() if we want to support large #s of entries. + */ + sz = sizeof(*wc); + if (udata && udata->outlen >= sizeof(__u64)) + sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); + else + sz += sizeof(struct ib_wc) * (cqe + 1); + wc = vmalloc_user(sz); + if (!wc) { + ret = -ENOMEM; + goto bail; + } + + /* Check that we can write the offset to mmap. */ + if (udata && udata->outlen >= sizeof(__u64)) { + __u64 offset = 0; + + ret = ib_copy_to_udata(udata, &offset, sizeof(offset)); + if (ret) + goto bail_free; + } + + spin_lock_irq(&cq->lock); + /* + * Make sure head and tail are sane since they + * might be user writable. + */ + old_wc = cq->queue; + head = old_wc->head; + if (head > (u32) cq->ibcq.cqe) + head = (u32) cq->ibcq.cqe; + tail = old_wc->tail; + if (tail > (u32) cq->ibcq.cqe) + tail = (u32) cq->ibcq.cqe; + if (head < tail) + n = cq->ibcq.cqe + 1 + head - tail; + else + n = head - tail; + if (unlikely((u32)cqe < n)) { + ret = -EINVAL; + goto bail_unlock; + } + for (n = 0; tail != head; n++) { + if (cq->ip) + wc->uqueue[n] = old_wc->uqueue[tail]; + else + wc->kqueue[n] = old_wc->kqueue[tail]; + if (tail == (u32) cq->ibcq.cqe) + tail = 0; + else + tail++; + } + cq->ibcq.cqe = cqe; + wc->head = n; + wc->tail = 0; + cq->queue = wc; + spin_unlock_irq(&cq->lock); + + vfree(old_wc); + + if (cq->ip) { + struct qib_ibdev *dev = to_idev(ibcq->device); + struct qib_mmap_info *ip = cq->ip; + + qib_update_mmap_info(dev, ip, sz, wc); + + /* + * Return the offset to mmap. + * See qib_mmap() for details. 
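+ * (The offset copied out here is the token user space later hands
+ * back to mmap() so qib_mmap() can locate the resized ring.)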
+ */ + if (udata && udata->outlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + + spin_lock_irq(&dev->pending_lock); + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + ret = 0; + goto bail; + +bail_unlock: + spin_unlock_irq(&cq->lock); +bail_free: + vfree(wc); +bail: + return ret; +} + +int qib_cq_init(struct qib_devdata *dd) +{ + int ret = 0; + int cpu; + struct task_struct *task; + + if (dd->worker) + return 0; + dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL); + if (!dd->worker) + return -ENOMEM; + init_kthread_worker(dd->worker); + task = kthread_create_on_node( + kthread_worker_fn, + dd->worker, + dd->assigned_node_id, + "qib_cq%d", dd->unit); + if (IS_ERR(task)) + goto task_fail; + cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id)); + kthread_bind(task, cpu); + wake_up_process(task); +out: + return ret; +task_fail: + ret = PTR_ERR(task); + kfree(dd->worker); + dd->worker = NULL; + goto out; +} + +void qib_cq_exit(struct qib_devdata *dd) +{ + struct kthread_worker *worker; + + worker = dd->worker; + if (!worker) + return; + /* blocks future queuing from send_complete() */ + dd->worker = NULL; + smp_wmb(); + flush_kthread_worker(worker); + kthread_stop(worker->task); + kfree(worker); +} diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c new file mode 100644 index 00000000000..799a0c3bffc --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -0,0 +1,283 @@ +#ifdef CONFIG_DEBUG_FS +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/kernel.h> +#include <linux/export.h> + +#include "qib.h" +#include "qib_verbs.h" +#include "qib_debugfs.h" + +static struct dentry *qib_dbg_root; + +#define DEBUGFS_FILE(name) \ +static const struct seq_operations _##name##_seq_ops = { \ + .start = _##name##_seq_start, \ + .next = _##name##_seq_next, \ + .stop = _##name##_seq_stop, \ + .show = _##name##_seq_show \ +}; \ +static int _##name##_open(struct inode *inode, struct file *s) \ +{ \ + struct seq_file *seq; \ + int ret; \ + ret = seq_open(s, &_##name##_seq_ops); \ + if (ret) \ + return ret; \ + seq = s->private_data; \ + seq->private = inode->i_private; \ + return 0; \ +} \ +static const struct file_operations _##name##_file_ops = { \ + .owner = THIS_MODULE, \ + .open = _##name##_open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = seq_release \ +}; + +#define DEBUGFS_FILE_CREATE(name) \ +do { \ + struct dentry *ent; \ + ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \ + ibd, &_##name##_file_ops); \ + if (!ent) \ + pr_warn("create of " #name " failed\n"); \ +} while (0) + +static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_opcode_stats_perctx *opstats; + + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct qib_opcode_stats_perctx *opstats; + + ++*pos; + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + + +static void _opcode_stats_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _opcode_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos, j; + u64 n_packets = 0, n_bytes = 0; + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + for (j = 0; j < dd->first_user_ctxt; j++) { + if (!dd->rcd[j]) + continue; + n_packets += dd->rcd[j]->opstats->stats[i].n_packets; + n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes; + } + if (!n_packets && !n_bytes) + return SEQ_SKIP; + seq_printf(s, "%02llx %llu/%llu\n", i, + (unsigned long long) n_packets, + (unsigned long long) n_bytes); + + return 0; +} + +DEBUGFS_FILE(opcode_stats) + +static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (!*pos) + return SEQ_START_TOKEN; + if (*pos >= dd->first_user_ctxt) + return NULL; + return pos; +} + +static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (v == SEQ_START_TOKEN) + return pos; + + ++*pos; + if (*pos >= dd->first_user_ctxt) + return NULL; + return pos; +} + +static void _ctx_stats_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _ctx_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos; + loff_t i, j; + u64 n_packets = 0; + struct qib_ibdev *ibd = (struct qib_ibdev *)s->private; + struct qib_devdata *dd = dd_from_dev(ibd); + + if (v == SEQ_START_TOKEN) { + seq_puts(s, "Ctx:npkts\n"); + return 0; + } + + spos = v; + i = *spos; + + if (!dd->rcd[i]) + return SEQ_SKIP; + + for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++) + n_packets += dd->rcd[i]->opstats->stats[j].n_packets; + + if (!n_packets) + return SEQ_SKIP; + + seq_printf(s, " %llu:%llu\n", i, 
n_packets); + return 0; +} + +DEBUGFS_FILE(ctx_stats) + +static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct qib_qp_iter *iter; + loff_t n = *pos; + + iter = qib_qp_iter_init(s->private); + if (!iter) + return NULL; + + while (n--) { + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + } + + return iter; +} + +static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, + loff_t *pos) +{ + struct qib_qp_iter *iter = iter_ptr; + + (*pos)++; + + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) +{ + /* nothing for now */ +} + +static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr) +{ + struct qib_qp_iter *iter = iter_ptr; + + if (!iter) + return 0; + + qib_qp_iter_print(s, iter); + + return 0; +} + +DEBUGFS_FILE(qp_stats) + +void qib_dbg_ibdev_init(struct qib_ibdev *ibd) +{ + char name[10]; + + snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit); + ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root); + if (!ibd->qib_ibdev_dbg) { + pr_warn("create of %s failed\n", name); + return; + } + DEBUGFS_FILE_CREATE(opcode_stats); + DEBUGFS_FILE_CREATE(ctx_stats); + DEBUGFS_FILE_CREATE(qp_stats); + return; +} + +void qib_dbg_ibdev_exit(struct qib_ibdev *ibd) +{ + if (!qib_dbg_root) + goto out; + debugfs_remove_recursive(ibd->qib_ibdev_dbg); +out: + ibd->qib_ibdev_dbg = NULL; +} + +void qib_dbg_init(void) +{ + qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL); + if (!qib_dbg_root) + pr_warn("init of debugfs failed\n"); +} + +void qib_dbg_exit(void) +{ + debugfs_remove_recursive(qib_dbg_root); + qib_dbg_root = NULL; +} + +#endif + diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qib/qib_debugfs.h new file mode 100644 index 00000000000..7ae983a91b8 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_debugfs.h @@ -0,0 +1,45 @@ +#ifndef _QIB_DEBUGFS_H +#define _QIB_DEBUGFS_H + +#ifdef CONFIG_DEBUG_FS +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +struct qib_ibdev; +void qib_dbg_ibdev_init(struct qib_ibdev *ibd); +void qib_dbg_ibdev_exit(struct qib_ibdev *ibd); +void qib_dbg_init(void); +void qib_dbg_exit(void); + +#endif + +#endif /* _QIB_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c new file mode 100644 index 00000000000..5dfda4c5cc9 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -0,0 +1,911 @@ +/* + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file contains support for diagnostic functions. It is accessed by + * opening the qib_diag device, normally minor number 129. Diagnostic use + * of the QLogic_IB chip may render the chip or board unusable until the + * driver is unloaded, or in some cases, until the system is rebooted. + * + * Accesses to the chip through this interface are not similar to going + * through the /sys/bus/pci resource mmap interface. + */ + +#include <linux/io.h> +#include <linux/pci.h> +#include <linux/poll.h> +#include <linux/vmalloc.h> +#include <linux/export.h> +#include <linux/fs.h> +#include <linux/uaccess.h> + +#include "qib.h" +#include "qib_common.h" + +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt + +/* + * Each client that opens the diag device must read then write + * offset 0, to prevent lossage from random cat or od. diag_state + * sequences this "handshake". + */ +enum diag_state { UNUSED = 0, OPENED, INIT, READY }; + +/* State for an individual client. PID so children cannot abuse handshake */ +static struct qib_diag_client { + struct qib_diag_client *next; + struct qib_devdata *dd; + pid_t pid; + enum diag_state state; +} *client_pool; + +/* + * Get a client struct. Recycled if possible, else kmalloc. 
+ * Must be called with qib_mutex held + */ +static struct qib_diag_client *get_client(struct qib_devdata *dd) +{ + struct qib_diag_client *dc; + + dc = client_pool; + if (dc) + /* got from pool remove it and use */ + client_pool = dc->next; + else + /* None in pool, alloc and init */ + dc = kmalloc(sizeof *dc, GFP_KERNEL); + + if (dc) { + dc->next = NULL; + dc->dd = dd; + dc->pid = current->pid; + dc->state = OPENED; + } + return dc; +} + +/* + * Return to pool. Must be called with qib_mutex held + */ +static void return_client(struct qib_diag_client *dc) +{ + struct qib_devdata *dd = dc->dd; + struct qib_diag_client *tdc, *rdc; + + rdc = NULL; + if (dc == dd->diag_client) { + dd->diag_client = dc->next; + rdc = dc; + } else { + tdc = dc->dd->diag_client; + while (tdc) { + if (dc == tdc->next) { + tdc->next = dc->next; + rdc = dc; + break; + } + tdc = tdc->next; + } + } + if (rdc) { + rdc->state = UNUSED; + rdc->dd = NULL; + rdc->pid = 0; + rdc->next = client_pool; + client_pool = rdc; + } +} + +static int qib_diag_open(struct inode *in, struct file *fp); +static int qib_diag_release(struct inode *in, struct file *fp); +static ssize_t qib_diag_read(struct file *fp, char __user *data, + size_t count, loff_t *off); +static ssize_t qib_diag_write(struct file *fp, const char __user *data, + size_t count, loff_t *off); + +static const struct file_operations diag_file_ops = { + .owner = THIS_MODULE, + .write = qib_diag_write, + .read = qib_diag_read, + .open = qib_diag_open, + .release = qib_diag_release, + .llseek = default_llseek, +}; + +static atomic_t diagpkt_count = ATOMIC_INIT(0); +static struct cdev *diagpkt_cdev; +static struct device *diagpkt_device; + +static ssize_t qib_diagpkt_write(struct file *fp, const char __user *data, + size_t count, loff_t *off); + +static const struct file_operations diagpkt_file_ops = { + .owner = THIS_MODULE, + .write = qib_diagpkt_write, + .llseek = noop_llseek, +}; + +int qib_diag_add(struct qib_devdata *dd) +{ + char name[16]; + int ret = 0; + + if (atomic_inc_return(&diagpkt_count) == 1) { + ret = qib_cdev_init(QIB_DIAGPKT_MINOR, "ipath_diagpkt", + &diagpkt_file_ops, &diagpkt_cdev, + &diagpkt_device); + if (ret) + goto done; + } + + snprintf(name, sizeof(name), "ipath_diag%d", dd->unit); + ret = qib_cdev_init(QIB_DIAG_MINOR_BASE + dd->unit, name, + &diag_file_ops, &dd->diag_cdev, + &dd->diag_device); +done: + return ret; +} + +static void qib_unregister_observers(struct qib_devdata *dd); + +void qib_diag_remove(struct qib_devdata *dd) +{ + struct qib_diag_client *dc; + + if (atomic_dec_and_test(&diagpkt_count)) + qib_cdev_cleanup(&diagpkt_cdev, &diagpkt_device); + + qib_cdev_cleanup(&dd->diag_cdev, &dd->diag_device); + + /* + * Return all diag_clients of this device. There should be none, + * as we are "guaranteed" that no clients are still open + */ + while (dd->diag_client) + return_client(dd->diag_client); + + /* Now clean up all unused client structs */ + while (client_pool) { + dc = client_pool; + client_pool = dc->next; + kfree(dc); + } + /* Clean up observer list */ + qib_unregister_observers(dd); +} + +/* qib_remap_ioaddr32 - remap an offset into chip address space to __iomem * + * + * @dd: the qlogic_ib device + * @offs: the offset in chip-space + * @cntp: Pointer to max (byte) count for transfer starting at offset + * This returns a u32 __iomem * so it can be used for both 64 and 32-bit + * mapping. 
It is needed because with the use of PAT for control of + * write-combining, the logically contiguous address-space of the chip + * may be split into virtually non-contiguous spaces, with different + * attributes, which are then mapped to contiguous physical space + * starting from the first BAR. + * + * The code below makes the same assumptions as were made in + * init_chip_wc_pat() (qib_init.c), copied here: + * Assumes chip address space looks like: + * - kregs + sregs + cregs + uregs (in any order) + * - piobufs (2K and 4K bufs in either order) + * or: + * - kregs + sregs + cregs (in any order) + * - piobufs (2K and 4K bufs in either order) + * - uregs + * + * If cntp is non-NULL, returns how many bytes from offset can be accessed. + * Returns 0 if the offset is not mapped. + */ +static u32 __iomem *qib_remap_ioaddr32(struct qib_devdata *dd, u32 offset, + u32 *cntp) +{ + u32 kreglen; + u32 snd_bottom, snd_lim = 0; + u32 __iomem *krb32 = (u32 __iomem *)dd->kregbase; + u32 __iomem *map = NULL; + u32 cnt = 0; + u32 tot4k, offs4k; + + /* First, simplest case, offset is within the first map. */ + kreglen = (dd->kregend - dd->kregbase) * sizeof(u64); + if (offset < kreglen) { + map = krb32 + (offset / sizeof(u32)); + cnt = kreglen - offset; + goto mapped; + } + + /* + * Next check for user regs, the next most common case, + * and a cheap check because if they are not in the first map + * they are last in chip. + */ + if (dd->userbase) { + /* If user regs mapped, they are after send, so set limit. */ + u32 ulim = (dd->cfgctxts * dd->ureg_align) + dd->uregbase; + if (!dd->piovl15base) + snd_lim = dd->uregbase; + krb32 = (u32 __iomem *)dd->userbase; + if (offset >= dd->uregbase && offset < ulim) { + map = krb32 + (offset - dd->uregbase) / sizeof(u32); + cnt = ulim - offset; + goto mapped; + } + } + + /* + * Lastly, check for offset within Send Buffers. + * This is gnarly because struct devdata is deliberately vague + * about things like 7322 VL15 buffers, and we are not in + * chip-specific code here, so should not make many assumptions. + * The one we _do_ make is that the only chip that has more sndbufs + * than we admit is the 7322, and it has userregs above that, so + * we know the snd_lim. + */ + /* Assume 2K buffers are first. */ + snd_bottom = dd->pio2k_bufbase; + if (snd_lim == 0) { + u32 tot2k = dd->piobcnt2k * ALIGN(dd->piosize2k, dd->palign); + snd_lim = snd_bottom + tot2k; + } + /* If 4k buffers exist, account for them by bumping + * the appropriate limit. + */ + tot4k = dd->piobcnt4k * dd->align4k; + offs4k = dd->piobufbase >> 32; + if (dd->piobcnt4k) { + if (snd_bottom > offs4k) + snd_bottom = offs4k; + else { + /* 4k above 2k. Bump snd_lim, if needed */ + if (!dd->userbase || dd->piovl15base) + snd_lim = offs4k + tot4k; + } + } + /* + * Judgement call: can we ignore the space between SendBuffs and + * UserRegs, where we would like to see vl15 buffs, but not more?
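+ * We do: an offset that falls in such a hole matches none of the
+ * range checks here, leaves map NULL, and is reported back to the
+ * caller as unmapped (cnt stays 0).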
+ */ + if (offset >= snd_bottom && offset < snd_lim) { + offset -= snd_bottom; + map = (u32 __iomem *)dd->piobase + (offset / sizeof(u32)); + cnt = snd_lim - offset; + } + + if (!map && offs4k && dd->piovl15base) { + snd_lim = offs4k + tot4k + 2 * dd->align4k; + if (offset >= (offs4k + tot4k) && offset < snd_lim) { + map = (u32 __iomem *)dd->piovl15base + + ((offset - (offs4k + tot4k)) / sizeof(u32)); + cnt = snd_lim - offset; + } + } + +mapped: + if (cntp) + *cntp = cnt; + return map; +} + +/* + * qib_read_umem64 - read a 64-bit quantity from the chip into user space + * @dd: the qlogic_ib device + * @uaddr: the location to store the data in user memory + * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore) + * @count: number of bytes to copy (multiple of 32 bits) + * + * This function also localizes all chip memory accesses. + * The copy should be written such that we read full cacheline packets + * from the chip. This is usually used for a single qword + * + * NOTE: This assumes the chip address is 64-bit aligned. + */ +static int qib_read_umem64(struct qib_devdata *dd, void __user *uaddr, + u32 regoffs, size_t count) +{ + const u64 __iomem *reg_addr; + const u64 __iomem *reg_end; + u32 limit; + int ret; + + reg_addr = (const u64 __iomem *)qib_remap_ioaddr32(dd, regoffs, &limit); + if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) { + ret = -EINVAL; + goto bail; + } + if (count >= limit) + count = limit; + reg_end = reg_addr + (count / sizeof(u64)); + + /* not very efficient, but it works for now */ + while (reg_addr < reg_end) { + u64 data = readq(reg_addr); + + if (copy_to_user(uaddr, &data, sizeof(u64))) { + ret = -EFAULT; + goto bail; + } + reg_addr++; + uaddr += sizeof(u64); + } + ret = 0; +bail: + return ret; +} + +/* + * qib_write_umem64 - write a 64-bit quantity to the chip from user space + * @dd: the qlogic_ib device + * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore) + * @uaddr: the source of the data in user memory + * @count: the number of bytes to copy (multiple of 32 bits) + * + * This is usually used for a single qword + * NOTE: This assumes the chip address is 64-bit aligned. + */ + +static int qib_write_umem64(struct qib_devdata *dd, u32 regoffs, + const void __user *uaddr, size_t count) +{ + u64 __iomem *reg_addr; + const u64 __iomem *reg_end; + u32 limit; + int ret; + + reg_addr = (u64 __iomem *)qib_remap_ioaddr32(dd, regoffs, &limit); + if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) { + ret = -EINVAL; + goto bail; + } + if (count >= limit) + count = limit; + reg_end = reg_addr + (count / sizeof(u64)); + + /* not very efficient, but it works for now */ + while (reg_addr < reg_end) { + u64 data; + if (copy_from_user(&data, uaddr, sizeof(data))) { + ret = -EFAULT; + goto bail; + } + writeq(data, reg_addr); + + reg_addr++; + uaddr += sizeof(u64); + } + ret = 0; +bail: + return ret; +} + +/* + * qib_read_umem32 - read a 32-bit quantity from the chip into user space + * @dd: the qlogic_ib device + * @uaddr: the location to store the data in user memory + * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore) + * @count: number of bytes to copy + * + * read 32 bit values, not 64 bit; for memories that only + * support 32 bit reads; usually a single dword. 
+ */ +static int qib_read_umem32(struct qib_devdata *dd, void __user *uaddr, + u32 regoffs, size_t count) +{ + const u32 __iomem *reg_addr; + const u32 __iomem *reg_end; + u32 limit; + int ret; + + reg_addr = qib_remap_ioaddr32(dd, regoffs, &limit); + if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) { + ret = -EINVAL; + goto bail; + } + if (count >= limit) + count = limit; + reg_end = reg_addr + (count / sizeof(u32)); + + /* not very efficient, but it works for now */ + while (reg_addr < reg_end) { + u32 data = readl(reg_addr); + + if (copy_to_user(uaddr, &data, sizeof(data))) { + ret = -EFAULT; + goto bail; + } + + reg_addr++; + uaddr += sizeof(u32); + + } + ret = 0; +bail: + return ret; +} + +/* + * qib_write_umem32 - write a 32-bit quantity to the chip from user space + * @dd: the qlogic_ib device + * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore) + * @uaddr: the source of the data in user memory + * @count: number of bytes to copy + * + * write 32 bit values, not 64 bit; for memories that only + * support 32 bit write; usually a single dword. + */ + +static int qib_write_umem32(struct qib_devdata *dd, u32 regoffs, + const void __user *uaddr, size_t count) +{ + u32 __iomem *reg_addr; + const u32 __iomem *reg_end; + u32 limit; + int ret; + + reg_addr = qib_remap_ioaddr32(dd, regoffs, &limit); + if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) { + ret = -EINVAL; + goto bail; + } + if (count >= limit) + count = limit; + reg_end = reg_addr + (count / sizeof(u32)); + + while (reg_addr < reg_end) { + u32 data; + + if (copy_from_user(&data, uaddr, sizeof(data))) { + ret = -EFAULT; + goto bail; + } + writel(data, reg_addr); + + reg_addr++; + uaddr += sizeof(u32); + } + ret = 0; +bail: + return ret; +} + +static int qib_diag_open(struct inode *in, struct file *fp) +{ + int unit = iminor(in) - QIB_DIAG_MINOR_BASE; + struct qib_devdata *dd; + struct qib_diag_client *dc; + int ret; + + mutex_lock(&qib_mutex); + + dd = qib_lookup(unit); + + if (dd == NULL || !(dd->flags & QIB_PRESENT) || + !dd->kregbase) { + ret = -ENODEV; + goto bail; + } + + dc = get_client(dd); + if (!dc) { + ret = -ENOMEM; + goto bail; + } + dc->next = dd->diag_client; + dd->diag_client = dc; + fp->private_data = dc; + ret = 0; +bail: + mutex_unlock(&qib_mutex); + + return ret; +} + +/** + * qib_diagpkt_write - write an IB packet + * @fp: the diag data device file pointer + * @data: qib_diag_pkt structure saying where to get the packet + * @count: size of data to write + * @off: unused by this code + */ +static ssize_t qib_diagpkt_write(struct file *fp, + const char __user *data, + size_t count, loff_t *off) +{ + u32 __iomem *piobuf; + u32 plen, pbufn, maxlen_reserve; + struct qib_diag_xpkt dp; + u32 *tmpbuf = NULL; + struct qib_devdata *dd; + struct qib_pportdata *ppd; + ssize_t ret = 0; + + if (count != sizeof(dp)) { + ret = -EINVAL; + goto bail; + } + if (copy_from_user(&dp, data, sizeof(dp))) { + ret = -EFAULT; + goto bail; + } + + dd = qib_lookup(dp.unit); + if (!dd || !(dd->flags & QIB_PRESENT) || !dd->kregbase) { + ret = -ENODEV; + goto bail; + } + if (!(dd->flags & QIB_INITTED)) { + /* no hardware, freeze, etc. 
*/ + ret = -ENODEV; + goto bail; + } + + if (dp.version != _DIAG_XPKT_VERS) { + qib_dev_err(dd, "Invalid version %u for diagpkt_write\n", + dp.version); + ret = -EINVAL; + goto bail; + } + /* send count must be an exact number of dwords */ + if (dp.len & 3) { + ret = -EINVAL; + goto bail; + } + if (!dp.port || dp.port > dd->num_pports) { + ret = -EINVAL; + goto bail; + } + ppd = &dd->pport[dp.port - 1]; + + /* + * We need the total length before the first word is written, plus + * 2 Dwords. One Dword is for padding so we get the full user data + * when it is not aligned on a word boundary. The other Dword is to + * make sure we have room for the ICRC, which gets tacked on later. + */ + maxlen_reserve = 2 * sizeof(u32); + if (dp.len > ppd->ibmaxlen - maxlen_reserve) { + ret = -EINVAL; + goto bail; + } + + plen = sizeof(u32) + dp.len; + + tmpbuf = vmalloc(plen); + if (!tmpbuf) { + qib_devinfo(dd->pcidev, + "Unable to allocate tmp buffer, failing\n"); + ret = -ENOMEM; + goto bail; + } + + if (copy_from_user(tmpbuf, + (const void __user *) (unsigned long) dp.data, + dp.len)) { + ret = -EFAULT; + goto bail; + } + + plen >>= 2; /* in dwords */ + + if (dp.pbc_wd == 0) + dp.pbc_wd = plen; + + piobuf = dd->f_getsendbuf(ppd, dp.pbc_wd, &pbufn); + if (!piobuf) { + ret = -EBUSY; + goto bail; + } + /* disarm it just to be extra sure */ + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbufn)); + + /* disable header check on pbufn for this packet */ + dd->f_txchk_change(dd, pbufn, 1, TXCHK_CHG_TYPE_DIS1, NULL); + + writeq(dp.pbc_wd, piobuf); + /* + * Copy all but the trigger word, then flush, so it's written + * to chip before trigger word, then write trigger word, then + * flush again, so packet is sent. + */ + if (dd->flags & QIB_PIO_FLUSH_WC) { + qib_flush_wc(); + qib_pio_copy(piobuf + 2, tmpbuf, plen - 1); + qib_flush_wc(); + __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); + } else + qib_pio_copy(piobuf + 2, tmpbuf, plen); + + if (dd->flags & QIB_USE_SPCL_TRIG) { + u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; + + qib_flush_wc(); + __raw_writel(0xaebecede, piobuf + spcl_off); + } + + /* + * Ensure buffer is written to the chip, then re-enable + * header checks (if supported by chip). The txchk + * code will ensure seen by chip before returning. + */ + qib_flush_wc(); + qib_sendbuf_done(dd, pbufn); + dd->f_txchk_change(dd, pbufn, 1, TXCHK_CHG_TYPE_ENAB1, NULL); + + ret = sizeof(dp); + +bail: + vfree(tmpbuf); + return ret; +} + +static int qib_diag_release(struct inode *in, struct file *fp) +{ + mutex_lock(&qib_mutex); + return_client(fp->private_data); + fp->private_data = NULL; + mutex_unlock(&qib_mutex); + return 0; +} + +/* + * Chip-specific code calls this to register its interest in + * a specific range.
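+ *
+ * As a sketch (the names here are hypothetical; hook, bottom and top
+ * are the struct diag_observer fields used by the lookup below):
+ *
+ *	static const struct diag_observer tid_observer = {
+ *		.hook = tid_hook,
+ *		.bottom = TID_RANGE_LO,
+ *		.top = TID_RANGE_HI,
+ *	};
+ *	ret = qib_register_observer(dd, &tid_observer);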
+ */ +struct diag_observer_list_elt { + struct diag_observer_list_elt *next; + const struct diag_observer *op; +}; + +int qib_register_observer(struct qib_devdata *dd, + const struct diag_observer *op) +{ + struct diag_observer_list_elt *olp; + unsigned long flags; + + if (!dd || !op) + return -EINVAL; + olp = vmalloc(sizeof *olp); + if (!olp) { + pr_err("vmalloc for observer failed\n"); + return -ENOMEM; + } + + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + olp->op = op; + olp->next = dd->diag_observer_list; + dd->diag_observer_list = olp; + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + + return 0; +} + +/* Remove all registered observers when device is closed */ +static void qib_unregister_observers(struct qib_devdata *dd) +{ + struct diag_observer_list_elt *olp; + unsigned long flags; + + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + olp = dd->diag_observer_list; + while (olp) { + /* Pop one observer, let go of lock */ + dd->diag_observer_list = olp->next; + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + vfree(olp); + /* try again. */ + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + olp = dd->diag_observer_list; + } + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); +} + +/* + * Find the observer, if any, for the specified address. Initial implementation + * is simple stack of observers. This must be called with diag transaction + * lock held. + */ +static const struct diag_observer *diag_get_observer(struct qib_devdata *dd, + u32 addr) +{ + struct diag_observer_list_elt *olp; + const struct diag_observer *op = NULL; + + olp = dd->diag_observer_list; + while (olp) { + op = olp->op; + if (addr >= op->bottom && addr <= op->top) + break; + olp = olp->next; + } + if (!olp) + op = NULL; + + return op; +} + +static ssize_t qib_diag_read(struct file *fp, char __user *data, + size_t count, loff_t *off) +{ + struct qib_diag_client *dc = fp->private_data; + struct qib_devdata *dd = dc->dd; + void __iomem *kreg_base; + ssize_t ret; + + if (dc->pid != current->pid) { + ret = -EPERM; + goto bail; + } + + kreg_base = dd->kregbase; + + if (count == 0) + ret = 0; + else if ((count % 4) || (*off % 4)) + /* address or length is not 32-bit aligned, hence invalid */ + ret = -EINVAL; + else if (dc->state < READY && (*off || count != 8)) + ret = -EINVAL; /* prevent cat /dev/qib_diag* */ + else { + unsigned long flags; + u64 data64 = 0; + int use_32; + const struct diag_observer *op; + + use_32 = (count % 8) || (*off % 8); + ret = -1; + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + /* + * Check for observer on this address range. + * we only support a single 32 or 64-bit read + * via observer, currently. + */ + op = diag_get_observer(dd, *off); + if (op) { + u32 offset = *off; + ret = op->hook(dd, op, offset, &data64, 0, use_32); + } + /* + * We need to release lock before any copy_to_user(), + * whether implicit in qib_read_umem* or explicit below. + */ + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + if (!op) { + if (use_32) + /* + * Address or length is not 64-bit aligned; + * do 32-bit rd + */ + ret = qib_read_umem32(dd, data, (u32) *off, + count); + else + ret = qib_read_umem64(dd, data, (u32) *off, + count); + } else if (ret == count) { + /* Below finishes case where observer existed */ + ret = copy_to_user(data, &data64, use_32 ? 
+ sizeof(u32) : sizeof(u64)); + if (ret) + ret = -EFAULT; + } + } + + if (ret >= 0) { + *off += count; + ret = count; + if (dc->state == OPENED) + dc->state = INIT; + } +bail: + return ret; +} + +static ssize_t qib_diag_write(struct file *fp, const char __user *data, + size_t count, loff_t *off) +{ + struct qib_diag_client *dc = fp->private_data; + struct qib_devdata *dd = dc->dd; + void __iomem *kreg_base; + ssize_t ret; + + if (dc->pid != current->pid) { + ret = -EPERM; + goto bail; + } + + kreg_base = dd->kregbase; + + if (count == 0) + ret = 0; + else if ((count % 4) || (*off % 4)) + /* address or length is not 32-bit aligned, hence invalid */ + ret = -EINVAL; + else if (dc->state < READY && + ((*off || count != 8) || dc->state != INIT)) + /* No writes except second-step of init seq */ + ret = -EINVAL; /* before any other write allowed */ + else { + unsigned long flags; + const struct diag_observer *op = NULL; + int use_32 = (count % 8) || (*off % 8); + + /* + * Check for observer on this address range. + * We only support a single 32 or 64-bit write + * via observer, currently. This helps, because + * we would otherwise have to jump through hoops + * to make "diag transaction" meaningful when we + * cannot do a copy_from_user while holding the lock. + */ + if (count == 4 || count == 8) { + u64 data64; + u32 offset = *off; + ret = copy_from_user(&data64, data, count); + if (ret) { + ret = -EFAULT; + goto bail; + } + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + op = diag_get_observer(dd, *off); + if (op) + ret = op->hook(dd, op, offset, &data64, ~0Ull, + use_32); + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + } + + if (!op) { + if (use_32) + /* + * Address or length is not 64-bit aligned; + * do 32-bit write + */ + ret = qib_write_umem32(dd, (u32) *off, data, + count); + else + ret = qib_write_umem64(dd, (u32) *off, data, + count); + } + } + + if (ret >= 0) { + *off += count; + ret = count; + if (dc->state == INIT) + dc->state = READY; /* all read/write OK now */ + } +bail: + return ret; +} diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c new file mode 100644 index 00000000000..59fe092b4b0 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_dma.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2006, 2009, 2010 QLogic, Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/types.h> +#include <linux/scatterlist.h> + +#include "qib_verbs.h" + +#define BAD_DMA_ADDRESS ((u64) 0) + +/* + * The following functions implement driver specific replacements + * for the ib_dma_*() functions. + * + * These functions return kernel virtual addresses instead of + * device bus addresses since the driver uses the CPU to copy + * data instead of using hardware DMA. + */ + +static int qib_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == BAD_DMA_ADDRESS; +} + +static u64 qib_dma_map_single(struct ib_device *dev, void *cpu_addr, + size_t size, enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); + return (u64) cpu_addr; +} + +static void qib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} + +static u64 qib_dma_map_page(struct ib_device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + u64 addr; + + BUG_ON(!valid_dma_direction(direction)); + + if (offset + size > PAGE_SIZE) { + addr = BAD_DMA_ADDRESS; + goto done; + } + + addr = (u64) page_address(page); + if (addr) + addr += offset; + /* TODO: handle highmem pages */ + +done: + return addr; +} + +static void qib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} + +static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + u64 addr; + int i; + int ret = nents; + + BUG_ON(!valid_dma_direction(direction)); + + for_each_sg(sgl, sg, nents, i) { + addr = (u64) page_address(sg_page(sg)); + /* TODO: handle highmem pages */ + if (!addr) { + ret = 0; + break; + } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif + } + return ret; +} + +static void qib_unmap_sg(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + BUG_ON(!valid_dma_direction(direction)); +} + +static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr, + size_t size, enum dma_data_direction dir) +{ +} + +static void qib_sync_single_for_device(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ +} + +static void *qib_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + + p = alloc_pages(flag, get_order(size)); + if (p) + addr = page_address(p); + if (dma_handle) + *dma_handle = (u64) addr; + return addr; +} + +static void qib_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + free_pages((unsigned long) cpu_addr, get_order(size)); +} + +struct ib_dma_mapping_ops qib_dma_mapping_ops = { + .mapping_error = qib_mapping_error, + .map_single = qib_dma_map_single, + .unmap_single = qib_dma_unmap_single, + .map_page = qib_dma_map_page, + .unmap_page = qib_dma_unmap_page, + .map_sg = qib_map_sg, + .unmap_sg = qib_unmap_sg, + .sync_single_for_cpu = qib_sync_single_for_cpu, + .sync_single_for_device = qib_sync_single_for_device, + 
.alloc_coherent = qib_dma_alloc_coherent,
+	.free_coherent = qib_dma_free_coherent
+};
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
new file mode 100644
index 00000000000..5bee08f16d7
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -0,0 +1,817 @@
+/*
+ * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/prefetch.h>
+
+#include "qib.h"
+
+/*
+ * The size has to be longer than this string, so we can append
+ * board/chip information to it in the init code.
+ */
+const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
+
+DEFINE_SPINLOCK(qib_devs_lock);
+LIST_HEAD(qib_dev_list);
+DEFINE_MUTEX(qib_mutex);	/* general driver use */
+
+unsigned qib_ibmtu;
+module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO);
+MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096)");
+
+unsigned qib_compat_ddr_negotiate = 1;
+module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint,
+		   S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(compat_ddr_negotiate,
+		 "Attempt pre-IBTA 1.2 DDR speed negotiation");
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel <ibsupport@intel.com>");
+MODULE_DESCRIPTION("Intel IB driver");
+MODULE_VERSION(QIB_DRIVER_VERSION);
+
+/*
+ * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
+ * PIO send buffers.  This is well beyond anything currently
+ * defined in the InfiniBand spec.
+ */
+#define QIB_PIO_MAXIBHDR 128
+
+/*
+ * QIB_MAX_PKT_RECV is the max # of packets processed per receive interrupt.
+ */
+#define QIB_MAX_PKT_RECV 64
+
+struct qlogic_ib_stats qib_stats;
+
+const char *qib_get_unit_name(int unit)
+{
+	static char iname[16];
+
+	snprintf(iname, sizeof iname, "infinipath%u", unit);
+	return iname;
+}
+
+/*
+ * Return count of units with at least one port ACTIVE.
+ */
+int qib_count_active_units(void)
+{
+	struct qib_devdata *dd;
+	struct qib_pportdata *ppd;
+	unsigned long flags;
+	int pidx, nunits_active = 0;
+
+	spin_lock_irqsave(&qib_devs_lock, flags);
+	list_for_each_entry(dd, &qib_dev_list, list) {
+		if (!(dd->flags & QIB_PRESENT) || !dd->kregbase)
+			continue;
+		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+			ppd = dd->pport + pidx;
+			if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
+					 QIBL_LINKARMED | QIBL_LINKACTIVE))) {
+				nunits_active++;
+				break;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&qib_devs_lock, flags);
+	return nunits_active;
+}
+
+/*
+ * Return count of all units, optionally return in arguments
+ * the number of usable (present) units, and the number of
+ * ports that are up.
+ */
+int qib_count_units(int *npresentp, int *nupp)
+{
+	int nunits = 0, npresent = 0, nup = 0;
+	struct qib_devdata *dd;
+	unsigned long flags;
+	int pidx;
+	struct qib_pportdata *ppd;
+
+	spin_lock_irqsave(&qib_devs_lock, flags);
+
+	list_for_each_entry(dd, &qib_dev_list, list) {
+		nunits++;
+		if ((dd->flags & QIB_PRESENT) && dd->kregbase)
+			npresent++;
+		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+			ppd = dd->pport + pidx;
+			if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT |
+					 QIBL_LINKARMED | QIBL_LINKACTIVE)))
+				nup++;
+		}
+	}
+
+	spin_unlock_irqrestore(&qib_devs_lock, flags);
+
+	if (npresentp)
+		*npresentp = npresent;
+	if (nupp)
+		*nupp = nup;
+
+	return nunits;
+}
+
+/**
+ * qib_wait_linkstate - wait for an IB link state change to occur
+ * @ppd: the qlogic_ib per-port data
+ * @state: the state to wait for
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for an IB link state change to occur;
+ * for now, take the easy polling route.  Currently used only by
+ * qib_set_linkstate.  Returns 0 if the state is reached, otherwise
+ * -ETIMEDOUT.  @state can have multiple state bits set, for any of several
+ * transitions.
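+ *
+ * For example, the ARMED case in qib_set_linkstate() below clears
+ * QIBL_LINKV and then waits briefly for it to be set again:
+ *
+ *	ret = qib_wait_linkstate(ppd, QIBL_LINKV, 10);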
+ */
+int qib_wait_linkstate(struct qib_pportdata *ppd, u32 state, int msecs)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ppd->lflags_lock, flags);
+	if (ppd->state_wanted) {
+		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+		ret = -EBUSY;
+		goto bail;
+	}
+	ppd->state_wanted = state;
+	spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+	wait_event_interruptible_timeout(ppd->state_wait,
+					 (ppd->lflags & state),
+					 msecs_to_jiffies(msecs));
+	spin_lock_irqsave(&ppd->lflags_lock, flags);
+	ppd->state_wanted = 0;
+	spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+
+	if (!(ppd->lflags & state))
+		ret = -ETIMEDOUT;
+	else
+		ret = 0;
+bail:
+	return ret;
+}
+
+int qib_set_linkstate(struct qib_pportdata *ppd, u8 newstate)
+{
+	u32 lstate;
+	int ret;
+	struct qib_devdata *dd = ppd->dd;
+	unsigned long flags;
+
+	switch (newstate) {
+	case QIB_IB_LINKDOWN_ONLY:
+		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+				 IB_LINKCMD_DOWN | IB_LINKINITCMD_NOP);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case QIB_IB_LINKDOWN:
+		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+				 IB_LINKCMD_DOWN | IB_LINKINITCMD_POLL);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case QIB_IB_LINKDOWN_SLEEP:
+		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+				 IB_LINKCMD_DOWN | IB_LINKINITCMD_SLEEP);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case QIB_IB_LINKDOWN_DISABLE:
+		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+				 IB_LINKCMD_DOWN | IB_LINKINITCMD_DISABLE);
+		/* don't wait */
+		ret = 0;
+		goto bail;
+
+	case QIB_IB_LINKARM:
+		if (ppd->lflags & QIBL_LINKARMED) {
+			ret = 0;
+			goto bail;
+		}
+		if (!(ppd->lflags & (QIBL_LINKINIT | QIBL_LINKACTIVE))) {
+			ret = -EINVAL;
+			goto bail;
+		}
+		/*
+		 * Since the port can be ACTIVE when we ask for ARMED,
+		 * clear QIBL_LINKV so we can wait for a transition.
+		 * If the link isn't ARMED, then something else happened
+		 * and there is no point waiting for ARMED.
+		 */
+		spin_lock_irqsave(&ppd->lflags_lock, flags);
+		ppd->lflags &= ~QIBL_LINKV;
+		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+				 IB_LINKCMD_ARMED | IB_LINKINITCMD_NOP);
+		lstate = QIBL_LINKV;
+		break;
+
+	case QIB_IB_LINKACTIVE:
+		if (ppd->lflags & QIBL_LINKACTIVE) {
+			ret = 0;
+			goto bail;
+		}
+		if (!(ppd->lflags & QIBL_LINKARMED)) {
+			ret = -EINVAL;
+			goto bail;
+		}
+		dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE,
+				 IB_LINKCMD_ACTIVE | IB_LINKINITCMD_NOP);
+		lstate = QIBL_LINKACTIVE;
+		break;
+
+	default:
+		ret = -EINVAL;
+		goto bail;
+	}
+	ret = qib_wait_linkstate(ppd, lstate, 10);
+
+bail:
+	return ret;
+}
+
+/*
+ * Get address of eager buffer from its index (allocated in chunks, not
+ * contiguous).
+ */
+static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
+{
+	const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift;
+	const u32 idx = etail & ((u32)rcd->rcvegrbufs_perchunk - 1);
+
+	return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift);
+}
+
+/*
+ * Returns 1 if error was a CRC, else 0.
+ * Needed for some chips' synthesized error counters.
+ */
+static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
+			  u32 ctxt, u32 eflags, u32 l, u32 etail,
+			  __le32 *rhf_addr, struct qib_message_header *rhdr)
+{
+	u32 ret = 0;
+
+	if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR))
+		ret = 1;
+	else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
+		/* For TIDERR and RC QPs preemptively schedule a NAK */
+		struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
+		struct qib_other_headers *ohdr = NULL;
+		struct qib_ibport *ibp = &ppd->ibport_data;
+		struct qib_qp *qp = NULL;
+		u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
+		u16 lid = be16_to_cpu(hdr->lrh[1]);
+		int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+		u32 qp_num;
+		u32 opcode;
+		u32 psn;
+		int diff;
+
+		/* Sanity check packet */
+		if (tlen < 24)
+			goto drop;
+
+		if (lid < QIB_MULTICAST_LID_BASE) {
+			lid &= ~((1 << ppd->lmc) - 1);
+			if (unlikely(lid != ppd->lid))
+				goto drop;
+		}
+
+		/* Check for GRH */
+		if (lnh == QIB_LRH_BTH)
+			ohdr = &hdr->u.oth;
+		else if (lnh == QIB_LRH_GRH) {
+			u32 vtf;
+
+			ohdr = &hdr->u.l.oth;
+			if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+				goto drop;
+			vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+			if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+				goto drop;
+		} else
+			goto drop;
+
+		/* Get opcode and PSN from packet */
+		opcode = be32_to_cpu(ohdr->bth[0]);
+		opcode >>= 24;
+		psn = be32_to_cpu(ohdr->bth[2]);
+
+		/* Get the destination QP number. */
+		qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+		if (qp_num != QIB_MULTICAST_QPN) {
+			int ruc_res;
+			qp = qib_lookup_qpn(ibp, qp_num);
+			if (!qp)
+				goto drop;
+
+			/*
+			 * Handle only RC QPs - for other QP types drop error
+			 * packet.
+			 */
+			spin_lock(&qp->r_lock);
+
+			/* Check for valid receive state. */
+			if (!(ib_qib_state_ops[qp->state] &
+			      QIB_PROCESS_RECV_OK)) {
+				ibp->n_pkt_drops++;
+				goto unlock;
+			}
+
+			switch (qp->ibqp.qp_type) {
+			case IB_QPT_RC:
+				ruc_res =
+					qib_ruc_check_hdr(
+						ibp, hdr,
+						lnh == QIB_LRH_GRH,
+						qp,
+						be32_to_cpu(ohdr->bth[0]));
+				if (ruc_res)
+					goto unlock;
+
+				/* Only deal with RDMA Writes for now */
+				if (opcode <
+				    IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
+					diff = qib_cmp24(psn, qp->r_psn);
+					if (!qp->r_nak_state && diff >= 0) {
+						ibp->n_rc_seqnak++;
+						qp->r_nak_state =
+							IB_NAK_PSN_ERROR;
+						/* Use the expected PSN. */
+						qp->r_ack_psn = qp->r_psn;
+						/*
+						 * Wait to send the sequence
+						 * NAK until all packets
+						 * in the receive queue have
+						 * been processed.
+						 * Otherwise, we end up
+						 * propagating congestion.
+						 */
+						if (list_empty(&qp->rspwait)) {
+							qp->r_flags |=
+								QIB_R_RSP_NAK;
+							atomic_inc(
+								&qp->refcount);
+							list_add_tail(
+							 &qp->rspwait,
+							 &rcd->qp_wait_list);
+						}
+					} /* Out of sequence NAK */
+				} /* QP Request NAKs */
+				break;
+			case IB_QPT_SMI:
+			case IB_QPT_GSI:
+			case IB_QPT_UD:
+			case IB_QPT_UC:
+			default:
+				/* For now don't handle any other QP types */
+				break;
+			}
+
+unlock:
+			spin_unlock(&qp->r_lock);
+			/*
+			 * Notify qib_destroy_qp() if it is waiting
+			 * for us to finish.
+			 */
+			if (atomic_dec_and_test(&qp->refcount))
+				wake_up(&qp->wait);
+		} /* Unicast QP */
+	} /* Valid packet with TIDErr */
+
+drop:
+	return ret;
+}
+
+/*
+ * qib_kreceive - receive a packet
+ * @rcd: the qlogic_ib context
+ * @llic: gets count of good packets needed to clear lli,
+ *        (used with chips that need to track crcs for lli)
+ *
+ * called from interrupt handler for errors or receive interrupt
+ * Returns number of CRC error packets, needed by some chips for
+ * local link integrity tracking.
crcs are adjusted down by following + * good packets, if any, and count of good packets is also tracked. + */ +u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) +{ + struct qib_devdata *dd = rcd->dd; + struct qib_pportdata *ppd = rcd->ppd; + __le32 *rhf_addr; + void *ebuf; + const u32 rsize = dd->rcvhdrentsize; /* words */ + const u32 maxcnt = dd->rcvhdrcnt * rsize; /* words */ + u32 etail = -1, l, hdrqtail; + struct qib_message_header *hdr; + u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0; + int last; + u64 lval; + struct qib_qp *qp, *nqp; + + l = rcd->head; + rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset; + if (dd->flags & QIB_NODMA_RTAIL) { + u32 seq = qib_hdrget_seq(rhf_addr); + if (seq != rcd->seq_cnt) + goto bail; + hdrqtail = 0; + } else { + hdrqtail = qib_get_rcvhdrtail(rcd); + if (l == hdrqtail) + goto bail; + smp_rmb(); /* prevent speculative reads of dma'ed hdrq */ + } + + for (last = 0, i = 1; !last; i += !last) { + hdr = dd->f_get_msgheader(dd, rhf_addr); + eflags = qib_hdrget_err_flags(rhf_addr); + etype = qib_hdrget_rcv_type(rhf_addr); + /* total length */ + tlen = qib_hdrget_length_in_bytes(rhf_addr); + ebuf = NULL; + if ((dd->flags & QIB_NODMA_RTAIL) ? + qib_hdrget_use_egr_buf(rhf_addr) : + (etype != RCVHQ_RCV_TYPE_EXPECTED)) { + etail = qib_hdrget_index(rhf_addr); + updegr = 1; + if (tlen > sizeof(*hdr) || + etype >= RCVHQ_RCV_TYPE_NON_KD) { + ebuf = qib_get_egrbuf(rcd, etail); + prefetch_range(ebuf, tlen - sizeof(*hdr)); + } + } + if (!eflags) { + u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2; + + if (lrh_len != tlen) { + qib_stats.sps_lenerrs++; + goto move_along; + } + } + if (etype == RCVHQ_RCV_TYPE_NON_KD && !eflags && + ebuf == NULL && + tlen > (dd->rcvhdrentsize - 2 + 1 - + qib_hdrget_offset(rhf_addr)) << 2) { + goto move_along; + } + + /* + * Both tiderr and qibhdrerr are set for all plain IB + * packets; only qibhdrerr should be set. + */ + if (unlikely(eflags)) + crcs += qib_rcv_hdrerr(rcd, ppd, rcd->ctxt, eflags, l, + etail, rhf_addr, hdr); + else if (etype == RCVHQ_RCV_TYPE_NON_KD) { + qib_ib_rcv(rcd, hdr, ebuf, tlen); + if (crcs) + crcs--; + else if (llic && *llic) + --*llic; + } +move_along: + l += rsize; + if (l >= maxcnt) + l = 0; + if (i == QIB_MAX_PKT_RECV) + last = 1; + + rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset; + if (dd->flags & QIB_NODMA_RTAIL) { + u32 seq = qib_hdrget_seq(rhf_addr); + + if (++rcd->seq_cnt > 13) + rcd->seq_cnt = 1; + if (seq != rcd->seq_cnt) + last = 1; + } else if (l == hdrqtail) + last = 1; + /* + * Update head regs etc., every 16 packets, if not last pkt, + * to help prevent rcvhdrq overflows, when many packets + * are processed and queue is nearly full. + * Don't request an interrupt for intermediate updates. + */ + lval = l; + if (!last && !(i & 0xf)) { + dd->f_update_usrhead(rcd, lval, updegr, etail, i); + updegr = 0; + } + } + /* + * Notify qib_destroy_qp() if it is waiting + * for lookaside_qp to finish. + */ + if (rcd->lookaside_qp) { + if (atomic_dec_and_test(&rcd->lookaside_qp->refcount)) + wake_up(&rcd->lookaside_qp->wait); + rcd->lookaside_qp = NULL; + } + + rcd->head = l; + + /* + * Iterate over all QPs waiting to respond. + * The list won't change since the IRQ is only run on one CPU. 
+	 */
+	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
+		list_del_init(&qp->rspwait);
+		if (qp->r_flags & QIB_R_RSP_NAK) {
+			qp->r_flags &= ~QIB_R_RSP_NAK;
+			qib_send_rc_ack(qp);
+		}
+		if (qp->r_flags & QIB_R_RSP_SEND) {
+			unsigned long flags;
+
+			qp->r_flags &= ~QIB_R_RSP_SEND;
+			spin_lock_irqsave(&qp->s_lock, flags);
+			if (ib_qib_state_ops[qp->state] &
+					QIB_PROCESS_OR_FLUSH_SEND)
+				qib_schedule_send(qp);
+			spin_unlock_irqrestore(&qp->s_lock, flags);
+		}
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	}
+
+bail:
+	/* Report number of packets consumed */
+	if (npkts)
+		*npkts = i;
+
+	/*
+	 * Always write head at end, and setup rcv interrupt, even
+	 * if no packets were processed.
+	 */
+	lval = (u64)rcd->head | dd->rhdrhead_intr_off;
+	dd->f_update_usrhead(rcd, lval, updegr, etail, i);
+	return crcs;
+}
+
+/**
+ * qib_set_mtu - set the MTU
+ * @ppd: the perport data
+ * @arg: the new MTU
+ *
+ * We can handle "any" incoming size, the issue here is whether we
+ * need to restrict our outgoing size.  For now, we don't do any
+ * sanity checking on this, and we don't deal with what happens to
+ * programs that are already running when the size changes.
+ * NOTE: changing the MTU will usually cause the IBC to go back to
+ * link INIT state...
+ */
+int qib_set_mtu(struct qib_pportdata *ppd, u16 arg)
+{
+	u32 piosize;
+	int ret, chk;
+
+	if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
+	    arg != 4096) {
+		ret = -EINVAL;
+		goto bail;
+	}
+	chk = ib_mtu_enum_to_int(qib_ibmtu);
+	if (chk > 0 && arg > chk) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	piosize = ppd->ibmaxlen;
+	ppd->ibmtu = arg;
+
+	if (arg >= (piosize - QIB_PIO_MAXIBHDR)) {
+		/* Only if it's not the initial value (or reset to it) */
+		if (piosize != ppd->init_ibmaxlen) {
+			if (arg > piosize && arg <= ppd->init_ibmaxlen)
+				piosize = ppd->init_ibmaxlen - 2 * sizeof(u32);
+			ppd->ibmaxlen = piosize;
+		}
+	} else if ((arg + QIB_PIO_MAXIBHDR) != ppd->ibmaxlen) {
+		piosize = arg + QIB_PIO_MAXIBHDR - 2 * sizeof(u32);
+		ppd->ibmaxlen = piosize;
+	}
+
+	ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_MTU, 0);
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+int qib_set_lid(struct qib_pportdata *ppd, u32 lid, u8 lmc)
+{
+	struct qib_devdata *dd = ppd->dd;
+	ppd->lid = lid;
+	ppd->lmc = lmc;
+
+	dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LIDLMC,
+			 lid | (~((1U << lmc) - 1)) << 16);
+
+	qib_devinfo(dd->pcidev, "IB%u:%u got a lid: 0x%x\n",
+		    dd->unit, ppd->port, lid);
+
+	return 0;
+}
+
+/*
+ * The following deals with the "obviously simple" task of overriding the
+ * state of the LEDs, which normally indicate link physical and logical
+ * status.
+ * The complications arise in dealing with different hardware mappings
+ * and the board-dependent routine being called from interrupts.
+ * And then there's the requirement to _flash_ them.
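+ *
+ * A sketch of the 'val' encoding consumed by qib_set_led_override()
+ * below, as inferred from the code: bits 3:0 and 7:4 hold the two
+ * blink phases, bits 15:8 the frequency, where each phase lasts
+ * (16 / freq) seconds.  So, assuming these defines, alternating
+ * between "LEDs on" and "LEDs off" once per second would be:
+ *
+ *	qib_set_led_override(ppd, (16 << LED_OVER_FREQ_SHIFT) | 0x0F);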
+ */ +#define LED_OVER_FREQ_SHIFT 8 +#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT) +/* Below is "non-zero" to force override, but both actual LEDs are off */ +#define LED_OVER_BOTH_OFF (8) + +static void qib_run_led_override(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + struct qib_devdata *dd = ppd->dd; + int timeoff; + int ph_idx; + + if (!(dd->flags & QIB_INITTED)) + return; + + ph_idx = ppd->led_override_phase++ & 1; + ppd->led_override = ppd->led_override_vals[ph_idx]; + timeoff = ppd->led_override_timeoff; + + dd->f_setextled(ppd, 1); + /* + * don't re-fire the timer if user asked for it to be off; we let + * it fire one more time after they turn it off to simplify + */ + if (ppd->led_override_vals[0] || ppd->led_override_vals[1]) + mod_timer(&ppd->led_override_timer, jiffies + timeoff); +} + +void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val) +{ + struct qib_devdata *dd = ppd->dd; + int timeoff, freq; + + if (!(dd->flags & QIB_INITTED)) + return; + + /* First check if we are blinking. If not, use 1HZ polling */ + timeoff = HZ; + freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT; + + if (freq) { + /* For blink, set each phase from one nybble of val */ + ppd->led_override_vals[0] = val & 0xF; + ppd->led_override_vals[1] = (val >> 4) & 0xF; + timeoff = (HZ << 4)/freq; + } else { + /* Non-blink set both phases the same. */ + ppd->led_override_vals[0] = val & 0xF; + ppd->led_override_vals[1] = val & 0xF; + } + ppd->led_override_timeoff = timeoff; + + /* + * If the timer has not already been started, do so. Use a "quick" + * timeout so the function will be called soon, to look at our request. + */ + if (atomic_inc_return(&ppd->led_override_timer_active) == 1) { + /* Need to start timer */ + init_timer(&ppd->led_override_timer); + ppd->led_override_timer.function = qib_run_led_override; + ppd->led_override_timer.data = (unsigned long) ppd; + ppd->led_override_timer.expires = jiffies + 1; + add_timer(&ppd->led_override_timer); + } else { + if (ppd->led_override_vals[0] || ppd->led_override_vals[1]) + mod_timer(&ppd->led_override_timer, jiffies + 1); + atomic_dec(&ppd->led_override_timer_active); + } +} + +/** + * qib_reset_device - reset the chip if possible + * @unit: the device to reset + * + * Whether or not reset is successful, we attempt to re-initialize the chip + * (that is, much like a driver unload/reload). We clear the INITTED flag + * so that the various entry points will fail until we reinitialize. 
For + * now, we only allow this if no user contexts are open that use chip resources + */ +int qib_reset_device(int unit) +{ + int ret, i; + struct qib_devdata *dd = qib_lookup(unit); + struct qib_pportdata *ppd; + unsigned long flags; + int pidx; + + if (!dd) { + ret = -ENODEV; + goto bail; + } + + qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit); + + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) { + qib_devinfo(dd->pcidev, + "Invalid unit number %u or not initialized or not present\n", + unit); + ret = -ENXIO; + goto bail; + } + + spin_lock_irqsave(&dd->uctxt_lock, flags); + if (dd->rcd) + for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) { + if (!dd->rcd[i] || !dd->rcd[i]->cnt) + continue; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + ret = -EBUSY; + goto bail; + } + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (atomic_read(&ppd->led_override_timer_active)) { + /* Need to stop LED timer, _then_ shut off LEDs */ + del_timer_sync(&ppd->led_override_timer); + atomic_set(&ppd->led_override_timer_active, 0); + } + + /* Shut off LEDs after we are sure timer is not running */ + ppd->led_override = LED_OVER_BOTH_OFF; + dd->f_setextled(ppd, 0); + if (dd->flags & QIB_HAS_SEND_DMA) + qib_teardown_sdma(ppd); + } + + ret = dd->f_reset(dd); + if (ret == 1) + ret = qib_init(dd, 1); + else + ret = -EAGAIN; + if (ret) + qib_dev_err(dd, + "Reinitialize unit %u after reset failed with %d\n", + unit, ret); + else + qib_devinfo(dd->pcidev, + "Reinitialized unit %u after resetting\n", + unit); + +bail: + return ret; +} diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c new file mode 100644 index 00000000000..4d5d71aaa2b --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -0,0 +1,456 @@ +/* + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include "qib.h"
+
+/*
+ * Functions specific to the serial EEPROM on cards handled by ib_qib.
+ * The actual serial interface code is in qib_twsi.c. This file is a client.
+ */
+
+/**
+ * qib_eeprom_read - receives bytes from the eeprom via I2C
+ * @dd: the qlogic_ib device
+ * @eeprom_offset: address to read from
+ * @buff: where to store result
+ * @len: number of bytes to receive
+ */
+int qib_eeprom_read(struct qib_devdata *dd, u8 eeprom_offset,
+		    void *buff, int len)
+{
+	int ret;
+
+	ret = mutex_lock_interruptible(&dd->eep_lock);
+	if (!ret) {
+		ret = qib_twsi_reset(dd);
+		if (ret)
+			qib_dev_err(dd, "EEPROM Reset for read failed\n");
+		else
+			ret = qib_twsi_blk_rd(dd, dd->twsi_eeprom_dev,
+					      eeprom_offset, buff, len);
+		mutex_unlock(&dd->eep_lock);
+	}
+
+	return ret;
+}
+
+/*
+ * Actually update the eeprom, first doing write enable if
+ * needed, then restoring write enable state.
+ * Must be called with eep_lock held
+ */
+static int eeprom_write_with_enable(struct qib_devdata *dd, u8 offset,
+				    const void *buf, int len)
+{
+	int ret, pwen;
+
+	pwen = dd->f_eeprom_wen(dd, 1);
+	ret = qib_twsi_reset(dd);
+	if (ret)
+		qib_dev_err(dd, "EEPROM Reset for write failed\n");
+	else
+		ret = qib_twsi_blk_wr(dd, dd->twsi_eeprom_dev,
+				      offset, buf, len);
+	dd->f_eeprom_wen(dd, pwen);
+	return ret;
+}
+
+/**
+ * qib_eeprom_write - writes data to the eeprom via I2C
+ * @dd: the qlogic_ib device
+ * @eeprom_offset: where to place data
+ * @buff: data to write
+ * @len: number of bytes to write
+ */
+int qib_eeprom_write(struct qib_devdata *dd, u8 eeprom_offset,
+		     const void *buff, int len)
+{
+	int ret;
+
+	ret = mutex_lock_interruptible(&dd->eep_lock);
+	if (!ret) {
+		ret = eeprom_write_with_enable(dd, eeprom_offset, buff, len);
+		mutex_unlock(&dd->eep_lock);
+	}
+
+	return ret;
+}
+
+static u8 flash_csum(struct qib_flash *ifp, int adjust)
+{
+	u8 *ip = (u8 *) ifp;
+	u8 csum = 0, len;
+
+	/*
+	 * Limit length checksummed to max length of actual data.
+	 * Checksum of erased eeprom will still be bad, but we avoid
+	 * reading past the end of the buffer we were passed.
+	 */
+	len = ifp->if_length;
+	if (len > sizeof(struct qib_flash))
+		len = sizeof(struct qib_flash);
+	while (len--)
+		csum += *ip++;
+	csum -= ifp->if_csum;
+	csum = ~csum;
+	if (adjust)
+		ifp->if_csum = csum;
+
+	return csum;
+}
+
+/**
+ * qib_get_eeprom_info - get the GUID et al. from the TWSI EEPROM device
+ * @dd: the qlogic_ib device
+ *
+ * We have the capability to use the nguid field, and get
+ * the guid from the first chip's flash, to use for all of them.
+ */
+void qib_get_eeprom_info(struct qib_devdata *dd)
+{
+	void *buf;
+	struct qib_flash *ifp;
+	__be64 guid;
+	int len, eep_stat;
+	u8 csum, *bguid;
+	int t = dd->unit;
+	struct qib_devdata *dd0 = qib_lookup(0);
+
+	if (t && dd0->nguid > 1 && t <= dd0->nguid) {
+		u8 oguid;
+		dd->base_guid = dd0->base_guid;
+		bguid = (u8 *) &dd->base_guid;
+
+		oguid = bguid[7];
+		bguid[7] += t;
+		if (oguid > bguid[7]) {
+			if (bguid[6] == 0xff) {
+				if (bguid[5] == 0xff) {
+					qib_dev_err(dd,
+						"Can't set %s GUID from base, wraps to OUI!\n",
+						qib_get_unit_name(t));
+					dd->base_guid = 0;
+					goto bail;
+				}
+				bguid[5]++;
+			}
+			bguid[6]++;
+		}
+		dd->nguid = 1;
+		goto bail;
+	}
+
+	/*
+	 * Read full flash, not just currently used part, since it may have
+	 * been written with a newer definition.
+ * */ + len = sizeof(struct qib_flash); + buf = vmalloc(len); + if (!buf) { + qib_dev_err(dd, + "Couldn't allocate memory to read %u bytes from eeprom for GUID\n", + len); + goto bail; + } + + /* + * Use "public" eeprom read function, which does locking and + * figures out device. This will migrate to chip-specific. + */ + eep_stat = qib_eeprom_read(dd, 0, buf, len); + + if (eep_stat) { + qib_dev_err(dd, "Failed reading GUID from eeprom\n"); + goto done; + } + ifp = (struct qib_flash *)buf; + + csum = flash_csum(ifp, 0); + if (csum != ifp->if_csum) { + qib_devinfo(dd->pcidev, + "Bad I2C flash checksum: 0x%x, not 0x%x\n", + csum, ifp->if_csum); + goto done; + } + if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) || + *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) { + qib_dev_err(dd, + "Invalid GUID %llx from flash; ignoring\n", + *(unsigned long long *) ifp->if_guid); + /* don't allow GUID if all 0 or all 1's */ + goto done; + } + + /* complain, but allow it */ + if (*(u64 *) ifp->if_guid == 0x100007511000000ULL) + qib_devinfo(dd->pcidev, + "Warning, GUID %llx is default, probably not correct!\n", + *(unsigned long long *) ifp->if_guid); + + bguid = ifp->if_guid; + if (!bguid[0] && !bguid[1] && !bguid[2]) { + /* + * Original incorrect GUID format in flash; fix in + * core copy, by shifting up 2 octets; don't need to + * change top octet, since both it and shifted are 0. + */ + bguid[1] = bguid[3]; + bguid[2] = bguid[4]; + bguid[3] = 0; + bguid[4] = 0; + guid = *(__be64 *) ifp->if_guid; + } else + guid = *(__be64 *) ifp->if_guid; + dd->base_guid = guid; + dd->nguid = ifp->if_numguid; + /* + * Things are slightly complicated by the desire to transparently + * support both the Pathscale 10-digit serial number and the QLogic + * 13-character version. + */ + if ((ifp->if_fversion > 1) && ifp->if_sprefix[0] && + ((u8 *) ifp->if_sprefix)[0] != 0xFF) { + char *snp = dd->serial; + + /* + * This board has a Serial-prefix, which is stored + * elsewhere for backward-compatibility. + */ + memcpy(snp, ifp->if_sprefix, sizeof ifp->if_sprefix); + snp[sizeof ifp->if_sprefix] = '\0'; + len = strlen(snp); + snp += len; + len = (sizeof dd->serial) - len; + if (len > sizeof ifp->if_serial) + len = sizeof ifp->if_serial; + memcpy(snp, ifp->if_serial, len); + } else + memcpy(dd->serial, ifp->if_serial, + sizeof ifp->if_serial); + if (!strstr(ifp->if_comment, "Tested successfully")) + qib_dev_err(dd, + "Board SN %s did not pass functional test: %s\n", + dd->serial, ifp->if_comment); + + memcpy(&dd->eep_st_errs, &ifp->if_errcntp, QIB_EEP_LOG_CNT); + /* + * Power-on (actually "active") hours are kept as little-endian value + * in EEPROM, but as seconds in a (possibly as small as 24-bit) + * atomic_t while running. + */ + atomic_set(&dd->active_time, 0); + dd->eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8); + +done: + vfree(buf); + +bail:; +} + +/** + * qib_update_eeprom_log - copy active-time and error counters to eeprom + * @dd: the qlogic_ib device + * + * Although the time is kept as seconds in the qib_devdata struct, it is + * rounded to hours for re-write, as we have only 16 bits in EEPROM. + * First-cut code reads whole (expected) struct qib_flash, modifies, + * re-writes. Future direction: read/write only what we need, assuming + * that the EEPROM had to have been "good enough" for driver init, and + * if not, we aren't making it worse. 
+ *
+ */
+int qib_update_eeprom_log(struct qib_devdata *dd)
+{
+	void *buf;
+	struct qib_flash *ifp;
+	int len, hi_water;
+	uint32_t new_time, new_hrs;
+	u8 csum;
+	int ret, idx;
+	unsigned long flags;
+
+	/* first, check if we actually need to do anything. */
+	ret = 0;
+	for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) {
+		if (dd->eep_st_new_errs[idx]) {
+			ret = 1;
+			break;
+		}
+	}
+	new_time = atomic_read(&dd->active_time);
+
+	if (ret == 0 && new_time < 3600)
+		goto bail;
+
+	/*
+	 * The quick-check above determined that there is something worthy
+	 * of logging, so get current contents and take a more detailed look.
+	 * Read full flash, not just currently used part, since it may have
+	 * been written with a newer definition
+	 */
+	len = sizeof(struct qib_flash);
+	buf = vmalloc(len);
+	ret = 1;
+	if (!buf) {
+		qib_dev_err(dd,
+			"Couldn't allocate memory to read %u bytes from eeprom for logging\n",
+			len);
+		goto bail;
+	}
+
+	/* Grab semaphore and read current EEPROM. If we get an
+	 * error, let go, but if not, keep it until we finish write.
+	 */
+	ret = mutex_lock_interruptible(&dd->eep_lock);
+	if (ret) {
+		qib_dev_err(dd, "Unable to acquire EEPROM for logging\n");
+		goto free_bail;
+	}
+	ret = qib_twsi_blk_rd(dd, dd->twsi_eeprom_dev, 0, buf, len);
+	if (ret) {
+		mutex_unlock(&dd->eep_lock);
+		qib_dev_err(dd, "Unable to read EEPROM for logging\n");
+		goto free_bail;
+	}
+	ifp = (struct qib_flash *)buf;
+
+	csum = flash_csum(ifp, 0);
+	if (csum != ifp->if_csum) {
+		mutex_unlock(&dd->eep_lock);
+		qib_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
+			    csum, ifp->if_csum);
+		ret = 1;
+		goto free_bail;
+	}
+	hi_water = 0;
+	spin_lock_irqsave(&dd->eep_st_lock, flags);
+	for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) {
+		int new_val = dd->eep_st_new_errs[idx];
+		if (new_val) {
+			/*
+			 * If we have seen any errors, add to EEPROM values
+			 * We need to saturate at 0xFF (255) and we also
+			 * would need to adjust the checksum if we were
+			 * trying to minimize EEPROM traffic
+			 * Note that we add to actual current count in EEPROM,
+			 * in case it was altered while we were running.
+			 */
+			new_val += ifp->if_errcntp[idx];
+			if (new_val > 0xFF)
+				new_val = 0xFF;
+			if (ifp->if_errcntp[idx] != new_val) {
+				ifp->if_errcntp[idx] = new_val;
+				hi_water = offsetof(struct qib_flash,
+						    if_errcntp) + idx;
+			}
+			/*
+			 * update our shadow (used to minimize EEPROM
+			 * traffic), to match what we are about to write.
+			 */
+			dd->eep_st_errs[idx] = new_val;
+			dd->eep_st_new_errs[idx] = 0;
+		}
+	}
+	/*
+	 * Now update active-time. We would like to round to the nearest hour
+	 * but unless atomic_t are sure to be proper signed ints we cannot,
+	 * because we need to account for what we "transfer" to EEPROM and
+	 * if we log an hour at 31 minutes, then we would need to set
+	 * active_time to -29 to accurately count the _next_ hour.
+	 */
+	if (new_time >= 3600) {
+		new_hrs = new_time / 3600;
+		atomic_sub((new_hrs * 3600), &dd->active_time);
+		new_hrs += dd->eep_hrs;
+		if (new_hrs > 0xFFFF)
+			new_hrs = 0xFFFF;
+		dd->eep_hrs = new_hrs;
+		if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
+			ifp->if_powerhour[0] = new_hrs & 0xFF;
+			hi_water = offsetof(struct qib_flash, if_powerhour);
+		}
+		if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
+			ifp->if_powerhour[1] = new_hrs >> 8;
+			hi_water = offsetof(struct qib_flash, if_powerhour) + 1;
+		}
+	}
+	/*
+	 * There is a tiny possibility that we could somehow fail to write
+	 * the EEPROM after updating our shadows, but problems from holding
+	 * the spinlock too long are a much bigger issue.
+	 */
+	spin_unlock_irqrestore(&dd->eep_st_lock, flags);
+	if (hi_water) {
+		/* we made some change to the data, update cksum and write */
+		csum = flash_csum(ifp, 1);
+		ret = eeprom_write_with_enable(dd, 0, buf, hi_water + 1);
+	}
+	mutex_unlock(&dd->eep_lock);
+	if (ret)
+		qib_dev_err(dd, "Failed updating EEPROM\n");
+
+free_bail:
+	vfree(buf);
+bail:
+	return ret;
+}
+
+/**
+ * qib_inc_eeprom_err - increment one of the four error counters
+ * that are logged to EEPROM.
+ * @dd: the qlogic_ib device
+ * @eidx: 0..3, the counter to increment
+ * @incr: how much to add
+ *
+ * Each counter is 8-bits, and saturates at 255 (0xFF). They
+ * are copied to the EEPROM (aka flash) whenever qib_update_eeprom_log()
+ * is called, but it can only be called in a context that allows sleep.
+ * This function can be called even at interrupt level.
+ */
+void qib_inc_eeprom_err(struct qib_devdata *dd, u32 eidx, u32 incr)
+{
+	uint new_val;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->eep_st_lock, flags);
+	new_val = dd->eep_st_new_errs[eidx] + incr;
+	if (new_val > 255)
+		new_val = 255;
+	dd->eep_st_new_errs[eidx] = new_val;
+	spin_unlock_irqrestore(&dd->eep_st_lock, flags);
+}
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
new file mode 100644
index 00000000000..b15e34eeef6
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -0,0 +1,2410 @@
+/*
+ * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+#include <linux/poll.h>
+#include <linux/cdev.h>
+#include <linux/swap.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/aio.h>
+#include <linux/jiffies.h>
+#include <asm/pgtable.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+
+#include "qib.h"
+#include "qib_common.h"
+#include "qib_user_sdma.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
+
+static int qib_open(struct inode *, struct file *);
+static int qib_close(struct inode *, struct file *);
+static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *);
+static ssize_t qib_aio_write(struct kiocb *, const struct iovec *,
+			     unsigned long, loff_t);
+static unsigned int qib_poll(struct file *, struct poll_table_struct *);
+static int qib_mmapf(struct file *, struct vm_area_struct *);
+
+static const struct file_operations qib_file_ops = {
+	.owner = THIS_MODULE,
+	.write = qib_write,
+	.aio_write = qib_aio_write,
+	.open = qib_open,
+	.release = qib_close,
+	.poll = qib_poll,
+	.mmap = qib_mmapf,
+	.llseek = noop_llseek,
+};
+
+/*
+ * Convert kernel virtual addresses to physical addresses so they don't
+ * potentially conflict with the chip addresses used as mmap offsets.
+ * It doesn't really matter what mmap offset we use as long as we can
+ * interpret it correctly.
+ */
+static u64 cvt_kvaddr(void *p)
+{
+	struct page *page;
+	u64 paddr = 0;
+
+	page = vmalloc_to_page(p);
+	if (page)
+		paddr = page_to_pfn(page) << PAGE_SHIFT;
+
+	return paddr;
+}
+
+static int qib_get_base_info(struct file *fp, void __user *ubase,
+			     size_t ubase_size)
+{
+	struct qib_ctxtdata *rcd = ctxt_fp(fp);
+	int ret = 0;
+	struct qib_base_info *kinfo = NULL;
+	struct qib_devdata *dd = rcd->dd;
+	struct qib_pportdata *ppd = rcd->ppd;
+	unsigned subctxt_cnt;
+	int shared, master;
+	size_t sz;
+
+	subctxt_cnt = rcd->subctxt_cnt;
+	if (!subctxt_cnt) {
+		shared = 0;
+		master = 0;
+		subctxt_cnt = 1;
+	} else {
+		shared = 1;
+		master = !subctxt_fp(fp);
+	}
+
+	sz = sizeof(*kinfo);
+	/* If context sharing is not requested, allow the old size structure */
+	if (!shared)
+		sz -= 7 * sizeof(u64);
+	if (ubase_size < sz) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
+	if (kinfo == NULL) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+
+	ret = dd->f_get_base_info(rcd, kinfo);
+	if (ret < 0)
+		goto bail;
+
+	kinfo->spi_rcvhdr_cnt = dd->rcvhdrcnt;
+	kinfo->spi_rcvhdrent_size = dd->rcvhdrentsize;
+	kinfo->spi_tidegrcnt = rcd->rcvegrcnt;
+	kinfo->spi_rcv_egrbufsize = dd->rcvegrbufsize;
+	/*
+	 * have to mmap whole thing
+	 */
+	kinfo->spi_rcv_egrbuftotlen =
+		rcd->rcvegrbuf_chunks * rcd->rcvegrbuf_size;
+	kinfo->spi_rcv_egrperchunk = rcd->rcvegrbufs_perchunk;
+	kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
+		rcd->rcvegrbuf_chunks;
+	kinfo->spi_tidcnt = dd->rcvtidcnt / subctxt_cnt;
+	if (master)
+		kinfo->spi_tidcnt += dd->rcvtidcnt % subctxt_cnt;
+	/*
+	 * for this use, may be cfgctxts summed over all chips that
+	 * are configured and present
+	 */
+	kinfo->spi_nctxts = dd->cfgctxts;
+	/* unit (chip/board) our context is on */
+	kinfo->spi_unit = dd->unit;
+	kinfo->spi_port = ppd->port;
+	/* for now, only a single page */
+	kinfo->spi_tid_maxsize = PAGE_SIZE;
+
+	/*
+	 * Doing this per context, and based on the skip value, etc.  This has
+	 * to be the actual buffer size, since the protocol code treats it
+	 * as an array.
+	 *
+	 * These have to be set to user addresses in the user code via mmap.
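+	 * A minimal sketch of the userspace side of this convention (fd
+	 * and length handling are illustrative only): the value returned
+	 * in a field such as spi_rcvhdr_base is passed back verbatim as
+	 * the mmap offset,
+	 *
+	 *	p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd,
+	 *		 (off_t) kinfo.spi_rcvhdr_base);
+	 *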
+ * These values are used on return to user code for the mmap target + * addresses only. For 32 bit, same 44 bit address problem, so use + * the physical address, not virtual. Before 2.6.11, using the + * page_address() macro worked, but in 2.6.11, even that returns the + * full 64 bit address (upper bits all 1's). So far, using the + * physical addresses (or chip offsets, for chip mapping) works, but + * no doubt some future kernel release will change that, and we'll be + * on to yet another method of dealing with this. + * Normally only one of rcvhdr_tailaddr or rhf_offset is useful + * since the chips with non-zero rhf_offset don't normally + * enable tail register updates to host memory, but for testing, + * both can be enabled and used. + */ + kinfo->spi_rcvhdr_base = (u64) rcd->rcvhdrq_phys; + kinfo->spi_rcvhdr_tailaddr = (u64) rcd->rcvhdrqtailaddr_phys; + kinfo->spi_rhf_offset = dd->rhf_offset; + kinfo->spi_rcv_egrbufs = (u64) rcd->rcvegr_phys; + kinfo->spi_pioavailaddr = (u64) dd->pioavailregs_phys; + /* setup per-unit (not port) status area for user programs */ + kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + + (char *) ppd->statusp - + (char *) dd->pioavailregs_dma; + kinfo->spi_uregbase = (u64) dd->uregbase + dd->ureg_align * rcd->ctxt; + if (!shared) { + kinfo->spi_piocnt = rcd->piocnt; + kinfo->spi_piobufbase = (u64) rcd->piobufs; + kinfo->spi_sendbuf_status = cvt_kvaddr(rcd->user_event_mask); + } else if (master) { + kinfo->spi_piocnt = (rcd->piocnt / subctxt_cnt) + + (rcd->piocnt % subctxt_cnt); + /* Master's PIO buffers are after all the slave's */ + kinfo->spi_piobufbase = (u64) rcd->piobufs + + dd->palign * + (rcd->piocnt - kinfo->spi_piocnt); + } else { + unsigned slave = subctxt_fp(fp) - 1; + + kinfo->spi_piocnt = rcd->piocnt / subctxt_cnt; + kinfo->spi_piobufbase = (u64) rcd->piobufs + + dd->palign * kinfo->spi_piocnt * slave; + } + + if (shared) { + kinfo->spi_sendbuf_status = + cvt_kvaddr(&rcd->user_event_mask[subctxt_fp(fp)]); + /* only spi_subctxt_* fields should be set in this block! */ + kinfo->spi_subctxt_uregbase = cvt_kvaddr(rcd->subctxt_uregbase); + + kinfo->spi_subctxt_rcvegrbuf = + cvt_kvaddr(rcd->subctxt_rcvegrbuf); + kinfo->spi_subctxt_rcvhdr_base = + cvt_kvaddr(rcd->subctxt_rcvhdr_base); + } + + /* + * All user buffers are 2KB buffers. If we ever support + * giving 4KB buffers to user processes, this will need some + * work. Can't use piobufbase directly, because it has + * both 2K and 4K buffer base values. + */ + kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->pio2k_bufbase) / + dd->palign; + kinfo->spi_pioalign = dd->palign; + kinfo->spi_qpair = QIB_KD_QP; + /* + * user mode PIO buffers are always 2KB, even when 4KB can + * be received, and sent via the kernel; this is ibmaxlen + * for 2K MTU. + */ + kinfo->spi_piosize = dd->piosize2k - 2 * sizeof(u32); + kinfo->spi_mtu = ppd->ibmaxlen; /* maxlen, not ibmtu */ + kinfo->spi_ctxt = rcd->ctxt; + kinfo->spi_subctxt = subctxt_fp(fp); + kinfo->spi_sw_version = QIB_KERN_SWVERSION; + kinfo->spi_sw_version |= 1U << 31; /* QLogic-built, not kernel.org */ + kinfo->spi_hw_version = dd->revision; + + if (master) + kinfo->spi_runtime_flags |= QIB_RUNTIME_MASTER; + + sz = (ubase_size < sizeof(*kinfo)) ? 
ubase_size : sizeof(*kinfo); + if (copy_to_user(ubase, kinfo, sz)) + ret = -EFAULT; +bail: + kfree(kinfo); + return ret; +} + +/** + * qib_tid_update - update a context TID + * @rcd: the context + * @fp: the qib device file + * @ti: the TID information + * + * The new implementation as of Oct 2004 is that the driver assigns + * the tid and returns it to the caller. To reduce search time, we + * keep a cursor for each context, walking the shadow tid array to find + * one that's not in use. + * + * For now, if we can't allocate the full list, we fail, although + * in the long run, we'll allocate as many as we can, and the + * caller will deal with that by trying the remaining pages later. + * That means that when we fail, we have to mark the tids as not in + * use again, in our shadow copy. + * + * It's up to the caller to free the tids when they are done. + * We'll unlock the pages as they free them. + * + * Also, right now we are locking one page at a time, but since + * the intended use of this routine is for a single group of + * virtually contiguous pages, that should change to improve + * performance. + */ +static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp, + const struct qib_tid_info *ti) +{ + int ret = 0, ntids; + u32 tid, ctxttid, cnt, i, tidcnt, tidoff; + u16 *tidlist; + struct qib_devdata *dd = rcd->dd; + u64 physaddr; + unsigned long vaddr; + u64 __iomem *tidbase; + unsigned long tidmap[8]; + struct page **pagep = NULL; + unsigned subctxt = subctxt_fp(fp); + + if (!dd->pageshadow) { + ret = -ENOMEM; + goto done; + } + + cnt = ti->tidcnt; + if (!cnt) { + ret = -EFAULT; + goto done; + } + ctxttid = rcd->ctxt * dd->rcvtidcnt; + if (!rcd->subctxt_cnt) { + tidcnt = dd->rcvtidcnt; + tid = rcd->tidcursor; + tidoff = 0; + } else if (!subctxt) { + tidcnt = (dd->rcvtidcnt / rcd->subctxt_cnt) + + (dd->rcvtidcnt % rcd->subctxt_cnt); + tidoff = dd->rcvtidcnt - tidcnt; + ctxttid += tidoff; + tid = tidcursor_fp(fp); + } else { + tidcnt = dd->rcvtidcnt / rcd->subctxt_cnt; + tidoff = tidcnt * (subctxt - 1); + ctxttid += tidoff; + tid = tidcursor_fp(fp); + } + if (cnt > tidcnt) { + /* make sure it all fits in tid_pg_list */ + qib_devinfo(dd->pcidev, + "Process tried to allocate %u TIDs, only trying max (%u)\n", + cnt, tidcnt); + cnt = tidcnt; + } + pagep = (struct page **) rcd->tid_pg_list; + tidlist = (u16 *) &pagep[dd->rcvtidcnt]; + pagep += tidoff; + tidlist += tidoff; + + memset(tidmap, 0, sizeof(tidmap)); + /* before decrement; chip actual # */ + ntids = tidcnt; + tidbase = (u64 __iomem *) (((char __iomem *) dd->kregbase) + + dd->rcvtidbase + + ctxttid * sizeof(*tidbase)); + + /* virtual address of first page in transfer */ + vaddr = ti->tidvaddr; + if (!access_ok(VERIFY_WRITE, (void __user *) vaddr, + cnt * PAGE_SIZE)) { + ret = -EFAULT; + goto done; + } + ret = qib_get_user_pages(vaddr, cnt, pagep); + if (ret) { + /* + * if (ret == -EBUSY) + * We can't continue because the pagep array won't be + * initialized. This should never happen, + * unless perhaps the user has mpin'ed the pages + * themselves. 
+		 */
+		qib_devinfo(dd->pcidev,
+			 "Failed to lock addr %p, %u pages: "
+			 "errno %d\n", (void *) vaddr, cnt, -ret);
+		goto done;
+	}
+	for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
+		for (; ntids--; tid++) {
+			if (tid == tidcnt)
+				tid = 0;
+			if (!dd->pageshadow[ctxttid + tid])
+				break;
+		}
+		if (ntids < 0) {
+			/*
+			 * Oops, wrapped all the way through their TIDs,
+			 * and didn't have enough free; see comments at
+			 * start of routine
+			 */
+			i--;	/* last tidlist[i] not filled in */
+			ret = -ENOMEM;
+			break;
+		}
+		tidlist[i] = tid + tidoff;
+		/* we "know" system pages and TID pages are same size */
+		dd->pageshadow[ctxttid + tid] = pagep[i];
+		dd->physshadow[ctxttid + tid] =
+			qib_map_page(dd->pcidev, pagep[i], 0, PAGE_SIZE,
+				     PCI_DMA_FROMDEVICE);
+		/*
+		 * don't need atomic or it's overhead
+		 */
+		__set_bit(tid, tidmap);
+		physaddr = dd->physshadow[ctxttid + tid];
+		/* PERFORMANCE: below should almost certainly be cached */
+		dd->f_put_tid(dd, &tidbase[tid],
+				  RCVHQ_RCV_TYPE_EXPECTED, physaddr);
+		/*
+		 * don't check this tid in qib_ctxtshadow, since we
+		 * just filled it in; start with the next one.
+		 */
+		tid++;
+	}
+
+	if (ret) {
+		u32 limit;
+cleanup:
+		/* jump here if copy out of updated info failed... */
+		/* same code that's in qib_free_tid() */
+		limit = sizeof(tidmap) * BITS_PER_BYTE;
+		if (limit > tidcnt)
+			/* just in case size changes in future */
+			limit = tidcnt;
+		tid = find_first_bit((const unsigned long *)tidmap, limit);
+		for (; tid < limit; tid++) {
+			if (!test_bit(tid, tidmap))
+				continue;
+			if (dd->pageshadow[ctxttid + tid]) {
+				dma_addr_t phys;
+
+				phys = dd->physshadow[ctxttid + tid];
+				dd->physshadow[ctxttid + tid] = dd->tidinvalid;
+				/* PERFORMANCE: below should almost certainly
+				 * be cached
+				 */
+				dd->f_put_tid(dd, &tidbase[tid],
+					      RCVHQ_RCV_TYPE_EXPECTED,
+					      dd->tidinvalid);
+				pci_unmap_page(dd->pcidev, phys, PAGE_SIZE,
+					       PCI_DMA_FROMDEVICE);
+				dd->pageshadow[ctxttid + tid] = NULL;
+			}
+		}
+		qib_release_user_pages(pagep, cnt);
+	} else {
+		/*
+		 * Copy the updated array, with qib_tid's filled in, back
+		 * to user.  Since we did the copy in already, this "should
+		 * never fail".  If it does, we have to clean up...
+		 */
+		if (copy_to_user((void __user *)
+				 (unsigned long) ti->tidlist,
+				 tidlist, cnt * sizeof(*tidlist))) {
+			ret = -EFAULT;
+			goto cleanup;
+		}
+		if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
+				 tidmap, sizeof tidmap)) {
+			ret = -EFAULT;
+			goto cleanup;
+		}
+		if (tid == tidcnt)
+			tid = 0;
+		if (!rcd->subctxt_cnt)
+			rcd->tidcursor = tid;
+		else
+			tidcursor_fp(fp) = tid;
+	}
+
+done:
+	return ret;
+}
+
+/**
+ * qib_tid_free - free a context TID
+ * @rcd: the context
+ * @subctxt: the subcontext
+ * @ti: the TID info
+ *
+ * right now we are unlocking one page at a time, but since
+ * the intended use of this routine is for a single group of
+ * virtually contiguous pages, that should change to improve
+ * performance.  We check that the TID is in range for this context
+ * but otherwise don't check validity; if user has an error and
+ * frees the wrong tid, it's only their own data that can thereby
+ * be corrupted.  We do check that the TID was in use, for sanity.
+ * We always use our idea of the saved address, not the address that
+ * they pass in to us.
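+ *
+ * As a caller's-eye sketch: the ti->tidmap bitmask copied in below is
+ * the same map qib_tid_update() copied out, so a well-behaved user
+ * frees exactly the TIDs it was assigned; e.g. bits 3 and 4 set means
+ * TIDs 3 and 4 of this context's range are torn down.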
+ */
+static int qib_tid_free(struct qib_ctxtdata *rcd, unsigned subctxt,
+			const struct qib_tid_info *ti)
+{
+	int ret = 0;
+	u32 tid, ctxttid, cnt, limit, tidcnt;
+	struct qib_devdata *dd = rcd->dd;
+	u64 __iomem *tidbase;
+	unsigned long tidmap[8];
+
+	if (!dd->pageshadow) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
+			   sizeof tidmap)) {
+		ret = -EFAULT;
+		goto done;
+	}
+
+	ctxttid = rcd->ctxt * dd->rcvtidcnt;
+	if (!rcd->subctxt_cnt)
+		tidcnt = dd->rcvtidcnt;
+	else if (!subctxt) {
+		tidcnt = (dd->rcvtidcnt / rcd->subctxt_cnt) +
+			 (dd->rcvtidcnt % rcd->subctxt_cnt);
+		ctxttid += dd->rcvtidcnt - tidcnt;
+	} else {
+		tidcnt = dd->rcvtidcnt / rcd->subctxt_cnt;
+		ctxttid += tidcnt * (subctxt - 1);
+	}
+	tidbase = (u64 __iomem *) ((char __iomem *)(dd->kregbase) +
+				   dd->rcvtidbase +
+				   ctxttid * sizeof(*tidbase));
+
+	limit = sizeof(tidmap) * BITS_PER_BYTE;
+	if (limit > tidcnt)
+		/* just in case size changes in future */
+		limit = tidcnt;
+	tid = find_first_bit(tidmap, limit);
+	for (cnt = 0; tid < limit; tid++) {
+		/*
+		 * small optimization; if we detect a run of 3 or so without
+		 * any set, use find_first_bit again. That's mainly to
+		 * accelerate the case where we wrapped, so we have some at
+		 * the beginning, and some at the end, and a big gap
+		 * in the middle.
+		 */
+		if (!test_bit(tid, tidmap))
+			continue;
+		cnt++;
+		if (dd->pageshadow[ctxttid + tid]) {
+			struct page *p;
+			dma_addr_t phys;
+
+			p = dd->pageshadow[ctxttid + tid];
+			dd->pageshadow[ctxttid + tid] = NULL;
+			phys = dd->physshadow[ctxttid + tid];
+			dd->physshadow[ctxttid + tid] = dd->tidinvalid;
+			/* PERFORMANCE: below should almost certainly be
+			 * cached
+			 */
+			dd->f_put_tid(dd, &tidbase[tid],
+				      RCVHQ_RCV_TYPE_EXPECTED, dd->tidinvalid);
+			pci_unmap_page(dd->pcidev, phys, PAGE_SIZE,
+				       PCI_DMA_FROMDEVICE);
+			qib_release_user_pages(&p, 1);
+		}
+	}
+done:
+	return ret;
+}
+
+/**
+ * qib_set_part_key - set a partition key
+ * @rcd: the context
+ * @key: the key
+ *
+ * We can have up to 4 active at a time (other than the default, which is
+ * always allowed). This is somewhat tricky, since multiple contexts may set
+ * the same key, so we reference count them, and clean up at exit. All 4
+ * partition keys are packed into a single qlogic_ib register. It's an
+ * error for a process to set the same pkey multiple times. We provide no
+ * mechanism to de-allocate a pkey at this time; we may eventually need to
+ * do that. I've used the atomic operations, and no locking, and only make
+ * a single pass through what's available. This should be more than
+ * adequate for some time. I'll think about spinlocks or the like if and as
+ * it's necessary.
+ */
+static int qib_set_part_key(struct qib_ctxtdata *rcd, u16 key)
+{
+	struct qib_pportdata *ppd = rcd->ppd;
+	int i, any = 0, pidx = -1;
+	u16 lkey = key & 0x7FFF;
+	int ret;
+
+	if (lkey == (QIB_DEFAULT_P_KEY & 0x7FFF)) {
+		/* nothing to do; this key always valid */
+		ret = 0;
+		goto bail;
+	}
+
+	if (!lkey) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	/*
+	 * Set the full membership bit, because it has to be
+	 * set in the register or the packet, and it seems
+	 * cleaner to set in the register than to force all
+	 * callers to set it.
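+	 * As an illustration: a key passed in as 0x7FFF is stored and
+	 * written to the chip as 0xFFFF once bit 15 is OR'ed in below.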
+	 */
+	key |= 0x8000;
+
+	for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) {
+		if (!rcd->pkeys[i] && pidx == -1)
+			pidx = i;
+		if (rcd->pkeys[i] == key) {
+			ret = -EEXIST;
+			goto bail;
+		}
+	}
+	if (pidx == -1) {
+		ret = -EBUSY;
+		goto bail;
+	}
+	for (any = i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
+		if (!ppd->pkeys[i]) {
+			any++;
+			continue;
+		}
+		if (ppd->pkeys[i] == key) {
+			atomic_t *pkrefs = &ppd->pkeyrefs[i];
+
+			if (atomic_inc_return(pkrefs) > 1) {
+				rcd->pkeys[pidx] = key;
+				ret = 0;
+				goto bail;
+			} else {
+				/*
+				 * lost race, decrement count, catch below
+				 */
+				atomic_dec(pkrefs);
+				any++;
+			}
+		}
+		if ((ppd->pkeys[i] & 0x7FFF) == lkey) {
+			/*
+			 * It makes no sense to have both the limited and
+			 * full membership PKEY set at the same time since
+			 * the unlimited one will disable the limited one.
+			 */
+			ret = -EEXIST;
+			goto bail;
+		}
+	}
+	if (!any) {
+		ret = -EBUSY;
+		goto bail;
+	}
+	for (any = i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) {
+		if (!ppd->pkeys[i] &&
+		    atomic_inc_return(&ppd->pkeyrefs[i]) == 1) {
+			rcd->pkeys[pidx] = key;
+			ppd->pkeys[i] = key;
+			(void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0);
+			ret = 0;
+			goto bail;
+		}
+	}
+	ret = -EBUSY;
+
+bail:
+	return ret;
+}
+
+/**
+ * qib_manage_rcvq - manage a context's receive queue
+ * @rcd: the context
+ * @subctxt: the subcontext
+ * @start_stop: action to carry out
+ *
+ * start_stop == 0 disables receive on the context, for use in queue
+ * overflow conditions. start_stop == 1 re-enables, to be used to
+ * re-init the software copy of the head register
+ */
+static int qib_manage_rcvq(struct qib_ctxtdata *rcd, unsigned subctxt,
+			   int start_stop)
+{
+	struct qib_devdata *dd = rcd->dd;
+	unsigned int rcvctrl_op;
+
+	if (subctxt)
+		goto bail;
+	/* atomically clear receive enable ctxt. */
+	if (start_stop) {
+		/*
+		 * On enable, force in-memory copy of the tail register to
+		 * 0, so that protocol code doesn't have to worry about
+		 * whether or not the chip has yet updated the in-memory
+		 * copy or not on return from the system call. The chip
+		 * always resets its tail register back to 0 on a
+		 * transition from disabled to enabled.
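+		 *
+		 * Illustrative recovery sequence from userspace (sketch,
+		 * not part of the driver sources; names assumed from
+		 * qib_common.h):
+		 *
+		 *	cmd.type = QIB_CMD_RECV_CTRL;
+		 *	cmd.cmd.recv_ctrl = 0;
+		 *	write(fd, &cmd, sizeof(cmd));
+		 *	... drain queue, re-init software head copy ...
+		 *	cmd.cmd.recv_ctrl = 1;
+		 *	write(fd, &cmd, sizeof(cmd));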
+ */ + if (rcd->rcvhdrtail_kvaddr) + qib_clear_rcvhdrtail(rcd); + rcvctrl_op = QIB_RCVCTRL_CTXT_ENB; + } else + rcvctrl_op = QIB_RCVCTRL_CTXT_DIS; + dd->f_rcvctrl(rcd->ppd, rcvctrl_op, rcd->ctxt); + /* always; new head should be equal to new tail; see above */ +bail: + return 0; +} + +static void qib_clean_part_key(struct qib_ctxtdata *rcd, + struct qib_devdata *dd) +{ + int i, j, pchanged = 0; + u64 oldpkey; + struct qib_pportdata *ppd = rcd->ppd; + + /* for debugging only */ + oldpkey = (u64) ppd->pkeys[0] | + ((u64) ppd->pkeys[1] << 16) | + ((u64) ppd->pkeys[2] << 32) | + ((u64) ppd->pkeys[3] << 48); + + for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) { + if (!rcd->pkeys[i]) + continue; + for (j = 0; j < ARRAY_SIZE(ppd->pkeys); j++) { + /* check for match independent of the global bit */ + if ((ppd->pkeys[j] & 0x7fff) != + (rcd->pkeys[i] & 0x7fff)) + continue; + if (atomic_dec_and_test(&ppd->pkeyrefs[j])) { + ppd->pkeys[j] = 0; + pchanged++; + } + break; + } + rcd->pkeys[i] = 0; + } + if (pchanged) + (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0); +} + +/* common code for the mappings on dma_alloc_coherent mem */ +static int qib_mmap_mem(struct vm_area_struct *vma, struct qib_ctxtdata *rcd, + unsigned len, void *kvaddr, u32 write_ok, char *what) +{ + struct qib_devdata *dd = rcd->dd; + unsigned long pfn; + int ret; + + if ((vma->vm_end - vma->vm_start) > len) { + qib_devinfo(dd->pcidev, + "FAIL on %s: len %lx > %x\n", what, + vma->vm_end - vma->vm_start, len); + ret = -EFAULT; + goto bail; + } + + /* + * shared context user code requires rcvhdrq mapped r/w, others + * only allowed readonly mapping. + */ + if (!write_ok) { + if (vma->vm_flags & VM_WRITE) { + qib_devinfo(dd->pcidev, + "%s must be mapped readonly\n", what); + ret = -EPERM; + goto bail; + } + + /* don't allow them to later change with mprotect */ + vma->vm_flags &= ~VM_MAYWRITE; + } + + pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT; + ret = remap_pfn_range(vma, vma->vm_start, pfn, + len, vma->vm_page_prot); + if (ret) + qib_devinfo(dd->pcidev, + "%s ctxt%u mmap of %lx, %x bytes failed: %d\n", + what, rcd->ctxt, pfn, len, ret); +bail: + return ret; +} + +static int mmap_ureg(struct vm_area_struct *vma, struct qib_devdata *dd, + u64 ureg) +{ + unsigned long phys; + unsigned long sz; + int ret; + + /* + * This is real hardware, so use io_remap. This is the mechanism + * for the user process to update the head registers for their ctxt + * in the chip. + */ + sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE; + if ((vma->vm_end - vma->vm_start) > sz) { + qib_devinfo(dd->pcidev, + "FAIL mmap userreg: reqlen %lx > PAGE\n", + vma->vm_end - vma->vm_start); + ret = -EFAULT; + } else { + phys = dd->physaddr + ureg; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + ret = io_remap_pfn_range(vma, vma->vm_start, + phys >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); + } + return ret; +} + +static int mmap_piobufs(struct vm_area_struct *vma, + struct qib_devdata *dd, + struct qib_ctxtdata *rcd, + unsigned piobufs, unsigned piocnt) +{ + unsigned long phys; + int ret; + + /* + * When we map the PIO buffers in the chip, we want to map them as + * writeonly, no read possible; unfortunately, x86 doesn't allow + * for this in hardware, but we still prevent users from asking + * for it. 
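+	 *
+	 * Illustrative mapping call from userspace (sketch, not part of
+	 * the driver sources; the offset and length are hypothetical
+	 * names for values returned at QIB_CMD_USER_INIT time):
+	 *
+	 *	pio = mmap(NULL, piocnt * palign, PROT_WRITE,
+	 *		   MAP_SHARED, fd, piobufs_offset);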
+ */ + if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) { + qib_devinfo(dd->pcidev, + "FAIL mmap piobufs: reqlen %lx > PAGE\n", + vma->vm_end - vma->vm_start); + ret = -EINVAL; + goto bail; + } + + phys = dd->physaddr + piobufs; + +#if defined(__powerpc__) + /* There isn't a generic way to specify writethrough mappings */ + pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; + pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU; + pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED; +#endif + + /* + * don't allow them to later change to readable with mprotect (for when + * not initially mapped readable, as is normally the case) + */ + vma->vm_flags &= ~VM_MAYREAD; + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + + if (qib_wc_pat) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + + ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +bail: + return ret; +} + +static int mmap_rcvegrbufs(struct vm_area_struct *vma, + struct qib_ctxtdata *rcd) +{ + struct qib_devdata *dd = rcd->dd; + unsigned long start, size; + size_t total_size, i; + unsigned long pfn; + int ret; + + size = rcd->rcvegrbuf_size; + total_size = rcd->rcvegrbuf_chunks * size; + if ((vma->vm_end - vma->vm_start) > total_size) { + qib_devinfo(dd->pcidev, + "FAIL on egr bufs: reqlen %lx > actual %lx\n", + vma->vm_end - vma->vm_start, + (unsigned long) total_size); + ret = -EINVAL; + goto bail; + } + + if (vma->vm_flags & VM_WRITE) { + qib_devinfo(dd->pcidev, + "Can't map eager buffers as writable (flags=%lx)\n", + vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* don't allow them to later change to writeable with mprotect */ + vma->vm_flags &= ~VM_MAYWRITE; + + start = vma->vm_start; + + for (i = 0; i < rcd->rcvegrbuf_chunks; i++, start += size) { + pfn = virt_to_phys(rcd->rcvegrbuf[i]) >> PAGE_SHIFT; + ret = remap_pfn_range(vma, start, pfn, size, + vma->vm_page_prot); + if (ret < 0) + goto bail; + } + ret = 0; + +bail: + return ret; +} + +/* + * qib_file_vma_fault - handle a VMA page fault. + */ +static int qib_file_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *page; + + page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT)); + if (!page) + return VM_FAULT_SIGBUS; + + get_page(page); + vmf->page = page; + + return 0; +} + +static struct vm_operations_struct qib_file_vm_ops = { + .fault = qib_file_vma_fault, +}; + +static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr, + struct qib_ctxtdata *rcd, unsigned subctxt) +{ + struct qib_devdata *dd = rcd->dd; + unsigned subctxt_cnt; + unsigned long len; + void *addr; + size_t size; + int ret = 0; + + subctxt_cnt = rcd->subctxt_cnt; + size = rcd->rcvegrbuf_chunks * rcd->rcvegrbuf_size; + + /* + * Each process has all the subctxt uregbase, rcvhdrq, and + * rcvegrbufs mmapped - as an array for all the processes, + * and also separately for this process. 
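+	 *
+	 * Illustrative layout for N subcontexts (derived from the size
+	 * checks below):
+	 *
+	 *	subctxt_uregbase:    N pages, page i for subcontext i
+	 *	subctxt_rcvhdr_base: N copies of rcvhdrq_size bytes
+	 *	subctxt_rcvegrbuf:   N copies of the eager buffer set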
+ */ + if (pgaddr == cvt_kvaddr(rcd->subctxt_uregbase)) { + addr = rcd->subctxt_uregbase; + size = PAGE_SIZE * subctxt_cnt; + } else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvhdr_base)) { + addr = rcd->subctxt_rcvhdr_base; + size = rcd->rcvhdrq_size * subctxt_cnt; + } else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvegrbuf)) { + addr = rcd->subctxt_rcvegrbuf; + size *= subctxt_cnt; + } else if (pgaddr == cvt_kvaddr(rcd->subctxt_uregbase + + PAGE_SIZE * subctxt)) { + addr = rcd->subctxt_uregbase + PAGE_SIZE * subctxt; + size = PAGE_SIZE; + } else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvhdr_base + + rcd->rcvhdrq_size * subctxt)) { + addr = rcd->subctxt_rcvhdr_base + + rcd->rcvhdrq_size * subctxt; + size = rcd->rcvhdrq_size; + } else if (pgaddr == cvt_kvaddr(&rcd->user_event_mask[subctxt])) { + addr = rcd->user_event_mask; + size = PAGE_SIZE; + } else if (pgaddr == cvt_kvaddr(rcd->subctxt_rcvegrbuf + + size * subctxt)) { + addr = rcd->subctxt_rcvegrbuf + size * subctxt; + /* rcvegrbufs are read-only on the slave */ + if (vma->vm_flags & VM_WRITE) { + qib_devinfo(dd->pcidev, + "Can't map eager buffers as " + "writable (flags=%lx)\n", vma->vm_flags); + ret = -EPERM; + goto bail; + } + /* + * Don't allow permission to later change to writeable + * with mprotect. + */ + vma->vm_flags &= ~VM_MAYWRITE; + } else + goto bail; + len = vma->vm_end - vma->vm_start; + if (len > size) { + ret = -EINVAL; + goto bail; + } + + vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; + vma->vm_ops = &qib_file_vm_ops; + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; + ret = 1; + +bail: + return ret; +} + +/** + * qib_mmapf - mmap various structures into user space + * @fp: the file pointer + * @vma: the VM area + * + * We use this to have a shared buffer between the kernel and the user code + * for the rcvhdr queue, egr buffers, and the per-context user regs and pio + * buffers in the chip. We have the open and close entries so we can bump + * the ref count and keep the driver from being unloaded while still mapped. + */ +static int qib_mmapf(struct file *fp, struct vm_area_struct *vma) +{ + struct qib_ctxtdata *rcd; + struct qib_devdata *dd; + u64 pgaddr, ureg; + unsigned piobufs, piocnt; + int ret, match = 1; + + rcd = ctxt_fp(fp); + if (!rcd || !(vma->vm_flags & VM_SHARED)) { + ret = -EINVAL; + goto bail; + } + dd = rcd->dd; + + /* + * This is the qib_do_user_init() code, mapping the shared buffers + * and per-context user registers into the user process. The address + * referred to by vm_pgoff is the file offset passed via mmap(). + * For shared contexts, this is the kernel vmalloc() address of the + * pages to share with the master. + * For non-shared or master ctxts, this is a physical address. + * We only do one mmap for each space mapped. + */ + pgaddr = vma->vm_pgoff << PAGE_SHIFT; + + /* + * Check for 0 in case one of the allocations failed, but user + * called mmap anyway. + */ + if (!pgaddr) { + ret = -EINVAL; + goto bail; + } + + /* + * Physical addresses must fit in 40 bits for our hardware. + * Check for kernel virtual addresses first, anything else must + * match a HW or memory address. 
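+	 *
+	 * From userspace, every region is therefore reached with the
+	 * same call shape (illustrative sketch; region_offset stands in
+	 * for the per-region offsets handed out in the spu_base_info
+	 * reply):
+	 *
+	 *	p = mmap(NULL, len, prot, MAP_SHARED, fd,
+	 *		 (off_t) region_offset);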
+	 */
+	ret = mmap_kvaddr(vma, pgaddr, rcd, subctxt_fp(fp));
+	if (ret) {
+		if (ret > 0)
+			ret = 0;
+		goto bail;
+	}
+
+	ureg = dd->uregbase + dd->ureg_align * rcd->ctxt;
+	if (!rcd->subctxt_cnt) {
+		/* ctxt is not shared */
+		piocnt = rcd->piocnt;
+		piobufs = rcd->piobufs;
+	} else if (!subctxt_fp(fp)) {
+		/* caller is the master */
+		piocnt = (rcd->piocnt / rcd->subctxt_cnt) +
+			 (rcd->piocnt % rcd->subctxt_cnt);
+		piobufs = rcd->piobufs +
+			dd->palign * (rcd->piocnt - piocnt);
+	} else {
+		unsigned slave = subctxt_fp(fp) - 1;
+
+		/* caller is a slave */
+		piocnt = rcd->piocnt / rcd->subctxt_cnt;
+		piobufs = rcd->piobufs + dd->palign * piocnt * slave;
+	}
+
+	if (pgaddr == ureg)
+		ret = mmap_ureg(vma, dd, ureg);
+	else if (pgaddr == piobufs)
+		ret = mmap_piobufs(vma, dd, rcd, piobufs, piocnt);
+	else if (pgaddr == dd->pioavailregs_phys)
+		/* in-memory copy of pioavail registers */
+		ret = qib_mmap_mem(vma, rcd, PAGE_SIZE,
+				   (void *) dd->pioavailregs_dma, 0,
+				   "pioavail registers");
+	else if (pgaddr == rcd->rcvegr_phys)
+		ret = mmap_rcvegrbufs(vma, rcd);
+	else if (pgaddr == (u64) rcd->rcvhdrq_phys)
+		/*
+		 * The rcvhdrq itself; multiple pages, contiguous
+		 * from an i/o perspective. Shared contexts need
+		 * to map r/w, so we allow writing.
+		 */
+		ret = qib_mmap_mem(vma, rcd, rcd->rcvhdrq_size,
+				   rcd->rcvhdrq, 1, "rcvhdrq");
+	else if (pgaddr == (u64) rcd->rcvhdrqtailaddr_phys)
+		/* in-memory copy of rcvhdrq tail register */
+		ret = qib_mmap_mem(vma, rcd, PAGE_SIZE,
+				   rcd->rcvhdrtail_kvaddr, 0,
+				   "rcvhdrq tail");
+	else
+		match = 0;
+	if (!match)
+		ret = -EINVAL;
+
+	vma->vm_private_data = NULL;
+
+	if (ret < 0)
+		qib_devinfo(dd->pcidev,
+			 "mmap Failure %d: off %llx len %lx\n",
+			 -ret, (unsigned long long)pgaddr,
+			 vma->vm_end - vma->vm_start);
+bail:
+	return ret;
+}
+
+static unsigned int qib_poll_urgent(struct qib_ctxtdata *rcd,
+				    struct file *fp,
+				    struct poll_table_struct *pt)
+{
+	struct qib_devdata *dd = rcd->dd;
+	unsigned pollflag;
+
+	poll_wait(fp, &rcd->wait, pt);
+
+	spin_lock_irq(&dd->uctxt_lock);
+	if (rcd->urgent != rcd->urgent_poll) {
+		pollflag = POLLIN | POLLRDNORM;
+		rcd->urgent_poll = rcd->urgent;
+	} else {
+		pollflag = 0;
+		set_bit(QIB_CTXT_WAITING_URG, &rcd->flag);
+	}
+	spin_unlock_irq(&dd->uctxt_lock);
+
+	return pollflag;
+}
+
+static unsigned int qib_poll_next(struct qib_ctxtdata *rcd,
+				  struct file *fp,
+				  struct poll_table_struct *pt)
+{
+	struct qib_devdata *dd = rcd->dd;
+	unsigned pollflag;
+
+	poll_wait(fp, &rcd->wait, pt);
+
+	spin_lock_irq(&dd->uctxt_lock);
+	if (dd->f_hdrqempty(rcd)) {
+		set_bit(QIB_CTXT_WAITING_RCV, &rcd->flag);
+		dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_INTRAVAIL_ENB, rcd->ctxt);
+		pollflag = 0;
+	} else
+		pollflag = POLLIN | POLLRDNORM;
+	spin_unlock_irq(&dd->uctxt_lock);
+
+	return pollflag;
+}
+
+static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt)
+{
+	struct qib_ctxtdata *rcd;
+	unsigned pollflag;
+
+	rcd = ctxt_fp(fp);
+	if (!rcd)
+		pollflag = POLLERR;
+	else if (rcd->poll_type == QIB_POLL_TYPE_URGENT)
+		pollflag = qib_poll_urgent(rcd, fp, pt);
+	else if (rcd->poll_type == QIB_POLL_TYPE_ANYRCV)
+		pollflag = qib_poll_next(rcd, fp, pt);
+	else /* invalid */
+		pollflag = POLLERR;
+
+	return pollflag;
+}
+
+static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
+{
+	struct qib_filedata *fd = fp->private_data;
+	const unsigned int weight = cpumask_weight(&current->cpus_allowed);
+	const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
+	int local_cpu;
+
+	/*
+	 * If process has NOT already set its affinity, select and
+	 * reserve a processor for it on the local NUMA node.
+	 */
+	if ((weight >= qib_cpulist_count) &&
+		(cpumask_weight(local_mask) <= qib_cpulist_count)) {
+		for_each_cpu(local_cpu, local_mask)
+			if (!test_and_set_bit(local_cpu, qib_cpulist)) {
+				fd->rec_cpu_num = local_cpu;
+				return;
+			}
+	}
+
+	/*
+	 * If process has NOT already set its affinity, select and
+	 * reserve a processor for it, as a rendezvous for all
+	 * users of the driver. If they don't actually later
+	 * set affinity to this cpu, or set it to some other cpu,
+	 * it just means that sooner or later we don't recommend
+	 * a cpu, and let the scheduler do its best.
+	 */
+	if (weight >= qib_cpulist_count) {
+		int cpu;
+		cpu = find_first_zero_bit(qib_cpulist,
+			qib_cpulist_count);
+		if (cpu == qib_cpulist_count)
+			qib_dev_err(dd,
+			"no cpus avail for affinity PID %u\n",
+			current->pid);
+		else {
+			__set_bit(cpu, qib_cpulist);
+			fd->rec_cpu_num = cpu;
+		}
+	}
+}
+
+/*
+ * Check that userland and driver are compatible for subcontexts.
+ */
+static int qib_compatible_subctxts(int user_swmajor, int user_swminor)
+{
+	/* this code is written long-hand for clarity */
+	if (QIB_USER_SWMAJOR != user_swmajor) {
+		/* no promise of compatibility if major mismatch */
+		return 0;
+	}
+	if (QIB_USER_SWMAJOR == 1) {
+		switch (QIB_USER_SWMINOR) {
+		case 0:
+		case 1:
+		case 2:
+			/* no subctxt implementation so cannot be compatible */
+			return 0;
+		case 3:
+			/* 3 is only compatible with itself */
+			return user_swminor == 3;
+		default:
+			/* >= 4 are compatible (or are expected to be) */
+			return user_swminor <= QIB_USER_SWMINOR;
+		}
+	}
+	/* make no promises yet for future major versions */
+	return 0;
+}
+
+static int init_subctxts(struct qib_devdata *dd,
+			 struct qib_ctxtdata *rcd,
+			 const struct qib_user_info *uinfo)
+{
+	int ret = 0;
+	unsigned num_subctxts;
+	size_t size;
+
+	/*
+	 * If the user is requesting zero subctxts,
+	 * skip the subctxt allocation.
+	 */
+	if (uinfo->spu_subctxt_cnt <= 0)
+		goto bail;
+	num_subctxts = uinfo->spu_subctxt_cnt;
+
+	/* Check for subctxt compatibility */
+	if (!qib_compatible_subctxts(uinfo->spu_userversion >> 16,
+		uinfo->spu_userversion & 0xffff)) {
+		qib_devinfo(dd->pcidev,
+			 "Mismatched user version (%d.%d) and driver "
+			 "version (%d.%d) while context sharing. Ensure "
+			 "that driver and library are from the same "
+			 "release.\n",
+			 (int) (uinfo->spu_userversion >> 16),
+			 (int) (uinfo->spu_userversion & 0xffff),
+			 QIB_USER_SWMAJOR, QIB_USER_SWMINOR);
+		goto bail;
+	}
+	if (num_subctxts > QLOGIC_IB_MAX_SUBCTXT) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	rcd->subctxt_uregbase = vmalloc_user(PAGE_SIZE * num_subctxts);
+	if (!rcd->subctxt_uregbase) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+	/* Note: rcd->rcvhdrq_size isn't initialized yet.
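+	 * It is recomputed from the per-device parameters instead; as an
+	 * illustration (hypothetical values), with rcvhdrcnt = 1024 and
+	 * rcvhdrentsize = 8, each subcontext copy below is
+	 * ALIGN(1024 * 8 * 4, PAGE_SIZE) = 32768 bytes.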
*/ + size = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize * + sizeof(u32), PAGE_SIZE) * num_subctxts; + rcd->subctxt_rcvhdr_base = vmalloc_user(size); + if (!rcd->subctxt_rcvhdr_base) { + ret = -ENOMEM; + goto bail_ureg; + } + + rcd->subctxt_rcvegrbuf = vmalloc_user(rcd->rcvegrbuf_chunks * + rcd->rcvegrbuf_size * + num_subctxts); + if (!rcd->subctxt_rcvegrbuf) { + ret = -ENOMEM; + goto bail_rhdr; + } + + rcd->subctxt_cnt = uinfo->spu_subctxt_cnt; + rcd->subctxt_id = uinfo->spu_subctxt_id; + rcd->active_slaves = 1; + rcd->redirect_seq_cnt = 1; + set_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag); + goto bail; + +bail_rhdr: + vfree(rcd->subctxt_rcvhdr_base); +bail_ureg: + vfree(rcd->subctxt_uregbase); + rcd->subctxt_uregbase = NULL; +bail: + return ret; +} + +static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, + struct file *fp, const struct qib_user_info *uinfo) +{ + struct qib_filedata *fd = fp->private_data; + struct qib_devdata *dd = ppd->dd; + struct qib_ctxtdata *rcd; + void *ptmp = NULL; + int ret; + int numa_id; + + assign_ctxt_affinity(fp, dd); + + numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ? + cpu_to_node(fd->rec_cpu_num) : + numa_node_id()) : dd->assigned_node_id; + + rcd = qib_create_ctxtdata(ppd, ctxt, numa_id); + + /* + * Allocate memory for use in qib_tid_update() at open to + * reduce cost of expected send setup per message segment + */ + if (rcd) + ptmp = kmalloc(dd->rcvtidcnt * sizeof(u16) + + dd->rcvtidcnt * sizeof(struct page **), + GFP_KERNEL); + + if (!rcd || !ptmp) { + qib_dev_err(dd, + "Unable to allocate ctxtdata memory, failing open\n"); + ret = -ENOMEM; + goto bailerr; + } + rcd->userversion = uinfo->spu_userversion; + ret = init_subctxts(dd, rcd, uinfo); + if (ret) + goto bailerr; + rcd->tid_pg_list = ptmp; + rcd->pid = current->pid; + init_waitqueue_head(&dd->rcd[ctxt]->wait); + strlcpy(rcd->comm, current->comm, sizeof(rcd->comm)); + ctxt_fp(fp) = rcd; + qib_stats.sps_ctxts++; + dd->freectxts--; + ret = 0; + goto bail; + +bailerr: + if (fd->rec_cpu_num != -1) + __clear_bit(fd->rec_cpu_num, qib_cpulist); + + dd->rcd[ctxt] = NULL; + kfree(rcd); + kfree(ptmp); +bail: + return ret; +} + +static inline int usable(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + + return dd && (dd->flags & QIB_PRESENT) && dd->kregbase && ppd->lid && + (ppd->lflags & QIBL_LINKACTIVE); +} + +/* + * Select a context on the given device, either using a requested port + * or the port based on the context number. + */ +static int choose_port_ctxt(struct file *fp, struct qib_devdata *dd, u32 port, + const struct qib_user_info *uinfo) +{ + struct qib_pportdata *ppd = NULL; + int ret, ctxt; + + if (port) { + if (!usable(dd->pport + port - 1)) { + ret = -ENETDOWN; + goto done; + } else + ppd = dd->pport + port - 1; + } + for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts && dd->rcd[ctxt]; + ctxt++) + ; + if (ctxt == dd->cfgctxts) { + ret = -EBUSY; + goto done; + } + if (!ppd) { + u32 pidx = ctxt % dd->num_pports; + if (usable(dd->pport + pidx)) + ppd = dd->pport + pidx; + else { + for (pidx = 0; pidx < dd->num_pports && !ppd; + pidx++) + if (usable(dd->pport + pidx)) + ppd = dd->pport + pidx; + } + } + ret = ppd ? 
setup_ctxt(ppd, ctxt, fp, uinfo) : -ENETDOWN; +done: + return ret; +} + +static int find_free_ctxt(int unit, struct file *fp, + const struct qib_user_info *uinfo) +{ + struct qib_devdata *dd = qib_lookup(unit); + int ret; + + if (!dd || (uinfo->spu_port && uinfo->spu_port > dd->num_pports)) + ret = -ENODEV; + else + ret = choose_port_ctxt(fp, dd, uinfo->spu_port, uinfo); + + return ret; +} + +static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo, + unsigned alg) +{ + struct qib_devdata *udd = NULL; + int ret = 0, devmax, npresent, nup, ndev, dusable = 0, i; + u32 port = uinfo->spu_port, ctxt; + + devmax = qib_count_units(&npresent, &nup); + if (!npresent) { + ret = -ENXIO; + goto done; + } + if (nup == 0) { + ret = -ENETDOWN; + goto done; + } + + if (alg == QIB_PORT_ALG_ACROSS) { + unsigned inuse = ~0U; + /* find device (with ACTIVE ports) with fewest ctxts in use */ + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + unsigned cused = 0, cfree = 0, pusable = 0; + if (!dd) + continue; + if (port && port <= dd->num_pports && + usable(dd->pport + port - 1)) + pusable = 1; + else + for (i = 0; i < dd->num_pports; i++) + if (usable(dd->pport + i)) + pusable++; + if (!pusable) + continue; + for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; + ctxt++) + if (dd->rcd[ctxt]) + cused++; + else + cfree++; + if (cfree && cused < inuse) { + udd = dd; + inuse = cused; + } + } + if (udd) { + ret = choose_port_ctxt(fp, udd, port, uinfo); + goto done; + } + } else { + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + if (dd) { + ret = choose_port_ctxt(fp, dd, port, uinfo); + if (!ret) + goto done; + if (ret == -EBUSY) + dusable++; + } + } + } + ret = dusable ? -EBUSY : -ENETDOWN; + +done: + return ret; +} + +static int find_shared_ctxt(struct file *fp, + const struct qib_user_info *uinfo) +{ + int devmax, ndev, i; + int ret = 0; + + devmax = qib_count_units(NULL, NULL); + + for (ndev = 0; ndev < devmax; ndev++) { + struct qib_devdata *dd = qib_lookup(ndev); + + /* device portion of usable() */ + if (!(dd && (dd->flags & QIB_PRESENT) && dd->kregbase)) + continue; + for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) { + struct qib_ctxtdata *rcd = dd->rcd[i]; + + /* Skip ctxts which are not yet open */ + if (!rcd || !rcd->cnt) + continue; + /* Skip ctxt if it doesn't match the requested one */ + if (rcd->subctxt_id != uinfo->spu_subctxt_id) + continue; + /* Verify the sharing process matches the master */ + if (rcd->subctxt_cnt != uinfo->spu_subctxt_cnt || + rcd->userversion != uinfo->spu_userversion || + rcd->cnt >= rcd->subctxt_cnt) { + ret = -EINVAL; + goto done; + } + ctxt_fp(fp) = rcd; + subctxt_fp(fp) = rcd->cnt++; + rcd->subpid[subctxt_fp(fp)] = current->pid; + tidcursor_fp(fp) = 0; + rcd->active_slaves |= 1 << subctxt_fp(fp); + ret = 1; + goto done; + } + } + +done: + return ret; +} + +static int qib_open(struct inode *in, struct file *fp) +{ + /* The real work is performed later in qib_assign_ctxt() */ + fp->private_data = kzalloc(sizeof(struct qib_filedata), GFP_KERNEL); + if (fp->private_data) /* no cpu affinity by default */ + ((struct qib_filedata *)fp->private_data)->rec_cpu_num = -1; + return fp->private_data ? 
0 : -ENOMEM;
+}
+
+static int find_hca(unsigned int cpu, int *unit)
+{
+	int ret = 0, devmax, npresent, nup, ndev;
+
+	*unit = -1;
+
+	devmax = qib_count_units(&npresent, &nup);
+	if (!npresent) {
+		ret = -ENXIO;
+		goto done;
+	}
+	if (!nup) {
+		ret = -ENETDOWN;
+		goto done;
+	}
+	for (ndev = 0; ndev < devmax; ndev++) {
+		struct qib_devdata *dd = qib_lookup(ndev);
+		if (dd) {
+			if (pcibus_to_node(dd->pcidev->bus) < 0) {
+				ret = -EINVAL;
+				goto done;
+			}
+			if (cpu_to_node(cpu) ==
+				pcibus_to_node(dd->pcidev->bus)) {
+				*unit = ndev;
+				goto done;
+			}
+		}
+	}
+done:
+	return ret;
+}
+
+static int do_qib_user_sdma_queue_create(struct file *fp)
+{
+	struct qib_filedata *fd = fp->private_data;
+	struct qib_ctxtdata *rcd = fd->rcd;
+	struct qib_devdata *dd = rcd->dd;
+
+	if (dd->flags & QIB_HAS_SEND_DMA) {
+
+		fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
+						    dd->unit,
+						    rcd->ctxt,
+						    fd->subctxt);
+		if (!fd->pq)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Get ctxt early, so we can set affinity prior to memory allocation.
+ */
+static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
+{
+	int ret;
+	int i_minor;
+	unsigned swmajor, swminor, alg = QIB_PORT_ALG_ACROSS;
+
+	/* Check to be sure we haven't already initialized this file */
+	if (ctxt_fp(fp)) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	/* for now, if major version is different, bail */
+	swmajor = uinfo->spu_userversion >> 16;
+	if (swmajor != QIB_USER_SWMAJOR) {
+		ret = -ENODEV;
+		goto done;
+	}
+
+	swminor = uinfo->spu_userversion & 0xffff;
+
+	if (swminor >= 11 && uinfo->spu_port_alg < QIB_PORT_ALG_COUNT)
+		alg = uinfo->spu_port_alg;
+
+	mutex_lock(&qib_mutex);
+
+	if (qib_compatible_subctxts(swmajor, swminor) &&
+	    uinfo->spu_subctxt_cnt) {
+		ret = find_shared_ctxt(fp, uinfo);
+		if (ret > 0) {
+			ret = do_qib_user_sdma_queue_create(fp);
+			if (!ret)
+				assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd);
+			goto done_ok;
+		}
+	}
+
+	i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
+	if (i_minor)
+		ret = find_free_ctxt(i_minor - 1, fp, uinfo);
+	else {
+		int unit;
+		const unsigned int cpu = cpumask_first(&current->cpus_allowed);
+		const unsigned int weight =
+			cpumask_weight(&current->cpus_allowed);
+
+		if (weight == 1 && !test_bit(cpu, qib_cpulist))
+			if (!find_hca(cpu, &unit) && unit >= 0)
+				if (!find_free_ctxt(unit, fp, uinfo)) {
+					ret = 0;
+					goto done_chk_sdma;
+				}
+		ret = get_a_ctxt(fp, uinfo, alg);
+	}
+
+done_chk_sdma:
+	if (!ret)
+		ret = do_qib_user_sdma_queue_create(fp);
+done_ok:
+	mutex_unlock(&qib_mutex);
+
+done:
+	return ret;
+}
+
+
+static int qib_do_user_init(struct file *fp,
+			    const struct qib_user_info *uinfo)
+{
+	int ret;
+	struct qib_ctxtdata *rcd = ctxt_fp(fp);
+	struct qib_devdata *dd;
+	unsigned uctxt;
+
+	/* Subctxts don't need to initialize anything since master did it. */
+	if (subctxt_fp(fp)) {
+		ret = wait_event_interruptible(rcd->wait,
+			!test_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag));
+		goto bail;
+	}
+
+	dd = rcd->dd;
+
+	/* some ctxts may get extra buffers, calculate that here */
+	uctxt = rcd->ctxt - dd->first_user_ctxt;
+	if (uctxt < dd->ctxts_extrabuf) {
+		rcd->piocnt = dd->pbufsctxt + 1;
+		rcd->pio_base = rcd->piocnt * uctxt;
+	} else {
+		rcd->piocnt = dd->pbufsctxt;
+		rcd->pio_base = rcd->piocnt * uctxt +
+			dd->ctxts_extrabuf;
+	}
+
+	/*
+	 * All user buffers are 2KB buffers. If we ever support
+	 * giving 4KB buffers to user processes, this will need some
+	 * work. Can't use piobufbase directly, because it has
+	 * both 2K and 4K buffer base values. So check and handle.
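+	 *
+	 * As an illustration of the split computed above (hypothetical
+	 * values): with 64 user buffers and 18 user contexts,
+	 * pbufsctxt = 3 and ctxts_extrabuf = 10, so the first 10
+	 * contexts get 4 buffers each and the remaining 8 get 3.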
+	 */
+	if ((rcd->pio_base + rcd->piocnt) > dd->piobcnt2k) {
+		if (rcd->pio_base >= dd->piobcnt2k) {
+			qib_dev_err(dd,
+				    "%u:ctxt%u: no 2KB buffers available\n",
+				    dd->unit, rcd->ctxt);
+			ret = -ENOBUFS;
+			goto bail;
+		}
+		rcd->piocnt = dd->piobcnt2k - rcd->pio_base;
+		qib_dev_err(dd, "Ctxt%u: would use 4KB bufs, using %u\n",
+			    rcd->ctxt, rcd->piocnt);
+	}
+
+	rcd->piobufs = dd->pio2k_bufbase + rcd->pio_base * dd->palign;
+	qib_chg_pioavailkernel(dd, rcd->pio_base, rcd->piocnt,
+			       TXCHK_CHG_TYPE_USER, rcd);
+	/*
+	 * try to ensure that processes start up with consistent avail update
+	 * for their own range, at least. If the system is very quiet, it
+	 * might have the in-memory copy out of date at startup for this
+	 * range of buffers, when a context gets re-used. Do this after the
+	 * chg_pioavail and before the rest of setup, so it's "almost
+	 * certain" the dma will have occurred (can't 100% guarantee, but
+	 * should be many decimals of 9s, with this ordering), given how
+	 * much else happens after this.
+	 */
+	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+
+	/*
+	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
+	 * array for the time being. If rcd->ctxt > chip-supported,
+	 * we would need extra logic here to handle it, by overflowing
+	 * through ctxt 0, someday.
+	 */
+	ret = qib_create_rcvhdrq(dd, rcd);
+	if (!ret)
+		ret = qib_setup_eagerbufs(rcd);
+	if (ret)
+		goto bail_pio;
+
+	rcd->tidcursor = 0; /* start at beginning after open */
+
+	/* initialize poll variables... */
+	rcd->urgent = 0;
+	rcd->urgent_poll = 0;
+
+	/*
+	 * Now enable the ctxt for receive.
+	 * For chips that are set to DMA the tail register to memory
+	 * when it changes (and when the update bit transitions from
+	 * 0 to 1), we turn it off and then back on.
+	 * This will (very briefly) affect any other open ctxts, but the
+	 * duration is very short, and therefore isn't an issue. We
+	 * explicitly set the in-memory tail copy to 0 beforehand, so we
+	 * don't have to wait to be sure the DMA update has happened
+	 * (chip resets head/tail to 0 on transition to enable).
+	 */
+	if (rcd->rcvhdrtail_kvaddr)
+		qib_clear_rcvhdrtail(rcd);
+
+	dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_TIDFLOW_ENB,
+		      rcd->ctxt);
+
+	/* Notify any waiting slaves */
+	if (rcd->subctxt_cnt) {
+		clear_bit(QIB_CTXT_MASTER_UNINIT, &rcd->flag);
+		wake_up(&rcd->wait);
+	}
+	return 0;
+
+bail_pio:
+	qib_chg_pioavailkernel(dd, rcd->pio_base, rcd->piocnt,
+			       TXCHK_CHG_TYPE_KERN, rcd);
+bail:
+	return ret;
+}
+
+/**
+ * unlock_expected_tids - unlock any expected TID entries the context still had in use
+ * @rcd: ctxt
+ *
+ * We don't actually update the chip here, because we do a bulk update
+ * below, using f_clear_tids.
+ */ +static void unlock_expected_tids(struct qib_ctxtdata *rcd) +{ + struct qib_devdata *dd = rcd->dd; + int ctxt_tidbase = rcd->ctxt * dd->rcvtidcnt; + int i, cnt = 0, maxtid = ctxt_tidbase + dd->rcvtidcnt; + + for (i = ctxt_tidbase; i < maxtid; i++) { + struct page *p = dd->pageshadow[i]; + dma_addr_t phys; + + if (!p) + continue; + + phys = dd->physshadow[i]; + dd->physshadow[i] = dd->tidinvalid; + dd->pageshadow[i] = NULL; + pci_unmap_page(dd->pcidev, phys, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + qib_release_user_pages(&p, 1); + cnt++; + } +} + +static int qib_close(struct inode *in, struct file *fp) +{ + int ret = 0; + struct qib_filedata *fd; + struct qib_ctxtdata *rcd; + struct qib_devdata *dd; + unsigned long flags; + unsigned ctxt; + pid_t pid; + + mutex_lock(&qib_mutex); + + fd = fp->private_data; + fp->private_data = NULL; + rcd = fd->rcd; + if (!rcd) { + mutex_unlock(&qib_mutex); + goto bail; + } + + dd = rcd->dd; + + /* ensure all pio buffer writes in progress are flushed */ + qib_flush_wc(); + + /* drain user sdma queue */ + if (fd->pq) { + qib_user_sdma_queue_drain(rcd->ppd, fd->pq); + qib_user_sdma_queue_destroy(fd->pq); + } + + if (fd->rec_cpu_num != -1) + __clear_bit(fd->rec_cpu_num, qib_cpulist); + + if (--rcd->cnt) { + /* + * XXX If the master closes the context before the slave(s), + * revoke the mmap for the eager receive queue so + * the slave(s) don't wait for receive data forever. + */ + rcd->active_slaves &= ~(1 << fd->subctxt); + rcd->subpid[fd->subctxt] = 0; + mutex_unlock(&qib_mutex); + goto bail; + } + + /* early; no interrupt users after this */ + spin_lock_irqsave(&dd->uctxt_lock, flags); + ctxt = rcd->ctxt; + dd->rcd[ctxt] = NULL; + pid = rcd->pid; + rcd->pid = 0; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + if (rcd->rcvwait_to || rcd->piowait_to || + rcd->rcvnowait || rcd->pionowait) { + rcd->rcvwait_to = 0; + rcd->piowait_to = 0; + rcd->rcvnowait = 0; + rcd->pionowait = 0; + } + if (rcd->flag) + rcd->flag = 0; + + if (dd->kregbase) { + /* atomically clear receive enable ctxt and intr avail. */ + dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_CTXT_DIS | + QIB_RCVCTRL_INTRAVAIL_DIS, ctxt); + + /* clean up the pkeys for this ctxt user */ + qib_clean_part_key(rcd, dd); + qib_disarm_piobufs(dd, rcd->pio_base, rcd->piocnt); + qib_chg_pioavailkernel(dd, rcd->pio_base, + rcd->piocnt, TXCHK_CHG_TYPE_KERN, NULL); + + dd->f_clear_tids(dd, rcd); + + if (dd->pageshadow) + unlock_expected_tids(rcd); + qib_stats.sps_ctxts--; + dd->freectxts++; + } + + mutex_unlock(&qib_mutex); + qib_free_ctxtdata(dd, rcd); /* after releasing the mutex */ + +bail: + kfree(fd); + return ret; +} + +static int qib_ctxt_info(struct file *fp, struct qib_ctxt_info __user *uinfo) +{ + struct qib_ctxt_info info; + int ret; + size_t sz; + struct qib_ctxtdata *rcd = ctxt_fp(fp); + struct qib_filedata *fd; + + fd = fp->private_data; + + info.num_active = qib_count_active_units(); + info.unit = rcd->dd->unit; + info.port = rcd->ppd->port; + info.ctxt = rcd->ctxt; + info.subctxt = subctxt_fp(fp); + /* Number of user ctxts available for this device. 
*/
+	info.num_ctxts = rcd->dd->cfgctxts - rcd->dd->first_user_ctxt;
+	info.num_subctxts = rcd->subctxt_cnt;
+	info.rec_cpu = fd->rec_cpu_num;
+	sz = sizeof(info);
+
+	if (copy_to_user(uinfo, &info, sz)) {
+		ret = -EFAULT;
+		goto bail;
+	}
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+static int qib_sdma_get_inflight(struct qib_user_sdma_queue *pq,
+				 u32 __user *inflightp)
+{
+	const u32 val = qib_user_sdma_inflight_counter(pq);
+
+	if (put_user(val, inflightp))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int qib_sdma_get_complete(struct qib_pportdata *ppd,
+				 struct qib_user_sdma_queue *pq,
+				 u32 __user *completep)
+{
+	u32 val;
+	int err;
+
+	if (!pq)
+		return -EINVAL;
+
+	err = qib_user_sdma_make_progress(ppd, pq);
+	if (err < 0)
+		return err;
+
+	val = qib_user_sdma_complete_counter(pq);
+	if (put_user(val, completep))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int disarm_req_delay(struct qib_ctxtdata *rcd)
+{
+	int ret = 0;
+
+	if (!usable(rcd->ppd)) {
+		int i;
+		/*
+		 * if link is down, or otherwise not usable, delay
+		 * the caller up to 30 seconds, so we don't thrash
+		 * in trying to get the chip back to ACTIVE, and
+		 * set flag so they make the call again.
+		 */
+		if (rcd->user_event_mask) {
+			/*
+			 * subctxt_cnt is 0 if not shared, so do base
+			 * separately, first, then remaining subctxt, if any
+			 */
+			set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+				&rcd->user_event_mask[0]);
+			for (i = 1; i < rcd->subctxt_cnt; i++)
+				set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+					&rcd->user_event_mask[i]);
+		}
+		for (i = 0; !usable(rcd->ppd) && i < 300; i++)
+			msleep(100);
+		ret = -ENETDOWN;
+	}
+	return ret;
+}
+
+/*
+ * Find all user contexts in use, and set the specified bit in their
+ * event mask.
+ * See also find_ctxt() for a similar use that is specific to send buffers.
+ */
+int qib_set_uevent_bits(struct qib_pportdata *ppd, const int evtbit)
+{
+	struct qib_ctxtdata *rcd;
+	unsigned ctxt;
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ppd->dd->uctxt_lock, flags);
+	for (ctxt = ppd->dd->first_user_ctxt; ctxt < ppd->dd->cfgctxts;
+	     ctxt++) {
+		rcd = ppd->dd->rcd[ctxt];
+		if (!rcd)
+			continue;
+		if (rcd->user_event_mask) {
+			int i;
+			/*
+			 * subctxt_cnt is 0 if not shared, so do base
+			 * separately, first, then remaining subctxt, if any
+			 */
+			set_bit(evtbit, &rcd->user_event_mask[0]);
+			for (i = 1; i < rcd->subctxt_cnt; i++)
+				set_bit(evtbit, &rcd->user_event_mask[i]);
+		}
+		ret = 1;
+		break;
+	}
+	spin_unlock_irqrestore(&ppd->dd->uctxt_lock, flags);
+
+	return ret;
+}
+
+/*
+ * clear the event notifier events for this context.
+ * For the DISARM_BUFS case, we also take action (this obsoletes
+ * the older QIB_CMD_DISARM_BUFS, but we keep it for backwards
+ * compatibility).
+ * Other bits don't currently require actions, just atomically clear.
+ * User process then performs actions appropriate to bit having been
+ * set, if desired, and checks again in the future.
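+ *
+ * Illustrative polling loop in a user library (sketch, not part of
+ * the driver sources; event_wordp stands in for a pointer into the
+ * mmapped user_event_mask page):
+ *
+ *	events = *event_wordp;
+ *	if (events) {
+ *		cmd.type = QIB_CMD_ACK_EVENT;
+ *		cmd.cmd.event_mask = events;
+ *		write(fd, &cmd, sizeof(cmd));
+ *	}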
+ */ +static int qib_user_event_ack(struct qib_ctxtdata *rcd, int subctxt, + unsigned long events) +{ + int ret = 0, i; + + for (i = 0; i <= _QIB_MAX_EVENT_BIT; i++) { + if (!test_bit(i, &events)) + continue; + if (i == _QIB_EVENT_DISARM_BUFS_BIT) { + (void)qib_disarm_piobufs_ifneeded(rcd); + ret = disarm_req_delay(rcd); + } else + clear_bit(i, &rcd->user_event_mask[subctxt]); + } + return ret; +} + +static ssize_t qib_write(struct file *fp, const char __user *data, + size_t count, loff_t *off) +{ + const struct qib_cmd __user *ucmd; + struct qib_ctxtdata *rcd; + const void __user *src; + size_t consumed, copy = 0; + struct qib_cmd cmd; + ssize_t ret = 0; + void *dest; + + if (count < sizeof(cmd.type)) { + ret = -EINVAL; + goto bail; + } + + ucmd = (const struct qib_cmd __user *) data; + + if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) { + ret = -EFAULT; + goto bail; + } + + consumed = sizeof(cmd.type); + + switch (cmd.type) { + case QIB_CMD_ASSIGN_CTXT: + case QIB_CMD_USER_INIT: + copy = sizeof(cmd.cmd.user_info); + dest = &cmd.cmd.user_info; + src = &ucmd->cmd.user_info; + break; + + case QIB_CMD_RECV_CTRL: + copy = sizeof(cmd.cmd.recv_ctrl); + dest = &cmd.cmd.recv_ctrl; + src = &ucmd->cmd.recv_ctrl; + break; + + case QIB_CMD_CTXT_INFO: + copy = sizeof(cmd.cmd.ctxt_info); + dest = &cmd.cmd.ctxt_info; + src = &ucmd->cmd.ctxt_info; + break; + + case QIB_CMD_TID_UPDATE: + case QIB_CMD_TID_FREE: + copy = sizeof(cmd.cmd.tid_info); + dest = &cmd.cmd.tid_info; + src = &ucmd->cmd.tid_info; + break; + + case QIB_CMD_SET_PART_KEY: + copy = sizeof(cmd.cmd.part_key); + dest = &cmd.cmd.part_key; + src = &ucmd->cmd.part_key; + break; + + case QIB_CMD_DISARM_BUFS: + case QIB_CMD_PIOAVAILUPD: /* force an update of PIOAvail reg */ + copy = 0; + src = NULL; + dest = NULL; + break; + + case QIB_CMD_POLL_TYPE: + copy = sizeof(cmd.cmd.poll_type); + dest = &cmd.cmd.poll_type; + src = &ucmd->cmd.poll_type; + break; + + case QIB_CMD_ARMLAUNCH_CTRL: + copy = sizeof(cmd.cmd.armlaunch_ctrl); + dest = &cmd.cmd.armlaunch_ctrl; + src = &ucmd->cmd.armlaunch_ctrl; + break; + + case QIB_CMD_SDMA_INFLIGHT: + copy = sizeof(cmd.cmd.sdma_inflight); + dest = &cmd.cmd.sdma_inflight; + src = &ucmd->cmd.sdma_inflight; + break; + + case QIB_CMD_SDMA_COMPLETE: + copy = sizeof(cmd.cmd.sdma_complete); + dest = &cmd.cmd.sdma_complete; + src = &ucmd->cmd.sdma_complete; + break; + + case QIB_CMD_ACK_EVENT: + copy = sizeof(cmd.cmd.event_mask); + dest = &cmd.cmd.event_mask; + src = &ucmd->cmd.event_mask; + break; + + default: + ret = -EINVAL; + goto bail; + } + + if (copy) { + if ((count - consumed) < copy) { + ret = -EINVAL; + goto bail; + } + if (copy_from_user(dest, src, copy)) { + ret = -EFAULT; + goto bail; + } + consumed += copy; + } + + rcd = ctxt_fp(fp); + if (!rcd && cmd.type != QIB_CMD_ASSIGN_CTXT) { + ret = -EINVAL; + goto bail; + } + + switch (cmd.type) { + case QIB_CMD_ASSIGN_CTXT: + ret = qib_assign_ctxt(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + break; + + case QIB_CMD_USER_INIT: + ret = qib_do_user_init(fp, &cmd.cmd.user_info); + if (ret) + goto bail; + ret = qib_get_base_info(fp, (void __user *) (unsigned long) + cmd.cmd.user_info.spu_base_info, + cmd.cmd.user_info.spu_base_info_size); + break; + + case QIB_CMD_RECV_CTRL: + ret = qib_manage_rcvq(rcd, subctxt_fp(fp), cmd.cmd.recv_ctrl); + break; + + case QIB_CMD_CTXT_INFO: + ret = qib_ctxt_info(fp, (struct qib_ctxt_info __user *) + (unsigned long) cmd.cmd.ctxt_info); + break; + + case QIB_CMD_TID_UPDATE: + ret = qib_tid_update(rcd, fp, 
&cmd.cmd.tid_info); + break; + + case QIB_CMD_TID_FREE: + ret = qib_tid_free(rcd, subctxt_fp(fp), &cmd.cmd.tid_info); + break; + + case QIB_CMD_SET_PART_KEY: + ret = qib_set_part_key(rcd, cmd.cmd.part_key); + break; + + case QIB_CMD_DISARM_BUFS: + (void)qib_disarm_piobufs_ifneeded(rcd); + ret = disarm_req_delay(rcd); + break; + + case QIB_CMD_PIOAVAILUPD: + qib_force_pio_avail_update(rcd->dd); + break; + + case QIB_CMD_POLL_TYPE: + rcd->poll_type = cmd.cmd.poll_type; + break; + + case QIB_CMD_ARMLAUNCH_CTRL: + rcd->dd->f_set_armlaunch(rcd->dd, cmd.cmd.armlaunch_ctrl); + break; + + case QIB_CMD_SDMA_INFLIGHT: + ret = qib_sdma_get_inflight(user_sdma_queue_fp(fp), + (u32 __user *) (unsigned long) + cmd.cmd.sdma_inflight); + break; + + case QIB_CMD_SDMA_COMPLETE: + ret = qib_sdma_get_complete(rcd->ppd, + user_sdma_queue_fp(fp), + (u32 __user *) (unsigned long) + cmd.cmd.sdma_complete); + break; + + case QIB_CMD_ACK_EVENT: + ret = qib_user_event_ack(rcd, subctxt_fp(fp), + cmd.cmd.event_mask); + break; + } + + if (ret >= 0) + ret = consumed; + +bail: + return ret; +} + +static ssize_t qib_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long dim, loff_t off) +{ + struct qib_filedata *fp = iocb->ki_filp->private_data; + struct qib_ctxtdata *rcd = ctxt_fp(iocb->ki_filp); + struct qib_user_sdma_queue *pq = fp->pq; + + if (!dim || !pq) + return -EINVAL; + + return qib_user_sdma_writev(rcd, pq, iov, dim); +} + +static struct class *qib_class; +static dev_t qib_dev; + +int qib_cdev_init(int minor, const char *name, + const struct file_operations *fops, + struct cdev **cdevp, struct device **devp) +{ + const dev_t dev = MKDEV(MAJOR(qib_dev), minor); + struct cdev *cdev; + struct device *device = NULL; + int ret; + + cdev = cdev_alloc(); + if (!cdev) { + pr_err("Could not allocate cdev for minor %d, %s\n", + minor, name); + ret = -ENOMEM; + goto done; + } + + cdev->owner = THIS_MODULE; + cdev->ops = fops; + kobject_set_name(&cdev->kobj, name); + + ret = cdev_add(cdev, dev, 1); + if (ret < 0) { + pr_err("Could not add cdev for minor %d, %s (err %d)\n", + minor, name, -ret); + goto err_cdev; + } + + device = device_create(qib_class, NULL, dev, NULL, "%s", name); + if (!IS_ERR(device)) + goto done; + ret = PTR_ERR(device); + device = NULL; + pr_err("Could not create device for minor %d, %s (err %d)\n", + minor, name, -ret); +err_cdev: + cdev_del(cdev); + cdev = NULL; +done: + *cdevp = cdev; + *devp = device; + return ret; +} + +void qib_cdev_cleanup(struct cdev **cdevp, struct device **devp) +{ + struct device *device = *devp; + + if (device) { + device_unregister(device); + *devp = NULL; + } + + if (*cdevp) { + cdev_del(*cdevp); + *cdevp = NULL; + } +} + +static struct cdev *wildcard_cdev; +static struct device *wildcard_device; + +int __init qib_dev_init(void) +{ + int ret; + + ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME); + if (ret < 0) { + pr_err("Could not allocate chrdev region (err %d)\n", -ret); + goto done; + } + + qib_class = class_create(THIS_MODULE, "ipath"); + if (IS_ERR(qib_class)) { + ret = PTR_ERR(qib_class); + pr_err("Could not create device class (err %d)\n", -ret); + unregister_chrdev_region(qib_dev, QIB_NMINORS); + } + +done: + return ret; +} + +void qib_dev_cleanup(void) +{ + if (qib_class) { + class_destroy(qib_class); + qib_class = NULL; + } + + unregister_chrdev_region(qib_dev, QIB_NMINORS); +} + +static atomic_t user_count = ATOMIC_INIT(0); + +static void qib_user_remove(struct qib_devdata *dd) +{ + if (atomic_dec_return(&user_count) == 0) + 
qib_cdev_cleanup(&wildcard_cdev, &wildcard_device); + + qib_cdev_cleanup(&dd->user_cdev, &dd->user_device); +} + +static int qib_user_add(struct qib_devdata *dd) +{ + char name[10]; + int ret; + + if (atomic_inc_return(&user_count) == 1) { + ret = qib_cdev_init(0, "ipath", &qib_file_ops, + &wildcard_cdev, &wildcard_device); + if (ret) + goto done; + } + + snprintf(name, sizeof(name), "ipath%d", dd->unit); + ret = qib_cdev_init(dd->unit + 1, name, &qib_file_ops, + &dd->user_cdev, &dd->user_device); + if (ret) + qib_user_remove(dd); +done: + return ret; +} + +/* + * Create per-unit files in /dev + */ +int qib_device_create(struct qib_devdata *dd) +{ + int r, ret; + + r = qib_user_add(dd); + ret = qib_diag_add(dd); + if (r && !ret) + ret = r; + return ret; +} + +/* + * Remove per-unit files in /dev + * void, core kernel returns no errors for this stuff + */ +void qib_device_remove(struct qib_devdata *dd) +{ + qib_user_remove(dd); + qib_diag_remove(dd); +} diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c new file mode 100644 index 00000000000..cab610ccd50 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -0,0 +1,621 @@ +/* + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. + * Copyright (c) 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/pagemap.h> +#include <linux/init.h> +#include <linux/namei.h> + +#include "qib.h" + +#define QIBFS_MAGIC 0x726a77 + +static struct super_block *qib_super; + +#define private2dd(file) (file_inode(file)->i_private) + +static int qibfs_mknod(struct inode *dir, struct dentry *dentry, + umode_t mode, const struct file_operations *fops, + void *data) +{ + int error; + struct inode *inode = new_inode(dir->i_sb); + + if (!inode) { + error = -EPERM; + goto bail; + } + + inode->i_ino = get_next_ino(); + inode->i_mode = mode; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; + inode->i_blocks = 0; + inode->i_atime = CURRENT_TIME; + inode->i_mtime = inode->i_atime; + inode->i_ctime = inode->i_atime; + inode->i_private = data; + if (S_ISDIR(mode)) { + inode->i_op = &simple_dir_inode_operations; + inc_nlink(inode); + inc_nlink(dir); + } + + inode->i_fop = fops; + + d_instantiate(dentry, inode); + error = 0; + +bail: + return error; +} + +static int create_file(const char *name, umode_t mode, + struct dentry *parent, struct dentry **dentry, + const struct file_operations *fops, void *data) +{ + int error; + + *dentry = NULL; + mutex_lock(&parent->d_inode->i_mutex); + *dentry = lookup_one_len(name, parent, strlen(name)); + if (!IS_ERR(*dentry)) + error = qibfs_mknod(parent->d_inode, *dentry, + mode, fops, data); + else + error = PTR_ERR(*dentry); + mutex_unlock(&parent->d_inode->i_mutex); + + return error; +} + +static ssize_t driver_stats_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + qib_stats.sps_ints = qib_sps_ints(); + return simple_read_from_buffer(buf, count, ppos, &qib_stats, + sizeof qib_stats); +} + +/* + * driver stats field names, one line per stat, single string. Used by + * programs like ipathstats to print the stats in a way which works for + * different versions of drivers, without changing program source. + * if qlogic_ib_stats changes, this needs to change. Names need to be + * 12 chars or less (w/o newline), for proper display by ipathstats utility. 
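+ *
+ * A monitoring tool can therefore pair the two files line-for-line
+ * (illustrative): read driver_stats_names once and split on newlines,
+ * then read the u64 array from driver_stats and print name[i] next
+ * to counter[i].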
+ */ +static const char qib_statnames[] = + "KernIntr\n" + "ErrorIntr\n" + "Tx_Errs\n" + "Rcv_Errs\n" + "H/W_Errs\n" + "NoPIOBufs\n" + "CtxtsOpen\n" + "RcvLen_Errs\n" + "EgrBufFull\n" + "EgrHdrFull\n" + ; + +static ssize_t driver_names_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return simple_read_from_buffer(buf, count, ppos, qib_statnames, + sizeof qib_statnames - 1); /* no null */ +} + +static const struct file_operations driver_ops[] = { + { .read = driver_stats_read, .llseek = generic_file_llseek, }, + { .read = driver_names_read, .llseek = generic_file_llseek, }, +}; + +/* read the per-device counters */ +static ssize_t dev_counters_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 *counters; + size_t avail; + struct qib_devdata *dd = private2dd(file); + + avail = dd->f_read_cntrs(dd, *ppos, NULL, &counters); + return simple_read_from_buffer(buf, count, ppos, counters, avail); +} + +/* read the per-device counters */ +static ssize_t dev_names_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char *names; + size_t avail; + struct qib_devdata *dd = private2dd(file); + + avail = dd->f_read_cntrs(dd, *ppos, &names, NULL); + return simple_read_from_buffer(buf, count, ppos, names, avail); +} + +static const struct file_operations cntr_ops[] = { + { .read = dev_counters_read, .llseek = generic_file_llseek, }, + { .read = dev_names_read, .llseek = generic_file_llseek, }, +}; + +/* + * Could use file_inode(file)->i_ino to figure out which file, + * instead of separate routine for each, but for now, this works... + */ + +/* read the per-port names (same for each port) */ +static ssize_t portnames_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + char *names; + size_t avail; + struct qib_devdata *dd = private2dd(file); + + avail = dd->f_read_portcntrs(dd, *ppos, 0, &names, NULL); + return simple_read_from_buffer(buf, count, ppos, names, avail); +} + +/* read the per-port counters for port 1 (pidx 0) */ +static ssize_t portcntrs_1_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 *counters; + size_t avail; + struct qib_devdata *dd = private2dd(file); + + avail = dd->f_read_portcntrs(dd, *ppos, 0, NULL, &counters); + return simple_read_from_buffer(buf, count, ppos, counters, avail); +} + +/* read the per-port counters for port 2 (pidx 1) */ +static ssize_t portcntrs_2_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 *counters; + size_t avail; + struct qib_devdata *dd = private2dd(file); + + avail = dd->f_read_portcntrs(dd, *ppos, 1, NULL, &counters); + return simple_read_from_buffer(buf, count, ppos, counters, avail); +} + +static const struct file_operations portcntr_ops[] = { + { .read = portnames_read, .llseek = generic_file_llseek, }, + { .read = portcntrs_1_read, .llseek = generic_file_llseek, }, + { .read = portcntrs_2_read, .llseek = generic_file_llseek, }, +}; + +/* + * read the per-port QSFP data for port 1 (pidx 0) + */ +static ssize_t qsfp_1_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct qib_devdata *dd = private2dd(file); + char *tmp; + int ret; + + tmp = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + ret = qib_qsfp_dump(dd->pport, tmp, PAGE_SIZE); + if (ret > 0) + ret = simple_read_from_buffer(buf, count, ppos, tmp, ret); + kfree(tmp); + return ret; +} + +/* + * read the per-port QSFP data for port 2 (pidx 1) + */ +static ssize_t qsfp_2_read(struct 
file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct qib_devdata *dd = private2dd(file); + char *tmp; + int ret; + + if (dd->num_pports < 2) + return -ENODEV; + + tmp = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + ret = qib_qsfp_dump(dd->pport + 1, tmp, PAGE_SIZE); + if (ret > 0) + ret = simple_read_from_buffer(buf, count, ppos, tmp, ret); + kfree(tmp); + return ret; +} + +static const struct file_operations qsfp_ops[] = { + { .read = qsfp_1_read, .llseek = generic_file_llseek, }, + { .read = qsfp_2_read, .llseek = generic_file_llseek, }, +}; + +static ssize_t flash_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct qib_devdata *dd; + ssize_t ret; + loff_t pos; + char *tmp; + + pos = *ppos; + + if (pos < 0) { + ret = -EINVAL; + goto bail; + } + + if (pos >= sizeof(struct qib_flash)) { + ret = 0; + goto bail; + } + + if (count > sizeof(struct qib_flash) - pos) + count = sizeof(struct qib_flash) - pos; + + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) { + ret = -ENOMEM; + goto bail; + } + + dd = private2dd(file); + if (qib_eeprom_read(dd, pos, tmp, count)) { + qib_dev_err(dd, "failed to read from flash\n"); + ret = -ENXIO; + goto bail_tmp; + } + + if (copy_to_user(buf, tmp, count)) { + ret = -EFAULT; + goto bail_tmp; + } + + *ppos = pos + count; + ret = count; + +bail_tmp: + kfree(tmp); + +bail: + return ret; +} + +static ssize_t flash_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct qib_devdata *dd; + ssize_t ret; + loff_t pos; + char *tmp; + + pos = *ppos; + + if (pos != 0) { + ret = -EINVAL; + goto bail; + } + + if (count != sizeof(struct qib_flash)) { + ret = -EINVAL; + goto bail; + } + + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) { + ret = -ENOMEM; + goto bail; + } + + if (copy_from_user(tmp, buf, count)) { + ret = -EFAULT; + goto bail_tmp; + } + + dd = private2dd(file); + if (qib_eeprom_write(dd, pos, tmp, count)) { + ret = -ENXIO; + qib_dev_err(dd, "failed to write to flash\n"); + goto bail_tmp; + } + + *ppos = pos + count; + ret = count; + +bail_tmp: + kfree(tmp); + +bail: + return ret; +} + +static const struct file_operations flash_ops = { + .read = flash_read, + .write = flash_write, + .llseek = default_llseek, +}; + +static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd) +{ + struct dentry *dir, *tmp; + char unit[10]; + int ret, i; + + /* create the per-unit directory */ + snprintf(unit, sizeof unit, "%u", dd->unit); + ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, + &simple_dir_operations, dd); + if (ret) { + pr_err("create_file(%s) failed: %d\n", unit, ret); + goto bail; + } + + /* create the files in the new directory */ + ret = create_file("counters", S_IFREG|S_IRUGO, dir, &tmp, + &cntr_ops[0], dd); + if (ret) { + pr_err("create_file(%s/counters) failed: %d\n", + unit, ret); + goto bail; + } + ret = create_file("counter_names", S_IFREG|S_IRUGO, dir, &tmp, + &cntr_ops[1], dd); + if (ret) { + pr_err("create_file(%s/counter_names) failed: %d\n", + unit, ret); + goto bail; + } + ret = create_file("portcounter_names", S_IFREG|S_IRUGO, dir, &tmp, + &portcntr_ops[0], dd); + if (ret) { + pr_err("create_file(%s/%s) failed: %d\n", + unit, "portcounter_names", ret); + goto bail; + } + for (i = 1; i <= dd->num_pports; i++) { + char fname[24]; + + sprintf(fname, "port%dcounters", i); + /* create the files in the new directory */ + ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, + &portcntr_ops[i], dd); + if (ret) { + 
pr_err("create_file(%s/%s) failed: %d\n", + unit, fname, ret); + goto bail; + } + if (!(dd->flags & QIB_HAS_QSFP)) + continue; + sprintf(fname, "qsfp%d", i); + ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, + &qsfp_ops[i - 1], dd); + if (ret) { + pr_err("create_file(%s/%s) failed: %d\n", + unit, fname, ret); + goto bail; + } + } + + ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp, + &flash_ops, dd); + if (ret) + pr_err("create_file(%s/flash) failed: %d\n", + unit, ret); +bail: + return ret; +} + +static int remove_file(struct dentry *parent, char *name) +{ + struct dentry *tmp; + int ret; + + tmp = lookup_one_len(name, parent, strlen(name)); + + if (IS_ERR(tmp)) { + ret = PTR_ERR(tmp); + goto bail; + } + + spin_lock(&tmp->d_lock); + if (!(d_unhashed(tmp) && tmp->d_inode)) { + __d_drop(tmp); + spin_unlock(&tmp->d_lock); + simple_unlink(parent->d_inode, tmp); + } else { + spin_unlock(&tmp->d_lock); + } + dput(tmp); + + ret = 0; +bail: + /* + * We don't expect clients to care about the return value, but + * it's there if they need it. + */ + return ret; +} + +static int remove_device_files(struct super_block *sb, + struct qib_devdata *dd) +{ + struct dentry *dir, *root; + char unit[10]; + int ret, i; + + root = dget(sb->s_root); + mutex_lock(&root->d_inode->i_mutex); + snprintf(unit, sizeof unit, "%u", dd->unit); + dir = lookup_one_len(unit, root, strlen(unit)); + + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + pr_err("Lookup of %s failed\n", unit); + goto bail; + } + + mutex_lock(&dir->d_inode->i_mutex); + remove_file(dir, "counters"); + remove_file(dir, "counter_names"); + remove_file(dir, "portcounter_names"); + for (i = 0; i < dd->num_pports; i++) { + char fname[24]; + + sprintf(fname, "port%dcounters", i + 1); + remove_file(dir, fname); + if (dd->flags & QIB_HAS_QSFP) { + sprintf(fname, "qsfp%d", i + 1); + remove_file(dir, fname); + } + } + remove_file(dir, "flash"); + mutex_unlock(&dir->d_inode->i_mutex); + ret = simple_rmdir(root->d_inode, dir); + d_delete(dir); + dput(dir); + +bail: + mutex_unlock(&root->d_inode->i_mutex); + dput(root); + return ret; +} + +/* + * This fills everything in when the fs is mounted, to handle umount/mount + * after device init. The direct add_cntr_files() call handles adding + * them from the init code, when the fs is already mounted. 
+ */ +static int qibfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct qib_devdata *dd, *tmp; + unsigned long flags; + int ret; + + static struct tree_descr files[] = { + [2] = {"driver_stats", &driver_ops[0], S_IRUGO}, + [3] = {"driver_stats_names", &driver_ops[1], S_IRUGO}, + {""}, + }; + + ret = simple_fill_super(sb, QIBFS_MAGIC, files); + if (ret) { + pr_err("simple_fill_super failed: %d\n", ret); + goto bail; + } + + spin_lock_irqsave(&qib_devs_lock, flags); + + list_for_each_entry_safe(dd, tmp, &qib_dev_list, list) { + spin_unlock_irqrestore(&qib_devs_lock, flags); + ret = add_cntr_files(sb, dd); + if (ret) + goto bail; + spin_lock_irqsave(&qib_devs_lock, flags); + } + + spin_unlock_irqrestore(&qib_devs_lock, flags); + +bail: + return ret; +} + +static struct dentry *qibfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + struct dentry *ret; + ret = mount_single(fs_type, flags, data, qibfs_fill_super); + if (!IS_ERR(ret)) + qib_super = ret->d_sb; + return ret; +} + +static void qibfs_kill_super(struct super_block *s) +{ + kill_litter_super(s); + qib_super = NULL; +} + +int qibfs_add(struct qib_devdata *dd) +{ + int ret; + + /* + * On first unit initialized, qib_super will not yet exist + * because nobody has yet tried to mount the filesystem, so + * we can't consider that to be an error; if an error occurs + * during the mount, that will get a complaint, so this is OK. + * add_cntr_files() for all units is done at mount from + * qibfs_fill_super(), so one way or another, everything works. + */ + if (qib_super == NULL) + ret = 0; + else + ret = add_cntr_files(qib_super, dd); + return ret; +} + +int qibfs_remove(struct qib_devdata *dd) +{ + int ret = 0; + + if (qib_super) + ret = remove_device_files(qib_super, dd); + + return ret; +} + +static struct file_system_type qibfs_fs_type = { + .owner = THIS_MODULE, + .name = "ipathfs", + .mount = qibfs_mount, + .kill_sb = qibfs_kill_super, +}; +MODULE_ALIAS_FS("ipathfs"); + +int __init qib_init_qibfs(void) +{ + return register_filesystem(&qibfs_fs_type); +} + +int __exit qib_exit_qibfs(void) +{ + return unregister_filesystem(&qibfs_fs_type); +} diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c new file mode 100644 index 00000000000..d68266ac761 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -0,0 +1,3599 @@ +/* + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +/* + * This file contains all of the code that is specific to the + * QLogic_IB 6120 PCIe chip. + */ + +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <rdma/ib_verbs.h> + +#include "qib.h" +#include "qib_6120_regs.h" + +static void qib_6120_setup_setextled(struct qib_pportdata *, u32); +static void sendctrl_6120_mod(struct qib_pportdata *ppd, u32 op); +static u8 qib_6120_phys_portstate(u64); +static u32 qib_6120_iblink_state(u64); + +/* + * This file contains all the chip-specific register information and + * access functions for the QLogic_IB 6120 PCI-Express chip. + */ + +/* KREG_IDX uses machine-generated #defines */ +#define KREG_IDX(regname) (QIB_6120_##regname##_OFFS / sizeof(u64)) + +/* Use defines to tie machine-generated names to lower-case names */ +#define kr_extctrl KREG_IDX(EXTCtrl) +#define kr_extstatus KREG_IDX(EXTStatus) +#define kr_gpio_clear KREG_IDX(GPIOClear) +#define kr_gpio_mask KREG_IDX(GPIOMask) +#define kr_gpio_out KREG_IDX(GPIOOut) +#define kr_gpio_status KREG_IDX(GPIOStatus) +#define kr_rcvctrl KREG_IDX(RcvCtrl) +#define kr_sendctrl KREG_IDX(SendCtrl) +#define kr_partitionkey KREG_IDX(RcvPartitionKey) +#define kr_hwdiagctrl KREG_IDX(HwDiagCtrl) +#define kr_ibcstatus KREG_IDX(IBCStatus) +#define kr_ibcctrl KREG_IDX(IBCCtrl) +#define kr_sendbuffererror KREG_IDX(SendBufErr0) +#define kr_rcvbthqp KREG_IDX(RcvBTHQP) +#define kr_counterregbase KREG_IDX(CntrRegBase) +#define kr_palign KREG_IDX(PageAlign) +#define kr_rcvegrbase KREG_IDX(RcvEgrBase) +#define kr_rcvegrcnt KREG_IDX(RcvEgrCnt) +#define kr_rcvhdrcnt KREG_IDX(RcvHdrCnt) +#define kr_rcvhdrentsize KREG_IDX(RcvHdrEntSize) +#define kr_rcvhdrsize KREG_IDX(RcvHdrSize) +#define kr_rcvtidbase KREG_IDX(RcvTIDBase) +#define kr_rcvtidcnt KREG_IDX(RcvTIDCnt) +#define kr_scratch KREG_IDX(Scratch) +#define kr_sendpioavailaddr KREG_IDX(SendPIOAvailAddr) +#define kr_sendpiobufbase KREG_IDX(SendPIOBufBase) +#define kr_sendpiobufcnt KREG_IDX(SendPIOBufCnt) +#define kr_sendpiosize KREG_IDX(SendPIOSize) +#define kr_sendregbase KREG_IDX(SendRegBase) +#define kr_userregbase KREG_IDX(UserRegBase) +#define kr_control KREG_IDX(Control) +#define kr_intclear KREG_IDX(IntClear) +#define kr_intmask KREG_IDX(IntMask) +#define kr_intstatus KREG_IDX(IntStatus) +#define kr_errclear KREG_IDX(ErrClear) +#define kr_errmask KREG_IDX(ErrMask) +#define kr_errstatus KREG_IDX(ErrStatus) +#define kr_hwerrclear KREG_IDX(HwErrClear) +#define kr_hwerrmask KREG_IDX(HwErrMask) +#define kr_hwerrstatus KREG_IDX(HwErrStatus) +#define kr_revision KREG_IDX(Revision) +#define kr_portcnt KREG_IDX(PortCnt) +#define kr_serdes_cfg0 KREG_IDX(SerdesCfg0) +#define kr_serdes_cfg1 (kr_serdes_cfg0 + 1) +#define kr_serdes_stat KREG_IDX(SerdesStat) +#define kr_xgxs_cfg KREG_IDX(XGXSCfg) + +/* These must only be written via qib_write_kreg_ctxt() */ +#define kr_rcvhdraddr KREG_IDX(RcvHdrAddr0) +#define kr_rcvhdrtailaddr KREG_IDX(RcvHdrTailAddr0) + +#define CREG_IDX(regname)
((QIB_6120_##regname##_OFFS - \ + QIB_6120_LBIntCnt_OFFS) / sizeof(u64)) + +#define cr_badformat CREG_IDX(RxBadFormatCnt) +#define cr_erricrc CREG_IDX(RxICRCErrCnt) +#define cr_errlink CREG_IDX(RxLinkProblemCnt) +#define cr_errlpcrc CREG_IDX(RxLPCRCErrCnt) +#define cr_errpkey CREG_IDX(RxPKeyMismatchCnt) +#define cr_rcvflowctrl_err CREG_IDX(RxFlowCtrlErrCnt) +#define cr_err_rlen CREG_IDX(RxLenErrCnt) +#define cr_errslen CREG_IDX(TxLenErrCnt) +#define cr_errtidfull CREG_IDX(RxTIDFullErrCnt) +#define cr_errtidvalid CREG_IDX(RxTIDValidErrCnt) +#define cr_errvcrc CREG_IDX(RxVCRCErrCnt) +#define cr_ibstatuschange CREG_IDX(IBStatusChangeCnt) +#define cr_lbint CREG_IDX(LBIntCnt) +#define cr_invalidrlen CREG_IDX(RxMaxMinLenErrCnt) +#define cr_invalidslen CREG_IDX(TxMaxMinLenErrCnt) +#define cr_lbflowstall CREG_IDX(LBFlowStallCnt) +#define cr_pktrcv CREG_IDX(RxDataPktCnt) +#define cr_pktrcvflowctrl CREG_IDX(RxFlowPktCnt) +#define cr_pktsend CREG_IDX(TxDataPktCnt) +#define cr_pktsendflow CREG_IDX(TxFlowPktCnt) +#define cr_portovfl CREG_IDX(RxP0HdrEgrOvflCnt) +#define cr_rcvebp CREG_IDX(RxEBPCnt) +#define cr_rcvovfl CREG_IDX(RxBufOvflCnt) +#define cr_senddropped CREG_IDX(TxDroppedPktCnt) +#define cr_sendstall CREG_IDX(TxFlowStallCnt) +#define cr_sendunderrun CREG_IDX(TxUnderrunCnt) +#define cr_wordrcv CREG_IDX(RxDwordCnt) +#define cr_wordsend CREG_IDX(TxDwordCnt) +#define cr_txunsupvl CREG_IDX(TxUnsupVLErrCnt) +#define cr_rxdroppkt CREG_IDX(RxDroppedPktCnt) +#define cr_iblinkerrrecov CREG_IDX(IBLinkErrRecoveryCnt) +#define cr_iblinkdown CREG_IDX(IBLinkDownedCnt) +#define cr_ibsymbolerr CREG_IDX(IBSymbolErrCnt) + +#define SYM_RMASK(regname, fldname) ((u64) \ + QIB_6120_##regname##_##fldname##_RMASK) +#define SYM_MASK(regname, fldname) ((u64) \ + QIB_6120_##regname##_##fldname##_RMASK << \ + QIB_6120_##regname##_##fldname##_LSB) +#define SYM_LSB(regname, fldname) (QIB_6120_##regname##_##fldname##_LSB) + +#define SYM_FIELD(value, regname, fldname) ((u64) \ + (((value) >> SYM_LSB(regname, fldname)) & \ + SYM_RMASK(regname, fldname))) +#define ERR_MASK(fldname) SYM_MASK(ErrMask, fldname##Mask) +#define HWE_MASK(fldname) SYM_MASK(HwErrMask, fldname##Mask) + +/* link training states, from IBC */ +#define IB_6120_LT_STATE_DISABLED 0x00 +#define IB_6120_LT_STATE_LINKUP 0x01 +#define IB_6120_LT_STATE_POLLACTIVE 0x02 +#define IB_6120_LT_STATE_POLLQUIET 0x03 +#define IB_6120_LT_STATE_SLEEPDELAY 0x04 +#define IB_6120_LT_STATE_SLEEPQUIET 0x05 +#define IB_6120_LT_STATE_CFGDEBOUNCE 0x08 +#define IB_6120_LT_STATE_CFGRCVFCFG 0x09 +#define IB_6120_LT_STATE_CFGWAITRMT 0x0a +#define IB_6120_LT_STATE_CFGIDLE 0x0b +#define IB_6120_LT_STATE_RECOVERRETRAIN 0x0c +#define IB_6120_LT_STATE_RECOVERWAITRMT 0x0e +#define IB_6120_LT_STATE_RECOVERIDLE 0x0f + +/* link state machine states from IBC */ +#define IB_6120_L_STATE_DOWN 0x0 +#define IB_6120_L_STATE_INIT 0x1 +#define IB_6120_L_STATE_ARM 0x2 +#define IB_6120_L_STATE_ACTIVE 0x3 +#define IB_6120_L_STATE_ACT_DEFER 0x4 + +static const u8 qib_6120_physportstate[0x20] = { + [IB_6120_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED, + [IB_6120_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP, + [IB_6120_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL, + [IB_6120_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL, + [IB_6120_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP, + [IB_6120_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP, + [IB_6120_LT_STATE_CFGDEBOUNCE] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_6120_LT_STATE_CFGRCVFCFG] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_6120_LT_STATE_CFGWAITRMT] = + 
IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_6120_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_6120_LT_STATE_RECOVERRETRAIN] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [IB_6120_LT_STATE_RECOVERWAITRMT] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [IB_6120_LT_STATE_RECOVERIDLE] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN +}; + + +struct qib_chip_specific { + u64 __iomem *cregbase; + u64 *cntrs; + u64 *portcntrs; + void *dummy_hdrq; /* used after ctxt close */ + dma_addr_t dummy_hdrq_phys; + spinlock_t kernel_tid_lock; /* no back to back kernel TID writes */ + spinlock_t user_tid_lock; /* no back to back user TID writes */ + spinlock_t rcvmod_lock; /* protect rcvctrl shadow changes */ + spinlock_t gpio_lock; /* RMW of shadows/regs for ExtCtrl and GPIO */ + u64 hwerrmask; + u64 errormask; + u64 gpio_out; /* shadow of kr_gpio_out, for rmw ops */ + u64 gpio_mask; /* shadow the gpio mask register */ + u64 extctrl; /* shadow the gpio output enable, etc... */ + /* + * these 5 fields are used to establish deltas for IB symbol + * errors and linkrecovery errors. They can be reported on + * some chips during link negotiation prior to INIT, and with + * DDR when faking DDR negotiations with non-IBTA switches. + * The chip counters are adjusted at driver unload if there is + * a non-zero delta. + */ + u64 ibdeltainprog; + u64 ibsymdelta; + u64 ibsymsnap; + u64 iblnkerrdelta; + u64 iblnkerrsnap; + u64 ibcctrl; /* shadow for kr_ibcctrl */ + u32 lastlinkrecov; /* link recovery issue */ + int irq; + u32 cntrnamelen; + u32 portcntrnamelen; + u32 ncntrs; + u32 nportcntrs; + /* used with gpio interrupts to implement IB counters */ + u32 rxfc_unsupvl_errs; + u32 overrun_thresh_errs; + /* + * these count only cases where _successive_ LocalLinkIntegrity + * errors were seen in the receive headers of IB standard packets + */ + u32 lli_errs; + u32 lli_counter; + u64 lli_thresh; + u64 sword; /* total dwords sent (sample result) */ + u64 rword; /* total dwords received (sample result) */ + u64 spkts; /* total packets sent (sample result) */ + u64 rpkts; /* total packets received (sample result) */ + u64 xmit_wait; /* # of ticks no data sent (sample result) */ + struct timer_list pma_timer; + char emsgbuf[128]; + char bitsmsgbuf[64]; + u8 pma_sample_status; +}; + +/* ibcctrl bits */ +#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1 +/* cycle through TS1/TS2 till OK */ +#define QLOGIC_IB_IBCC_LINKINITCMD_POLL 2 +/* wait for TS1, then go on */ +#define QLOGIC_IB_IBCC_LINKINITCMD_SLEEP 3 +#define QLOGIC_IB_IBCC_LINKINITCMD_SHIFT 16 + +#define QLOGIC_IB_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */ +#define QLOGIC_IB_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */ +#define QLOGIC_IB_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */ +#define QLOGIC_IB_IBCC_LINKCMD_SHIFT 18 + +/* + * We could have a single register get/put routine, that takes a group type, + * but this is somewhat clearer and cleaner. It also gives us some error + * checking. 64 bit register reads should always work, but are inefficient + * on opteron (the northbridge always generates 2 separate HT 32 bit reads), + * so we use kreg32 wherever possible. User register and counter register + * reads are always 32 bit reads, so only one form of those routines. 
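 */

/*
 * A self-contained sketch of the SYM_* shift/mask pattern defined
 * above, with invented DEMO_* constants standing in for the
 * machine-generated QIB_6120_* defines (a 4-bit field at bits 7:4;
 * hypothetical values, for illustration only):
 */
#include <stdio.h>

#define DEMO_LinkState_LSB	4
#define DEMO_LinkState_RMASK	0xfULL

#define DEMO_MASK  (DEMO_LinkState_RMASK << DEMO_LinkState_LSB)
#define DEMO_FIELD(value) \
	(((value) >> DEMO_LinkState_LSB) & DEMO_LinkState_RMASK)

int main(void)
{
	unsigned long long regval = 0x35; /* bits 7:4 hold 3 */

	printf("mask %#llx, extracted field %llu\n",
	       (unsigned long long)DEMO_MASK, DEMO_FIELD(regval));
	return 0;
}

/*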
 + */ + +/** + * qib_read_ureg32 - read 32-bit virtualized per-context register + * @dd: device + * @regno: register number + * @ctxt: context number + * + * Return the contents of a register that is virtualized to be per context. + * Returns 0 on errors (not distinguishable from valid contents at + * runtime; we may add a separate error variable at some point). + */ +static inline u32 qib_read_ureg32(const struct qib_devdata *dd, + enum qib_ureg regno, int ctxt) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return 0; + + if (dd->userbase) + return readl(regno + (u64 __iomem *) + ((char __iomem *)dd->userbase + + dd->ureg_align * ctxt)); + else + return readl(regno + (u64 __iomem *) + (dd->uregbase + + (char __iomem *)dd->kregbase + + dd->ureg_align * ctxt)); +} + +/** + * qib_write_ureg - write virtualized per-context register + * @dd: device + * @regno: register number + * @value: value + * @ctxt: context + * + * Write the contents of a register that is virtualized to be per context. + */ +static inline void qib_write_ureg(const struct qib_devdata *dd, + enum qib_ureg regno, u64 value, int ctxt) +{ + u64 __iomem *ubase; + + if (dd->userbase) + ubase = (u64 __iomem *) + ((char __iomem *) dd->userbase + + dd->ureg_align * ctxt); + else + ubase = (u64 __iomem *) + (dd->uregbase + + (char __iomem *) dd->kregbase + + dd->ureg_align * ctxt); + + if (dd->kregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &ubase[regno]); +} + +static inline u32 qib_read_kreg32(const struct qib_devdata *dd, + const u16 regno) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return -1; + return readl((u32 __iomem *)&dd->kregbase[regno]); +} + +static inline u64 qib_read_kreg64(const struct qib_devdata *dd, + const u16 regno) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return -1; + + return readq(&dd->kregbase[regno]); +} + +static inline void qib_write_kreg(const struct qib_devdata *dd, + const u16 regno, u64 value) +{ + if (dd->kregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &dd->kregbase[regno]); +} + +/** + * qib_write_kreg_ctxt - write a device's per-ctxt 64-bit kernel register + * @dd: the qlogic_ib device + * @regno: the register number to write + * @ctxt: the context containing the register + * @value: the value to write + */ +static inline void qib_write_kreg_ctxt(const struct qib_devdata *dd, + const u16 regno, unsigned ctxt, + u64 value) +{ + qib_write_kreg(dd, regno + ctxt, value); +} + +static inline void write_6120_creg(const struct qib_devdata *dd, + u16 regno, u64 value) +{ + if (dd->cspec->cregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &dd->cspec->cregbase[regno]); +} + +static inline u64 read_6120_creg(const struct qib_devdata *dd, u16 regno) +{ + if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readq(&dd->cspec->cregbase[regno]); +} + +static inline u32 read_6120_creg32(const struct qib_devdata *dd, u16 regno) +{ + if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readl(&dd->cspec->cregbase[regno]); +} + +/* kr_control bits */ +#define QLOGIC_IB_C_RESET 1U + +/* kr_intstatus, kr_intclear, kr_intmask bits */ +#define QLOGIC_IB_I_RCVURG_MASK ((1U << 5) - 1) +#define QLOGIC_IB_I_RCVURG_SHIFT 0 +#define QLOGIC_IB_I_RCVAVAIL_MASK ((1U << 5) - 1) +#define QLOGIC_IB_I_RCVAVAIL_SHIFT 12 + +#define QLOGIC_IB_C_FREEZEMODE 0x00000002 +#define QLOGIC_IB_C_LINKENABLE 0x00000004 +#define QLOGIC_IB_I_ERROR 0x0000000080000000ULL +#define QLOGIC_IB_I_SPIOSENT 0x0000000040000000ULL +#define
QLOGIC_IB_I_SPIOBUFAVAIL 0x0000000020000000ULL +#define QLOGIC_IB_I_GPIO 0x0000000010000000ULL +#define QLOGIC_IB_I_BITSEXTANT \ + ((QLOGIC_IB_I_RCVURG_MASK << QLOGIC_IB_I_RCVURG_SHIFT) | \ + (QLOGIC_IB_I_RCVAVAIL_MASK << \ + QLOGIC_IB_I_RCVAVAIL_SHIFT) | \ + QLOGIC_IB_I_ERROR | QLOGIC_IB_I_SPIOSENT | \ + QLOGIC_IB_I_SPIOBUFAVAIL | QLOGIC_IB_I_GPIO) + +/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ +#define QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK 0x000000000000003fULL +#define QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT 0 +#define QLOGIC_IB_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL +#define QLOGIC_IB_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL +#define QLOGIC_IB_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL +#define QLOGIC_IB_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL +#define QLOGIC_IB_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL +#define QLOGIC_IB_HWE_COREPLL_FBSLIP 0x0080000000000000ULL +#define QLOGIC_IB_HWE_COREPLL_RFSLIP 0x0100000000000000ULL +#define QLOGIC_IB_HWE_PCIE1PLLFAILED 0x0400000000000000ULL +#define QLOGIC_IB_HWE_PCIE0PLLFAILED 0x0800000000000000ULL +#define QLOGIC_IB_HWE_SERDESPLLFAILED 0x1000000000000000ULL + + +/* kr_extstatus bits */ +#define QLOGIC_IB_EXTS_FREQSEL 0x2 +#define QLOGIC_IB_EXTS_SERDESSEL 0x4 +#define QLOGIC_IB_EXTS_MEMBIST_ENDTEST 0x0000000000004000 +#define QLOGIC_IB_EXTS_MEMBIST_FOUND 0x0000000000008000 + +/* kr_xgxsconfig bits */ +#define QLOGIC_IB_XGXS_RESET 0x5ULL + +#define _QIB_GPIO_SDA_NUM 1 +#define _QIB_GPIO_SCL_NUM 0 + +/* Bits in GPIO for the added IB link interrupts */ +#define GPIO_RXUVL_BIT 3 +#define GPIO_OVRUN_BIT 4 +#define GPIO_LLI_BIT 5 +#define GPIO_ERRINTR_MASK 0x38 + + +#define QLOGIC_IB_RT_BUFSIZE_MASK 0xe0000000ULL +#define QLOGIC_IB_RT_BUFSIZE_SHIFTVAL(tid) \ + ((((tid) & QLOGIC_IB_RT_BUFSIZE_MASK) >> 29) + 11 - 1) +#define QLOGIC_IB_RT_BUFSIZE(tid) (1 << QLOGIC_IB_RT_BUFSIZE_SHIFTVAL(tid)) +#define QLOGIC_IB_RT_IS_VALID(tid) \ + (((tid) & QLOGIC_IB_RT_BUFSIZE_MASK) && \ + ((((tid) & QLOGIC_IB_RT_BUFSIZE_MASK) != QLOGIC_IB_RT_BUFSIZE_MASK))) +#define QLOGIC_IB_RT_ADDR_MASK 0x1FFFFFFFULL /* 29 bits valid */ +#define QLOGIC_IB_RT_ADDR_SHIFT 10 + +#define QLOGIC_IB_R_INTRAVAIL_SHIFT 16 +#define QLOGIC_IB_R_TAILUPD_SHIFT 31 +#define IBA6120_R_PKEY_DIS_SHIFT 30 + +#define PBC_6120_VL15_SEND_CTRL (1ULL << 31) /* pbc; VL15; link_buf only */ + +#define IBCBUSFRSPCPARITYERR HWE_MASK(IBCBusFromSPCParityErr) +#define IBCBUSTOSPCPARITYERR HWE_MASK(IBCBusToSPCParityErr) + +#define SYM_MASK_BIT(regname, fldname, bit) ((u64) \ + ((1ULL << (SYM_LSB(regname, fldname) + (bit))))) + +#define TXEMEMPARITYERR_PIOBUF \ + SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 0) +#define TXEMEMPARITYERR_PIOPBC \ + SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 1) +#define TXEMEMPARITYERR_PIOLAUNCHFIFO \ + SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 2) + +#define RXEMEMPARITYERR_RCVBUF \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 0) +#define RXEMEMPARITYERR_LOOKUPQ \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 1) +#define RXEMEMPARITYERR_EXPTID \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 2) +#define RXEMEMPARITYERR_EAGERTID \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 3) +#define RXEMEMPARITYERR_FLAGBUF \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 4) +#define RXEMEMPARITYERR_DATAINFO \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 5) +#define RXEMEMPARITYERR_HDRINFO \ + SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 6) + +/* 6120 specific hardware errors... 
*/ +static const struct qib_hwerror_msgs qib_6120_hwerror_msgs[] = { + /* generic hardware errors */ + QLOGIC_IB_HWE_MSG(IBCBUSFRSPCPARITYERR, "QIB2IB Parity"), + QLOGIC_IB_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2QIB Parity"), + + QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOBUF, + "TXE PIOBUF Memory Parity"), + QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOPBC, + "TXE PIOPBC Memory Parity"), + QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOLAUNCHFIFO, + "TXE PIOLAUNCHFIFO Memory Parity"), + + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_RCVBUF, + "RXE RCVBUF Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_LOOKUPQ, + "RXE LOOKUPQ Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_EAGERTID, + "RXE EAGERTID Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_EXPTID, + "RXE EXPTID Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_FLAGBUF, + "RXE FLAGBUF Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_DATAINFO, + "RXE DATAINFO Memory Parity"), + QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_HDRINFO, + "RXE HDRINFO Memory Parity"), + + /* chip-specific hardware errors */ + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEPOISONEDTLP, + "PCIe Poisoned TLP"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLTIMEOUT, + "PCIe completion timeout"), + /* + * In practice, it's unlikely that we'll see PCIe PLL, bus parity, + * or memory parity error failures, because most likely we + * won't be able to talk to the core of the chip. Nonetheless, we + * might see them, if they are in parts of the PCIe core that aren't + * essential. + */ + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE1PLLFAILED, + "PCIePLL1"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE0PLLFAILED, + "PCIePLL0"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYXTLH, + "PCIe XTLH core parity"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYXADM, + "PCIe ADM TX core parity"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYRADM, + "PCIe ADM RX core parity"), + QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_SERDESPLLFAILED, + "SerDes PLL"), +}; + +#define TXE_PIO_PARITY (TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC) +#define _QIB_PLL_FAIL (QLOGIC_IB_HWE_COREPLL_FBSLIP | \ + QLOGIC_IB_HWE_COREPLL_RFSLIP) + + /* variables for sanity checking interrupt and errors */ +#define IB_HWE_BITSEXTANT \ + (HWE_MASK(RXEMemParityErr) | \ + HWE_MASK(TXEMemParityErr) | \ + (QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK << \ + QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT) | \ + QLOGIC_IB_HWE_PCIE1PLLFAILED | \ + QLOGIC_IB_HWE_PCIE0PLLFAILED | \ + QLOGIC_IB_HWE_PCIEPOISONEDTLP | \ + QLOGIC_IB_HWE_PCIECPLTIMEOUT | \ + QLOGIC_IB_HWE_PCIEBUSPARITYXTLH | \ + QLOGIC_IB_HWE_PCIEBUSPARITYXADM | \ + QLOGIC_IB_HWE_PCIEBUSPARITYRADM | \ + HWE_MASK(PowerOnBISTFailed) | \ + QLOGIC_IB_HWE_COREPLL_FBSLIP | \ + QLOGIC_IB_HWE_COREPLL_RFSLIP | \ + QLOGIC_IB_HWE_SERDESPLLFAILED | \ + HWE_MASK(IBCBusToSPCParityErr) | \ + HWE_MASK(IBCBusFromSPCParityErr)) + +#define IB_E_BITSEXTANT \ + (ERR_MASK(RcvFormatErr) | ERR_MASK(RcvVCRCErr) | \ + ERR_MASK(RcvICRCErr) | ERR_MASK(RcvMinPktLenErr) | \ + ERR_MASK(RcvMaxPktLenErr) | ERR_MASK(RcvLongPktLenErr) | \ + ERR_MASK(RcvShortPktLenErr) | ERR_MASK(RcvUnexpectedCharErr) | \ + ERR_MASK(RcvUnsupportedVLErr) | ERR_MASK(RcvEBPErr) | \ + ERR_MASK(RcvIBFlowErr) | ERR_MASK(RcvBadVersionErr) | \ + ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr) | \ + ERR_MASK(RcvBadTidErr) | ERR_MASK(RcvHdrLenErr) | \ + ERR_MASK(RcvHdrErr) | ERR_MASK(RcvIBLostLinkErr) | \ + ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendMaxPktLenErr) | \ + ERR_MASK(SendUnderRunErr) | ERR_MASK(SendPktLenErr) | \ + ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendDroppedDataPktErr) |
\ + ERR_MASK(SendPioArmLaunchErr) | \ + ERR_MASK(SendUnexpectedPktNumErr) | \ + ERR_MASK(SendUnsupportedVLErr) | ERR_MASK(IBStatusChanged) | \ + ERR_MASK(InvalidAddrErr) | ERR_MASK(ResetNegated) | \ + ERR_MASK(HardwareErr)) + +#define QLOGIC_IB_E_PKTERRS ( \ + ERR_MASK(SendPktLenErr) | \ + ERR_MASK(SendDroppedDataPktErr) | \ + ERR_MASK(RcvVCRCErr) | \ + ERR_MASK(RcvICRCErr) | \ + ERR_MASK(RcvShortPktLenErr) | \ + ERR_MASK(RcvEBPErr)) + +/* These are all rcv-related errors which we want to count for stats */ +#define E_SUM_PKTERRS \ + (ERR_MASK(RcvHdrLenErr) | ERR_MASK(RcvBadTidErr) | \ + ERR_MASK(RcvBadVersionErr) | ERR_MASK(RcvHdrErr) | \ + ERR_MASK(RcvLongPktLenErr) | ERR_MASK(RcvShortPktLenErr) | \ + ERR_MASK(RcvMaxPktLenErr) | ERR_MASK(RcvMinPktLenErr) | \ + ERR_MASK(RcvFormatErr) | ERR_MASK(RcvUnsupportedVLErr) | \ + ERR_MASK(RcvUnexpectedCharErr) | ERR_MASK(RcvEBPErr)) + +/* These are all send-related errors which we want to count for stats */ +#define E_SUM_ERRS \ + (ERR_MASK(SendPioArmLaunchErr) | \ + ERR_MASK(SendUnexpectedPktNumErr) | \ + ERR_MASK(SendDroppedDataPktErr) | \ + ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendUnsupportedVLErr) | \ + ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendPktLenErr) | \ + ERR_MASK(InvalidAddrErr)) + +/* + * this is similar to E_SUM_ERRS, but can't ignore armlaunch and doesn't + * ignore errors not related to freeze and cancelling buffers. Can't ignore + * armlaunch because could get more while still cleaning up, and need + * to cancel those as they happen. + */ +#define E_SPKT_ERRS_IGNORE \ + (ERR_MASK(SendDroppedDataPktErr) | \ + ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendMinPktLenErr) | \ + ERR_MASK(SendPktLenErr)) + +/* + * these are errors that can occur when the link changes state while + * a packet is being sent or received. This doesn't cover things + * like EBP or VCRC that can be the result of a send occurring while + * the link changes state, so we receive a "known bad" packet. + */ +#define E_SUM_LINK_PKTERRS \ + (ERR_MASK(SendDroppedDataPktErr) | \ + ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendPktLenErr) | \ + ERR_MASK(RcvShortPktLenErr) | ERR_MASK(RcvMinPktLenErr) | \ + ERR_MASK(RcvUnexpectedCharErr)) + +static void qib_6120_put_tid_2(struct qib_devdata *, u64 __iomem *, + u32, unsigned long); + +/* + * On platforms using this chip, and not having ordered WC stores, we + * can get TXE parity errors due to speculative reads to the PIO buffers, + * and this, due to a chip issue, can result in (many) false parity error + * reports. So it's a debug print on those, and an info print on systems + * where the speculative reads don't occur. + */ +static void qib_6120_txe_recover(struct qib_devdata *dd) +{ + if (!qib_unordered_wc()) + qib_devinfo(dd->pcidev, + "Recovering from TXE PIO parity error\n"); +} + +/* enable/disable chip from delivering interrupts */ +static void qib_6120_set_intr_state(struct qib_devdata *dd, u32 enable) +{ + if (enable) { + if (dd->flags & QIB_BADINTR) + return; + qib_write_kreg(dd, kr_intmask, ~0ULL); + /* force re-interrupt of any pending interrupts.
*/ + qib_write_kreg(dd, kr_intclear, 0ULL); + } else + qib_write_kreg(dd, kr_intmask, 0ULL); +} + +/* + * Try to clean up as much as possible for anything that might have gone + * wrong while in freeze mode, such as pio buffers being written by user + * processes (causing armlaunch), send errors due to going into freeze mode, + * etc., and try to avoid causing extra interrupts while doing so. + * Forcibly update the in-memory pioavail register copies after cleanup + * because the chip won't do it while in freeze mode (the register values + * themselves are kept correct). + * Make sure that we don't lose any important interrupts by using the chip + * feature that says that writing 0 to a bit in *clear that is set in + * *status will cause an interrupt to be generated again (if allowed by + * the *mask value). + * This is in chip-specific code because of all of the register accesses, + * even though the details are similar on most chips. + */ +static void qib_6120_clear_freeze(struct qib_devdata *dd) +{ + /* disable error interrupts, to avoid confusion */ + qib_write_kreg(dd, kr_errmask, 0ULL); + + /* also disable interrupts; errormask is sometimes overwritten */ + qib_6120_set_intr_state(dd, 0); + + qib_cancel_sends(dd->pport); + + /* clear the freeze, and be sure chip saw it */ + qib_write_kreg(dd, kr_control, dd->control); + qib_read_kreg32(dd, kr_scratch); + + /* force in-memory update now we are out of freeze */ + qib_force_pio_avail_update(dd); + + /* + * force new interrupt if any hwerr, error or interrupt bits are + * still set, and clear "safe" send packet errors related to freeze + * and cancelling sends. Re-enable error interrupts before possible + * force of re-interrupt on pending interrupts. + */ + qib_write_kreg(dd, kr_hwerrclear, 0ULL); + qib_write_kreg(dd, kr_errclear, E_SPKT_ERRS_IGNORE); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); + qib_6120_set_intr_state(dd, 1); +} + +/** + * qib_handle_6120_hwerrors - display hardware errors. + * @dd: the qlogic_ib device + * @msg: the output buffer + * @msgl: the size of the output buffer + * + * Most hardware errors are catastrophic, but for right now + * we'll print them and continue. Reuse the same message buffer as + * handle_6120_errors() to avoid excessive stack usage. + */ +static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, + size_t msgl) +{ + u64 hwerrs; + u32 bits, ctrl; + int isfatal = 0; + char *bitsmsg; + int log_idx; + + hwerrs = qib_read_kreg64(dd, kr_hwerrstatus); + if (!hwerrs) + return; + if (hwerrs == ~0ULL) { + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); + return; + } + qib_stats.sps_hwerrs++; + + /* Always clear the error status register, except MEMBISTFAIL, + * regardless of whether we continue or stop using the chip. + * We want that set so we know it failed, even across driver reload. + * We'll still ignore it in the hwerrmask. We do this partly for + * diagnostics, but also for support */ + qib_write_kreg(dd, kr_hwerrclear, + hwerrs & ~HWE_MASK(PowerOnBISTFailed)); + + hwerrs &= dd->cspec->hwerrmask; + + /* We log some errors to EEPROM, check if we have any of those. */ + for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) + if (hwerrs & dd->eep_st_masks[log_idx].hwerrs_to_log) + qib_inc_eeprom_err(dd, log_idx, 1); + + /* + * Make sure we get this much out, unless told to be quiet, + * or it's occurred within the last 5 seconds.
 + */ + if (hwerrs & ~(TXE_PIO_PARITY | RXEMEMPARITYERR_EAGERTID)) + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); + + if (hwerrs & ~IB_HWE_BITSEXTANT) + qib_dev_err(dd, + "hwerror interrupt with unknown errors %llx set\n", + (unsigned long long)(hwerrs & ~IB_HWE_BITSEXTANT)); + + ctrl = qib_read_kreg32(dd, kr_control); + if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) { + /* + * Parity errors in send memory are recoverable, + * just cancel the send (if indicated in sendbuffererror), + * count the occurrence, unfreeze (if no other handled + * hardware error bits are set), and continue. They can + * occur if a processor speculative read is done to the PIO + * buffer while we are sending a packet, for example. + */ + if (hwerrs & TXE_PIO_PARITY) { + qib_6120_txe_recover(dd); + hwerrs &= ~TXE_PIO_PARITY; + } + + if (!hwerrs) { + static u32 freeze_cnt; + + freeze_cnt++; + qib_6120_clear_freeze(dd); + } else + isfatal = 1; + } + + *msg = '\0'; + + if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { + isfatal = 1; + strlcat(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); + /* ignore from now on, so disable until driver reloaded */ + dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + } + + qib_format_hwerrors(hwerrs, qib_6120_hwerror_msgs, + ARRAY_SIZE(qib_6120_hwerror_msgs), msg, msgl); + + bitsmsg = dd->cspec->bitsmsgbuf; + if (hwerrs & (QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK << + QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT)) { + bits = (u32) ((hwerrs >> + QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT) & + QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK); + snprintf(bitsmsg, sizeof dd->cspec->bitsmsgbuf, + "[PCIe Mem Parity Errs %x] ", bits); + strlcat(msg, bitsmsg, msgl); + } + + if (hwerrs & _QIB_PLL_FAIL) { + isfatal = 1; + snprintf(bitsmsg, sizeof dd->cspec->bitsmsgbuf, + "[PLL failed (%llx), InfiniPath hardware unusable]", + (unsigned long long) hwerrs & _QIB_PLL_FAIL); + strlcat(msg, bitsmsg, msgl); + /* ignore from now on, so disable until driver reloaded */ + dd->cspec->hwerrmask &= ~(hwerrs & _QIB_PLL_FAIL); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + } + + if (hwerrs & QLOGIC_IB_HWE_SERDESPLLFAILED) { + /* + * If it occurs, it is left masked since the external + * interface is unused + */ + dd->cspec->hwerrmask &= ~QLOGIC_IB_HWE_SERDESPLLFAILED; + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + } + + if (hwerrs) + /* + * if any set that we aren't ignoring; only + * make the complaint once, in case it's stuck + * or recurring, and we get here multiple + * times. + */ + qib_dev_err(dd, "%s hardware error\n", msg); + else + *msg = 0; /* recovered from all of them */ + + if (isfatal && !dd->diag_client) { + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); + /* + * for /sys status file and user programs to print; if no + * trailing brace is copied, we'll know it was truncated. + */ + if (dd->freezemsg) + snprintf(dd->freezemsg, dd->freezelen, + "{%s}", msg); + qib_disable_after_error(dd); + } +} + +/* + * Decode the error status into strings, deciding whether to always + * print it or not depending on "normal packet errors" vs everything + * else. Return 1 if "real" errors, otherwise 0 if only packet + * errors, so caller can decide what to print with the string.
+ */ +static int qib_decode_6120_err(struct qib_devdata *dd, char *buf, size_t blen, + u64 err) +{ + int iserr = 1; + + *buf = '\0'; + if (err & QLOGIC_IB_E_PKTERRS) { + if (!(err & ~QLOGIC_IB_E_PKTERRS)) + iserr = 0; + if ((err & ERR_MASK(RcvICRCErr)) && + !(err&(ERR_MASK(RcvVCRCErr)|ERR_MASK(RcvEBPErr)))) + strlcat(buf, "CRC ", blen); + if (!iserr) + goto done; + } + if (err & ERR_MASK(RcvHdrLenErr)) + strlcat(buf, "rhdrlen ", blen); + if (err & ERR_MASK(RcvBadTidErr)) + strlcat(buf, "rbadtid ", blen); + if (err & ERR_MASK(RcvBadVersionErr)) + strlcat(buf, "rbadversion ", blen); + if (err & ERR_MASK(RcvHdrErr)) + strlcat(buf, "rhdr ", blen); + if (err & ERR_MASK(RcvLongPktLenErr)) + strlcat(buf, "rlongpktlen ", blen); + if (err & ERR_MASK(RcvMaxPktLenErr)) + strlcat(buf, "rmaxpktlen ", blen); + if (err & ERR_MASK(RcvMinPktLenErr)) + strlcat(buf, "rminpktlen ", blen); + if (err & ERR_MASK(SendMinPktLenErr)) + strlcat(buf, "sminpktlen ", blen); + if (err & ERR_MASK(RcvFormatErr)) + strlcat(buf, "rformaterr ", blen); + if (err & ERR_MASK(RcvUnsupportedVLErr)) + strlcat(buf, "runsupvl ", blen); + if (err & ERR_MASK(RcvUnexpectedCharErr)) + strlcat(buf, "runexpchar ", blen); + if (err & ERR_MASK(RcvIBFlowErr)) + strlcat(buf, "ribflow ", blen); + if (err & ERR_MASK(SendUnderRunErr)) + strlcat(buf, "sunderrun ", blen); + if (err & ERR_MASK(SendPioArmLaunchErr)) + strlcat(buf, "spioarmlaunch ", blen); + if (err & ERR_MASK(SendUnexpectedPktNumErr)) + strlcat(buf, "sunexperrpktnum ", blen); + if (err & ERR_MASK(SendDroppedSmpPktErr)) + strlcat(buf, "sdroppedsmppkt ", blen); + if (err & ERR_MASK(SendMaxPktLenErr)) + strlcat(buf, "smaxpktlen ", blen); + if (err & ERR_MASK(SendUnsupportedVLErr)) + strlcat(buf, "sunsupVL ", blen); + if (err & ERR_MASK(InvalidAddrErr)) + strlcat(buf, "invalidaddr ", blen); + if (err & ERR_MASK(RcvEgrFullErr)) + strlcat(buf, "rcvegrfull ", blen); + if (err & ERR_MASK(RcvHdrFullErr)) + strlcat(buf, "rcvhdrfull ", blen); + if (err & ERR_MASK(IBStatusChanged)) + strlcat(buf, "ibcstatuschg ", blen); + if (err & ERR_MASK(RcvIBLostLinkErr)) + strlcat(buf, "riblostlink ", blen); + if (err & ERR_MASK(HardwareErr)) + strlcat(buf, "hardware ", blen); + if (err & ERR_MASK(ResetNegated)) + strlcat(buf, "reset ", blen); +done: + return iserr; +} + +/* + * Called when we might have an error that is specific to a particular + * PIO buffer, and may need to cancel that buffer, so it can be re-used. + */ +static void qib_disarm_6120_senderrbufs(struct qib_pportdata *ppd) +{ + unsigned long sbuf[2]; + struct qib_devdata *dd = ppd->dd; + + /* + * It's possible that sendbuffererror could have bits set; might + * have already done this as a result of hardware error handling. 
+ */ + sbuf[0] = qib_read_kreg64(dd, kr_sendbuffererror); + sbuf[1] = qib_read_kreg64(dd, kr_sendbuffererror + 1); + + if (sbuf[0] || sbuf[1]) + qib_disarm_piobufs_set(dd, sbuf, + dd->piobcnt2k + dd->piobcnt4k); +} + +static int chk_6120_linkrecovery(struct qib_devdata *dd, u64 ibcs) +{ + int ret = 1; + u32 ibstate = qib_6120_iblink_state(ibcs); + u32 linkrecov = read_6120_creg32(dd, cr_iblinkerrrecov); + + if (linkrecov != dd->cspec->lastlinkrecov) { + /* and no more until active again */ + dd->cspec->lastlinkrecov = 0; + qib_set_linkstate(dd->pport, QIB_IB_LINKDOWN); + ret = 0; + } + if (ibstate == IB_PORT_ACTIVE) + dd->cspec->lastlinkrecov = + read_6120_creg32(dd, cr_iblinkerrrecov); + return ret; +} + +static void handle_6120_errors(struct qib_devdata *dd, u64 errs) +{ + char *msg; + u64 ignore_this_time = 0; + u64 iserr = 0; + int log_idx; + struct qib_pportdata *ppd = dd->pport; + u64 mask; + + /* don't report errors that are masked */ + errs &= dd->cspec->errormask; + msg = dd->cspec->emsgbuf; + + /* do these first, they are most important */ + if (errs & ERR_MASK(HardwareErr)) + qib_handle_6120_hwerrors(dd, msg, sizeof dd->cspec->emsgbuf); + else + for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) + if (errs & dd->eep_st_masks[log_idx].errs_to_log) + qib_inc_eeprom_err(dd, log_idx, 1); + + if (errs & ~IB_E_BITSEXTANT) + qib_dev_err(dd, + "error interrupt with unknown errors %llx set\n", + (unsigned long long) (errs & ~IB_E_BITSEXTANT)); + + if (errs & E_SUM_ERRS) { + qib_disarm_6120_senderrbufs(ppd); + if ((errs & E_SUM_LINK_PKTERRS) && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* + * This can happen when trying to bring the link + * up, but the IB link changes state at the "wrong" + * time. The IB logic then complains that the packet + * isn't valid. We don't want to confuse people, so + * we just don't print them, except at debug + */ + ignore_this_time = errs & E_SUM_LINK_PKTERRS; + } + } else if ((errs & E_SUM_LINK_PKTERRS) && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* + * This can happen when SMA is trying to bring the link + * up, but the IB link changes state at the "wrong" time. + * The IB logic then complains that the packet isn't + * valid. We don't want to confuse people, so we just + * don't print them, except at debug + */ + ignore_this_time = errs & E_SUM_LINK_PKTERRS; + } + + qib_write_kreg(dd, kr_errclear, errs); + + errs &= ~ignore_this_time; + if (!errs) + goto done; + + /* + * The ones we mask off are handled specially below + * or above. + */ + mask = ERR_MASK(IBStatusChanged) | ERR_MASK(RcvEgrFullErr) | + ERR_MASK(RcvHdrFullErr) | ERR_MASK(HardwareErr); + qib_decode_6120_err(dd, msg, sizeof dd->cspec->emsgbuf, errs & ~mask); + + if (errs & E_SUM_PKTERRS) + qib_stats.sps_rcverrs++; + if (errs & E_SUM_ERRS) + qib_stats.sps_txerrs++; + + iserr = errs & ~(E_SUM_PKTERRS | QLOGIC_IB_E_PKTERRS); + + if (errs & ERR_MASK(IBStatusChanged)) { + u64 ibcs = qib_read_kreg64(dd, kr_ibcstatus); + u32 ibstate = qib_6120_iblink_state(ibcs); + int handle = 1; + + if (ibstate != IB_PORT_INIT && dd->cspec->lastlinkrecov) + handle = chk_6120_linkrecovery(dd, ibcs); + /* + * Since going into a recovery state causes the link state + * to go down and since recovery is transitory, it is better + * if we "miss" ever seeing the link training state go into + * recovery (i.e., ignore this transition for link state + * special handling purposes) without updating lastibcstat. 
 + */ + if (handle && qib_6120_phys_portstate(ibcs) == + IB_PHYSPORTSTATE_LINK_ERR_RECOVER) + handle = 0; + if (handle) + qib_handle_e_ibstatuschanged(ppd, ibcs); + } + + if (errs & ERR_MASK(ResetNegated)) { + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); + dd->flags &= ~QIB_INITTED; /* needs re-init */ + /* mark as having had error */ + *dd->devstatusp |= QIB_STATUS_HWERROR; + *dd->pport->statusp &= ~QIB_STATUS_IB_CONF; + } + + if (*msg && iserr) + qib_dev_porterr(dd, ppd->port, "%s error\n", msg); + + if (ppd->state_wanted & ppd->lflags) + wake_up_interruptible(&ppd->state_wait); + + /* + * If there were hdrq or egrfull errors, wake up any processes + * waiting in poll. We used to try to check which contexts had + * the overflow, but given the cost of that and the chip reads + * to support it, it's better to just wake everybody up if we + * get an overflow; waiters can poll again if it's not them. + */ + if (errs & (ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr))) { + qib_handle_urcv(dd, ~0U); + if (errs & ERR_MASK(RcvEgrFullErr)) + qib_stats.sps_buffull++; + else + qib_stats.sps_hdrfull++; + } +done: + return; +} + +/** + * qib_6120_init_hwerrors - enable hardware errors + * @dd: the qlogic_ib device + * + * now that we have finished initializing everything that might reasonably + * cause a hardware error, and cleared those error bits as they occur, + * we can enable hardware errors in the mask (potentially enabling + * freeze mode), and enable hardware errors as errors (along with + * everything else) in errormask + */ +static void qib_6120_init_hwerrors(struct qib_devdata *dd) +{ + u64 val; + u64 extsval; + + extsval = qib_read_kreg64(dd, kr_extstatus); + + if (!(extsval & QLOGIC_IB_EXTS_MEMBIST_ENDTEST)) + qib_dev_err(dd, "MemBIST did not complete!\n"); + + /* init so all hwerrors interrupt, and enter freeze, adjust below */ + val = ~0ULL; + if (dd->minrev < 2) { + /* + * Avoid problem with internal interface bus parity + * checking. Fixed in Rev2. + */ + val &= ~QLOGIC_IB_HWE_PCIEBUSPARITYRADM; + } + /* avoid some Intel CPUs' speculative-read freeze-mode issue */ + val &= ~TXEMEMPARITYERR_PIOBUF; + + dd->cspec->hwerrmask = val; + + qib_write_kreg(dd, kr_hwerrclear, ~HWE_MASK(PowerOnBISTFailed)); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + + /* clear all */ + qib_write_kreg(dd, kr_errclear, ~0ULL); + /* enable errors that are masked, at least this first time. */ + qib_write_kreg(dd, kr_errmask, ~0ULL); + dd->cspec->errormask = qib_read_kreg64(dd, kr_errmask); + /* clear any interrupts up to this point (ints still not enabled) */ + qib_write_kreg(dd, kr_intclear, ~0ULL); + + qib_write_kreg(dd, kr_rcvbthqp, + dd->qpn_mask << (QIB_6120_RcvBTHQP_BTHQP_Mask_LSB - 1) | + QIB_KD_QP); +} + +/* + * Disable and enable the armlaunch error. Used for PIO bandwidth testing + * on chips that are count-based, rather than trigger-based. There is no + * reference counting, but that's also fine, given the intended use.
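 */

/*
 * One plausible caller, sketched here as an assumption (this bracketing
 * function is hypothetical and not part of this patch): disable
 * armlaunch around a diagnostic PIO bandwidth loop, then clear and
 * re-enable it. qib_set_6120_armlaunch() itself is defined just below.
 */
static void qib_set_6120_armlaunch(struct qib_devdata *dd, u32 enable);

static void demo_pio_bw_test(struct qib_devdata *dd, int iters)
{
	int i;

	qib_set_6120_armlaunch(dd, 0);	/* stop counting armlaunch errors */
	for (i = 0; i < iters; i++)
		; /* PIO-buffer hammering would go here */
	qib_set_6120_armlaunch(dd, 1);	/* clear any seen, and re-enable */
}

/*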
+ * Only chip-specific because it's all register accesses + */ +static void qib_set_6120_armlaunch(struct qib_devdata *dd, u32 enable) +{ + if (enable) { + qib_write_kreg(dd, kr_errclear, + ERR_MASK(SendPioArmLaunchErr)); + dd->cspec->errormask |= ERR_MASK(SendPioArmLaunchErr); + } else + dd->cspec->errormask &= ~ERR_MASK(SendPioArmLaunchErr); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); +} + +/* + * Formerly took parameter <which> in pre-shifted, + * pre-merged form with LinkCmd and LinkInitCmd + * together, and assuming the zero was NOP. + */ +static void qib_set_ib_6120_lstate(struct qib_pportdata *ppd, u16 linkcmd, + u16 linitcmd) +{ + u64 mod_wd; + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + if (linitcmd == QLOGIC_IB_IBCC_LINKINITCMD_DISABLE) { + /* + * If we are told to disable, note that so link-recovery + * code does not attempt to bring us back up. + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else if (linitcmd || linkcmd == QLOGIC_IB_IBCC_LINKCMD_DOWN) { + /* + * Any other linkinitcmd will lead to LINKDOWN and then + * to INIT (if all is well), so clear flag to let + * link-recovery code attempt to bring us back up. + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + + mod_wd = (linkcmd << QLOGIC_IB_IBCC_LINKCMD_SHIFT) | + (linitcmd << QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + + qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl | mod_wd); + /* write to chip to prevent back-to-back writes of control reg */ + qib_write_kreg(dd, kr_scratch, 0); +} + +/** + * qib_6120_bringup_serdes - bring up the serdes + * @dd: the qlogic_ib device + */ +static int qib_6120_bringup_serdes(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u64 val, config1, prev_val, hwstat, ibc; + + /* Put IBC in reset, sends disabled */ + dd->control &= ~QLOGIC_IB_C_LINKENABLE; + qib_write_kreg(dd, kr_control, 0ULL); + + dd->cspec->ibdeltainprog = 1; + dd->cspec->ibsymsnap = read_6120_creg32(dd, cr_ibsymbolerr); + dd->cspec->iblnkerrsnap = read_6120_creg32(dd, cr_iblinkerrrecov); + + /* flowcontrolwatermark is in units of KBytes */ + ibc = 0x5ULL << SYM_LSB(IBCCtrl, FlowCtrlWaterMark); + /* + * How often flowctrl sent. More or less in usecs; balance against + * watermark value, so that in theory senders always get a flow + * control update in time to not let the IB link go idle. + */ + ibc |= 0x3ULL << SYM_LSB(IBCCtrl, FlowCtrlPeriod); + /* max error tolerance */ + dd->cspec->lli_thresh = 0xf; + ibc |= (u64) dd->cspec->lli_thresh << SYM_LSB(IBCCtrl, PhyerrThreshold); + /* use "real" buffer space for */ + ibc |= 4ULL << SYM_LSB(IBCCtrl, CreditScale); + /* IB credit flow control. */ + ibc |= 0xfULL << SYM_LSB(IBCCtrl, OverrunThreshold); + /* + * set initial max size pkt IBC will send, including ICRC; it's the + * PIO buffer size in dwords, less 1; also see qib_set_mtu() + */ + ibc |= ((u64)(ppd->ibmaxlen >> 2) + 1) << SYM_LSB(IBCCtrl, MaxPktLen); + dd->cspec->ibcctrl = ibc; /* without linkcmd or linkinitcmd! */ + + /* initially come up waiting for TS1, without sending anything. */ + val = dd->cspec->ibcctrl | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE << + QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + qib_write_kreg(dd, kr_ibcctrl, val); + + val = qib_read_kreg64(dd, kr_serdes_cfg0); + config1 = qib_read_kreg64(dd, kr_serdes_cfg1); + + /* + * Force reset on, also set rxdetect enable. 
Must do before reading + serdesstatus at least for simulation, or some of the bits in + * serdes status will come back as undefined and cause simulation + * failures. + */ + val |= SYM_MASK(SerdesCfg0, ResetPLL) | + SYM_MASK(SerdesCfg0, RxDetEnX) | + (SYM_MASK(SerdesCfg0, L1PwrDnA) | + SYM_MASK(SerdesCfg0, L1PwrDnB) | + SYM_MASK(SerdesCfg0, L1PwrDnC) | + SYM_MASK(SerdesCfg0, L1PwrDnD)); + qib_write_kreg(dd, kr_serdes_cfg0, val); + /* be sure chip saw it */ + qib_read_kreg64(dd, kr_scratch); + udelay(5); /* need pll reset set at least for a bit */ + /* + * after PLL is reset, set the per-lane Resets and TxIdle and + * clear the PLL reset and rxdetect (to get falling edge). + * Leave L1PWR bits set (permanently) + */ + val &= ~(SYM_MASK(SerdesCfg0, RxDetEnX) | + SYM_MASK(SerdesCfg0, ResetPLL) | + (SYM_MASK(SerdesCfg0, L1PwrDnA) | + SYM_MASK(SerdesCfg0, L1PwrDnB) | + SYM_MASK(SerdesCfg0, L1PwrDnC) | + SYM_MASK(SerdesCfg0, L1PwrDnD))); + val |= (SYM_MASK(SerdesCfg0, ResetA) | + SYM_MASK(SerdesCfg0, ResetB) | + SYM_MASK(SerdesCfg0, ResetC) | + SYM_MASK(SerdesCfg0, ResetD)) | + SYM_MASK(SerdesCfg0, TxIdeEnX); + qib_write_kreg(dd, kr_serdes_cfg0, val); + /* be sure chip saw it */ + (void) qib_read_kreg64(dd, kr_scratch); + /* need PLL reset clear for at least 11 usec before lane + * resets cleared; give it a few more to be sure */ + udelay(15); + val &= ~((SYM_MASK(SerdesCfg0, ResetA) | + SYM_MASK(SerdesCfg0, ResetB) | + SYM_MASK(SerdesCfg0, ResetC) | + SYM_MASK(SerdesCfg0, ResetD)) | + SYM_MASK(SerdesCfg0, TxIdeEnX)); + + qib_write_kreg(dd, kr_serdes_cfg0, val); + /* be sure chip saw it */ + (void) qib_read_kreg64(dd, kr_scratch); + + val = qib_read_kreg64(dd, kr_xgxs_cfg); + prev_val = val; + if (val & QLOGIC_IB_XGXS_RESET) + val &= ~QLOGIC_IB_XGXS_RESET; + if (SYM_FIELD(val, XGXSCfg, polarity_inv) != ppd->rx_pol_inv) { + /* need to compensate for Tx inversion in partner */ + val &= ~SYM_MASK(XGXSCfg, polarity_inv); + val |= (u64)ppd->rx_pol_inv << SYM_LSB(XGXSCfg, polarity_inv); + } + if (val != prev_val) + qib_write_kreg(dd, kr_xgxs_cfg, val); + + val = qib_read_kreg64(dd, kr_serdes_cfg0); + + /* clear current and de-emphasis bits */ + config1 &= ~0x0ffffffff00ULL; + /* set current to 20ma */ + config1 |= 0x00000000000ULL; + /* set de-emphasis to -5.68dB */ + config1 |= 0x0cccc000000ULL; + qib_write_kreg(dd, kr_serdes_cfg1, config1); + + /* base and port guid same for single port */ + ppd->guid = dd->base_guid; + + /* + * the process of setting and un-resetting the serdes normally + * causes a serdes PLL error, so check for that and clear it + * here. Also clear the hwerr bit in errstatus, but not others.
 + */ + hwstat = qib_read_kreg64(dd, kr_hwerrstatus); + if (hwstat) { + /* should just have PLL, clear all set, in any case */ + qib_write_kreg(dd, kr_hwerrclear, hwstat); + qib_write_kreg(dd, kr_errclear, ERR_MASK(HardwareErr)); + } + + dd->control |= QLOGIC_IB_C_LINKENABLE; + dd->control &= ~QLOGIC_IB_C_FREEZEMODE; + qib_write_kreg(dd, kr_control, dd->control); + + return 0; +} + +/** + * qib_6120_quiet_serdes - set serdes to txidle + * @ppd: physical port of the qlogic_ib device + * Called when driver is being unloaded + */ +static void qib_6120_quiet_serdes(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u64 val; + + qib_set_ib_6120_lstate(ppd, 0, QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + + /* disable IBC */ + dd->control &= ~QLOGIC_IB_C_LINKENABLE; + qib_write_kreg(dd, kr_control, + dd->control | QLOGIC_IB_C_FREEZEMODE); + + if (dd->cspec->ibsymdelta || dd->cspec->iblnkerrdelta || + dd->cspec->ibdeltainprog) { + u64 diagc; + + /* enable counter writes */ + diagc = qib_read_kreg64(dd, kr_hwdiagctrl); + qib_write_kreg(dd, kr_hwdiagctrl, + diagc | SYM_MASK(HwDiagCtrl, CounterWrEnable)); + + if (dd->cspec->ibsymdelta || dd->cspec->ibdeltainprog) { + val = read_6120_creg32(dd, cr_ibsymbolerr); + if (dd->cspec->ibdeltainprog) + val -= val - dd->cspec->ibsymsnap; + val -= dd->cspec->ibsymdelta; + write_6120_creg(dd, cr_ibsymbolerr, val); + } + if (dd->cspec->iblnkerrdelta || dd->cspec->ibdeltainprog) { + val = read_6120_creg32(dd, cr_iblinkerrrecov); + if (dd->cspec->ibdeltainprog) + val -= val - dd->cspec->iblnkerrsnap; + val -= dd->cspec->iblnkerrdelta; + write_6120_creg(dd, cr_iblinkerrrecov, val); + } + + /* and disable counter writes */ + qib_write_kreg(dd, kr_hwdiagctrl, diagc); + } + + val = qib_read_kreg64(dd, kr_serdes_cfg0); + val |= SYM_MASK(SerdesCfg0, TxIdeEnX); + qib_write_kreg(dd, kr_serdes_cfg0, val); +} + +/** + * qib_6120_setup_setextled - set the state of the two external LEDs + * @ppd: port of the qlogic_ib device + * @on: whether the link is up or not + * + * The exact combo of LEDs, if on is true, is determined by looking + * at the ibcstatus. + * + * These LEDs indicate the physical and logical state of IB link. + * For this chip (at least with recommended board pinouts), LED1 + * is Yellow (logical state) and LED2 is Green (physical state). + * + * Note: We try to match the Mellanox HCA LED behavior as best + * we can. Green indicates physical link state is OK (something is + * plugged in, and we can train). + * Amber indicates the link is logically up (ACTIVE). + * Mellanox further blinks the amber LED to indicate data packet + * activity, but we have no hardware support for that, so it would + * require waking up every 10-20 msecs and checking the counters + * on the chip, and then turning the LED off if appropriate. That's + * visible overhead, so not something we will do. + * + */ +static void qib_6120_setup_setextled(struct qib_pportdata *ppd, u32 on) +{ + u64 extctl, val, lst, ltst; + unsigned long flags; + struct qib_devdata *dd = ppd->dd; + + /* + * The diags use the LED to indicate diag info, so we leave + * the external LED alone when the diags are running. + */ + if (dd->diag_client) + return; + + /* Allow override of LED display for, e.g., locating a system in a rack */ + if (ppd->led_override) { + ltst = (ppd->led_override & QIB_LED_PHYS) ? + IB_PHYSPORTSTATE_LINKUP : IB_PHYSPORTSTATE_DISABLED; + lst = (ppd->led_override & QIB_LED_LOG) ?
+ IB_PORT_ACTIVE : IB_PORT_DOWN; + } else if (on) { + val = qib_read_kreg64(dd, kr_ibcstatus); + ltst = qib_6120_phys_portstate(val); + lst = qib_6120_iblink_state(val); + } else { + ltst = 0; + lst = 0; + } + + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + extctl = dd->cspec->extctrl & ~(SYM_MASK(EXTCtrl, LEDPriPortGreenOn) | + SYM_MASK(EXTCtrl, LEDPriPortYellowOn)); + + if (ltst == IB_PHYSPORTSTATE_LINKUP) + extctl |= SYM_MASK(EXTCtrl, LEDPriPortYellowOn); + if (lst == IB_PORT_ACTIVE) + extctl |= SYM_MASK(EXTCtrl, LEDPriPortGreenOn); + dd->cspec->extctrl = extctl; + qib_write_kreg(dd, kr_extctrl, extctl); + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); +} + +static void qib_6120_free_irq(struct qib_devdata *dd) +{ + if (dd->cspec->irq) { + free_irq(dd->cspec->irq, dd); + dd->cspec->irq = 0; + } + qib_nomsi(dd); +} + +/** + * qib_6120_setup_cleanup - clean up any per-chip chip-specific stuff + * @dd: the qlogic_ib device + * + * This is called during driver unload. +*/ +static void qib_6120_setup_cleanup(struct qib_devdata *dd) +{ + qib_6120_free_irq(dd); + kfree(dd->cspec->cntrs); + kfree(dd->cspec->portcntrs); + if (dd->cspec->dummy_hdrq) { + dma_free_coherent(&dd->pcidev->dev, + ALIGN(dd->rcvhdrcnt * + dd->rcvhdrentsize * + sizeof(u32), PAGE_SIZE), + dd->cspec->dummy_hdrq, + dd->cspec->dummy_hdrq_phys); + dd->cspec->dummy_hdrq = NULL; + } +} + +static void qib_wantpiobuf_6120_intr(struct qib_devdata *dd, u32 needint) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if (needint) + dd->sendctrl |= SYM_MASK(SendCtrl, PIOIntBufAvail); + else + dd->sendctrl &= ~SYM_MASK(SendCtrl, PIOIntBufAvail); + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0ULL); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); +} + +/* + * handle errors and unusual events first, separate function + * to improve cache hits for fast path interrupt handling + */ +static noinline void unlikely_6120_intr(struct qib_devdata *dd, u64 istat) +{ + if (unlikely(istat & ~QLOGIC_IB_I_BITSEXTANT)) + qib_dev_err(dd, "interrupt with unknown interrupts %Lx set\n", + istat & ~QLOGIC_IB_I_BITSEXTANT); + + if (istat & QLOGIC_IB_I_ERROR) { + u64 estat = 0; + + qib_stats.sps_errints++; + estat = qib_read_kreg64(dd, kr_errstatus); + if (!estat) + qib_devinfo(dd->pcidev, + "error interrupt (%Lx), but no error bits set!\n", + istat); + handle_6120_errors(dd, estat); + } + + if (istat & QLOGIC_IB_I_GPIO) { + u32 gpiostatus; + u32 to_clear = 0; + + /* + * GPIO_3..5 on IBA6120 Rev2 chips indicate + * errors that we need to count. + */ + gpiostatus = qib_read_kreg32(dd, kr_gpio_status); + /* First the error-counter case. */ + if (gpiostatus & GPIO_ERRINTR_MASK) { + /* want to clear the bits we see asserted. */ + to_clear |= (gpiostatus & GPIO_ERRINTR_MASK); + + /* + * Count appropriately, clear bits out of our copy, + * as they have been "handled". + */ + if (gpiostatus & (1 << GPIO_RXUVL_BIT)) + dd->cspec->rxfc_unsupvl_errs++; + if (gpiostatus & (1 << GPIO_OVRUN_BIT)) + dd->cspec->overrun_thresh_errs++; + if (gpiostatus & (1 << GPIO_LLI_BIT)) + dd->cspec->lli_errs++; + gpiostatus &= ~GPIO_ERRINTR_MASK; + } + if (gpiostatus) { + /* + * Some unexpected bits remain. If they could have + * caused the interrupt, complain and clear. + * To avoid repetition of this condition, also clear + * the mask. It is almost certainly due to error. 
+			 */
+			const u32 mask = qib_read_kreg32(dd, kr_gpio_mask);
+
+			/*
+			 * Also check that the chip reflects our shadow, and
+			 * report issues if they caused the interrupt; we
+			 * suppress those by refreshing from the shadow.
+			 */
+			if (mask & gpiostatus) {
+				to_clear |= (gpiostatus & mask);
+				dd->cspec->gpio_mask &= ~(gpiostatus & mask);
+				qib_write_kreg(dd, kr_gpio_mask,
+					       dd->cspec->gpio_mask);
+			}
+		}
+		if (to_clear)
+			qib_write_kreg(dd, kr_gpio_clear, (u64) to_clear);
+	}
+}
+
+static irqreturn_t qib_6120intr(int irq, void *data)
+{
+	struct qib_devdata *dd = data;
+	irqreturn_t ret;
+	u32 istat, ctxtrbits, rmask, crcs = 0;
+	unsigned i;
+
+	if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) {
+		/*
+		 * This return value is not great, but we do not want the
+		 * interrupt core code to remove our interrupt handler
+		 * because we don't appear to be handling an interrupt
+		 * during a chip reset.
+		 */
+		ret = IRQ_HANDLED;
+		goto bail;
+	}
+
+	istat = qib_read_kreg32(dd, kr_intstatus);
+
+	if (unlikely(!istat)) {
+		ret = IRQ_NONE; /* not our interrupt, or already handled */
+		goto bail;
+	}
+	if (unlikely(istat == -1)) {
+		qib_bad_intrstatus(dd);
+		/* don't know if it was our interrupt or not */
+		ret = IRQ_NONE;
+		goto bail;
+	}
+
+	this_cpu_inc(*dd->int_counter);
+
+	if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT |
+			      QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR)))
+		unlikely_6120_intr(dd, istat);
+
+	/*
+	 * Clear the interrupt bits we found set, relatively early, so we
+	 * "know" the chip will have seen this by the time we process
+	 * the queue, and will re-interrupt if necessary. The processor
+	 * itself won't take the interrupt again until we return.
+	 */
+	qib_write_kreg(dd, kr_intclear, istat);
+
+	/*
+	 * Handle kernel receive queues before checking for pio buffers
+	 * available since receives can overflow; piobuf waiters can afford
+	 * a few extra cycles, since they were waiting anyway.
+	 */
+	ctxtrbits = istat &
+		((QLOGIC_IB_I_RCVAVAIL_MASK << QLOGIC_IB_I_RCVAVAIL_SHIFT) |
+		 (QLOGIC_IB_I_RCVURG_MASK << QLOGIC_IB_I_RCVURG_SHIFT));
+	if (ctxtrbits) {
+		rmask = (1U << QLOGIC_IB_I_RCVAVAIL_SHIFT) |
+			(1U << QLOGIC_IB_I_RCVURG_SHIFT);
+		for (i = 0; i < dd->first_user_ctxt; i++) {
+			if (ctxtrbits & rmask) {
+				ctxtrbits &= ~rmask;
+				crcs += qib_kreceive(dd->rcd[i],
+						     &dd->cspec->lli_counter,
+						     NULL);
+			}
+			rmask <<= 1;
+		}
+		if (crcs) {
+			u32 cntr = dd->cspec->lli_counter;
+
+			cntr += crcs;
+			if (cntr) {
+				if (cntr > dd->cspec->lli_thresh) {
+					dd->cspec->lli_counter = 0;
+					dd->cspec->lli_errs++;
+				} else
+					dd->cspec->lli_counter += cntr;
+			}
+		}
+
+		if (ctxtrbits) {
+			ctxtrbits =
+				(ctxtrbits >> QLOGIC_IB_I_RCVAVAIL_SHIFT) |
+				(ctxtrbits >> QLOGIC_IB_I_RCVURG_SHIFT);
+			qib_handle_urcv(dd, ctxtrbits);
+		}
+	}
+
+	if ((istat & QLOGIC_IB_I_SPIOBUFAVAIL) && (dd->flags & QIB_INITTED))
+		qib_ib_piobufavail(dd);
+
+	ret = IRQ_HANDLED;
+bail:
+	return ret;
+}
+
+/*
+ * Set up our chip-specific interrupt handler.
+ * The interrupt type has already been set up, so
+ * we just need to do the registration and error checking.
+ */
+static void qib_setup_6120_interrupt(struct qib_devdata *dd)
+{
+	/*
+	 * If the chip supports added error indication via GPIO pins,
+	 * enable interrupts on those bits so the interrupt routine
+	 * can count the events. Also set flag so interrupt routine
+	 * can know they are expected.
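+	 * (On this chip the pins in question are GPIO_RXUVL_BIT,
+	 * GPIO_OVRUN_BIT and GPIO_LLI_BIT, collected in GPIO_ERRINTR_MASK;
+	 * unlikely_6120_intr() above is the consumer that counts them.)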
+ */ + if (SYM_FIELD(dd->revision, Revision_R, + ChipRevMinor) > 1) { + /* Rev2+ reports extra errors via internal GPIO pins */ + dd->cspec->gpio_mask |= GPIO_ERRINTR_MASK; + qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask); + } + + if (!dd->cspec->irq) + qib_dev_err(dd, + "irq is 0, BIOS error? Interrupts won't work\n"); + else { + int ret; + ret = request_irq(dd->cspec->irq, qib_6120intr, 0, + QIB_DRV_NAME, dd); + if (ret) + qib_dev_err(dd, + "Couldn't setup interrupt (irq=%d): %d\n", + dd->cspec->irq, ret); + } +} + +/** + * pe_boardname - fill in the board name + * @dd: the qlogic_ib device + * + * info is based on the board revision register + */ +static void pe_boardname(struct qib_devdata *dd) +{ + char *n; + u32 boardid, namelen; + + boardid = SYM_FIELD(dd->revision, Revision, + BoardID); + + switch (boardid) { + case 2: + n = "InfiniPath_QLE7140"; + break; + default: + qib_dev_err(dd, "Unknown 6120 board with ID %u\n", boardid); + n = "Unknown_InfiniPath_6120"; + break; + } + namelen = strlen(n) + 1; + dd->boardname = kmalloc(namelen, GFP_KERNEL); + if (!dd->boardname) + qib_dev_err(dd, "Failed allocation for board name: %s\n", n); + else + snprintf(dd->boardname, namelen, "%s", n); + + if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2) + qib_dev_err(dd, + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); + + snprintf(dd->boardversion, sizeof(dd->boardversion), + "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", + QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname, + (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch), + dd->majrev, dd->minrev, + (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); + +} + +/* + * This routine sleeps, so it can only be called from user context, not + * from interrupt context. If we need interrupt context, we can split + * it into two routines. + */ +static int qib_6120_setup_reset(struct qib_devdata *dd) +{ + u64 val; + int i; + int ret; + u16 cmdval; + u8 int_line, clinesz; + + qib_pcie_getcmd(dd, &cmdval, &int_line, &clinesz); + + /* Use ERROR so it shows up in logs, etc. */ + qib_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->unit); + + /* no interrupts till re-initted */ + qib_6120_set_intr_state(dd, 0); + + dd->cspec->ibdeltainprog = 0; + dd->cspec->ibsymdelta = 0; + dd->cspec->iblnkerrdelta = 0; + + /* + * Keep chip from being accessed until we are ready. Use + * writeq() directly, to allow the write even though QIB_PRESENT + * isn't set. + */ + dd->flags &= ~(QIB_INITTED | QIB_PRESENT); + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); + val = dd->control | QLOGIC_IB_C_RESET; + writeq(val, &dd->kregbase[kr_control]); + mb(); /* prevent compiler re-ordering around actual reset */ + + for (i = 1; i <= 5; i++) { + /* + * Allow MBIST, etc. to complete; longer on each retry. + * We sometimes get machine checks from bus timeout if no + * response, so for now, make it *really* long. + */ + msleep(1000 + (1 + i) * 2000); + + qib_pcie_reenable(dd, cmdval, int_line, clinesz); + + /* + * Use readq directly, so we don't need to mark it as PRESENT + * until we get a successful indication that all is well. 
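+		 * (readq bypasses the qib_read_kreg64() wrapper, which is
+		 * not safe to rely on while QIB_PRESENT is clear;
+		 * init_6120_variables() uses the same trick for the very
+		 * first revision read.)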
+		 */
+		val = readq(&dd->kregbase[kr_revision]);
+		if (val == dd->revision) {
+			dd->flags |= QIB_PRESENT; /* it's back */
+			ret = qib_reinit_intr(dd);
+			goto bail;
+		}
+	}
+	ret = 0; /* failed */
+
+bail:
+	if (ret) {
+		if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
+			qib_dev_err(dd,
+				"Reset failed to setup PCIe or interrupts; continuing anyway\n");
+		/* clear the reset error, init error/hwerror mask */
+		qib_6120_init_hwerrors(dd);
+		/* for Rev2 error interrupts; nop for rev 1 */
+		qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask);
+	}
+	return ret;
+}
+
+/**
+ * qib_6120_put_tid - write a TID in chip
+ * @dd: the qlogic_ib device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0)
+ * for expected
+ * @pa: physical address of in memory buffer; tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for special locking etc.
+ * It's used for both the full cleanup on exit, as well as the normal
+ * setup and teardown.
+ */
+static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
+			     u32 type, unsigned long pa)
+{
+	u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
+	unsigned long flags;
+	int tidx;
+	spinlock_t *tidlockp; /* select appropriate spinlock */
+
+	if (!dd->kregbase)
+		return;
+
+	if (pa != dd->tidinvalid) {
+		if (pa & ((1U << 11) - 1)) {
+			qib_dev_err(dd, "Physaddr %lx not 2KB aligned!\n",
+				    pa);
+			return;
+		}
+		pa >>= 11;
+		if (pa & ~QLOGIC_IB_RT_ADDR_MASK) {
+			qib_dev_err(dd,
+				"Physical page address 0x%lx larger than supported\n",
+				pa);
+			return;
+		}
+
+		if (type == RCVHQ_RCV_TYPE_EAGER)
+			pa |= dd->tidtemplate;
+		else /* for now, always full 4KB page */
+			pa |= 2 << 29;
+	}
+
+	/*
+	 * Avoid chip issue by writing the scratch register
+	 * before and after the TID, and with an io write barrier.
+	 * We use a spinlock around the writes, so they can't intermix
+	 * with other TID (eager or expected) writes (the chip problem
+	 * is triggered by back-to-back TID writes). Unfortunately, this
+	 * call can be done from interrupt level for the ctxt 0 eager TIDs,
+	 * so we have to use irqsave locks.
+	 */
+	/*
+	 * Assumes tidptr always > egrtidbase
+	 * if type == RCVHQ_RCV_TYPE_EAGER.
+	 */
+	tidx = tidptr - dd->egrtidbase;
+
+	tidlockp = (type == RCVHQ_RCV_TYPE_EAGER && tidx < dd->rcvhdrcnt)
+		? &dd->cspec->kernel_tid_lock : &dd->cspec->user_tid_lock;
+	spin_lock_irqsave(tidlockp, flags);
+	qib_write_kreg(dd, kr_scratch, 0xfeeddeaf);
+	writel(pa, tidp32);
+	qib_write_kreg(dd, kr_scratch, 0xdeadbeef);
+	mmiowb();
+	spin_unlock_irqrestore(tidlockp, flags);
+}
+
+/**
+ * qib_6120_put_tid_2 - write a TID in chip, Revision 2 or higher
+ * @dd: the qlogic_ib device
+ * @tidptr: pointer to the expected TID (in chip) to update
+ * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0)
+ * for expected
+ * @pa: physical address of in memory buffer; tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for selection of the
+ * appropriate "flavor". The static calls in cleanup just use the
+ * revision-agnostic form, as they are not performance critical.
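+ * qib_init_iba6120_funcs() points dd->f_put_tid at this variant only
+ * for minrev >= 2; rev1 parts must use the locked, scratch-bracketed
+ * qib_6120_put_tid() above.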
+ */ +static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr, + u32 type, unsigned long pa) +{ + u32 __iomem *tidp32 = (u32 __iomem *)tidptr; + u32 tidx; + + if (!dd->kregbase) + return; + + if (pa != dd->tidinvalid) { + if (pa & ((1U << 11) - 1)) { + qib_dev_err(dd, "Physaddr %lx not 2KB aligned!\n", + pa); + return; + } + pa >>= 11; + if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); + return; + } + + if (type == RCVHQ_RCV_TYPE_EAGER) + pa |= dd->tidtemplate; + else /* for now, always full 4KB page */ + pa |= 2 << 29; + } + tidx = tidptr - dd->egrtidbase; + writel(pa, tidp32); + mmiowb(); +} + + +/** + * qib_6120_clear_tids - clear all TID entries for a context, expected and eager + * @dd: the qlogic_ib device + * @ctxt: the context + * + * clear all TID entries for a context, expected and eager. + * Used from qib_close(). On this chip, TIDs are only 32 bits, + * not 64, but they are still on 64 bit boundaries, so tidbase + * is declared as u64 * for the pointer math, even though we write 32 bits + */ +static void qib_6120_clear_tids(struct qib_devdata *dd, + struct qib_ctxtdata *rcd) +{ + u64 __iomem *tidbase; + unsigned long tidinv; + u32 ctxt; + int i; + + if (!dd->kregbase || !rcd) + return; + + ctxt = rcd->ctxt; + + tidinv = dd->tidinvalid; + tidbase = (u64 __iomem *) + ((char __iomem *)(dd->kregbase) + + dd->rcvtidbase + + ctxt * dd->rcvtidcnt * sizeof(*tidbase)); + + for (i = 0; i < dd->rcvtidcnt; i++) + /* use func pointer because could be one of two funcs */ + dd->f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, + tidinv); + + tidbase = (u64 __iomem *) + ((char __iomem *)(dd->kregbase) + + dd->rcvegrbase + + rcd->rcvegr_tid_base * sizeof(*tidbase)); + + for (i = 0; i < rcd->rcvegrcnt; i++) + /* use func pointer because could be one of two funcs */ + dd->f_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, + tidinv); +} + +/** + * qib_6120_tidtemplate - setup constants for TID updates + * @dd: the qlogic_ib device + * + * We setup stuff that we use a lot, to avoid calculating each time + */ +static void qib_6120_tidtemplate(struct qib_devdata *dd) +{ + u32 egrsize = dd->rcvegrbufsize; + + /* + * For now, we always allocate 4KB buffers (at init) so we can + * receive max size packets. We may want a module parameter to + * specify 2KB or 4KB and/or make be per ctxt instead of per device + * for those who want to reduce memory footprint. Note that the + * rcvhdrentsize size must be large enough to hold the largest + * IB header (currently 96 bytes) that we expect to handle (plus of + * course the 2 dwords of RHF). + */ + if (egrsize == 2048) + dd->tidtemplate = 1U << 29; + else if (egrsize == 4096) + dd->tidtemplate = 2U << 29; + dd->tidinvalid = 0; +} + +int __attribute__((weak)) qib_unordered_wc(void) +{ + return 0; +} + +/** + * qib_6120_get_base_info - set chip-specific flags for user code + * @rcd: the qlogic_ib ctxt + * @kbase: qib_base_info pointer + * + * We set the PCIE flag because the lower bandwidth on PCIe vs + * HyperTransport can affect some user packet algorithms. 
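+ * (Illustration only: user-level code, not in this file, would test
+ * these bits, e.g. checking (spi_runtime_flags & QIB_RUNTIME_PCIE)
+ * to choose a copy strategy.)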
+ */ +static int qib_6120_get_base_info(struct qib_ctxtdata *rcd, + struct qib_base_info *kinfo) +{ + if (qib_unordered_wc()) + kinfo->spi_runtime_flags |= QIB_RUNTIME_FORCE_WC_ORDER; + + kinfo->spi_runtime_flags |= QIB_RUNTIME_PCIE | + QIB_RUNTIME_FORCE_PIOAVAIL | QIB_RUNTIME_PIO_REGSWAPPED; + return 0; +} + + +static struct qib_message_header * +qib_6120_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr) +{ + return (struct qib_message_header *) + &rhf_addr[sizeof(u64) / sizeof(u32)]; +} + +static void qib_6120_config_ctxts(struct qib_devdata *dd) +{ + dd->ctxtcnt = qib_read_kreg32(dd, kr_portcnt); + if (qib_n_krcv_queues > 1) { + dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports; + if (dd->first_user_ctxt > dd->ctxtcnt) + dd->first_user_ctxt = dd->ctxtcnt; + dd->qpn_mask = dd->first_user_ctxt <= 2 ? 2 : 6; + } else + dd->first_user_ctxt = dd->num_pports; + dd->n_krcv_queues = dd->first_user_ctxt; +} + +static void qib_update_6120_usrhead(struct qib_ctxtdata *rcd, u64 hd, + u32 updegr, u32 egrhd, u32 npkts) +{ + if (updegr) + qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); + mmiowb(); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + mmiowb(); +} + +static u32 qib_6120_hdrqempty(struct qib_ctxtdata *rcd) +{ + u32 head, tail; + + head = qib_read_ureg32(rcd->dd, ur_rcvhdrhead, rcd->ctxt); + if (rcd->rcvhdrtail_kvaddr) + tail = qib_get_rcvhdrtail(rcd); + else + tail = qib_read_ureg32(rcd->dd, ur_rcvhdrtail, rcd->ctxt); + return head == tail; +} + +/* + * Used when we close any ctxt, for DMA already in flight + * at close. Can't be done until we know hdrq size, so not + * early in chip init. + */ +static void alloc_dummy_hdrq(struct qib_devdata *dd) +{ + dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev, + dd->rcd[0]->rcvhdrq_size, + &dd->cspec->dummy_hdrq_phys, + GFP_ATOMIC | __GFP_COMP); + if (!dd->cspec->dummy_hdrq) { + qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n"); + /* fallback to just 0'ing */ + dd->cspec->dummy_hdrq_phys = 0UL; + } +} + +/* + * Modify the RCVCTRL register in chip-specific way. This + * is a function because bit positions and (future) register + * location is chip-specific, but the needed operations are + * generic. <op> is a bit-mask because we often want to + * do multiple modifications. + */ +static void rcvctrl_6120_mod(struct qib_pportdata *ppd, unsigned int op, + int ctxt) +{ + struct qib_devdata *dd = ppd->dd; + u64 mask, val; + unsigned long flags; + + spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); + + if (op & QIB_RCVCTRL_TAILUPD_ENB) + dd->rcvctrl |= (1ULL << QLOGIC_IB_R_TAILUPD_SHIFT); + if (op & QIB_RCVCTRL_TAILUPD_DIS) + dd->rcvctrl &= ~(1ULL << QLOGIC_IB_R_TAILUPD_SHIFT); + if (op & QIB_RCVCTRL_PKEY_ENB) + dd->rcvctrl &= ~(1ULL << IBA6120_R_PKEY_DIS_SHIFT); + if (op & QIB_RCVCTRL_PKEY_DIS) + dd->rcvctrl |= (1ULL << IBA6120_R_PKEY_DIS_SHIFT); + if (ctxt < 0) + mask = (1ULL << dd->ctxtcnt) - 1; + else + mask = (1ULL << ctxt); + if (op & QIB_RCVCTRL_CTXT_ENB) { + /* always done for specific ctxt */ + dd->rcvctrl |= (mask << SYM_LSB(RcvCtrl, PortEnable)); + if (!(dd->flags & QIB_NODMA_RTAIL)) + dd->rcvctrl |= 1ULL << QLOGIC_IB_R_TAILUPD_SHIFT; + /* Write these registers before the context is enabled. 
 */
+		qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, ctxt,
+				    dd->rcd[ctxt]->rcvhdrqtailaddr_phys);
+		qib_write_kreg_ctxt(dd, kr_rcvhdraddr, ctxt,
+				    dd->rcd[ctxt]->rcvhdrq_phys);
+
+		if (ctxt == 0 && !dd->cspec->dummy_hdrq)
+			alloc_dummy_hdrq(dd);
+	}
+	if (op & QIB_RCVCTRL_CTXT_DIS)
+		dd->rcvctrl &= ~(mask << SYM_LSB(RcvCtrl, PortEnable));
+	if (op & QIB_RCVCTRL_INTRAVAIL_ENB)
+		dd->rcvctrl |= (mask << QLOGIC_IB_R_INTRAVAIL_SHIFT);
+	if (op & QIB_RCVCTRL_INTRAVAIL_DIS)
+		dd->rcvctrl &= ~(mask << QLOGIC_IB_R_INTRAVAIL_SHIFT);
+	qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl);
+	if ((op & QIB_RCVCTRL_INTRAVAIL_ENB) && dd->rhdrhead_intr_off) {
+		/* arm rcv interrupt */
+		val = qib_read_ureg32(dd, ur_rcvhdrhead, ctxt) |
+			dd->rhdrhead_intr_off;
+		qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt);
+	}
+	if (op & QIB_RCVCTRL_CTXT_ENB) {
+		/*
+		 * Init the context registers also; if we were
+		 * disabled, tail and head should both be zero
+		 * already from the enable, but since we don't
+		 * know, we have to do it explicitly.
+		 */
+		val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt);
+		qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt);
+
+		val = qib_read_ureg32(dd, ur_rcvhdrtail, ctxt);
+		dd->rcd[ctxt]->head = val;
+		/* If kctxt, interrupt on next receive. */
+		if (ctxt < dd->first_user_ctxt)
+			val |= dd->rhdrhead_intr_off;
+		qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt);
+	}
+	if (op & QIB_RCVCTRL_CTXT_DIS) {
+		/*
+		 * Be paranoid, and never write 0's to these, just use an
+		 * unused page. Of course,
+		 * rcvhdraddr points to a large chunk of memory, so this
+		 * could still trash things, but at least it won't trash
+		 * page 0, and by disabling the ctxt, it should stop "soon",
+		 * even if a packet or two is already in flight after we
+		 * disabled the ctxt. Only 6120 has this issue.
+		 */
+		if (ctxt >= 0) {
+			qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, ctxt,
+					    dd->cspec->dummy_hdrq_phys);
+			qib_write_kreg_ctxt(dd, kr_rcvhdraddr, ctxt,
+					    dd->cspec->dummy_hdrq_phys);
+		} else {
+			unsigned i;
+
+			for (i = 0; i < dd->cfgctxts; i++) {
+				qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr,
+						    i, dd->cspec->dummy_hdrq_phys);
+				qib_write_kreg_ctxt(dd, kr_rcvhdraddr,
+						    i, dd->cspec->dummy_hdrq_phys);
+			}
+		}
+	}
+	spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags);
+}
+
+/*
+ * Modify the SENDCTRL register in chip-specific way. This
+ * is a function because there may be multiple such registers with
+ * slightly different layouts. Only the operations actually used
+ * are implemented so far.
+ * Chip requires no back-to-back sendctrl writes, so write
+ * scratch register after writing sendctrl
+ */
+static void sendctrl_6120_mod(struct qib_pportdata *ppd, u32 op)
+{
+	struct qib_devdata *dd = ppd->dd;
+	u64 tmp_dd_sendctrl;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->sendctrl_lock, flags);
+
+	/* First the ones that are "sticky", saved in shadow */
+	if (op & QIB_SENDCTRL_CLEAR)
+		dd->sendctrl = 0;
+	if (op & QIB_SENDCTRL_SEND_DIS)
+		dd->sendctrl &= ~SYM_MASK(SendCtrl, PIOEnable);
+	else if (op & QIB_SENDCTRL_SEND_ENB)
+		dd->sendctrl |= SYM_MASK(SendCtrl, PIOEnable);
+	if (op & QIB_SENDCTRL_AVAIL_DIS)
+		dd->sendctrl &= ~SYM_MASK(SendCtrl, PIOBufAvailUpd);
+	else if (op & QIB_SENDCTRL_AVAIL_ENB)
+		dd->sendctrl |= SYM_MASK(SendCtrl, PIOBufAvailUpd);
+
+	if (op & QIB_SENDCTRL_DISARM_ALL) {
+		u32 i, last;
+
+		tmp_dd_sendctrl = dd->sendctrl;
+		/*
+		 * disarm any that are not yet launched, disabling sends
+		 * and updates until done.
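+		 * For example, with the illustrative values piobcnt2k = 128
+		 * and piobcnt4k = 32, the loop below issues 160 sendctrl
+		 * writes, one per buffer number OR'd into the low Disarm
+		 * bits, each followed by a scratch write to honor the
+		 * chip's no-back-to-back-sendctrl-writes rule.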
+ */ + last = dd->piobcnt2k + dd->piobcnt4k; + tmp_dd_sendctrl &= + ~(SYM_MASK(SendCtrl, PIOEnable) | + SYM_MASK(SendCtrl, PIOBufAvailUpd)); + for (i = 0; i < last; i++) { + qib_write_kreg(dd, kr_sendctrl, tmp_dd_sendctrl | + SYM_MASK(SendCtrl, Disarm) | i); + qib_write_kreg(dd, kr_scratch, 0); + } + } + + tmp_dd_sendctrl = dd->sendctrl; + + if (op & QIB_SENDCTRL_FLUSH) + tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Abort); + if (op & QIB_SENDCTRL_DISARM) + tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Disarm) | + ((op & QIB_6120_SendCtrl_DisarmPIOBuf_RMASK) << + SYM_LSB(SendCtrl, DisarmPIOBuf)); + if (op & QIB_SENDCTRL_AVAIL_BLIP) + tmp_dd_sendctrl &= ~SYM_MASK(SendCtrl, PIOBufAvailUpd); + + qib_write_kreg(dd, kr_sendctrl, tmp_dd_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + + if (op & QIB_SENDCTRL_AVAIL_BLIP) { + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + + if (op & QIB_SENDCTRL_FLUSH) { + u32 v; + /* + * ensure writes have hit chip, then do a few + * more reads, to allow DMA of pioavail registers + * to occur, so in-memory copy is in sync with + * the chip. Not always safe to sleep. + */ + v = qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_scratch, v); + v = qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_scratch, v); + qib_read_kreg32(dd, kr_scratch); + } +} + +/** + * qib_portcntr_6120 - read a per-port counter + * @dd: the qlogic_ib device + * @creg: the counter to snapshot + */ +static u64 qib_portcntr_6120(struct qib_pportdata *ppd, u32 reg) +{ + u64 ret = 0ULL; + struct qib_devdata *dd = ppd->dd; + u16 creg; + /* 0xffff for unimplemented or synthesized counters */ + static const u16 xlator[] = { + [QIBPORTCNTR_PKTSEND] = cr_pktsend, + [QIBPORTCNTR_WORDSEND] = cr_wordsend, + [QIBPORTCNTR_PSXMITDATA] = 0xffff, + [QIBPORTCNTR_PSXMITPKTS] = 0xffff, + [QIBPORTCNTR_PSXMITWAIT] = 0xffff, + [QIBPORTCNTR_SENDSTALL] = cr_sendstall, + [QIBPORTCNTR_PKTRCV] = cr_pktrcv, + [QIBPORTCNTR_PSRCVDATA] = 0xffff, + [QIBPORTCNTR_PSRCVPKTS] = 0xffff, + [QIBPORTCNTR_RCVEBP] = cr_rcvebp, + [QIBPORTCNTR_RCVOVFL] = cr_rcvovfl, + [QIBPORTCNTR_WORDRCV] = cr_wordrcv, + [QIBPORTCNTR_RXDROPPKT] = cr_rxdroppkt, + [QIBPORTCNTR_RXLOCALPHYERR] = 0xffff, + [QIBPORTCNTR_RXVLERR] = 0xffff, + [QIBPORTCNTR_ERRICRC] = cr_erricrc, + [QIBPORTCNTR_ERRVCRC] = cr_errvcrc, + [QIBPORTCNTR_ERRLPCRC] = cr_errlpcrc, + [QIBPORTCNTR_BADFORMAT] = cr_badformat, + [QIBPORTCNTR_ERR_RLEN] = cr_err_rlen, + [QIBPORTCNTR_IBSYMBOLERR] = cr_ibsymbolerr, + [QIBPORTCNTR_INVALIDRLEN] = cr_invalidrlen, + [QIBPORTCNTR_UNSUPVL] = cr_txunsupvl, + [QIBPORTCNTR_EXCESSBUFOVFL] = 0xffff, + [QIBPORTCNTR_ERRLINK] = cr_errlink, + [QIBPORTCNTR_IBLINKDOWN] = cr_iblinkdown, + [QIBPORTCNTR_IBLINKERRRECOV] = cr_iblinkerrrecov, + [QIBPORTCNTR_LLI] = 0xffff, + [QIBPORTCNTR_PSINTERVAL] = 0xffff, + [QIBPORTCNTR_PSSTART] = 0xffff, + [QIBPORTCNTR_PSSTAT] = 0xffff, + [QIBPORTCNTR_VL15PKTDROP] = 0xffff, + [QIBPORTCNTR_ERRPKEY] = cr_errpkey, + [QIBPORTCNTR_KHDROVFL] = 0xffff, + }; + + if (reg >= ARRAY_SIZE(xlator)) { + qib_devinfo(ppd->dd->pcidev, + "Unimplemented portcounter %u\n", reg); + goto done; + } + creg = xlator[reg]; + + /* handle counters requests not implemented as chip counters */ + if (reg == QIBPORTCNTR_LLI) + ret = dd->cspec->lli_errs; + else if (reg == QIBPORTCNTR_EXCESSBUFOVFL) + ret = dd->cspec->overrun_thresh_errs; + else if (reg == QIBPORTCNTR_KHDROVFL) { + int i; + + /* sum over all kernel contexts */ + for (i = 0; i < dd->first_user_ctxt; 
 i++)
+			ret += read_6120_creg32(dd, cr_portovfl + i);
+	} else if (reg == QIBPORTCNTR_PSSTAT)
+		ret = dd->cspec->pma_sample_status;
+	if (creg == 0xffff)
+		goto done;
+
+	/*
+	 * only fast incrementing counters are 64bit; use 32 bit reads to
+	 * avoid two independent reads when on opteron
+	 */
+	if (creg == cr_wordsend || creg == cr_wordrcv ||
+	    creg == cr_pktsend || creg == cr_pktrcv)
+		ret = read_6120_creg(dd, creg);
+	else
+		ret = read_6120_creg32(dd, creg);
+	if (creg == cr_ibsymbolerr) {
+		if (dd->cspec->ibdeltainprog)
+			ret -= ret - dd->cspec->ibsymsnap;
+		ret -= dd->cspec->ibsymdelta;
+	} else if (creg == cr_iblinkerrrecov) {
+		if (dd->cspec->ibdeltainprog)
+			ret -= ret - dd->cspec->iblnkerrsnap;
+		ret -= dd->cspec->iblnkerrdelta;
+	}
+	if (reg == QIBPORTCNTR_RXDROPPKT) /* add special cased count */
+		ret += dd->cspec->rxfc_unsupvl_errs;
+
+done:
+	return ret;
+}
+
+/*
+ * Device counter names (not port-specific), one line per stat,
+ * single string. Used by utilities like ipathstats to print the stats
+ * in a way which works for different versions of drivers, without changing
+ * the utility. Names need to be 12 chars or less (w/o newline), for proper
+ * display by utility.
+ * Non-error counters are first.
+ * Start of "error" counters is indicated by a leading "E " on the first
+ * "error" counter, and doesn't count in label length.
+ * The EgrOvfl list needs to be last so we truncate them at the configured
+ * context count for the device.
+ * cntr6120indices contains the corresponding register indices.
+ */
+static const char cntr6120names[] =
+	"Interrupts\n"
+	"HostBusStall\n"
+	"E RxTIDFull\n"
+	"RxTIDInvalid\n"
+	"Ctxt0EgrOvfl\n"
+	"Ctxt1EgrOvfl\n"
+	"Ctxt2EgrOvfl\n"
+	"Ctxt3EgrOvfl\n"
+	"Ctxt4EgrOvfl\n";
+
+static const size_t cntr6120indices[] = {
+	cr_lbint,
+	cr_lbflowstall,
+	cr_errtidfull,
+	cr_errtidvalid,
+	cr_portovfl + 0,
+	cr_portovfl + 1,
+	cr_portovfl + 2,
+	cr_portovfl + 3,
+	cr_portovfl + 4,
+};
+
+/*
+ * same as cntr6120names and cntr6120indices, but for port-specific counters.
+ * portcntr6120indices is somewhat complicated by some registers needing + * adjustments of various kinds, and those are ORed with _PORT_VIRT_FLAG + */ +static const char portcntr6120names[] = + "TxPkt\n" + "TxFlowPkt\n" + "TxWords\n" + "RxPkt\n" + "RxFlowPkt\n" + "RxWords\n" + "TxFlowStall\n" + "E IBStatusChng\n" + "IBLinkDown\n" + "IBLnkRecov\n" + "IBRxLinkErr\n" + "IBSymbolErr\n" + "RxLLIErr\n" + "RxBadFormat\n" + "RxBadLen\n" + "RxBufOvrfl\n" + "RxEBP\n" + "RxFlowCtlErr\n" + "RxICRCerr\n" + "RxLPCRCerr\n" + "RxVCRCerr\n" + "RxInvalLen\n" + "RxInvalPKey\n" + "RxPktDropped\n" + "TxBadLength\n" + "TxDropped\n" + "TxInvalLen\n" + "TxUnderrun\n" + "TxUnsupVL\n" + ; + +#define _PORT_VIRT_FLAG 0x8000 /* "virtual", need adjustments */ +static const size_t portcntr6120indices[] = { + QIBPORTCNTR_PKTSEND | _PORT_VIRT_FLAG, + cr_pktsendflow, + QIBPORTCNTR_WORDSEND | _PORT_VIRT_FLAG, + QIBPORTCNTR_PKTRCV | _PORT_VIRT_FLAG, + cr_pktrcvflowctrl, + QIBPORTCNTR_WORDRCV | _PORT_VIRT_FLAG, + QIBPORTCNTR_SENDSTALL | _PORT_VIRT_FLAG, + cr_ibstatuschange, + QIBPORTCNTR_IBLINKDOWN | _PORT_VIRT_FLAG, + QIBPORTCNTR_IBLINKERRRECOV | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRLINK | _PORT_VIRT_FLAG, + QIBPORTCNTR_IBSYMBOLERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_LLI | _PORT_VIRT_FLAG, + QIBPORTCNTR_BADFORMAT | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERR_RLEN | _PORT_VIRT_FLAG, + QIBPORTCNTR_RCVOVFL | _PORT_VIRT_FLAG, + QIBPORTCNTR_RCVEBP | _PORT_VIRT_FLAG, + cr_rcvflowctrl_err, + QIBPORTCNTR_ERRICRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRLPCRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRVCRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_INVALIDRLEN | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRPKEY | _PORT_VIRT_FLAG, + QIBPORTCNTR_RXDROPPKT | _PORT_VIRT_FLAG, + cr_invalidslen, + cr_senddropped, + cr_errslen, + cr_sendunderrun, + cr_txunsupvl, +}; + +/* do all the setup to make the counter reads efficient later */ +static void init_6120_cntrnames(struct qib_devdata *dd) +{ + int i, j = 0; + char *s; + + for (i = 0, s = (char *)cntr6120names; s && j <= dd->cfgctxts; + i++) { + /* we always have at least one counter before the egrovfl */ + if (!j && !strncmp("Ctxt0EgrOvfl", s + 1, 12)) + j = 1; + s = strchr(s + 1, '\n'); + if (s && j) + j++; + } + dd->cspec->ncntrs = i; + if (!s) + /* full list; size is without terminating null */ + dd->cspec->cntrnamelen = sizeof(cntr6120names) - 1; + else + dd->cspec->cntrnamelen = 1 + s - cntr6120names; + dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs + * sizeof(u64), GFP_KERNEL); + if (!dd->cspec->cntrs) + qib_dev_err(dd, "Failed allocation for counters\n"); + + for (i = 0, s = (char *)portcntr6120names; s; i++) + s = strchr(s + 1, '\n'); + dd->cspec->nportcntrs = i - 1; + dd->cspec->portcntrnamelen = sizeof(portcntr6120names) - 1; + dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs + * sizeof(u64), GFP_KERNEL); + if (!dd->cspec->portcntrs) + qib_dev_err(dd, "Failed allocation for portcounters\n"); +} + +static u32 qib_read_6120cntrs(struct qib_devdata *dd, loff_t pos, char **namep, + u64 **cntrp) +{ + u32 ret; + + if (namep) { + ret = dd->cspec->cntrnamelen; + if (pos >= ret) + ret = 0; /* final read after getting everything */ + else + *namep = (char *)cntr6120names; + } else { + u64 *cntr = dd->cspec->cntrs; + int i; + + ret = dd->cspec->ncntrs * sizeof(u64); + if (!cntr || pos >= ret) { + /* everything read, or couldn't get memory */ + ret = 0; + goto done; + } + if (pos >= ret) { + ret = 0; /* final read after getting everything */ + goto done; + } + *cntrp = cntr; + for (i = 0; i < dd->cspec->ncntrs; i++) + *cntr++ = 
read_6120_creg32(dd, cntr6120indices[i]); + } +done: + return ret; +} + +static u32 qib_read_6120portcntrs(struct qib_devdata *dd, loff_t pos, u32 port, + char **namep, u64 **cntrp) +{ + u32 ret; + + if (namep) { + ret = dd->cspec->portcntrnamelen; + if (pos >= ret) + ret = 0; /* final read after getting everything */ + else + *namep = (char *)portcntr6120names; + } else { + u64 *cntr = dd->cspec->portcntrs; + struct qib_pportdata *ppd = &dd->pport[port]; + int i; + + ret = dd->cspec->nportcntrs * sizeof(u64); + if (!cntr || pos >= ret) { + /* everything read, or couldn't get memory */ + ret = 0; + goto done; + } + *cntrp = cntr; + for (i = 0; i < dd->cspec->nportcntrs; i++) { + if (portcntr6120indices[i] & _PORT_VIRT_FLAG) + *cntr++ = qib_portcntr_6120(ppd, + portcntr6120indices[i] & + ~_PORT_VIRT_FLAG); + else + *cntr++ = read_6120_creg32(dd, + portcntr6120indices[i]); + } + } +done: + return ret; +} + +static void qib_chk_6120_errormask(struct qib_devdata *dd) +{ + static u32 fixed; + u32 ctrl; + unsigned long errormask; + unsigned long hwerrs; + + if (!dd->cspec->errormask || !(dd->flags & QIB_INITTED)) + return; + + errormask = qib_read_kreg64(dd, kr_errmask); + + if (errormask == dd->cspec->errormask) + return; + fixed++; + + hwerrs = qib_read_kreg64(dd, kr_hwerrstatus); + ctrl = qib_read_kreg32(dd, kr_control); + + qib_write_kreg(dd, kr_errmask, + dd->cspec->errormask); + + if ((hwerrs & dd->cspec->hwerrmask) || + (ctrl & QLOGIC_IB_C_FREEZEMODE)) { + qib_write_kreg(dd, kr_hwerrclear, 0ULL); + qib_write_kreg(dd, kr_errclear, 0ULL); + /* force re-interrupt of pending events, just in case */ + qib_write_kreg(dd, kr_intclear, 0ULL); + qib_devinfo(dd->pcidev, + "errormask fixed(%u) %lx->%lx, ctrl %x hwerr %lx\n", + fixed, errormask, (unsigned long)dd->cspec->errormask, + ctrl, hwerrs); + } +} + +/** + * qib_get_faststats - get word counters from chip before they overflow + * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * + * This needs more work; in particular, decision on whether we really + * need traffic_wds done the way it is + * called from add_timer + */ +static void qib_get_6120_faststats(unsigned long opaque) +{ + struct qib_devdata *dd = (struct qib_devdata *) opaque; + struct qib_pportdata *ppd = dd->pport; + unsigned long flags; + u64 traffic_wds; + + /* + * don't access the chip while running diags, or memory diags can + * fail + */ + if (!(dd->flags & QIB_INITTED) || dd->diag_client) + /* but re-arm the timer, for diags case; won't hurt other */ + goto done; + + /* + * We now try to maintain an activity timer, based on traffic + * exceeding a threshold, so we need to check the word-counts + * even if they are 64-bit. + */ + traffic_wds = qib_portcntr_6120(ppd, cr_wordsend) + + qib_portcntr_6120(ppd, cr_wordrcv); + spin_lock_irqsave(&dd->eep_st_lock, flags); + traffic_wds -= dd->traffic_wds; + dd->traffic_wds += traffic_wds; + if (traffic_wds >= QIB_TRAFFIC_ACTIVE_THRESHOLD) + atomic_add(5, &dd->active_time); /* S/B #define */ + spin_unlock_irqrestore(&dd->eep_st_lock, flags); + + qib_chk_6120_errormask(dd); +done: + mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER); +} + +/* no interrupt fallback for these chips */ +static int qib_6120_nointr_fallback(struct qib_devdata *dd) +{ + return 0; +} + +/* + * reset the XGXS (between serdes and IBC). Slightly less intrusive + * than resetting the IBC or external link state, and useful in some + * cases to cause some retraining. To do this right, we reset IBC + * as well. 
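+ * The sequence below is: drop QLOGIC_IB_C_LINKENABLE in kr_control,
+ * assert QLOGIC_IB_XGXS_RESET in kr_xgxs_cfg, flush with a scratch
+ * read, deassert the reset, then restore kr_control.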
+ */ +static void qib_6120_xgxs_reset(struct qib_pportdata *ppd) +{ + u64 val, prev_val; + struct qib_devdata *dd = ppd->dd; + + prev_val = qib_read_kreg64(dd, kr_xgxs_cfg); + val = prev_val | QLOGIC_IB_XGXS_RESET; + prev_val &= ~QLOGIC_IB_XGXS_RESET; /* be sure */ + qib_write_kreg(dd, kr_control, + dd->control & ~QLOGIC_IB_C_LINKENABLE); + qib_write_kreg(dd, kr_xgxs_cfg, val); + qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_xgxs_cfg, prev_val); + qib_write_kreg(dd, kr_control, dd->control); +} + +static int qib_6120_get_ib_cfg(struct qib_pportdata *ppd, int which) +{ + int ret; + + switch (which) { + case QIB_IB_CFG_LWID: + ret = ppd->link_width_active; + break; + + case QIB_IB_CFG_SPD: + ret = ppd->link_speed_active; + break; + + case QIB_IB_CFG_LWID_ENB: + ret = ppd->link_width_enabled; + break; + + case QIB_IB_CFG_SPD_ENB: + ret = ppd->link_speed_enabled; + break; + + case QIB_IB_CFG_OP_VLS: + ret = ppd->vls_operational; + break; + + case QIB_IB_CFG_VL_HIGH_CAP: + ret = 0; + break; + + case QIB_IB_CFG_VL_LOW_CAP: + ret = 0; + break; + + case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */ + ret = SYM_FIELD(ppd->dd->cspec->ibcctrl, IBCCtrl, + OverrunThreshold); + break; + + case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */ + ret = SYM_FIELD(ppd->dd->cspec->ibcctrl, IBCCtrl, + PhyerrThreshold); + break; + + case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ + /* will only take effect when the link state changes */ + ret = (ppd->dd->cspec->ibcctrl & + SYM_MASK(IBCCtrl, LinkDownDefaultState)) ? + IB_LINKINITCMD_SLEEP : IB_LINKINITCMD_POLL; + break; + + case QIB_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */ + ret = 0; /* no heartbeat on this chip */ + break; + + case QIB_IB_CFG_PMA_TICKS: + ret = 250; /* 1 usec. */ + break; + + default: + ret = -EINVAL; + break; + } + return ret; +} + +/* + * We assume range checking is already done, if needed. 
+ */ +static int qib_6120_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) +{ + struct qib_devdata *dd = ppd->dd; + int ret = 0; + u64 val64; + u16 lcmd, licmd; + + switch (which) { + case QIB_IB_CFG_LWID_ENB: + ppd->link_width_enabled = val; + break; + + case QIB_IB_CFG_SPD_ENB: + ppd->link_speed_enabled = val; + break; + + case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */ + val64 = SYM_FIELD(dd->cspec->ibcctrl, IBCCtrl, + OverrunThreshold); + if (val64 != val) { + dd->cspec->ibcctrl &= + ~SYM_MASK(IBCCtrl, OverrunThreshold); + dd->cspec->ibcctrl |= (u64) val << + SYM_LSB(IBCCtrl, OverrunThreshold); + qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + break; + + case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */ + val64 = SYM_FIELD(dd->cspec->ibcctrl, IBCCtrl, + PhyerrThreshold); + if (val64 != val) { + dd->cspec->ibcctrl &= + ~SYM_MASK(IBCCtrl, PhyerrThreshold); + dd->cspec->ibcctrl |= (u64) val << + SYM_LSB(IBCCtrl, PhyerrThreshold); + qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + break; + + case QIB_IB_CFG_PKEYS: /* update pkeys */ + val64 = (u64) ppd->pkeys[0] | ((u64) ppd->pkeys[1] << 16) | + ((u64) ppd->pkeys[2] << 32) | + ((u64) ppd->pkeys[3] << 48); + qib_write_kreg(dd, kr_partitionkey, val64); + break; + + case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ + /* will only take effect when the link state changes */ + if (val == IB_LINKINITCMD_POLL) + dd->cspec->ibcctrl &= + ~SYM_MASK(IBCCtrl, LinkDownDefaultState); + else /* SLEEP */ + dd->cspec->ibcctrl |= + SYM_MASK(IBCCtrl, LinkDownDefaultState); + qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + break; + + case QIB_IB_CFG_MTU: /* update the MTU in IBC */ + /* + * Update our housekeeping variables, and set IBC max + * size, same as init code; max IBC is max we allow in + * buffer, less the qword pbc, plus 1 for ICRC, in dwords + * Set even if it's unchanged, print debug message only + * on changes. 
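+		 * (Worked example, illustrative numbers only: an ibmaxlen
+		 * of 4096 bytes gives (4096 >> 2) + 1 = 1025 dwords written
+		 * to the MaxPktLen field below.)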
+ */ + val = (ppd->ibmaxlen >> 2) + 1; + dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, MaxPktLen); + dd->cspec->ibcctrl |= (u64)val << + SYM_LSB(IBCCtrl, MaxPktLen); + qib_write_kreg(dd, kr_ibcctrl, dd->cspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + break; + + case QIB_IB_CFG_LSTATE: /* set the IB link state */ + switch (val & 0xffff0000) { + case IB_LINKCMD_DOWN: + lcmd = QLOGIC_IB_IBCC_LINKCMD_DOWN; + if (!dd->cspec->ibdeltainprog) { + dd->cspec->ibdeltainprog = 1; + dd->cspec->ibsymsnap = + read_6120_creg32(dd, cr_ibsymbolerr); + dd->cspec->iblnkerrsnap = + read_6120_creg32(dd, cr_iblinkerrrecov); + } + break; + + case IB_LINKCMD_ARMED: + lcmd = QLOGIC_IB_IBCC_LINKCMD_ARMED; + break; + + case IB_LINKCMD_ACTIVE: + lcmd = QLOGIC_IB_IBCC_LINKCMD_ACTIVE; + break; + + default: + ret = -EINVAL; + qib_dev_err(dd, "bad linkcmd req 0x%x\n", val >> 16); + goto bail; + } + switch (val & 0xffff) { + case IB_LINKINITCMD_NOP: + licmd = 0; + break; + + case IB_LINKINITCMD_POLL: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_POLL; + break; + + case IB_LINKINITCMD_SLEEP: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_SLEEP; + break; + + case IB_LINKINITCMD_DISABLE: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_DISABLE; + break; + + default: + ret = -EINVAL; + qib_dev_err(dd, "bad linkinitcmd req 0x%x\n", + val & 0xffff); + goto bail; + } + qib_set_ib_6120_lstate(ppd, lcmd, licmd); + goto bail; + + case QIB_IB_CFG_HRTBT: + ret = -EINVAL; + break; + + default: + ret = -EINVAL; + } +bail: + return ret; +} + +static int qib_6120_set_loopback(struct qib_pportdata *ppd, const char *what) +{ + int ret = 0; + if (!strncmp(what, "ibc", 3)) { + ppd->dd->cspec->ibcctrl |= SYM_MASK(IBCCtrl, Loopback); + qib_devinfo(ppd->dd->pcidev, "Enabling IB%u:%u IBC loopback\n", + ppd->dd->unit, ppd->port); + } else if (!strncmp(what, "off", 3)) { + ppd->dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); + } else + ret = -EINVAL; + if (!ret) { + qib_write_kreg(ppd->dd, kr_ibcctrl, ppd->dd->cspec->ibcctrl); + qib_write_kreg(ppd->dd, kr_scratch, 0); + } + return ret; +} + +static void pma_6120_timer(unsigned long data) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)data; + struct qib_chip_specific *cs = ppd->dd->cspec; + struct qib_ibport *ibp = &ppd->ibport_data; + unsigned long flags; + + spin_lock_irqsave(&ibp->lock, flags); + if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED) { + cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; + qib_snapshot_counters(ppd, &cs->sword, &cs->rword, + &cs->spkts, &cs->rpkts, &cs->xmit_wait); + mod_timer(&cs->pma_timer, + jiffies + usecs_to_jiffies(ibp->pma_sample_interval)); + } else if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { + u64 ta, tb, tc, td, te; + + cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; + qib_snapshot_counters(ppd, &ta, &tb, &tc, &td, &te); + + cs->sword = ta - cs->sword; + cs->rword = tb - cs->rword; + cs->spkts = tc - cs->spkts; + cs->rpkts = td - cs->rpkts; + cs->xmit_wait = te - cs->xmit_wait; + } + spin_unlock_irqrestore(&ibp->lock, flags); +} + +/* + * Note that the caller has the ibp->lock held. 
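+ * The sampling state machine is: STARTED (waiting out the start delay)
+ * -> RUNNING (baseline counters snapshotted, interval timer armed) ->
+ * DONE (deltas computed); pma_6120_timer() above drives the transitions.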
+ */ +static void qib_set_cntr_6120_sample(struct qib_pportdata *ppd, u32 intv, + u32 start) +{ + struct qib_chip_specific *cs = ppd->dd->cspec; + + if (start && intv) { + cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_STARTED; + mod_timer(&cs->pma_timer, jiffies + usecs_to_jiffies(start)); + } else if (intv) { + cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; + qib_snapshot_counters(ppd, &cs->sword, &cs->rword, + &cs->spkts, &cs->rpkts, &cs->xmit_wait); + mod_timer(&cs->pma_timer, jiffies + usecs_to_jiffies(intv)); + } else { + cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; + cs->sword = 0; + cs->rword = 0; + cs->spkts = 0; + cs->rpkts = 0; + cs->xmit_wait = 0; + } +} + +static u32 qib_6120_iblink_state(u64 ibcs) +{ + u32 state = (u32)SYM_FIELD(ibcs, IBCStatus, LinkState); + + switch (state) { + case IB_6120_L_STATE_INIT: + state = IB_PORT_INIT; + break; + case IB_6120_L_STATE_ARM: + state = IB_PORT_ARMED; + break; + case IB_6120_L_STATE_ACTIVE: + /* fall through */ + case IB_6120_L_STATE_ACT_DEFER: + state = IB_PORT_ACTIVE; + break; + default: /* fall through */ + case IB_6120_L_STATE_DOWN: + state = IB_PORT_DOWN; + break; + } + return state; +} + +/* returns the IBTA port state, rather than the IBC link training state */ +static u8 qib_6120_phys_portstate(u64 ibcs) +{ + u8 state = (u8)SYM_FIELD(ibcs, IBCStatus, LinkTrainingState); + return qib_6120_physportstate[state]; +} + +static int qib_6120_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) +{ + unsigned long flags; + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_FORCE_NOTIFY; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + + if (ibup) { + if (ppd->dd->cspec->ibdeltainprog) { + ppd->dd->cspec->ibdeltainprog = 0; + ppd->dd->cspec->ibsymdelta += + read_6120_creg32(ppd->dd, cr_ibsymbolerr) - + ppd->dd->cspec->ibsymsnap; + ppd->dd->cspec->iblnkerrdelta += + read_6120_creg32(ppd->dd, cr_iblinkerrrecov) - + ppd->dd->cspec->iblnkerrsnap; + } + qib_hol_init(ppd); + } else { + ppd->dd->cspec->lli_counter = 0; + if (!ppd->dd->cspec->ibdeltainprog) { + ppd->dd->cspec->ibdeltainprog = 1; + ppd->dd->cspec->ibsymsnap = + read_6120_creg32(ppd->dd, cr_ibsymbolerr); + ppd->dd->cspec->iblnkerrsnap = + read_6120_creg32(ppd->dd, cr_iblinkerrrecov); + } + qib_hol_down(ppd); + } + + qib_6120_setup_setextled(ppd, ibup); + + return 0; +} + +/* Does read/modify/write to appropriate registers to + * set output and direction bits selected by mask. + * these are in their canonical postions (e.g. lsb of + * dir will end up in D48 of extctrl on existing chips). + * returns contents of GP Inputs. + */ +static int gpio_6120_mod(struct qib_devdata *dd, u32 out, u32 dir, u32 mask) +{ + u64 read_val, new_out; + unsigned long flags; + + if (mask) { + /* some bits being written, lock access to GPIO */ + dir &= mask; + out &= mask; + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + dd->cspec->extctrl &= ~((u64)mask << SYM_LSB(EXTCtrl, GPIOOe)); + dd->cspec->extctrl |= ((u64) dir << SYM_LSB(EXTCtrl, GPIOOe)); + new_out = (dd->cspec->gpio_out & ~mask) | out; + + qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl); + qib_write_kreg(dd, kr_gpio_out, new_out); + dd->cspec->gpio_out = new_out; + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); + } + /* + * It is unlikely that a read at this time would get valid + * data on a pin whose direction line was set in the same + * call to this function. 
We include the read here because + * that allows us to potentially combine a change on one pin with + * a read on another, and because the old code did something like + * this. + */ + read_val = qib_read_kreg64(dd, kr_extstatus); + return SYM_FIELD(read_val, EXTStatus, GPIOIn); +} + +/* + * Read fundamental info we need to use the chip. These are + * the registers that describe chip capabilities, and are + * saved in shadow registers. + */ +static void get_6120_chip_params(struct qib_devdata *dd) +{ + u64 val; + u32 piobufs; + int mtu; + + dd->uregbase = qib_read_kreg32(dd, kr_userregbase); + + dd->rcvtidcnt = qib_read_kreg32(dd, kr_rcvtidcnt); + dd->rcvtidbase = qib_read_kreg32(dd, kr_rcvtidbase); + dd->rcvegrbase = qib_read_kreg32(dd, kr_rcvegrbase); + dd->palign = qib_read_kreg32(dd, kr_palign); + dd->piobufbase = qib_read_kreg64(dd, kr_sendpiobufbase); + dd->pio2k_bufbase = dd->piobufbase & 0xffffffff; + + dd->rcvhdrcnt = qib_read_kreg32(dd, kr_rcvegrcnt); + + val = qib_read_kreg64(dd, kr_sendpiosize); + dd->piosize2k = val & ~0U; + dd->piosize4k = val >> 32; + + mtu = ib_mtu_enum_to_int(qib_ibmtu); + if (mtu == -1) + mtu = QIB_DEFAULT_MTU; + dd->pport->ibmtu = (u32)mtu; + + val = qib_read_kreg64(dd, kr_sendpiobufcnt); + dd->piobcnt2k = val & ~0U; + dd->piobcnt4k = val >> 32; + dd->last_pio = dd->piobcnt4k + dd->piobcnt2k - 1; + /* these may be adjusted in init_chip_wc_pat() */ + dd->pio2kbase = (u32 __iomem *) + (((char __iomem *)dd->kregbase) + dd->pio2k_bufbase); + if (dd->piobcnt4k) { + dd->pio4kbase = (u32 __iomem *) + (((char __iomem *) dd->kregbase) + + (dd->piobufbase >> 32)); + /* + * 4K buffers take 2 pages; we use roundup just to be + * paranoid; we calculate it once here, rather than on + * ever buf allocate + */ + dd->align4k = ALIGN(dd->piosize4k, dd->palign); + } + + piobufs = dd->piobcnt4k + dd->piobcnt2k; + + dd->pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) / + (sizeof(u64) * BITS_PER_BYTE / 2); +} + +/* + * The chip base addresses in cspec and cpspec have to be set + * after possible init_chip_wc_pat(), rather than in + * get_6120_chip_params(), so split out as separate function + */ +static void set_6120_baseaddrs(struct qib_devdata *dd) +{ + u32 cregbase; + cregbase = qib_read_kreg32(dd, kr_counterregbase); + dd->cspec->cregbase = (u64 __iomem *) + ((char __iomem *) dd->kregbase + cregbase); + + dd->egrtidbase = (u64 __iomem *) + ((char __iomem *) dd->kregbase + dd->rcvegrbase); +} + +/* + * Write the final few registers that depend on some of the + * init setup. Done late in init, just before bringing up + * the serdes. 
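+ * In particular, SendPIOAvailAddr is written and then read back below;
+ * a mismatch means the chip did not latch the DMA address of the
+ * pioavail update buffer, which is treated as a fatal software error.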
+ */ +static int qib_late_6120_initreg(struct qib_devdata *dd) +{ + int ret = 0; + u64 val; + + qib_write_kreg(dd, kr_rcvhdrentsize, dd->rcvhdrentsize); + qib_write_kreg(dd, kr_rcvhdrsize, dd->rcvhdrsize); + qib_write_kreg(dd, kr_rcvhdrcnt, dd->rcvhdrcnt); + qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); + val = qib_read_kreg64(dd, kr_sendpioavailaddr); + if (val != dd->pioavailregs_phys) { + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); + ret = -EINVAL; + } + return ret; +} + +static int init_6120_variables(struct qib_devdata *dd) +{ + int ret = 0; + struct qib_pportdata *ppd; + u32 sbufs; + + ppd = (struct qib_pportdata *)(dd + 1); + dd->pport = ppd; + dd->num_pports = 1; + + dd->cspec = (struct qib_chip_specific *)(ppd + dd->num_pports); + ppd->cpspec = NULL; /* not used in this chip */ + + spin_lock_init(&dd->cspec->kernel_tid_lock); + spin_lock_init(&dd->cspec->user_tid_lock); + spin_lock_init(&dd->cspec->rcvmod_lock); + spin_lock_init(&dd->cspec->gpio_lock); + + /* we haven't yet set QIB_PRESENT, so use read directly */ + dd->revision = readq(&dd->kregbase[kr_revision]); + + if ((dd->revision & 0xffffffffU) == 0xffffffffU) { + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); + ret = -ENODEV; + goto bail; + } + dd->flags |= QIB_PRESENT; /* now register routines work */ + + dd->majrev = (u8) SYM_FIELD(dd->revision, Revision_R, + ChipRevMajor); + dd->minrev = (u8) SYM_FIELD(dd->revision, Revision_R, + ChipRevMinor); + + get_6120_chip_params(dd); + pe_boardname(dd); /* fill in boardname */ + + /* + * GPIO bits for TWSI data and clock, + * used for serial EEPROM. + */ + dd->gpio_sda_num = _QIB_GPIO_SDA_NUM; + dd->gpio_scl_num = _QIB_GPIO_SCL_NUM; + dd->twsi_eeprom_dev = QIB_TWSI_NO_DEV; + + if (qib_unordered_wc()) + dd->flags |= QIB_PIO_FLUSH_WC; + + /* + * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. + * 2 is Some Misc, 3 is reserved for future. + */ + dd->eep_st_masks[0].hwerrs_to_log = HWE_MASK(TXEMemParityErr); + + /* Ignore errors in PIO/PBC on systems with unordered write-combining */ + if (qib_unordered_wc()) + dd->eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY; + + dd->eep_st_masks[1].hwerrs_to_log = HWE_MASK(RXEMemParityErr); + + dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated); + + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; + ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; + ppd->link_speed_supported = QIB_IB_SDR; + ppd->link_width_enabled = IB_WIDTH_4X; + ppd->link_speed_enabled = ppd->link_speed_supported; + /* these can't change for this chip, so set once */ + ppd->link_width_active = ppd->link_width_enabled; + ppd->link_speed_active = ppd->link_speed_enabled; + ppd->vls_supported = IB_VL_VL0; + ppd->vls_operational = ppd->vls_supported; + + dd->rcvhdrentsize = QIB_RCVHDR_ENTSIZE; + dd->rcvhdrsize = QIB_DFLT_RCVHDRSIZE; + dd->rhf_offset = 0; + + /* we always allocate at least 2048 bytes for eager buffers */ + ret = ib_mtu_enum_to_int(qib_ibmtu); + dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); + + qib_6120_tidtemplate(dd); + + /* + * We can request a receive interrupt for 1 or + * more packets from current offset. For now, we set this + * up for a single packet. 
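+	 * (The packet-count field sits in the upper half of the rcvhdrhead
+	 * register, so a count of one is encoded as 1ULL << 32; a count of
+	 * N would presumably be (u64)N << 32.)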
+ */ + dd->rhdrhead_intr_off = 1ULL << 32; + + /* setup the stats timer; the add_timer is done at end of init */ + init_timer(&dd->stats_timer); + dd->stats_timer.function = qib_get_6120_faststats; + dd->stats_timer.data = (unsigned long) dd; + + init_timer(&dd->cspec->pma_timer); + dd->cspec->pma_timer.function = pma_6120_timer; + dd->cspec->pma_timer.data = (unsigned long) ppd; + + dd->ureg_align = qib_read_kreg32(dd, kr_palign); + + dd->piosize2kmax_dwords = dd->piosize2k >> 2; + qib_6120_config_ctxts(dd); + qib_set_ctxtcnt(dd); + + if (qib_wc_pat) { + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; + } + set_6120_baseaddrs(dd); /* set chip access pointers now */ + + ret = 0; + if (qib_mini_init) + goto bail; + + qib_num_cfg_vls = 1; /* if any 6120's, only one VL */ + + ret = qib_create_ctxts(dd); + init_6120_cntrnames(dd); + + /* use all of 4KB buffers for the kernel, otherwise 16 */ + sbufs = dd->piobcnt4k ? dd->piobcnt4k : 16; + + dd->lastctxt_piobuf = dd->piobcnt2k + dd->piobcnt4k - sbufs; + dd->pbufsctxt = dd->lastctxt_piobuf / + (dd->cfgctxts - dd->first_user_ctxt); + + if (ret) + goto bail; +bail: + return ret; +} + +/* + * For this chip, we want to use the same buffer every time + * when we are trying to bring the link up (they are always VL15 + * packets). At that link state the packet should always go out immediately + * (or at least be discarded at the tx interface if the link is down). + * If it doesn't, and the buffer isn't available, that means some other + * sender has gotten ahead of us, and is preventing our packet from going + * out. In that case, we flush all packets, and try again. If that still + * fails, we fail the request, and hope things work the next time around. + * + * We don't need very complicated heuristics on whether the packet had + * time to go out or not, since even at SDR 1X, it goes out in very short + * time periods, covered by the chip reads done here and as part of the + * flush. + */ +static u32 __iomem *get_6120_link_buf(struct qib_pportdata *ppd, u32 *bnum) +{ + u32 __iomem *buf; + u32 lbuf = ppd->dd->piobcnt2k + ppd->dd->piobcnt4k - 1; + + /* + * always blip to get avail list updated, since it's almost + * always needed, and is fairly cheap. 
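+	 * A blip (QIB_SENDCTRL_AVAIL_BLIP in sendctrl_6120_mod() above)
+	 * momentarily clears PIOBufAvailUpd and then restores the shadow
+	 * value, forcing the chip to push a fresh pioavail copy to memory.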
+ */ + sendctrl_6120_mod(ppd->dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + qib_read_kreg64(ppd->dd, kr_scratch); /* extra chip flush */ + buf = qib_getsendbuf_range(ppd->dd, bnum, lbuf, lbuf); + if (buf) + goto done; + + sendctrl_6120_mod(ppd, QIB_SENDCTRL_DISARM_ALL | QIB_SENDCTRL_FLUSH | + QIB_SENDCTRL_AVAIL_BLIP); + ppd->dd->upd_pio_shadow = 1; /* update our idea of what's busy */ + qib_read_kreg64(ppd->dd, kr_scratch); /* extra chip flush */ + buf = qib_getsendbuf_range(ppd->dd, bnum, lbuf, lbuf); +done: + return buf; +} + +static u32 __iomem *qib_6120_getsendbuf(struct qib_pportdata *ppd, u64 pbc, + u32 *pbufnum) +{ + u32 first, last, plen = pbc & QIB_PBC_LENGTH_MASK; + struct qib_devdata *dd = ppd->dd; + u32 __iomem *buf; + + if (((pbc >> 32) & PBC_6120_VL15_SEND_CTRL) && + !(ppd->lflags & (QIBL_IB_AUTONEG_INPROG | QIBL_LINKACTIVE))) + buf = get_6120_link_buf(ppd, pbufnum); + else { + + if ((plen + 1) > dd->piosize2kmax_dwords) + first = dd->piobcnt2k; + else + first = 0; + /* try 4k if all 2k busy, so same last for both sizes */ + last = dd->piobcnt2k + dd->piobcnt4k - 1; + buf = qib_getsendbuf_range(dd, pbufnum, first, last); + } + return buf; +} + +static int init_sdma_6120_regs(struct qib_pportdata *ppd) +{ + return -ENODEV; +} + +static u16 qib_sdma_6120_gethead(struct qib_pportdata *ppd) +{ + return 0; +} + +static int qib_sdma_6120_busy(struct qib_pportdata *ppd) +{ + return 0; +} + +static void qib_sdma_update_6120_tail(struct qib_pportdata *ppd, u16 tail) +{ +} + +static void qib_6120_sdma_sendctrl(struct qib_pportdata *ppd, unsigned op) +{ +} + +static void qib_sdma_set_6120_desc_cnt(struct qib_pportdata *ppd, unsigned cnt) +{ +} + +/* + * the pbc doesn't need a VL15 indicator, but we need it for link_buf. + * The chip ignores the bit if set. + */ +static u32 qib_6120_setpbc_control(struct qib_pportdata *ppd, u32 plen, + u8 srate, u8 vl) +{ + return vl == 15 ? PBC_6120_VL15_SEND_CTRL : 0; +} + +static void qib_6120_initvl15_bufs(struct qib_devdata *dd) +{ +} + +static void qib_6120_init_ctxt(struct qib_ctxtdata *rcd) +{ + rcd->rcvegrcnt = rcd->dd->rcvhdrcnt; + rcd->rcvegr_tid_base = rcd->ctxt * rcd->rcvegrcnt; +} + +static void qib_6120_txchk_change(struct qib_devdata *dd, u32 start, + u32 len, u32 avail, struct qib_ctxtdata *rcd) +{ +} + +static void writescratch(struct qib_devdata *dd, u32 val) +{ + (void) qib_write_kreg(dd, kr_scratch, val); +} + +static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum) +{ + return -ENXIO; +} + +#ifdef CONFIG_INFINIBAND_QIB_DCA +static int qib_6120_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + return 0; +} +#endif + +/* Dummy function, as 6120 boards never disable EEPROM Write */ +static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen) +{ + return 1; +} + +/** + * qib_init_iba6120_funcs - set up the chip-specific function pointers + * @pdev: pci_dev of the qlogic_ib device + * @ent: pci_device_id matching this chip + * + * This is global, and is called directly at init to set up the + * chip-specific function pointers for later use. + * + * It also allocates/partially-inits the qib_devdata struct for + * this device. 
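+ * The allocation below asks for room for one qib_pportdata plus one
+ * qib_chip_specific after the qib_devdata proper; init_6120_variables()
+ * carves those out (ppd = dd + 1, cspec just past ppd).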
+ */ +struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct qib_devdata *dd; + int ret; + + dd = qib_alloc_devdata(pdev, sizeof(struct qib_pportdata) + + sizeof(struct qib_chip_specific)); + if (IS_ERR(dd)) + goto bail; + + dd->f_bringup_serdes = qib_6120_bringup_serdes; + dd->f_cleanup = qib_6120_setup_cleanup; + dd->f_clear_tids = qib_6120_clear_tids; + dd->f_free_irq = qib_6120_free_irq; + dd->f_get_base_info = qib_6120_get_base_info; + dd->f_get_msgheader = qib_6120_get_msgheader; + dd->f_getsendbuf = qib_6120_getsendbuf; + dd->f_gpio_mod = gpio_6120_mod; + dd->f_eeprom_wen = qib_6120_eeprom_wen; + dd->f_hdrqempty = qib_6120_hdrqempty; + dd->f_ib_updown = qib_6120_ib_updown; + dd->f_init_ctxt = qib_6120_init_ctxt; + dd->f_initvl15_bufs = qib_6120_initvl15_bufs; + dd->f_intr_fallback = qib_6120_nointr_fallback; + dd->f_late_initreg = qib_late_6120_initreg; + dd->f_setpbc_control = qib_6120_setpbc_control; + dd->f_portcntr = qib_portcntr_6120; + dd->f_put_tid = (dd->minrev >= 2) ? + qib_6120_put_tid_2 : + qib_6120_put_tid; + dd->f_quiet_serdes = qib_6120_quiet_serdes; + dd->f_rcvctrl = rcvctrl_6120_mod; + dd->f_read_cntrs = qib_read_6120cntrs; + dd->f_read_portcntrs = qib_read_6120portcntrs; + dd->f_reset = qib_6120_setup_reset; + dd->f_init_sdma_regs = init_sdma_6120_regs; + dd->f_sdma_busy = qib_sdma_6120_busy; + dd->f_sdma_gethead = qib_sdma_6120_gethead; + dd->f_sdma_sendctrl = qib_6120_sdma_sendctrl; + dd->f_sdma_set_desc_cnt = qib_sdma_set_6120_desc_cnt; + dd->f_sdma_update_tail = qib_sdma_update_6120_tail; + dd->f_sendctrl = sendctrl_6120_mod; + dd->f_set_armlaunch = qib_set_6120_armlaunch; + dd->f_set_cntr_sample = qib_set_cntr_6120_sample; + dd->f_iblink_state = qib_6120_iblink_state; + dd->f_ibphys_portstate = qib_6120_phys_portstate; + dd->f_get_ib_cfg = qib_6120_get_ib_cfg; + dd->f_set_ib_cfg = qib_6120_set_ib_cfg; + dd->f_set_ib_loopback = qib_6120_set_loopback; + dd->f_set_intr_state = qib_6120_set_intr_state; + dd->f_setextled = qib_6120_setup_setextled; + dd->f_txchk_change = qib_6120_txchk_change; + dd->f_update_usrhead = qib_update_6120_usrhead; + dd->f_wantpiobuf_intr = qib_wantpiobuf_6120_intr; + dd->f_xgxs_reset = qib_6120_xgxs_reset; + dd->f_writescratch = writescratch; + dd->f_tempsense_rd = qib_6120_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_6120_notify_dca; +#endif + /* + * Do remaining pcie setup and save pcie values in dd. + * Any error printing is already done by the init code. + * On return, we have the chip mapped and accessible, + * but chip registers are not set up until start of + * init_6120_variables. 
+ */ + ret = qib_pcie_ddinit(dd, pdev, ent); + if (ret < 0) + goto bail_free; + + /* initialize chip-specific variables */ + ret = init_6120_variables(dd); + if (ret) + goto bail_cleanup; + + if (qib_mini_init) + goto bail; + + if (qib_pcie_params(dd, 8, NULL, NULL)) + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); + dd->cspec->irq = pdev->irq; /* save IRQ */ + + /* clear diagctrl register, in case diags were running and crashed */ + qib_write_kreg(dd, kr_hwdiagctrl, 0); + + if (qib_read_kreg64(dd, kr_hwerrstatus) & + QLOGIC_IB_HWE_SERDESPLLFAILED) + qib_write_kreg(dd, kr_hwerrclear, + QLOGIC_IB_HWE_SERDESPLLFAILED); + + /* setup interrupt handler (interrupt type handled above) */ + qib_setup_6120_interrupt(dd); + /* Note that qpn_mask is set by qib_6120_config_ctxts() first */ + qib_6120_init_hwerrors(dd); + + goto bail; + +bail_cleanup: + qib_pcie_ddcleanup(dd); +bail_free: + qib_free_devdata(dd); + dd = ERR_PTR(ret); +bail: + return dd; +} diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c new file mode 100644 index 00000000000..7dec89fdc12 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -0,0 +1,4659 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+/*
+ * This file contains all of the code that is specific to the
+ * QLogic_IB 7220 chip (except that specific to the SerDes)
+ */
+
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <rdma/ib_verbs.h>
+
+#include "qib.h"
+#include "qib_7220.h"
+
+static void qib_setup_7220_setextled(struct qib_pportdata *, u32);
+static void qib_7220_handle_hwerrors(struct qib_devdata *, char *, size_t);
+static void sendctrl_7220_mod(struct qib_pportdata *ppd, u32 op);
+static u32 qib_7220_iblink_state(u64);
+static u8 qib_7220_phys_portstate(u64);
+static void qib_sdma_update_7220_tail(struct qib_pportdata *, u16);
+static void qib_set_ib_7220_lstate(struct qib_pportdata *, u16, u16);
+
+/*
+ * This file contains almost all the chip-specific register information and
+ * access functions for the QLogic QLogic_IB 7220 PCI-Express chip, with the
+ * exception of SerDes support, which is in qib_sd7220.c.
+ */
+
+/* Below uses machine-generated qib_chipnum_regs.h file */
+#define KREG_IDX(regname) (QIB_7220_##regname##_OFFS / sizeof(u64))
+
+/* Use defines to tie machine-generated names to lower-case names */
+#define kr_control KREG_IDX(Control)
+#define kr_counterregbase KREG_IDX(CntrRegBase)
+#define kr_errclear KREG_IDX(ErrClear)
+#define kr_errmask KREG_IDX(ErrMask)
+#define kr_errstatus KREG_IDX(ErrStatus)
+#define kr_extctrl KREG_IDX(EXTCtrl)
+#define kr_extstatus KREG_IDX(EXTStatus)
+#define kr_gpio_clear KREG_IDX(GPIOClear)
+#define kr_gpio_mask KREG_IDX(GPIOMask)
+#define kr_gpio_out KREG_IDX(GPIOOut)
+#define kr_gpio_status KREG_IDX(GPIOStatus)
+#define kr_hrtbt_guid KREG_IDX(HRTBT_GUID)
+#define kr_hwdiagctrl KREG_IDX(HwDiagCtrl)
+#define kr_hwerrclear KREG_IDX(HwErrClear)
+#define kr_hwerrmask KREG_IDX(HwErrMask)
+#define kr_hwerrstatus KREG_IDX(HwErrStatus)
+#define kr_ibcctrl KREG_IDX(IBCCtrl)
+#define kr_ibcddrctrl KREG_IDX(IBCDDRCtrl)
+#define kr_ibcddrstatus KREG_IDX(IBCDDRStatus)
+#define kr_ibcstatus KREG_IDX(IBCStatus)
+#define kr_ibserdesctrl KREG_IDX(IBSerDesCtrl)
+#define kr_intclear KREG_IDX(IntClear)
+#define kr_intmask KREG_IDX(IntMask)
+#define kr_intstatus KREG_IDX(IntStatus)
+#define kr_ncmodectrl KREG_IDX(IBNCModeCtrl)
+#define kr_palign KREG_IDX(PageAlign)
+#define kr_partitionkey KREG_IDX(RcvPartitionKey)
+#define kr_portcnt KREG_IDX(PortCnt)
+#define kr_rcvbthqp KREG_IDX(RcvBTHQP)
+#define kr_rcvctrl KREG_IDX(RcvCtrl)
+#define kr_rcvegrbase KREG_IDX(RcvEgrBase)
+#define kr_rcvegrcnt KREG_IDX(RcvEgrCnt)
+#define kr_rcvhdrcnt KREG_IDX(RcvHdrCnt)
+#define kr_rcvhdrentsize KREG_IDX(RcvHdrEntSize)
+#define kr_rcvhdrsize KREG_IDX(RcvHdrSize)
+#define kr_rcvpktledcnt KREG_IDX(RcvPktLEDCnt)
+#define kr_rcvtidbase KREG_IDX(RcvTIDBase)
+#define kr_rcvtidcnt KREG_IDX(RcvTIDCnt)
+#define kr_revision KREG_IDX(Revision)
+#define kr_scratch KREG_IDX(Scratch)
+#define kr_sendbuffererror KREG_IDX(SendBufErr0)
+#define kr_sendctrl KREG_IDX(SendCtrl)
+#define kr_senddmabase KREG_IDX(SendDmaBase)
+#define kr_senddmabufmask0 KREG_IDX(SendDmaBufMask0)
+#define kr_senddmabufmask1 (KREG_IDX(SendDmaBufMask0) + 1)
+#define kr_senddmabufmask2 (KREG_IDX(SendDmaBufMask0) + 2)
+#define kr_senddmahead KREG_IDX(SendDmaHead)
+#define kr_senddmaheadaddr KREG_IDX(SendDmaHeadAddr)
+#define kr_senddmalengen KREG_IDX(SendDmaLenGen)
+#define kr_senddmastatus KREG_IDX(SendDmaStatus)
+#define kr_senddmatail KREG_IDX(SendDmaTail)
+#define kr_sendpioavailaddr KREG_IDX(SendBufAvailAddr)
+#define kr_sendpiobufbase 
KREG_IDX(SendBufBase) +#define kr_sendpiobufcnt KREG_IDX(SendBufCnt) +#define kr_sendpiosize KREG_IDX(SendBufSize) +#define kr_sendregbase KREG_IDX(SendRegBase) +#define kr_userregbase KREG_IDX(UserRegBase) +#define kr_xgxs_cfg KREG_IDX(XGXSCfg) + +/* These must only be written via qib_write_kreg_ctxt() */ +#define kr_rcvhdraddr KREG_IDX(RcvHdrAddr0) +#define kr_rcvhdrtailaddr KREG_IDX(RcvHdrTailAddr0) + + +#define CREG_IDX(regname) ((QIB_7220_##regname##_OFFS - \ + QIB_7220_LBIntCnt_OFFS) / sizeof(u64)) + +#define cr_badformat CREG_IDX(RxVersionErrCnt) +#define cr_erricrc CREG_IDX(RxICRCErrCnt) +#define cr_errlink CREG_IDX(RxLinkMalformCnt) +#define cr_errlpcrc CREG_IDX(RxLPCRCErrCnt) +#define cr_errpkey CREG_IDX(RxPKeyMismatchCnt) +#define cr_rcvflowctrl_err CREG_IDX(RxFlowCtrlViolCnt) +#define cr_err_rlen CREG_IDX(RxLenErrCnt) +#define cr_errslen CREG_IDX(TxLenErrCnt) +#define cr_errtidfull CREG_IDX(RxTIDFullErrCnt) +#define cr_errtidvalid CREG_IDX(RxTIDValidErrCnt) +#define cr_errvcrc CREG_IDX(RxVCRCErrCnt) +#define cr_ibstatuschange CREG_IDX(IBStatusChangeCnt) +#define cr_lbint CREG_IDX(LBIntCnt) +#define cr_invalidrlen CREG_IDX(RxMaxMinLenErrCnt) +#define cr_invalidslen CREG_IDX(TxMaxMinLenErrCnt) +#define cr_lbflowstall CREG_IDX(LBFlowStallCnt) +#define cr_pktrcv CREG_IDX(RxDataPktCnt) +#define cr_pktrcvflowctrl CREG_IDX(RxFlowPktCnt) +#define cr_pktsend CREG_IDX(TxDataPktCnt) +#define cr_pktsendflow CREG_IDX(TxFlowPktCnt) +#define cr_portovfl CREG_IDX(RxP0HdrEgrOvflCnt) +#define cr_rcvebp CREG_IDX(RxEBPCnt) +#define cr_rcvovfl CREG_IDX(RxBufOvflCnt) +#define cr_senddropped CREG_IDX(TxDroppedPktCnt) +#define cr_sendstall CREG_IDX(TxFlowStallCnt) +#define cr_sendunderrun CREG_IDX(TxUnderrunCnt) +#define cr_wordrcv CREG_IDX(RxDwordCnt) +#define cr_wordsend CREG_IDX(TxDwordCnt) +#define cr_txunsupvl CREG_IDX(TxUnsupVLErrCnt) +#define cr_rxdroppkt CREG_IDX(RxDroppedPktCnt) +#define cr_iblinkerrrecov CREG_IDX(IBLinkErrRecoveryCnt) +#define cr_iblinkdown CREG_IDX(IBLinkDownedCnt) +#define cr_ibsymbolerr CREG_IDX(IBSymbolErrCnt) +#define cr_vl15droppedpkt CREG_IDX(RxVL15DroppedPktCnt) +#define cr_rxotherlocalphyerr CREG_IDX(RxOtherLocalPhyErrCnt) +#define cr_excessbufferovfl CREG_IDX(ExcessBufferOvflCnt) +#define cr_locallinkintegrityerr CREG_IDX(LocalLinkIntegrityErrCnt) +#define cr_rxvlerr CREG_IDX(RxVlErrCnt) +#define cr_rxdlidfltr CREG_IDX(RxDlidFltrCnt) +#define cr_psstat CREG_IDX(PSStat) +#define cr_psstart CREG_IDX(PSStart) +#define cr_psinterval CREG_IDX(PSInterval) +#define cr_psrcvdatacount CREG_IDX(PSRcvDataCount) +#define cr_psrcvpktscount CREG_IDX(PSRcvPktsCount) +#define cr_psxmitdatacount CREG_IDX(PSXmitDataCount) +#define cr_psxmitpktscount CREG_IDX(PSXmitPktsCount) +#define cr_psxmitwaitcount CREG_IDX(PSXmitWaitCount) +#define cr_txsdmadesc CREG_IDX(TxSDmaDescCnt) +#define cr_pcieretrydiag CREG_IDX(PcieRetryBufDiagQwordCnt) + +#define SYM_RMASK(regname, fldname) ((u64) \ + QIB_7220_##regname##_##fldname##_RMASK) +#define SYM_MASK(regname, fldname) ((u64) \ + QIB_7220_##regname##_##fldname##_RMASK << \ + QIB_7220_##regname##_##fldname##_LSB) +#define SYM_LSB(regname, fldname) (QIB_7220_##regname##_##fldname##_LSB) +#define SYM_FIELD(value, regname, fldname) ((u64) \ + (((value) >> SYM_LSB(regname, fldname)) & \ + SYM_RMASK(regname, fldname))) +#define ERR_MASK(fldname) SYM_MASK(ErrMask, fldname##Mask) +#define HWE_MASK(fldname) SYM_MASK(HwErrMask, fldname##Mask) + +/* ibcctrl bits */ +#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1 +/* cycle through TS1/TS2 till OK */ +#define 
QLOGIC_IB_IBCC_LINKINITCMD_POLL 2
+/* wait for TS1, then go on */
+#define QLOGIC_IB_IBCC_LINKINITCMD_SLEEP 3
+#define QLOGIC_IB_IBCC_LINKINITCMD_SHIFT 16
+
+#define QLOGIC_IB_IBCC_LINKCMD_DOWN 1		/* move to 0x11 */
+#define QLOGIC_IB_IBCC_LINKCMD_ARMED 2		/* move to 0x21 */
+#define QLOGIC_IB_IBCC_LINKCMD_ACTIVE 3		/* move to 0x31 */
+
+#define BLOB_7220_IBCHG 0x81
+
+/*
+ * We could have a single register get/put routine that takes a group type,
+ * but this is somewhat clearer and cleaner. It also gives us some error
+ * checking. 64 bit register reads should always work, but are inefficient
+ * on Opteron (the northbridge always generates 2 separate HT 32 bit reads),
+ * so we use kreg32 wherever possible. User register and counter register
+ * reads are always 32 bit reads, so only one form of those routines.
+ */
+
+/**
+ * qib_read_ureg32 - read 32-bit virtualized per-context register
+ * @dd: device
+ * @regno: register number
+ * @ctxt: context number
+ *
+ * Return the contents of a register that is virtualized to be per context.
+ * Returns 0 if the chip is not present or its registers are not mapped;
+ * at runtime a zero return is not distinguishable from valid contents.
+ */
+static inline u32 qib_read_ureg32(const struct qib_devdata *dd,
+				  enum qib_ureg regno, int ctxt)
+{
+	if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
+		return 0;
+
+	if (dd->userbase)
+		return readl(regno + (u64 __iomem *)
+			     ((char __iomem *)dd->userbase +
+			      dd->ureg_align * ctxt));
+	else
+		return readl(regno + (u64 __iomem *)
+			     (dd->uregbase +
+			      (char __iomem *)dd->kregbase +
+			      dd->ureg_align * ctxt));
+}
+
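As a usage sketch (the wrapper name and register choice are illustrative, not part of this diff), a per-context read resolves to the user or kernel mapping plus ureg_align * ctxt plus regno * sizeof(u64), so a caller just passes the enum value and context number:

    /* Illustrative only: read the receive-header head register for one
     * context.  ur_rcvhdrhead is assumed here to be one of the
     * enum qib_ureg values from qib.h used elsewhere in this driver. */
    static inline u32 example_read_rcvhdrhead(const struct qib_devdata *dd,
                                              int ctxt)
    {
            return qib_read_ureg32(dd, ur_rcvhdrhead, ctxt);
    }

+/**
+ * qib_write_ureg - write 32-bit virtualized per-context register
+ * @dd: device
+ * @regno: register number
+ * @value: value
+ * @ctxt: context
+ *
+ * Write the contents of a register that is virtualized to be per context.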
+ */ +static inline void qib_write_ureg(const struct qib_devdata *dd, + enum qib_ureg regno, u64 value, int ctxt) +{ + u64 __iomem *ubase; + + if (dd->userbase) + ubase = (u64 __iomem *) + ((char __iomem *) dd->userbase + + dd->ureg_align * ctxt); + else + ubase = (u64 __iomem *) + (dd->uregbase + + (char __iomem *) dd->kregbase + + dd->ureg_align * ctxt); + + if (dd->kregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &ubase[regno]); +} + +/** + * qib_write_kreg_ctxt - write a device's per-ctxt 64-bit kernel register + * @dd: the qlogic_ib device + * @regno: the register number to write + * @ctxt: the context containing the register + * @value: the value to write + */ +static inline void qib_write_kreg_ctxt(const struct qib_devdata *dd, + const u16 regno, unsigned ctxt, + u64 value) +{ + qib_write_kreg(dd, regno + ctxt, value); +} + +static inline void write_7220_creg(const struct qib_devdata *dd, + u16 regno, u64 value) +{ + if (dd->cspec->cregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &dd->cspec->cregbase[regno]); +} + +static inline u64 read_7220_creg(const struct qib_devdata *dd, u16 regno) +{ + if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readq(&dd->cspec->cregbase[regno]); +} + +static inline u32 read_7220_creg32(const struct qib_devdata *dd, u16 regno) +{ + if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readl(&dd->cspec->cregbase[regno]); +} + +/* kr_revision bits */ +#define QLOGIC_IB_R_EMULATORREV_MASK ((1ULL << 22) - 1) +#define QLOGIC_IB_R_EMULATORREV_SHIFT 40 + +/* kr_control bits */ +#define QLOGIC_IB_C_RESET (1U << 7) + +/* kr_intstatus, kr_intclear, kr_intmask bits */ +#define QLOGIC_IB_I_RCVURG_MASK ((1ULL << 17) - 1) +#define QLOGIC_IB_I_RCVURG_SHIFT 32 +#define QLOGIC_IB_I_RCVAVAIL_MASK ((1ULL << 17) - 1) +#define QLOGIC_IB_I_RCVAVAIL_SHIFT 0 +#define QLOGIC_IB_I_SERDESTRIMDONE (1ULL << 27) + +#define QLOGIC_IB_C_FREEZEMODE 0x00000002 +#define QLOGIC_IB_C_LINKENABLE 0x00000004 + +#define QLOGIC_IB_I_SDMAINT 0x8000000000000000ULL +#define QLOGIC_IB_I_SDMADISABLED 0x4000000000000000ULL +#define QLOGIC_IB_I_ERROR 0x0000000080000000ULL +#define QLOGIC_IB_I_SPIOSENT 0x0000000040000000ULL +#define QLOGIC_IB_I_SPIOBUFAVAIL 0x0000000020000000ULL +#define QLOGIC_IB_I_GPIO 0x0000000010000000ULL + +/* variables for sanity checking interrupt and errors */ +#define QLOGIC_IB_I_BITSEXTANT \ + (QLOGIC_IB_I_SDMAINT | QLOGIC_IB_I_SDMADISABLED | \ + (QLOGIC_IB_I_RCVURG_MASK << QLOGIC_IB_I_RCVURG_SHIFT) | \ + (QLOGIC_IB_I_RCVAVAIL_MASK << \ + QLOGIC_IB_I_RCVAVAIL_SHIFT) | \ + QLOGIC_IB_I_ERROR | QLOGIC_IB_I_SPIOSENT | \ + QLOGIC_IB_I_SPIOBUFAVAIL | QLOGIC_IB_I_GPIO | \ + QLOGIC_IB_I_SERDESTRIMDONE) + +#define IB_HWE_BITSEXTANT \ + (HWE_MASK(RXEMemParityErr) | \ + HWE_MASK(TXEMemParityErr) | \ + (QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK << \ + QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT) | \ + QLOGIC_IB_HWE_PCIE1PLLFAILED | \ + QLOGIC_IB_HWE_PCIE0PLLFAILED | \ + QLOGIC_IB_HWE_PCIEPOISONEDTLP | \ + QLOGIC_IB_HWE_PCIECPLTIMEOUT | \ + QLOGIC_IB_HWE_PCIEBUSPARITYXTLH | \ + QLOGIC_IB_HWE_PCIEBUSPARITYXADM | \ + QLOGIC_IB_HWE_PCIEBUSPARITYRADM | \ + HWE_MASK(PowerOnBISTFailed) | \ + QLOGIC_IB_HWE_COREPLL_FBSLIP | \ + QLOGIC_IB_HWE_COREPLL_RFSLIP | \ + QLOGIC_IB_HWE_SERDESPLLFAILED | \ + HWE_MASK(IBCBusToSPCParityErr) | \ + HWE_MASK(IBCBusFromSPCParityErr) | \ + QLOGIC_IB_HWE_PCIECPLDATAQUEUEERR | \ + QLOGIC_IB_HWE_PCIECPLHDRQUEUEERR | \ + QLOGIC_IB_HWE_SDMAMEMREADERR | \ + QLOGIC_IB_HWE_CLK_UC_PLLNOTLOCKED | \ + 
QLOGIC_IB_HWE_PCIESERDESQ0PCLKNOTDETECT | \ + QLOGIC_IB_HWE_PCIESERDESQ1PCLKNOTDETECT | \ + QLOGIC_IB_HWE_PCIESERDESQ2PCLKNOTDETECT | \ + QLOGIC_IB_HWE_PCIESERDESQ3PCLKNOTDETECT | \ + QLOGIC_IB_HWE_DDSRXEQMEMORYPARITYERR | \ + QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR | \ + QLOGIC_IB_HWE_PCIE_UC_OCT0MEMORYPARITYERR | \ + QLOGIC_IB_HWE_PCIE_UC_OCT1MEMORYPARITYERR) + +#define IB_E_BITSEXTANT \ + (ERR_MASK(RcvFormatErr) | ERR_MASK(RcvVCRCErr) | \ + ERR_MASK(RcvICRCErr) | ERR_MASK(RcvMinPktLenErr) | \ + ERR_MASK(RcvMaxPktLenErr) | ERR_MASK(RcvLongPktLenErr) | \ + ERR_MASK(RcvShortPktLenErr) | ERR_MASK(RcvUnexpectedCharErr) | \ + ERR_MASK(RcvUnsupportedVLErr) | ERR_MASK(RcvEBPErr) | \ + ERR_MASK(RcvIBFlowErr) | ERR_MASK(RcvBadVersionErr) | \ + ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr) | \ + ERR_MASK(RcvBadTidErr) | ERR_MASK(RcvHdrLenErr) | \ + ERR_MASK(RcvHdrErr) | ERR_MASK(RcvIBLostLinkErr) | \ + ERR_MASK(SendSpecialTriggerErr) | \ + ERR_MASK(SDmaDisabledErr) | ERR_MASK(SendMinPktLenErr) | \ + ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendUnderRunErr) | \ + ERR_MASK(SendPktLenErr) | ERR_MASK(SendDroppedSmpPktErr) | \ + ERR_MASK(SendDroppedDataPktErr) | \ + ERR_MASK(SendPioArmLaunchErr) | \ + ERR_MASK(SendUnexpectedPktNumErr) | \ + ERR_MASK(SendUnsupportedVLErr) | ERR_MASK(SendBufMisuseErr) | \ + ERR_MASK(SDmaGenMismatchErr) | ERR_MASK(SDmaOutOfBoundErr) | \ + ERR_MASK(SDmaTailOutOfBoundErr) | ERR_MASK(SDmaBaseErr) | \ + ERR_MASK(SDma1stDescErr) | ERR_MASK(SDmaRpyTagErr) | \ + ERR_MASK(SDmaDwEnErr) | ERR_MASK(SDmaMissingDwErr) | \ + ERR_MASK(SDmaUnexpDataErr) | \ + ERR_MASK(IBStatusChanged) | ERR_MASK(InvalidAddrErr) | \ + ERR_MASK(ResetNegated) | ERR_MASK(HardwareErr) | \ + ERR_MASK(SDmaDescAddrMisalignErr) | \ + ERR_MASK(InvalidEEPCmd)) + +/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ +#define QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK 0x00000000000000ffULL +#define QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT 0 +#define QLOGIC_IB_HWE_PCIEPOISONEDTLP 0x0000000010000000ULL +#define QLOGIC_IB_HWE_PCIECPLTIMEOUT 0x0000000020000000ULL +#define QLOGIC_IB_HWE_PCIEBUSPARITYXTLH 0x0000000040000000ULL +#define QLOGIC_IB_HWE_PCIEBUSPARITYXADM 0x0000000080000000ULL +#define QLOGIC_IB_HWE_PCIEBUSPARITYRADM 0x0000000100000000ULL +#define QLOGIC_IB_HWE_COREPLL_FBSLIP 0x0080000000000000ULL +#define QLOGIC_IB_HWE_COREPLL_RFSLIP 0x0100000000000000ULL +#define QLOGIC_IB_HWE_PCIE1PLLFAILED 0x0400000000000000ULL +#define QLOGIC_IB_HWE_PCIE0PLLFAILED 0x0800000000000000ULL +#define QLOGIC_IB_HWE_SERDESPLLFAILED 0x1000000000000000ULL +/* specific to this chip */ +#define QLOGIC_IB_HWE_PCIECPLDATAQUEUEERR 0x0000000000000040ULL +#define QLOGIC_IB_HWE_PCIECPLHDRQUEUEERR 0x0000000000000080ULL +#define QLOGIC_IB_HWE_SDMAMEMREADERR 0x0000000010000000ULL +#define QLOGIC_IB_HWE_CLK_UC_PLLNOTLOCKED 0x2000000000000000ULL +#define QLOGIC_IB_HWE_PCIESERDESQ0PCLKNOTDETECT 0x0100000000000000ULL +#define QLOGIC_IB_HWE_PCIESERDESQ1PCLKNOTDETECT 0x0200000000000000ULL +#define QLOGIC_IB_HWE_PCIESERDESQ2PCLKNOTDETECT 0x0400000000000000ULL +#define QLOGIC_IB_HWE_PCIESERDESQ3PCLKNOTDETECT 0x0800000000000000ULL +#define QLOGIC_IB_HWE_DDSRXEQMEMORYPARITYERR 0x0000008000000000ULL +#define QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL +#define QLOGIC_IB_HWE_PCIE_UC_OCT0MEMORYPARITYERR 0x0000001000000000ULL +#define QLOGIC_IB_HWE_PCIE_UC_OCT1MEMORYPARITYERR 0x0000002000000000ULL + +#define IBA7220_IBCC_LINKCMD_SHIFT 19 + +/* kr_ibcddrctrl bits */ +#define IBA7220_IBC_DLIDLMC_MASK 0xFFFFFFFFUL +#define IBA7220_IBC_DLIDLMC_SHIFT 32 + 
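To make the SYM_* accessor pattern above concrete, here is a minimal sketch (not part of this diff) of how a field is pulled out of a raw 64-bit register value; the helper name is ours, and it simply mirrors what SYM_FIELD() does for the LinkTrainingState lookups later in this file:

    /* Illustrative only: equivalent to
     * SYM_FIELD(ibcstatus, IBCStatus, LinkTrainingState), built from the
     * machine-generated _LSB/_RMASK macros in the register header. */
    static inline u8 example_7220_lt_state(u64 ibcstatus)
    {
            return (u8)((ibcstatus >> SYM_LSB(IBCStatus, LinkTrainingState)) &
                        SYM_RMASK(IBCStatus, LinkTrainingState));
    }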
+#define IBA7220_IBC_HRTBT_MASK (SYM_RMASK(IBCDDRCtrl, HRTBT_AUTO) | \ + SYM_RMASK(IBCDDRCtrl, HRTBT_ENB)) +#define IBA7220_IBC_HRTBT_SHIFT SYM_LSB(IBCDDRCtrl, HRTBT_ENB) + +#define IBA7220_IBC_LANE_REV_SUPPORTED (1<<8) +#define IBA7220_IBC_LREV_MASK 1 +#define IBA7220_IBC_LREV_SHIFT 8 +#define IBA7220_IBC_RXPOL_MASK 1 +#define IBA7220_IBC_RXPOL_SHIFT 7 +#define IBA7220_IBC_WIDTH_SHIFT 5 +#define IBA7220_IBC_WIDTH_MASK 0x3 +#define IBA7220_IBC_WIDTH_1X_ONLY (0 << IBA7220_IBC_WIDTH_SHIFT) +#define IBA7220_IBC_WIDTH_4X_ONLY (1 << IBA7220_IBC_WIDTH_SHIFT) +#define IBA7220_IBC_WIDTH_AUTONEG (2 << IBA7220_IBC_WIDTH_SHIFT) +#define IBA7220_IBC_SPEED_AUTONEG (1 << 1) +#define IBA7220_IBC_SPEED_SDR (1 << 2) +#define IBA7220_IBC_SPEED_DDR (1 << 3) +#define IBA7220_IBC_SPEED_AUTONEG_MASK (0x7 << 1) +#define IBA7220_IBC_IBTA_1_2_MASK (1) + +/* kr_ibcddrstatus */ +/* link latency shift is 0, don't bother defining */ +#define IBA7220_DDRSTAT_LINKLAT_MASK 0x3ffffff + +/* kr_extstatus bits */ +#define QLOGIC_IB_EXTS_FREQSEL 0x2 +#define QLOGIC_IB_EXTS_SERDESSEL 0x4 +#define QLOGIC_IB_EXTS_MEMBIST_ENDTEST 0x0000000000004000 +#define QLOGIC_IB_EXTS_MEMBIST_DISABLED 0x0000000000008000 + +/* kr_xgxsconfig bits */ +#define QLOGIC_IB_XGXS_RESET 0x5ULL +#define QLOGIC_IB_XGXS_FC_SAFE (1ULL << 63) + +/* kr_rcvpktledcnt */ +#define IBA7220_LEDBLINK_ON_SHIFT 32 /* 4ns period on after packet */ +#define IBA7220_LEDBLINK_OFF_SHIFT 0 /* 4ns period off before next on */ + +#define _QIB_GPIO_SDA_NUM 1 +#define _QIB_GPIO_SCL_NUM 0 +#define QIB_TWSI_EEPROM_DEV 0xA2 /* All Production 7220 cards. */ +#define QIB_TWSI_TEMP_DEV 0x98 + +/* HW counter clock is at 4nsec */ +#define QIB_7220_PSXMITWAIT_CHECK_RATE 4000 + +#define IBA7220_R_INTRAVAIL_SHIFT 17 +#define IBA7220_R_PKEY_DIS_SHIFT 34 +#define IBA7220_R_TAILUPD_SHIFT 35 +#define IBA7220_R_CTXTCFG_SHIFT 36 + +#define IBA7220_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */ + +/* + * the size bits give us 2^N, in KB units. 0 marks as invalid, + * and 7 is reserved. 
We currently use only 2KB and 4KB + */ +#define IBA7220_TID_SZ_SHIFT 37 /* shift to 3bit size selector */ +#define IBA7220_TID_SZ_2K (1UL << IBA7220_TID_SZ_SHIFT) /* 2KB */ +#define IBA7220_TID_SZ_4K (2UL << IBA7220_TID_SZ_SHIFT) /* 4KB */ +#define IBA7220_TID_PA_SHIFT 11U /* TID addr in chip stored w/o low bits */ +#define PBC_7220_VL15_SEND (1ULL << 63) /* pbc; VL15, no credit check */ +#define PBC_7220_VL15_SEND_CTRL (1ULL << 31) /* control version of same */ + +#define AUTONEG_TRIES 5 /* sequential retries to negotiate DDR */ + +/* packet rate matching delay multiplier */ +static u8 rate_to_delay[2][2] = { + /* 1x, 4x */ + { 8, 2 }, /* SDR */ + { 4, 1 } /* DDR */ +}; + +static u8 ib_rate_to_delay[IB_RATE_120_GBPS + 1] = { + [IB_RATE_2_5_GBPS] = 8, + [IB_RATE_5_GBPS] = 4, + [IB_RATE_10_GBPS] = 2, + [IB_RATE_20_GBPS] = 1 +}; + +#define IBA7220_LINKSPEED_SHIFT SYM_LSB(IBCStatus, LinkSpeedActive) +#define IBA7220_LINKWIDTH_SHIFT SYM_LSB(IBCStatus, LinkWidthActive) + +/* link training states, from IBC */ +#define IB_7220_LT_STATE_DISABLED 0x00 +#define IB_7220_LT_STATE_LINKUP 0x01 +#define IB_7220_LT_STATE_POLLACTIVE 0x02 +#define IB_7220_LT_STATE_POLLQUIET 0x03 +#define IB_7220_LT_STATE_SLEEPDELAY 0x04 +#define IB_7220_LT_STATE_SLEEPQUIET 0x05 +#define IB_7220_LT_STATE_CFGDEBOUNCE 0x08 +#define IB_7220_LT_STATE_CFGRCVFCFG 0x09 +#define IB_7220_LT_STATE_CFGWAITRMT 0x0a +#define IB_7220_LT_STATE_CFGIDLE 0x0b +#define IB_7220_LT_STATE_RECOVERRETRAIN 0x0c +#define IB_7220_LT_STATE_RECOVERWAITRMT 0x0e +#define IB_7220_LT_STATE_RECOVERIDLE 0x0f + +/* link state machine states from IBC */ +#define IB_7220_L_STATE_DOWN 0x0 +#define IB_7220_L_STATE_INIT 0x1 +#define IB_7220_L_STATE_ARM 0x2 +#define IB_7220_L_STATE_ACTIVE 0x3 +#define IB_7220_L_STATE_ACT_DEFER 0x4 + +static const u8 qib_7220_physportstate[0x20] = { + [IB_7220_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED, + [IB_7220_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP, + [IB_7220_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL, + [IB_7220_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL, + [IB_7220_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP, + [IB_7220_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP, + [IB_7220_LT_STATE_CFGDEBOUNCE] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7220_LT_STATE_CFGRCVFCFG] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7220_LT_STATE_CFGWAITRMT] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7220_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7220_LT_STATE_RECOVERRETRAIN] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [IB_7220_LT_STATE_RECOVERWAITRMT] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [IB_7220_LT_STATE_RECOVERIDLE] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN +}; + +int qib_special_trigger; +module_param_named(special_trigger, qib_special_trigger, int, S_IRUGO); +MODULE_PARM_DESC(special_trigger, "Enable SpecialTrigger arm/launch"); + +#define IBCBUSFRSPCPARITYERR HWE_MASK(IBCBusFromSPCParityErr) +#define IBCBUSTOSPCPARITYERR HWE_MASK(IBCBusToSPCParityErr) + +#define SYM_MASK_BIT(regname, fldname, bit) ((u64) \ + (1ULL << (SYM_LSB(regname, fldname) + (bit)))) + +#define TXEMEMPARITYERR_PIOBUF \ + SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 0) +#define TXEMEMPARITYERR_PIOPBC \ + SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 1) +#define 
TXEMEMPARITYERR_PIOLAUNCHFIFO \
+	SYM_MASK_BIT(HwErrMask, TXEMemParityErrMask, 2)
+
+#define RXEMEMPARITYERR_RCVBUF \
+	SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 0)
+#define RXEMEMPARITYERR_LOOKUPQ \
+	SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 1)
+#define RXEMEMPARITYERR_EXPTID \
+	SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 2)
+#define RXEMEMPARITYERR_EAGERTID \
+	SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 3)
+#define RXEMEMPARITYERR_FLAGBUF \
+	SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 4)
+#define RXEMEMPARITYERR_DATAINFO \
+	SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 5)
+#define RXEMEMPARITYERR_HDRINFO \
+	SYM_MASK_BIT(HwErrMask, RXEMemParityErrMask, 6)
+
+/* 7220 specific hardware errors... */
+static const struct qib_hwerror_msgs qib_7220_hwerror_msgs[] = {
+	/* generic hardware errors */
+	QLOGIC_IB_HWE_MSG(IBCBUSFRSPCPARITYERR, "QIB2IB Parity"),
+	QLOGIC_IB_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2QIB Parity"),
+
+	QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOBUF,
+			  "TXE PIOBUF Memory Parity"),
+	QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOPBC,
+			  "TXE PIOPBC Memory Parity"),
+	QLOGIC_IB_HWE_MSG(TXEMEMPARITYERR_PIOLAUNCHFIFO,
+			  "TXE PIOLAUNCHFIFO Memory Parity"),
+
+	QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_RCVBUF,
+			  "RXE RCVBUF Memory Parity"),
+	QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_LOOKUPQ,
+			  "RXE LOOKUPQ Memory Parity"),
+	QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_EAGERTID,
+			  "RXE EAGERTID Memory Parity"),
+	QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_EXPTID,
+			  "RXE EXPTID Memory Parity"),
+	QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_FLAGBUF,
+			  "RXE FLAGBUF Memory Parity"),
+	QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_DATAINFO,
+			  "RXE DATAINFO Memory Parity"),
+	QLOGIC_IB_HWE_MSG(RXEMEMPARITYERR_HDRINFO,
+			  "RXE HDRINFO Memory Parity"),
+
+	/* chip-specific hardware errors */
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEPOISONEDTLP,
+			  "PCIe Poisoned TLP"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLTIMEOUT,
+			  "PCIe completion timeout"),
+	/*
+	 * In practice, it's unlikely that we'll see PCIe PLL, or bus
+	 * parity or memory parity error failures, because most likely we
+	 * won't be able to talk to the core of the chip. Nonetheless, we
+	 * might see them, if they are in parts of the PCIe core that aren't
+	 * essential.
+	 */
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE1PLLFAILED,
+			  "PCIePLL1"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE0PLLFAILED,
+			  "PCIePLL0"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYXTLH,
+			  "PCIe XTLH core parity"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYXADM,
+			  "PCIe ADM TX core parity"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIEBUSPARITYRADM,
+			  "PCIe ADM RX core parity"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_SERDESPLLFAILED,
+			  "SerDes PLL"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLDATAQUEUEERR,
+			  "PCIe cpl data queue"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLHDRQUEUEERR,
+			  "PCIe cpl header queue"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_SDMAMEMREADERR,
+			  "Send DMA memory read"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_CLK_UC_PLLNOTLOCKED,
+			  "uC PLL clock not locked"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIESERDESQ0PCLKNOTDETECT,
+			  "PCIe serdes Q0 no clock"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIESERDESQ1PCLKNOTDETECT,
+			  "PCIe serdes Q1 no clock"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIESERDESQ2PCLKNOTDETECT,
+			  "PCIe serdes Q2 no clock"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIESERDESQ3PCLKNOTDETECT,
+			  "PCIe serdes Q3 no clock"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_DDSRXEQMEMORYPARITYERR,
+			  "DDS RXEQ memory parity"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR,
+			  "IB uC memory parity"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE_UC_OCT0MEMORYPARITYERR,
+			  "PCIe uC oct0 memory parity"),
+	QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIE_UC_OCT1MEMORYPARITYERR,
+			  "PCIe uC oct1 memory parity"),
+};
+
+#define RXE_PARITY (RXEMEMPARITYERR_EAGERTID|RXEMEMPARITYERR_EXPTID)
+
+#define QLOGIC_IB_E_PKTERRS (\
+	ERR_MASK(SendPktLenErr) | \
+	ERR_MASK(SendDroppedDataPktErr) | \
+	ERR_MASK(RcvVCRCErr) | \
+	ERR_MASK(RcvICRCErr) | \
+	ERR_MASK(RcvShortPktLenErr) | \
+	ERR_MASK(RcvEBPErr))
+
+/* Convenience for decoding Send DMA errors */
+#define QLOGIC_IB_E_SDMAERRS ( \
+	ERR_MASK(SDmaGenMismatchErr) | \
+	ERR_MASK(SDmaOutOfBoundErr) | \
+	ERR_MASK(SDmaTailOutOfBoundErr) | ERR_MASK(SDmaBaseErr) | \
+	ERR_MASK(SDma1stDescErr) | ERR_MASK(SDmaRpyTagErr) | \
+	ERR_MASK(SDmaDwEnErr) | ERR_MASK(SDmaMissingDwErr) | \
+	ERR_MASK(SDmaUnexpDataErr) | \
+	ERR_MASK(SDmaDescAddrMisalignErr) | \
+	ERR_MASK(SDmaDisabledErr) | \
+	ERR_MASK(SendBufMisuseErr))
+
+/* These are all rcv-related errors which we want to count for stats */
+#define E_SUM_PKTERRS \
+	(ERR_MASK(RcvHdrLenErr) | ERR_MASK(RcvBadTidErr) | \
+	 ERR_MASK(RcvBadVersionErr) | ERR_MASK(RcvHdrErr) | \
+	 ERR_MASK(RcvLongPktLenErr) | ERR_MASK(RcvShortPktLenErr) | \
+	 ERR_MASK(RcvMaxPktLenErr) | ERR_MASK(RcvMinPktLenErr) | \
+	 ERR_MASK(RcvFormatErr) | ERR_MASK(RcvUnsupportedVLErr) | \
+	 ERR_MASK(RcvUnexpectedCharErr) | ERR_MASK(RcvEBPErr))
+
+/* These are all send-related errors which we want to count for stats */
+#define E_SUM_ERRS \
+	(ERR_MASK(SendPioArmLaunchErr) | ERR_MASK(SendUnexpectedPktNumErr) | \
+	 ERR_MASK(SendDroppedDataPktErr) | ERR_MASK(SendDroppedSmpPktErr) | \
+	 ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendUnsupportedVLErr) | \
+	 ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendPktLenErr) | \
+	 ERR_MASK(InvalidAddrErr))
+
+/*
+ * This is similar to E_SUM_ERRS, but we can't ignore armlaunch errors:
+ * more may arrive while we are still cleaning up, and those need to be
+ * cancelled as they happen. Also don't ignore errors unrelated to
+ * freeze and cancelling buffers.
+ */
+#define E_SPKT_ERRS_IGNORE \
+	(ERR_MASK(SendDroppedDataPktErr) | ERR_MASK(SendDroppedSmpPktErr) | \
+	 ERR_MASK(SendMaxPktLenErr) | ERR_MASK(SendMinPktLenErr) | \
+	 ERR_MASK(SendPktLenErr))
+
+/*
+ * These are errors that can occur when the link changes state while
+ * a packet is being sent or received. This doesn't cover things
+ * like EBP or VCRC that can result from the link changing state while
+ * a packet is in flight, so that we receive a "known bad" packet.
+ */
+#define E_SUM_LINK_PKTERRS \
+	(ERR_MASK(SendDroppedDataPktErr) | ERR_MASK(SendDroppedSmpPktErr) | \
+	 ERR_MASK(SendMinPktLenErr) | ERR_MASK(SendPktLenErr) | \
+	 ERR_MASK(RcvShortPktLenErr) | ERR_MASK(RcvMinPktLenErr) | \
+	 ERR_MASK(RcvUnexpectedCharErr))
+
+static void autoneg_7220_work(struct work_struct *);
+static u32 __iomem *qib_7220_getsendbuf(struct qib_pportdata *, u64, u32 *);
+
+/*
+ * Called when we might have an error that is specific to a particular
+ * PIO buffer, and may need to cancel that buffer, so it can be re-used,
+ * and we don't need to force the update of pioavail.
+ */
+static void qib_disarm_7220_senderrbufs(struct qib_pportdata *ppd)
+{
+	unsigned long sbuf[3];
+	struct qib_devdata *dd = ppd->dd;
+
+	/*
+	 * It's possible that sendbuffererror could have bits set; might
+	 * have already done this as a result of hardware error handling.
+	 */
+	/* read these before writing errorclear */
+	sbuf[0] = qib_read_kreg64(dd, kr_sendbuffererror);
+	sbuf[1] = qib_read_kreg64(dd, kr_sendbuffererror + 1);
+	sbuf[2] = qib_read_kreg64(dd, kr_sendbuffererror + 2);
+
+	if (sbuf[0] || sbuf[1] || sbuf[2])
+		qib_disarm_piobufs_set(dd, sbuf,
+				       dd->piobcnt2k + dd->piobcnt4k);
+}
+
+static void qib_7220_txe_recover(struct qib_devdata *dd)
+{
+	qib_devinfo(dd->pcidev, "Recovering from TXE PIO parity error\n");
+	qib_disarm_7220_senderrbufs(dd->pport);
+}
+
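The function after this note is one instance of a pattern used throughout this file: modify a software shadow of the send-control register under sendctrl_lock, write the shadow to the chip, then write the scratch register so the chip flushes the update before the next access. A minimal sketch of the idiom, with a hypothetical helper name (not part of this diff):

    /* Illustrative only: the shadowed-register update idiom used by
     * qib_7220_sdma_sendctrl() and sendctrl_7220_mod().  'set' and 'clr'
     * are bit masks to turn on and off in the shadow. */
    static void example_update_sendctrl(struct qib_devdata *dd,
                                        u64 set, u64 clr)
    {
            spin_lock(&dd->sendctrl_lock);
            dd->sendctrl |= set;        /* update the software shadow */
            dd->sendctrl &= ~clr;
            qib_write_kreg(dd, kr_sendctrl, dd->sendctrl);
            qib_write_kreg(dd, kr_scratch, 0); /* flush write to the chip */
            spin_unlock(&dd->sendctrl_lock);
    }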
+/*
+ * This is called with interrupts disabled and sdma_lock held.
+ */
+static void qib_7220_sdma_sendctrl(struct qib_pportdata *ppd, unsigned op)
+{
+	struct qib_devdata *dd = ppd->dd;
+	u64 set_sendctrl = 0;
+	u64 clr_sendctrl = 0;
+
+	if (op & QIB_SDMA_SENDCTRL_OP_ENABLE)
+		set_sendctrl |= SYM_MASK(SendCtrl, SDmaEnable);
+	else
+		clr_sendctrl |= SYM_MASK(SendCtrl, SDmaEnable);
+
+	if (op & QIB_SDMA_SENDCTRL_OP_INTENABLE)
+		set_sendctrl |= SYM_MASK(SendCtrl, SDmaIntEnable);
+	else
+		clr_sendctrl |= SYM_MASK(SendCtrl, SDmaIntEnable);
+
+	if (op & QIB_SDMA_SENDCTRL_OP_HALT)
+		set_sendctrl |= SYM_MASK(SendCtrl, SDmaHalt);
+	else
+		clr_sendctrl |= SYM_MASK(SendCtrl, SDmaHalt);
+
+	spin_lock(&dd->sendctrl_lock);
+
+	dd->sendctrl |= set_sendctrl;
+	dd->sendctrl &= ~clr_sendctrl;
+
+	qib_write_kreg(dd, kr_sendctrl, dd->sendctrl);
+	qib_write_kreg(dd, kr_scratch, 0);
+
+	spin_unlock(&dd->sendctrl_lock);
+}
+
+static void qib_decode_7220_sdma_errs(struct qib_pportdata *ppd,
+				      u64 err, char *buf, size_t blen)
+{
+	static const struct {
+		u64 err;
+		const char *msg;
+	} errs[] = {
+		{ ERR_MASK(SDmaGenMismatchErr), "SDmaGenMismatch" },
+		{ ERR_MASK(SDmaOutOfBoundErr), "SDmaOutOfBound" },
+		{ ERR_MASK(SDmaTailOutOfBoundErr), "SDmaTailOutOfBound" },
+		{ ERR_MASK(SDmaBaseErr), "SDmaBase" },
+		{ ERR_MASK(SDma1stDescErr), "SDma1stDesc" },
+		{ ERR_MASK(SDmaRpyTagErr), "SDmaRpyTag" },
+		{ ERR_MASK(SDmaDwEnErr), "SDmaDwEn" },
+		{ ERR_MASK(SDmaMissingDwErr), "SDmaMissingDw" },
+		{ ERR_MASK(SDmaUnexpDataErr), "SDmaUnexpData" },
+		{ ERR_MASK(SDmaDescAddrMisalignErr), "SDmaDescAddrMisalign" },
+		{ ERR_MASK(SendBufMisuseErr), "SendBufMisuse" },
+		{ ERR_MASK(SDmaDisabledErr), "SDmaDisabled" },
+	};
+	int i;
+	size_t bidx = 0;
+
+	for (i = 0; i < ARRAY_SIZE(errs); i++) {
+		if (err & errs[i].err)
+			bidx += scnprintf(buf + bidx, blen - bidx,
+					  "%s ", errs[i].msg);
+	}
+}
+
+/*
+ * This is called as part of link-down cleanup, so disarm and flush
+ * all send buffers so that SMP packets can be sent.
+ */
+static void qib_7220_sdma_hw_clean_up(struct qib_pportdata *ppd)
+{
+	/* This will trigger the Abort interrupt */
+	sendctrl_7220_mod(ppd, QIB_SENDCTRL_DISARM_ALL | QIB_SENDCTRL_FLUSH |
+			  QIB_SENDCTRL_AVAIL_BLIP);
+	ppd->dd->upd_pio_shadow = 1; /* update our idea of what's busy */
+}
+
+static void qib_sdma_7220_setlengen(struct qib_pportdata *ppd)
+{
+	/*
+	 * Set SendDmaLenGen: write the register twice, first with the
+	 * generation-count MSB clear and then with it set, to enable
+	 * generation checking and load the internal generation counter.
+	 */
+	qib_write_kreg(ppd->dd, kr_senddmalengen, ppd->sdma_descq_cnt);
+	qib_write_kreg(ppd->dd, kr_senddmalengen,
+		       ppd->sdma_descq_cnt |
+		       (1ULL << QIB_7220_SendDmaLenGen_Generation_MSB));
+}
+
+static void qib_7220_sdma_hw_start_up(struct qib_pportdata *ppd)
+{
+	qib_sdma_7220_setlengen(ppd);
+	qib_sdma_update_7220_tail(ppd, 0); /* Set SendDmaTail */
+	ppd->sdma_head_dma[0] = 0;
+}
+
+#define DISABLES_SDMA ( \
+	ERR_MASK(SDmaDisabledErr) | \
+	ERR_MASK(SDmaBaseErr) | \
+	ERR_MASK(SDmaTailOutOfBoundErr) | \
+	ERR_MASK(SDmaOutOfBoundErr) | \
+	ERR_MASK(SDma1stDescErr) | \
+	ERR_MASK(SDmaRpyTagErr) | \
+	ERR_MASK(SDmaGenMismatchErr) | \
+	ERR_MASK(SDmaDescAddrMisalignErr) | \
+	ERR_MASK(SDmaMissingDwErr) | \
+	ERR_MASK(SDmaDwEnErr))
+
+static void sdma_7220_errors(struct qib_pportdata *ppd, u64 errs)
+{
+	unsigned long flags;
+	struct qib_devdata *dd = ppd->dd;
+	char *msg;
+
+	errs &= QLOGIC_IB_E_SDMAERRS;
+
+	msg = dd->cspec->sdmamsgbuf;
+	qib_decode_7220_sdma_errs(ppd, errs, msg, sizeof dd->cspec->sdmamsgbuf);
+	spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+	if (errs & ERR_MASK(SendBufMisuseErr)) {
+		unsigned long sbuf[3];
+
+		sbuf[0] = qib_read_kreg64(dd, kr_sendbuffererror);
+		sbuf[1] = qib_read_kreg64(dd, kr_sendbuffererror + 1);
+		sbuf[2] = qib_read_kreg64(dd, kr_sendbuffererror + 2);
+
+		qib_dev_err(ppd->dd,
+			    "IB%u:%u SendBufMisuse: %04lx %016lx %016lx\n",
+			    ppd->dd->unit, ppd->port, sbuf[2], sbuf[1],
+			    sbuf[0]);
+	}
+
+	if (errs & ERR_MASK(SDmaUnexpDataErr))
+		qib_dev_err(dd, "IB%u:%u SDmaUnexpData\n", ppd->dd->unit,
+			    ppd->port);
+
+	switch (ppd->sdma_state.current_state) {
+	case qib_sdma_state_s00_hw_down:
+		/* not expecting any interrupts */
+		break;
+
+	case qib_sdma_state_s10_hw_start_up_wait:
+		/* handled in intr path */
+		break;
+
+	case qib_sdma_state_s20_idle:
+		/* not expecting any interrupts */
+		break;
+
+	case qib_sdma_state_s30_sw_clean_up_wait:
+		/* not expecting any interrupts */
+		break;
+
+	case qib_sdma_state_s40_hw_clean_up_wait:
+		if (errs & ERR_MASK(SDmaDisabledErr))
+			__qib_sdma_process_event(ppd,
+				qib_sdma_event_e50_hw_cleaned);
+		break;
+
+	case qib_sdma_state_s50_hw_halt_wait:
+		/* handled in intr path */
+		break;
+
+	case qib_sdma_state_s99_running:
+		if (errs & DISABLES_SDMA)
+			__qib_sdma_process_event(ppd,
+				qib_sdma_event_e7220_err_halted);
+		break;
+	}
+
+	spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
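For context before the decoder itself, here is a hypothetical caller (not part of this diff) showing how its return value is meant to be used: non-zero means the buffer describes "real" errors worth a loud report, zero means only packet errors. The helper name is ours; handle_7220_errors() later in this file follows the same shape:

    /* Illustrative only; forward declaration of the decoder defined below. */
    static int qib_decode_7220_err(struct qib_devdata *dd, char *buf,
                                   size_t blen, u64 err);

    static void example_report_errs(struct qib_devdata *dd, u64 errs)
    {
            char *msg = dd->cspec->emsgbuf;

            if (qib_decode_7220_err(dd, msg, sizeof(dd->cspec->emsgbuf), errs))
                    qib_dev_err(dd, "%s error\n", msg);    /* "real" errors */
            else if (*msg)
                    qib_devinfo(dd->pcidev, "%s\n", msg);  /* packet errors */
    }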
+/*
+ * Decode the error status into strings, deciding whether to always
+ * print it or not depending on "normal packet errors" vs everything
+ * else. Return 1 if "real" errors, otherwise 0 if only packet
+ * errors, so the caller can decide what to print with the string.
+ */
+static int qib_decode_7220_err(struct qib_devdata *dd, char *buf, size_t blen,
+			       u64 err)
+{
+	int iserr = 1;
+
+	*buf = '\0';
+	if (err & QLOGIC_IB_E_PKTERRS) {
+		if (!(err & ~QLOGIC_IB_E_PKTERRS))
+			iserr = 0;
+		if ((err & ERR_MASK(RcvICRCErr)) &&
+		    !(err & (ERR_MASK(RcvVCRCErr) | ERR_MASK(RcvEBPErr))))
+			strlcat(buf, "CRC ", blen);
+		if (!iserr)
+			goto done;
+	}
+	if (err & ERR_MASK(RcvHdrLenErr))
+		strlcat(buf, "rhdrlen ", blen);
+	if (err & ERR_MASK(RcvBadTidErr))
+		strlcat(buf, "rbadtid ", blen);
+	if (err & ERR_MASK(RcvBadVersionErr))
+		strlcat(buf, "rbadversion ", blen);
+	if (err & ERR_MASK(RcvHdrErr))
+		strlcat(buf, "rhdr ", blen);
+	if (err & ERR_MASK(SendSpecialTriggerErr))
+		strlcat(buf, "sendspecialtrigger ", blen);
+	if (err & ERR_MASK(RcvLongPktLenErr))
+		strlcat(buf, "rlongpktlen ", blen);
+	if (err & ERR_MASK(RcvMaxPktLenErr))
+		strlcat(buf, "rmaxpktlen ", blen);
+	if (err & ERR_MASK(RcvMinPktLenErr))
+		strlcat(buf, "rminpktlen ", blen);
+	if (err & ERR_MASK(SendMinPktLenErr))
+		strlcat(buf, "sminpktlen ", blen);
+	if (err & ERR_MASK(RcvFormatErr))
+		strlcat(buf, "rformaterr ", blen);
+	if (err & ERR_MASK(RcvUnsupportedVLErr))
+		strlcat(buf, "runsupvl ", blen);
+	if (err & ERR_MASK(RcvUnexpectedCharErr))
+		strlcat(buf, "runexpchar ", blen);
+	if (err & ERR_MASK(RcvIBFlowErr))
+		strlcat(buf, "ribflow ", blen);
+	if (err & ERR_MASK(SendUnderRunErr))
+		strlcat(buf, "sunderrun ", blen);
+	if (err & ERR_MASK(SendPioArmLaunchErr))
+		strlcat(buf, "spioarmlaunch ", blen);
+	if (err & ERR_MASK(SendUnexpectedPktNumErr))
+		strlcat(buf, "sunexperrpktnum ", blen);
+	if (err & ERR_MASK(SendDroppedSmpPktErr))
+		strlcat(buf, "sdroppedsmppkt ", blen);
+	if (err & ERR_MASK(SendMaxPktLenErr))
+		strlcat(buf, "smaxpktlen ", blen);
+	if (err & ERR_MASK(SendUnsupportedVLErr))
+		strlcat(buf, "sunsupVL ", blen);
+	if (err & ERR_MASK(InvalidAddrErr))
+		strlcat(buf, "invalidaddr ", blen);
+	if (err & ERR_MASK(RcvEgrFullErr))
+		strlcat(buf, "rcvegrfull ", blen);
+	if (err & ERR_MASK(RcvHdrFullErr))
+		strlcat(buf, "rcvhdrfull ", blen);
+	if (err & ERR_MASK(IBStatusChanged))
+		strlcat(buf, "ibcstatuschg ", blen);
+	if (err & ERR_MASK(RcvIBLostLinkErr))
+		strlcat(buf, "riblostlink ", blen);
+	if (err & ERR_MASK(HardwareErr))
+		strlcat(buf, "hardware ", blen);
+	if (err & ERR_MASK(ResetNegated))
+		strlcat(buf, "reset ", blen);
+	if (err & QLOGIC_IB_E_SDMAERRS)
+		qib_decode_7220_sdma_errs(dd->pport, err, buf, blen);
+	if (err & ERR_MASK(InvalidEEPCmd))
+		strlcat(buf, "invalideepromcmd ", blen);
+done:
+	return iserr;
+}
+
+static void reenable_7220_chase(unsigned long opaque)
+{
+	struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
+
+	ppd->cpspec->chase_timer.expires = 0;
+	qib_set_ib_7220_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN,
+			       QLOGIC_IB_IBCC_LINKINITCMD_POLL);
+}
+
+static void handle_7220_chase(struct qib_pportdata *ppd, u64 ibcst)
+{
+	u8 ibclt;
+	unsigned long tnow;
+
+	ibclt = (u8)SYM_FIELD(ibcst, IBCStatus, LinkTrainingState);
+
+	/*
+	 * Detect and handle the state chase issue, where we can
+	 * get stuck if we are unlucky on timing on both sides of
+	 * the link. If we are, we disable, set a timer, and
+	 * then re-enable.
+ */ + switch (ibclt) { + case IB_7220_LT_STATE_CFGRCVFCFG: + case IB_7220_LT_STATE_CFGWAITRMT: + case IB_7220_LT_STATE_TXREVLANES: + case IB_7220_LT_STATE_CFGENH: + tnow = jiffies; + if (ppd->cpspec->chase_end && + time_after(tnow, ppd->cpspec->chase_end)) { + ppd->cpspec->chase_end = 0; + qib_set_ib_7220_lstate(ppd, + QLOGIC_IB_IBCC_LINKCMD_DOWN, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + ppd->cpspec->chase_timer.expires = jiffies + + QIB_CHASE_DIS_TIME; + add_timer(&ppd->cpspec->chase_timer); + } else if (!ppd->cpspec->chase_end) + ppd->cpspec->chase_end = tnow + QIB_CHASE_TIME; + break; + + default: + ppd->cpspec->chase_end = 0; + break; + } +} + +static void handle_7220_errors(struct qib_devdata *dd, u64 errs) +{ + char *msg; + u64 ignore_this_time = 0; + u64 iserr = 0; + int log_idx; + struct qib_pportdata *ppd = dd->pport; + u64 mask; + + /* don't report errors that are masked */ + errs &= dd->cspec->errormask; + msg = dd->cspec->emsgbuf; + + /* do these first, they are most important */ + if (errs & ERR_MASK(HardwareErr)) + qib_7220_handle_hwerrors(dd, msg, sizeof dd->cspec->emsgbuf); + else + for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) + if (errs & dd->eep_st_masks[log_idx].errs_to_log) + qib_inc_eeprom_err(dd, log_idx, 1); + + if (errs & QLOGIC_IB_E_SDMAERRS) + sdma_7220_errors(ppd, errs); + + if (errs & ~IB_E_BITSEXTANT) + qib_dev_err(dd, + "error interrupt with unknown errors %llx set\n", + (unsigned long long) (errs & ~IB_E_BITSEXTANT)); + + if (errs & E_SUM_ERRS) { + qib_disarm_7220_senderrbufs(ppd); + if ((errs & E_SUM_LINK_PKTERRS) && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* + * This can happen when trying to bring the link + * up, but the IB link changes state at the "wrong" + * time. The IB logic then complains that the packet + * isn't valid. We don't want to confuse people, so + * we just don't print them, except at debug + */ + ignore_this_time = errs & E_SUM_LINK_PKTERRS; + } + } else if ((errs & E_SUM_LINK_PKTERRS) && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* + * This can happen when SMA is trying to bring the link + * up, but the IB link changes state at the "wrong" time. + * The IB logic then complains that the packet isn't + * valid. We don't want to confuse people, so we just + * don't print them, except at debug + */ + ignore_this_time = errs & E_SUM_LINK_PKTERRS; + } + + qib_write_kreg(dd, kr_errclear, errs); + + errs &= ~ignore_this_time; + if (!errs) + goto done; + + /* + * The ones we mask off are handled specially below + * or above. Also mask SDMADISABLED by default as it + * is too chatty. + */ + mask = ERR_MASK(IBStatusChanged) | + ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr) | + ERR_MASK(HardwareErr) | ERR_MASK(SDmaDisabledErr); + + qib_decode_7220_err(dd, msg, sizeof dd->cspec->emsgbuf, errs & ~mask); + + if (errs & E_SUM_PKTERRS) + qib_stats.sps_rcverrs++; + if (errs & E_SUM_ERRS) + qib_stats.sps_txerrs++; + iserr = errs & ~(E_SUM_PKTERRS | QLOGIC_IB_E_PKTERRS | + ERR_MASK(SDmaDisabledErr)); + + if (errs & ERR_MASK(IBStatusChanged)) { + u64 ibcs; + + ibcs = qib_read_kreg64(dd, kr_ibcstatus); + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) + handle_7220_chase(ppd, ibcs); + + /* Update our picture of width and speed from chip */ + ppd->link_width_active = + ((ibcs >> IBA7220_LINKWIDTH_SHIFT) & 1) ? + IB_WIDTH_4X : IB_WIDTH_1X; + ppd->link_speed_active = + ((ibcs >> IBA7220_LINKSPEED_SHIFT) & 1) ? 
+			QIB_IB_DDR : QIB_IB_SDR;
+
+		/*
+		 * Since going into a recovery state causes the link state
+		 * to go down and since recovery is transitory, it is better
+		 * if we "miss" ever seeing the link training state go into
+		 * recovery (i.e., ignore this transition for link state
+		 * special handling purposes) without updating lastibcstat.
+		 */
+		if (qib_7220_phys_portstate(ibcs) !=
+		    IB_PHYSPORTSTATE_LINK_ERR_RECOVER)
+			qib_handle_e_ibstatuschanged(ppd, ibcs);
+	}
+
+	if (errs & ERR_MASK(ResetNegated)) {
+		qib_dev_err(dd,
+			"Got reset, requires re-init (unload and reload driver)\n");
+		dd->flags &= ~QIB_INITTED;  /* needs re-init */
+		/* mark as having had error */
+		*dd->devstatusp |= QIB_STATUS_HWERROR;
+		*dd->pport->statusp &= ~QIB_STATUS_IB_CONF;
+	}
+
+	if (*msg && iserr)
+		qib_dev_porterr(dd, ppd->port, "%s error\n", msg);
+
+	if (ppd->state_wanted & ppd->lflags)
+		wake_up_interruptible(&ppd->state_wait);
+
+	/*
+	 * If there were hdrq or egrfull errors, wake up any processes
+	 * waiting in poll. We used to try to check which contexts had
+	 * the overflow, but given the cost of that and the chip reads
+	 * to support it, it's better to just wake everybody up if we
+	 * get an overflow; waiters can poll again if it's not them.
+	 */
+	if (errs & (ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr))) {
+		qib_handle_urcv(dd, ~0U);
+		if (errs & ERR_MASK(RcvEgrFullErr))
+			qib_stats.sps_buffull++;
+		else
+			qib_stats.sps_hdrfull++;
+	}
+done:
+	return;
+}
+
+/* enable/disable chip from delivering interrupts */
+static void qib_7220_set_intr_state(struct qib_devdata *dd, u32 enable)
+{
+	if (enable) {
+		if (dd->flags & QIB_BADINTR)
+			return;
+		qib_write_kreg(dd, kr_intmask, ~0ULL);
+		/* force re-interrupt of any pending interrupts. */
+		qib_write_kreg(dd, kr_intclear, 0ULL);
+	} else
+		qib_write_kreg(dd, kr_intmask, 0ULL);
+}
+
+/*
+ * Try to clean up as much as possible for anything that might have gone
+ * wrong while in freeze mode, such as pio buffers being written by user
+ * processes (causing armlaunch), send errors due to going into freeze mode,
+ * etc., and try to avoid causing extra interrupts while doing so.
+ * Forcibly update the in-memory pioavail register copies after cleanup
+ * because the chip won't do it while in freeze mode (the register values
+ * themselves are kept correct).
+ * Make sure that we don't lose any important interrupts by using the chip
+ * feature that says that writing 0 to a bit in *clear that is set in
+ * *status will cause an interrupt to be generated again (if allowed by
+ * the *mask value).
+ * This is in chip-specific code because of all of the register accesses,
+ * even though the details are similar on most chips.
+ */
+static void qib_7220_clear_freeze(struct qib_devdata *dd)
+{
+	/* disable error interrupts, to avoid confusion */
+	qib_write_kreg(dd, kr_errmask, 0ULL);
+
+	/* also disable interrupts; errormask is sometimes overwritten */
+	qib_7220_set_intr_state(dd, 0);
+
+	qib_cancel_sends(dd->pport);
+
+	/* clear the freeze, and be sure chip saw it */
+	qib_write_kreg(dd, kr_control, dd->control);
+	qib_read_kreg32(dd, kr_scratch);
+
+	/* force in-memory update now we are out of freeze */
+	qib_force_pio_avail_update(dd);
+
+	/*
+	 * force new interrupt if any hwerr, error or interrupt bits are
+	 * still set, and clear "safe" send packet errors related to freeze
+	 * and cancelling sends. Re-enable error interrupts before possible
+	 * force of re-interrupt on pending interrupts.
+ */ + qib_write_kreg(dd, kr_hwerrclear, 0ULL); + qib_write_kreg(dd, kr_errclear, E_SPKT_ERRS_IGNORE); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); + qib_7220_set_intr_state(dd, 1); +} + +/** + * qib_7220_handle_hwerrors - display hardware errors. + * @dd: the qlogic_ib device + * @msg: the output buffer + * @msgl: the size of the output buffer + * + * Use same msg buffer as regular errors to avoid excessive stack + * use. Most hardware errors are catastrophic, but for right now, + * we'll print them and continue. We reuse the same message buffer as + * handle_7220_errors() to avoid excessive stack usage. + */ +static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg, + size_t msgl) +{ + u64 hwerrs; + u32 bits, ctrl; + int isfatal = 0; + char *bitsmsg; + int log_idx; + + hwerrs = qib_read_kreg64(dd, kr_hwerrstatus); + if (!hwerrs) + goto bail; + if (hwerrs == ~0ULL) { + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); + goto bail; + } + qib_stats.sps_hwerrs++; + + /* + * Always clear the error status register, except MEMBISTFAIL, + * regardless of whether we continue or stop using the chip. + * We want that set so we know it failed, even across driver reload. + * We'll still ignore it in the hwerrmask. We do this partly for + * diagnostics, but also for support. + */ + qib_write_kreg(dd, kr_hwerrclear, + hwerrs & ~HWE_MASK(PowerOnBISTFailed)); + + hwerrs &= dd->cspec->hwerrmask; + + /* We log some errors to EEPROM, check if we have any of those. */ + for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) + if (hwerrs & dd->eep_st_masks[log_idx].hwerrs_to_log) + qib_inc_eeprom_err(dd, log_idx, 1); + if (hwerrs & ~(TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC | + RXE_PARITY)) + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); + + if (hwerrs & ~IB_HWE_BITSEXTANT) + qib_dev_err(dd, + "hwerror interrupt with unknown errors %llx set\n", + (unsigned long long) (hwerrs & ~IB_HWE_BITSEXTANT)); + + if (hwerrs & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) + qib_sd7220_clr_ibpar(dd); + + ctrl = qib_read_kreg32(dd, kr_control); + if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) { + /* + * Parity errors in send memory are recoverable by h/w + * just do housekeeping, exit freeze mode and continue. 
+		 */
+		if (hwerrs & (TXEMEMPARITYERR_PIOBUF |
+			      TXEMEMPARITYERR_PIOPBC)) {
+			qib_7220_txe_recover(dd);
+			hwerrs &= ~(TXEMEMPARITYERR_PIOBUF |
+				    TXEMEMPARITYERR_PIOPBC);
+		}
+		if (hwerrs)
+			isfatal = 1;
+		else
+			qib_7220_clear_freeze(dd);
+	}
+
+	*msg = '\0';
+
+	if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
+		isfatal = 1;
+		strlcat(msg,
+			"[Memory BIST test failed, InfiniPath hardware unusable]",
+			msgl);
+		/* ignore from now on, so disable until driver reloaded */
+		dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
+		qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+	}
+
+	qib_format_hwerrors(hwerrs, qib_7220_hwerror_msgs,
+			    ARRAY_SIZE(qib_7220_hwerror_msgs), msg, msgl);
+
+	bitsmsg = dd->cspec->bitsmsgbuf;
+	if (hwerrs & (QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK <<
+		      QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT)) {
+		bits = (u32) ((hwerrs >>
+			       QLOGIC_IB_HWE_PCIEMEMPARITYERR_SHIFT) &
+			      QLOGIC_IB_HWE_PCIEMEMPARITYERR_MASK);
+		snprintf(bitsmsg, sizeof dd->cspec->bitsmsgbuf,
+			 "[PCIe Mem Parity Errs %x] ", bits);
+		strlcat(msg, bitsmsg, msgl);
+	}
+
+#define _QIB_PLL_FAIL (QLOGIC_IB_HWE_COREPLL_FBSLIP | \
+		       QLOGIC_IB_HWE_COREPLL_RFSLIP)
+
+	if (hwerrs & _QIB_PLL_FAIL) {
+		isfatal = 1;
+		snprintf(bitsmsg, sizeof dd->cspec->bitsmsgbuf,
+			 "[PLL failed (%llx), InfiniPath hardware unusable]",
+			 (unsigned long long) hwerrs & _QIB_PLL_FAIL);
+		strlcat(msg, bitsmsg, msgl);
+		/* ignore from now on, so disable until driver reloaded */
+		dd->cspec->hwerrmask &= ~(hwerrs & _QIB_PLL_FAIL);
+		qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+	}
+
+	if (hwerrs & QLOGIC_IB_HWE_SERDESPLLFAILED) {
+		/*
+		 * If it occurs, it is left masked since the external
+		 * interface is unused.
+		 */
+		dd->cspec->hwerrmask &= ~QLOGIC_IB_HWE_SERDESPLLFAILED;
+		qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+	}
+
+	qib_dev_err(dd, "%s hardware error\n", msg);
+
+	if (isfatal && !dd->diag_client) {
+		qib_dev_err(dd,
+			"Fatal Hardware Error, no longer usable, SN %.16s\n",
+			dd->serial);
+		/*
+		 * For /sys status file and user programs to print; if no
+		 * trailing brace is copied, we'll know it was truncated.
+		 */
+		if (dd->freezemsg)
+			snprintf(dd->freezemsg, dd->freezelen,
+				 "{%s}", msg);
+		qib_disable_after_error(dd);
+	}
+bail:;
+}
+
+/**
+ * qib_7220_init_hwerrors - enable hardware errors
+ * @dd: the qlogic_ib device
+ *
+ * Now that we have finished initializing everything that might reasonably
+ * cause a hardware error, and cleared those error bits as they occur,
+ * we can enable hardware errors in the mask (potentially enabling
+ * freeze mode), and enable hardware errors as errors (along with
+ * everything else) in errormask.
+ */
+static void qib_7220_init_hwerrors(struct qib_devdata *dd)
+{
+	u64 val;
+	u64 extsval;
+
+	extsval = qib_read_kreg64(dd, kr_extstatus);
+
+	if (!(extsval & (QLOGIC_IB_EXTS_MEMBIST_ENDTEST |
+			 QLOGIC_IB_EXTS_MEMBIST_DISABLED)))
+		qib_dev_err(dd, "MemBIST did not complete!\n");
+	if (extsval & QLOGIC_IB_EXTS_MEMBIST_DISABLED)
+		qib_devinfo(dd->pcidev, "MemBIST is disabled.\n");
+
+	val = ~0ULL;	/* default to all hwerrors becoming interrupts */
+
+	val &= ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR;
+	dd->cspec->hwerrmask = val;
+
+	qib_write_kreg(dd, kr_hwerrclear, ~HWE_MASK(PowerOnBISTFailed));
+	qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
+
+	/* clear all */
+	qib_write_kreg(dd, kr_errclear, ~0ULL);
+	/* enable errors that are masked, at least this first time.
*/ + qib_write_kreg(dd, kr_errmask, ~0ULL); + dd->cspec->errormask = qib_read_kreg64(dd, kr_errmask); + /* clear any interrupts up to this point (ints still not enabled) */ + qib_write_kreg(dd, kr_intclear, ~0ULL); +} + +/* + * Disable and enable the armlaunch error. Used for PIO bandwidth testing + * on chips that are count-based, rather than trigger-based. There is no + * reference counting, but that's also fine, given the intended use. + * Only chip-specific because it's all register accesses + */ +static void qib_set_7220_armlaunch(struct qib_devdata *dd, u32 enable) +{ + if (enable) { + qib_write_kreg(dd, kr_errclear, ERR_MASK(SendPioArmLaunchErr)); + dd->cspec->errormask |= ERR_MASK(SendPioArmLaunchErr); + } else + dd->cspec->errormask &= ~ERR_MASK(SendPioArmLaunchErr); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); +} + +/* + * Formerly took parameter <which> in pre-shifted, + * pre-merged form with LinkCmd and LinkInitCmd + * together, and assuming the zero was NOP. + */ +static void qib_set_ib_7220_lstate(struct qib_pportdata *ppd, u16 linkcmd, + u16 linitcmd) +{ + u64 mod_wd; + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + if (linitcmd == QLOGIC_IB_IBCC_LINKINITCMD_DISABLE) { + /* + * If we are told to disable, note that so link-recovery + * code does not attempt to bring us back up. + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else if (linitcmd || linkcmd == QLOGIC_IB_IBCC_LINKCMD_DOWN) { + /* + * Any other linkinitcmd will lead to LINKDOWN and then + * to INIT (if all is well), so clear flag to let + * link-recovery code attempt to bring us back up. + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + + mod_wd = (linkcmd << IBA7220_IBCC_LINKCMD_SHIFT) | + (linitcmd << QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + + qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl | mod_wd); + /* write to chip to prevent back-to-back writes of ibc reg */ + qib_write_kreg(dd, kr_scratch, 0); +} + +/* + * All detailed interaction with the SerDes has been moved to qib_sd7220.c + * + * The portion of IBA7220-specific bringup_serdes() that actually deals with + * registers and memory within the SerDes itself is qib_sd7220_init(). + */ + +/** + * qib_7220_bringup_serdes - bring up the serdes + * @ppd: physical port on the qlogic_ib device + */ +static int qib_7220_bringup_serdes(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u64 val, prev_val, guid, ibc; + int ret = 0; + + /* Put IBC in reset, sends disabled */ + dd->control &= ~QLOGIC_IB_C_LINKENABLE; + qib_write_kreg(dd, kr_control, 0ULL); + + if (qib_compat_ddr_negotiate) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymsnap = read_7220_creg32(dd, cr_ibsymbolerr); + ppd->cpspec->iblnkerrsnap = + read_7220_creg32(dd, cr_iblinkerrrecov); + } + + /* flowcontrolwatermark is in units of KBytes */ + ibc = 0x5ULL << SYM_LSB(IBCCtrl, FlowCtrlWaterMark); + /* + * How often flowctrl sent. More or less in usecs; balance against + * watermark value, so that in theory senders always get a flow + * control update in time to not let the IB link go idle. + */ + ibc |= 0x3ULL << SYM_LSB(IBCCtrl, FlowCtrlPeriod); + /* max error tolerance */ + ibc |= 0xfULL << SYM_LSB(IBCCtrl, PhyerrThreshold); + /* use "real" buffer space for */ + ibc |= 4ULL << SYM_LSB(IBCCtrl, CreditScale); + /* IB credit flow control. 
*/ + ibc |= 0xfULL << SYM_LSB(IBCCtrl, OverrunThreshold); + /* + * set initial max size pkt IBC will send, including ICRC; it's the + * PIO buffer size in dwords, less 1; also see qib_set_mtu() + */ + ibc |= ((u64)(ppd->ibmaxlen >> 2) + 1) << SYM_LSB(IBCCtrl, MaxPktLen); + ppd->cpspec->ibcctrl = ibc; /* without linkcmd or linkinitcmd! */ + + /* initially come up waiting for TS1, without sending anything. */ + val = ppd->cpspec->ibcctrl | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE << + QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + qib_write_kreg(dd, kr_ibcctrl, val); + + if (!ppd->cpspec->ibcddrctrl) { + /* not on re-init after reset */ + ppd->cpspec->ibcddrctrl = qib_read_kreg64(dd, kr_ibcddrctrl); + + if (ppd->link_speed_enabled == (QIB_IB_SDR | QIB_IB_DDR)) + ppd->cpspec->ibcddrctrl |= + IBA7220_IBC_SPEED_AUTONEG_MASK | + IBA7220_IBC_IBTA_1_2_MASK; + else + ppd->cpspec->ibcddrctrl |= + ppd->link_speed_enabled == QIB_IB_DDR ? + IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR; + if ((ppd->link_width_enabled & (IB_WIDTH_1X | IB_WIDTH_4X)) == + (IB_WIDTH_1X | IB_WIDTH_4X)) + ppd->cpspec->ibcddrctrl |= IBA7220_IBC_WIDTH_AUTONEG; + else + ppd->cpspec->ibcddrctrl |= + ppd->link_width_enabled == IB_WIDTH_4X ? + IBA7220_IBC_WIDTH_4X_ONLY : + IBA7220_IBC_WIDTH_1X_ONLY; + + /* always enable these on driver reload, not sticky */ + ppd->cpspec->ibcddrctrl |= + IBA7220_IBC_RXPOL_MASK << IBA7220_IBC_RXPOL_SHIFT; + ppd->cpspec->ibcddrctrl |= + IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT; + + /* enable automatic lane reversal detection for receive */ + ppd->cpspec->ibcddrctrl |= IBA7220_IBC_LANE_REV_SUPPORTED; + } else + /* write to chip to prevent back-to-back writes of ibc reg */ + qib_write_kreg(dd, kr_scratch, 0); + + qib_write_kreg(dd, kr_ibcddrctrl, ppd->cpspec->ibcddrctrl); + qib_write_kreg(dd, kr_scratch, 0); + + qib_write_kreg(dd, kr_ncmodectrl, 0Ull); + qib_write_kreg(dd, kr_scratch, 0); + + ret = qib_sd7220_init(dd); + + val = qib_read_kreg64(dd, kr_xgxs_cfg); + prev_val = val; + val |= QLOGIC_IB_XGXS_FC_SAFE; + if (val != prev_val) { + qib_write_kreg(dd, kr_xgxs_cfg, val); + qib_read_kreg32(dd, kr_scratch); + } + if (val & QLOGIC_IB_XGXS_RESET) + val &= ~QLOGIC_IB_XGXS_RESET; + if (val != prev_val) + qib_write_kreg(dd, kr_xgxs_cfg, val); + + /* first time through, set port guid */ + if (!ppd->guid) + ppd->guid = dd->base_guid; + guid = be64_to_cpu(ppd->guid); + + qib_write_kreg(dd, kr_hrtbt_guid, guid); + if (!ret) { + dd->control |= QLOGIC_IB_C_LINKENABLE; + qib_write_kreg(dd, kr_control, dd->control); + } else + /* write to chip to prevent back-to-back writes of ibc reg */ + qib_write_kreg(dd, kr_scratch, 0); + return ret; +} + +/** + * qib_7220_quiet_serdes - set serdes to txidle + * @ppd: physical port of the qlogic_ib device + * Called when driver is being unloaded + */ +static void qib_7220_quiet_serdes(struct qib_pportdata *ppd) +{ + u64 val; + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + /* disable IBC */ + dd->control &= ~QLOGIC_IB_C_LINKENABLE; + qib_write_kreg(dd, kr_control, + dd->control | QLOGIC_IB_C_FREEZEMODE); + + ppd->cpspec->chase_end = 0; + if (ppd->cpspec->chase_timer.data) /* if initted */ + del_timer_sync(&ppd->cpspec->chase_timer); + + if (ppd->cpspec->ibsymdelta || ppd->cpspec->iblnkerrdelta || + ppd->cpspec->ibdeltainprog) { + u64 diagc; + + /* enable counter writes */ + diagc = qib_read_kreg64(dd, kr_hwdiagctrl); + qib_write_kreg(dd, kr_hwdiagctrl, + diagc | SYM_MASK(HwDiagCtrl, CounterWrEnable)); + + if (ppd->cpspec->ibsymdelta || ppd->cpspec->ibdeltainprog) { 
+ val = read_7220_creg32(dd, cr_ibsymbolerr); + if (ppd->cpspec->ibdeltainprog) + val -= val - ppd->cpspec->ibsymsnap; + val -= ppd->cpspec->ibsymdelta; + write_7220_creg(dd, cr_ibsymbolerr, val); + } + if (ppd->cpspec->iblnkerrdelta || ppd->cpspec->ibdeltainprog) { + val = read_7220_creg32(dd, cr_iblinkerrrecov); + if (ppd->cpspec->ibdeltainprog) + val -= val - ppd->cpspec->iblnkerrsnap; + val -= ppd->cpspec->iblnkerrdelta; + write_7220_creg(dd, cr_iblinkerrrecov, val); + } + + /* and disable counter writes */ + qib_write_kreg(dd, kr_hwdiagctrl, diagc); + } + qib_set_ib_7220_lstate(ppd, 0, QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + wake_up(&ppd->cpspec->autoneg_wait); + cancel_delayed_work_sync(&ppd->cpspec->autoneg_work); + + shutdown_7220_relock_poll(ppd->dd); + val = qib_read_kreg64(ppd->dd, kr_xgxs_cfg); + val |= QLOGIC_IB_XGXS_RESET; + qib_write_kreg(ppd->dd, kr_xgxs_cfg, val); +} + +/** + * qib_setup_7220_setextled - set the state of the two external LEDs + * @dd: the qlogic_ib device + * @on: whether the link is up or not + * + * The exact combo of LEDs if on is true is determined by looking + * at the ibcstatus. + * + * These LEDs indicate the physical and logical state of IB link. + * For this chip (at least with recommended board pinouts), LED1 + * is Yellow (logical state) and LED2 is Green (physical state), + * + * Note: We try to match the Mellanox HCA LED behavior as best + * we can. Green indicates physical link state is OK (something is + * plugged in, and we can train). + * Amber indicates the link is logically up (ACTIVE). + * Mellanox further blinks the amber LED to indicate data packet + * activity, but we have no hardware support for that, so it would + * require waking up every 10-20 msecs and checking the counters + * on the chip, and then turning the LED off if appropriate. That's + * visible overhead, so not something we will do. + * + */ +static void qib_setup_7220_setextled(struct qib_pportdata *ppd, u32 on) +{ + struct qib_devdata *dd = ppd->dd; + u64 extctl, ledblink = 0, val, lst, ltst; + unsigned long flags; + + /* + * The diags use the LED to indicate diag info, so we leave + * the external LED alone when the diags are running. + */ + if (dd->diag_client) + return; + + if (ppd->led_override) { + ltst = (ppd->led_override & QIB_LED_PHYS) ? + IB_PHYSPORTSTATE_LINKUP : IB_PHYSPORTSTATE_DISABLED, + lst = (ppd->led_override & QIB_LED_LOG) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + } else if (on) { + val = qib_read_kreg64(dd, kr_ibcstatus); + ltst = qib_7220_phys_portstate(val); + lst = qib_7220_iblink_state(val); + } else { + ltst = 0; + lst = 0; + } + + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + extctl = dd->cspec->extctrl & ~(SYM_MASK(EXTCtrl, LEDPriPortGreenOn) | + SYM_MASK(EXTCtrl, LEDPriPortYellowOn)); + if (ltst == IB_PHYSPORTSTATE_LINKUP) { + extctl |= SYM_MASK(EXTCtrl, LEDPriPortGreenOn); + /* + * counts are in chip clock (4ns) periods. 
+ * This is 1/16 sec (66.6ms) on, + * 3/16 sec (187.5 ms) off, with packets rcvd + */ + ledblink = ((66600 * 1000UL / 4) << IBA7220_LEDBLINK_ON_SHIFT) + | ((187500 * 1000UL / 4) << IBA7220_LEDBLINK_OFF_SHIFT); + } + if (lst == IB_PORT_ACTIVE) + extctl |= SYM_MASK(EXTCtrl, LEDPriPortYellowOn); + dd->cspec->extctrl = extctl; + qib_write_kreg(dd, kr_extctrl, extctl); + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); + + if (ledblink) /* blink the LED on packet receive */ + qib_write_kreg(dd, kr_rcvpktledcnt, ledblink); +} + +static void qib_7220_free_irq(struct qib_devdata *dd) +{ + if (dd->cspec->irq) { + free_irq(dd->cspec->irq, dd); + dd->cspec->irq = 0; + } + qib_nomsi(dd); +} + +/* + * qib_setup_7220_cleanup - clean up any per-chip chip-specific stuff + * @dd: the qlogic_ib device + * + * This is called during driver unload. + * + */ +static void qib_setup_7220_cleanup(struct qib_devdata *dd) +{ + qib_7220_free_irq(dd); + kfree(dd->cspec->cntrs); + kfree(dd->cspec->portcntrs); +} + +/* + * This is only called for SDmaInt. + * SDmaDisabled is handled on the error path. + */ +static void sdma_7220_intr(struct qib_pportdata *ppd, u64 istat) +{ + unsigned long flags; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + + switch (ppd->sdma_state.current_state) { + case qib_sdma_state_s00_hw_down: + break; + + case qib_sdma_state_s10_hw_start_up_wait: + __qib_sdma_process_event(ppd, qib_sdma_event_e20_hw_started); + break; + + case qib_sdma_state_s20_idle: + break; + + case qib_sdma_state_s30_sw_clean_up_wait: + break; + + case qib_sdma_state_s40_hw_clean_up_wait: + break; + + case qib_sdma_state_s50_hw_halt_wait: + __qib_sdma_process_event(ppd, qib_sdma_event_e60_hw_halted); + break; + + case qib_sdma_state_s99_running: + /* too chatty to print here */ + __qib_sdma_intr(ppd); + break; + } + spin_unlock_irqrestore(&ppd->sdma_lock, flags); +} + +static void qib_wantpiobuf_7220_intr(struct qib_devdata *dd, u32 needint) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if (needint) { + if (!(dd->sendctrl & SYM_MASK(SendCtrl, SendBufAvailUpd))) + goto done; + /* + * blip the availupd off, next write will be on, so + * we ensure an avail update, regardless of threshold or + * buffers becoming free, whenever we want an interrupt + */ + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl & + ~SYM_MASK(SendCtrl, SendBufAvailUpd)); + qib_write_kreg(dd, kr_scratch, 0ULL); + dd->sendctrl |= SYM_MASK(SendCtrl, SendIntBufAvail); + } else + dd->sendctrl &= ~SYM_MASK(SendCtrl, SendIntBufAvail); + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0ULL); +done: + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); +} + +/* + * Handle errors and unusual events first, separate function + * to improve cache hits for fast path interrupt handling. + */ +static noinline void unlikely_7220_intr(struct qib_devdata *dd, u64 istat) +{ + if (unlikely(istat & ~QLOGIC_IB_I_BITSEXTANT)) + qib_dev_err(dd, + "interrupt with unknown interrupts %Lx set\n", + istat & ~QLOGIC_IB_I_BITSEXTANT); + + if (istat & QLOGIC_IB_I_GPIO) { + u32 gpiostatus; + + /* + * Boards for this chip currently don't use GPIO interrupts, + * so clear by writing GPIOstatus to GPIOclear, and complain + * to alert developer. To avoid endless repeats, clear + * the bits in the mask, since there is some kind of + * programming error or chip problem. 
*/ + gpiostatus = qib_read_kreg32(dd, kr_gpio_status); + /* + * In theory, writing GPIOstatus to GPIOclear could + * have a bad side-effect on some diagnostic that wanted + * to poll for a status-change, but the various shadows + * make that problematic at best. Diags will just suppress + * all GPIO interrupts during such tests. + */ + qib_write_kreg(dd, kr_gpio_clear, gpiostatus); + + if (gpiostatus) { + const u32 mask = qib_read_kreg32(dd, kr_gpio_mask); + u32 gpio_irq = mask & gpiostatus; + + /* + * A bit set in status and (chip) Mask register + * would cause an interrupt. Since we are not + * expecting any, report it. Also check that the + * chip reflects our shadow, report issues, + * and refresh from the shadow. + */ + /* + * Clear any troublemakers, and update chip + * from shadow + */ + dd->cspec->gpio_mask &= ~gpio_irq; + qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask); + } + } + + if (istat & QLOGIC_IB_I_ERROR) { + u64 estat; + + qib_stats.sps_errints++; + estat = qib_read_kreg64(dd, kr_errstatus); + if (!estat) + qib_devinfo(dd->pcidev, + "error interrupt (%Lx), but no error bits set!\n", + istat); + else + handle_7220_errors(dd, estat); + } +} + +static irqreturn_t qib_7220intr(int irq, void *data) +{ + struct qib_devdata *dd = data; + irqreturn_t ret; + u64 istat; + u64 ctxtrbits; + u64 rmask; + unsigned i; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) { + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + ret = IRQ_HANDLED; + goto bail; + } + + istat = qib_read_kreg64(dd, kr_intstatus); + + if (unlikely(!istat)) { + ret = IRQ_NONE; /* not our interrupt, or already handled */ + goto bail; + } + if (unlikely(istat == -1)) { + qib_bad_intrstatus(dd); + /* don't know if it was our interrupt or not */ + ret = IRQ_NONE; + goto bail; + } + + this_cpu_inc(*dd->int_counter); + if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | + QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) + unlikely_7220_intr(dd, istat); + + /* + * Clear the interrupt bits we found set, relatively early, so we + * "know" the chip will have seen this by the time we process + * the queue, and will re-interrupt if necessary. The processor + * itself won't take the interrupt again until we return. + */ + qib_write_kreg(dd, kr_intclear, istat); + + /* + * Handle kernel receive queues before checking for pio buffers + * available since receives can overflow; piobuf waiters can afford + * a few extra cycles, since they were waiting anyway. + */ + ctxtrbits = istat & + ((QLOGIC_IB_I_RCVAVAIL_MASK << QLOGIC_IB_I_RCVAVAIL_SHIFT) | + (QLOGIC_IB_I_RCVURG_MASK << QLOGIC_IB_I_RCVURG_SHIFT)); + if (ctxtrbits) { + rmask = (1ULL << QLOGIC_IB_I_RCVAVAIL_SHIFT) | + (1ULL << QLOGIC_IB_I_RCVURG_SHIFT); + for (i = 0; i < dd->first_user_ctxt; i++) { + if (ctxtrbits & rmask) { + ctxtrbits &= ~rmask; + qib_kreceive(dd->rcd[i], NULL, NULL); + } + rmask <<= 1; + } + if (ctxtrbits) { + ctxtrbits = + (ctxtrbits >> QLOGIC_IB_I_RCVAVAIL_SHIFT) | + (ctxtrbits >> QLOGIC_IB_I_RCVURG_SHIFT); + qib_handle_urcv(dd, ctxtrbits); + } + } + + /* only call for SDmaInt */ + if (istat & QLOGIC_IB_I_SDMAINT) + sdma_7220_intr(dd->pport, istat); + + if ((istat & QLOGIC_IB_I_SPIOBUFAVAIL) && (dd->flags & QIB_INITTED)) + qib_ib_piobufavail(dd); + + ret = IRQ_HANDLED; +bail: + return ret; +} + +/* + * Set up our chip-specific interrupt handler.
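A minimal userspace sketch of the per-context dispatch in qib_7220intr() above: one avail bit and one urgent bit advance together per context when the two-bit mask is shifted. The shift values and context count here are illustrative stand-ins, not the chip's QLOGIC_IB_I_RCV* layout.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical bit layout: avail bits at [0..16], urgent bits at [32..48]. */
#define RCVAVAIL_SHIFT 0
#define RCVURG_SHIFT 32
#define NUM_KCTXTS 5

static void dispatch_rcv(uint64_t istat)
{
        uint64_t rmask = (1ULL << RCVAVAIL_SHIFT) | (1ULL << RCVURG_SHIFT);
        unsigned i;

        for (i = 0; i < NUM_KCTXTS; i++) {
                if (istat & rmask)
                        printf("kernel ctxt %u: receive work\n", i);
                istat &= ~rmask;
                rmask <<= 1;    /* advance both bits one context */
        }
        if (istat)
                printf("remaining bits 0x%llx belong to user contexts\n",
                       (unsigned long long)istat);
}

int main(void)
{
        dispatch_rcv((1ULL << 2) | (1ULL << (32 + 7)));
        return 0;
}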
+ * The interrupt type has already been setup, so + * we just need to do the registration and error checking. + * If we are using MSI interrupts, we may fall back to + * INTx later, if the interrupt handler doesn't get called + * within 1/2 second (see verify_interrupt()). + */ +static void qib_setup_7220_interrupt(struct qib_devdata *dd) +{ + if (!dd->cspec->irq) + qib_dev_err(dd, + "irq is 0, BIOS error? Interrupts won't work\n"); + else { + int ret = request_irq(dd->cspec->irq, qib_7220intr, + dd->msi_lo ? 0 : IRQF_SHARED, + QIB_DRV_NAME, dd); + + if (ret) + qib_dev_err(dd, + "Couldn't setup %s interrupt (irq=%d): %d\n", + dd->msi_lo ? "MSI" : "INTx", + dd->cspec->irq, ret); + } +} + +/** + * qib_7220_boardname - fill in the board name + * @dd: the qlogic_ib device + * + * info is based on the board revision register + */ +static void qib_7220_boardname(struct qib_devdata *dd) +{ + char *n; + u32 boardid, namelen; + + boardid = SYM_FIELD(dd->revision, Revision, + BoardID); + + switch (boardid) { + case 1: + n = "InfiniPath_QLE7240"; + break; + case 2: + n = "InfiniPath_QLE7280"; + break; + default: + qib_dev_err(dd, "Unknown 7220 board with ID %u\n", boardid); + n = "Unknown_InfiniPath_7220"; + break; + } + + namelen = strlen(n) + 1; + dd->boardname = kmalloc(namelen, GFP_KERNEL); + if (!dd->boardname) + qib_dev_err(dd, "Failed allocation for board name: %s\n", n); + else + snprintf(dd->boardname, namelen, "%s", n); + + if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2) + qib_dev_err(dd, + "Unsupported InfiniPath hardware revision %u.%u!\n", + dd->majrev, dd->minrev); + + snprintf(dd->boardversion, sizeof(dd->boardversion), + "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", + QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname, + (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch), + dd->majrev, dd->minrev, + (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); +} + +/* + * This routine sleeps, so it can only be called from user context, not + * from interrupt context. + */ +static int qib_setup_7220_reset(struct qib_devdata *dd) +{ + u64 val; + int i; + int ret; + u16 cmdval; + u8 int_line, clinesz; + unsigned long flags; + + qib_pcie_getcmd(dd, &cmdval, &int_line, &clinesz); + + /* Use dev_err so it shows up in logs, etc. */ + qib_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->unit); + + /* no interrupts till re-initted */ + qib_7220_set_intr_state(dd, 0); + + dd->pport->cpspec->ibdeltainprog = 0; + dd->pport->cpspec->ibsymdelta = 0; + dd->pport->cpspec->iblnkerrdelta = 0; + + /* + * Keep chip from being accessed until we are ready. Use + * writeq() directly, to allow the write even though QIB_PRESENT + * isn't set. + */ + dd->flags &= ~(QIB_INITTED | QIB_PRESENT); + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); + val = dd->control | QLOGIC_IB_C_RESET; + writeq(val, &dd->kregbase[kr_control]); + mb(); /* prevent compiler reordering around actual reset */ + + for (i = 1; i <= 5; i++) { + /* + * Allow MBIST, etc. to complete; longer on each retry. + * We sometimes get machine checks from bus timeout if no + * response, so for now, make it *really* long. + */ + msleep(1000 + (1 + i) * 2000); + + qib_pcie_reenable(dd, cmdval, int_line, clinesz); + + /* + * Use readq directly, so we don't need to mark it as PRESENT + * until we get a successful indication that all is well. 
+ */ + val = readq(&dd->kregbase[kr_revision]); + if (val == dd->revision) { + dd->flags |= QIB_PRESENT; /* it's back */ + ret = qib_reinit_intr(dd); + goto bail; + } + } + ret = 0; /* failed */ + +bail: + if (ret) { + if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); + + /* hold IBC in reset, no sends, etc till later */ + qib_write_kreg(dd, kr_control, 0ULL); + + /* clear the reset error, init error/hwerror mask */ + qib_7220_init_hwerrors(dd); + + /* do setup similar to speed or link-width changes */ + if (dd->pport->cpspec->ibcddrctrl & IBA7220_IBC_IBTA_1_2_MASK) + dd->cspec->presets_needed = 1; + spin_lock_irqsave(&dd->pport->lflags_lock, flags); + dd->pport->lflags |= QIBL_IB_FORCE_NOTIFY; + dd->pport->lflags &= ~QIBL_IB_AUTONEG_FAILED; + spin_unlock_irqrestore(&dd->pport->lflags_lock, flags); + } + + return ret; +} + +/** + * qib_7220_put_tid - write a TID to the chip + * @dd: the qlogic_ib device + * @tidptr: pointer to the expected TID (in chip) to update + * @tidtype: 0 for eager, 1 for expected + * @pa: physical address of in memory buffer; tidinvalid if freeing + */ +static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, + u32 type, unsigned long pa) +{ + if (pa != dd->tidinvalid) { + u64 chippa = pa >> IBA7220_TID_PA_SHIFT; + + /* paranoia checks */ + if (pa != (chippa << IBA7220_TID_PA_SHIFT)) { + qib_dev_err(dd, "Physaddr %lx not 2KB aligned!\n", + pa); + return; + } + if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) { + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); + return; + } + + if (type == RCVHQ_RCV_TYPE_EAGER) + chippa |= dd->tidtemplate; + else /* for now, always full 4KB page */ + chippa |= IBA7220_TID_SZ_4K; + pa = chippa; + } + writeq(pa, tidptr); + mmiowb(); +} + +/** + * qib_7220_clear_tids - clear all TID entries for a ctxt, expected and eager + * @dd: the qlogic_ib device + * @ctxt: the ctxt + * + * clear all TID entries for a ctxt, expected and eager. + * Used from qib_close(). 
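The reset loop above polls for the chip's return by re-reading a register with a known value, sleeping longer on each pass. A compact sketch of that retry shape, with stub accessors standing in for readq(), qib_pcie_reenable() and msleep(); everything here is illustrative, not the driver's API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t read_revision(void) { return 0x500002ULL; } /* stub */
static void reenable_bus(void) { }                          /* stub */
static void sleep_ms(unsigned ms) { (void)ms; }             /* stub */

/* Sleep longer on each pass; success only when a known register
 * reads back with its expected value. */
static bool wait_for_chip(uint64_t expected_rev)
{
        int i;

        for (i = 1; i <= 5; i++) {
                sleep_ms(1000 + (1 + i) * 2000); /* 5s, 7s, 9s, ... */
                reenable_bus();
                if (read_revision() == expected_rev)
                        return true; /* chip is back */
        }
        return false;
}

int main(void)
{
        printf("chip %s\n", wait_for_chip(0x500002ULL) ? "present" : "absent");
        return 0;
}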
On this chip, TIDs are only 32 bits, + * not 64, but they are still on 64 bit boundaries, so tidbase + * is declared as u64 * for the pointer math, even though we write 32 bits + */ +static void qib_7220_clear_tids(struct qib_devdata *dd, + struct qib_ctxtdata *rcd) +{ + u64 __iomem *tidbase; + unsigned long tidinv; + u32 ctxt; + int i; + + if (!dd->kregbase || !rcd) + return; + + ctxt = rcd->ctxt; + + tidinv = dd->tidinvalid; + tidbase = (u64 __iomem *) + ((char __iomem *)(dd->kregbase) + + dd->rcvtidbase + + ctxt * dd->rcvtidcnt * sizeof(*tidbase)); + + for (i = 0; i < dd->rcvtidcnt; i++) + qib_7220_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, + tidinv); + + tidbase = (u64 __iomem *) + ((char __iomem *)(dd->kregbase) + + dd->rcvegrbase + + rcd->rcvegr_tid_base * sizeof(*tidbase)); + + for (i = 0; i < rcd->rcvegrcnt; i++) + qib_7220_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, + tidinv); +} + +/** + * qib_7220_tidtemplate - setup constants for TID updates + * @dd: the qlogic_ib device + * + * We set up values that we use a lot, to avoid recalculating them each time + */ +static void qib_7220_tidtemplate(struct qib_devdata *dd) +{ + if (dd->rcvegrbufsize == 2048) + dd->tidtemplate = IBA7220_TID_SZ_2K; + else if (dd->rcvegrbufsize == 4096) + dd->tidtemplate = IBA7220_TID_SZ_4K; + dd->tidinvalid = 0; +} + +/** + * qib_7220_get_base_info - set chip-specific flags for user code + * @rcd: the qlogic_ib ctxt + * @kinfo: qib_base_info pointer + * + * We set the PCIE flag because the lower bandwidth on PCIe vs + * HyperTransport can affect some user packet algorithms. + */ +static int qib_7220_get_base_info(struct qib_ctxtdata *rcd, + struct qib_base_info *kinfo) +{ + kinfo->spi_runtime_flags |= QIB_RUNTIME_PCIE | + QIB_RUNTIME_NODMA_RTAIL | QIB_RUNTIME_SDMA; + + if (rcd->dd->flags & QIB_USE_SPCL_TRIG) + kinfo->spi_runtime_flags |= QIB_RUNTIME_SPECIAL_TRIGGER; + + return 0; +} + +static struct qib_message_header * +qib_7220_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr) +{ + u32 offset = qib_hdrget_offset(rhf_addr); + + return (struct qib_message_header *) + (rhf_addr - dd->rhf_offset + offset); +} + +static void qib_7220_config_ctxts(struct qib_devdata *dd) +{ + unsigned long flags; + u32 nchipctxts; + + nchipctxts = qib_read_kreg32(dd, kr_portcnt); + dd->cspec->numctxts = nchipctxts; + if (qib_n_krcv_queues > 1) { + dd->qpn_mask = 0x3e; + dd->first_user_ctxt = qib_n_krcv_queues * dd->num_pports; + if (dd->first_user_ctxt > nchipctxts) + dd->first_user_ctxt = nchipctxts; + } else + dd->first_user_ctxt = dd->num_pports; + dd->n_krcv_queues = dd->first_user_ctxt; + + if (!qib_cfgctxts) { + int nctxts = dd->first_user_ctxt + num_online_cpus(); + + if (nctxts <= 5) + dd->ctxtcnt = 5; + else if (nctxts <= 9) + dd->ctxtcnt = 9; + else if (nctxts <= nchipctxts) + dd->ctxtcnt = nchipctxts; + } else if (qib_cfgctxts <= nchipctxts) + dd->ctxtcnt = qib_cfgctxts; + if (!dd->ctxtcnt) /* none of the above, set to max */ + dd->ctxtcnt = nchipctxts; + + /* + * Chip can be configured for 5, 9, or 17 ctxts, and choice + * affects number of eager TIDs per ctxt (1K, 2K, 4K). + * Lock to be paranoid about later motion, etc.
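qib_7220_put_tid() above packs a 2KB-aligned physical address into a TID word by shifting out the alignment bits and OR-ing in a size code, rejecting misaligned or out-of-range addresses. A sketch of the same packing, with made-up shift and size values rather than the IBA7220_TID_* constants:

#include <stdint.h>
#include <stdio.h>

#define TID_PA_SHIFT 11            /* 2KB alignment, as in the checks above */
#define TID_SZ_SHIFT 37            /* hypothetical page-number field width */
#define TID_SZ_4K (1ULL << 42)     /* hypothetical size code */

static int pack_tid(unsigned long pa, uint64_t *tid)
{
        uint64_t chippa = pa >> TID_PA_SHIFT;

        if (pa != (chippa << TID_PA_SHIFT))
                return -1; /* not 2KB aligned */
        if (chippa >= (1ULL << TID_SZ_SHIFT))
                return -1; /* address too large for the field */
        *tid = chippa | TID_SZ_4K;
        return 0;
}

int main(void)
{
        uint64_t tid;

        if (!pack_tid(0x12345800UL, &tid))
                printf("tid = 0x%llx\n", (unsigned long long)tid);
        return 0;
}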
+ */ + spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); + if (dd->ctxtcnt > 9) + dd->rcvctrl |= 2ULL << IBA7220_R_CTXTCFG_SHIFT; + else if (dd->ctxtcnt > 5) + dd->rcvctrl |= 1ULL << IBA7220_R_CTXTCFG_SHIFT; + /* else configure for default 5 receive ctxts */ + if (dd->qpn_mask) + dd->rcvctrl |= 1ULL << QIB_7220_RcvCtrl_RcvQPMapEnable_LSB; + qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl); + spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); + + /* kr_rcvegrcnt changes based on the number of contexts enabled */ + dd->cspec->rcvegrcnt = qib_read_kreg32(dd, kr_rcvegrcnt); + dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, IBA7220_KRCVEGRCNT); +} + +static int qib_7220_get_ib_cfg(struct qib_pportdata *ppd, int which) +{ + int lsb, ret = 0; + u64 maskr; /* right-justified mask */ + + switch (which) { + case QIB_IB_CFG_LWID_ENB: /* Get allowed Link-width */ + ret = ppd->link_width_enabled; + goto done; + + case QIB_IB_CFG_LWID: /* Get currently active Link-width */ + ret = ppd->link_width_active; + goto done; + + case QIB_IB_CFG_SPD_ENB: /* Get allowed Link speeds */ + ret = ppd->link_speed_enabled; + goto done; + + case QIB_IB_CFG_SPD: /* Get current Link spd */ + ret = ppd->link_speed_active; + goto done; + + case QIB_IB_CFG_RXPOL_ENB: /* Get Auto-RX-polarity enable */ + lsb = IBA7220_IBC_RXPOL_SHIFT; + maskr = IBA7220_IBC_RXPOL_MASK; + break; + + case QIB_IB_CFG_LREV_ENB: /* Get Auto-Lane-reversal enable */ + lsb = IBA7220_IBC_LREV_SHIFT; + maskr = IBA7220_IBC_LREV_MASK; + break; + + case QIB_IB_CFG_LINKLATENCY: + ret = qib_read_kreg64(ppd->dd, kr_ibcddrstatus) + & IBA7220_DDRSTAT_LINKLAT_MASK; + goto done; + + case QIB_IB_CFG_OP_VLS: + ret = ppd->vls_operational; + goto done; + + case QIB_IB_CFG_VL_HIGH_CAP: + ret = 0; + goto done; + + case QIB_IB_CFG_VL_LOW_CAP: + ret = 0; + goto done; + + case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */ + ret = SYM_FIELD(ppd->cpspec->ibcctrl, IBCCtrl, + OverrunThreshold); + goto done; + + case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */ + ret = SYM_FIELD(ppd->cpspec->ibcctrl, IBCCtrl, + PhyerrThreshold); + goto done; + + case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ + /* will only take effect when the link state changes */ + ret = (ppd->cpspec->ibcctrl & + SYM_MASK(IBCCtrl, LinkDownDefaultState)) ? + IB_LINKINITCMD_SLEEP : IB_LINKINITCMD_POLL; + goto done; + + case QIB_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */ + lsb = IBA7220_IBC_HRTBT_SHIFT; + maskr = IBA7220_IBC_HRTBT_MASK; + break; + + case QIB_IB_CFG_PMA_TICKS: + /* + * 0x00 = 10x link transfer rate or 4 nsec. for 2.5Gbs + * Since the clock is always 250MHz, the value is 1 or 0. + */ + ret = (ppd->link_speed_active == QIB_IB_DDR); + goto done; + + default: + ret = -EINVAL; + goto done; + } + ret = (int)((ppd->cpspec->ibcddrctrl >> lsb) & maskr); +done: + return ret; +} + +static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) +{ + struct qib_devdata *dd = ppd->dd; + u64 maskr; /* right-justified mask */ + int lsb, ret = 0, setforce = 0; + u16 lcmd, licmd; + unsigned long flags; + u32 tmp = 0; + + switch (which) { + case QIB_IB_CFG_LIDLMC: + /* + * Set LID and LMC. Combined to avoid possible hazard + * caller puts LMC in 16MSbits, DLID in 16LSbits of val + */ + lsb = IBA7220_IBC_DLIDLMC_SHIFT; + maskr = IBA7220_IBC_DLIDLMC_MASK; + break; + + case QIB_IB_CFG_LWID_ENB: /* set allowed Link-width */ + /* + * As with speed, only write the actual register if + * the link is currently down, otherwise takes effect + * on next link change. 
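The (lsb, maskr) convention used by qib_7220_get_ib_cfg() above (and by the set path that follows) keeps each tunable as a right-justified field in a shadow copy of the control register. A tiny sketch of the get/set pair, with an invented field position standing in for the IBA7220_IBC_* constants:

#include <stdint.h>
#include <stdio.h>

#define HRTBT_LSB 26        /* illustrative, not the chip's value */
#define HRTBT_MASK 0x3ULL

static uint64_t shadow; /* in the driver: ppd->cpspec->ibcddrctrl */

static unsigned get_field(int lsb, uint64_t mask)
{
        return (unsigned)((shadow >> lsb) & mask);
}

static void set_field(int lsb, uint64_t mask, uint64_t val)
{
        shadow &= ~(mask << lsb);
        shadow |= (val & mask) << lsb;
        /* the driver then writes the shadow to the chip, plus a
         * scratch write to avoid back-to-back register writes */
}

int main(void)
{
        set_field(HRTBT_LSB, HRTBT_MASK, 2);
        printf("heartbeat field = %u\n", get_field(HRTBT_LSB, HRTBT_MASK));
        return 0;
}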
*/ + ppd->link_width_enabled = val; + if (!(ppd->lflags & QIBL_LINKDOWN)) + goto bail; + /* + * We set the QIBL_IB_FORCE_NOTIFY bit so updown + * will get called because we want to update + * link_width_active, and the change may not take + * effect for some time (if we are in POLL), so this + * flag will force the updown routine to be called + * on the next ibstatuschange down interrupt, even + * if it's not a down->up transition. + */ + val--; /* convert from IB to chip */ + maskr = IBA7220_IBC_WIDTH_MASK; + lsb = IBA7220_IBC_WIDTH_SHIFT; + setforce = 1; + break; + + case QIB_IB_CFG_SPD_ENB: /* set allowed Link speeds */ + /* + * If we turn off IB1.2, need to preset SerDes defaults, + * but not right now. Set a flag for the next time + * we command the link down. As with width, only write the + * actual register if the link is currently down, otherwise + * takes effect on next link change. Since setting is being + * explicitly requested (via MAD or sysfs), clear autoneg + * failure status if speed autoneg is enabled. + */ + ppd->link_speed_enabled = val; + if ((ppd->cpspec->ibcddrctrl & IBA7220_IBC_IBTA_1_2_MASK) && + !(val & (val - 1))) + dd->cspec->presets_needed = 1; + if (!(ppd->lflags & QIBL_LINKDOWN)) + goto bail; + /* + * We set the QIBL_IB_FORCE_NOTIFY bit so updown + * will get called because we want to update + * link_speed_active, and the change may not take + * effect for some time (if we are in POLL), so this + * flag will force the updown routine to be called + * on the next ibstatuschange down interrupt, even + * if it's not a down->up transition. + */ + if (val == (QIB_IB_SDR | QIB_IB_DDR)) { + val = IBA7220_IBC_SPEED_AUTONEG_MASK | + IBA7220_IBC_IBTA_1_2_MASK; + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else + val = val == QIB_IB_DDR ?
+ IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR; + maskr = IBA7220_IBC_SPEED_AUTONEG_MASK | + IBA7220_IBC_IBTA_1_2_MASK; + /* IBTA 1.2 mode + speed bits are contiguous */ + lsb = SYM_LSB(IBCDDRCtrl, IB_ENHANCED_MODE); + setforce = 1; + break; + + case QIB_IB_CFG_RXPOL_ENB: /* set Auto-RX-polarity enable */ + lsb = IBA7220_IBC_RXPOL_SHIFT; + maskr = IBA7220_IBC_RXPOL_MASK; + break; + + case QIB_IB_CFG_LREV_ENB: /* set Auto-Lane-reversal enable */ + lsb = IBA7220_IBC_LREV_SHIFT; + maskr = IBA7220_IBC_LREV_MASK; + break; + + case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */ + maskr = SYM_FIELD(ppd->cpspec->ibcctrl, IBCCtrl, + OverrunThreshold); + if (maskr != val) { + ppd->cpspec->ibcctrl &= + ~SYM_MASK(IBCCtrl, OverrunThreshold); + ppd->cpspec->ibcctrl |= (u64) val << + SYM_LSB(IBCCtrl, OverrunThreshold); + qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + goto bail; + + case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */ + maskr = SYM_FIELD(ppd->cpspec->ibcctrl, IBCCtrl, + PhyerrThreshold); + if (maskr != val) { + ppd->cpspec->ibcctrl &= + ~SYM_MASK(IBCCtrl, PhyerrThreshold); + ppd->cpspec->ibcctrl |= (u64) val << + SYM_LSB(IBCCtrl, PhyerrThreshold); + qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + goto bail; + + case QIB_IB_CFG_PKEYS: /* update pkeys */ + maskr = (u64) ppd->pkeys[0] | ((u64) ppd->pkeys[1] << 16) | + ((u64) ppd->pkeys[2] << 32) | + ((u64) ppd->pkeys[3] << 48); + qib_write_kreg(dd, kr_partitionkey, maskr); + goto bail; + + case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ + /* will only take effect when the link state changes */ + if (val == IB_LINKINITCMD_POLL) + ppd->cpspec->ibcctrl &= + ~SYM_MASK(IBCCtrl, LinkDownDefaultState); + else /* SLEEP */ + ppd->cpspec->ibcctrl |= + SYM_MASK(IBCCtrl, LinkDownDefaultState); + qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + goto bail; + + case QIB_IB_CFG_MTU: /* update the MTU in IBC */ + /* + * Update our housekeeping variables, and set IBC max + * size, same as init code; max IBC is max we allow in + * buffer, less the qword pbc, plus 1 for ICRC, in dwords + * Set even if it's unchanged, print debug message only + * on changes. 
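The QIB_IB_CFG_PKEYS case above packs the four 16-bit partition keys into a single 64-bit register image, pkeys[0] in the low word. The same packing in isolation, as a self-contained sketch:

#include <stdint.h>
#include <stdio.h>

static uint64_t pack_pkeys(const uint16_t pkeys[4])
{
        return (uint64_t)pkeys[0] |
               ((uint64_t)pkeys[1] << 16) |
               ((uint64_t)pkeys[2] << 32) |
               ((uint64_t)pkeys[3] << 48);
}

int main(void)
{
        uint16_t pkeys[4] = { 0xffff, 0x8001, 0, 0 };

        printf("partitionkey = 0x%016llx\n",
               (unsigned long long)pack_pkeys(pkeys));
        return 0;
}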
*/ + val = (ppd->ibmaxlen >> 2) + 1; + ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, MaxPktLen); + ppd->cpspec->ibcctrl |= (u64)val << SYM_LSB(IBCCtrl, MaxPktLen); + qib_write_kreg(dd, kr_ibcctrl, ppd->cpspec->ibcctrl); + qib_write_kreg(dd, kr_scratch, 0); + goto bail; + + case QIB_IB_CFG_LSTATE: /* set the IB link state */ + switch (val & 0xffff0000) { + case IB_LINKCMD_DOWN: + lcmd = QLOGIC_IB_IBCC_LINKCMD_DOWN; + if (!ppd->cpspec->ibdeltainprog && + qib_compat_ddr_negotiate) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymsnap = + read_7220_creg32(dd, cr_ibsymbolerr); + ppd->cpspec->iblnkerrsnap = + read_7220_creg32(dd, cr_iblinkerrrecov); + } + break; + + case IB_LINKCMD_ARMED: + lcmd = QLOGIC_IB_IBCC_LINKCMD_ARMED; + break; + + case IB_LINKCMD_ACTIVE: + lcmd = QLOGIC_IB_IBCC_LINKCMD_ACTIVE; + break; + + default: + ret = -EINVAL; + qib_dev_err(dd, "bad linkcmd req 0x%x\n", val >> 16); + goto bail; + } + switch (val & 0xffff) { + case IB_LINKINITCMD_NOP: + licmd = 0; + break; + + case IB_LINKINITCMD_POLL: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_POLL; + break; + + case IB_LINKINITCMD_SLEEP: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_SLEEP; + break; + + case IB_LINKINITCMD_DISABLE: + licmd = QLOGIC_IB_IBCC_LINKINITCMD_DISABLE; + ppd->cpspec->chase_end = 0; + /* + * stop state chase counter and timer, if running. + * wait for pending timer, but don't clear .data (ppd)! + */ + if (ppd->cpspec->chase_timer.expires) { + del_timer_sync(&ppd->cpspec->chase_timer); + ppd->cpspec->chase_timer.expires = 0; + } + break; + + default: + ret = -EINVAL; + qib_dev_err(dd, "bad linkinitcmd req 0x%x\n", + val & 0xffff); + goto bail; + } + qib_set_ib_7220_lstate(ppd, lcmd, licmd); + + maskr = IBA7220_IBC_WIDTH_MASK; + lsb = IBA7220_IBC_WIDTH_SHIFT; + tmp = (ppd->cpspec->ibcddrctrl >> lsb) & maskr; + /* If the width active on the chip does not match the + * width in the shadow register, write the new active + * width to the chip. + * We don't have to worry about speed as the speed is taken + * care of by set_7220_ibspeed_fast called by ib_updown.
*/ + if (ppd->link_width_enabled-1 != tmp) { + ppd->cpspec->ibcddrctrl &= ~(maskr << lsb); + ppd->cpspec->ibcddrctrl |= + (((u64)(ppd->link_width_enabled-1) & maskr) << + lsb); + qib_write_kreg(dd, kr_ibcddrctrl, + ppd->cpspec->ibcddrctrl); + qib_write_kreg(dd, kr_scratch, 0); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_FORCE_NOTIFY; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + goto bail; + + case QIB_IB_CFG_HRTBT: /* set Heartbeat off/enable/auto */ + if (val > IBA7220_IBC_HRTBT_MASK) { + ret = -EINVAL; + goto bail; + } + lsb = IBA7220_IBC_HRTBT_SHIFT; + maskr = IBA7220_IBC_HRTBT_MASK; + break; + + default: + ret = -EINVAL; + goto bail; + } + ppd->cpspec->ibcddrctrl &= ~(maskr << lsb); + ppd->cpspec->ibcddrctrl |= (((u64) val & maskr) << lsb); + qib_write_kreg(dd, kr_ibcddrctrl, ppd->cpspec->ibcddrctrl); + qib_write_kreg(dd, kr_scratch, 0); + if (setforce) { + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_FORCE_NOTIFY; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } +bail: + return ret; +} + +static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what) +{ + int ret = 0; + u64 val, ddr; + + if (!strncmp(what, "ibc", 3)) { + ppd->cpspec->ibcctrl |= SYM_MASK(IBCCtrl, Loopback); + val = 0; /* disable heart beat, so link will come up */ + qib_devinfo(ppd->dd->pcidev, "Enabling IB%u:%u IBC loopback\n", + ppd->dd->unit, ppd->port); + } else if (!strncmp(what, "off", 3)) { + ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); + /* enable heart beat again */ + val = IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT; + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); + } else + ret = -EINVAL; + if (!ret) { + qib_write_kreg(ppd->dd, kr_ibcctrl, ppd->cpspec->ibcctrl); + ddr = ppd->cpspec->ibcddrctrl & ~(IBA7220_IBC_HRTBT_MASK + << IBA7220_IBC_HRTBT_SHIFT); + ppd->cpspec->ibcddrctrl = ddr | val; + qib_write_kreg(ppd->dd, kr_ibcddrctrl, + ppd->cpspec->ibcddrctrl); + qib_write_kreg(ppd->dd, kr_scratch, 0); + } + return ret; +} + +static void qib_update_7220_usrhead(struct qib_ctxtdata *rcd, u64 hd, + u32 updegr, u32 egrhd, u32 npkts) +{ + if (updegr) + qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); + mmiowb(); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + mmiowb(); +} + +static u32 qib_7220_hdrqempty(struct qib_ctxtdata *rcd) +{ + u32 head, tail; + + head = qib_read_ureg32(rcd->dd, ur_rcvhdrhead, rcd->ctxt); + if (rcd->rcvhdrtail_kvaddr) + tail = qib_get_rcvhdrtail(rcd); + else + tail = qib_read_ureg32(rcd->dd, ur_rcvhdrtail, rcd->ctxt); + return head == tail; +} + +/* + * Modify the RCVCTRL register in chip-specific way. This + * is a function because bit positions and (future) register + * location are chip-specific, but the needed operations are + * generic. <op> is a bit-mask because we often want to + * do multiple modifications.
*/ +static void rcvctrl_7220_mod(struct qib_pportdata *ppd, unsigned int op, + int ctxt) +{ + struct qib_devdata *dd = ppd->dd; + u64 mask, val; + unsigned long flags; + + spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); + if (op & QIB_RCVCTRL_TAILUPD_ENB) + dd->rcvctrl |= (1ULL << IBA7220_R_TAILUPD_SHIFT); + if (op & QIB_RCVCTRL_TAILUPD_DIS) + dd->rcvctrl &= ~(1ULL << IBA7220_R_TAILUPD_SHIFT); + if (op & QIB_RCVCTRL_PKEY_ENB) + dd->rcvctrl &= ~(1ULL << IBA7220_R_PKEY_DIS_SHIFT); + if (op & QIB_RCVCTRL_PKEY_DIS) + dd->rcvctrl |= (1ULL << IBA7220_R_PKEY_DIS_SHIFT); + if (ctxt < 0) + mask = (1ULL << dd->ctxtcnt) - 1; + else + mask = (1ULL << ctxt); + if (op & QIB_RCVCTRL_CTXT_ENB) { + /* always done for specific ctxt */ + dd->rcvctrl |= (mask << SYM_LSB(RcvCtrl, PortEnable)); + if (!(dd->flags & QIB_NODMA_RTAIL)) + dd->rcvctrl |= 1ULL << IBA7220_R_TAILUPD_SHIFT; + /* Write these registers before the context is enabled. */ + qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, ctxt, + dd->rcd[ctxt]->rcvhdrqtailaddr_phys); + qib_write_kreg_ctxt(dd, kr_rcvhdraddr, ctxt, + dd->rcd[ctxt]->rcvhdrq_phys); + dd->rcd[ctxt]->seq_cnt = 1; + } + if (op & QIB_RCVCTRL_CTXT_DIS) + dd->rcvctrl &= ~(mask << SYM_LSB(RcvCtrl, PortEnable)); + if (op & QIB_RCVCTRL_INTRAVAIL_ENB) + dd->rcvctrl |= (mask << IBA7220_R_INTRAVAIL_SHIFT); + if (op & QIB_RCVCTRL_INTRAVAIL_DIS) + dd->rcvctrl &= ~(mask << IBA7220_R_INTRAVAIL_SHIFT); + qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl); + if ((op & QIB_RCVCTRL_INTRAVAIL_ENB) && dd->rhdrhead_intr_off) { + /* arm rcv interrupt */ + val = qib_read_ureg32(dd, ur_rcvhdrhead, ctxt) | + dd->rhdrhead_intr_off; + qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt); + } + if (op & QIB_RCVCTRL_CTXT_ENB) { + /* + * Init the context registers also; if we were + * disabled, tail and head should both be zero + * already from the enable, but since we don't + * know, we have to do it explicitly. + */ + val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt); + qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt); + + val = qib_read_ureg32(dd, ur_rcvhdrtail, ctxt); + dd->rcd[ctxt]->head = val; + /* If kctxt, interrupt on next receive. */ + if (ctxt < dd->first_user_ctxt) + val |= dd->rhdrhead_intr_off; + qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt); + } + if (op & QIB_RCVCTRL_CTXT_DIS) { + if (ctxt >= 0) { + qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, ctxt, 0); + qib_write_kreg_ctxt(dd, kr_rcvhdraddr, ctxt, 0); + } else { + unsigned i; + + for (i = 0; i < dd->cfgctxts; i++) { + qib_write_kreg_ctxt(dd, kr_rcvhdrtailaddr, + i, 0); + qib_write_kreg_ctxt(dd, kr_rcvhdraddr, i, 0); + } + } + } + spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); +} + +/* + * Modify the SENDCTRL register in chip-specific way. This + * is a function because there may be multiple such registers with + * slightly different layouts. To start, we assume the + * "canonical" register layout of the first chips.
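rcvctrl_7220_mod() above treats a negative context number as "all configured contexts" when building the per-context bit mask. A sketch of just that mask selection, with an assumed context count:

#include <stdint.h>
#include <stdio.h>

#define CTXTCNT 9 /* stand-in for dd->ctxtcnt */

static uint64_t ctxt_mask(int ctxt)
{
        if (ctxt < 0)
                return (1ULL << CTXTCNT) - 1; /* every context */
        return 1ULL << ctxt;                  /* just one */
}

int main(void)
{
        printf("all:   0x%llx\n", (unsigned long long)ctxt_mask(-1));
        printf("ctxt3: 0x%llx\n", (unsigned long long)ctxt_mask(3));
        return 0;
}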
+ * Chip requires no back-back sendctrl writes, so write + * scratch register after writing sendctrl + */ +static void sendctrl_7220_mod(struct qib_pportdata *ppd, u32 op) +{ + struct qib_devdata *dd = ppd->dd; + u64 tmp_dd_sendctrl; + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + + /* First the ones that are "sticky", saved in shadow */ + if (op & QIB_SENDCTRL_CLEAR) + dd->sendctrl = 0; + if (op & QIB_SENDCTRL_SEND_DIS) + dd->sendctrl &= ~SYM_MASK(SendCtrl, SPioEnable); + else if (op & QIB_SENDCTRL_SEND_ENB) { + dd->sendctrl |= SYM_MASK(SendCtrl, SPioEnable); + if (dd->flags & QIB_USE_SPCL_TRIG) + dd->sendctrl |= SYM_MASK(SendCtrl, + SSpecialTriggerEn); + } + if (op & QIB_SENDCTRL_AVAIL_DIS) + dd->sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd); + else if (op & QIB_SENDCTRL_AVAIL_ENB) + dd->sendctrl |= SYM_MASK(SendCtrl, SendBufAvailUpd); + + if (op & QIB_SENDCTRL_DISARM_ALL) { + u32 i, last; + + tmp_dd_sendctrl = dd->sendctrl; + /* + * disarm any that are not yet launched, disabling sends + * and updates until done. + */ + last = dd->piobcnt2k + dd->piobcnt4k; + tmp_dd_sendctrl &= + ~(SYM_MASK(SendCtrl, SPioEnable) | + SYM_MASK(SendCtrl, SendBufAvailUpd)); + for (i = 0; i < last; i++) { + qib_write_kreg(dd, kr_sendctrl, + tmp_dd_sendctrl | + SYM_MASK(SendCtrl, Disarm) | i); + qib_write_kreg(dd, kr_scratch, 0); + } + } + + tmp_dd_sendctrl = dd->sendctrl; + + if (op & QIB_SENDCTRL_FLUSH) + tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Abort); + if (op & QIB_SENDCTRL_DISARM) + tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Disarm) | + ((op & QIB_7220_SendCtrl_DisarmPIOBuf_RMASK) << + SYM_LSB(SendCtrl, DisarmPIOBuf)); + if ((op & QIB_SENDCTRL_AVAIL_BLIP) && + (dd->sendctrl & SYM_MASK(SendCtrl, SendBufAvailUpd))) + tmp_dd_sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd); + + qib_write_kreg(dd, kr_sendctrl, tmp_dd_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + + if (op & QIB_SENDCTRL_AVAIL_BLIP) { + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + + if (op & QIB_SENDCTRL_FLUSH) { + u32 v; + /* + * ensure writes have hit chip, then do a few + * more reads, to allow DMA of pioavail registers + * to occur, so in-memory copy is in sync with + * the chip. Not always safe to sleep. 
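The flush described in this comment (the reads follow just below) is a write-then-readback idiom: a read forces posted MMIO writes out to the device, and a few extra read/write pairs give the chip's pioavail DMA time to land in host memory without sleeping. A userspace-flavored sketch, with a plain variable standing in for the mapped scratch register:

#include <stdint.h>

static volatile uint32_t fake_scratch; /* stand-in for the scratch kreg */

static void reg_write(volatile uint32_t *r, uint32_t v) { *r = v; }
static uint32_t reg_read(volatile uint32_t *r) { return *r; }

static void flush_posted_writes(void)
{
        uint32_t v;

        /* each read forces prior posted writes to complete;
         * the interleaved writes keep the bus busy a bit longer */
        v = reg_read(&fake_scratch);
        reg_write(&fake_scratch, v);
        v = reg_read(&fake_scratch);
        reg_write(&fake_scratch, v);
        (void)reg_read(&fake_scratch);
}

int main(void)
{
        flush_posted_writes();
        return 0;
}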
*/ + v = qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_scratch, v); + v = qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_scratch, v); + qib_read_kreg32(dd, kr_scratch); + } +} + +/** + * qib_portcntr_7220 - read a per-port counter + * @ppd: physical port of the qlogic_ib device + * @reg: the counter to snapshot + */ +static u64 qib_portcntr_7220(struct qib_pportdata *ppd, u32 reg) +{ + u64 ret = 0ULL; + struct qib_devdata *dd = ppd->dd; + u16 creg; + /* 0xffff for unimplemented or synthesized counters */ + static const u16 xlator[] = { + [QIBPORTCNTR_PKTSEND] = cr_pktsend, + [QIBPORTCNTR_WORDSEND] = cr_wordsend, + [QIBPORTCNTR_PSXMITDATA] = cr_psxmitdatacount, + [QIBPORTCNTR_PSXMITPKTS] = cr_psxmitpktscount, + [QIBPORTCNTR_PSXMITWAIT] = cr_psxmitwaitcount, + [QIBPORTCNTR_SENDSTALL] = cr_sendstall, + [QIBPORTCNTR_PKTRCV] = cr_pktrcv, + [QIBPORTCNTR_PSRCVDATA] = cr_psrcvdatacount, + [QIBPORTCNTR_PSRCVPKTS] = cr_psrcvpktscount, + [QIBPORTCNTR_RCVEBP] = cr_rcvebp, + [QIBPORTCNTR_RCVOVFL] = cr_rcvovfl, + [QIBPORTCNTR_WORDRCV] = cr_wordrcv, + [QIBPORTCNTR_RXDROPPKT] = cr_rxdroppkt, + [QIBPORTCNTR_RXLOCALPHYERR] = cr_rxotherlocalphyerr, + [QIBPORTCNTR_RXVLERR] = cr_rxvlerr, + [QIBPORTCNTR_ERRICRC] = cr_erricrc, + [QIBPORTCNTR_ERRVCRC] = cr_errvcrc, + [QIBPORTCNTR_ERRLPCRC] = cr_errlpcrc, + [QIBPORTCNTR_BADFORMAT] = cr_badformat, + [QIBPORTCNTR_ERR_RLEN] = cr_err_rlen, + [QIBPORTCNTR_IBSYMBOLERR] = cr_ibsymbolerr, + [QIBPORTCNTR_INVALIDRLEN] = cr_invalidrlen, + [QIBPORTCNTR_UNSUPVL] = cr_txunsupvl, + [QIBPORTCNTR_EXCESSBUFOVFL] = cr_excessbufferovfl, + [QIBPORTCNTR_ERRLINK] = cr_errlink, + [QIBPORTCNTR_IBLINKDOWN] = cr_iblinkdown, + [QIBPORTCNTR_IBLINKERRRECOV] = cr_iblinkerrrecov, + [QIBPORTCNTR_LLI] = cr_locallinkintegrityerr, + [QIBPORTCNTR_PSINTERVAL] = cr_psinterval, + [QIBPORTCNTR_PSSTART] = cr_psstart, + [QIBPORTCNTR_PSSTAT] = cr_psstat, + [QIBPORTCNTR_VL15PKTDROP] = cr_vl15droppedpkt, + [QIBPORTCNTR_ERRPKEY] = cr_errpkey, + [QIBPORTCNTR_KHDROVFL] = 0xffff, + }; + + if (reg >= ARRAY_SIZE(xlator)) { + qib_devinfo(ppd->dd->pcidev, + "Unimplemented portcounter %u\n", reg); + goto done; + } + creg = xlator[reg]; + + if (reg == QIBPORTCNTR_KHDROVFL) { + int i; + + /* sum over all kernel contexts */ + for (i = 0; i < dd->first_user_ctxt; i++) + ret += read_7220_creg32(dd, cr_portovfl + i); + } + if (creg == 0xffff) + goto done; + + /* + * only fast incrementing counters are 64bit; use 32 bit reads to + * avoid two independent reads when on opteron + */ + if ((creg == cr_wordsend || creg == cr_wordrcv || + creg == cr_pktsend || creg == cr_pktrcv)) + ret = read_7220_creg(dd, creg); + else + ret = read_7220_creg32(dd, creg); + if (creg == cr_ibsymbolerr) { + if (dd->pport->cpspec->ibdeltainprog) + ret -= ret - ppd->cpspec->ibsymsnap; + ret -= dd->pport->cpspec->ibsymdelta; + } else if (creg == cr_iblinkerrrecov) { + if (dd->pport->cpspec->ibdeltainprog) + ret -= ret - ppd->cpspec->iblnkerrsnap; + ret -= dd->pport->cpspec->iblnkerrdelta; + } +done: + return ret; +} + +/* + * Device counter names (not port-specific), one line per stat, + * single string. Used by utilities like ipathstats to print the stats + * in a way which works for different versions of drivers, without changing + * the utility. Names need to be 12 chars or less (w/o newline), for proper + * display by utility. + * Non-error counters are first. + * Start of "error" counters is indicated by a leading "E " on the first + * "error" counter, and doesn't count in label length.
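The ibsymsnap/ibsymdelta logic above hides counter noise from link retraining: snapshot when the bounce starts, report the snapshot while it is in progress, then fold the spurious growth into a delta subtracted from every later read. Reduced to one counter as a sketch:

#include <stdint.h>
#include <stdio.h>

struct cntr {
        uint64_t raw;   /* what the chip would report */
        uint64_t snap;  /* value when the bounce began */
        uint64_t delta; /* accumulated error to hide */
        int inprog;     /* bounce in progress */
};

static uint64_t cntr_read(struct cntr *c)
{
        uint64_t val = c->raw;

        if (c->inprog)
                val -= val - c->snap; /* same expression as the driver */
        return val - c->delta;
}

int main(void)
{
        struct cntr c = { .raw = 100, .snap = 100, .inprog = 1 };

        c.raw = 107; /* noise during retraining */
        printf("%llu\n", (unsigned long long)cntr_read(&c)); /* 100 */
        c.inprog = 0;
        c.delta += 107 - c.snap; /* fold in the spurious growth */
        printf("%llu\n", (unsigned long long)cntr_read(&c)); /* 100 */
        return 0;
}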
+ * The EgrOvfl list needs to be last so we truncate them at the configured + * context count for the device. + * cntr7220indices contains the corresponding register indices. + */ +static const char cntr7220names[] = + "Interrupts\n" + "HostBusStall\n" + "E RxTIDFull\n" + "RxTIDInvalid\n" + "Ctxt0EgrOvfl\n" + "Ctxt1EgrOvfl\n" + "Ctxt2EgrOvfl\n" + "Ctxt3EgrOvfl\n" + "Ctxt4EgrOvfl\n" + "Ctxt5EgrOvfl\n" + "Ctxt6EgrOvfl\n" + "Ctxt7EgrOvfl\n" + "Ctxt8EgrOvfl\n" + "Ctxt9EgrOvfl\n" + "Ctx10EgrOvfl\n" + "Ctx11EgrOvfl\n" + "Ctx12EgrOvfl\n" + "Ctx13EgrOvfl\n" + "Ctx14EgrOvfl\n" + "Ctx15EgrOvfl\n" + "Ctx16EgrOvfl\n"; + +static const size_t cntr7220indices[] = { + cr_lbint, + cr_lbflowstall, + cr_errtidfull, + cr_errtidvalid, + cr_portovfl + 0, + cr_portovfl + 1, + cr_portovfl + 2, + cr_portovfl + 3, + cr_portovfl + 4, + cr_portovfl + 5, + cr_portovfl + 6, + cr_portovfl + 7, + cr_portovfl + 8, + cr_portovfl + 9, + cr_portovfl + 10, + cr_portovfl + 11, + cr_portovfl + 12, + cr_portovfl + 13, + cr_portovfl + 14, + cr_portovfl + 15, + cr_portovfl + 16, +}; + +/* + * same as cntr7220names and cntr7220indices, but for port-specific counters. + * portcntr7220indices is somewhat complicated by some registers needing + * adjustments of various kinds, and those are ORed with _PORT_VIRT_FLAG + */ +static const char portcntr7220names[] = + "TxPkt\n" + "TxFlowPkt\n" + "TxWords\n" + "RxPkt\n" + "RxFlowPkt\n" + "RxWords\n" + "TxFlowStall\n" + "TxDmaDesc\n" /* 7220 and 7322-only */ + "E RxDlidFltr\n" /* 7220 and 7322-only */ + "IBStatusChng\n" + "IBLinkDown\n" + "IBLnkRecov\n" + "IBRxLinkErr\n" + "IBSymbolErr\n" + "RxLLIErr\n" + "RxBadFormat\n" + "RxBadLen\n" + "RxBufOvrfl\n" + "RxEBP\n" + "RxFlowCtlErr\n" + "RxICRCerr\n" + "RxLPCRCerr\n" + "RxVCRCerr\n" + "RxInvalLen\n" + "RxInvalPKey\n" + "RxPktDropped\n" + "TxBadLength\n" + "TxDropped\n" + "TxInvalLen\n" + "TxUnderrun\n" + "TxUnsupVL\n" + "RxLclPhyErr\n" /* 7220 and 7322-only */ + "RxVL15Drop\n" /* 7220 and 7322-only */ + "RxVlErr\n" /* 7220 and 7322-only */ + "XcessBufOvfl\n" /* 7220 and 7322-only */ + ; + +#define _PORT_VIRT_FLAG 0x8000 /* "virtual", need adjustments */ +static const size_t portcntr7220indices[] = { + QIBPORTCNTR_PKTSEND | _PORT_VIRT_FLAG, + cr_pktsendflow, + QIBPORTCNTR_WORDSEND | _PORT_VIRT_FLAG, + QIBPORTCNTR_PKTRCV | _PORT_VIRT_FLAG, + cr_pktrcvflowctrl, + QIBPORTCNTR_WORDRCV | _PORT_VIRT_FLAG, + QIBPORTCNTR_SENDSTALL | _PORT_VIRT_FLAG, + cr_txsdmadesc, + cr_rxdlidfltr, + cr_ibstatuschange, + QIBPORTCNTR_IBLINKDOWN | _PORT_VIRT_FLAG, + QIBPORTCNTR_IBLINKERRRECOV | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRLINK | _PORT_VIRT_FLAG, + QIBPORTCNTR_IBSYMBOLERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_LLI | _PORT_VIRT_FLAG, + QIBPORTCNTR_BADFORMAT | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERR_RLEN | _PORT_VIRT_FLAG, + QIBPORTCNTR_RCVOVFL | _PORT_VIRT_FLAG, + QIBPORTCNTR_RCVEBP | _PORT_VIRT_FLAG, + cr_rcvflowctrl_err, + QIBPORTCNTR_ERRICRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRLPCRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRVCRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_INVALIDRLEN | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRPKEY | _PORT_VIRT_FLAG, + QIBPORTCNTR_RXDROPPKT | _PORT_VIRT_FLAG, + cr_invalidslen, + cr_senddropped, + cr_errslen, + cr_sendunderrun, + cr_txunsupvl, + QIBPORTCNTR_RXLOCALPHYERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_VL15PKTDROP | _PORT_VIRT_FLAG, + QIBPORTCNTR_RXVLERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_EXCESSBUFOVFL | _PORT_VIRT_FLAG, +}; + +/* do all the setup to make the counter reads efficient later */ +static void init_7220_cntrnames(struct qib_devdata *dd) +{ + int i, j = 0; + 
char *s; + + for (i = 0, s = (char *)cntr7220names; s && j <= dd->cfgctxts; + i++) { + /* we always have at least one counter before the egrovfl */ + if (!j && !strncmp("Ctxt0EgrOvfl", s + 1, 12)) + j = 1; + s = strchr(s + 1, '\n'); + if (s && j) + j++; + } + dd->cspec->ncntrs = i; + if (!s) + /* full list; size is without terminating null */ + dd->cspec->cntrnamelen = sizeof(cntr7220names) - 1; + else + dd->cspec->cntrnamelen = 1 + s - cntr7220names; + dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs + * sizeof(u64), GFP_KERNEL); + if (!dd->cspec->cntrs) + qib_dev_err(dd, "Failed allocation for counters\n"); + + for (i = 0, s = (char *)portcntr7220names; s; i++) + s = strchr(s + 1, '\n'); + dd->cspec->nportcntrs = i - 1; + dd->cspec->portcntrnamelen = sizeof(portcntr7220names) - 1; + dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs + * sizeof(u64), GFP_KERNEL); + if (!dd->cspec->portcntrs) + qib_dev_err(dd, "Failed allocation for portcounters\n"); +} + +static u32 qib_read_7220cntrs(struct qib_devdata *dd, loff_t pos, char **namep, + u64 **cntrp) +{ + u32 ret; + + if (!dd->cspec->cntrs) { + ret = 0; + goto done; + } + + if (namep) { + *namep = (char *)cntr7220names; + ret = dd->cspec->cntrnamelen; + if (pos >= ret) + ret = 0; /* final read after getting everything */ + } else { + u64 *cntr = dd->cspec->cntrs; + int i; + + ret = dd->cspec->ncntrs * sizeof(u64); + if (!cntr || pos >= ret) { + /* everything read, or couldn't get memory */ + ret = 0; + goto done; + } + + *cntrp = cntr; + for (i = 0; i < dd->cspec->ncntrs; i++) + *cntr++ = read_7220_creg32(dd, cntr7220indices[i]); + } +done: + return ret; +} + +static u32 qib_read_7220portcntrs(struct qib_devdata *dd, loff_t pos, u32 port, + char **namep, u64 **cntrp) +{ + u32 ret; + + if (!dd->cspec->portcntrs) { + ret = 0; + goto done; + } + if (namep) { + *namep = (char *)portcntr7220names; + ret = dd->cspec->portcntrnamelen; + if (pos >= ret) + ret = 0; /* final read after getting everything */ + } else { + u64 *cntr = dd->cspec->portcntrs; + struct qib_pportdata *ppd = &dd->pport[port]; + int i; + + ret = dd->cspec->nportcntrs * sizeof(u64); + if (!cntr || pos >= ret) { + /* everything read, or couldn't get memory */ + ret = 0; + goto done; + } + *cntrp = cntr; + for (i = 0; i < dd->cspec->nportcntrs; i++) { + if (portcntr7220indices[i] & _PORT_VIRT_FLAG) + *cntr++ = qib_portcntr_7220(ppd, + portcntr7220indices[i] & + ~_PORT_VIRT_FLAG); + else + *cntr++ = read_7220_creg32(dd, + portcntr7220indices[i]); + } + } +done: + return ret; +} + +/** + * qib_get_7220_faststats - get word counters from chip before they overflow + * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * + * This needs more work; in particular, decision on whether we really + * need traffic_wds done the way it is + * called from add_timer + */ +static void qib_get_7220_faststats(unsigned long opaque) +{ + struct qib_devdata *dd = (struct qib_devdata *) opaque; + struct qib_pportdata *ppd = dd->pport; + unsigned long flags; + u64 traffic_wds; + + /* + * don't access the chip while running diags, or memory diags can + * fail + */ + if (!(dd->flags & QIB_INITTED) || dd->diag_client) + /* but re-arm the timer, for diags case; won't hurt other */ + goto done; + + /* + * We now try to maintain an activity timer, based on traffic + * exceeding a threshold, so we need to check the word-counts + * even if they are 64-bit. 
+ */ + traffic_wds = qib_portcntr_7220(ppd, cr_wordsend) + + qib_portcntr_7220(ppd, cr_wordrcv); + spin_lock_irqsave(&dd->eep_st_lock, flags); + traffic_wds -= dd->traffic_wds; + dd->traffic_wds += traffic_wds; + if (traffic_wds >= QIB_TRAFFIC_ACTIVE_THRESHOLD) + atomic_add(5, &dd->active_time); /* S/B #define */ + spin_unlock_irqrestore(&dd->eep_st_lock, flags); +done: + mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER); +} + +/* + * If we are using MSI, try to fallback to INTx. + */ +static int qib_7220_intr_fallback(struct qib_devdata *dd) +{ + if (!dd->msi_lo) + return 0; + + qib_devinfo(dd->pcidev, + "MSI interrupt not detected, trying INTx interrupts\n"); + qib_7220_free_irq(dd); + qib_enable_intx(dd->pcidev); + /* + * Some newer kernels require free_irq before disable_msi, + * and irq can be changed during disable and INTx enable + * and we need to therefore use the pcidev->irq value, + * not our saved MSI value. + */ + dd->cspec->irq = dd->pcidev->irq; + qib_setup_7220_interrupt(dd); + return 1; +} + +/* + * Reset the XGXS (between serdes and IBC). Slightly less intrusive + * than resetting the IBC or external link state, and useful in some + * cases to cause some retraining. To do this right, we reset IBC + * as well. + */ +static void qib_7220_xgxs_reset(struct qib_pportdata *ppd) +{ + u64 val, prev_val; + struct qib_devdata *dd = ppd->dd; + + prev_val = qib_read_kreg64(dd, kr_xgxs_cfg); + val = prev_val | QLOGIC_IB_XGXS_RESET; + prev_val &= ~QLOGIC_IB_XGXS_RESET; /* be sure */ + qib_write_kreg(dd, kr_control, + dd->control & ~QLOGIC_IB_C_LINKENABLE); + qib_write_kreg(dd, kr_xgxs_cfg, val); + qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_xgxs_cfg, prev_val); + qib_write_kreg(dd, kr_control, dd->control); +} + +/* + * For this chip, we want to use the same buffer every time + * when we are trying to bring the link up (they are always VL15 + * packets). At that link state the packet should always go out immediately + * (or at least be discarded at the tx interface if the link is down). + * If it doesn't, and the buffer isn't available, that means some other + * sender has gotten ahead of us, and is preventing our packet from going + * out. In that case, we flush all packets, and try again. If that still + * fails, we fail the request, and hope things work the next time around. + * + * We don't need very complicated heuristics on whether the packet had + * time to go out or not, since even at SDR 1X, it goes out in very short + * time periods, covered by the chip reads done here and as part of the + * flush. + */ +static u32 __iomem *get_7220_link_buf(struct qib_pportdata *ppd, u32 *bnum) +{ + u32 __iomem *buf; + u32 lbuf = ppd->dd->cspec->lastbuf_for_pio; + int do_cleanup; + unsigned long flags; + + /* + * always blip to get avail list updated, since it's almost + * always needed, and is fairly cheap. 
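qib_get_7220_faststats() above turns absolute word counts into per-interval deltas, so slow counters never wrap unnoticed and link activity can be thresholded. A sketch of the delta bookkeeping, with an invented threshold in place of QIB_TRAFFIC_ACTIVE_THRESHOLD:

#include <stdint.h>
#include <stdio.h>

#define ACTIVE_THRESHOLD 1000 /* illustrative */

static uint64_t last_wds; /* dd->traffic_wds in the driver */

static void poll_traffic(uint64_t send_wds, uint64_t recv_wds)
{
        uint64_t wds = send_wds + recv_wds;
        uint64_t delta = wds - last_wds;

        last_wds = wds;
        if (delta >= ACTIVE_THRESHOLD)
                printf("link active: %llu words this interval\n",
                       (unsigned long long)delta);
}

int main(void)
{
        poll_traffic(600, 500); /* first interval: 1100 words, active */
        poll_traffic(650, 520); /* second: only 70 words, quiet */
        return 0;
}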
*/ + sendctrl_7220_mod(ppd->dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + qib_read_kreg64(ppd->dd, kr_scratch); /* extra chip flush */ + buf = qib_getsendbuf_range(ppd->dd, bnum, lbuf, lbuf); + if (buf) + goto done; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (ppd->sdma_state.current_state == qib_sdma_state_s20_idle && + ppd->sdma_state.current_state != qib_sdma_state_s00_hw_down) { + __qib_sdma_process_event(ppd, qib_sdma_event_e00_go_hw_down); + do_cleanup = 0; + } else { + do_cleanup = 1; + qib_7220_sdma_hw_clean_up(ppd); + } + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + + if (do_cleanup) { + qib_read_kreg64(ppd->dd, kr_scratch); /* extra chip flush */ + buf = qib_getsendbuf_range(ppd->dd, bnum, lbuf, lbuf); + } +done: + return buf; +} + +/* + * This code for non-IBTA-compliant IB speed negotiation is only known to + * work for the SDR to DDR transition, and only between an HCA and a switch + * with recent firmware. It is based on observed heuristics, rather than + * actual knowledge of the non-compliant speed negotiation. + * It has a number of hard-coded fields, since the hope is to rewrite this + * when a spec is available on how the negotiation is intended to work. + */ +static void autoneg_7220_sendpkt(struct qib_pportdata *ppd, u32 *hdr, + u32 dcnt, u32 *data) +{ + int i; + u64 pbc; + u32 __iomem *piobuf; + u32 pnum; + struct qib_devdata *dd = ppd->dd; + + i = 0; + pbc = 7 + dcnt + 1; /* 7 dword header, dword data, icrc */ + pbc |= PBC_7220_VL15_SEND; + while (!(piobuf = get_7220_link_buf(ppd, &pnum))) { + if (i++ > 5) + return; + udelay(2); + } + sendctrl_7220_mod(dd->pport, QIB_SENDCTRL_DISARM_BUF(pnum)); + writeq(pbc, piobuf); + qib_flush_wc(); + qib_pio_copy(piobuf + 2, hdr, 7); + qib_pio_copy(piobuf + 9, data, dcnt); + if (dd->flags & QIB_USE_SPCL_TRIG) { + u32 spcl_off = (pnum >= dd->piobcnt2k) ? 2047 : 1023; + + qib_flush_wc(); + __raw_writel(0xaebecede, piobuf + spcl_off); + } + qib_flush_wc(); + qib_sendbuf_done(dd, pnum); +} + +/* + * _start packet gets sent twice at start, _done gets sent twice at end + */ +static void autoneg_7220_send(struct qib_pportdata *ppd, int which) +{ + struct qib_devdata *dd = ppd->dd; + static u32 swapped; + u32 dw, i, hcnt, dcnt, *data; + static u32 hdr[7] = { 0xf002ffff, 0x48ffff, 0x6400abba }; + static u32 madpayload_start[0x40] = { + 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0, + 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x1, 0x1388, 0x15e, 0x1, /* rest 0's */ + }; + static u32 madpayload_done[0x40] = { + 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0, + 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x40000001, 0x1388, 0x15e, /* rest 0's */ + }; + + dcnt = ARRAY_SIZE(madpayload_start); + hcnt = ARRAY_SIZE(hdr); + if (!swapped) { + /* for maintainability, do it at runtime */ + for (i = 0; i < hcnt; i++) { + dw = (__force u32) cpu_to_be32(hdr[i]); + hdr[i] = dw; + } + for (i = 0; i < dcnt; i++) { + dw = (__force u32) cpu_to_be32(madpayload_start[i]); + madpayload_start[i] = dw; + dw = (__force u32) cpu_to_be32(madpayload_done[i]); + madpayload_done[i] = dw; + } + swapped = 1; + } + + data = which ? madpayload_done : madpayload_start; + + autoneg_7220_sendpkt(ppd, hdr, dcnt, data); + qib_read_kreg64(dd, kr_scratch); + udelay(2); + autoneg_7220_sendpkt(ppd, hdr, dcnt, data); + qib_read_kreg64(dd, kr_scratch); + udelay(2); +} + +/* + * Do the absolute minimum to cause an IB speed change, and make it + * ready, but don't actually trigger the change.
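autoneg_7220_sendpkt() above sizes its PBC as 7 header dwords plus the payload dwords plus 1 for the ICRC, then ORs in the VL15 flag. The arithmetic in isolation; the flag bit position here is a placeholder, not the chip's PBC_7220_VL15_SEND value:

#include <stdint.h>
#include <stdio.h>

#define PBC_VL15_SEND (1ULL << 63) /* placeholder flag bit */

static uint64_t mk_pbc(uint32_t data_dwords)
{
        uint64_t pbc = 7 + data_dwords + 1; /* hdr + data + icrc, in dwords */

        return pbc | PBC_VL15_SEND;
}

int main(void)
{
        printf("pbc = 0x%016llx\n", (unsigned long long)mk_pbc(0x40));
        return 0;
}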
The caller will + * do that when ready (if link is in Polling training state, it will + * happen immediately, otherwise when link next goes down). + * + * This routine should only be used as part of the DDR autonegotiation + * code for devices that are not compliant with IB 1.2 (or code that + * fixes things up for same). + * + * When link has gone down and autoneg is enabled, or autoneg has + * failed and we give up until the next time we set both speeds, we + * want IBTA negotiation enabled as well as "use max enabled speed". + */ +static void set_7220_ibspeed_fast(struct qib_pportdata *ppd, u32 speed) +{ + ppd->cpspec->ibcddrctrl &= ~(IBA7220_IBC_SPEED_AUTONEG_MASK | + IBA7220_IBC_IBTA_1_2_MASK); + + if (speed == (QIB_IB_SDR | QIB_IB_DDR)) + ppd->cpspec->ibcddrctrl |= IBA7220_IBC_SPEED_AUTONEG_MASK | + IBA7220_IBC_IBTA_1_2_MASK; + else + ppd->cpspec->ibcddrctrl |= speed == QIB_IB_DDR ? + IBA7220_IBC_SPEED_DDR : IBA7220_IBC_SPEED_SDR; + + qib_write_kreg(ppd->dd, kr_ibcddrctrl, ppd->cpspec->ibcddrctrl); + qib_write_kreg(ppd->dd, kr_scratch, 0); +} + +/* + * This routine is only used when we are not talking to another + * IB 1.2-compliant device that we think can do DDR. + * (This includes all existing switch chips as of Oct 2007.) + * 1.2-compliant devices go directly to DDR prior to reaching INIT + */ +static void try_7220_autoneg(struct qib_pportdata *ppd) +{ + unsigned long flags; + + /* + * Required for older non-IB1.2 DDR switches. Newer + * non-IB-compliant switches don't need it, but so far, + * aren't bothered by it either. "Magic constant" + */ + qib_write_kreg(ppd->dd, kr_ncmodectrl, 0x3b9dc07); + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_AUTONEG_INPROG; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + autoneg_7220_send(ppd, 0); + set_7220_ibspeed_fast(ppd, QIB_IB_DDR); + + toggle_7220_rclkrls(ppd->dd); + /* 2 msec is minimum length of a poll cycle */ + queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work, + msecs_to_jiffies(2)); +} + +/* + * Handle the empirically determined mechanism for auto-negotiation + * of DDR speed with switches. + */ +static void autoneg_7220_work(struct work_struct *work) +{ + struct qib_pportdata *ppd; + struct qib_devdata *dd; + u64 startms; + u32 i; + unsigned long flags; + + ppd = &container_of(work, struct qib_chippport_specific, + autoneg_work.work)->pportdata; + dd = ppd->dd; + + startms = jiffies_to_msecs(jiffies); + + /* + * Busy wait for this first part, it should be at most a + * few hundred usec, since we scheduled ourselves for 2msec. + */ + for (i = 0; i < 25; i++) { + if (SYM_FIELD(ppd->lastibcstat, IBCStatus, LinkTrainingState) + == IB_7220_LT_STATE_POLLQUIET) { + qib_set_linkstate(ppd, QIB_IB_LINKDOWN_DISABLE); + break; + } + udelay(100); + } + + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) + goto done; /* we got there early or told to stop */ + + /* we expect this to timeout */ + if (wait_event_timeout(ppd->cpspec->autoneg_wait, + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG), + msecs_to_jiffies(90))) + goto done; + + toggle_7220_rclkrls(dd); + + /* we expect this to timeout */ + if (wait_event_timeout(ppd->cpspec->autoneg_wait, + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG), + msecs_to_jiffies(1700))) + goto done; + + set_7220_ibspeed_fast(ppd, QIB_IB_SDR); + toggle_7220_rclkrls(dd); + + /* + * Wait up to 250 msec for link to train and get to INIT at DDR; + * this should terminate early.
+ */ + wait_event_timeout(ppd->cpspec->autoneg_wait, + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG), + msecs_to_jiffies(250)); +done: + if (ppd->lflags & QIBL_IB_AUTONEG_INPROG) { + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG; + if (dd->cspec->autoneg_tries == AUTONEG_TRIES) { + ppd->lflags |= QIBL_IB_AUTONEG_FAILED; + dd->cspec->autoneg_tries = 0; + } + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + set_7220_ibspeed_fast(ppd, ppd->link_speed_enabled); + } +} + +static u32 qib_7220_iblink_state(u64 ibcs) +{ + u32 state = (u32)SYM_FIELD(ibcs, IBCStatus, LinkState); + + switch (state) { + case IB_7220_L_STATE_INIT: + state = IB_PORT_INIT; + break; + case IB_7220_L_STATE_ARM: + state = IB_PORT_ARMED; + break; + case IB_7220_L_STATE_ACTIVE: + /* fall through */ + case IB_7220_L_STATE_ACT_DEFER: + state = IB_PORT_ACTIVE; + break; + default: /* fall through */ + case IB_7220_L_STATE_DOWN: + state = IB_PORT_DOWN; + break; + } + return state; +} + +/* returns the IBTA port state, rather than the IBC link training state */ +static u8 qib_7220_phys_portstate(u64 ibcs) +{ + u8 state = (u8)SYM_FIELD(ibcs, IBCStatus, LinkTrainingState); + return qib_7220_physportstate[state]; +} + +static int qib_7220_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) +{ + int ret = 0, symadj = 0; + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_FORCE_NOTIFY; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + + if (!ibup) { + /* + * When the link goes down we don't want AEQ running, so it + * won't interfere with IBC training, etc., and we need + * to go back to the static SerDes preset values. + */ + if (!(ppd->lflags & (QIBL_IB_AUTONEG_FAILED | + QIBL_IB_AUTONEG_INPROG))) + set_7220_ibspeed_fast(ppd, ppd->link_speed_enabled); + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { + qib_sd7220_presets(dd); + qib_cancel_sends(ppd); /* initial disarm, etc. 
*/
+			spin_lock_irqsave(&ppd->sdma_lock, flags);
+			if (__qib_sdma_running(ppd))
+				__qib_sdma_process_event(ppd,
+					qib_sdma_event_e70_go_idle);
+			spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+		}
+		/* this might be better done in qib_sd7220_presets() */
+		set_7220_relock_poll(dd, ibup);
+	} else {
+		if (qib_compat_ddr_negotiate &&
+		    !(ppd->lflags & (QIBL_IB_AUTONEG_FAILED |
+				     QIBL_IB_AUTONEG_INPROG)) &&
+		    ppd->link_speed_active == QIB_IB_SDR &&
+		    (ppd->link_speed_enabled & (QIB_IB_DDR | QIB_IB_SDR)) ==
+		    (QIB_IB_DDR | QIB_IB_SDR) &&
+		    dd->cspec->autoneg_tries < AUTONEG_TRIES) {
+			/* we are SDR, and DDR auto-negotiation enabled */
+			++dd->cspec->autoneg_tries;
+			if (!ppd->cpspec->ibdeltainprog) {
+				ppd->cpspec->ibdeltainprog = 1;
+				ppd->cpspec->ibsymsnap = read_7220_creg32(dd,
+					cr_ibsymbolerr);
+				ppd->cpspec->iblnkerrsnap = read_7220_creg32(dd,
+					cr_iblinkerrrecov);
+			}
+			try_7220_autoneg(ppd);
+			ret = 1; /* no other IB status change processing */
+		} else if ((ppd->lflags & QIBL_IB_AUTONEG_INPROG) &&
+			   ppd->link_speed_active == QIB_IB_SDR) {
+			autoneg_7220_send(ppd, 1);
+			set_7220_ibspeed_fast(ppd, QIB_IB_DDR);
+			udelay(2);
+			toggle_7220_rclkrls(dd);
+			ret = 1; /* no other IB status change processing */
+		} else {
+			if ((ppd->lflags & QIBL_IB_AUTONEG_INPROG) &&
+			    (ppd->link_speed_active & QIB_IB_DDR)) {
+				spin_lock_irqsave(&ppd->lflags_lock, flags);
+				ppd->lflags &= ~(QIBL_IB_AUTONEG_INPROG |
+						 QIBL_IB_AUTONEG_FAILED);
+				spin_unlock_irqrestore(&ppd->lflags_lock,
+						       flags);
+				dd->cspec->autoneg_tries = 0;
+				/* re-enable SDR, for next link down */
+				set_7220_ibspeed_fast(ppd,
+						      ppd->link_speed_enabled);
+				wake_up(&ppd->cpspec->autoneg_wait);
+				symadj = 1;
+			} else if (ppd->lflags & QIBL_IB_AUTONEG_FAILED) {
+				/*
+				 * Clear autoneg failure flag, and do setup
+				 * so we'll try next time link goes down and
+				 * back to INIT (possibly connected to a
+				 * different device).
+				 */
+				spin_lock_irqsave(&ppd->lflags_lock, flags);
+				ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED;
+				spin_unlock_irqrestore(&ppd->lflags_lock,
+						       flags);
+				ppd->cpspec->ibcddrctrl |=
+					IBA7220_IBC_IBTA_1_2_MASK;
+				qib_write_kreg(dd, kr_ncmodectrl, 0);
+				symadj = 1;
+			}
+		}
+
+		if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG))
+			symadj = 1;
+
+		if (!ret) {
+			ppd->delay_mult = rate_to_delay
+			    [(ibcs >> IBA7220_LINKSPEED_SHIFT) & 1]
+			    [(ibcs >> IBA7220_LINKWIDTH_SHIFT) & 1];
+
+			set_7220_relock_poll(dd, ibup);
+			spin_lock_irqsave(&ppd->sdma_lock, flags);
+			/*
+			 * Unlike 7322, the 7220 needs this, due to lack of
+			 * interrupt in some cases when we have sdma active
+			 * when the link goes down.
+			 */
+			if (ppd->sdma_state.current_state !=
+			    qib_sdma_state_s20_idle)
+				__qib_sdma_process_event(ppd,
+					qib_sdma_event_e00_go_hw_down);
+			spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+		}
+	}
+
+	if (symadj) {
+		if (ppd->cpspec->ibdeltainprog) {
+			ppd->cpspec->ibdeltainprog = 0;
+			ppd->cpspec->ibsymdelta += read_7220_creg32(ppd->dd,
+				cr_ibsymbolerr) - ppd->cpspec->ibsymsnap;
+			ppd->cpspec->iblnkerrdelta += read_7220_creg32(ppd->dd,
+				cr_iblinkerrrecov) - ppd->cpspec->iblnkerrsnap;
+		}
+	} else if (!ibup && qib_compat_ddr_negotiate &&
+		   !ppd->cpspec->ibdeltainprog &&
+		   !(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) {
+		ppd->cpspec->ibdeltainprog = 1;
+		ppd->cpspec->ibsymsnap = read_7220_creg32(ppd->dd,
+							  cr_ibsymbolerr);
+		ppd->cpspec->iblnkerrsnap = read_7220_creg32(ppd->dd,
+							     cr_iblinkerrrecov);
+	}
+
+	if (!ret)
+		qib_setup_7220_setextled(ppd, ibup);
+	return ret;
+}
+
+/*
+ * Does read/modify/write to appropriate registers to
+ * set output and direction bits selected by mask.
+ * These are in their canonical positions (e.g. lsb of
+ * dir will end up in D48 of extctrl on existing chips).
+ * Returns contents of GP Inputs.
+ */
+static int gpio_7220_mod(struct qib_devdata *dd, u32 out, u32 dir, u32 mask)
+{
+	u64 read_val, new_out;
+	unsigned long flags;
+
+	if (mask) {
+		/* some bits being written, lock access to GPIO */
+		dir &= mask;
+		out &= mask;
+		spin_lock_irqsave(&dd->cspec->gpio_lock, flags);
+		dd->cspec->extctrl &= ~((u64)mask << SYM_LSB(EXTCtrl, GPIOOe));
+		dd->cspec->extctrl |= ((u64) dir << SYM_LSB(EXTCtrl, GPIOOe));
+		new_out = (dd->cspec->gpio_out & ~mask) | out;
+
+		qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl);
+		qib_write_kreg(dd, kr_gpio_out, new_out);
+		dd->cspec->gpio_out = new_out;
+		spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
+	}
+	/*
+	 * It is unlikely that a read at this time would get valid
+	 * data on a pin whose direction line was set in the same
+	 * call to this function. We include the read here because
+	 * that allows us to potentially combine a change on one pin with
+	 * a read on another, and because the old code did something like
+	 * this.
+	 */
+	read_val = qib_read_kreg64(dd, kr_extstatus);
+	return SYM_FIELD(read_val, EXTStatus, GPIOIn);
+}
+
+/*
+ * Read fundamental info we need to use the chip.  These are
+ * the registers that describe chip capabilities, and are
+ * saved in shadow registers.
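+ *
+ * (An illustrative GPIO sketch, not part of the original patch, is
+ * interposed before this function.)
+ */
+
+/*
+ * Example, illustration only and not in the original driver: a
+ * hypothetical helper showing how a caller might use gpio_7220_mod()
+ * above to drive a single GPIO pin as an output while leaving every
+ * other pin untouched.  The helper name and pin-number argument are
+ * assumptions for illustration.
+ */
+static int example_drive_gpio_pin(struct qib_devdata *dd, u32 pin, int high)
+{
+	u32 mask = 1U << pin;		/* touch only this pin */
+	u32 out = high ? mask : 0;	/* desired output level */
+
+	/* make the pin an output (dir bit set) and latch its level */
+	return gpio_7220_mod(dd, out, mask, mask);
+}
+
+/*
+ * (Original comment resumes: the chip-parameter read described
+ * above follows.)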
+ */
+static void get_7220_chip_params(struct qib_devdata *dd)
+{
+	u64 val;
+	u32 piobufs;
+	int mtu;
+
+	dd->uregbase = qib_read_kreg32(dd, kr_userregbase);
+
+	dd->rcvtidcnt = qib_read_kreg32(dd, kr_rcvtidcnt);
+	dd->rcvtidbase = qib_read_kreg32(dd, kr_rcvtidbase);
+	dd->rcvegrbase = qib_read_kreg32(dd, kr_rcvegrbase);
+	dd->palign = qib_read_kreg32(dd, kr_palign);
+	dd->piobufbase = qib_read_kreg64(dd, kr_sendpiobufbase);
+	dd->pio2k_bufbase = dd->piobufbase & 0xffffffff;
+
+	val = qib_read_kreg64(dd, kr_sendpiosize);
+	dd->piosize2k = val & ~0U;
+	dd->piosize4k = val >> 32;
+
+	mtu = ib_mtu_enum_to_int(qib_ibmtu);
+	if (mtu == -1)
+		mtu = QIB_DEFAULT_MTU;
+	dd->pport->ibmtu = (u32)mtu;
+
+	val = qib_read_kreg64(dd, kr_sendpiobufcnt);
+	dd->piobcnt2k = val & ~0U;
+	dd->piobcnt4k = val >> 32;
+	/* these may be adjusted in init_chip_wc_pat() */
+	dd->pio2kbase = (u32 __iomem *)
+		((char __iomem *) dd->kregbase + dd->pio2k_bufbase);
+	if (dd->piobcnt4k) {
+		dd->pio4kbase = (u32 __iomem *)
+			((char __iomem *) dd->kregbase +
+			 (dd->piobufbase >> 32));
+		/*
+		 * 4K buffers take 2 pages; we use roundup just to be
+		 * paranoid; we calculate it once here, rather than on
+		 * every buf allocate
+		 */
+		dd->align4k = ALIGN(dd->piosize4k, dd->palign);
+	}
+
+	piobufs = dd->piobcnt4k + dd->piobcnt2k;
+
+	dd->pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) /
+		(sizeof(u64) * BITS_PER_BYTE / 2);
+}
+
+/*
+ * The chip base addresses in cspec and cpspec have to be set
+ * after possible init_chip_wc_pat(), rather than in
+ * get_7220_chip_params(), so split out as separate function
+ */
+static void set_7220_baseaddrs(struct qib_devdata *dd)
+{
+	u32 cregbase;
+	/* init after possible re-map in init_chip_wc_pat() */
+	cregbase = qib_read_kreg32(dd, kr_counterregbase);
+	dd->cspec->cregbase = (u64 __iomem *)
+		((char __iomem *) dd->kregbase + cregbase);
+
+	dd->egrtidbase = (u64 __iomem *)
+		((char __iomem *) dd->kregbase + dd->rcvegrbase);
+}
+
+
+#define SENDCTRL_SHADOWED (SYM_MASK(SendCtrl, SendIntBufAvail) |	\
+			   SYM_MASK(SendCtrl, SPioEnable) |		\
+			   SYM_MASK(SendCtrl, SSpecialTriggerEn) |	\
+			   SYM_MASK(SendCtrl, SendBufAvailUpd) |	\
+			   SYM_MASK(SendCtrl, AvailUpdThld) |		\
+			   SYM_MASK(SendCtrl, SDmaEnable) |		\
+			   SYM_MASK(SendCtrl, SDmaIntEnable) |		\
+			   SYM_MASK(SendCtrl, SDmaHalt) |		\
+			   SYM_MASK(SendCtrl, SDmaSingleDescriptor))
+
+static int sendctrl_hook(struct qib_devdata *dd,
+			 const struct diag_observer *op,
+			 u32 offs, u64 *data, u64 mask, int only_32)
+{
+	unsigned long flags;
+	unsigned idx = offs / sizeof(u64);
+	u64 local_data, all_bits;
+
+	if (idx != kr_sendctrl) {
+		qib_dev_err(dd, "SendCtrl Hook called with offs %X, %s-bit\n",
+			    offs, only_32 ? "32" : "64");
+		return 0;
+	}
+
+	all_bits = ~0ULL;
+	if (only_32)
+		all_bits >>= 32;
+	spin_lock_irqsave(&dd->sendctrl_lock, flags);
+	if ((mask & all_bits) != all_bits) {
+		/*
+		 * At least some mask bits are zero, so we need
+		 * to read.  The judgement call is whether from
+		 * reg or shadow.  First-cut: read reg, and complain
+		 * if any bits which should be shadowed are different
+		 * from their shadowed value.
+ */ + if (only_32) + local_data = (u64)qib_read_kreg32(dd, idx); + else + local_data = qib_read_kreg64(dd, idx); + qib_dev_err(dd, "Sendctrl -> %X, Shad -> %X\n", + (u32)local_data, (u32)dd->sendctrl); + if ((local_data & SENDCTRL_SHADOWED) != + (dd->sendctrl & SENDCTRL_SHADOWED)) + qib_dev_err(dd, "Sendctrl read: %X shadow is %X\n", + (u32)local_data, (u32) dd->sendctrl); + *data = (local_data & ~mask) | (*data & mask); + } + if (mask) { + /* + * At least some mask bits are one, so we need + * to write, but only shadow some bits. + */ + u64 sval, tval; /* Shadowed, transient */ + + /* + * New shadow val is bits we don't want to touch, + * ORed with bits we do, that are intended for shadow. + */ + sval = (dd->sendctrl & ~mask); + sval |= *data & SENDCTRL_SHADOWED & mask; + dd->sendctrl = sval; + tval = sval | (*data & ~SENDCTRL_SHADOWED & mask); + qib_dev_err(dd, "Sendctrl <- %X, Shad <- %X\n", + (u32)tval, (u32)sval); + qib_write_kreg(dd, kr_sendctrl, tval); + qib_write_kreg(dd, kr_scratch, 0Ull); + } + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + + return only_32 ? 4 : 8; +} + +static const struct diag_observer sendctrl_observer = { + sendctrl_hook, kr_sendctrl * sizeof(u64), + kr_sendctrl * sizeof(u64) +}; + +/* + * write the final few registers that depend on some of the + * init setup. Done late in init, just before bringing up + * the serdes. + */ +static int qib_late_7220_initreg(struct qib_devdata *dd) +{ + int ret = 0; + u64 val; + + qib_write_kreg(dd, kr_rcvhdrentsize, dd->rcvhdrentsize); + qib_write_kreg(dd, kr_rcvhdrsize, dd->rcvhdrsize); + qib_write_kreg(dd, kr_rcvhdrcnt, dd->rcvhdrcnt); + qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); + val = qib_read_kreg64(dd, kr_sendpioavailaddr); + if (val != dd->pioavailregs_phys) { + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); + ret = -EINVAL; + } + qib_register_observer(dd, &sendctrl_observer); + return ret; +} + +static int qib_init_7220_variables(struct qib_devdata *dd) +{ + struct qib_chippport_specific *cpspec; + struct qib_pportdata *ppd; + int ret = 0; + u32 sbufs, updthresh; + + cpspec = (struct qib_chippport_specific *)(dd + 1); + ppd = &cpspec->pportdata; + dd->pport = ppd; + dd->num_pports = 1; + + dd->cspec = (struct qib_chip_specific *)(cpspec + dd->num_pports); + ppd->cpspec = cpspec; + + spin_lock_init(&dd->cspec->sdepb_lock); + spin_lock_init(&dd->cspec->rcvmod_lock); + spin_lock_init(&dd->cspec->gpio_lock); + + /* we haven't yet set QIB_PRESENT, so use read directly */ + dd->revision = readq(&dd->kregbase[kr_revision]); + + if ((dd->revision & 0xffffffffU) == 0xffffffffU) { + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); + ret = -ENODEV; + goto bail; + } + dd->flags |= QIB_PRESENT; /* now register routines work */ + + dd->majrev = (u8) SYM_FIELD(dd->revision, Revision_R, + ChipRevMajor); + dd->minrev = (u8) SYM_FIELD(dd->revision, Revision_R, + ChipRevMinor); + + get_7220_chip_params(dd); + qib_7220_boardname(dd); + + /* + * GPIO bits for TWSI data and clock, + * used for serial EEPROM. + */ + dd->gpio_sda_num = _QIB_GPIO_SDA_NUM; + dd->gpio_scl_num = _QIB_GPIO_SCL_NUM; + dd->twsi_eeprom_dev = QIB_TWSI_EEPROM_DEV; + + dd->flags |= QIB_HAS_INTX | QIB_HAS_LINK_LATENCY | + QIB_NODMA_RTAIL | QIB_HAS_THRESH_UPDATE; + dd->flags |= qib_special_trigger ? 
+		QIB_USE_SPCL_TRIG : QIB_HAS_SEND_DMA;
+
+	/*
+	 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+	 * 2 is Some Misc, 3 is reserved for future.
+	 */
+	dd->eep_st_masks[0].hwerrs_to_log = HWE_MASK(TXEMemParityErr);
+
+	dd->eep_st_masks[1].hwerrs_to_log = HWE_MASK(RXEMemParityErr);
+
+	dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated);
+
+	init_waitqueue_head(&cpspec->autoneg_wait);
+	INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work);
+
+	ret = qib_init_pportdata(ppd, dd, 0, 1);
+	if (ret)
+		goto bail;
+	ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X;
+	ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR;
+
+	ppd->link_width_enabled = ppd->link_width_supported;
+	ppd->link_speed_enabled = ppd->link_speed_supported;
+	/*
+	 * Set the initial values to reasonable default, will be set
+	 * for real when link is up.
+	 */
+	ppd->link_width_active = IB_WIDTH_4X;
+	ppd->link_speed_active = QIB_IB_SDR;
+	ppd->delay_mult = rate_to_delay[0][1];
+	ppd->vls_supported = IB_VL_VL0;
+	ppd->vls_operational = ppd->vls_supported;
+
+	if (!qib_mini_init)
+		qib_write_kreg(dd, kr_rcvbthqp, QIB_KD_QP);
+
+	init_timer(&ppd->cpspec->chase_timer);
+	ppd->cpspec->chase_timer.function = reenable_7220_chase;
+	ppd->cpspec->chase_timer.data = (unsigned long)ppd;
+
+	qib_num_cfg_vls = 1; /* if any 7220's, only one VL */
+
+	dd->rcvhdrentsize = QIB_RCVHDR_ENTSIZE;
+	dd->rcvhdrsize = QIB_DFLT_RCVHDRSIZE;
+	dd->rhf_offset =
+		dd->rcvhdrentsize - sizeof(u64) / sizeof(u32);
+
+	/* we always allocate at least 2048 bytes for eager buffers */
+	ret = ib_mtu_enum_to_int(qib_ibmtu);
+	dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
+	BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
+	dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
+
+	qib_7220_tidtemplate(dd);
+
+	/*
+	 * We can request a receive interrupt for 1 or
+	 * more packets from current offset.  For now, we set this
+	 * up for a single packet.
+	 */
+	dd->rhdrhead_intr_off = 1ULL << 32;
+
+	/* setup the stats timer; the add_timer is done at end of init */
+	init_timer(&dd->stats_timer);
+	dd->stats_timer.function = qib_get_7220_faststats;
+	dd->stats_timer.data = (unsigned long) dd;
+	dd->stats_timer.expires = jiffies + ACTIVITY_TIMER * HZ;
+
+	/*
+	 * Control[4] has been added to change the arbitration within
+	 * the SDMA engine between favoring data fetches over descriptor
+	 * fetches.  qib_sdma_fetch_arb==0 gives data fetches priority.
+	 */
+	if (qib_sdma_fetch_arb)
+		dd->control |= 1 << 4;
+
+	dd->ureg_align = 0x10000;  /* 64KB alignment */
+
+	dd->piosize2kmax_dwords = (dd->piosize2k >> 2)-1;
+	qib_7220_config_ctxts(dd);
+	qib_set_ctxtcnt(dd);  /* needed for PAT setup */
+
+	if (qib_wc_pat) {
+		ret = init_chip_wc_pat(dd, 0);
+		if (ret)
+			goto bail;
+	}
+	set_7220_baseaddrs(dd); /* set chip access pointers now */
+
+	ret = 0;
+	if (qib_mini_init)
+		goto bail;
+
+	ret = qib_create_ctxts(dd);
+	init_7220_cntrnames(dd);
+
+	/*
+	 * Use all the 4KB buffers for kernel SDMA (none if !SDMA).
+	 * Reserve the update-threshold number of buffers (or 3,
+	 * whichever is greater) for other kernel use, such as sending
+	 * SMI, MAD, and ACKs; if we aren't enabling SDMA, all the 4k
+	 * bufs go to the kernel instead.  If the reserve were less than
+	 * the update threshold, we could wait a long time for an update.
+	 * Coded this way because we sometimes change the update threshold
+	 * for various reasons, and we want this to remain robust.
+	 */
+	updthresh = 8U; /* update threshold */
+	if (dd->flags & QIB_HAS_SEND_DMA) {
+		dd->cspec->sdmabufcnt = dd->piobcnt4k;
+		sbufs = updthresh > 3 ? updthresh : 3;
+	} else {
+		dd->cspec->sdmabufcnt = 0;
+		sbufs = dd->piobcnt4k;
+	}
+
+	dd->cspec->lastbuf_for_pio = dd->piobcnt2k + dd->piobcnt4k -
+		dd->cspec->sdmabufcnt;
+	dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs;
+	dd->cspec->lastbuf_for_pio--; /* range is <= , not < */
+	dd->last_pio = dd->cspec->lastbuf_for_pio;
+	dd->pbufsctxt = dd->lastctxt_piobuf /
+		(dd->cfgctxts - dd->first_user_ctxt);
+
+	/*
+	 * If we are at 16 user contexts, we will have only 7 sbufs
+	 * per context, so drop the update threshold to match.  We
+	 * want to update before we actually run out, at low pbufs/ctxt
+	 * so give ourselves some margin
+	 */
+	if ((dd->pbufsctxt - 2) < updthresh)
+		updthresh = dd->pbufsctxt - 2;
+
+	dd->cspec->updthresh_dflt = updthresh;
+	dd->cspec->updthresh = updthresh;
+
+	/* before full enable, no interrupts, no locking needed */
+	dd->sendctrl |= (updthresh & SYM_RMASK(SendCtrl, AvailUpdThld))
+		<< SYM_LSB(SendCtrl, AvailUpdThld);
+
+	dd->psxmitwait_supported = 1;
+	dd->psxmitwait_check_rate = QIB_7220_PSXMITWAIT_CHECK_RATE;
+bail:
+	return ret;
+}
+
+static u32 __iomem *qib_7220_getsendbuf(struct qib_pportdata *ppd, u64 pbc,
+					u32 *pbufnum)
+{
+	u32 first, last, plen = pbc & QIB_PBC_LENGTH_MASK;
+	struct qib_devdata *dd = ppd->dd;
+	u32 __iomem *buf;
+
+	if (((pbc >> 32) & PBC_7220_VL15_SEND_CTRL) &&
+	    !(ppd->lflags & (QIBL_IB_AUTONEG_INPROG | QIBL_LINKACTIVE)))
+		buf = get_7220_link_buf(ppd, pbufnum);
+	else {
+		if ((plen + 1) > dd->piosize2kmax_dwords)
+			first = dd->piobcnt2k;
+		else
+			first = 0;
+		/* try 4k if all 2k busy, so same last for both sizes */
+		last = dd->cspec->lastbuf_for_pio;
+		buf = qib_getsendbuf_range(dd, pbufnum, first, last);
+	}
+	return buf;
+}
+
+/* these 2 "counters" are really control registers, and are always RW */
+static void qib_set_cntr_7220_sample(struct qib_pportdata *ppd, u32 intv,
+				     u32 start)
+{
+	write_7220_creg(ppd->dd, cr_psinterval, intv);
+	write_7220_creg(ppd->dd, cr_psstart, start);
+}
+
+/*
+ * NOTE: no real attempt is made to generalize the SDMA stuff.
+ * At some point "soon" we will have a new, more generalized
+ * sdma interface, and then we'll clean this up.
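+ *
+ * (A standalone sketch of the descriptor-ring sanity check used by
+ * the gethead routine below follows; it is not part of the original
+ * patch.)
+ */
+
+/*
+ * Example, illustration only: the SDMA descriptor queue is a ring,
+ * so a head value reported by hardware is only plausible if it lies
+ * within the region software believes is outstanding.  This is the
+ * same predicate qib_sdma_7220_gethead() applies inline further on;
+ * the helper name here is an assumption for illustration.
+ */
+static int example_sdma_head_sane(u16 hwhead, u16 swhead, u16 swtail,
+				  u16 cnt)
+{
+	if (swhead < swtail)	/* not wrapped */
+		return hwhead >= swhead && hwhead <= swtail;
+	if (swhead > swtail)	/* wrapped around */
+		return (hwhead >= swhead && hwhead < cnt) ||
+			hwhead <= swtail;
+	return hwhead == swhead;	/* ring empty */
+}
+
+/*
+ * (End of sketch; the original SDMA routines continue below.)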
+ */ + +/* Must be called with sdma_lock held, or before init finished */ +static void qib_sdma_update_7220_tail(struct qib_pportdata *ppd, u16 tail) +{ + /* Commit writes to memory and advance the tail on the chip */ + wmb(); + ppd->sdma_descq_tail = tail; + qib_write_kreg(ppd->dd, kr_senddmatail, tail); +} + +static void qib_sdma_set_7220_desc_cnt(struct qib_pportdata *ppd, unsigned cnt) +{ +} + +static struct sdma_set_state_action sdma_7220_action_table[] = { + [qib_sdma_state_s00_hw_down] = { + .op_enable = 0, + .op_intenable = 0, + .op_halt = 0, + .go_s99_running_tofalse = 1, + }, + [qib_sdma_state_s10_hw_start_up_wait] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + }, + [qib_sdma_state_s20_idle] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + }, + [qib_sdma_state_s30_sw_clean_up_wait] = { + .op_enable = 0, + .op_intenable = 1, + .op_halt = 0, + }, + [qib_sdma_state_s40_hw_clean_up_wait] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + }, + [qib_sdma_state_s50_hw_halt_wait] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + }, + [qib_sdma_state_s99_running] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 0, + .go_s99_running_totrue = 1, + }, +}; + +static void qib_7220_sdma_init_early(struct qib_pportdata *ppd) +{ + ppd->sdma_state.set_state_action = sdma_7220_action_table; +} + +static int init_sdma_7220_regs(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + unsigned i, n; + u64 senddmabufmask[3] = { 0 }; + + /* Set SendDmaBase */ + qib_write_kreg(dd, kr_senddmabase, ppd->sdma_descq_phys); + qib_sdma_7220_setlengen(ppd); + qib_sdma_update_7220_tail(ppd, 0); /* Set SendDmaTail */ + /* Set SendDmaHeadAddr */ + qib_write_kreg(dd, kr_senddmaheadaddr, ppd->sdma_head_phys); + + /* + * Reserve all the former "kernel" piobufs, using high number range + * so we get as many 4K buffers as possible + */ + n = dd->piobcnt2k + dd->piobcnt4k; + i = n - dd->cspec->sdmabufcnt; + + for (; i < n; ++i) { + unsigned word = i / 64; + unsigned bit = i & 63; + + BUG_ON(word >= 3); + senddmabufmask[word] |= 1ULL << bit; + } + qib_write_kreg(dd, kr_senddmabufmask0, senddmabufmask[0]); + qib_write_kreg(dd, kr_senddmabufmask1, senddmabufmask[1]); + qib_write_kreg(dd, kr_senddmabufmask2, senddmabufmask[2]); + + ppd->sdma_state.first_sendbuf = i; + ppd->sdma_state.last_sendbuf = n; + + return 0; +} + +/* sdma_lock must be held */ +static u16 qib_sdma_7220_gethead(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + int sane; + int use_dmahead; + u16 swhead; + u16 swtail; + u16 cnt; + u16 hwhead; + + use_dmahead = __qib_sdma_running(ppd) && + (dd->flags & QIB_HAS_SDMA_TIMEOUT); +retry: + hwhead = use_dmahead ? 
+ (u16)le64_to_cpu(*ppd->sdma_head_dma) : + (u16)qib_read_kreg32(dd, kr_senddmahead); + + swhead = ppd->sdma_descq_head; + swtail = ppd->sdma_descq_tail; + cnt = ppd->sdma_descq_cnt; + + if (swhead < swtail) { + /* not wrapped */ + sane = (hwhead >= swhead) & (hwhead <= swtail); + } else if (swhead > swtail) { + /* wrapped around */ + sane = ((hwhead >= swhead) && (hwhead < cnt)) || + (hwhead <= swtail); + } else { + /* empty */ + sane = (hwhead == swhead); + } + + if (unlikely(!sane)) { + if (use_dmahead) { + /* try one more time, directly from the register */ + use_dmahead = 0; + goto retry; + } + /* assume no progress */ + hwhead = swhead; + } + + return hwhead; +} + +static int qib_sdma_7220_busy(struct qib_pportdata *ppd) +{ + u64 hwstatus = qib_read_kreg64(ppd->dd, kr_senddmastatus); + + return (hwstatus & SYM_MASK(SendDmaStatus, ScoreBoardDrainInProg)) || + (hwstatus & SYM_MASK(SendDmaStatus, AbortInProg)) || + (hwstatus & SYM_MASK(SendDmaStatus, InternalSDmaEnable)) || + !(hwstatus & SYM_MASK(SendDmaStatus, ScbEmpty)); +} + +/* + * Compute the amount of delay before sending the next packet if the + * port's send rate differs from the static rate set for the QP. + * Since the delay affects this packet but the amount of the delay is + * based on the length of the previous packet, use the last delay computed + * and save the delay count for this packet to be used next time + * we get here. + */ +static u32 qib_7220_setpbc_control(struct qib_pportdata *ppd, u32 plen, + u8 srate, u8 vl) +{ + u8 snd_mult = ppd->delay_mult; + u8 rcv_mult = ib_rate_to_delay[srate]; + u32 ret = ppd->cpspec->last_delay_mult; + + ppd->cpspec->last_delay_mult = (rcv_mult > snd_mult) ? + (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0; + + /* Indicate VL15, if necessary */ + if (vl == 15) + ret |= PBC_7220_VL15_SEND_CTRL; + return ret; +} + +static void qib_7220_initvl15_bufs(struct qib_devdata *dd) +{ +} + +static void qib_7220_init_ctxt(struct qib_ctxtdata *rcd) +{ + if (!rcd->ctxt) { + rcd->rcvegrcnt = IBA7220_KRCVEGRCNT; + rcd->rcvegr_tid_base = 0; + } else { + rcd->rcvegrcnt = rcd->dd->cspec->rcvegrcnt; + rcd->rcvegr_tid_base = IBA7220_KRCVEGRCNT + + (rcd->ctxt - 1) * rcd->rcvegrcnt; + } +} + +static void qib_7220_txchk_change(struct qib_devdata *dd, u32 start, + u32 len, u32 which, struct qib_ctxtdata *rcd) +{ + int i; + unsigned long flags; + + switch (which) { + case TXCHK_CHG_TYPE_KERN: + /* see if we need to raise avail update threshold */ + spin_lock_irqsave(&dd->uctxt_lock, flags); + for (i = dd->first_user_ctxt; + dd->cspec->updthresh != dd->cspec->updthresh_dflt + && i < dd->cfgctxts; i++) + if (dd->rcd[i] && dd->rcd[i]->subctxt_cnt && + ((dd->rcd[i]->piocnt / dd->rcd[i]->subctxt_cnt) - 1) + < dd->cspec->updthresh_dflt) + break; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + if (i == dd->cfgctxts) { + spin_lock_irqsave(&dd->sendctrl_lock, flags); + dd->cspec->updthresh = dd->cspec->updthresh_dflt; + dd->sendctrl &= ~SYM_MASK(SendCtrl, AvailUpdThld); + dd->sendctrl |= (dd->cspec->updthresh & + SYM_RMASK(SendCtrl, AvailUpdThld)) << + SYM_LSB(SendCtrl, AvailUpdThld); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + sendctrl_7220_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + } + break; + case TXCHK_CHG_TYPE_USER: + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if (rcd && rcd->subctxt_cnt && ((rcd->piocnt + / rcd->subctxt_cnt) - 1) < dd->cspec->updthresh) { + dd->cspec->updthresh = (rcd->piocnt / + rcd->subctxt_cnt) - 1; + dd->sendctrl &= ~SYM_MASK(SendCtrl, AvailUpdThld); + dd->sendctrl |= 
(dd->cspec->updthresh &
+				 SYM_RMASK(SendCtrl, AvailUpdThld))
+				<< SYM_LSB(SendCtrl, AvailUpdThld);
+			spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+			sendctrl_7220_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+		} else
+			spin_unlock_irqrestore(&dd->sendctrl_lock, flags);
+		break;
+	}
+}
+
+static void writescratch(struct qib_devdata *dd, u32 val)
+{
+	qib_write_kreg(dd, kr_scratch, val);
+}
+
+#define VALID_TS_RD_REG_MASK 0xBF
+/**
+ * qib_7220_tempsense_rd - read register of temp sensor via TWSI
+ * @dd: the qlogic_ib device
+ * @regnum: register to read from
+ *
+ * returns reg contents (0..255) or < 0 for error
+ */
+static int qib_7220_tempsense_rd(struct qib_devdata *dd, int regnum)
+{
+	int ret;
+	u8 rdata;
+
+	if (regnum > 7) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	/* return a bogus value for (the one) register we do not have */
+	if (!((1 << regnum) & VALID_TS_RD_REG_MASK)) {
+		ret = 0;
+		goto bail;
+	}
+
+	ret = mutex_lock_interruptible(&dd->eep_lock);
+	if (ret)
+		goto bail;
+
+	ret = qib_twsi_blk_rd(dd, QIB_TWSI_TEMP_DEV, regnum, &rdata, 1);
+	if (!ret)
+		ret = rdata;
+
+	mutex_unlock(&dd->eep_lock);
+
+	/*
+	 * There are three possibilities here:
+	 * ret is actual value (0..255)
+	 * ret is -ENXIO or -EINVAL from twsi code or this file
+	 * ret is -EINTR from mutex_lock_interruptible.
+	 */
+bail:
+	return ret;
+}
+
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+static int qib_7220_notify_dca(struct qib_devdata *dd, unsigned long event)
+{
+	return 0;
+}
+#endif
+
+/* Dummy function, as 7220 boards never disable EEPROM Write */
+static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen)
+{
+	return 1;
+}
+
+/**
+ * qib_init_iba7220_funcs - set up the chip-specific function pointers
+ * @pdev: the pci_dev for qlogic_ib device
+ * @ent: pci_device_id struct for this dev
+ *
+ * This is global, and is called directly at init to set up the
+ * chip-specific function pointers for later use.
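+ *
+ * (A usage note, not part of the original patch, is interposed
+ * before the function.)
+ */
+
+/*
+ * Illustration only: once this table is filled in, the chip-
+ * independent layer reaches the hardware solely through the dd->f_*
+ * hooks assigned below, e.g. a hypothetical caller toggling a GPIO
+ * pin:
+ *
+ *	int gpio_in = dd->f_gpio_mod(dd, out, dir, mask);
+ *
+ * so generic code never needs to know which chip variant is present.
+ */
+
+/*
+ * (The kernel-doc above describes qib_init_iba7220_funcs(), which
+ * follows.)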
+ */ +struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct qib_devdata *dd; + int ret; + u32 boardid, minwidth; + + dd = qib_alloc_devdata(pdev, sizeof(struct qib_chip_specific) + + sizeof(struct qib_chippport_specific)); + if (IS_ERR(dd)) + goto bail; + + dd->f_bringup_serdes = qib_7220_bringup_serdes; + dd->f_cleanup = qib_setup_7220_cleanup; + dd->f_clear_tids = qib_7220_clear_tids; + dd->f_free_irq = qib_7220_free_irq; + dd->f_get_base_info = qib_7220_get_base_info; + dd->f_get_msgheader = qib_7220_get_msgheader; + dd->f_getsendbuf = qib_7220_getsendbuf; + dd->f_gpio_mod = gpio_7220_mod; + dd->f_eeprom_wen = qib_7220_eeprom_wen; + dd->f_hdrqempty = qib_7220_hdrqempty; + dd->f_ib_updown = qib_7220_ib_updown; + dd->f_init_ctxt = qib_7220_init_ctxt; + dd->f_initvl15_bufs = qib_7220_initvl15_bufs; + dd->f_intr_fallback = qib_7220_intr_fallback; + dd->f_late_initreg = qib_late_7220_initreg; + dd->f_setpbc_control = qib_7220_setpbc_control; + dd->f_portcntr = qib_portcntr_7220; + dd->f_put_tid = qib_7220_put_tid; + dd->f_quiet_serdes = qib_7220_quiet_serdes; + dd->f_rcvctrl = rcvctrl_7220_mod; + dd->f_read_cntrs = qib_read_7220cntrs; + dd->f_read_portcntrs = qib_read_7220portcntrs; + dd->f_reset = qib_setup_7220_reset; + dd->f_init_sdma_regs = init_sdma_7220_regs; + dd->f_sdma_busy = qib_sdma_7220_busy; + dd->f_sdma_gethead = qib_sdma_7220_gethead; + dd->f_sdma_sendctrl = qib_7220_sdma_sendctrl; + dd->f_sdma_set_desc_cnt = qib_sdma_set_7220_desc_cnt; + dd->f_sdma_update_tail = qib_sdma_update_7220_tail; + dd->f_sdma_hw_clean_up = qib_7220_sdma_hw_clean_up; + dd->f_sdma_hw_start_up = qib_7220_sdma_hw_start_up; + dd->f_sdma_init_early = qib_7220_sdma_init_early; + dd->f_sendctrl = sendctrl_7220_mod; + dd->f_set_armlaunch = qib_set_7220_armlaunch; + dd->f_set_cntr_sample = qib_set_cntr_7220_sample; + dd->f_iblink_state = qib_7220_iblink_state; + dd->f_ibphys_portstate = qib_7220_phys_portstate; + dd->f_get_ib_cfg = qib_7220_get_ib_cfg; + dd->f_set_ib_cfg = qib_7220_set_ib_cfg; + dd->f_set_ib_loopback = qib_7220_set_loopback; + dd->f_set_intr_state = qib_7220_set_intr_state; + dd->f_setextled = qib_setup_7220_setextled; + dd->f_txchk_change = qib_7220_txchk_change; + dd->f_update_usrhead = qib_update_7220_usrhead; + dd->f_wantpiobuf_intr = qib_wantpiobuf_7220_intr; + dd->f_xgxs_reset = qib_7220_xgxs_reset; + dd->f_writescratch = writescratch; + dd->f_tempsense_rd = qib_7220_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_7220_notify_dca; +#endif + /* + * Do remaining pcie setup and save pcie values in dd. + * Any error printing is already done by the init code. + * On return, we have the chip mapped, but chip registers + * are not set up until start of qib_init_7220_variables. 
+ */ + ret = qib_pcie_ddinit(dd, pdev, ent); + if (ret < 0) + goto bail_free; + + /* initialize chip-specific variables */ + ret = qib_init_7220_variables(dd); + if (ret) + goto bail_cleanup; + + if (qib_mini_init) + goto bail; + + boardid = SYM_FIELD(dd->revision, Revision, + BoardID); + switch (boardid) { + case 0: + case 2: + case 10: + case 12: + minwidth = 16; /* x16 capable boards */ + break; + default: + minwidth = 8; /* x8 capable boards */ + break; + } + if (qib_pcie_params(dd, minwidth, NULL, NULL)) + qib_dev_err(dd, + "Failed to setup PCIe or interrupts; continuing anyway\n"); + + /* save IRQ for possible later use */ + dd->cspec->irq = pdev->irq; + + if (qib_read_kreg64(dd, kr_hwerrstatus) & + QLOGIC_IB_HWE_SERDESPLLFAILED) + qib_write_kreg(dd, kr_hwerrclear, + QLOGIC_IB_HWE_SERDESPLLFAILED); + + /* setup interrupt handler (interrupt type handled above) */ + qib_setup_7220_interrupt(dd); + qib_7220_init_hwerrors(dd); + + /* clear diagctrl register, in case diags were running and crashed */ + qib_write_kreg(dd, kr_hwdiagctrl, 0); + + goto bail; + +bail_cleanup: + qib_pcie_ddcleanup(dd); +bail_free: + qib_free_devdata(dd); + dd = ERR_PTR(ret); +bail: + return dd; +} diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c new file mode 100644 index 00000000000..a7eb32517a0 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -0,0 +1,8554 @@ +/* + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* + * This file contains all of the code that is specific to the + * InfiniPath 7322 chip + */ + +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/jiffies.h> +#include <linux/module.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_smi.h> +#ifdef CONFIG_INFINIBAND_QIB_DCA +#include <linux/dca.h> +#endif + +#include "qib.h" +#include "qib_7322_regs.h" +#include "qib_qsfp.h" + +#include "qib_mad.h" +#include "qib_verbs.h" + +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME " " fmt + +static void qib_setup_7322_setextled(struct qib_pportdata *, u32); +static void qib_7322_handle_hwerrors(struct qib_devdata *, char *, size_t); +static void sendctrl_7322_mod(struct qib_pportdata *ppd, u32 op); +static irqreturn_t qib_7322intr(int irq, void *data); +static irqreturn_t qib_7322bufavail(int irq, void *data); +static irqreturn_t sdma_intr(int irq, void *data); +static irqreturn_t sdma_idle_intr(int irq, void *data); +static irqreturn_t sdma_progress_intr(int irq, void *data); +static irqreturn_t sdma_cleanup_intr(int irq, void *data); +static void qib_7322_txchk_change(struct qib_devdata *, u32, u32, u32, + struct qib_ctxtdata *rcd); +static u8 qib_7322_phys_portstate(u64); +static u32 qib_7322_iblink_state(u64); +static void qib_set_ib_7322_lstate(struct qib_pportdata *ppd, u16 linkcmd, + u16 linitcmd); +static void force_h1(struct qib_pportdata *); +static void adj_tx_serdes(struct qib_pportdata *); +static u32 qib_7322_setpbc_control(struct qib_pportdata *, u32, u8, u8); +static void qib_7322_mini_pcs_reset(struct qib_pportdata *); + +static u32 ahb_mod(struct qib_devdata *, int, int, int, u32, u32); +static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned); +static void serdes_7322_los_enable(struct qib_pportdata *, int); +static int serdes_7322_init_old(struct qib_pportdata *); +static int serdes_7322_init_new(struct qib_pportdata *); +static void dump_sdma_7322_state(struct qib_pportdata *); + +#define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb)) + +/* LE2 serdes values for different cases */ +#define LE2_DEFAULT 5 +#define LE2_5m 4 +#define LE2_QME 0 + +/* Below is special-purpose, so only really works for the IB SerDes blocks. */ +#define IBSD(hw_pidx) (hw_pidx + 2) + +/* these are variables for documentation and experimentation purposes */ +static const unsigned rcv_int_timeout = 375; +static const unsigned rcv_int_count = 16; +static const unsigned sdma_idle_cnt = 64; + +/* Time to stop altering Rx Equalization parameters, after link up. */ +#define RXEQ_DISABLE_MSECS 2500 + +/* + * Number of VLs we are configured to use (to allow for more + * credits per vl, etc.) 
+ */ +ushort qib_num_cfg_vls = 2; +module_param_named(num_vls, qib_num_cfg_vls, ushort, S_IRUGO); +MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)"); + +static ushort qib_chase = 1; +module_param_named(chase, qib_chase, ushort, S_IRUGO); +MODULE_PARM_DESC(chase, "Enable state chase handling"); + +static ushort qib_long_atten = 10; /* 10 dB ~= 5m length */ +module_param_named(long_attenuation, qib_long_atten, ushort, S_IRUGO); +MODULE_PARM_DESC(long_attenuation, \ + "attenuation cutoff (dB) for long copper cable setup"); + +static ushort qib_singleport; +module_param_named(singleport, qib_singleport, ushort, S_IRUGO); +MODULE_PARM_DESC(singleport, "Use only IB port 1; more per-port buffer space"); + +static ushort qib_krcvq01_no_msi; +module_param_named(krcvq01_no_msi, qib_krcvq01_no_msi, ushort, S_IRUGO); +MODULE_PARM_DESC(krcvq01_no_msi, "No MSI for kctx < 2"); + +/* + * Receive header queue sizes + */ +static unsigned qib_rcvhdrcnt; +module_param_named(rcvhdrcnt, qib_rcvhdrcnt, uint, S_IRUGO); +MODULE_PARM_DESC(rcvhdrcnt, "receive header count"); + +static unsigned qib_rcvhdrsize; +module_param_named(rcvhdrsize, qib_rcvhdrsize, uint, S_IRUGO); +MODULE_PARM_DESC(rcvhdrsize, "receive header size in 32-bit words"); + +static unsigned qib_rcvhdrentsize; +module_param_named(rcvhdrentsize, qib_rcvhdrentsize, uint, S_IRUGO); +MODULE_PARM_DESC(rcvhdrentsize, "receive header entry size in 32-bit words"); + +#define MAX_ATTEN_LEN 64 /* plenty for any real system */ +/* for read back, default index is ~5m copper cable */ +static char txselect_list[MAX_ATTEN_LEN] = "10"; +static struct kparam_string kp_txselect = { + .string = txselect_list, + .maxlen = MAX_ATTEN_LEN +}; +static int setup_txselect(const char *, struct kernel_param *); +module_param_call(txselect, setup_txselect, param_get_string, + &kp_txselect, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(txselect, \ + "Tx serdes indices (for no QSFP or invalid QSFP data)"); + +#define BOARD_QME7342 5 +#define BOARD_QMH7342 6 +#define IS_QMH(dd) (SYM_FIELD((dd)->revision, Revision, BoardID) == \ + BOARD_QMH7342) +#define IS_QME(dd) (SYM_FIELD((dd)->revision, Revision, BoardID) == \ + BOARD_QME7342) + +#define KREG_IDX(regname) (QIB_7322_##regname##_OFFS / sizeof(u64)) + +#define KREG_IBPORT_IDX(regname) ((QIB_7322_##regname##_0_OFFS / sizeof(u64))) + +#define MASK_ACROSS(lsb, msb) \ + (((1ULL << ((msb) + 1 - (lsb))) - 1) << (lsb)) + +#define SYM_RMASK(regname, fldname) ((u64) \ + QIB_7322_##regname##_##fldname##_RMASK) + +#define SYM_MASK(regname, fldname) ((u64) \ + QIB_7322_##regname##_##fldname##_RMASK << \ + QIB_7322_##regname##_##fldname##_LSB) + +#define SYM_FIELD(value, regname, fldname) ((u64) \ + (((value) >> SYM_LSB(regname, fldname)) & \ + SYM_RMASK(regname, fldname))) + +/* useful for things like LaFifoEmpty_0...7, TxCreditOK_0...7, etc. 
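+ *
+ * (An illustrative use of these macros, not part of the original
+ * patch, follows.)
+ *
+ * Illustration only: SYM_FIELD() extracts a named field from a
+ * register value via the machine-generated _LSB/_RMASK constants,
+ * e.g. pulling the active link speed out of a shadowed IBCStatusA
+ * value:
+ *
+ *	u32 spd = SYM_FIELD(ibcs, IBCStatusA_0, LinkSpeedActive);
+ *
+ * The remaining field-access helpers follow.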
*/
+#define SYM_FIELD_ACROSS(value, regname, fldname, nbits) \
+	(((value) >> SYM_LSB(regname, fldname)) & MASK_ACROSS(0, nbits))
+
+#define HWE_MASK(fldname) SYM_MASK(HwErrMask, fldname##Mask)
+#define ERR_MASK(fldname) SYM_MASK(ErrMask, fldname##Mask)
+#define ERR_MASK_N(fldname) SYM_MASK(ErrMask_0, fldname##Mask)
+#define INT_MASK(fldname) SYM_MASK(IntMask, fldname##IntMask)
+#define INT_MASK_P(fldname, port) SYM_MASK(IntMask, fldname##IntMask##_##port)
+/* Below because most, but not all, fields of IntMask have that full suffix */
+#define INT_MASK_PM(fldname, port) SYM_MASK(IntMask, fldname##Mask##_##port)
+
+
+#define SYM_LSB(regname, fldname) (QIB_7322_##regname##_##fldname##_LSB)
+
+/*
+ * the size bits give us 2^N, in KB units.  0 marks as invalid,
+ * and 7 is reserved.  We currently use only 2KB and 4KB
+ */
+#define IBA7322_TID_SZ_SHIFT QIB_7322_RcvTIDArray0_RT_BufSize_LSB
+#define IBA7322_TID_SZ_2K (1UL<<IBA7322_TID_SZ_SHIFT) /* 2KB */
+#define IBA7322_TID_SZ_4K (2UL<<IBA7322_TID_SZ_SHIFT) /* 4KB */
+#define IBA7322_TID_PA_SHIFT 11U /* TID addr in chip stored w/o low bits */
+
+#define SendIBSLIDAssignMask \
+	QIB_7322_SendIBSLIDAssign_0_SendIBSLIDAssign_15_0_RMASK
+#define SendIBSLMCMask \
+	QIB_7322_SendIBSLIDMask_0_SendIBSLIDMask_15_0_RMASK
+
+#define ExtLED_IB1_YEL SYM_MASK(EXTCtrl, LEDPort0YellowOn)
+#define ExtLED_IB1_GRN SYM_MASK(EXTCtrl, LEDPort0GreenOn)
+#define ExtLED_IB2_YEL SYM_MASK(EXTCtrl, LEDPort1YellowOn)
+#define ExtLED_IB2_GRN SYM_MASK(EXTCtrl, LEDPort1GreenOn)
+#define ExtLED_IB1_MASK (ExtLED_IB1_YEL | ExtLED_IB1_GRN)
+#define ExtLED_IB2_MASK (ExtLED_IB2_YEL | ExtLED_IB2_GRN)
+
+#define _QIB_GPIO_SDA_NUM 1
+#define _QIB_GPIO_SCL_NUM 0
+#define QIB_EEPROM_WEN_NUM 14
+#define QIB_TWSI_EEPROM_DEV 0xA2 /* All Production 7322 cards. */
+
+/* HW counter clock is at 4nsec */
+#define QIB_7322_PSXMITWAIT_CHECK_RATE 4000
+
+/* full speed IB port 1 only */
+#define PORT_SPD_CAP (QIB_IB_SDR | QIB_IB_DDR | QIB_IB_QDR)
+#define PORT_SPD_CAP_SHIFT 3
+
+/* full speed featuremask, both ports */
+#define DUAL_PORT_CAP (PORT_SPD_CAP | (PORT_SPD_CAP << PORT_SPD_CAP_SHIFT))
+
+/*
+ * This file contains almost all the chip-specific register information and
+ * access functions for the QLogic InfiniPath 7322 PCI-Express chip.
+ */ + +/* Use defines to tie machine-generated names to lower-case names */ +#define kr_contextcnt KREG_IDX(ContextCnt) +#define kr_control KREG_IDX(Control) +#define kr_counterregbase KREG_IDX(CntrRegBase) +#define kr_errclear KREG_IDX(ErrClear) +#define kr_errmask KREG_IDX(ErrMask) +#define kr_errstatus KREG_IDX(ErrStatus) +#define kr_extctrl KREG_IDX(EXTCtrl) +#define kr_extstatus KREG_IDX(EXTStatus) +#define kr_gpio_clear KREG_IDX(GPIOClear) +#define kr_gpio_mask KREG_IDX(GPIOMask) +#define kr_gpio_out KREG_IDX(GPIOOut) +#define kr_gpio_status KREG_IDX(GPIOStatus) +#define kr_hwdiagctrl KREG_IDX(HwDiagCtrl) +#define kr_debugportval KREG_IDX(DebugPortValueReg) +#define kr_fmask KREG_IDX(feature_mask) +#define kr_act_fmask KREG_IDX(active_feature_mask) +#define kr_hwerrclear KREG_IDX(HwErrClear) +#define kr_hwerrmask KREG_IDX(HwErrMask) +#define kr_hwerrstatus KREG_IDX(HwErrStatus) +#define kr_intclear KREG_IDX(IntClear) +#define kr_intmask KREG_IDX(IntMask) +#define kr_intredirect KREG_IDX(IntRedirect0) +#define kr_intstatus KREG_IDX(IntStatus) +#define kr_pagealign KREG_IDX(PageAlign) +#define kr_rcvavailtimeout KREG_IDX(RcvAvailTimeOut0) +#define kr_rcvctrl KREG_IDX(RcvCtrl) /* Common, but chip also has per-port */ +#define kr_rcvegrbase KREG_IDX(RcvEgrBase) +#define kr_rcvegrcnt KREG_IDX(RcvEgrCnt) +#define kr_rcvhdrcnt KREG_IDX(RcvHdrCnt) +#define kr_rcvhdrentsize KREG_IDX(RcvHdrEntSize) +#define kr_rcvhdrsize KREG_IDX(RcvHdrSize) +#define kr_rcvtidbase KREG_IDX(RcvTIDBase) +#define kr_rcvtidcnt KREG_IDX(RcvTIDCnt) +#define kr_revision KREG_IDX(Revision) +#define kr_scratch KREG_IDX(Scratch) +#define kr_sendbuffererror KREG_IDX(SendBufErr0) /* and base for 1 and 2 */ +#define kr_sendcheckmask KREG_IDX(SendCheckMask0) /* and 1, 2 */ +#define kr_sendctrl KREG_IDX(SendCtrl) +#define kr_sendgrhcheckmask KREG_IDX(SendGRHCheckMask0) /* and 1, 2 */ +#define kr_sendibpktmask KREG_IDX(SendIBPacketMask0) /* and 1, 2 */ +#define kr_sendpioavailaddr KREG_IDX(SendBufAvailAddr) +#define kr_sendpiobufbase KREG_IDX(SendBufBase) +#define kr_sendpiobufcnt KREG_IDX(SendBufCnt) +#define kr_sendpiosize KREG_IDX(SendBufSize) +#define kr_sendregbase KREG_IDX(SendRegBase) +#define kr_sendbufavail0 KREG_IDX(SendBufAvail0) +#define kr_userregbase KREG_IDX(UserRegBase) +#define kr_intgranted KREG_IDX(Int_Granted) +#define kr_vecclr_wo_int KREG_IDX(vec_clr_without_int) +#define kr_intblocked KREG_IDX(IntBlocked) +#define kr_r_access KREG_IDX(SPC_JTAG_ACCESS_REG) + +/* + * per-port kernel registers. 
Access only with qib_read_kreg_port() + * or qib_write_kreg_port() + */ +#define krp_errclear KREG_IBPORT_IDX(ErrClear) +#define krp_errmask KREG_IBPORT_IDX(ErrMask) +#define krp_errstatus KREG_IBPORT_IDX(ErrStatus) +#define krp_highprio_0 KREG_IBPORT_IDX(HighPriority0) +#define krp_highprio_limit KREG_IBPORT_IDX(HighPriorityLimit) +#define krp_hrtbt_guid KREG_IBPORT_IDX(HRTBT_GUID) +#define krp_ib_pcsconfig KREG_IBPORT_IDX(IBPCSConfig) +#define krp_ibcctrl_a KREG_IBPORT_IDX(IBCCtrlA) +#define krp_ibcctrl_b KREG_IBPORT_IDX(IBCCtrlB) +#define krp_ibcctrl_c KREG_IBPORT_IDX(IBCCtrlC) +#define krp_ibcstatus_a KREG_IBPORT_IDX(IBCStatusA) +#define krp_ibcstatus_b KREG_IBPORT_IDX(IBCStatusB) +#define krp_txestatus KREG_IBPORT_IDX(TXEStatus) +#define krp_lowprio_0 KREG_IBPORT_IDX(LowPriority0) +#define krp_ncmodectrl KREG_IBPORT_IDX(IBNCModeCtrl) +#define krp_partitionkey KREG_IBPORT_IDX(RcvPartitionKey) +#define krp_psinterval KREG_IBPORT_IDX(PSInterval) +#define krp_psstart KREG_IBPORT_IDX(PSStart) +#define krp_psstat KREG_IBPORT_IDX(PSStat) +#define krp_rcvbthqp KREG_IBPORT_IDX(RcvBTHQP) +#define krp_rcvctrl KREG_IBPORT_IDX(RcvCtrl) +#define krp_rcvpktledcnt KREG_IBPORT_IDX(RcvPktLEDCnt) +#define krp_rcvqpmaptable KREG_IBPORT_IDX(RcvQPMapTableA) +#define krp_rxcreditvl0 KREG_IBPORT_IDX(RxCreditVL0) +#define krp_rxcreditvl15 (KREG_IBPORT_IDX(RxCreditVL0)+15) +#define krp_sendcheckcontrol KREG_IBPORT_IDX(SendCheckControl) +#define krp_sendctrl KREG_IBPORT_IDX(SendCtrl) +#define krp_senddmabase KREG_IBPORT_IDX(SendDmaBase) +#define krp_senddmabufmask0 KREG_IBPORT_IDX(SendDmaBufMask0) +#define krp_senddmabufmask1 (KREG_IBPORT_IDX(SendDmaBufMask0) + 1) +#define krp_senddmabufmask2 (KREG_IBPORT_IDX(SendDmaBufMask0) + 2) +#define krp_senddmabuf_use0 KREG_IBPORT_IDX(SendDmaBufUsed0) +#define krp_senddmabuf_use1 (KREG_IBPORT_IDX(SendDmaBufUsed0) + 1) +#define krp_senddmabuf_use2 (KREG_IBPORT_IDX(SendDmaBufUsed0) + 2) +#define krp_senddmadesccnt KREG_IBPORT_IDX(SendDmaDescCnt) +#define krp_senddmahead KREG_IBPORT_IDX(SendDmaHead) +#define krp_senddmaheadaddr KREG_IBPORT_IDX(SendDmaHeadAddr) +#define krp_senddmaidlecnt KREG_IBPORT_IDX(SendDmaIdleCnt) +#define krp_senddmalengen KREG_IBPORT_IDX(SendDmaLenGen) +#define krp_senddmaprioritythld KREG_IBPORT_IDX(SendDmaPriorityThld) +#define krp_senddmareloadcnt KREG_IBPORT_IDX(SendDmaReloadCnt) +#define krp_senddmastatus KREG_IBPORT_IDX(SendDmaStatus) +#define krp_senddmatail KREG_IBPORT_IDX(SendDmaTail) +#define krp_sendhdrsymptom KREG_IBPORT_IDX(SendHdrErrSymptom) +#define krp_sendslid KREG_IBPORT_IDX(SendIBSLIDAssign) +#define krp_sendslidmask KREG_IBPORT_IDX(SendIBSLIDMask) +#define krp_ibsdtestiftx KREG_IBPORT_IDX(IB_SDTEST_IF_TX) +#define krp_adapt_dis_timer KREG_IBPORT_IDX(ADAPT_DISABLE_TIMER_THRESHOLD) +#define krp_tx_deemph_override KREG_IBPORT_IDX(IBSD_TX_DEEMPHASIS_OVERRIDE) +#define krp_serdesctrl KREG_IBPORT_IDX(IBSerdesCtrl) + +/* + * Per-context kernel registers. Access only with qib_read_kreg_ctxt() + * or qib_write_kreg_ctxt() + */ +#define krc_rcvhdraddr KREG_IDX(RcvHdrAddr0) +#define krc_rcvhdrtailaddr KREG_IDX(RcvHdrTailAddr0) + +/* + * TID Flow table, per context. Reduces + * number of hdrq updates to one per flow (or on errors). + * context 0 and 1 share same memory, but have distinct + * addresses. Since for now, we never use expected sends + * on kernel contexts, we don't worry about that (we initialize + * those entries for ctxt 0/1 on driver load twice, for example). 
+ */ +#define NUM_TIDFLOWS_CTXT 0x20 /* 0x20 per context; have to hardcode */ +#define ur_rcvflowtable (KREG_IDX(RcvTIDFlowTable0) - KREG_IDX(RcvHdrTail0)) + +/* these are the error bits in the tid flows, and are W1C */ +#define TIDFLOW_ERRBITS ( \ + (SYM_MASK(RcvTIDFlowTable0, GenMismatch) << \ + SYM_LSB(RcvTIDFlowTable0, GenMismatch)) | \ + (SYM_MASK(RcvTIDFlowTable0, SeqMismatch) << \ + SYM_LSB(RcvTIDFlowTable0, SeqMismatch))) + +/* Most (not all) Counters are per-IBport. + * Requires LBIntCnt is at offset 0 in the group + */ +#define CREG_IDX(regname) \ +((QIB_7322_##regname##_0_OFFS - QIB_7322_LBIntCnt_OFFS) / sizeof(u64)) + +#define crp_badformat CREG_IDX(RxVersionErrCnt) +#define crp_err_rlen CREG_IDX(RxLenErrCnt) +#define crp_erricrc CREG_IDX(RxICRCErrCnt) +#define crp_errlink CREG_IDX(RxLinkMalformCnt) +#define crp_errlpcrc CREG_IDX(RxLPCRCErrCnt) +#define crp_errpkey CREG_IDX(RxPKeyMismatchCnt) +#define crp_errvcrc CREG_IDX(RxVCRCErrCnt) +#define crp_excessbufferovfl CREG_IDX(ExcessBufferOvflCnt) +#define crp_iblinkdown CREG_IDX(IBLinkDownedCnt) +#define crp_iblinkerrrecov CREG_IDX(IBLinkErrRecoveryCnt) +#define crp_ibstatuschange CREG_IDX(IBStatusChangeCnt) +#define crp_ibsymbolerr CREG_IDX(IBSymbolErrCnt) +#define crp_invalidrlen CREG_IDX(RxMaxMinLenErrCnt) +#define crp_locallinkintegrityerr CREG_IDX(LocalLinkIntegrityErrCnt) +#define crp_pktrcv CREG_IDX(RxDataPktCnt) +#define crp_pktrcvflowctrl CREG_IDX(RxFlowPktCnt) +#define crp_pktsend CREG_IDX(TxDataPktCnt) +#define crp_pktsendflow CREG_IDX(TxFlowPktCnt) +#define crp_psrcvdatacount CREG_IDX(PSRcvDataCount) +#define crp_psrcvpktscount CREG_IDX(PSRcvPktsCount) +#define crp_psxmitdatacount CREG_IDX(PSXmitDataCount) +#define crp_psxmitpktscount CREG_IDX(PSXmitPktsCount) +#define crp_psxmitwaitcount CREG_IDX(PSXmitWaitCount) +#define crp_rcvebp CREG_IDX(RxEBPCnt) +#define crp_rcvflowctrlviol CREG_IDX(RxFlowCtrlViolCnt) +#define crp_rcvovfl CREG_IDX(RxBufOvflCnt) +#define crp_rxdlidfltr CREG_IDX(RxDlidFltrCnt) +#define crp_rxdroppkt CREG_IDX(RxDroppedPktCnt) +#define crp_rxotherlocalphyerr CREG_IDX(RxOtherLocalPhyErrCnt) +#define crp_rxqpinvalidctxt CREG_IDX(RxQPInvalidContextCnt) +#define crp_rxvlerr CREG_IDX(RxVlErrCnt) +#define crp_sendstall CREG_IDX(TxFlowStallCnt) +#define crp_txdroppedpkt CREG_IDX(TxDroppedPktCnt) +#define crp_txhdrerr CREG_IDX(TxHeadersErrCnt) +#define crp_txlenerr CREG_IDX(TxLenErrCnt) +#define crp_txminmaxlenerr CREG_IDX(TxMaxMinLenErrCnt) +#define crp_txsdmadesc CREG_IDX(TxSDmaDescCnt) +#define crp_txunderrun CREG_IDX(TxUnderrunCnt) +#define crp_txunsupvl CREG_IDX(TxUnsupVLErrCnt) +#define crp_vl15droppedpkt CREG_IDX(RxVL15DroppedPktCnt) +#define crp_wordrcv CREG_IDX(RxDwordCnt) +#define crp_wordsend CREG_IDX(TxDwordCnt) +#define crp_tx_creditstalls CREG_IDX(TxCreditUpToDateTimeOut) + +/* these are the (few) counters that are not port-specific */ +#define CREG_DEVIDX(regname) ((QIB_7322_##regname##_OFFS - \ + QIB_7322_LBIntCnt_OFFS) / sizeof(u64)) +#define cr_base_egrovfl CREG_DEVIDX(RxP0HdrEgrOvflCnt) +#define cr_lbint CREG_DEVIDX(LBIntCnt) +#define cr_lbstall CREG_DEVIDX(LBFlowStallCnt) +#define cr_pcieretrydiag CREG_DEVIDX(PcieRetryBufDiagQwordCnt) +#define cr_rxtidflowdrop CREG_DEVIDX(RxTidFlowDropCnt) +#define cr_tidfull CREG_DEVIDX(RxTIDFullErrCnt) +#define cr_tidinvalid CREG_DEVIDX(RxTIDValidErrCnt) + +/* no chip register for # of IB ports supported, so define */ +#define NUM_IB_PORTS 2 + +/* 1 VL15 buffer per hardware IB port, no register for this, so define */ +#define NUM_VL15_BUFS 
NUM_IB_PORTS + +/* + * context 0 and 1 are special, and there is no chip register that + * defines this value, so we have to define it here. + * These are all allocated to either 0 or 1 for single port + * hardware configuration, otherwise each gets half + */ +#define KCTXT0_EGRCNT 2048 + +/* values for vl and port fields in PBC, 7322-specific */ +#define PBC_PORT_SEL_LSB 26 +#define PBC_PORT_SEL_RMASK 1 +#define PBC_VL_NUM_LSB 27 +#define PBC_VL_NUM_RMASK 7 +#define PBC_7322_VL15_SEND (1ULL << 63) /* pbc; VL15, no credit check */ +#define PBC_7322_VL15_SEND_CTRL (1ULL << 31) /* control version of same */ + +static u8 ib_rate_to_delay[IB_RATE_120_GBPS + 1] = { + [IB_RATE_2_5_GBPS] = 16, + [IB_RATE_5_GBPS] = 8, + [IB_RATE_10_GBPS] = 4, + [IB_RATE_20_GBPS] = 2, + [IB_RATE_30_GBPS] = 2, + [IB_RATE_40_GBPS] = 1 +}; + +#define IBA7322_LINKSPEED_SHIFT SYM_LSB(IBCStatusA_0, LinkSpeedActive) +#define IBA7322_LINKWIDTH_SHIFT SYM_LSB(IBCStatusA_0, LinkWidthActive) + +/* link training states, from IBC */ +#define IB_7322_LT_STATE_DISABLED 0x00 +#define IB_7322_LT_STATE_LINKUP 0x01 +#define IB_7322_LT_STATE_POLLACTIVE 0x02 +#define IB_7322_LT_STATE_POLLQUIET 0x03 +#define IB_7322_LT_STATE_SLEEPDELAY 0x04 +#define IB_7322_LT_STATE_SLEEPQUIET 0x05 +#define IB_7322_LT_STATE_CFGDEBOUNCE 0x08 +#define IB_7322_LT_STATE_CFGRCVFCFG 0x09 +#define IB_7322_LT_STATE_CFGWAITRMT 0x0a +#define IB_7322_LT_STATE_CFGIDLE 0x0b +#define IB_7322_LT_STATE_RECOVERRETRAIN 0x0c +#define IB_7322_LT_STATE_TXREVLANES 0x0d +#define IB_7322_LT_STATE_RECOVERWAITRMT 0x0e +#define IB_7322_LT_STATE_RECOVERIDLE 0x0f +#define IB_7322_LT_STATE_CFGENH 0x10 +#define IB_7322_LT_STATE_CFGTEST 0x11 +#define IB_7322_LT_STATE_CFGWAITRMTTEST 0x12 +#define IB_7322_LT_STATE_CFGWAITENH 0x13 + +/* link state machine states from IBC */ +#define IB_7322_L_STATE_DOWN 0x0 +#define IB_7322_L_STATE_INIT 0x1 +#define IB_7322_L_STATE_ARM 0x2 +#define IB_7322_L_STATE_ACTIVE 0x3 +#define IB_7322_L_STATE_ACT_DEFER 0x4 + +static const u8 qib_7322_physportstate[0x20] = { + [IB_7322_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED, + [IB_7322_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP, + [IB_7322_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL, + [IB_7322_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL, + [IB_7322_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP, + [IB_7322_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP, + [IB_7322_LT_STATE_CFGDEBOUNCE] = IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7322_LT_STATE_CFGRCVFCFG] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7322_LT_STATE_CFGWAITRMT] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7322_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_IDLE, + [IB_7322_LT_STATE_RECOVERRETRAIN] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [IB_7322_LT_STATE_RECOVERWAITRMT] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [IB_7322_LT_STATE_RECOVERIDLE] = + IB_PHYSPORTSTATE_LINK_ERR_RECOVER, + [IB_7322_LT_STATE_CFGENH] = IB_PHYSPORTSTATE_CFG_ENH, + [IB_7322_LT_STATE_CFGTEST] = IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7322_LT_STATE_CFGWAITRMTTEST] = + IB_PHYSPORTSTATE_CFG_TRAIN, + [IB_7322_LT_STATE_CFGWAITENH] = + IB_PHYSPORTSTATE_CFG_WAIT_ENH, + [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, + [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN +}; + +#ifdef CONFIG_INFINIBAND_QIB_DCA +struct qib_irq_notify { + int rcv; + void *arg; + struct irq_affinity_notify notify; +}; +#endif + +struct qib_chip_specific { + u64 __iomem *cregbase; + u64 *cntrs; + spinlock_t rcvmod_lock; /* protect rcvctrl shadow changes */ + spinlock_t gpio_lock; /* RMW 
of shadows/regs for ExtCtrl and GPIO */ + u64 main_int_mask; /* clear bits which have dedicated handlers */ + u64 int_enable_mask; /* for per port interrupts in single port mode */ + u64 errormask; + u64 hwerrmask; + u64 gpio_out; /* shadow of kr_gpio_out, for rmw ops */ + u64 gpio_mask; /* shadow the gpio mask register */ + u64 extctrl; /* shadow the gpio output enable, etc... */ + u32 ncntrs; + u32 nportcntrs; + u32 cntrnamelen; + u32 portcntrnamelen; + u32 numctxts; + u32 rcvegrcnt; + u32 updthresh; /* current AvailUpdThld */ + u32 updthresh_dflt; /* default AvailUpdThld */ + u32 r1; + int irq; + u32 num_msix_entries; + u32 sdmabufcnt; + u32 lastbuf_for_pio; + u32 stay_in_freeze; + u32 recovery_ports_initted; +#ifdef CONFIG_INFINIBAND_QIB_DCA + u32 dca_ctrl; + int rhdr_cpu[18]; + int sdma_cpu[2]; + u64 dca_rcvhdr_ctrl[5]; /* B, C, D, E, F */ +#endif + struct qib_msix_entry *msix_entries; + unsigned long *sendchkenable; + unsigned long *sendgrhchk; + unsigned long *sendibchk; + u32 rcvavail_timeout[18]; + char emsgbuf[128]; /* for device error interrupt msg buffer */ +}; + +/* Table of entries in "human readable" form Tx Emphasis. */ +struct txdds_ent { + u8 amp; + u8 pre; + u8 main; + u8 post; +}; + +struct vendor_txdds_ent { + u8 oui[QSFP_VOUI_LEN]; + u8 *partnum; + struct txdds_ent sdr; + struct txdds_ent ddr; + struct txdds_ent qdr; +}; + +static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *); + +#define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */ +#define TXDDS_EXTRA_SZ 18 /* number of extra tx settings entries */ +#define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */ +#define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */ + +#define H1_FORCE_VAL 8 +#define H1_FORCE_QME 1 /* may be overridden via setup_txselect() */ +#define H1_FORCE_QMH 7 /* may be overridden via setup_txselect() */ + +/* The static and dynamic registers are paired, and the pairs indexed by spd */ +#define krp_static_adapt_dis(spd) (KREG_IBPORT_IDX(ADAPT_DISABLE_STATIC_SDR) \ + + ((spd) * 2)) + +#define QDR_DFE_DISABLE_DELAY 4000 /* msec after LINKUP */ +#define QDR_STATIC_ADAPT_DOWN 0xf0f0f0f0ULL /* link down, H1-H4 QDR adapts */ +#define QDR_STATIC_ADAPT_DOWN_R1 0ULL /* r1 link down, H1-H4 QDR adapts */ +#define QDR_STATIC_ADAPT_INIT 0xffffffffffULL /* up, disable H0,H1-8, LE */ +#define QDR_STATIC_ADAPT_INIT_R1 0xf0ffffffffULL /* r1 up, disable H0,H1-8 */ + +struct qib_chippport_specific { + u64 __iomem *kpregbase; + u64 __iomem *cpregbase; + u64 *portcntrs; + struct qib_pportdata *ppd; + wait_queue_head_t autoneg_wait; + struct delayed_work autoneg_work; + struct delayed_work ipg_work; + struct timer_list chase_timer; + /* + * these 5 fields are used to establish deltas for IB symbol + * errors and linkrecovery errors. They can be reported on + * some chips during link negotiation prior to INIT, and with + * DDR when faking DDR negotiations with non-IBTA switches. + * The chip counters are adjusted at driver unload if there is + * a non-zero delta. + */ + u64 ibdeltainprog; + u64 ibsymdelta; + u64 ibsymsnap; + u64 iblnkerrdelta; + u64 iblnkerrsnap; + u64 iblnkdownsnap; + u64 iblnkdowndelta; + u64 ibmalfdelta; + u64 ibmalfsnap; + u64 ibcctrl_a; /* krp_ibcctrl_a shadow */ + u64 ibcctrl_b; /* krp_ibcctrl_b shadow */ + unsigned long qdr_dfe_time; + unsigned long chase_end; + u32 autoneg_tries; + u32 recovery_init; + u32 qdr_dfe_on; + u32 qdr_reforce; + /* + * Per-bay per-channel rcv QMH H1 values and Tx values for QDR. 
+ * entry zero is unused, to simplify indexing + */ + u8 h1_val; + u8 no_eep; /* txselect table index to use if no qsfp info */ + u8 ipg_tries; + u8 ibmalfusesnap; + struct qib_qsfp_data qsfp_data; + char epmsgbuf[192]; /* for port error interrupt msg buffer */ + char sdmamsgbuf[192]; /* for per-port sdma error messages */ +}; + +static struct { + const char *name; + irq_handler_t handler; + int lsb; + int port; /* 0 if not port-specific, else port # */ + int dca; +} irq_table[] = { + { "", qib_7322intr, -1, 0, 0 }, + { " (buf avail)", qib_7322bufavail, + SYM_LSB(IntStatus, SendBufAvail), 0, 0}, + { " (sdma 0)", sdma_intr, + SYM_LSB(IntStatus, SDmaInt_0), 1, 1 }, + { " (sdma 1)", sdma_intr, + SYM_LSB(IntStatus, SDmaInt_1), 2, 1 }, + { " (sdmaI 0)", sdma_idle_intr, + SYM_LSB(IntStatus, SDmaIdleInt_0), 1, 1}, + { " (sdmaI 1)", sdma_idle_intr, + SYM_LSB(IntStatus, SDmaIdleInt_1), 2, 1}, + { " (sdmaP 0)", sdma_progress_intr, + SYM_LSB(IntStatus, SDmaProgressInt_0), 1, 1 }, + { " (sdmaP 1)", sdma_progress_intr, + SYM_LSB(IntStatus, SDmaProgressInt_1), 2, 1 }, + { " (sdmaC 0)", sdma_cleanup_intr, + SYM_LSB(IntStatus, SDmaCleanupDone_0), 1, 0 }, + { " (sdmaC 1)", sdma_cleanup_intr, + SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 , 0}, +}; + +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static const struct dca_reg_map { + int shadow_inx; + int lsb; + u64 mask; + u16 regno; +} dca_rcvhdr_reg_map[] = { + { 0, SYM_LSB(DCACtrlB, RcvHdrq0DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq0DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq1DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq1DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq2DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq2DCAOPH) , KREG_IDX(DCACtrlB) }, + { 0, SYM_LSB(DCACtrlB, RcvHdrq3DCAOPH), + ~SYM_MASK(DCACtrlB, RcvHdrq3DCAOPH) , KREG_IDX(DCACtrlB) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq4DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq4DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq5DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq5DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq6DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq6DCAOPH) , KREG_IDX(DCACtrlC) }, + { 1, SYM_LSB(DCACtrlC, RcvHdrq7DCAOPH), + ~SYM_MASK(DCACtrlC, RcvHdrq7DCAOPH) , KREG_IDX(DCACtrlC) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq8DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq8DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq9DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq9DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq10DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq10DCAOPH) , KREG_IDX(DCACtrlD) }, + { 2, SYM_LSB(DCACtrlD, RcvHdrq11DCAOPH), + ~SYM_MASK(DCACtrlD, RcvHdrq11DCAOPH) , KREG_IDX(DCACtrlD) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq12DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq12DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq13DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq13DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq14DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq14DCAOPH) , KREG_IDX(DCACtrlE) }, + { 3, SYM_LSB(DCACtrlE, RcvHdrq15DCAOPH), + ~SYM_MASK(DCACtrlE, RcvHdrq15DCAOPH) , KREG_IDX(DCACtrlE) }, + { 4, SYM_LSB(DCACtrlF, RcvHdrq16DCAOPH), + ~SYM_MASK(DCACtrlF, RcvHdrq16DCAOPH) , KREG_IDX(DCACtrlF) }, + { 4, SYM_LSB(DCACtrlF, RcvHdrq17DCAOPH), + ~SYM_MASK(DCACtrlF, RcvHdrq17DCAOPH) , KREG_IDX(DCACtrlF) }, +}; +#endif + +/* ibcctrl bits */ +#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1 +/* cycle through TS1/TS2 till OK */ +#define QLOGIC_IB_IBCC_LINKINITCMD_POLL 2 +/* wait for TS1, then go on */ +#define QLOGIC_IB_IBCC_LINKINITCMD_SLEEP 3 +#define 
QLOGIC_IB_IBCC_LINKINITCMD_SHIFT 16 + +#define QLOGIC_IB_IBCC_LINKCMD_DOWN 1 /* move to 0x11 */ +#define QLOGIC_IB_IBCC_LINKCMD_ARMED 2 /* move to 0x21 */ +#define QLOGIC_IB_IBCC_LINKCMD_ACTIVE 3 /* move to 0x31 */ + +#define BLOB_7322_IBCHG 0x101 + +static inline void qib_write_kreg(const struct qib_devdata *dd, + const u32 regno, u64 value); +static inline u32 qib_read_kreg32(const struct qib_devdata *, const u32); +static void write_7322_initregs(struct qib_devdata *); +static void write_7322_init_portregs(struct qib_pportdata *); +static void setup_7322_link_recovery(struct qib_pportdata *, u32); +static void check_7322_rxe_status(struct qib_pportdata *); +static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *); +#ifdef CONFIG_INFINIBAND_QIB_DCA +static void qib_setup_dca(struct qib_devdata *dd); +static void setup_dca_notifier(struct qib_devdata *dd, + struct qib_msix_entry *m); +static void reset_dca_notifier(struct qib_devdata *dd, + struct qib_msix_entry *m); +#endif + +/** + * qib_read_ureg32 - read 32-bit virtualized per-context register + * @dd: device + * @regno: register number + * @ctxt: context number + * + * Return the contents of a register that is virtualized to be per context. + * Returns 0 if the device is not present or not initialized; that is + * not distinguishable from a valid zero register value at runtime (we + * may add a separate error variable at some point). + */ +static inline u32 qib_read_ureg32(const struct qib_devdata *dd, + enum qib_ureg regno, int ctxt) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readl(regno + (u64 __iomem *)( + (dd->ureg_align * ctxt) + (dd->userbase ? + (char __iomem *)dd->userbase : + (char __iomem *)dd->kregbase + dd->uregbase))); +} + +/** + * qib_read_ureg - read virtualized per-context register + * @dd: device + * @regno: register number + * @ctxt: context number + * + * Return the contents of a register that is virtualized to be per context. + * Returns 0 if the device is not present or not initialized; that is + * not distinguishable from a valid zero register value at runtime (we + * may add a separate error variable at some point). + */ +static inline u64 qib_read_ureg(const struct qib_devdata *dd, + enum qib_ureg regno, int ctxt) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readq(regno + (u64 __iomem *)( + (dd->ureg_align * ctxt) + (dd->userbase ? + (char __iomem *)dd->userbase : + (char __iomem *)dd->kregbase + dd->uregbase))); +} + +/** + * qib_write_ureg - write virtualized per-context register + * @dd: device + * @regno: register number + * @value: value + * @ctxt: context + * + * Write the contents of a register that is virtualized to be per context.
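+ * + * (Per-context user registers live at a stride of dd->ureg_align bytes + * per context, based at dd->userbase when they are mapped separately, + * else at dd->kregbase + dd->uregbase, as in the accessors above.)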
+ */ +static inline void qib_write_ureg(const struct qib_devdata *dd, + enum qib_ureg regno, u64 value, int ctxt) +{ + u64 __iomem *ubase; + if (dd->userbase) + ubase = (u64 __iomem *) + ((char __iomem *) dd->userbase + + dd->ureg_align * ctxt); + else + ubase = (u64 __iomem *) + (dd->uregbase + + (char __iomem *) dd->kregbase + + dd->ureg_align * ctxt); + + if (dd->kregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &ubase[regno]); +} + +static inline u32 qib_read_kreg32(const struct qib_devdata *dd, + const u32 regno) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return -1; + return readl((u32 __iomem *) &dd->kregbase[regno]); +} + +static inline u64 qib_read_kreg64(const struct qib_devdata *dd, + const u32 regno) +{ + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) + return -1; + return readq(&dd->kregbase[regno]); +} + +static inline void qib_write_kreg(const struct qib_devdata *dd, + const u32 regno, u64 value) +{ + if (dd->kregbase && (dd->flags & QIB_PRESENT)) + writeq(value, &dd->kregbase[regno]); +} + +/* + * not many sanity checks for the port-specific kernel register routines, + * since they are only used when it's known to be safe. +*/ +static inline u64 qib_read_kreg_port(const struct qib_pportdata *ppd, + const u16 regno) +{ + if (!ppd->cpspec->kpregbase || !(ppd->dd->flags & QIB_PRESENT)) + return 0ULL; + return readq(&ppd->cpspec->kpregbase[regno]); +} + +static inline void qib_write_kreg_port(const struct qib_pportdata *ppd, + const u16 regno, u64 value) +{ + if (ppd->cpspec && ppd->dd && ppd->cpspec->kpregbase && + (ppd->dd->flags & QIB_PRESENT)) + writeq(value, &ppd->cpspec->kpregbase[regno]); +} + +/** + * qib_write_kreg_ctxt - write a device's per-ctxt 64-bit kernel register + * @dd: the qlogic_ib device + * @regno: the register number to write + * @ctxt: the context containing the register + * @value: the value to write + */ +static inline void qib_write_kreg_ctxt(const struct qib_devdata *dd, + const u16 regno, unsigned ctxt, + u64 value) +{ + qib_write_kreg(dd, regno + ctxt, value); +} + +static inline u64 read_7322_creg(const struct qib_devdata *dd, u16 regno) +{ + if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readq(&dd->cspec->cregbase[regno]); + + +} + +static inline u32 read_7322_creg32(const struct qib_devdata *dd, u16 regno) +{ + if (!dd->cspec->cregbase || !(dd->flags & QIB_PRESENT)) + return 0; + return readl(&dd->cspec->cregbase[regno]); + + +} + +static inline void write_7322_creg_port(const struct qib_pportdata *ppd, + u16 regno, u64 value) +{ + if (ppd->cpspec && ppd->cpspec->cpregbase && + (ppd->dd->flags & QIB_PRESENT)) + writeq(value, &ppd->cpspec->cpregbase[regno]); +} + +static inline u64 read_7322_creg_port(const struct qib_pportdata *ppd, + u16 regno) +{ + if (!ppd->cpspec || !ppd->cpspec->cpregbase || + !(ppd->dd->flags & QIB_PRESENT)) + return 0; + return readq(&ppd->cpspec->cpregbase[regno]); +} + +static inline u32 read_7322_creg32_port(const struct qib_pportdata *ppd, + u16 regno) +{ + if (!ppd->cpspec || !ppd->cpspec->cpregbase || + !(ppd->dd->flags & QIB_PRESENT)) + return 0; + return readl(&ppd->cpspec->cpregbase[regno]); +} + +/* bits in Control register */ +#define QLOGIC_IB_C_RESET SYM_MASK(Control, SyncReset) +#define QLOGIC_IB_C_SDMAFETCHPRIOEN SYM_MASK(Control, SDmaDescFetchPriorityEn) + +/* bits in general interrupt regs */ +#define QIB_I_RCVURG_LSB SYM_LSB(IntMask, RcvUrg0IntMask) +#define QIB_I_RCVURG_RMASK MASK_ACROSS(0, 17) +#define QIB_I_RCVURG_MASK (QIB_I_RCVURG_RMASK << 
QIB_I_RCVURG_LSB) +#define QIB_I_RCVAVAIL_LSB SYM_LSB(IntMask, RcvAvail0IntMask) +#define QIB_I_RCVAVAIL_RMASK MASK_ACROSS(0, 17) +#define QIB_I_RCVAVAIL_MASK (QIB_I_RCVAVAIL_RMASK << QIB_I_RCVAVAIL_LSB) +#define QIB_I_C_ERROR INT_MASK(Err) + +#define QIB_I_SPIOSENT (INT_MASK_P(SendDone, 0) | INT_MASK_P(SendDone, 1)) +#define QIB_I_SPIOBUFAVAIL INT_MASK(SendBufAvail) +#define QIB_I_GPIO INT_MASK(AssertGPIO) +#define QIB_I_P_SDMAINT(pidx) \ + (INT_MASK_P(SDma, pidx) | INT_MASK_P(SDmaIdle, pidx) | \ + INT_MASK_P(SDmaProgress, pidx) | \ + INT_MASK_PM(SDmaCleanupDone, pidx)) + +/* Interrupt bits that are "per port" */ +#define QIB_I_P_BITSEXTANT(pidx) \ + (INT_MASK_P(Err, pidx) | INT_MASK_P(SendDone, pidx) | \ + INT_MASK_P(SDma, pidx) | INT_MASK_P(SDmaIdle, pidx) | \ + INT_MASK_P(SDmaProgress, pidx) | \ + INT_MASK_PM(SDmaCleanupDone, pidx)) + +/* Interrupt bits that are common to a device */ +/* currently unused: QIB_I_SPIOSENT */ +#define QIB_I_C_BITSEXTANT \ + (QIB_I_RCVURG_MASK | QIB_I_RCVAVAIL_MASK | \ + QIB_I_SPIOSENT | \ + QIB_I_C_ERROR | QIB_I_SPIOBUFAVAIL | QIB_I_GPIO) + +#define QIB_I_BITSEXTANT (QIB_I_C_BITSEXTANT | \ + QIB_I_P_BITSEXTANT(0) | QIB_I_P_BITSEXTANT(1)) + +/* + * Error bits that are "per port". + */ +#define QIB_E_P_IBSTATUSCHANGED ERR_MASK_N(IBStatusChanged) +#define QIB_E_P_SHDR ERR_MASK_N(SHeadersErr) +#define QIB_E_P_VL15_BUF_MISUSE ERR_MASK_N(VL15BufMisuseErr) +#define QIB_E_P_SND_BUF_MISUSE ERR_MASK_N(SendBufMisuseErr) +#define QIB_E_P_SUNSUPVL ERR_MASK_N(SendUnsupportedVLErr) +#define QIB_E_P_SUNEXP_PKTNUM ERR_MASK_N(SendUnexpectedPktNumErr) +#define QIB_E_P_SDROP_DATA ERR_MASK_N(SendDroppedDataPktErr) +#define QIB_E_P_SDROP_SMP ERR_MASK_N(SendDroppedSmpPktErr) +#define QIB_E_P_SPKTLEN ERR_MASK_N(SendPktLenErr) +#define QIB_E_P_SUNDERRUN ERR_MASK_N(SendUnderRunErr) +#define QIB_E_P_SMAXPKTLEN ERR_MASK_N(SendMaxPktLenErr) +#define QIB_E_P_SMINPKTLEN ERR_MASK_N(SendMinPktLenErr) +#define QIB_E_P_RIBLOSTLINK ERR_MASK_N(RcvIBLostLinkErr) +#define QIB_E_P_RHDR ERR_MASK_N(RcvHdrErr) +#define QIB_E_P_RHDRLEN ERR_MASK_N(RcvHdrLenErr) +#define QIB_E_P_RBADTID ERR_MASK_N(RcvBadTidErr) +#define QIB_E_P_RBADVERSION ERR_MASK_N(RcvBadVersionErr) +#define QIB_E_P_RIBFLOW ERR_MASK_N(RcvIBFlowErr) +#define QIB_E_P_REBP ERR_MASK_N(RcvEBPErr) +#define QIB_E_P_RUNSUPVL ERR_MASK_N(RcvUnsupportedVLErr) +#define QIB_E_P_RUNEXPCHAR ERR_MASK_N(RcvUnexpectedCharErr) +#define QIB_E_P_RSHORTPKTLEN ERR_MASK_N(RcvShortPktLenErr) +#define QIB_E_P_RLONGPKTLEN ERR_MASK_N(RcvLongPktLenErr) +#define QIB_E_P_RMAXPKTLEN ERR_MASK_N(RcvMaxPktLenErr) +#define QIB_E_P_RMINPKTLEN ERR_MASK_N(RcvMinPktLenErr) +#define QIB_E_P_RICRC ERR_MASK_N(RcvICRCErr) +#define QIB_E_P_RVCRC ERR_MASK_N(RcvVCRCErr) +#define QIB_E_P_RFORMATERR ERR_MASK_N(RcvFormatErr) + +#define QIB_E_P_SDMA1STDESC ERR_MASK_N(SDma1stDescErr) +#define QIB_E_P_SDMABASE ERR_MASK_N(SDmaBaseErr) +#define QIB_E_P_SDMADESCADDRMISALIGN ERR_MASK_N(SDmaDescAddrMisalignErr) +#define QIB_E_P_SDMADWEN ERR_MASK_N(SDmaDwEnErr) +#define QIB_E_P_SDMAGENMISMATCH ERR_MASK_N(SDmaGenMismatchErr) +#define QIB_E_P_SDMAHALT ERR_MASK_N(SDmaHaltErr) +#define QIB_E_P_SDMAMISSINGDW ERR_MASK_N(SDmaMissingDwErr) +#define QIB_E_P_SDMAOUTOFBOUND ERR_MASK_N(SDmaOutOfBoundErr) +#define QIB_E_P_SDMARPYTAG ERR_MASK_N(SDmaRpyTagErr) +#define QIB_E_P_SDMATAILOUTOFBOUND ERR_MASK_N(SDmaTailOutOfBoundErr) +#define QIB_E_P_SDMAUNEXPDATA ERR_MASK_N(SDmaUnexpDataErr) + +/* Error bits that are common to a device */ +#define QIB_E_RESET ERR_MASK(ResetNegated) +#define QIB_E_HARDWARE 
ERR_MASK(HardwareErr) +#define QIB_E_INVALIDADDR ERR_MASK(InvalidAddrErr) + +/* + * Per chip (rather than per-port) errors. Most either do + * nothing but trigger a print (because they self-recover, or + * always occur in tandem with other errors that handle the + * issue), or indicate errors with no recovery, where we still + * want to know that they happened. + */ +#define QIB_E_SBUF_VL15_MISUSE ERR_MASK(SBufVL15MisUseErr) +#define QIB_E_BADEEP ERR_MASK(InvalidEEPCmd) +#define QIB_E_VLMISMATCH ERR_MASK(SendVLMismatchErr) +#define QIB_E_ARMLAUNCH ERR_MASK(SendArmLaunchErr) +#define QIB_E_SPCLTRIG ERR_MASK(SendSpecialTriggerErr) +#define QIB_E_RRCVHDRFULL ERR_MASK(RcvHdrFullErr) +#define QIB_E_RRCVEGRFULL ERR_MASK(RcvEgrFullErr) +#define QIB_E_RCVCTXTSHARE ERR_MASK(RcvContextShareErr) + +/* SDMA chip errors (not per port) + * QIB_E_SDMA_BUF_DUP needs no special handling, because we will also get + * the SDMAHALT error immediately, so we just print the dup error via the + * E_AUTO mechanism. This is true of most of the per-port fatal errors + * as well, but since this is port-independent, by definition, it's + * handled a bit differently. SDMA_VL15 and SDMA_WRONG_PORT are per + * packet send errors, and so are handled in the same manner as other + * per-packet errors. + */ +#define QIB_E_SDMA_VL15 ERR_MASK(SDmaVL15Err) +#define QIB_E_SDMA_WRONG_PORT ERR_MASK(SDmaWrongPortErr) +#define QIB_E_SDMA_BUF_DUP ERR_MASK(SDmaBufMaskDuplicateErr) + +/* + * The set below is functionally equivalent to the legacy + * QLOGIC_IB_E_PKTERRS; it is used to print "common" packet errors. + */ +#define QIB_E_P_PKTERRS (QIB_E_P_SPKTLEN |\ + QIB_E_P_SDROP_DATA | QIB_E_P_RVCRC |\ + QIB_E_P_RICRC | QIB_E_P_RSHORTPKTLEN |\ + QIB_E_P_VL15_BUF_MISUSE | QIB_E_P_SHDR | \ + QIB_E_P_REBP) + +/* Error bits that are packet-related (Receive, per-port) */ +#define QIB_E_P_RPKTERRS (\ + QIB_E_P_RHDRLEN | QIB_E_P_RBADTID | \ + QIB_E_P_RBADVERSION | QIB_E_P_RHDR | \ + QIB_E_P_RLONGPKTLEN | QIB_E_P_RSHORTPKTLEN |\ + QIB_E_P_RMAXPKTLEN | QIB_E_P_RMINPKTLEN | \ + QIB_E_P_RFORMATERR | QIB_E_P_RUNSUPVL | \ + QIB_E_P_RUNEXPCHAR | QIB_E_P_RIBFLOW | QIB_E_P_REBP) + +/* + * Error bits that are Send-related (per port). + * (ARMLAUNCH excluded from E_SPKTERRS because it gets special handling). + * All of these potentially need to have a buffer disarmed. + */ +#define QIB_E_P_SPKTERRS (\ + QIB_E_P_SUNEXP_PKTNUM |\ + QIB_E_P_SDROP_DATA | QIB_E_P_SDROP_SMP |\ + QIB_E_P_SMAXPKTLEN |\ + QIB_E_P_VL15_BUF_MISUSE | QIB_E_P_SHDR | \ + QIB_E_P_SMINPKTLEN | QIB_E_P_SPKTLEN | \ + QIB_E_P_SND_BUF_MISUSE | QIB_E_P_SUNSUPVL) + +#define QIB_E_SPKTERRS ( \ + QIB_E_SBUF_VL15_MISUSE | QIB_E_VLMISMATCH | \ + ERR_MASK_N(SendUnsupportedVLErr) | \ + QIB_E_SPCLTRIG | QIB_E_SDMA_VL15 | QIB_E_SDMA_WRONG_PORT) + +#define QIB_E_P_SDMAERRS ( \ + QIB_E_P_SDMAHALT | \ + QIB_E_P_SDMADESCADDRMISALIGN | \ + QIB_E_P_SDMAUNEXPDATA | \ + QIB_E_P_SDMAMISSINGDW | \ + QIB_E_P_SDMADWEN | \ + QIB_E_P_SDMARPYTAG | \ + QIB_E_P_SDMA1STDESC | \ + QIB_E_P_SDMABASE | \ + QIB_E_P_SDMATAILOUTOFBOUND | \ + QIB_E_P_SDMAOUTOFBOUND | \ + QIB_E_P_SDMAGENMISMATCH) + +/* + * This sets some bits more than once, but makes it more obvious which + * bits are not handled under other categories, and the repeat definition + * is not a problem.
+ */ +#define QIB_E_P_BITSEXTANT ( \ + QIB_E_P_SPKTERRS | QIB_E_P_PKTERRS | QIB_E_P_RPKTERRS | \ + QIB_E_P_RIBLOSTLINK | QIB_E_P_IBSTATUSCHANGED | \ + QIB_E_P_SND_BUF_MISUSE | QIB_E_P_SUNDERRUN | \ + QIB_E_P_SHDR | QIB_E_P_VL15_BUF_MISUSE | QIB_E_P_SDMAERRS \ + ) + +/* + * These are errors that can occur when the link + * changes state while a packet is being sent or received. This doesn't + * cover things like EBP or VCRC that can be the result of the sending + * side having the link change state, so we receive a "known bad" packet. + * All of these are "per port", so renamed: + */ +#define QIB_E_P_LINK_PKTERRS (\ + QIB_E_P_SDROP_DATA | QIB_E_P_SDROP_SMP |\ + QIB_E_P_SMINPKTLEN | QIB_E_P_SPKTLEN |\ + QIB_E_P_RSHORTPKTLEN | QIB_E_P_RMINPKTLEN |\ + QIB_E_P_RUNEXPCHAR) + +/* + * This sets some bits more than once, but makes it more obvious which + * bits are not handled under other categories (such as QIB_E_SPKTERRS), + * and the repeat definition is not a problem. + */ +#define QIB_E_C_BITSEXTANT (\ + QIB_E_HARDWARE | QIB_E_INVALIDADDR | QIB_E_BADEEP |\ + QIB_E_ARMLAUNCH | QIB_E_VLMISMATCH | QIB_E_RRCVHDRFULL |\ + QIB_E_RRCVEGRFULL | QIB_E_RESET | QIB_E_SBUF_VL15_MISUSE) + +/* Likewise, E_SPKT_ERRS_IGNORE is neutered (no bits ignored) here */ +#define E_SPKT_ERRS_IGNORE 0 + +#define QIB_EXTS_MEMBIST_DISABLED \ + SYM_MASK(EXTStatus, MemBISTDisabled) +#define QIB_EXTS_MEMBIST_ENDTEST \ + SYM_MASK(EXTStatus, MemBISTEndTest) + +#define QIB_E_SPIOARMLAUNCH \ + ERR_MASK(SendArmLaunchErr) + +#define IBA7322_IBCC_LINKINITCMD_MASK SYM_RMASK(IBCCtrlA_0, LinkInitCmd) +#define IBA7322_IBCC_LINKCMD_SHIFT SYM_LSB(IBCCtrlA_0, LinkCmd) + +/* + * IBTA_1_2 is set when multiple speeds are enabled (normal), + * and also if forced QDR (only QDR enabled). It's enabled for the + * forced QDR case so that scrambling will be enabled by the TS3 + * exchange, when supported by both sides of the link.
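+ * (Scrambling is an IBTA 1.2 enhanced-mode feature negotiated during + * training; without this bit a QDR-only port would never request it.)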
+ */ +#define IBA7322_IBC_IBTA_1_2_MASK SYM_MASK(IBCCtrlB_0, IB_ENHANCED_MODE) +#define IBA7322_IBC_MAX_SPEED_MASK SYM_MASK(IBCCtrlB_0, SD_SPEED) +#define IBA7322_IBC_SPEED_QDR SYM_MASK(IBCCtrlB_0, SD_SPEED_QDR) +#define IBA7322_IBC_SPEED_DDR SYM_MASK(IBCCtrlB_0, SD_SPEED_DDR) +#define IBA7322_IBC_SPEED_SDR SYM_MASK(IBCCtrlB_0, SD_SPEED_SDR) +#define IBA7322_IBC_SPEED_MASK (SYM_MASK(IBCCtrlB_0, SD_SPEED_SDR) | \ + SYM_MASK(IBCCtrlB_0, SD_SPEED_DDR) | SYM_MASK(IBCCtrlB_0, SD_SPEED_QDR)) +#define IBA7322_IBC_SPEED_LSB SYM_LSB(IBCCtrlB_0, SD_SPEED_SDR) + +#define IBA7322_LEDBLINK_OFF_SHIFT SYM_LSB(RcvPktLEDCnt_0, OFFperiod) +#define IBA7322_LEDBLINK_ON_SHIFT SYM_LSB(RcvPktLEDCnt_0, ONperiod) + +#define IBA7322_IBC_WIDTH_AUTONEG SYM_MASK(IBCCtrlB_0, IB_NUM_CHANNELS) +#define IBA7322_IBC_WIDTH_4X_ONLY (1<<SYM_LSB(IBCCtrlB_0, IB_NUM_CHANNELS)) +#define IBA7322_IBC_WIDTH_1X_ONLY (0<<SYM_LSB(IBCCtrlB_0, IB_NUM_CHANNELS)) + +#define IBA7322_IBC_RXPOL_MASK SYM_MASK(IBCCtrlB_0, IB_POLARITY_REV_SUPP) +#define IBA7322_IBC_RXPOL_LSB SYM_LSB(IBCCtrlB_0, IB_POLARITY_REV_SUPP) +#define IBA7322_IBC_HRTBT_MASK (SYM_MASK(IBCCtrlB_0, HRTBT_AUTO) | \ + SYM_MASK(IBCCtrlB_0, HRTBT_ENB)) +#define IBA7322_IBC_HRTBT_RMASK (IBA7322_IBC_HRTBT_MASK >> \ + SYM_LSB(IBCCtrlB_0, HRTBT_ENB)) +#define IBA7322_IBC_HRTBT_LSB SYM_LSB(IBCCtrlB_0, HRTBT_ENB) + +#define IBA7322_REDIRECT_VEC_PER_REG 12 + +#define IBA7322_SENDCHK_PKEY SYM_MASK(SendCheckControl_0, PKey_En) +#define IBA7322_SENDCHK_BTHQP SYM_MASK(SendCheckControl_0, BTHQP_En) +#define IBA7322_SENDCHK_SLID SYM_MASK(SendCheckControl_0, SLID_En) +#define IBA7322_SENDCHK_RAW_IPV6 SYM_MASK(SendCheckControl_0, RawIPV6_En) +#define IBA7322_SENDCHK_MINSZ SYM_MASK(SendCheckControl_0, PacketTooSmall_En) + +#define AUTONEG_TRIES 3 /* sequential retries to negotiate DDR */ + +#define HWE_AUTO(fldname) { .mask = SYM_MASK(HwErrMask, fldname##Mask), \ + .msg = #fldname , .sz = sizeof(#fldname) } +#define HWE_AUTO_P(fldname, port) { .mask = SYM_MASK(HwErrMask, \ + fldname##Mask##_##port), .msg = #fldname , .sz = sizeof(#fldname) } +static const struct qib_hwerror_msgs qib_7322_hwerror_msgs[] = { + HWE_AUTO_P(IBSerdesPClkNotDetect, 1), + HWE_AUTO_P(IBSerdesPClkNotDetect, 0), + HWE_AUTO(PCIESerdesPClkNotDetect), + HWE_AUTO(PowerOnBISTFailed), + HWE_AUTO(TempsenseTholdReached), + HWE_AUTO(MemoryErr), + HWE_AUTO(PCIeBusParityErr), + HWE_AUTO(PcieCplTimeout), + HWE_AUTO(PciePoisonedTLP), + HWE_AUTO_P(SDmaMemReadErr, 1), + HWE_AUTO_P(SDmaMemReadErr, 0), + HWE_AUTO_P(IBCBusFromSPCParityErr, 1), + HWE_AUTO_P(IBCBusToSPCParityErr, 1), + HWE_AUTO_P(IBCBusFromSPCParityErr, 0), + HWE_AUTO(statusValidNoEop), + HWE_AUTO(LATriggered), + { .mask = 0, .sz = 0 } +}; + +#define E_AUTO(fldname) { .mask = SYM_MASK(ErrMask, fldname##Mask), \ + .msg = #fldname, .sz = sizeof(#fldname) } +#define E_P_AUTO(fldname) { .mask = SYM_MASK(ErrMask_0, fldname##Mask), \ + .msg = #fldname, .sz = sizeof(#fldname) } +static const struct qib_hwerror_msgs qib_7322error_msgs[] = { + E_AUTO(RcvEgrFullErr), + E_AUTO(RcvHdrFullErr), + E_AUTO(ResetNegated), + E_AUTO(HardwareErr), + E_AUTO(InvalidAddrErr), + E_AUTO(SDmaVL15Err), + E_AUTO(SBufVL15MisUseErr), + E_AUTO(InvalidEEPCmd), + E_AUTO(RcvContextShareErr), + E_AUTO(SendVLMismatchErr), + E_AUTO(SendArmLaunchErr), + E_AUTO(SendSpecialTriggerErr), + E_AUTO(SDmaWrongPortErr), + E_AUTO(SDmaBufMaskDuplicateErr), + { .mask = 0, .sz = 0 } +}; + +static const struct qib_hwerror_msgs qib_7322p_error_msgs[] = { + E_P_AUTO(IBStatusChanged), + E_P_AUTO(SHeadersErr), + 
E_P_AUTO(VL15BufMisuseErr), + /* + * SDmaHaltErr is not really an error, make it clearer; + */ + {.mask = SYM_MASK(ErrMask_0, SDmaHaltErrMask), .msg = "SDmaHalted", + .sz = 11}, + E_P_AUTO(SDmaDescAddrMisalignErr), + E_P_AUTO(SDmaUnexpDataErr), + E_P_AUTO(SDmaMissingDwErr), + E_P_AUTO(SDmaDwEnErr), + E_P_AUTO(SDmaRpyTagErr), + E_P_AUTO(SDma1stDescErr), + E_P_AUTO(SDmaBaseErr), + E_P_AUTO(SDmaTailOutOfBoundErr), + E_P_AUTO(SDmaOutOfBoundErr), + E_P_AUTO(SDmaGenMismatchErr), + E_P_AUTO(SendBufMisuseErr), + E_P_AUTO(SendUnsupportedVLErr), + E_P_AUTO(SendUnexpectedPktNumErr), + E_P_AUTO(SendDroppedDataPktErr), + E_P_AUTO(SendDroppedSmpPktErr), + E_P_AUTO(SendPktLenErr), + E_P_AUTO(SendUnderRunErr), + E_P_AUTO(SendMaxPktLenErr), + E_P_AUTO(SendMinPktLenErr), + E_P_AUTO(RcvIBLostLinkErr), + E_P_AUTO(RcvHdrErr), + E_P_AUTO(RcvHdrLenErr), + E_P_AUTO(RcvBadTidErr), + E_P_AUTO(RcvBadVersionErr), + E_P_AUTO(RcvIBFlowErr), + E_P_AUTO(RcvEBPErr), + E_P_AUTO(RcvUnsupportedVLErr), + E_P_AUTO(RcvUnexpectedCharErr), + E_P_AUTO(RcvShortPktLenErr), + E_P_AUTO(RcvLongPktLenErr), + E_P_AUTO(RcvMaxPktLenErr), + E_P_AUTO(RcvMinPktLenErr), + E_P_AUTO(RcvICRCErr), + E_P_AUTO(RcvVCRCErr), + E_P_AUTO(RcvFormatErr), + { .mask = 0, .sz = 0 } +}; + +/* + * Below generates "auto-message" for interrupts not specific to any port or + * context + */ +#define INTR_AUTO(fldname) { .mask = SYM_MASK(IntMask, fldname##Mask), \ + .msg = #fldname, .sz = sizeof(#fldname) } +/* Below generates "auto-message" for interrupts specific to a port */ +#define INTR_AUTO_P(fldname) { .mask = MASK_ACROSS(\ + SYM_LSB(IntMask, fldname##Mask##_0), \ + SYM_LSB(IntMask, fldname##Mask##_1)), \ + .msg = #fldname "_P", .sz = sizeof(#fldname "_P") } +/* For some reason, the SerDesTrimDone bits are reversed */ +#define INTR_AUTO_PI(fldname) { .mask = MASK_ACROSS(\ + SYM_LSB(IntMask, fldname##Mask##_1), \ + SYM_LSB(IntMask, fldname##Mask##_0)), \ + .msg = #fldname "_P", .sz = sizeof(#fldname "_P") } +/* + * Below generates "auto-message" for interrupts specific to a context, + * with ctxt-number appended + */ +#define INTR_AUTO_C(fldname) { .mask = MASK_ACROSS(\ + SYM_LSB(IntMask, fldname##0IntMask), \ + SYM_LSB(IntMask, fldname##17IntMask)), \ + .msg = #fldname "_C", .sz = sizeof(#fldname "_C") } + +static const struct qib_hwerror_msgs qib_7322_intr_msgs[] = { + INTR_AUTO_P(SDmaInt), + INTR_AUTO_P(SDmaProgressInt), + INTR_AUTO_P(SDmaIdleInt), + INTR_AUTO_P(SDmaCleanupDone), + INTR_AUTO_C(RcvUrg), + INTR_AUTO_P(ErrInt), + INTR_AUTO(ErrInt), /* non-port-specific errs */ + INTR_AUTO(AssertGPIOInt), + INTR_AUTO_P(SendDoneInt), + INTR_AUTO(SendBufAvailInt), + INTR_AUTO_C(RcvAvail), + { .mask = 0, .sz = 0 } +}; + +#define TXSYMPTOM_AUTO_P(fldname) \ + { .mask = SYM_MASK(SendHdrErrSymptom_0, fldname), \ + .msg = #fldname, .sz = sizeof(#fldname) } +static const struct qib_hwerror_msgs hdrchk_msgs[] = { + TXSYMPTOM_AUTO_P(NonKeyPacket), + TXSYMPTOM_AUTO_P(GRHFail), + TXSYMPTOM_AUTO_P(PkeyFail), + TXSYMPTOM_AUTO_P(QPFail), + TXSYMPTOM_AUTO_P(SLIDFail), + TXSYMPTOM_AUTO_P(RawIPV6), + TXSYMPTOM_AUTO_P(PacketTooSmall), + { .mask = 0, .sz = 0 } +}; + +#define IBA7322_HDRHEAD_PKTINT_SHIFT 32 /* interrupt cnt in upper 32 bits */ + +/* + * Called when we might have an error that is specific to a particular + * PIO buffer, and may need to cancel that buffer, so it can be re-used, + * because we don't need to force the update of pioavail + */ +static void qib_disarm_7322_senderrbufs(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u32 i; + int any; + 
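/* + * (One bit per PIO buffer: the 2K buffers, the 4K buffers, then the + * dedicated VL15 buffers, packed into consecutive sendbuffererror + * registers; regcnt is how many such registers hold piobcnt bits.) + */ +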
u32 piobcnt = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS; + u32 regcnt = (piobcnt + BITS_PER_LONG - 1) / BITS_PER_LONG; + unsigned long sbuf[4]; + + /* + * It's possible that sendbuffererror could have bits set; might + * have already done this as a result of hardware error handling. + */ + any = 0; + for (i = 0; i < regcnt; ++i) { + sbuf[i] = qib_read_kreg64(dd, kr_sendbuffererror + i); + if (sbuf[i]) { + any = 1; + qib_write_kreg(dd, kr_sendbuffererror + i, sbuf[i]); + } + } + + if (any) + qib_disarm_piobufs_set(dd, sbuf, piobcnt); +} + +/* No txe_recover yet, if ever */ + +/* No decode__errors yet */ +static void err_decode(char *msg, size_t len, u64 errs, + const struct qib_hwerror_msgs *msp) +{ + u64 these, lmask; + int took, multi, n = 0; + + while (errs && msp && msp->mask) { + multi = (msp->mask & (msp->mask - 1)); + while (errs & msp->mask) { + these = (errs & msp->mask); + lmask = (these & (these - 1)) ^ these; + if (len) { + if (n++) { + /* separate the strings */ + *msg++ = ','; + len--; + } + BUG_ON(!msp->sz); + /* msp->sz counts the nul */ + took = min_t(size_t, msp->sz - (size_t)1, len); + memcpy(msg, msp->msg, took); + len -= took; + msg += took; + if (len) + *msg = '\0'; + } + errs &= ~lmask; + if (len && multi) { + /* More than one bit this mask */ + int idx = -1; + + while (lmask & msp->mask) { + ++idx; + lmask >>= 1; + } + took = scnprintf(msg, len, "_%d", idx); + len -= took; + msg += took; + } + } + ++msp; + } + /* If some bits are left, show in hex. */ + if (len && errs) + snprintf(msg, len, "%sMORE:%llX", n ? "," : "", + (unsigned long long) errs); +} + +/* only called if r1 set */ +static void flush_fifo(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u32 __iomem *piobuf; + u32 bufn; + u32 *hdr; + u64 pbc; + const unsigned hdrwords = 7; + static struct qib_ib_header ibhdr = { + .lrh[0] = cpu_to_be16(0xF000 | QIB_LRH_BTH), + .lrh[1] = IB_LID_PERMISSIVE, + .lrh[2] = cpu_to_be16(hdrwords + SIZE_OF_CRC), + .lrh[3] = IB_LID_PERMISSIVE, + .u.oth.bth[0] = cpu_to_be32( + (IB_OPCODE_UD_SEND_ONLY << 24) | QIB_DEFAULT_P_KEY), + .u.oth.bth[1] = cpu_to_be32(0), + .u.oth.bth[2] = cpu_to_be32(0), + .u.oth.u.ud.deth[0] = cpu_to_be32(0), + .u.oth.u.ud.deth[1] = cpu_to_be32(0), + }; + + /* + * Send a dummy VL15 packet to flush the launch FIFO. + * This will not actually be sent since the TxeBypassIbc bit is set. + */ + pbc = PBC_7322_VL15_SEND | + (((u64)ppd->hw_pidx) << (PBC_PORT_SEL_LSB + 32)) | + (hdrwords + SIZE_OF_CRC); + piobuf = qib_7322_getsendbuf(ppd, pbc, &bufn); + if (!piobuf) + return; + writeq(pbc, piobuf); + hdr = (u32 *) &ibhdr; + if (dd->flags & QIB_PIO_FLUSH_WC) { + qib_flush_wc(); + qib_pio_copy(piobuf + 2, hdr, hdrwords - 1); + qib_flush_wc(); + __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords + 1); + qib_flush_wc(); + } else + qib_pio_copy(piobuf + 2, hdr, hdrwords); + qib_sendbuf_done(dd, bufn); +} + +/* + * This is called with interrupts disabled and sdma_lock held. 
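+ * + * Each QIB_SDMA_SENDCTRL_OP_* flag in op both sets and clears SendCtrl + * bits: a flag that is absent is actively deasserted, not left alone.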
+ */ +static void qib_7322_sdma_sendctrl(struct qib_pportdata *ppd, unsigned op) +{ + struct qib_devdata *dd = ppd->dd; + u64 set_sendctrl = 0; + u64 clr_sendctrl = 0; + + if (op & QIB_SDMA_SENDCTRL_OP_ENABLE) + set_sendctrl |= SYM_MASK(SendCtrl_0, SDmaEnable); + else + clr_sendctrl |= SYM_MASK(SendCtrl_0, SDmaEnable); + + if (op & QIB_SDMA_SENDCTRL_OP_INTENABLE) + set_sendctrl |= SYM_MASK(SendCtrl_0, SDmaIntEnable); + else + clr_sendctrl |= SYM_MASK(SendCtrl_0, SDmaIntEnable); + + if (op & QIB_SDMA_SENDCTRL_OP_HALT) + set_sendctrl |= SYM_MASK(SendCtrl_0, SDmaHalt); + else + clr_sendctrl |= SYM_MASK(SendCtrl_0, SDmaHalt); + + if (op & QIB_SDMA_SENDCTRL_OP_DRAIN) + set_sendctrl |= SYM_MASK(SendCtrl_0, TxeBypassIbc) | + SYM_MASK(SendCtrl_0, TxeAbortIbc) | + SYM_MASK(SendCtrl_0, TxeDrainRmFifo); + else + clr_sendctrl |= SYM_MASK(SendCtrl_0, TxeBypassIbc) | + SYM_MASK(SendCtrl_0, TxeAbortIbc) | + SYM_MASK(SendCtrl_0, TxeDrainRmFifo); + + spin_lock(&dd->sendctrl_lock); + + /* If we are draining everything, block sends first */ + if (op & QIB_SDMA_SENDCTRL_OP_DRAIN) { + ppd->p_sendctrl &= ~SYM_MASK(SendCtrl_0, SendEnable); + qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + ppd->p_sendctrl |= set_sendctrl; + ppd->p_sendctrl &= ~clr_sendctrl; + + if (op & QIB_SDMA_SENDCTRL_OP_CLEANUP) + qib_write_kreg_port(ppd, krp_sendctrl, + ppd->p_sendctrl | + SYM_MASK(SendCtrl_0, SDmaCleanup)); + else + qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + + if (op & QIB_SDMA_SENDCTRL_OP_DRAIN) { + ppd->p_sendctrl |= SYM_MASK(SendCtrl_0, SendEnable); + qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + spin_unlock(&dd->sendctrl_lock); + + if ((op & QIB_SDMA_SENDCTRL_OP_DRAIN) && ppd->dd->cspec->r1) + flush_fifo(ppd); +} + +static void qib_7322_sdma_hw_clean_up(struct qib_pportdata *ppd) +{ + __qib_sdma_process_event(ppd, qib_sdma_event_e50_hw_cleaned); +} + +static void qib_sdma_7322_setlengen(struct qib_pportdata *ppd) +{ + /* + * Set SendDmaLenGen and clear and set + * the MSB of the generation count to enable generation checking + * and load the internal generation counter. + */ + qib_write_kreg_port(ppd, krp_senddmalengen, ppd->sdma_descq_cnt); + qib_write_kreg_port(ppd, krp_senddmalengen, + ppd->sdma_descq_cnt | + (1ULL << QIB_7322_SendDmaLenGen_0_Generation_MSB)); +} + +/* + * Must be called with sdma_lock held, or before init finished. + */ +static void qib_sdma_update_7322_tail(struct qib_pportdata *ppd, u16 tail) +{ + /* Commit writes to memory and advance the tail on the chip */ + wmb(); + ppd->sdma_descq_tail = tail; + qib_write_kreg_port(ppd, krp_senddmatail, tail); +} + +/* + * This is called with interrupts disabled and sdma_lock held. + */ +static void qib_7322_sdma_hw_start_up(struct qib_pportdata *ppd) +{ + /* + * Drain all FIFOs. + * The hardware doesn't require this but we do it so that verbs + * and user applications don't wait for link active to send stale + * data. 
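+ * (Order below: flush any PIO sends, re-program the length/generation + * register, zero the tail and the head shadow, then run the CLEANUP + * op so the engine restarts from a known state.)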
+ */ + sendctrl_7322_mod(ppd, QIB_SENDCTRL_FLUSH); + + qib_sdma_7322_setlengen(ppd); + qib_sdma_update_7322_tail(ppd, 0); /* Set SendDmaTail */ + ppd->sdma_head_dma[0] = 0; + qib_7322_sdma_sendctrl(ppd, + ppd->sdma_state.current_op | QIB_SDMA_SENDCTRL_OP_CLEANUP); +} + +#define DISABLES_SDMA ( \ + QIB_E_P_SDMAHALT | \ + QIB_E_P_SDMADESCADDRMISALIGN | \ + QIB_E_P_SDMAMISSINGDW | \ + QIB_E_P_SDMADWEN | \ + QIB_E_P_SDMARPYTAG | \ + QIB_E_P_SDMA1STDESC | \ + QIB_E_P_SDMABASE | \ + QIB_E_P_SDMATAILOUTOFBOUND | \ + QIB_E_P_SDMAOUTOFBOUND | \ + QIB_E_P_SDMAGENMISMATCH) + +static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs) +{ + unsigned long flags; + struct qib_devdata *dd = ppd->dd; + + errs &= QIB_E_P_SDMAERRS; + err_decode(ppd->cpspec->sdmamsgbuf, sizeof(ppd->cpspec->sdmamsgbuf), + errs, qib_7322p_error_msgs); + + if (errs & QIB_E_P_SDMAUNEXPDATA) + qib_dev_err(dd, "IB%u:%u SDmaUnexpData\n", dd->unit, + ppd->port); + + spin_lock_irqsave(&ppd->sdma_lock, flags); + + if (errs != QIB_E_P_SDMAHALT) { + /* SDMA errors have QIB_E_P_SDMAHALT and another bit set */ + qib_dev_porterr(dd, ppd->port, + "SDMA %s 0x%016llx %s\n", + qib_sdma_state_names[ppd->sdma_state.current_state], + errs, ppd->cpspec->sdmamsgbuf); + dump_sdma_7322_state(ppd); + } + + switch (ppd->sdma_state.current_state) { + case qib_sdma_state_s00_hw_down: + break; + + case qib_sdma_state_s10_hw_start_up_wait: + if (errs & QIB_E_P_SDMAHALT) + __qib_sdma_process_event(ppd, + qib_sdma_event_e20_hw_started); + break; + + case qib_sdma_state_s20_idle: + break; + + case qib_sdma_state_s30_sw_clean_up_wait: + break; + + case qib_sdma_state_s40_hw_clean_up_wait: + if (errs & QIB_E_P_SDMAHALT) + __qib_sdma_process_event(ppd, + qib_sdma_event_e50_hw_cleaned); + break; + + case qib_sdma_state_s50_hw_halt_wait: + if (errs & QIB_E_P_SDMAHALT) + __qib_sdma_process_event(ppd, + qib_sdma_event_e60_hw_halted); + break; + + case qib_sdma_state_s99_running: + __qib_sdma_process_event(ppd, qib_sdma_event_e7322_err_halted); + __qib_sdma_process_event(ppd, qib_sdma_event_e60_hw_halted); + break; + } + + spin_unlock_irqrestore(&ppd->sdma_lock, flags); +} + +/* + * handle per-device errors (not per-port errors) + */ +static noinline void handle_7322_errors(struct qib_devdata *dd) +{ + char *msg; + u64 iserr = 0; + u64 errs; + u64 mask; + int log_idx; + + qib_stats.sps_errints++; + errs = qib_read_kreg64(dd, kr_errstatus); + if (!errs) { + qib_devinfo(dd->pcidev, + "device error interrupt, but no error bits set!\n"); + goto done; + } + + /* don't report errors that are masked */ + errs &= dd->cspec->errormask; + msg = dd->cspec->emsgbuf; + + /* do these first, they are most important */ + if (errs & QIB_E_HARDWARE) { + *msg = '\0'; + qib_7322_handle_hwerrors(dd, msg, sizeof dd->cspec->emsgbuf); + } else + for (log_idx = 0; log_idx < QIB_EEP_LOG_CNT; ++log_idx) + if (errs & dd->eep_st_masks[log_idx].errs_to_log) + qib_inc_eeprom_err(dd, log_idx, 1); + + if (errs & QIB_E_SPKTERRS) { + qib_disarm_7322_senderrbufs(dd->pport); + qib_stats.sps_txerrs++; + } else if (errs & QIB_E_INVALIDADDR) + qib_stats.sps_txerrs++; + else if (errs & QIB_E_ARMLAUNCH) { + qib_stats.sps_txerrs++; + qib_disarm_7322_senderrbufs(dd->pport); + } + qib_write_kreg(dd, kr_errclear, errs); + + /* + * The ones we mask off are handled specially below + * or above. Also mask SDMADISABLED by default as it + * is too chatty. 
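+ * (At this point that means only QIB_E_HARDWARE is masked out of the + * decode below; it was handled above by qib_7322_handle_hwerrors().)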
+ */ + mask = QIB_E_HARDWARE; + *msg = '\0'; + + err_decode(msg, sizeof dd->cspec->emsgbuf, errs & ~mask, + qib_7322error_msgs); + + /* + * Getting reset is a tragedy for all ports. Mark the device + * _and_ the ports as "offline" in a way meaningful to each. + */ + if (errs & QIB_E_RESET) { + int pidx; + + qib_dev_err(dd, + "Got reset, requires re-init (unload and reload driver)\n"); + dd->flags &= ~QIB_INITTED; /* needs re-init */ + /* mark as having had error */ + *dd->devstatusp |= QIB_STATUS_HWERROR; + for (pidx = 0; pidx < dd->num_pports; ++pidx) + if (dd->pport[pidx].link_speed_supported) + *dd->pport[pidx].statusp &= ~QIB_STATUS_IB_CONF; + } + + if (*msg && iserr) + qib_dev_err(dd, "%s error\n", msg); + + /* + * If there were hdrq or egrfull errors, wake up any processes + * waiting in poll. We used to try to check which contexts had + * the overflow, but given the cost of that and the chip reads + * to support it, it's better to just wake everybody up if we + * get an overflow; waiters can poll again if it's not them. + */ + if (errs & (ERR_MASK(RcvEgrFullErr) | ERR_MASK(RcvHdrFullErr))) { + qib_handle_urcv(dd, ~0U); + if (errs & ERR_MASK(RcvEgrFullErr)) + qib_stats.sps_buffull++; + else + qib_stats.sps_hdrfull++; + } + +done: + return; +} + +static void qib_error_tasklet(unsigned long data) +{ + struct qib_devdata *dd = (struct qib_devdata *)data; + + handle_7322_errors(dd); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); +} + +static void reenable_chase(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + + ppd->cpspec->chase_timer.expires = 0; + qib_set_ib_7322_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN, + QLOGIC_IB_IBCC_LINKINITCMD_POLL); +} + +static void disable_chase(struct qib_pportdata *ppd, unsigned long tnow, + u8 ibclt) +{ + ppd->cpspec->chase_end = 0; + + if (!qib_chase) + return; + + qib_set_ib_7322_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + ppd->cpspec->chase_timer.expires = jiffies + QIB_CHASE_DIS_TIME; + add_timer(&ppd->cpspec->chase_timer); +} + +static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) +{ + u8 ibclt; + unsigned long tnow; + + ibclt = (u8)SYM_FIELD(ibcst, IBCStatusA_0, LinkTrainingState); + + /* + * Detect and handle the state chase issue, where we can + * get stuck if we are unlucky on timing on both sides of + * the link. If we are, we disable, set a timer, and + * then re-enable.
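+ * (disable_chase() above parks the link DISABLED and arms chase_timer; + * when the timer fires, reenable_chase() moves it back to DOWN/POLL.)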
+ */ + switch (ibclt) { + case IB_7322_LT_STATE_CFGRCVFCFG: + case IB_7322_LT_STATE_CFGWAITRMT: + case IB_7322_LT_STATE_TXREVLANES: + case IB_7322_LT_STATE_CFGENH: + tnow = jiffies; + if (ppd->cpspec->chase_end && + time_after(tnow, ppd->cpspec->chase_end)) + disable_chase(ppd, tnow, ibclt); + else if (!ppd->cpspec->chase_end) + ppd->cpspec->chase_end = tnow + QIB_CHASE_TIME; + break; + default: + ppd->cpspec->chase_end = 0; + break; + } + + if (((ibclt >= IB_7322_LT_STATE_CFGTEST && + ibclt <= IB_7322_LT_STATE_CFGWAITENH) || + ibclt == IB_7322_LT_STATE_LINKUP) && + (ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR))) { + force_h1(ppd); + ppd->cpspec->qdr_reforce = 1; + if (!ppd->dd->cspec->r1) + serdes_7322_los_enable(ppd, 0); + } else if (ppd->cpspec->qdr_reforce && + (ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR)) && + (ibclt == IB_7322_LT_STATE_CFGENH || + ibclt == IB_7322_LT_STATE_CFGIDLE || + ibclt == IB_7322_LT_STATE_LINKUP)) + force_h1(ppd); + + if ((IS_QMH(ppd->dd) || IS_QME(ppd->dd)) && + ppd->link_speed_enabled == QIB_IB_QDR && + (ibclt == IB_7322_LT_STATE_CFGTEST || + ibclt == IB_7322_LT_STATE_CFGENH || + (ibclt >= IB_7322_LT_STATE_POLLACTIVE && + ibclt <= IB_7322_LT_STATE_SLEEPQUIET))) + adj_tx_serdes(ppd); + + if (ibclt != IB_7322_LT_STATE_LINKUP) { + u8 ltstate = qib_7322_phys_portstate(ibcst); + u8 pibclt = (u8)SYM_FIELD(ppd->lastibcstat, IBCStatusA_0, + LinkTrainingState); + if (!ppd->dd->cspec->r1 && + pibclt == IB_7322_LT_STATE_LINKUP && + ltstate != IB_PHYSPORTSTATE_LINK_ERR_RECOVER && + ltstate != IB_PHYSPORTSTATE_RECOVERY_RETRAIN && + ltstate != IB_PHYSPORTSTATE_RECOVERY_WAITRMT && + ltstate != IB_PHYSPORTSTATE_RECOVERY_IDLE) + /* If the link went down (but not into recovery), + * turn LOS back on */ + serdes_7322_los_enable(ppd, 1); + if (!ppd->cpspec->qdr_dfe_on && + ibclt <= IB_7322_LT_STATE_SLEEPQUIET) { + ppd->cpspec->qdr_dfe_on = 1; + ppd->cpspec->qdr_dfe_time = 0; + /* On link down, reenable QDR adaptation */ + qib_write_kreg_port(ppd, krp_static_adapt_dis(2), + ppd->dd->cspec->r1 ? + QDR_STATIC_ADAPT_DOWN_R1 : + QDR_STATIC_ADAPT_DOWN); + pr_info( + "IB%u:%u re-enabled QDR adaptation ibclt %x\n", + ppd->dd->unit, ppd->port, ibclt); + } + } +} + +static int qib_7322_set_ib_cfg(struct qib_pportdata *, int, u32); + +/* + * This is per-port error handling; it will likely get its own MSIx + * interrupt (one for each port, although just a single handler).
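+ * (The kr_act_fmask read below appears to detect a link-recovery + * window: when it reads zero, the RXE status is re-checked and the + * IBSTATUSCHANGED bit suppressed rather than acted on.)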
+ */ +static noinline void handle_7322_p_errors(struct qib_pportdata *ppd) +{ + char *msg; + u64 ignore_this_time = 0, iserr = 0, errs, fmask; + struct qib_devdata *dd = ppd->dd; + + /* do this as soon as possible */ + fmask = qib_read_kreg64(dd, kr_act_fmask); + if (!fmask) + check_7322_rxe_status(ppd); + + errs = qib_read_kreg_port(ppd, krp_errstatus); + if (!errs) + qib_devinfo(dd->pcidev, + "Port%d error interrupt, but no error bits set!\n", + ppd->port); + if (!fmask) + errs &= ~QIB_E_P_IBSTATUSCHANGED; + if (!errs) + goto done; + + msg = ppd->cpspec->epmsgbuf; + *msg = '\0'; + + if (errs & ~QIB_E_P_BITSEXTANT) { + err_decode(msg, sizeof ppd->cpspec->epmsgbuf, + errs & ~QIB_E_P_BITSEXTANT, qib_7322p_error_msgs); + if (!*msg) + snprintf(msg, sizeof ppd->cpspec->epmsgbuf, + "no others"); + qib_dev_porterr(dd, ppd->port, + "error interrupt with unknown errors 0x%016Lx set (and %s)\n", + (errs & ~QIB_E_P_BITSEXTANT), msg); + *msg = '\0'; + } + + if (errs & QIB_E_P_SHDR) { + u64 symptom; + + /* determine cause, then write to clear */ + symptom = qib_read_kreg_port(ppd, krp_sendhdrsymptom); + qib_write_kreg_port(ppd, krp_sendhdrsymptom, 0); + err_decode(msg, sizeof ppd->cpspec->epmsgbuf, symptom, + hdrchk_msgs); + *msg = '\0'; + /* senderrbuf cleared in SPKTERRS below */ + } + + if (errs & QIB_E_P_SPKTERRS) { + if ((errs & QIB_E_P_LINK_PKTERRS) && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* + * This can happen when trying to bring the link + * up, but the IB link changes state at the "wrong" + * time. The IB logic then complains that the packet + * isn't valid. We don't want to confuse people, so + * we just don't print them, except at debug + */ + err_decode(msg, sizeof ppd->cpspec->epmsgbuf, + (errs & QIB_E_P_LINK_PKTERRS), + qib_7322p_error_msgs); + *msg = '\0'; + ignore_this_time = errs & QIB_E_P_LINK_PKTERRS; + } + qib_disarm_7322_senderrbufs(ppd); + } else if ((errs & QIB_E_P_LINK_PKTERRS) && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* + * This can happen when SMA is trying to bring the link + * up, but the IB link changes state at the "wrong" time. + * The IB logic then complains that the packet isn't + * valid. We don't want to confuse people, so we just + * don't print them, except at debug + */ + err_decode(msg, sizeof ppd->cpspec->epmsgbuf, errs, + qib_7322p_error_msgs); + ignore_this_time = errs & QIB_E_P_LINK_PKTERRS; + *msg = '\0'; + } + + qib_write_kreg_port(ppd, krp_errclear, errs); + + errs &= ~ignore_this_time; + if (!errs) + goto done; + + if (errs & QIB_E_P_RPKTERRS) + qib_stats.sps_rcverrs++; + if (errs & QIB_E_P_SPKTERRS) + qib_stats.sps_txerrs++; + + iserr = errs & ~(QIB_E_P_RPKTERRS | QIB_E_P_PKTERRS); + + if (errs & QIB_E_P_SDMAERRS) + sdma_7322_p_errors(ppd, errs); + + if (errs & QIB_E_P_IBSTATUSCHANGED) { + u64 ibcs; + u8 ltstate; + + ibcs = qib_read_kreg_port(ppd, krp_ibcstatus_a); + ltstate = qib_7322_phys_portstate(ibcs); + + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) + handle_serdes_issues(ppd, ibcs); + if (!(ppd->cpspec->ibcctrl_a & + SYM_MASK(IBCCtrlA_0, IBStatIntReductionEn))) { + /* + * We got our interrupt, so init code should be + * happy and not try alternatives. Now squelch + * other "chatter" from link-negotiation (pre Init) + */ + ppd->cpspec->ibcctrl_a |= + SYM_MASK(IBCCtrlA_0, IBStatIntReductionEn); + qib_write_kreg_port(ppd, krp_ibcctrl_a, + ppd->cpspec->ibcctrl_a); + } + + /* Update our picture of width and speed from chip */ + ppd->link_width_active = + (ibcs & SYM_MASK(IBCStatusA_0, LinkWidthActive)) ? 
+ IB_WIDTH_4X : IB_WIDTH_1X; + ppd->link_speed_active = (ibcs & SYM_MASK(IBCStatusA_0, + LinkSpeedQDR)) ? QIB_IB_QDR : (ibcs & + SYM_MASK(IBCStatusA_0, LinkSpeedActive)) ? + QIB_IB_DDR : QIB_IB_SDR; + + if ((ppd->lflags & QIBL_IB_LINK_DISABLED) && ltstate != + IB_PHYSPORTSTATE_DISABLED) + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + else + /* + * Since going into a recovery state causes the link + * state to go down and since recovery is transitory, + * it is better if we "miss" ever seeing the link + * training state go into recovery (i.e., ignore this + * transition for link state special handling purposes) + * without updating lastibcstat. + */ + if (ltstate != IB_PHYSPORTSTATE_LINK_ERR_RECOVER && + ltstate != IB_PHYSPORTSTATE_RECOVERY_RETRAIN && + ltstate != IB_PHYSPORTSTATE_RECOVERY_WAITRMT && + ltstate != IB_PHYSPORTSTATE_RECOVERY_IDLE) + qib_handle_e_ibstatuschanged(ppd, ibcs); + } + if (*msg && iserr) + qib_dev_porterr(dd, ppd->port, "%s error\n", msg); + + if (ppd->state_wanted & ppd->lflags) + wake_up_interruptible(&ppd->state_wait); +done: + return; +} + +/* enable/disable chip from delivering interrupts */ +static void qib_7322_set_intr_state(struct qib_devdata *dd, u32 enable) +{ + if (enable) { + if (dd->flags & QIB_BADINTR) + return; + qib_write_kreg(dd, kr_intmask, dd->cspec->int_enable_mask); + /* cause any pending enabled interrupts to be re-delivered */ + qib_write_kreg(dd, kr_intclear, 0ULL); + if (dd->cspec->num_msix_entries) { + /* and same for MSIx */ + u64 val = qib_read_kreg64(dd, kr_intgranted); + if (val) + qib_write_kreg(dd, kr_intgranted, val); + } + } else + qib_write_kreg(dd, kr_intmask, 0ULL); +} + +/* + * Try to clean up as much as possible for anything that might have gone + * wrong while in freeze mode, such as pio buffers being written by user + * processes (causing armlaunch), send errors due to going into freeze mode, + * etc., and try to avoid causing extra interrupts while doing so. + * Forcibly update the in-memory pioavail register copies after cleanup + * because the chip won't do it while in freeze mode (the register values + * themselves are kept correct). + * Make sure that we don't lose any important interrupts by using the chip + * feature that says that writing 0 to a bit in *clear that is set in + * *status will cause an interrupt to be generated again (if allowed by + * the *mask value). + * This is in chip-specific code because of all of the register accesses, + * even though the details are similar on most chips. + */ +static void qib_7322_clear_freeze(struct qib_devdata *dd) +{ + int pidx; + + /* disable error interrupts, to avoid confusion */ + qib_write_kreg(dd, kr_errmask, 0ULL); + + for (pidx = 0; pidx < dd->num_pports; ++pidx) + if (dd->pport[pidx].link_speed_supported) + qib_write_kreg_port(dd->pport + pidx, krp_errmask, + 0ULL); + + /* also disable interrupts; errormask is sometimes overwritten */ + qib_7322_set_intr_state(dd, 0); + + /* clear the freeze, and be sure chip saw it */ + qib_write_kreg(dd, kr_control, dd->control); + qib_read_kreg32(dd, kr_scratch); + + /* + * Force new interrupt if any hwerr, error or interrupt bits are + * still set, and clear "safe" send packet errors related to freeze + * and cancelling sends. Re-enable error interrupts before possible + * force of re-interrupt on pending interrupts.
+ */ + qib_write_kreg(dd, kr_hwerrclear, 0ULL); + qib_write_kreg(dd, kr_errclear, E_SPKT_ERRS_IGNORE); + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); + /* We need to purge per-port errs and reset mask, too */ + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + if (!dd->pport[pidx].link_speed_supported) + continue; + qib_write_kreg_port(dd->pport + pidx, krp_errclear, ~0ULL); + qib_write_kreg_port(dd->pport + pidx, krp_errmask, ~0ULL); + } + qib_7322_set_intr_state(dd, 1); +} + +/* no error handling to speak of */ +/** + * qib_7322_handle_hwerrors - display hardware errors. + * @dd: the qlogic_ib device + * @msg: the output buffer + * @msgl: the size of the output buffer + * + * Most hardware errors are catastrophic, but for right now we'll + * print them and continue. We reuse the same message buffer as + * qib_handle_errors() to avoid excessive stack usage. + */ +static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, + size_t msgl) +{ + u64 hwerrs; + u32 ctrl; + int isfatal = 0; + + hwerrs = qib_read_kreg64(dd, kr_hwerrstatus); + if (!hwerrs) + goto bail; + if (hwerrs == ~0ULL) { + qib_dev_err(dd, + "Read of hardware error status failed (all bits set); ignoring\n"); + goto bail; + } + qib_stats.sps_hwerrs++; + + /* Always clear the error status register, except BIST fail */ + qib_write_kreg(dd, kr_hwerrclear, hwerrs & + ~HWE_MASK(PowerOnBISTFailed)); + + hwerrs &= dd->cspec->hwerrmask; + + /* no EEPROM logging, yet */ + + if (hwerrs) + qib_devinfo(dd->pcidev, + "Hardware error: hwerr=0x%llx (cleared)\n", + (unsigned long long) hwerrs); + + ctrl = qib_read_kreg32(dd, kr_control); + if ((ctrl & SYM_MASK(Control, FreezeMode)) && !dd->diag_client) { + /* + * No recovery yet... + */ + if ((hwerrs & ~HWE_MASK(LATriggered)) || + dd->cspec->stay_in_freeze) { + /* + * If any bits are set that we aren't ignoring, only + * make the complaint once, in case it's stuck or + * recurring, and we get here multiple times. + * Force link down, so switch knows, and + * LEDs are turned off. + */ + if (dd->flags & QIB_INITTED) + isfatal = 1; + } else + qib_7322_clear_freeze(dd); + } + + if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { + isfatal = 1; + strlcpy(msg, + "[Memory BIST test failed, InfiniPath hardware unusable]", + msgl); + /* ignore from now on, so disable until driver reloaded */ + dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + } + + err_decode(msg, msgl, hwerrs, qib_7322_hwerror_msgs); + + /* Ignore esoteric PLL failures et al. */ + + qib_dev_err(dd, "%s hardware error\n", msg); + + if (hwerrs & + (SYM_MASK(HwErrMask, SDmaMemReadErrMask_0) | + SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) { + int pidx = 0; + int err; + unsigned long flags; + struct qib_pportdata *ppd = dd->pport; + for (; pidx < dd->num_pports; ++pidx, ppd++) { + err = 0; + if (pidx == 0 && (hwerrs & + SYM_MASK(HwErrMask, SDmaMemReadErrMask_0))) + err++; + if (pidx == 1 && (hwerrs & + SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) + err++; + if (err) { + spin_lock_irqsave(&ppd->sdma_lock, flags); + dump_sdma_7322_state(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } + } + } + + if (isfatal && !dd->diag_client) { + qib_dev_err(dd, + "Fatal Hardware Error, no longer usable, SN %.16s\n", + dd->serial); + /* + * for /sys status file and user programs to print; if no + * trailing brace is copied, we'll know it was truncated.
+ */ + if (dd->freezemsg) + snprintf(dd->freezemsg, dd->freezelen, + "{%s}", msg); + qib_disable_after_error(dd); + } +bail:; +} + +/** + * qib_7322_init_hwerrors - enable hardware errors + * @dd: the qlogic_ib device + * + * Now that we have finished initializing everything that might reasonably + * cause a hardware error, and cleared those error bits as they occur, + * we can enable hardware errors in the mask (potentially enabling + * freeze mode), and enable hardware errors as errors (along with + * everything else) in errormask. + */ +static void qib_7322_init_hwerrors(struct qib_devdata *dd) +{ + int pidx; + u64 extsval; + + extsval = qib_read_kreg64(dd, kr_extstatus); + if (!(extsval & (QIB_EXTS_MEMBIST_DISABLED | + QIB_EXTS_MEMBIST_ENDTEST))) + qib_dev_err(dd, "MemBIST did not complete!\n"); + + /* never clear BIST failure, so reported on each driver load */ + qib_write_kreg(dd, kr_hwerrclear, ~HWE_MASK(PowerOnBISTFailed)); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); + + /* clear all */ + qib_write_kreg(dd, kr_errclear, ~0ULL); + /* enable errors that are masked, at least this first time. */ + qib_write_kreg(dd, kr_errmask, ~0ULL); + dd->cspec->errormask = qib_read_kreg64(dd, kr_errmask); + for (pidx = 0; pidx < dd->num_pports; ++pidx) + if (dd->pport[pidx].link_speed_supported) + qib_write_kreg_port(dd->pport + pidx, krp_errmask, + ~0ULL); +} + +/* + * Disable and enable the armlaunch error. Used for PIO bandwidth testing + * on chips that are count-based, rather than trigger-based. There is no + * reference counting, but that's also fine, given the intended use. + * Only chip-specific because it's all register accesses. + */ +static void qib_set_7322_armlaunch(struct qib_devdata *dd, u32 enable) +{ + if (enable) { + qib_write_kreg(dd, kr_errclear, QIB_E_SPIOARMLAUNCH); + dd->cspec->errormask |= QIB_E_SPIOARMLAUNCH; + } else + dd->cspec->errormask &= ~QIB_E_SPIOARMLAUNCH; + qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); +} + +/* + * Formerly took parameter <which> in pre-shifted, + * pre-merged form with LinkCmd and LinkInitCmd + * together, and assuming zero was a NOP. + */ +static void qib_set_ib_7322_lstate(struct qib_pportdata *ppd, u16 linkcmd, + u16 linitcmd) +{ + u64 mod_wd; + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + if (linitcmd == QLOGIC_IB_IBCC_LINKINITCMD_DISABLE) { + /* + * If we are told to disable, note that so link-recovery + * code does not attempt to bring us back up. + * Also reset everything that we can, so we start + * completely clean when re-enabled (before we + * actually issue the disable to the IBC) + */ + qib_7322_mini_pcs_reset(ppd); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else if (linitcmd || linkcmd == QLOGIC_IB_IBCC_LINKCMD_DOWN) { + /* + * Any other linkinitcmd will lead to LINKDOWN and then + * to INIT (if all is well), so clear flag to let + * link-recovery code attempt to bring us back up. + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + /* + * Clear status change interrupt reduction so the + * new state is seen.
+ */ + ppd->cpspec->ibcctrl_a &= + ~SYM_MASK(IBCCtrlA_0, IBStatIntReductionEn); + } + + mod_wd = (linkcmd << IBA7322_IBCC_LINKCMD_SHIFT) | + (linitcmd << QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + + qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a | + mod_wd); + /* write to chip to prevent back-to-back writes of ibc reg */ + qib_write_kreg(dd, kr_scratch, 0); +} + +/* + * The total RCV buffer memory is 64KB, used for both ports, and is + * in units of 64 bytes (same as IB flow control credit unit). + * The consumedVL units in the same registers are in 32 byte units! + * So, a VL15 packet needs 4.50 IB credits, and 9 rx buffer chunks, + * and we can therefore allocate just 9 IB credits for 2 VL15 packets + * in krp_rxcreditvl15, rather than 10. + */ +#define RCV_BUF_UNITSZ 64 +#define NUM_RCV_BUF_UNITS(dd) ((64 * 1024) / (RCV_BUF_UNITSZ * dd->num_pports)) + +static void set_vls(struct qib_pportdata *ppd) +{ + int i, numvls, totcred, cred_vl, vl0extra; + struct qib_devdata *dd = ppd->dd; + u64 val; + + numvls = qib_num_vls(ppd->vls_operational); + + /* + * Set up per-VL credits. Below is a kluge based on these assumptions: + * 1) port is disabled at the time early_init is called. + * 2) give VL15 9 credits, for two max-plausible packets. + * 3) Give VL0-N the rest, with any rounding excess used for VL0 + */ + /* 2 VL15 packets @ 288 bytes each (including IB headers) */ + totcred = NUM_RCV_BUF_UNITS(dd); + cred_vl = (2 * 288 + RCV_BUF_UNITSZ - 1) / RCV_BUF_UNITSZ; + totcred -= cred_vl; + qib_write_kreg_port(ppd, krp_rxcreditvl15, (u64) cred_vl); + cred_vl = totcred / numvls; + vl0extra = totcred - cred_vl * numvls; + qib_write_kreg_port(ppd, krp_rxcreditvl0, cred_vl + vl0extra); + for (i = 1; i < numvls; i++) + qib_write_kreg_port(ppd, krp_rxcreditvl0 + i, cred_vl); + for (; i < 8; i++) /* no buffer space for other VLs */ + qib_write_kreg_port(ppd, krp_rxcreditvl0 + i, 0); + + /* Notify IBC that credits need to be recalculated */ + val = qib_read_kreg_port(ppd, krp_ibsdtestiftx); + val |= SYM_MASK(IB_SDTEST_IF_TX_0, CREDIT_CHANGE); + qib_write_kreg_port(ppd, krp_ibsdtestiftx, val); + qib_write_kreg(dd, kr_scratch, 0ULL); + val &= ~SYM_MASK(IB_SDTEST_IF_TX_0, CREDIT_CHANGE); + qib_write_kreg_port(ppd, krp_ibsdtestiftx, val); + + for (i = 0; i < numvls; i++) + val = qib_read_kreg_port(ppd, krp_rxcreditvl0 + i); + val = qib_read_kreg_port(ppd, krp_rxcreditvl15); + + /* Change the number of operational VLs */ + ppd->cpspec->ibcctrl_a = (ppd->cpspec->ibcctrl_a & + ~SYM_MASK(IBCCtrlA_0, NumVLane)) | + ((u64)(numvls - 1) << SYM_LSB(IBCCtrlA_0, NumVLane)); + qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); + qib_write_kreg(dd, kr_scratch, 0ULL); +} + +/* + * The code that deals with actual SerDes is in serdes_7322_init(). + * Compared to the code for iba7220, it is minimal. + */ +static int serdes_7322_init(struct qib_pportdata *ppd); + +/** + * qib_7322_bringup_serdes - bring up the serdes + * @ppd: physical port on the qlogic_ib device + */ +static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u64 val, guid, ibc; + unsigned long flags; + int ret = 0; + + /* + * SerDes model not in Pd, but still need to + * set up much of IBCCtrl and IBCDDRCtrl; move elsewhere + * eventually.
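+ * (Sequence below: put the IBC in reset, clear any forced Tx + * parameters, snapshot the error counters for later delta adjustment, + * build IBCCtrlA, then do a PCS reset before enabling the link.)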
+ */ + /* Put IBC in reset, sends disabled (should be in reset already) */ + ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, IBLinkEn); + qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); + qib_write_kreg(dd, kr_scratch, 0ULL); + + /* ensure previous Tx parameters are not still forced */ + qib_write_kreg_port(ppd, krp_tx_deemph_override, + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + reset_tx_deemphasis_override)); + + if (qib_compat_ddr_negotiate) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd, + crp_ibsymbolerr); + ppd->cpspec->iblnkerrsnap = read_7322_creg32_port(ppd, + crp_iblinkerrrecov); + } + + /* flowcontrolwatermark is in units of KBytes */ + ibc = 0x5ULL << SYM_LSB(IBCCtrlA_0, FlowCtrlWaterMark); + /* + * Flow control is sent this often, even if no changes in + * buffer space occur. Units are 128ns for this chip. + * Set to 3usec. + */ + ibc |= 24ULL << SYM_LSB(IBCCtrlA_0, FlowCtrlPeriod); + /* max error tolerance */ + ibc |= 0xfULL << SYM_LSB(IBCCtrlA_0, PhyerrThreshold); + /* IB credit flow control. */ + ibc |= 0xfULL << SYM_LSB(IBCCtrlA_0, OverrunThreshold); + /* + * set initial max size pkt IBC will send, including ICRC; it's the + * PIO buffer size in dwords, less 1; also see qib_set_mtu() + */ + ibc |= ((u64)(ppd->ibmaxlen >> 2) + 1) << + SYM_LSB(IBCCtrlA_0, MaxPktLen); + ppd->cpspec->ibcctrl_a = ibc; /* without linkcmd or linkinitcmd! */ + + /* + * Reset the PCS interface to the serdes (and also ibc, which is still + * in reset from above). Writes new value of ibcctrl_a as last step. + */ + qib_7322_mini_pcs_reset(ppd); + + if (!ppd->cpspec->ibcctrl_b) { + unsigned lse = ppd->link_speed_enabled; + + /* + * Not on re-init after reset: establish shadow + * and force initial config. + */ + ppd->cpspec->ibcctrl_b = qib_read_kreg_port(ppd, + krp_ibcctrl_b); + ppd->cpspec->ibcctrl_b &= ~(IBA7322_IBC_SPEED_QDR | + IBA7322_IBC_SPEED_DDR | + IBA7322_IBC_SPEED_SDR | + IBA7322_IBC_WIDTH_AUTONEG | + SYM_MASK(IBCCtrlB_0, IB_LANE_REV_SUPPORTED)); + if (lse & (lse - 1)) /* Multiple speeds enabled */ + ppd->cpspec->ibcctrl_b |= + (lse << IBA7322_IBC_SPEED_LSB) | + IBA7322_IBC_IBTA_1_2_MASK | + IBA7322_IBC_MAX_SPEED_MASK; + else + ppd->cpspec->ibcctrl_b |= (lse == QIB_IB_QDR) ? + IBA7322_IBC_SPEED_QDR | + IBA7322_IBC_IBTA_1_2_MASK : + (lse == QIB_IB_DDR) ? + IBA7322_IBC_SPEED_DDR : + IBA7322_IBC_SPEED_SDR; + if ((ppd->link_width_enabled & (IB_WIDTH_1X | IB_WIDTH_4X)) == + (IB_WIDTH_1X | IB_WIDTH_4X)) + ppd->cpspec->ibcctrl_b |= IBA7322_IBC_WIDTH_AUTONEG; + else + ppd->cpspec->ibcctrl_b |= + ppd->link_width_enabled == IB_WIDTH_4X ?
+ IBA7322_IBC_WIDTH_4X_ONLY : + IBA7322_IBC_WIDTH_1X_ONLY; + + /* always enable these on driver reload, not sticky */ + ppd->cpspec->ibcctrl_b |= (IBA7322_IBC_RXPOL_MASK | + IBA7322_IBC_HRTBT_MASK); + } + qib_write_kreg_port(ppd, krp_ibcctrl_b, ppd->cpspec->ibcctrl_b); + + /* setup so we have more time at CFGTEST to change H1 */ + val = qib_read_kreg_port(ppd, krp_ibcctrl_c); + val &= ~SYM_MASK(IBCCtrlC_0, IB_FRONT_PORCH); + val |= 0xfULL << SYM_LSB(IBCCtrlC_0, IB_FRONT_PORCH); + qib_write_kreg_port(ppd, krp_ibcctrl_c, val); + + serdes_7322_init(ppd); + + guid = be64_to_cpu(ppd->guid); + if (!guid) { + if (dd->base_guid) + guid = be64_to_cpu(dd->base_guid) + ppd->port - 1; + ppd->guid = cpu_to_be64(guid); + } + + qib_write_kreg_port(ppd, krp_hrtbt_guid, guid); + /* write to chip to prevent back-to-back writes of ibc reg */ + qib_write_kreg(dd, kr_scratch, 0); + + /* Enable port */ + ppd->cpspec->ibcctrl_a |= SYM_MASK(IBCCtrlA_0, IBLinkEn); + set_vls(ppd); + + /* initially come up DISABLED, without sending anything. */ + val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE << + QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + qib_write_kreg_port(ppd, krp_ibcctrl_a, val); + qib_write_kreg(dd, kr_scratch, 0ULL); + /* clear the linkinit cmds */ + ppd->cpspec->ibcctrl_a = val & ~SYM_MASK(IBCCtrlA_0, LinkInitCmd); + + /* be paranoid against later code motion, etc. */ + spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); + ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvIBPortEnable); + qib_write_kreg_port(ppd, krp_rcvctrl, ppd->p_rcvctrl); + spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); + + /* Also enable IBSTATUSCHG interrupt. */ + val = qib_read_kreg_port(ppd, krp_errmask); + qib_write_kreg_port(ppd, krp_errmask, + val | ERR_MASK_N(IBStatusChanged)); + + /* Always zero until we start messing with SerDes for real */ + return ret; +} + +/** + * qib_7322_mini_quiet_serdes - set serdes to txidle + * @ppd: physical port on the qlogic_ib device + * Called when driver is being unloaded + */ +static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd) +{ + u64 val; + unsigned long flags; + + qib_set_ib_7322_lstate(ppd, 0, QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + wake_up(&ppd->cpspec->autoneg_wait); + cancel_delayed_work_sync(&ppd->cpspec->autoneg_work); + if (ppd->dd->cspec->r1) + cancel_delayed_work_sync(&ppd->cpspec->ipg_work); + + ppd->cpspec->chase_end = 0; + if (ppd->cpspec->chase_timer.data) /* if initted */ + del_timer_sync(&ppd->cpspec->chase_timer); + + /* + * Despite the name, actually disables IBC as well. Do it when + * we are as sure as possible that no more packets can be + * received, following the down and the PCS reset. + * The actual disabling happens in qib_7322_mini_pcs_reset(), + * along with the PCS being reset. + */ + ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, IBLinkEn); + qib_7322_mini_pcs_reset(ppd); + + /* + * Update the adjusted counters so the adjustment persists + * across driver reload.
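+ * (The *snap values were captured at bringup; with CounterWrEnable set + * we subtract the accumulated deltas back out of the chip counters, + * then disable counter writes again.)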
+ */ + if (ppd->cpspec->ibsymdelta || ppd->cpspec->iblnkerrdelta || + ppd->cpspec->ibdeltainprog || ppd->cpspec->iblnkdowndelta) { + struct qib_devdata *dd = ppd->dd; + u64 diagc; + + /* enable counter writes */ + diagc = qib_read_kreg64(dd, kr_hwdiagctrl); + qib_write_kreg(dd, kr_hwdiagctrl, + diagc | SYM_MASK(HwDiagCtrl, CounterWrEnable)); + + if (ppd->cpspec->ibsymdelta || ppd->cpspec->ibdeltainprog) { + val = read_7322_creg32_port(ppd, crp_ibsymbolerr); + if (ppd->cpspec->ibdeltainprog) + val -= val - ppd->cpspec->ibsymsnap; + val -= ppd->cpspec->ibsymdelta; + write_7322_creg_port(ppd, crp_ibsymbolerr, val); + } + if (ppd->cpspec->iblnkerrdelta || ppd->cpspec->ibdeltainprog) { + val = read_7322_creg32_port(ppd, crp_iblinkerrrecov); + if (ppd->cpspec->ibdeltainprog) + val -= val - ppd->cpspec->iblnkerrsnap; + val -= ppd->cpspec->iblnkerrdelta; + write_7322_creg_port(ppd, crp_iblinkerrrecov, val); + } + if (ppd->cpspec->iblnkdowndelta) { + val = read_7322_creg32_port(ppd, crp_iblinkdown); + val += ppd->cpspec->iblnkdowndelta; + write_7322_creg_port(ppd, crp_iblinkdown, val); + } + /* + * No need to save ibmalfdelta since IB perfcounters + * are cleared on driver reload. + */ + + /* and disable counter writes */ + qib_write_kreg(dd, kr_hwdiagctrl, diagc); + } +} + +/** + * qib_setup_7322_setextled - set the state of the two external LEDs + * @ppd: physical port on the qlogic_ib device + * @on: whether the link is up or not + * + * The exact combo of LEDs if on is true is determined by looking + * at the ibcstatus. + * + * These LEDs indicate the physical and logical state of IB link. + * For this chip (at least with recommended board pinouts), LED1 + * is Yellow (logical state) and LED2 is Green (physical state), + * + * Note: We try to match the Mellanox HCA LED behavior as best + * we can. Green indicates physical link state is OK (something is + * plugged in, and we can train). + * Amber indicates the link is logically up (ACTIVE). + * Mellanox further blinks the amber LED to indicate data packet + * activity, but we have no hardware support for that, so it would + * require waking up every 10-20 msecs and checking the counters + * on the chip, and then turning the LED off if appropriate. That's + * visible overhead, so not something we will do. + */ +static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on) +{ + struct qib_devdata *dd = ppd->dd; + u64 extctl, ledblink = 0, val; + unsigned long flags; + int yel, grn; + + /* + * The diags use the LED to indicate diag info, so we leave + * the external LED alone when the diags are running. + */ + if (dd->diag_client) + return; + + /* Allow override of LED display for, e.g. Locating system in rack */ + if (ppd->led_override) { + grn = (ppd->led_override & QIB_LED_PHYS); + yel = (ppd->led_override & QIB_LED_LOG); + } else if (on) { + val = qib_read_kreg_port(ppd, krp_ibcstatus_a); + grn = qib_7322_phys_portstate(val) == + IB_PHYSPORTSTATE_LINKUP; + yel = qib_7322_iblink_state(val) == IB_PORT_ACTIVE; + } else { + grn = 0; + yel = 0; + } + + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + extctl = dd->cspec->extctrl & (ppd->port == 1 ? + ~ExtLED_IB1_MASK : ~ExtLED_IB2_MASK); + if (grn) { + extctl |= ppd->port == 1 ? ExtLED_IB1_GRN : ExtLED_IB2_GRN; + /* + * Counts are in chip clock (4ns) periods. + * This is 1/16 sec (66.6ms) on, + * 3/16 sec (187.5 ms) off, with packets rcvd. 
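+ * (Worked out: 66600 usec * 1000 / 4 nsec = 16650000 periods on,
+ * and 187500 usec * 1000 / 4 nsec = 46875000 periods off, matching
+ * the shifted values loaded into ledblink below.)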
+ */ + ledblink = ((66600 * 1000UL / 4) << IBA7322_LEDBLINK_ON_SHIFT) | + ((187500 * 1000UL / 4) << IBA7322_LEDBLINK_OFF_SHIFT); + } + if (yel) + extctl |= ppd->port == 1 ? ExtLED_IB1_YEL : ExtLED_IB2_YEL; + dd->cspec->extctrl = extctl; + qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl); + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); + + if (ledblink) /* blink the LED on packet receive */ + qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink); +} + +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static int qib_7322_notify_dca(struct qib_devdata *dd, unsigned long event) +{ + switch (event) { + case DCA_PROVIDER_ADD: + if (dd->flags & QIB_DCA_ENABLED) + break; + if (!dca_add_requester(&dd->pcidev->dev)) { + qib_devinfo(dd->pcidev, "DCA enabled\n"); + dd->flags |= QIB_DCA_ENABLED; + qib_setup_dca(dd); + } + break; + case DCA_PROVIDER_REMOVE: + if (dd->flags & QIB_DCA_ENABLED) { + dca_remove_requester(&dd->pcidev->dev); + dd->flags &= ~QIB_DCA_ENABLED; + dd->cspec->dca_ctrl = 0; + qib_write_kreg(dd, KREG_IDX(DCACtrlA), + dd->cspec->dca_ctrl); + } + break; + } + return 0; +} + +static void qib_update_rhdrq_dca(struct qib_ctxtdata *rcd, int cpu) +{ + struct qib_devdata *dd = rcd->dd; + struct qib_chip_specific *cspec = dd->cspec; + + if (!(dd->flags & QIB_DCA_ENABLED)) + return; + if (cspec->rhdr_cpu[rcd->ctxt] != cpu) { + const struct dca_reg_map *rmp; + + cspec->rhdr_cpu[rcd->ctxt] = cpu; + rmp = &dca_rcvhdr_reg_map[rcd->ctxt]; + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] &= rmp->mask; + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] |= + (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << rmp->lsb; + qib_devinfo(dd->pcidev, + "Ctxt %d cpu %d dca %llx\n", rcd->ctxt, cpu, + (long long) cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]); + qib_write_kreg(dd, rmp->regno, + cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]); + cspec->dca_ctrl |= SYM_MASK(DCACtrlA, RcvHdrqDCAEnable); + qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl); + } +} + +static void qib_update_sdma_dca(struct qib_pportdata *ppd, int cpu) +{ + struct qib_devdata *dd = ppd->dd; + struct qib_chip_specific *cspec = dd->cspec; + unsigned pidx = ppd->port - 1; + + if (!(dd->flags & QIB_DCA_ENABLED)) + return; + if (cspec->sdma_cpu[pidx] != cpu) { + cspec->sdma_cpu[pidx] = cpu; + cspec->dca_rcvhdr_ctrl[4] &= ~(ppd->hw_pidx ? + SYM_MASK(DCACtrlF, SendDma1DCAOPH) : + SYM_MASK(DCACtrlF, SendDma0DCAOPH)); + cspec->dca_rcvhdr_ctrl[4] |= + (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << + (ppd->hw_pidx ? + SYM_LSB(DCACtrlF, SendDma1DCAOPH) : + SYM_LSB(DCACtrlF, SendDma0DCAOPH)); + qib_devinfo(dd->pcidev, + "sdma %d cpu %d dca %llx\n", ppd->hw_pidx, cpu, + (long long) cspec->dca_rcvhdr_ctrl[4]); + qib_write_kreg(dd, KREG_IDX(DCACtrlF), + cspec->dca_rcvhdr_ctrl[4]); + cspec->dca_ctrl |= ppd->hw_pidx ? 
+ SYM_MASK(DCACtrlA, SendDMAHead1DCAEnable) : + SYM_MASK(DCACtrlA, SendDMAHead0DCAEnable); + qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl); + } +} + +static void qib_setup_dca(struct qib_devdata *dd) +{ + struct qib_chip_specific *cspec = dd->cspec; + int i; + + for (i = 0; i < ARRAY_SIZE(cspec->rhdr_cpu); i++) + cspec->rhdr_cpu[i] = -1; + for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++) + cspec->sdma_cpu[i] = -1; + cspec->dca_rcvhdr_ctrl[0] = + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq0DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq1DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq2DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlB, RcvHdrq3DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[1] = + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq4DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq5DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq6DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlC, RcvHdrq7DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[2] = + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq8DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq9DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq10DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlD, RcvHdrq11DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[3] = + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq12DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq13DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq14DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlE, RcvHdrq15DCAXfrCnt)); + cspec->dca_rcvhdr_ctrl[4] = + (1ULL << SYM_LSB(DCACtrlF, RcvHdrq16DCAXfrCnt)) | + (1ULL << SYM_LSB(DCACtrlF, RcvHdrq17DCAXfrCnt)); + for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++) + qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i, + cspec->dca_rcvhdr_ctrl[i]); + for (i = 0; i < cspec->num_msix_entries; i++) + setup_dca_notifier(dd, &cspec->msix_entries[i]); +} + +static void qib_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct qib_irq_notify *n = + container_of(notify, struct qib_irq_notify, notify); + int cpu = cpumask_first(mask); + + if (n->rcv) { + struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg; + qib_update_rhdrq_dca(rcd, cpu); + } else { + struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg; + qib_update_sdma_dca(ppd, cpu); + } +} + +static void qib_irq_notifier_release(struct kref *ref) +{ + struct qib_irq_notify *n = + container_of(ref, struct qib_irq_notify, notify.kref); + struct qib_devdata *dd; + + if (n->rcv) { + struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg; + dd = rcd->dd; + } else { + struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg; + dd = ppd->dd; + } + qib_devinfo(dd->pcidev, + "release on HCA notify 0x%p n 0x%p\n", ref, n); + kfree(n); +} +#endif + +/* + * Disable MSIx interrupt if enabled, call generic MSIx code + * to cleanup, and clear pending MSIx interrupts. + * Used for fallback to INTx, after reset, and when MSIx setup fails. 
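+ * Any interrupts granted while MSIx was active are read back from
+ * kr_intgranted and written back to clear them, so a stale vector
+ * cannot fire after the fallback.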
+ */ +static void qib_7322_nomsix(struct qib_devdata *dd) +{ + u64 intgranted; + int n; + + dd->cspec->main_int_mask = ~0ULL; + n = dd->cspec->num_msix_entries; + if (n) { + int i; + + dd->cspec->num_msix_entries = 0; + for (i = 0; i < n; i++) { +#ifdef CONFIG_INFINIBAND_QIB_DCA + reset_dca_notifier(dd, &dd->cspec->msix_entries[i]); +#endif + irq_set_affinity_hint( + dd->cspec->msix_entries[i].msix.vector, NULL); + free_cpumask_var(dd->cspec->msix_entries[i].mask); + free_irq(dd->cspec->msix_entries[i].msix.vector, + dd->cspec->msix_entries[i].arg); + } + qib_nomsix(dd); + } + /* make sure no MSIx interrupts are left pending */ + intgranted = qib_read_kreg64(dd, kr_intgranted); + if (intgranted) + qib_write_kreg(dd, kr_intgranted, intgranted); +} + +static void qib_7322_free_irq(struct qib_devdata *dd) +{ + if (dd->cspec->irq) { + free_irq(dd->cspec->irq, dd); + dd->cspec->irq = 0; + } + qib_7322_nomsix(dd); +} + +static void qib_setup_7322_cleanup(struct qib_devdata *dd) +{ + int i; + +#ifdef CONFIG_INFINIBAND_QIB_DCA + if (dd->flags & QIB_DCA_ENABLED) { + dca_remove_requester(&dd->pcidev->dev); + dd->flags &= ~QIB_DCA_ENABLED; + dd->cspec->dca_ctrl = 0; + qib_write_kreg(dd, KREG_IDX(DCACtrlA), dd->cspec->dca_ctrl); + } +#endif + + qib_7322_free_irq(dd); + kfree(dd->cspec->cntrs); + kfree(dd->cspec->sendchkenable); + kfree(dd->cspec->sendgrhchk); + kfree(dd->cspec->sendibchk); + kfree(dd->cspec->msix_entries); + for (i = 0; i < dd->num_pports; i++) { + unsigned long flags; + u32 mask = QSFP_GPIO_MOD_PRS_N | + (QSFP_GPIO_MOD_PRS_N << QSFP_GPIO_PORT2_SHIFT); + + kfree(dd->pport[i].cpspec->portcntrs); + if (dd->flags & QIB_HAS_QSFP) { + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + dd->cspec->gpio_mask &= ~mask; + qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask); + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); + qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data); + } + if (dd->pport[i].ibport_data.smi_ah) + ib_destroy_ah(&dd->pport[i].ibport_data.smi_ah->ibah); + } +} + +/* handle SDMA interrupts */ +static void sdma_7322_intr(struct qib_devdata *dd, u64 istat) +{ + struct qib_pportdata *ppd0 = &dd->pport[0]; + struct qib_pportdata *ppd1 = &dd->pport[1]; + u64 intr0 = istat & (INT_MASK_P(SDma, 0) | + INT_MASK_P(SDmaIdle, 0) | INT_MASK_P(SDmaProgress, 0)); + u64 intr1 = istat & (INT_MASK_P(SDma, 1) | + INT_MASK_P(SDmaIdle, 1) | INT_MASK_P(SDmaProgress, 1)); + + if (intr0) + qib_sdma_intr(ppd0); + if (intr1) + qib_sdma_intr(ppd1); + + if (istat & INT_MASK_PM(SDmaCleanupDone, 0)) + qib_sdma_process_event(ppd0, qib_sdma_event_e20_hw_started); + if (istat & INT_MASK_PM(SDmaCleanupDone, 1)) + qib_sdma_process_event(ppd1, qib_sdma_event_e20_hw_started); +} + +/* + * Set or clear the Send buffer available interrupt enable bit. + */ +static void qib_wantpiobuf_7322_intr(struct qib_devdata *dd, u32 needint) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if (needint) + dd->sendctrl |= SYM_MASK(SendCtrl, SendIntBufAvail); + else + dd->sendctrl &= ~SYM_MASK(SendCtrl, SendIntBufAvail); + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0ULL); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); +} + +/* + * Somehow got an interrupt with reserved bits set in interrupt status. + * Print a message so we know it happened, then clear them. 
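+ * The offending bits are also removed from int_enable_mask via the
+ * kr_intmask write below, so they cannot re-fire endlessly.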
 * keep mainline interrupt handler cache-friendly
+ */
+static noinline void unknown_7322_ibits(struct qib_devdata *dd, u64 istat)
+{
+ u64 kills;
+
+ kills = istat & ~QIB_I_BITSEXTANT;
+ qib_dev_err(dd,
+ "Clearing reserved interrupt(s) 0x%016llx\n",
+ (unsigned long long) kills);
+ qib_write_kreg(dd, kr_intmask, (dd->cspec->int_enable_mask & ~kills));
+}
+
+/* keep mainline interrupt handler cache-friendly */
+static noinline void unknown_7322_gpio_intr(struct qib_devdata *dd)
+{
+ u32 gpiostatus;
+ int handled = 0;
+ int pidx;
+
+ /*
+ * Boards for this chip currently don't use GPIO interrupts,
+ * so clear by writing GPIOstatus to GPIOclear, and complain
+ * to developer. To avoid endless repeats, clear
+ * the bits in the mask, since there is some kind of
+ * programming error or chip problem.
+ */
+ gpiostatus = qib_read_kreg32(dd, kr_gpio_status);
+ /*
+ * In theory, writing GPIOstatus to GPIOclear could
+ * have a bad side-effect on some diagnostic that wanted
+ * to poll for a status-change, but the various shadows
+ * make that problematic at best. Diags will just suppress
+ * all GPIO interrupts during such tests.
+ */
+ qib_write_kreg(dd, kr_gpio_clear, gpiostatus);
+ /*
+ * Check for QSFP MOD_PRS changes
+ * only works for single port if IB1 != pidx1
+ */
+ for (pidx = 0; pidx < dd->num_pports && (dd->flags & QIB_HAS_QSFP);
+ ++pidx) {
+ struct qib_pportdata *ppd;
+ struct qib_qsfp_data *qd;
+ u32 mask;
+ if (!dd->pport[pidx].link_speed_supported)
+ continue;
+ mask = QSFP_GPIO_MOD_PRS_N;
+ ppd = dd->pport + pidx;
+ mask <<= (QSFP_GPIO_PORT2_SHIFT * ppd->hw_pidx);
+ if (gpiostatus & dd->cspec->gpio_mask & mask) {
+ u64 pins;
+ qd = &ppd->cpspec->qsfp_data;
+ gpiostatus &= ~mask;
+ pins = qib_read_kreg64(dd, kr_extstatus);
+ pins >>= SYM_LSB(EXTStatus, GPIOIn);
+ if (!(pins & mask)) {
+ ++handled;
+ qd->t_insert = jiffies;
+ queue_work(ib_wq, &qd->work);
+ }
+ }
+ }
+
+ if (gpiostatus && !handled) {
+ const u32 mask = qib_read_kreg32(dd, kr_gpio_mask);
+ u32 gpio_irq = mask & gpiostatus;
+
+ /*
+ * Clear any troublemakers, and update chip from shadow
+ */
+ dd->cspec->gpio_mask &= ~gpio_irq;
+ qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask);
+ }
+}
+
+/*
+ * Handle errors and unusual events first, separate function
+ * to improve cache hits for fast path interrupt handling.
+ */
+static noinline void unlikely_7322_intr(struct qib_devdata *dd, u64 istat)
+{
+ if (istat & ~QIB_I_BITSEXTANT)
+ unknown_7322_ibits(dd, istat);
+ if (istat & QIB_I_GPIO)
+ unknown_7322_gpio_intr(dd);
+ if (istat & QIB_I_C_ERROR) {
+ qib_write_kreg(dd, kr_errmask, 0ULL);
+ tasklet_schedule(&dd->error_tasklet);
+ }
+ if (istat & INT_MASK_P(Err, 0) && dd->rcd[0])
+ handle_7322_p_errors(dd->rcd[0]->ppd);
+ if (istat & INT_MASK_P(Err, 1) && dd->rcd[1])
+ handle_7322_p_errors(dd->rcd[1]->ppd);
+}
+
+/*
+ * Dynamically adjust the rcv int timeout for a context based on incoming
+ * packet rate.
+ */
+static void adjust_rcv_timeout(struct qib_ctxtdata *rcd, int npkts)
+{
+ struct qib_devdata *dd = rcd->dd;
+ u32 timeout = dd->cspec->rcvavail_timeout[rcd->ctxt];
+
+ /*
+ * Dynamically adjust idle timeout on chip
+ * based on number of packets processed.
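+ * e.g. assuming rcv_int_count were 16: an interrupt that reaped only
+ * 5 packets halves the timeout (down to a floor of 2), while one that
+ * reaped 16 or more doubles it, capped at rcv_int_timeout.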
+ */
+ if (npkts < rcv_int_count && timeout > 2)
+ timeout >>= 1;
+ else if (npkts >= rcv_int_count && timeout < rcv_int_timeout)
+ timeout = min(timeout << 1, rcv_int_timeout);
+ else
+ return;
+
+ dd->cspec->rcvavail_timeout[rcd->ctxt] = timeout;
+ qib_write_kreg(dd, kr_rcvavailtimeout + rcd->ctxt, timeout);
+}
+
+/*
+ * This is the main interrupt handler.
+ * It will normally only be used for low frequency interrupts but may
+ * have to handle all interrupts if INTx is enabled or fewer than normal
+ * MSIx interrupts were allocated.
+ * This routine should ignore the interrupt bits for any of the
+ * dedicated MSIx handlers.
+ */
+static irqreturn_t qib_7322intr(int irq, void *data)
+{
+ struct qib_devdata *dd = data;
+ irqreturn_t ret;
+ u64 istat;
+ u64 ctxtrbits;
+ u64 rmask;
+ unsigned i;
+ u32 npkts;
+
+ if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) {
+ /*
+ * This return value is not great, but we do not want the
+ * interrupt core code to remove our interrupt handler
+ * because we don't appear to be handling an interrupt
+ * during a chip reset.
+ */
+ ret = IRQ_HANDLED;
+ goto bail;
+ }
+
+ istat = qib_read_kreg64(dd, kr_intstatus);
+
+ if (unlikely(istat == ~0ULL)) {
+ qib_bad_intrstatus(dd);
+ qib_dev_err(dd, "Interrupt status all f's, skipping\n");
+ /* don't know if it was our interrupt or not */
+ ret = IRQ_NONE;
+ goto bail;
+ }
+
+ istat &= dd->cspec->main_int_mask;
+ if (unlikely(!istat)) {
+ /* already handled, or shared and not us */
+ ret = IRQ_NONE;
+ goto bail;
+ }
+
+ this_cpu_inc(*dd->int_counter);
+
+ /* handle "errors" of various kinds first, device ahead of port */
+ if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO |
+ QIB_I_C_ERROR | INT_MASK_P(Err, 0) |
+ INT_MASK_P(Err, 1))))
+ unlikely_7322_intr(dd, istat);
+
+ /*
+ * Clear the interrupt bits we found set, relatively early, so we
+ * "know" the chip will have seen this by the time we process
+ * the queue, and will re-interrupt if necessary. The processor
+ * itself won't take the interrupt again until we return.
+ */
+ qib_write_kreg(dd, kr_intclear, istat);
+
+ /*
+ * Handle kernel receive queues before checking for pio buffers
+ * available since receives can overflow; piobuf waiters can afford
+ * a few extra cycles, since they were waiting anyway.
+ */
+ ctxtrbits = istat & (QIB_I_RCVAVAIL_MASK | QIB_I_RCVURG_MASK);
+ if (ctxtrbits) {
+ rmask = (1ULL << QIB_I_RCVAVAIL_LSB) |
+ (1ULL << QIB_I_RCVURG_LSB);
+ for (i = 0; i < dd->first_user_ctxt; i++) {
+ if (ctxtrbits & rmask) {
+ ctxtrbits &= ~rmask;
+ if (dd->rcd[i])
+ qib_kreceive(dd->rcd[i], NULL, &npkts);
+ }
+ rmask <<= 1;
+ }
+ if (ctxtrbits) {
+ ctxtrbits = (ctxtrbits >> QIB_I_RCVAVAIL_LSB) |
+ (ctxtrbits >> QIB_I_RCVURG_LSB);
+ qib_handle_urcv(dd, ctxtrbits);
+ }
+ }
+
+ if (istat & (QIB_I_P_SDMAINT(0) | QIB_I_P_SDMAINT(1)))
+ sdma_7322_intr(dd, istat);
+
+ if ((istat & QIB_I_SPIOBUFAVAIL) && (dd->flags & QIB_INITTED))
+ qib_ib_piobufavail(dd);
+
+ ret = IRQ_HANDLED;
+bail:
+ return ret;
+}
+
+/*
+ * Dedicated receive packet available interrupt handler.
+ */
+static irqreturn_t qib_7322pintr(int irq, void *data)
+{
+ struct qib_ctxtdata *rcd = data;
+ struct qib_devdata *dd = rcd->dd;
+ u32 npkts;
+
+ if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT)
+ /*
+ * This return value is not great, but we do not want the
+ * interrupt core code to remove our interrupt handler
+ * because we don't appear to be handling an interrupt
+ * during a chip reset.
+ */ + return IRQ_HANDLED; + + this_cpu_inc(*dd->int_counter); + + /* Clear the interrupt bit we expect to be set. */ + qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) | + (1ULL << QIB_I_RCVURG_LSB)) << rcd->ctxt); + + qib_kreceive(rcd, NULL, &npkts); + + return IRQ_HANDLED; +} + +/* + * Dedicated Send buffer available interrupt handler. + */ +static irqreturn_t qib_7322bufavail(int irq, void *data) +{ + struct qib_devdata *dd = data; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + return IRQ_HANDLED; + + this_cpu_inc(*dd->int_counter); + + /* Clear the interrupt bit we expect to be set. */ + qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL); + + /* qib_ib_piobufavail() will clear the want PIO interrupt if needed */ + if (dd->flags & QIB_INITTED) + qib_ib_piobufavail(dd); + else + qib_wantpiobuf_7322_intr(dd, 0); + + return IRQ_HANDLED; +} + +/* + * Dedicated Send DMA interrupt handler. + */ +static irqreturn_t sdma_intr(int irq, void *data) +{ + struct qib_pportdata *ppd = data; + struct qib_devdata *dd = ppd->dd; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + return IRQ_HANDLED; + + this_cpu_inc(*dd->int_counter); + + /* Clear the interrupt bit we expect to be set. */ + qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? + INT_MASK_P(SDma, 1) : INT_MASK_P(SDma, 0)); + qib_sdma_intr(ppd); + + return IRQ_HANDLED; +} + +/* + * Dedicated Send DMA idle interrupt handler. + */ +static irqreturn_t sdma_idle_intr(int irq, void *data) +{ + struct qib_pportdata *ppd = data; + struct qib_devdata *dd = ppd->dd; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + return IRQ_HANDLED; + + this_cpu_inc(*dd->int_counter); + + /* Clear the interrupt bit we expect to be set. */ + qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? + INT_MASK_P(SDmaIdle, 1) : INT_MASK_P(SDmaIdle, 0)); + qib_sdma_intr(ppd); + + return IRQ_HANDLED; +} + +/* + * Dedicated Send DMA progress interrupt handler. + */ +static irqreturn_t sdma_progress_intr(int irq, void *data) +{ + struct qib_pportdata *ppd = data; + struct qib_devdata *dd = ppd->dd; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + return IRQ_HANDLED; + + this_cpu_inc(*dd->int_counter); + + /* Clear the interrupt bit we expect to be set. */ + qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? + INT_MASK_P(SDmaProgress, 1) : + INT_MASK_P(SDmaProgress, 0)); + qib_sdma_intr(ppd); + + return IRQ_HANDLED; +} + +/* + * Dedicated Send DMA cleanup interrupt handler. 
+ */ +static irqreturn_t sdma_cleanup_intr(int irq, void *data) +{ + struct qib_pportdata *ppd = data; + struct qib_devdata *dd = ppd->dd; + + if ((dd->flags & (QIB_PRESENT | QIB_BADINTR)) != QIB_PRESENT) + /* + * This return value is not great, but we do not want the + * interrupt core code to remove our interrupt handler + * because we don't appear to be handling an interrupt + * during a chip reset. + */ + return IRQ_HANDLED; + + this_cpu_inc(*dd->int_counter); + + /* Clear the interrupt bit we expect to be set. */ + qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? + INT_MASK_PM(SDmaCleanupDone, 1) : + INT_MASK_PM(SDmaCleanupDone, 0)); + qib_sdma_process_event(ppd, qib_sdma_event_e20_hw_started); + + return IRQ_HANDLED; +} + +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +{ + if (!m->dca) + return; + qib_devinfo(dd->pcidev, + "Disabling notifier on HCA %d irq %d\n", + dd->unit, + m->msix.vector); + irq_set_affinity_notifier( + m->msix.vector, + NULL); + m->notifier = NULL; +} + +static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m) +{ + struct qib_irq_notify *n; + + if (!m->dca) + return; + n = kzalloc(sizeof(*n), GFP_KERNEL); + if (n) { + int ret; + + m->notifier = n; + n->notify.irq = m->msix.vector; + n->notify.notify = qib_irq_notifier_notify; + n->notify.release = qib_irq_notifier_release; + n->arg = m->arg; + n->rcv = m->rcv; + qib_devinfo(dd->pcidev, + "set notifier irq %d rcv %d notify %p\n", + n->notify.irq, n->rcv, &n->notify); + ret = irq_set_affinity_notifier( + n->notify.irq, + &n->notify); + if (ret) { + m->notifier = NULL; + kfree(n); + } + } +} + +#endif + +/* + * Set up our chip-specific interrupt handler. + * The interrupt type has already been setup, so + * we just need to do the registration and error checking. + * If we are using MSIx interrupts, we may fall back to + * INTx later, if the interrupt handler doesn't get called + * within 1/2 second (see verify_interrupt()). + */ +static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) +{ + int ret, i, msixnum; + u64 redirect[6]; + u64 mask; + const struct cpumask *local_mask; + int firstcpu, secondcpu = 0, currrcvcpu = 0; + + if (!dd->num_pports) + return; + + if (clearpend) { + /* + * if not switching interrupt types, be sure interrupts are + * disabled, and then clear anything pending at this point, + * because we are starting clean. + */ + qib_7322_set_intr_state(dd, 0); + + /* clear the reset error, init error/hwerror mask */ + qib_7322_init_hwerrors(dd); + + /* clear any interrupt bits that might be set */ + qib_write_kreg(dd, kr_intclear, ~0ULL); + + /* make sure no pending MSIx intr, and clear diag reg */ + qib_write_kreg(dd, kr_intgranted, ~0ULL); + qib_write_kreg(dd, kr_vecclr_wo_int, ~0ULL); + } + + if (!dd->cspec->num_msix_entries) { + /* Try to get INTx interrupt */ +try_intx: + if (!dd->pcidev->irq) { + qib_dev_err(dd, + "irq is 0, BIOS error? 
Interrupts won't work\n"); + goto bail; + } + ret = request_irq(dd->pcidev->irq, qib_7322intr, + IRQF_SHARED, QIB_DRV_NAME, dd); + if (ret) { + qib_dev_err(dd, + "Couldn't setup INTx interrupt (irq=%d): %d\n", + dd->pcidev->irq, ret); + goto bail; + } + dd->cspec->irq = dd->pcidev->irq; + dd->cspec->main_int_mask = ~0ULL; + goto bail; + } + + /* Try to get MSIx interrupts */ + memset(redirect, 0, sizeof redirect); + mask = ~0ULL; + msixnum = 0; + local_mask = cpumask_of_pcibus(dd->pcidev->bus); + firstcpu = cpumask_first(local_mask); + if (firstcpu >= nr_cpu_ids || + cpumask_weight(local_mask) == num_online_cpus()) { + local_mask = topology_core_cpumask(0); + firstcpu = cpumask_first(local_mask); + } + if (firstcpu < nr_cpu_ids) { + secondcpu = cpumask_next(firstcpu, local_mask); + if (secondcpu >= nr_cpu_ids) + secondcpu = firstcpu; + currrcvcpu = secondcpu; + } + for (i = 0; msixnum < dd->cspec->num_msix_entries; i++) { + irq_handler_t handler; + void *arg; + u64 val; + int lsb, reg, sh; +#ifdef CONFIG_INFINIBAND_QIB_DCA + int dca = 0; +#endif + + dd->cspec->msix_entries[msixnum]. + name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1] + = '\0'; + if (i < ARRAY_SIZE(irq_table)) { + if (irq_table[i].port) { + /* skip if for a non-configured port */ + if (irq_table[i].port > dd->num_pports) + continue; + arg = dd->pport + irq_table[i].port - 1; + } else + arg = dd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca = irq_table[i].dca; +#endif + lsb = irq_table[i].lsb; + handler = irq_table[i].handler; + snprintf(dd->cspec->msix_entries[msixnum].name, + sizeof(dd->cspec->msix_entries[msixnum].name) + - 1, + QIB_DRV_NAME "%d%s", dd->unit, + irq_table[i].name); + } else { + unsigned ctxt; + + ctxt = i - ARRAY_SIZE(irq_table); + /* per krcvq context receive interrupt */ + arg = dd->rcd[ctxt]; + if (!arg) + continue; + if (qib_krcvq01_no_msi && ctxt < 2) + continue; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca = 1; +#endif + lsb = QIB_I_RCVAVAIL_LSB + ctxt; + handler = qib_7322pintr; + snprintf(dd->cspec->msix_entries[msixnum].name, + sizeof(dd->cspec->msix_entries[msixnum].name) + - 1, + QIB_DRV_NAME "%d (kctx)", dd->unit); + } + ret = request_irq( + dd->cspec->msix_entries[msixnum].msix.vector, + handler, 0, dd->cspec->msix_entries[msixnum].name, + arg); + if (ret) { + /* + * Shouldn't happen since the enable said we could + * have as many as we are trying to setup here. 
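+ * If it does, tear down MSIx via qib_7322_nomsix() and retry with
+ * the shared INTx path at try_intx above.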
+ */ + qib_dev_err(dd, + "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n", + msixnum, + dd->cspec->msix_entries[msixnum].msix.vector, + ret); + qib_7322_nomsix(dd); + goto try_intx; + } + dd->cspec->msix_entries[msixnum].arg = arg; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->cspec->msix_entries[msixnum].dca = dca; + dd->cspec->msix_entries[msixnum].rcv = + handler == qib_7322pintr; +#endif + if (lsb >= 0) { + reg = lsb / IBA7322_REDIRECT_VEC_PER_REG; + sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) * + SYM_LSB(IntRedirect0, vec1); + mask &= ~(1ULL << lsb); + redirect[reg] |= ((u64) msixnum) << sh; + } + val = qib_read_kreg64(dd, 2 * msixnum + 1 + + (QIB_7322_MsixTable_OFFS / sizeof(u64))); + if (firstcpu < nr_cpu_ids && + zalloc_cpumask_var( + &dd->cspec->msix_entries[msixnum].mask, + GFP_KERNEL)) { + if (handler == qib_7322pintr) { + cpumask_set_cpu(currrcvcpu, + dd->cspec->msix_entries[msixnum].mask); + currrcvcpu = cpumask_next(currrcvcpu, + local_mask); + if (currrcvcpu >= nr_cpu_ids) + currrcvcpu = secondcpu; + } else { + cpumask_set_cpu(firstcpu, + dd->cspec->msix_entries[msixnum].mask); + } + irq_set_affinity_hint( + dd->cspec->msix_entries[msixnum].msix.vector, + dd->cspec->msix_entries[msixnum].mask); + } + msixnum++; + } + /* Initialize the vector mapping */ + for (i = 0; i < ARRAY_SIZE(redirect); i++) + qib_write_kreg(dd, kr_intredirect + i, redirect[i]); + dd->cspec->main_int_mask = mask; + tasklet_init(&dd->error_tasklet, qib_error_tasklet, + (unsigned long)dd); +bail:; +} + +/** + * qib_7322_boardname - fill in the board name and note features + * @dd: the qlogic_ib device + * + * info will be based on the board revision register + */ +static unsigned qib_7322_boardname(struct qib_devdata *dd) +{ + /* Will need enumeration of board-types here */ + char *n; + u32 boardid, namelen; + unsigned features = DUAL_PORT_CAP; + + boardid = SYM_FIELD(dd->revision, Revision, BoardID); + + switch (boardid) { + case 0: + n = "InfiniPath_QLE7342_Emulation"; + break; + case 1: + n = "InfiniPath_QLE7340"; + dd->flags |= QIB_HAS_QSFP; + features = PORT_SPD_CAP; + break; + case 2: + n = "InfiniPath_QLE7342"; + dd->flags |= QIB_HAS_QSFP; + break; + case 3: + n = "InfiniPath_QMI7342"; + break; + case 4: + n = "InfiniPath_Unsupported7342"; + qib_dev_err(dd, "Unsupported version of QMH7342\n"); + features = 0; + break; + case BOARD_QMH7342: + n = "InfiniPath_QMH7342"; + features = 0x24; + break; + case BOARD_QME7342: + n = "InfiniPath_QME7342"; + break; + case 8: + n = "InfiniPath_QME7362"; + dd->flags |= QIB_HAS_QSFP; + break; + case 15: + n = "InfiniPath_QLE7342_TEST"; + dd->flags |= QIB_HAS_QSFP; + break; + default: + n = "InfiniPath_QLE73xy_UNKNOWN"; + qib_dev_err(dd, "Unknown 7322 board type %u\n", boardid); + break; + } + dd->board_atten = 1; /* index into txdds_Xdr */ + + namelen = strlen(n) + 1; + dd->boardname = kmalloc(namelen, GFP_KERNEL); + if (!dd->boardname) + qib_dev_err(dd, "Failed allocation for board name: %s\n", n); + else + snprintf(dd->boardname, namelen, "%s", n); + + snprintf(dd->boardversion, sizeof(dd->boardversion), + "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", + QIB_CHIP_VERS_MAJ, QIB_CHIP_VERS_MIN, dd->boardname, + (unsigned)SYM_FIELD(dd->revision, Revision_R, Arch), + dd->majrev, dd->minrev, + (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); + + if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) { + qib_devinfo(dd->pcidev, + "IB%u: Forced to single port mode by module parameter\n", + dd->unit); + features &= PORT_SPD_CAP; + } + + return 
features; +} + +/* + * This routine sleeps, so it can only be called from user context, not + * from interrupt context. + */ +static int qib_do_7322_reset(struct qib_devdata *dd) +{ + u64 val; + u64 *msix_vecsave; + int i, msix_entries, ret = 1; + u16 cmdval; + u8 int_line, clinesz; + unsigned long flags; + + /* Use dev_err so it shows up in logs, etc. */ + qib_dev_err(dd, "Resetting InfiniPath unit %u\n", dd->unit); + + qib_pcie_getcmd(dd, &cmdval, &int_line, &clinesz); + + msix_entries = dd->cspec->num_msix_entries; + + /* no interrupts till re-initted */ + qib_7322_set_intr_state(dd, 0); + + if (msix_entries) { + qib_7322_nomsix(dd); + /* can be up to 512 bytes, too big for stack */ + msix_vecsave = kmalloc(2 * dd->cspec->num_msix_entries * + sizeof(u64), GFP_KERNEL); + if (!msix_vecsave) + qib_dev_err(dd, "No mem to save MSIx data\n"); + } else + msix_vecsave = NULL; + + /* + * Core PCI (as of 2.6.18) doesn't save or rewrite the full vector + * info that is set up by the BIOS, so we have to save and restore + * it ourselves. There is some risk something could change it, + * after we save it, but since we have disabled the MSIx, it + * shouldn't be touched... + */ + for (i = 0; i < msix_entries; i++) { + u64 vecaddr, vecdata; + vecaddr = qib_read_kreg64(dd, 2 * i + + (QIB_7322_MsixTable_OFFS / sizeof(u64))); + vecdata = qib_read_kreg64(dd, 1 + 2 * i + + (QIB_7322_MsixTable_OFFS / sizeof(u64))); + if (msix_vecsave) { + msix_vecsave[2 * i] = vecaddr; + /* save it without the masked bit set */ + msix_vecsave[1 + 2 * i] = vecdata & ~0x100000000ULL; + } + } + + dd->pport->cpspec->ibdeltainprog = 0; + dd->pport->cpspec->ibsymdelta = 0; + dd->pport->cpspec->iblnkerrdelta = 0; + dd->pport->cpspec->ibmalfdelta = 0; + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); + + /* + * Keep chip from being accessed until we are ready. Use + * writeq() directly, to allow the write even though QIB_PRESENT + * isn't set. + */ + dd->flags &= ~(QIB_INITTED | QIB_PRESENT | QIB_BADINTR); + dd->flags |= QIB_DOING_RESET; + val = dd->control | QLOGIC_IB_C_RESET; + writeq(val, &dd->kregbase[kr_control]); + + for (i = 1; i <= 5; i++) { + /* + * Allow MBIST, etc. to complete; longer on each retry. + * We sometimes get machine checks from bus timeout if no + * response, so for now, make it *really* long. + */ + msleep(1000 + (1 + i) * 3000); + + qib_pcie_reenable(dd, cmdval, int_line, clinesz); + + /* + * Use readq directly, so we don't need to mark it as PRESENT + * until we get a successful indication that all is well. + */ + val = readq(&dd->kregbase[kr_revision]); + if (val == dd->revision) + break; + if (i == 5) { + qib_dev_err(dd, + "Failed to initialize after reset, unusable\n"); + ret = 0; + goto bail; + } + } + + dd->flags |= QIB_PRESENT; /* it's back */ + + if (msix_entries) { + /* restore the MSIx vector address and data if saved above */ + for (i = 0; i < msix_entries; i++) { + dd->cspec->msix_entries[i].msix.entry = i; + if (!msix_vecsave || !msix_vecsave[2 * i]) + continue; + qib_write_kreg(dd, 2 * i + + (QIB_7322_MsixTable_OFFS / sizeof(u64)), + msix_vecsave[2 * i]); + qib_write_kreg(dd, 1 + 2 * i + + (QIB_7322_MsixTable_OFFS / sizeof(u64)), + msix_vecsave[1 + 2 * i]); + } + } + + /* initialize the remaining registers. 
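+ * Per-port registers are rewritten first, then the device-wide
+ * set, mirroring the order used at first initialization.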
*/ + for (i = 0; i < dd->num_pports; ++i) + write_7322_init_portregs(&dd->pport[i]); + write_7322_initregs(dd); + + if (qib_pcie_params(dd, dd->lbus_width, + &dd->cspec->num_msix_entries, + dd->cspec->msix_entries)) + qib_dev_err(dd, + "Reset failed to setup PCIe or interrupts; continuing anyway\n"); + + qib_setup_7322_interrupt(dd, 1); + + for (i = 0; i < dd->num_pports; ++i) { + struct qib_pportdata *ppd = &dd->pport[i]; + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_FORCE_NOTIFY; + ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + +bail: + dd->flags &= ~QIB_DOING_RESET; /* OK or not, no longer resetting */ + kfree(msix_vecsave); + return ret; +} + +/** + * qib_7322_put_tid - write a TID to the chip + * @dd: the qlogic_ib device + * @tidptr: pointer to the expected TID (in chip) to update + * @tidtype: 0 for eager, 1 for expected + * @pa: physical address of in memory buffer; tidinvalid if freeing + */ +static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, + u32 type, unsigned long pa) +{ + if (!(dd->flags & QIB_PRESENT)) + return; + if (pa != dd->tidinvalid) { + u64 chippa = pa >> IBA7322_TID_PA_SHIFT; + + /* paranoia checks */ + if (pa != (chippa << IBA7322_TID_PA_SHIFT)) { + qib_dev_err(dd, "Physaddr %lx not 2KB aligned!\n", + pa); + return; + } + if (chippa >= (1UL << IBA7322_TID_SZ_SHIFT)) { + qib_dev_err(dd, + "Physical page address 0x%lx larger than supported\n", + pa); + return; + } + + if (type == RCVHQ_RCV_TYPE_EAGER) + chippa |= dd->tidtemplate; + else /* for now, always full 4KB page */ + chippa |= IBA7322_TID_SZ_4K; + pa = chippa; + } + writeq(pa, tidptr); + mmiowb(); +} + +/** + * qib_7322_clear_tids - clear all TID entries for a ctxt, expected and eager + * @dd: the qlogic_ib device + * @ctxt: the ctxt + * + * clear all TID entries for a ctxt, expected and eager. + * Used from qib_close(). + */ +static void qib_7322_clear_tids(struct qib_devdata *dd, + struct qib_ctxtdata *rcd) +{ + u64 __iomem *tidbase; + unsigned long tidinv; + u32 ctxt; + int i; + + if (!dd->kregbase || !rcd) + return; + + ctxt = rcd->ctxt; + + tidinv = dd->tidinvalid; + tidbase = (u64 __iomem *) + ((char __iomem *) dd->kregbase + + dd->rcvtidbase + + ctxt * dd->rcvtidcnt * sizeof(*tidbase)); + + for (i = 0; i < dd->rcvtidcnt; i++) + qib_7322_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED, + tidinv); + + tidbase = (u64 __iomem *) + ((char __iomem *) dd->kregbase + + dd->rcvegrbase + + rcd->rcvegr_tid_base * sizeof(*tidbase)); + + for (i = 0; i < rcd->rcvegrcnt; i++) + qib_7322_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER, + tidinv); +} + +/** + * qib_7322_tidtemplate - setup constants for TID updates + * @dd: the qlogic_ib device + * + * We setup stuff that we use a lot, to avoid calculating each time + */ +static void qib_7322_tidtemplate(struct qib_devdata *dd) +{ + /* + * For now, we always allocate 4KB buffers (at init) so we can + * receive max size packets. We may want a module parameter to + * specify 2KB or 4KB and/or make it per port instead of per device + * for those who want to reduce memory footprint. Note that the + * rcvhdrentsize size must be large enough to hold the largest + * IB header (currently 96 bytes) that we expect to handle (plus of + * course the 2 dwords of RHF). 
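+ * For example, with the default 4096-byte eager buffers below,
+ * tidtemplate becomes IBA7322_TID_SZ_4K, which qib_7322_put_tid()
+ * ORs into the page-shifted physical address of each eager TID.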
+ */
+ if (dd->rcvegrbufsize == 2048)
+ dd->tidtemplate = IBA7322_TID_SZ_2K;
+ else if (dd->rcvegrbufsize == 4096)
+ dd->tidtemplate = IBA7322_TID_SZ_4K;
+ dd->tidinvalid = 0;
+}
+
+/**
+ * qib_7322_get_base_info - set chip-specific flags for user code
+ * @rcd: the qlogic_ib ctxt
+ * @kinfo: qib_base_info pointer
+ *
+ * We set the PCIE flag because the lower bandwidth on PCIe vs
+ * HyperTransport can affect some user packet algorithms.
+ */
+static int qib_7322_get_base_info(struct qib_ctxtdata *rcd,
+ struct qib_base_info *kinfo)
+{
+ kinfo->spi_runtime_flags |= QIB_RUNTIME_CTXT_MSB_IN_QP |
+ QIB_RUNTIME_PCIE | QIB_RUNTIME_NODMA_RTAIL |
+ QIB_RUNTIME_HDRSUPP | QIB_RUNTIME_SDMA;
+ if (rcd->dd->cspec->r1)
+ kinfo->spi_runtime_flags |= QIB_RUNTIME_RCHK;
+ if (rcd->dd->flags & QIB_USE_SPCL_TRIG)
+ kinfo->spi_runtime_flags |= QIB_RUNTIME_SPECIAL_TRIGGER;
+
+ return 0;
+}
+
+static struct qib_message_header *
+qib_7322_get_msgheader(struct qib_devdata *dd, __le32 *rhf_addr)
+{
+ u32 offset = qib_hdrget_offset(rhf_addr);
+
+ return (struct qib_message_header *)
+ (rhf_addr - dd->rhf_offset + offset);
+}
+
+/*
+ * Configure number of contexts.
+ */
+static void qib_7322_config_ctxts(struct qib_devdata *dd)
+{
+ unsigned long flags;
+ u32 nchipctxts;
+
+ nchipctxts = qib_read_kreg32(dd, kr_contextcnt);
+ dd->cspec->numctxts = nchipctxts;
+ if (qib_n_krcv_queues > 1 && dd->num_pports) {
+ dd->first_user_ctxt = NUM_IB_PORTS +
+ (qib_n_krcv_queues - 1) * dd->num_pports;
+ if (dd->first_user_ctxt > nchipctxts)
+ dd->first_user_ctxt = nchipctxts;
+ dd->n_krcv_queues = dd->first_user_ctxt / dd->num_pports;
+ } else {
+ dd->first_user_ctxt = NUM_IB_PORTS;
+ dd->n_krcv_queues = 1;
+ }
+
+ if (!qib_cfgctxts) {
+ int nctxts = dd->first_user_ctxt + num_online_cpus();
+
+ if (nctxts <= 6)
+ dd->ctxtcnt = 6;
+ else if (nctxts <= 10)
+ dd->ctxtcnt = 10;
+ else if (nctxts <= nchipctxts)
+ dd->ctxtcnt = nchipctxts;
+ } else if (qib_cfgctxts < dd->num_pports)
+ dd->ctxtcnt = dd->num_pports;
+ else if (qib_cfgctxts <= nchipctxts)
+ dd->ctxtcnt = qib_cfgctxts;
+ if (!dd->ctxtcnt) /* none of the above, set to max */
+ dd->ctxtcnt = nchipctxts;
+
+ /*
+ * Chip can be configured for 6, 10, or 18 ctxts, and choice
+ * affects number of eager TIDs per ctxt (1K, 2K, 4K).
+ * Lock to be paranoid about later motion, etc.
+ */
+ spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags);
+ if (dd->ctxtcnt > 10)
+ dd->rcvctrl |= 2ULL << SYM_LSB(RcvCtrl, ContextCfg);
+ else if (dd->ctxtcnt > 6)
+ dd->rcvctrl |= 1ULL << SYM_LSB(RcvCtrl, ContextCfg);
+ /* else configure for default 6 receive ctxts */
+
+ /* The XRC opcode is 5. */
+ dd->rcvctrl |= 5ULL << SYM_LSB(RcvCtrl, XrcTypeCode);
+
+ /*
+ * RcvCtrl *must* be written here so that the
+ * chip understands how to change rcvegrcnt below.
+ */
+ qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl);
+ spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags);
+
+ /* kr_rcvegrcnt changes based on the number of contexts enabled */
+ dd->cspec->rcvegrcnt = qib_read_kreg32(dd, kr_rcvegrcnt);
+ if (qib_rcvhdrcnt)
+ dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, qib_rcvhdrcnt);
+ else
+ dd->rcvhdrcnt = 2 * max(dd->cspec->rcvegrcnt,
+ dd->num_pports > 1 ?
1024U : 2048U); +} + +static int qib_7322_get_ib_cfg(struct qib_pportdata *ppd, int which) +{ + + int lsb, ret = 0; + u64 maskr; /* right-justified mask */ + + switch (which) { + + case QIB_IB_CFG_LWID_ENB: /* Get allowed Link-width */ + ret = ppd->link_width_enabled; + goto done; + + case QIB_IB_CFG_LWID: /* Get currently active Link-width */ + ret = ppd->link_width_active; + goto done; + + case QIB_IB_CFG_SPD_ENB: /* Get allowed Link speeds */ + ret = ppd->link_speed_enabled; + goto done; + + case QIB_IB_CFG_SPD: /* Get current Link spd */ + ret = ppd->link_speed_active; + goto done; + + case QIB_IB_CFG_RXPOL_ENB: /* Get Auto-RX-polarity enable */ + lsb = SYM_LSB(IBCCtrlB_0, IB_POLARITY_REV_SUPP); + maskr = SYM_RMASK(IBCCtrlB_0, IB_POLARITY_REV_SUPP); + break; + + case QIB_IB_CFG_LREV_ENB: /* Get Auto-Lane-reversal enable */ + lsb = SYM_LSB(IBCCtrlB_0, IB_LANE_REV_SUPPORTED); + maskr = SYM_RMASK(IBCCtrlB_0, IB_LANE_REV_SUPPORTED); + break; + + case QIB_IB_CFG_LINKLATENCY: + ret = qib_read_kreg_port(ppd, krp_ibcstatus_b) & + SYM_MASK(IBCStatusB_0, LinkRoundTripLatency); + goto done; + + case QIB_IB_CFG_OP_VLS: + ret = ppd->vls_operational; + goto done; + + case QIB_IB_CFG_VL_HIGH_CAP: + ret = 16; + goto done; + + case QIB_IB_CFG_VL_LOW_CAP: + ret = 16; + goto done; + + case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */ + ret = SYM_FIELD(ppd->cpspec->ibcctrl_a, IBCCtrlA_0, + OverrunThreshold); + goto done; + + case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */ + ret = SYM_FIELD(ppd->cpspec->ibcctrl_a, IBCCtrlA_0, + PhyerrThreshold); + goto done; + + case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ + /* will only take effect when the link state changes */ + ret = (ppd->cpspec->ibcctrl_a & + SYM_MASK(IBCCtrlA_0, LinkDownDefaultState)) ? + IB_LINKINITCMD_SLEEP : IB_LINKINITCMD_POLL; + goto done; + + case QIB_IB_CFG_HRTBT: /* Get Heartbeat off/enable/auto */ + lsb = IBA7322_IBC_HRTBT_LSB; + maskr = IBA7322_IBC_HRTBT_RMASK; /* OR of AUTO and ENB */ + break; + + case QIB_IB_CFG_PMA_TICKS: + /* + * 0x00 = 10x link transfer rate or 4 nsec. for 2.5Gbs + * Since the clock is always 250MHz, the value is 3, 1 or 0. + */ + if (ppd->link_speed_active == QIB_IB_QDR) + ret = 3; + else if (ppd->link_speed_active == QIB_IB_DDR) + ret = 1; + else + ret = 0; + goto done; + + default: + ret = -EINVAL; + goto done; + } + ret = (int)((ppd->cpspec->ibcctrl_b >> lsb) & maskr); +done: + return ret; +} + +/* + * Below again cribbed liberally from older version. Do not lean + * heavily on it. + */ +#define IBA7322_IBC_DLIDLMC_SHIFT QIB_7322_IBCCtrlB_0_IB_DLID_LSB +#define IBA7322_IBC_DLIDLMC_MASK (QIB_7322_IBCCtrlB_0_IB_DLID_RMASK \ + | (QIB_7322_IBCCtrlB_0_IB_DLID_MASK_RMASK << 16)) + +static int qib_7322_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val) +{ + struct qib_devdata *dd = ppd->dd; + u64 maskr; /* right-justified mask */ + int lsb, ret = 0; + u16 lcmd, licmd; + unsigned long flags; + + switch (which) { + case QIB_IB_CFG_LIDLMC: + /* + * Set LID and LMC. Combined to avoid possible hazard + * caller puts LMC in 16MSbits, DLID in 16LSbits of val + */ + lsb = IBA7322_IBC_DLIDLMC_SHIFT; + maskr = IBA7322_IBC_DLIDLMC_MASK; + /* + * For header-checking, the SLID in the packet will + * be masked with SendIBSLMCMask, and compared + * with SendIBSLIDAssignMask. Make sure we do not + * set any bits not covered by the mask, or we get + * false-positives. 
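+ * e.g. (illustrative) a caller passing LID 0x1234 with mask 0xfffc
+ * (LMC 2) in the upper 16 bits: only the bits common to all four
+ * LIDs of that group are written as the SLID compare value below.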
+ */
+ qib_write_kreg_port(ppd, krp_sendslid,
+ val & (val >> 16) & SendIBSLIDAssignMask);
+ qib_write_kreg_port(ppd, krp_sendslidmask,
+ (val >> 16) & SendIBSLMCMask);
+ break;
+
+ case QIB_IB_CFG_LWID_ENB: /* set allowed Link-width */
+ ppd->link_width_enabled = val;
+ /* convert IB value to chip register value */
+ if (val == IB_WIDTH_1X)
+ val = 0;
+ else if (val == IB_WIDTH_4X)
+ val = 1;
+ else
+ val = 3;
+ maskr = SYM_RMASK(IBCCtrlB_0, IB_NUM_CHANNELS);
+ lsb = SYM_LSB(IBCCtrlB_0, IB_NUM_CHANNELS);
+ break;
+
+ case QIB_IB_CFG_SPD_ENB: /* set allowed Link speeds */
+ /*
+ * As with width, only write the actual register if the
+ * link is currently down, otherwise takes effect on next
+ * link change. Since setting is being explicitly requested
+ * (via MAD or sysfs), clear autoneg failure status if speed
+ * autoneg is enabled.
+ */
+ ppd->link_speed_enabled = val;
+ val <<= IBA7322_IBC_SPEED_LSB;
+ maskr = IBA7322_IBC_SPEED_MASK | IBA7322_IBC_IBTA_1_2_MASK |
+ IBA7322_IBC_MAX_SPEED_MASK;
+ if (val & (val - 1)) {
+ /* Multiple speeds enabled */
+ val |= IBA7322_IBC_IBTA_1_2_MASK |
+ IBA7322_IBC_MAX_SPEED_MASK;
+ spin_lock_irqsave(&ppd->lflags_lock, flags);
+ ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED;
+ spin_unlock_irqrestore(&ppd->lflags_lock, flags);
+ } else if (val & IBA7322_IBC_SPEED_QDR)
+ val |= IBA7322_IBC_IBTA_1_2_MASK;
+ /* IBTA 1.2 mode + min/max + speed bits are contiguous */
+ lsb = SYM_LSB(IBCCtrlB_0, IB_ENHANCED_MODE);
+ break;
+
+ case QIB_IB_CFG_RXPOL_ENB: /* set Auto-RX-polarity enable */
+ lsb = SYM_LSB(IBCCtrlB_0, IB_POLARITY_REV_SUPP);
+ maskr = SYM_RMASK(IBCCtrlB_0, IB_POLARITY_REV_SUPP);
+ break;
+
+ case QIB_IB_CFG_LREV_ENB: /* set Auto-Lane-reversal enable */
+ lsb = SYM_LSB(IBCCtrlB_0, IB_LANE_REV_SUPPORTED);
+ maskr = SYM_RMASK(IBCCtrlB_0, IB_LANE_REV_SUPPORTED);
+ break;
+
+ case QIB_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
+ maskr = SYM_FIELD(ppd->cpspec->ibcctrl_a, IBCCtrlA_0,
+ OverrunThreshold);
+ if (maskr != val) {
+ ppd->cpspec->ibcctrl_a &=
+ ~SYM_MASK(IBCCtrlA_0, OverrunThreshold);
+ ppd->cpspec->ibcctrl_a |= (u64) val <<
+ SYM_LSB(IBCCtrlA_0, OverrunThreshold);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a,
+ ppd->cpspec->ibcctrl_a);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ }
+ goto bail;
+
+ case QIB_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
+ maskr = SYM_FIELD(ppd->cpspec->ibcctrl_a, IBCCtrlA_0,
+ PhyerrThreshold);
+ if (maskr != val) {
+ ppd->cpspec->ibcctrl_a &=
+ ~SYM_MASK(IBCCtrlA_0, PhyerrThreshold);
+ ppd->cpspec->ibcctrl_a |= (u64) val <<
+ SYM_LSB(IBCCtrlA_0, PhyerrThreshold);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a,
+ ppd->cpspec->ibcctrl_a);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ }
+ goto bail;
+
+ case QIB_IB_CFG_PKEYS: /* update pkeys */
+ maskr = (u64) ppd->pkeys[0] | ((u64) ppd->pkeys[1] << 16) |
+ ((u64) ppd->pkeys[2] << 32) |
+ ((u64) ppd->pkeys[3] << 48);
+ qib_write_kreg_port(ppd, krp_partitionkey, maskr);
+ goto bail;
+
+ case QIB_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
+ /* will only take effect when the link state changes */
+ if (val == IB_LINKINITCMD_POLL)
+ ppd->cpspec->ibcctrl_a &=
+ ~SYM_MASK(IBCCtrlA_0, LinkDownDefaultState);
+ else /* SLEEP */
+ ppd->cpspec->ibcctrl_a |=
+ SYM_MASK(IBCCtrlA_0, LinkDownDefaultState);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ goto bail;
+
+ case QIB_IB_CFG_MTU: /* update the MTU in IBC */
+ /*
+ * Update our housekeeping variables, and set IBC max
+ * size, same as init code; max IBC is max
we allow in
+ * buffer, less the qword pbc, plus 1 for ICRC, in dwords
+ * Set even if it's unchanged, print debug message only
+ * on changes.
+ */
+ val = (ppd->ibmaxlen >> 2) + 1;
+ ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, MaxPktLen);
+ ppd->cpspec->ibcctrl_a |= (u64)val <<
+ SYM_LSB(IBCCtrlA_0, MaxPktLen);
+ qib_write_kreg_port(ppd, krp_ibcctrl_a,
+ ppd->cpspec->ibcctrl_a);
+ qib_write_kreg(dd, kr_scratch, 0ULL);
+ goto bail;
+
+ case QIB_IB_CFG_LSTATE: /* set the IB link state */
+ switch (val & 0xffff0000) {
+ case IB_LINKCMD_DOWN:
+ lcmd = QLOGIC_IB_IBCC_LINKCMD_DOWN;
+ ppd->cpspec->ibmalfusesnap = 1;
+ ppd->cpspec->ibmalfsnap = read_7322_creg32_port(ppd,
+ crp_errlink);
+ if (!ppd->cpspec->ibdeltainprog &&
+ qib_compat_ddr_negotiate) {
+ ppd->cpspec->ibdeltainprog = 1;
+ ppd->cpspec->ibsymsnap =
+ read_7322_creg32_port(ppd,
+ crp_ibsymbolerr);
+ ppd->cpspec->iblnkerrsnap =
+ read_7322_creg32_port(ppd,
+ crp_iblinkerrrecov);
+ }
+ break;
+
+ case IB_LINKCMD_ARMED:
+ lcmd = QLOGIC_IB_IBCC_LINKCMD_ARMED;
+ if (ppd->cpspec->ibmalfusesnap) {
+ ppd->cpspec->ibmalfusesnap = 0;
+ ppd->cpspec->ibmalfdelta +=
+ read_7322_creg32_port(ppd,
+ crp_errlink) -
+ ppd->cpspec->ibmalfsnap;
+ }
+ break;
+
+ case IB_LINKCMD_ACTIVE:
+ lcmd = QLOGIC_IB_IBCC_LINKCMD_ACTIVE;
+ break;
+
+ default:
+ ret = -EINVAL;
+ qib_dev_err(dd, "bad linkcmd req 0x%x\n", val >> 16);
+ goto bail;
+ }
+ switch (val & 0xffff) {
+ case IB_LINKINITCMD_NOP:
+ licmd = 0;
+ break;
+
+ case IB_LINKINITCMD_POLL:
+ licmd = QLOGIC_IB_IBCC_LINKINITCMD_POLL;
+ break;
+
+ case IB_LINKINITCMD_SLEEP:
+ licmd = QLOGIC_IB_IBCC_LINKINITCMD_SLEEP;
+ break;
+
+ case IB_LINKINITCMD_DISABLE:
+ licmd = QLOGIC_IB_IBCC_LINKINITCMD_DISABLE;
+ ppd->cpspec->chase_end = 0;
+ /*
+ * stop state chase counter and timer, if running.
+ * wait for pending timer, but don't clear .data (ppd)!
+ */
+ if (ppd->cpspec->chase_timer.expires) {
+ del_timer_sync(&ppd->cpspec->chase_timer);
+ ppd->cpspec->chase_timer.expires = 0;
+ }
+ break;
+
+ default:
+ ret = -EINVAL;
+ qib_dev_err(dd, "bad linkinitcmd req 0x%x\n",
+ val & 0xffff);
+ goto bail;
+ }
+ qib_set_ib_7322_lstate(ppd, lcmd, licmd);
+ goto bail;
+
+ case QIB_IB_CFG_OP_VLS:
+ if (ppd->vls_operational != val) {
+ ppd->vls_operational = val;
+ set_vls(ppd);
+ }
+ goto bail;
+
+ case QIB_IB_CFG_VL_HIGH_LIMIT:
+ qib_write_kreg_port(ppd, krp_highprio_limit, val);
+ goto bail;
+
+ case QIB_IB_CFG_HRTBT: /* set Heartbeat off/enable/auto */
+ if (val > 3) {
+ ret = -EINVAL;
+ goto bail;
+ }
+ lsb = IBA7322_IBC_HRTBT_LSB;
+ maskr = IBA7322_IBC_HRTBT_RMASK; /* OR of AUTO and ENB */
+ break;
+
+ case QIB_IB_CFG_PORT:
+ /* val is the port number of the switch we are connected to.
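+ * Only acted on for r1 silicon, where a change of switch port
+ * restarts the IPG tuning work with a fresh try count.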
*/ + if (ppd->dd->cspec->r1) { + cancel_delayed_work(&ppd->cpspec->ipg_work); + ppd->cpspec->ipg_tries = 0; + } + goto bail; + + default: + ret = -EINVAL; + goto bail; + } + ppd->cpspec->ibcctrl_b &= ~(maskr << lsb); + ppd->cpspec->ibcctrl_b |= (((u64) val & maskr) << lsb); + qib_write_kreg_port(ppd, krp_ibcctrl_b, ppd->cpspec->ibcctrl_b); + qib_write_kreg(dd, kr_scratch, 0); +bail: + return ret; +} + +static int qib_7322_set_loopback(struct qib_pportdata *ppd, const char *what) +{ + int ret = 0; + u64 val, ctrlb; + + /* only IBC loopback, may add serdes and xgxs loopbacks later */ + if (!strncmp(what, "ibc", 3)) { + ppd->cpspec->ibcctrl_a |= SYM_MASK(IBCCtrlA_0, + Loopback); + val = 0; /* disable heart beat, so link will come up */ + qib_devinfo(ppd->dd->pcidev, "Enabling IB%u:%u IBC loopback\n", + ppd->dd->unit, ppd->port); + } else if (!strncmp(what, "off", 3)) { + ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, + Loopback); + /* enable heart beat again */ + val = IBA7322_IBC_HRTBT_RMASK << IBA7322_IBC_HRTBT_LSB; + qib_devinfo(ppd->dd->pcidev, + "Disabling IB%u:%u IBC loopback (normal)\n", + ppd->dd->unit, ppd->port); + } else + ret = -EINVAL; + if (!ret) { + qib_write_kreg_port(ppd, krp_ibcctrl_a, + ppd->cpspec->ibcctrl_a); + ctrlb = ppd->cpspec->ibcctrl_b & ~(IBA7322_IBC_HRTBT_MASK + << IBA7322_IBC_HRTBT_LSB); + ppd->cpspec->ibcctrl_b = ctrlb | val; + qib_write_kreg_port(ppd, krp_ibcctrl_b, + ppd->cpspec->ibcctrl_b); + qib_write_kreg(ppd->dd, kr_scratch, 0); + } + return ret; +} + +static void get_vl_weights(struct qib_pportdata *ppd, unsigned regno, + struct ib_vl_weight_elem *vl) +{ + unsigned i; + + for (i = 0; i < 16; i++, regno++, vl++) { + u32 val = qib_read_kreg_port(ppd, regno); + + vl->vl = (val >> SYM_LSB(LowPriority0_0, VirtualLane)) & + SYM_RMASK(LowPriority0_0, VirtualLane); + vl->weight = (val >> SYM_LSB(LowPriority0_0, Weight)) & + SYM_RMASK(LowPriority0_0, Weight); + } +} + +static void set_vl_weights(struct qib_pportdata *ppd, unsigned regno, + struct ib_vl_weight_elem *vl) +{ + unsigned i; + + for (i = 0; i < 16; i++, regno++, vl++) { + u64 val; + + val = ((vl->vl & SYM_RMASK(LowPriority0_0, VirtualLane)) << + SYM_LSB(LowPriority0_0, VirtualLane)) | + ((vl->weight & SYM_RMASK(LowPriority0_0, Weight)) << + SYM_LSB(LowPriority0_0, Weight)); + qib_write_kreg_port(ppd, regno, val); + } + if (!(ppd->p_sendctrl & SYM_MASK(SendCtrl_0, IBVLArbiterEn))) { + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + ppd->p_sendctrl |= SYM_MASK(SendCtrl_0, IBVLArbiterEn); + qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + } +} + +static int qib_7322_get_ib_table(struct qib_pportdata *ppd, int which, void *t) +{ + switch (which) { + case QIB_IB_TBL_VL_HIGH_ARB: + get_vl_weights(ppd, krp_highprio_0, t); + break; + + case QIB_IB_TBL_VL_LOW_ARB: + get_vl_weights(ppd, krp_lowprio_0, t); + break; + + default: + return -EINVAL; + } + return 0; +} + +static int qib_7322_set_ib_table(struct qib_pportdata *ppd, int which, void *t) +{ + switch (which) { + case QIB_IB_TBL_VL_HIGH_ARB: + set_vl_weights(ppd, krp_highprio_0, t); + break; + + case QIB_IB_TBL_VL_LOW_ARB: + set_vl_weights(ppd, krp_lowprio_0, t); + break; + + default: + return -EINVAL; + } + return 0; +} + +static void qib_update_7322_usrhead(struct qib_ctxtdata *rcd, u64 hd, + u32 updegr, u32 egrhd, u32 npkts) +{ + /* + * Need to write timeout register before updating rcvhdrhead to 
ensure
+ * that the timer is enabled on reception of a packet.
+ */
+ if (hd >> IBA7322_HDRHEAD_PKTINT_SHIFT)
+ adjust_rcv_timeout(rcd, npkts);
+ if (updegr)
+ qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt);
+ mmiowb();
+ qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
+ qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt);
+ mmiowb();
+}
+
+static u32 qib_7322_hdrqempty(struct qib_ctxtdata *rcd)
+{
+ u32 head, tail;
+
+ head = qib_read_ureg32(rcd->dd, ur_rcvhdrhead, rcd->ctxt);
+ if (rcd->rcvhdrtail_kvaddr)
+ tail = qib_get_rcvhdrtail(rcd);
+ else
+ tail = qib_read_ureg32(rcd->dd, ur_rcvhdrtail, rcd->ctxt);
+ return head == tail;
+}
+
+#define RCVCTRL_COMMON_MODS (QIB_RCVCTRL_CTXT_ENB | \
+ QIB_RCVCTRL_CTXT_DIS | \
+ QIB_RCVCTRL_TIDFLOW_ENB | \
+ QIB_RCVCTRL_TIDFLOW_DIS | \
+ QIB_RCVCTRL_TAILUPD_ENB | \
+ QIB_RCVCTRL_TAILUPD_DIS | \
+ QIB_RCVCTRL_INTRAVAIL_ENB | \
+ QIB_RCVCTRL_INTRAVAIL_DIS | \
+ QIB_RCVCTRL_BP_ENB | \
+ QIB_RCVCTRL_BP_DIS)
+
+#define RCVCTRL_PORT_MODS (QIB_RCVCTRL_CTXT_ENB | \
+ QIB_RCVCTRL_CTXT_DIS | \
+ QIB_RCVCTRL_PKEY_DIS | \
+ QIB_RCVCTRL_PKEY_ENB)
+
+/*
+ * Modify the RCVCTRL register in chip-specific way. This
+ * is a function because bit positions and (future) register
+ * location is chip-specific, but the needed operations are
+ * generic. <op> is a bit-mask because we often want to
+ * do multiple modifications.
+ */
+static void rcvctrl_7322_mod(struct qib_pportdata *ppd, unsigned int op,
+ int ctxt)
+{
+ struct qib_devdata *dd = ppd->dd;
+ struct qib_ctxtdata *rcd;
+ u64 mask, val;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags);
+
+ if (op & QIB_RCVCTRL_TIDFLOW_ENB)
+ dd->rcvctrl |= SYM_MASK(RcvCtrl, TidFlowEnable);
+ if (op & QIB_RCVCTRL_TIDFLOW_DIS)
+ dd->rcvctrl &= ~SYM_MASK(RcvCtrl, TidFlowEnable);
+ if (op & QIB_RCVCTRL_TAILUPD_ENB)
+ dd->rcvctrl |= SYM_MASK(RcvCtrl, TailUpd);
+ if (op & QIB_RCVCTRL_TAILUPD_DIS)
+ dd->rcvctrl &= ~SYM_MASK(RcvCtrl, TailUpd);
+ if (op & QIB_RCVCTRL_PKEY_ENB)
+ ppd->p_rcvctrl &= ~SYM_MASK(RcvCtrl_0, RcvPartitionKeyDisable);
+ if (op & QIB_RCVCTRL_PKEY_DIS)
+ ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvPartitionKeyDisable);
+ if (ctxt < 0) {
+ mask = (1ULL << dd->ctxtcnt) - 1;
+ rcd = NULL;
+ } else {
+ mask = (1ULL << ctxt);
+ rcd = dd->rcd[ctxt];
+ }
+ if ((op & QIB_RCVCTRL_CTXT_ENB) && rcd) {
+ ppd->p_rcvctrl |=
+ (mask << SYM_LSB(RcvCtrl_0, ContextEnableKernel));
+ if (!(dd->flags & QIB_NODMA_RTAIL)) {
+ op |= QIB_RCVCTRL_TAILUPD_ENB; /* need reg write */
+ dd->rcvctrl |= SYM_MASK(RcvCtrl, TailUpd);
+ }
+ /* Write these registers before the context is enabled. */
+ qib_write_kreg_ctxt(dd, krc_rcvhdrtailaddr, ctxt,
+ rcd->rcvhdrqtailaddr_phys);
+ qib_write_kreg_ctxt(dd, krc_rcvhdraddr, ctxt,
+ rcd->rcvhdrq_phys);
+ rcd->seq_cnt = 1;
+ }
+ if (op & QIB_RCVCTRL_CTXT_DIS)
+ ppd->p_rcvctrl &=
+ ~(mask << SYM_LSB(RcvCtrl_0, ContextEnableKernel));
+ if (op & QIB_RCVCTRL_BP_ENB)
+ dd->rcvctrl |= mask << SYM_LSB(RcvCtrl, dontDropRHQFull);
+ if (op & QIB_RCVCTRL_BP_DIS)
+ dd->rcvctrl &= ~(mask << SYM_LSB(RcvCtrl, dontDropRHQFull));
+ if (op & QIB_RCVCTRL_INTRAVAIL_ENB)
+ dd->rcvctrl |= (mask << SYM_LSB(RcvCtrl, IntrAvail));
+ if (op & QIB_RCVCTRL_INTRAVAIL_DIS)
+ dd->rcvctrl &= ~(mask << SYM_LSB(RcvCtrl, IntrAvail));
+ /*
+ * Decide which registers to write depending on the ops enabled.
+ * Special case is "flush" (no bits set at all)
+ * which needs to write both.
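+ * e.g. rcvctrl_7322_mod(ppd, 0, -1) rewrites both kr_rcvctrl and
+ * krp_rcvctrl from their shadow copies without changing any bits.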
+ */ + if (op == 0 || (op & RCVCTRL_COMMON_MODS)) + qib_write_kreg(dd, kr_rcvctrl, dd->rcvctrl); + if (op == 0 || (op & RCVCTRL_PORT_MODS)) + qib_write_kreg_port(ppd, krp_rcvctrl, ppd->p_rcvctrl); + if ((op & QIB_RCVCTRL_CTXT_ENB) && dd->rcd[ctxt]) { + /* + * Init the context registers also; if we were + * disabled, tail and head should both be zero + * already from the enable, but since we don't + * know, we have to do it explicitly. + */ + val = qib_read_ureg32(dd, ur_rcvegrindextail, ctxt); + qib_write_ureg(dd, ur_rcvegrindexhead, val, ctxt); + + /* be sure enabling write seen; hd/tl should be 0 */ + (void) qib_read_kreg32(dd, kr_scratch); + val = qib_read_ureg32(dd, ur_rcvhdrtail, ctxt); + dd->rcd[ctxt]->head = val; + /* If kctxt, interrupt on next receive. */ + if (ctxt < dd->first_user_ctxt) + val |= dd->rhdrhead_intr_off; + qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt); + } else if ((op & QIB_RCVCTRL_INTRAVAIL_ENB) && + dd->rcd[ctxt] && dd->rhdrhead_intr_off) { + /* arm rcv interrupt */ + val = dd->rcd[ctxt]->head | dd->rhdrhead_intr_off; + qib_write_ureg(dd, ur_rcvhdrhead, val, ctxt); + } + if (op & QIB_RCVCTRL_CTXT_DIS) { + unsigned f; + + /* Now that the context is disabled, clear these registers. */ + if (ctxt >= 0) { + qib_write_kreg_ctxt(dd, krc_rcvhdrtailaddr, ctxt, 0); + qib_write_kreg_ctxt(dd, krc_rcvhdraddr, ctxt, 0); + for (f = 0; f < NUM_TIDFLOWS_CTXT; f++) + qib_write_ureg(dd, ur_rcvflowtable + f, + TIDFLOW_ERRBITS, ctxt); + } else { + unsigned i; + + for (i = 0; i < dd->cfgctxts; i++) { + qib_write_kreg_ctxt(dd, krc_rcvhdrtailaddr, + i, 0); + qib_write_kreg_ctxt(dd, krc_rcvhdraddr, i, 0); + for (f = 0; f < NUM_TIDFLOWS_CTXT; f++) + qib_write_ureg(dd, ur_rcvflowtable + f, + TIDFLOW_ERRBITS, i); + } + } + } + spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); +}
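Since <op> is a bit-mask, a caller can fold several rcvctrl changes into a single locked read-modify-write, and an all-zero op acts as a flush that rewrites both registers from their shadows. A hedged usage sketch (the particular combination and context number are illustrative, not taken from the driver):

/* Illustrative only, not patch code. */
static void rcvctrl_usage_sketch(struct qib_pportdata *ppd)
{
	/* enable kernel context 0 and its interrupt-available bit at once */
	rcvctrl_7322_mod(ppd, QIB_RCVCTRL_CTXT_ENB |
			 QIB_RCVCTRL_INTRAVAIL_ENB, 0);

	/* "flush": no op bits set, rewrite common and per-port registers */
	rcvctrl_7322_mod(ppd, 0, -1);
}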
+ +/* + * Modify the SENDCTRL register in a chip-specific way. This + * is a function because there are multiple such registers with + * slightly different layouts. + * The chip doesn't allow back-to-back sendctrl writes, so write + * the scratch register after writing sendctrl. + * + * Which register is written depends on the operation. + * Most operate on the common register, while + * SEND_ENB and SEND_DIS operate on the per-port ones. + * SEND_ENB is included in common because it can change SPCL_TRIG + */ +#define SENDCTRL_COMMON_MODS (\ + QIB_SENDCTRL_CLEAR | \ + QIB_SENDCTRL_AVAIL_DIS | \ + QIB_SENDCTRL_AVAIL_ENB | \ + QIB_SENDCTRL_AVAIL_BLIP | \ + QIB_SENDCTRL_DISARM | \ + QIB_SENDCTRL_DISARM_ALL | \ + QIB_SENDCTRL_SEND_ENB) + +#define SENDCTRL_PORT_MODS (\ + QIB_SENDCTRL_CLEAR | \ + QIB_SENDCTRL_SEND_ENB | \ + QIB_SENDCTRL_SEND_DIS | \ + QIB_SENDCTRL_FLUSH) + +static void sendctrl_7322_mod(struct qib_pportdata *ppd, u32 op) +{ + struct qib_devdata *dd = ppd->dd; + u64 tmp_dd_sendctrl; + unsigned long flags; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + + /* First the dd ones that are "sticky", saved in shadow */ + if (op & QIB_SENDCTRL_CLEAR) + dd->sendctrl = 0; + if (op & QIB_SENDCTRL_AVAIL_DIS) + dd->sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd); + else if (op & QIB_SENDCTRL_AVAIL_ENB) { + dd->sendctrl |= SYM_MASK(SendCtrl, SendBufAvailUpd); + if (dd->flags & QIB_USE_SPCL_TRIG) + dd->sendctrl |= SYM_MASK(SendCtrl, SpecialTriggerEn); + } + + /* Then the ppd ones that are "sticky", saved in shadow */ + if (op & QIB_SENDCTRL_SEND_DIS) + ppd->p_sendctrl &= ~SYM_MASK(SendCtrl_0, SendEnable); + else if (op & QIB_SENDCTRL_SEND_ENB) + ppd->p_sendctrl |= SYM_MASK(SendCtrl_0, SendEnable); + + if (op & QIB_SENDCTRL_DISARM_ALL) { + u32 i, last; + + tmp_dd_sendctrl = dd->sendctrl; + last = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS; + /* + * Disarm any buffers that are not yet launched, + * disabling updates until done. + */ + tmp_dd_sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd); + for (i = 0; i < last; i++) { + qib_write_kreg(dd, kr_sendctrl, + tmp_dd_sendctrl | + SYM_MASK(SendCtrl, Disarm) | i); + qib_write_kreg(dd, kr_scratch, 0); + } + } + + if (op & QIB_SENDCTRL_FLUSH) { + u64 tmp_ppd_sendctrl = ppd->p_sendctrl; + + /* + * Now drain all the fifos. The Abort bit should never be + * needed, so for now, at least, we don't use it. + */ + tmp_ppd_sendctrl |= + SYM_MASK(SendCtrl_0, TxeDrainRmFifo) | + SYM_MASK(SendCtrl_0, TxeDrainLaFifo) | + SYM_MASK(SendCtrl_0, TxeBypassIbc); + qib_write_kreg_port(ppd, krp_sendctrl, tmp_ppd_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + tmp_dd_sendctrl = dd->sendctrl; + + if (op & QIB_SENDCTRL_DISARM) + tmp_dd_sendctrl |= SYM_MASK(SendCtrl, Disarm) | + ((op & QIB_7322_SendCtrl_DisarmSendBuf_RMASK) << + SYM_LSB(SendCtrl, DisarmSendBuf)); + if ((op & QIB_SENDCTRL_AVAIL_BLIP) && + (dd->sendctrl & SYM_MASK(SendCtrl, SendBufAvailUpd))) + tmp_dd_sendctrl &= ~SYM_MASK(SendCtrl, SendBufAvailUpd); + + if (op == 0 || (op & SENDCTRL_COMMON_MODS)) { + qib_write_kreg(dd, kr_sendctrl, tmp_dd_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + if (op == 0 || (op & SENDCTRL_PORT_MODS)) { + qib_write_kreg_port(ppd, krp_sendctrl, ppd->p_sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + if (op & QIB_SENDCTRL_AVAIL_BLIP) { + qib_write_kreg(dd, kr_sendctrl, dd->sendctrl); + qib_write_kreg(dd, kr_scratch, 0); + } + + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + + if (op & QIB_SENDCTRL_FLUSH) { + u32 v; + /* + * Ensure writes have hit the chip, then do a few + * more reads, to allow DMA of pioavail registers + * to occur, so in-memory copy is in sync with + * the chip. Not always safe to sleep.
+ */ + v = qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_scratch, v); + v = qib_read_kreg32(dd, kr_scratch); + qib_write_kreg(dd, kr_scratch, v); + qib_read_kreg32(dd, kr_scratch); + } +} + +#define _PORT_VIRT_FLAG 0x8000U /* "virtual", need adjustments */ +#define _PORT_64BIT_FLAG 0x10000U /* not "virtual", but 64bit */ +#define _PORT_CNTR_IDXMASK 0x7fffU /* mask off flags above */ + +/** + * qib_portcntr_7322 - read a per-port chip counter + * @ppd: the qlogic_ib pport + * @reg: the counter to read (not a chip offset) + */ +static u64 qib_portcntr_7322(struct qib_pportdata *ppd, u32 reg) +{ + struct qib_devdata *dd = ppd->dd; + u64 ret = 0ULL; + u16 creg; + /* 0xffff for unimplemented or synthesized counters */ + static const u32 xlator[] = { + [QIBPORTCNTR_PKTSEND] = crp_pktsend | _PORT_64BIT_FLAG, + [QIBPORTCNTR_WORDSEND] = crp_wordsend | _PORT_64BIT_FLAG, + [QIBPORTCNTR_PSXMITDATA] = crp_psxmitdatacount, + [QIBPORTCNTR_PSXMITPKTS] = crp_psxmitpktscount, + [QIBPORTCNTR_PSXMITWAIT] = crp_psxmitwaitcount, + [QIBPORTCNTR_SENDSTALL] = crp_sendstall, + [QIBPORTCNTR_PKTRCV] = crp_pktrcv | _PORT_64BIT_FLAG, + [QIBPORTCNTR_PSRCVDATA] = crp_psrcvdatacount, + [QIBPORTCNTR_PSRCVPKTS] = crp_psrcvpktscount, + [QIBPORTCNTR_RCVEBP] = crp_rcvebp, + [QIBPORTCNTR_RCVOVFL] = crp_rcvovfl, + [QIBPORTCNTR_WORDRCV] = crp_wordrcv | _PORT_64BIT_FLAG, + [QIBPORTCNTR_RXDROPPKT] = 0xffff, /* not needed for 7322 */ + [QIBPORTCNTR_RXLOCALPHYERR] = crp_rxotherlocalphyerr, + [QIBPORTCNTR_RXVLERR] = crp_rxvlerr, + [QIBPORTCNTR_ERRICRC] = crp_erricrc, + [QIBPORTCNTR_ERRVCRC] = crp_errvcrc, + [QIBPORTCNTR_ERRLPCRC] = crp_errlpcrc, + [QIBPORTCNTR_BADFORMAT] = crp_badformat, + [QIBPORTCNTR_ERR_RLEN] = crp_err_rlen, + [QIBPORTCNTR_IBSYMBOLERR] = crp_ibsymbolerr, + [QIBPORTCNTR_INVALIDRLEN] = crp_invalidrlen, + [QIBPORTCNTR_UNSUPVL] = crp_txunsupvl, + [QIBPORTCNTR_EXCESSBUFOVFL] = crp_excessbufferovfl, + [QIBPORTCNTR_ERRLINK] = crp_errlink, + [QIBPORTCNTR_IBLINKDOWN] = crp_iblinkdown, + [QIBPORTCNTR_IBLINKERRRECOV] = crp_iblinkerrrecov, + [QIBPORTCNTR_LLI] = crp_locallinkintegrityerr, + [QIBPORTCNTR_VL15PKTDROP] = crp_vl15droppedpkt, + [QIBPORTCNTR_ERRPKEY] = crp_errpkey, + /* + * the next 3 aren't really counters, but were implemented + * as counters in older chips, so still get accessed as + * though they were counters from this code. + */ + [QIBPORTCNTR_PSINTERVAL] = krp_psinterval, + [QIBPORTCNTR_PSSTART] = krp_psstart, + [QIBPORTCNTR_PSSTAT] = krp_psstat, + /* pseudo-counter, summed for all ports */ + [QIBPORTCNTR_KHDROVFL] = 0xffff, + }; + + if (reg >= ARRAY_SIZE(xlator)) { + qib_devinfo(ppd->dd->pcidev, + "Unimplemented portcounter %u\n", reg); + goto done; + } + creg = xlator[reg] & _PORT_CNTR_IDXMASK; + + /* handle non-counters and special cases first */ + if (reg == QIBPORTCNTR_KHDROVFL) { + int i; + + /* sum over all kernel contexts (skip if mini_init) */ + for (i = 0; dd->rcd && i < dd->first_user_ctxt; i++) { + struct qib_ctxtdata *rcd = dd->rcd[i]; + + if (!rcd || rcd->ppd != ppd) + continue; + ret += read_7322_creg32(dd, cr_base_egrovfl + i); + } + goto done; + } else if (reg == QIBPORTCNTR_RXDROPPKT) { + /* + * Used as part of the synthesis of port_rcv_errors + * in the verbs code for IBTA counters. Not needed for 7322, + * because all the errors are already counted by other cntrs.
+ */ + goto done; + } else if (reg == QIBPORTCNTR_PSINTERVAL || + reg == QIBPORTCNTR_PSSTART || reg == QIBPORTCNTR_PSSTAT) { + /* were counters in older chips, now per-port kernel regs */ + ret = qib_read_kreg_port(ppd, creg); + goto done; + } + + /* + * Only fast increment counters are 64 bits; use 32 bit reads to + * avoid two independent reads when on Opteron. + */ + if (xlator[reg] & _PORT_64BIT_FLAG) + ret = read_7322_creg_port(ppd, creg); + else + ret = read_7322_creg32_port(ppd, creg); + if (creg == crp_ibsymbolerr) { + if (ppd->cpspec->ibdeltainprog) + ret -= ret - ppd->cpspec->ibsymsnap; + ret -= ppd->cpspec->ibsymdelta; + } else if (creg == crp_iblinkerrrecov) { + if (ppd->cpspec->ibdeltainprog) + ret -= ret - ppd->cpspec->iblnkerrsnap; + ret -= ppd->cpspec->iblnkerrdelta; + } else if (creg == crp_errlink) + ret -= ppd->cpspec->ibmalfdelta; + else if (creg == crp_iblinkdown) + ret += ppd->cpspec->iblnkdowndelta; +done: + return ret; +} + +/* + * Device counter names (not port-specific), one line per stat, + * single string. Used by utilities like ipathstats to print the stats + * in a way which works for different versions of drivers, without changing + * the utility. Names need to be 12 chars or less (w/o newline), for proper + * display by utility. + * Non-error counters are first. + * Start of "error" counters is indicated by a leading "E " on the first + * "error" counter, and doesn't count in label length. + * The EgrOvfl list needs to be last so we truncate them at the configured + * context count for the device. + * cntr7322indices contains the corresponding register indices. + */ +static const char cntr7322names[] = + "Interrupts\n" + "HostBusStall\n" + "E RxTIDFull\n" + "RxTIDInvalid\n" + "RxTIDFloDrop\n" /* 7322 only */ + "Ctxt0EgrOvfl\n" + "Ctxt1EgrOvfl\n" + "Ctxt2EgrOvfl\n" + "Ctxt3EgrOvfl\n" + "Ctxt4EgrOvfl\n" + "Ctxt5EgrOvfl\n" + "Ctxt6EgrOvfl\n" + "Ctxt7EgrOvfl\n" + "Ctxt8EgrOvfl\n" + "Ctxt9EgrOvfl\n" + "Ctx10EgrOvfl\n" + "Ctx11EgrOvfl\n" + "Ctx12EgrOvfl\n" + "Ctx13EgrOvfl\n" + "Ctx14EgrOvfl\n" + "Ctx15EgrOvfl\n" + "Ctx16EgrOvfl\n" + "Ctx17EgrOvfl\n" + ; + +static const u32 cntr7322indices[] = { + cr_lbint | _PORT_64BIT_FLAG, + cr_lbstall | _PORT_64BIT_FLAG, + cr_tidfull, + cr_tidinvalid, + cr_rxtidflowdrop, + cr_base_egrovfl + 0, + cr_base_egrovfl + 1, + cr_base_egrovfl + 2, + cr_base_egrovfl + 3, + cr_base_egrovfl + 4, + cr_base_egrovfl + 5, + cr_base_egrovfl + 6, + cr_base_egrovfl + 7, + cr_base_egrovfl + 8, + cr_base_egrovfl + 9, + cr_base_egrovfl + 10, + cr_base_egrovfl + 11, + cr_base_egrovfl + 12, + cr_base_egrovfl + 13, + cr_base_egrovfl + 14, + cr_base_egrovfl + 15, + cr_base_egrovfl + 16, + cr_base_egrovfl + 17, +};
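cntr7322names and cntr7322indices are parallel structures that must stay in sync, with the trailing EgrOvfl entries truncated at runtime to the configured context count. A small self-check sketch of that invariant (the counting helper and check function are ours, not the driver's):

/* Illustrative only, not patch code. */
static unsigned count_cntr_names(const char *names)
{
	unsigned n = 0;

	while (*names)
		if (*names++ == '\n')
			n++;
	return n;
}

static void check_cntr_tables_sketch(void)
{
	/* one newline-terminated name per register index */
	WARN_ON(count_cntr_names(cntr7322names) !=
		ARRAY_SIZE(cntr7322indices));
}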
+ +/* + * same as cntr7322names and cntr7322indices, but for port-specific counters. + * portcntr7322indices is somewhat complicated by some registers needing + * adjustments of various kinds, and those are ORed with _PORT_VIRT_FLAG + */ +static const char portcntr7322names[] = + "TxPkt\n" + "TxFlowPkt\n" + "TxWords\n" + "RxPkt\n" + "RxFlowPkt\n" + "RxWords\n" + "TxFlowStall\n" + "TxDmaDesc\n" /* 7220 and 7322-only */ + "E RxDlidFltr\n" /* 7220 and 7322-only */ + "IBStatusChng\n" + "IBLinkDown\n" + "IBLnkRecov\n" + "IBRxLinkErr\n" + "IBSymbolErr\n" + "RxLLIErr\n" + "RxBadFormat\n" + "RxBadLen\n" + "RxBufOvrfl\n" + "RxEBP\n" + "RxFlowCtlErr\n" + "RxICRCerr\n" + "RxLPCRCerr\n" + "RxVCRCerr\n" + "RxInvalLen\n" + "RxInvalPKey\n" + "RxPktDropped\n" + "TxBadLength\n" + "TxDropped\n" + "TxInvalLen\n" + "TxUnderrun\n" + "TxUnsupVL\n" + "RxLclPhyErr\n" /* 7220 and 7322-only from here down */ + "RxVL15Drop\n" + "RxVlErr\n" + "XcessBufOvfl\n" + "RxQPBadCtxt\n" /* 7322-only from here down */ + "TXBadHeader\n" + ; + +static const u32 portcntr7322indices[] = { + QIBPORTCNTR_PKTSEND | _PORT_VIRT_FLAG, + crp_pktsendflow, + QIBPORTCNTR_WORDSEND | _PORT_VIRT_FLAG, + QIBPORTCNTR_PKTRCV | _PORT_VIRT_FLAG, + crp_pktrcvflowctrl, + QIBPORTCNTR_WORDRCV | _PORT_VIRT_FLAG, + QIBPORTCNTR_SENDSTALL | _PORT_VIRT_FLAG, + crp_txsdmadesc | _PORT_64BIT_FLAG, + crp_rxdlidfltr, + crp_ibstatuschange, + QIBPORTCNTR_IBLINKDOWN | _PORT_VIRT_FLAG, + QIBPORTCNTR_IBLINKERRRECOV | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRLINK | _PORT_VIRT_FLAG, + QIBPORTCNTR_IBSYMBOLERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_LLI | _PORT_VIRT_FLAG, + QIBPORTCNTR_BADFORMAT | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERR_RLEN | _PORT_VIRT_FLAG, + QIBPORTCNTR_RCVOVFL | _PORT_VIRT_FLAG, + QIBPORTCNTR_RCVEBP | _PORT_VIRT_FLAG, + crp_rcvflowctrlviol, + QIBPORTCNTR_ERRICRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRLPCRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRVCRC | _PORT_VIRT_FLAG, + QIBPORTCNTR_INVALIDRLEN | _PORT_VIRT_FLAG, + QIBPORTCNTR_ERRPKEY | _PORT_VIRT_FLAG, + QIBPORTCNTR_RXDROPPKT | _PORT_VIRT_FLAG, + crp_txminmaxlenerr, + crp_txdroppedpkt, + crp_txlenerr, + crp_txunderrun, + crp_txunsupvl, + QIBPORTCNTR_RXLOCALPHYERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_VL15PKTDROP | _PORT_VIRT_FLAG, + QIBPORTCNTR_RXVLERR | _PORT_VIRT_FLAG, + QIBPORTCNTR_EXCESSBUFOVFL | _PORT_VIRT_FLAG, + crp_rxqpinvalidctxt, + crp_txhdrerr, +}; + +/* do all the setup to make the counter reads efficient later */ +static void init_7322_cntrnames(struct qib_devdata *dd) +{ + int i, j = 0; + char *s; + + for (i = 0, s = (char *)cntr7322names; s && j <= dd->cfgctxts; + i++) { + /* we always have at least one counter before the egrovfl */ + if (!j && !strncmp("Ctxt0EgrOvfl", s + 1, 12)) + j = 1; + s = strchr(s + 1, '\n'); + if (s && j) + j++; + } + dd->cspec->ncntrs = i; + if (!s) + /* full list; size is without terminating null */ + dd->cspec->cntrnamelen = sizeof(cntr7322names) - 1; + else + dd->cspec->cntrnamelen = 1 + s - cntr7322names; + dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs + * sizeof(u64), GFP_KERNEL); + if (!dd->cspec->cntrs) + qib_dev_err(dd, "Failed allocation for counters\n"); + + for (i = 0, s = (char *)portcntr7322names; s; i++) + s = strchr(s + 1, '\n'); + dd->cspec->nportcntrs = i - 1; + dd->cspec->portcntrnamelen = sizeof(portcntr7322names) - 1; + for (i = 0; i < dd->num_pports; ++i) { + dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs + * sizeof(u64), GFP_KERNEL); + if (!dd->pport[i].cpspec->portcntrs) + qib_dev_err(dd, + "Failed allocation for portcounters\n"); + } +}
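The two read routines that follow expose a two-phase interface: a call with namep non-NULL returns the newline-separated name blob, a call with cntrp non-NULL samples the values, and a pos at or beyond the returned size signals end of data. A hedged consumer sketch (the wrapper function is ours, for illustration):

/* Illustrative only, not patch code. */
static void dump_7322_cntrs_sketch(struct qib_devdata *dd)
{
	char *names = NULL;
	u64 *vals = NULL;
	u32 nbytes, i;

	/* non-NULL namep: get the '\n'-separated name blob */
	nbytes = qib_read_7322cntrs(dd, 0, &names, NULL);
	pr_debug("%u bytes of counter names\n", nbytes);

	/* non-NULL cntrp: sample all device counters */
	nbytes = qib_read_7322cntrs(dd, 0, NULL, &vals);
	for (i = 0; i < nbytes / sizeof(u64); i++)
		pr_debug("cntr[%u] = %llu\n", i,
			 (unsigned long long)vals[i]);
	(void)names;
}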
+ +static u32 qib_read_7322cntrs(struct qib_devdata *dd, loff_t pos, char **namep, + u64 **cntrp) +{ + u32 ret; + + if (namep) { + ret = dd->cspec->cntrnamelen; + if (pos >= ret) + ret = 0; /* final read after getting everything */ + else + *namep = (char *) cntr7322names; + } else { + u64 *cntr = dd->cspec->cntrs; + int i; + + ret = dd->cspec->ncntrs * sizeof(u64); + if (!cntr || pos >= ret) { + /* everything read, or couldn't get memory */ + ret = 0; + goto done; + } + *cntrp = cntr; + for (i = 0; i < dd->cspec->ncntrs; i++) + if (cntr7322indices[i] & _PORT_64BIT_FLAG) + *cntr++ = read_7322_creg(dd, + cntr7322indices[i] & + _PORT_CNTR_IDXMASK); + else + *cntr++ = read_7322_creg32(dd, + cntr7322indices[i]); + } +done: + return ret; +} + +static u32 qib_read_7322portcntrs(struct qib_devdata *dd, loff_t pos, u32 port, + char **namep, u64 **cntrp) +{ + u32 ret; + + if (namep) { + ret = dd->cspec->portcntrnamelen; + if (pos >= ret) + ret = 0; /* final read after getting everything */ + else + *namep = (char *)portcntr7322names; + } else { + struct qib_pportdata *ppd = &dd->pport[port]; + u64 *cntr = ppd->cpspec->portcntrs; + int i; + + ret = dd->cspec->nportcntrs * sizeof(u64); + if (!cntr || pos >= ret) { + /* everything read, or couldn't get memory */ + ret = 0; + goto done; + } + *cntrp = cntr; + for (i = 0; i < dd->cspec->nportcntrs; i++) { + if (portcntr7322indices[i] & _PORT_VIRT_FLAG) + *cntr++ = qib_portcntr_7322(ppd, + portcntr7322indices[i] & + _PORT_CNTR_IDXMASK); + else if (portcntr7322indices[i] & _PORT_64BIT_FLAG) + *cntr++ = read_7322_creg_port(ppd, + portcntr7322indices[i] & + _PORT_CNTR_IDXMASK); + else + *cntr++ = read_7322_creg32_port(ppd, + portcntr7322indices[i]); + } + } +done: + return ret; +} + +/** + * qib_get_7322_faststats - get word counters from chip before they overflow + * @opaque: contains a pointer to the qlogic_ib device qib_devdata + * + * VESTIGIAL IBA7322 has no "small fast counters", so the only + * real purpose of this function is to maintain the notion of + * "active time", which in turn is only logged into the eeprom, + * which we don't have yet for 7322-based boards. + * + * called from add_timer + */ +static void qib_get_7322_faststats(unsigned long opaque) +{ + struct qib_devdata *dd = (struct qib_devdata *) opaque; + struct qib_pportdata *ppd; + unsigned long flags; + u64 traffic_wds; + int pidx; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + + /* + * If the port isn't enabled or is not operational, or + * diags is running (which can cause memory diags to fail), + * skip this port this time. + */ + if (!ppd->link_speed_supported || !(dd->flags & QIB_INITTED) + || dd->diag_client) + continue; + + /* + * Maintain an activity timer, based on traffic + * exceeding a threshold, so we need to check the word-counts + * even if they are 64-bit. + */ + traffic_wds = qib_portcntr_7322(ppd, QIBPORTCNTR_WORDRCV) + + qib_portcntr_7322(ppd, QIBPORTCNTR_WORDSEND); + spin_lock_irqsave(&ppd->dd->eep_st_lock, flags); + traffic_wds -= ppd->dd->traffic_wds; + ppd->dd->traffic_wds += traffic_wds; + if (traffic_wds >= QIB_TRAFFIC_ACTIVE_THRESHOLD) + atomic_add(ACTIVITY_TIMER, &ppd->dd->active_time); + spin_unlock_irqrestore(&ppd->dd->eep_st_lock, flags); + if (ppd->cpspec->qdr_dfe_on && (ppd->link_speed_active & + QIB_IB_QDR) && + (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED | + QIBL_LINKACTIVE)) && + ppd->cpspec->qdr_dfe_time && + time_is_before_jiffies(ppd->cpspec->qdr_dfe_time)) { + ppd->cpspec->qdr_dfe_on = 0; + + qib_write_kreg_port(ppd, krp_static_adapt_dis(2), + ppd->dd->cspec->r1 ?
+ QDR_STATIC_ADAPT_INIT_R1 : + QDR_STATIC_ADAPT_INIT); + force_h1(ppd); + } + } + mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER); +} + +/* + * If we were using MSIx, try to fall back to INTx. + */ +static int qib_7322_intr_fallback(struct qib_devdata *dd) +{ + if (!dd->cspec->num_msix_entries) + return 0; /* already using INTx */ + + qib_devinfo(dd->pcidev, + "MSIx interrupt not detected, trying INTx interrupts\n"); + qib_7322_nomsix(dd); + qib_enable_intx(dd->pcidev); + qib_setup_7322_interrupt(dd, 0); + return 1; +} + +/* + * Reset the XGXS (between serdes and IBC). Slightly less intrusive + * than resetting the IBC or external link state, and useful in some + * cases to cause some retraining. To do this right, we reset IBC + * as well, then return to previous state (which may be still in reset) + * NOTE: some callers of this "know" this writes the current value + * of cpspec->ibcctrl_a as part of its operation, so if that changes, + * check all callers. + */ +static void qib_7322_mini_pcs_reset(struct qib_pportdata *ppd) +{ + u64 val; + struct qib_devdata *dd = ppd->dd; + const u64 reset_bits = SYM_MASK(IBPCSConfig_0, xcv_rreset) | + SYM_MASK(IBPCSConfig_0, xcv_treset) | + SYM_MASK(IBPCSConfig_0, tx_rx_reset); + + val = qib_read_kreg_port(ppd, krp_ib_pcsconfig); + qib_write_kreg(dd, kr_hwerrmask, + dd->cspec->hwerrmask & ~HWE_MASK(statusValidNoEop)); + qib_write_kreg_port(ppd, krp_ibcctrl_a, + ppd->cpspec->ibcctrl_a & + ~SYM_MASK(IBCCtrlA_0, IBLinkEn)); + + qib_write_kreg_port(ppd, krp_ib_pcsconfig, val | reset_bits); + qib_read_kreg32(dd, kr_scratch); + qib_write_kreg_port(ppd, krp_ib_pcsconfig, val & ~reset_bits); + qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); + qib_write_kreg(dd, kr_scratch, 0ULL); + qib_write_kreg(dd, kr_hwerrclear, + SYM_MASK(HwErrClear, statusValidNoEopClear)); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); +} + +/* + * This code for non-IBTA-compliant IB speed negotiation is only known to + * work for the SDR to DDR transition, and only between an HCA and a switch + * with recent firmware. It is based on observed heuristics, rather than + * actual knowledge of the non-compliant speed negotiation. + * It has a number of hard-coded fields, since the hope is to rewrite this + * when a spec is available on how the negotiation is intended to work. + */ +static void autoneg_7322_sendpkt(struct qib_pportdata *ppd, u32 *hdr, + u32 dcnt, u32 *data) +{ + int i; + u64 pbc; + u32 __iomem *piobuf; + u32 pnum, control, len; + struct qib_devdata *dd = ppd->dd; + + i = 0; + len = 7 + dcnt + 1; /* 7 dword header, dword data, icrc */ + control = qib_7322_setpbc_control(ppd, len, 0, 15); + pbc = ((u64) control << 32) | len; + while (!(piobuf = qib_7322_getsendbuf(ppd, pbc, &pnum))) { + if (i++ > 15) + return; + udelay(2); + } + /* disable header check on this packet, since it can't be valid */ + dd->f_txchk_change(dd, pnum, 1, TXCHK_CHG_TYPE_DIS1, NULL); + writeq(pbc, piobuf); + qib_flush_wc(); + qib_pio_copy(piobuf + 2, hdr, 7); + qib_pio_copy(piobuf + 9, data, dcnt); + if (dd->flags & QIB_USE_SPCL_TRIG) { + u32 spcl_off = (pnum >= dd->piobcnt2k) ?
2047 : 1023; + + qib_flush_wc(); + __raw_writel(0xaebecede, piobuf + spcl_off); + } + qib_flush_wc(); + qib_sendbuf_done(dd, pnum); + /* and re-enable hdr check */ + dd->f_txchk_change(dd, pnum, 1, TXCHK_CHG_TYPE_ENAB1, NULL); +} + +/* + * _start packet gets sent twice at start, _done gets sent twice at end + */ +static void qib_autoneg_7322_send(struct qib_pportdata *ppd, int which) +{ + struct qib_devdata *dd = ppd->dd; + static u32 swapped; + u32 dw, i, hcnt, dcnt, *data; + static u32 hdr[7] = { 0xf002ffff, 0x48ffff, 0x6400abba }; + static u32 madpayload_start[0x40] = { + 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0, + 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x1, 0x1388, 0x15e, 0x1, /* rest 0's */ + }; + static u32 madpayload_done[0x40] = { + 0x1810103, 0x1, 0x0, 0x0, 0x2c90000, 0x2c9, 0x0, 0x0, + 0xffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x40000001, 0x1388, 0x15e, /* rest 0's */ + }; + + dcnt = ARRAY_SIZE(madpayload_start); + hcnt = ARRAY_SIZE(hdr); + if (!swapped) { + /* for maintainability, do it at runtime */ + for (i = 0; i < hcnt; i++) { + dw = (__force u32) cpu_to_be32(hdr[i]); + hdr[i] = dw; + } + for (i = 0; i < dcnt; i++) { + dw = (__force u32) cpu_to_be32(madpayload_start[i]); + madpayload_start[i] = dw; + dw = (__force u32) cpu_to_be32(madpayload_done[i]); + madpayload_done[i] = dw; + } + swapped = 1; + } + + data = which ? madpayload_done : madpayload_start; + + autoneg_7322_sendpkt(ppd, hdr, dcnt, data); + qib_read_kreg64(dd, kr_scratch); + udelay(2); + autoneg_7322_sendpkt(ppd, hdr, dcnt, data); + qib_read_kreg64(dd, kr_scratch); + udelay(2); +} + +/* + * Do the absolute minimum to cause an IB speed change, and make it + * ready, but don't actually trigger the change. The caller will + * do that when ready (if link is in Polling training state, it will + * happen immediately, otherwise when link next goes down) + * + * This routine should only be used as part of the DDR autonegotiation + * code for devices that are not compliant with IB 1.2 (or code that + * fixes things up for same). + * + * When link has gone down and autoneg is enabled, or autoneg has + * failed and we give up until next time, we set both speeds, and + * then we want IBTA enabled as well as "use max enabled speed". + */ +static void set_7322_ibspeed_fast(struct qib_pportdata *ppd, u32 speed) +{ + u64 newctrlb; + newctrlb = ppd->cpspec->ibcctrl_b & ~(IBA7322_IBC_SPEED_MASK | + IBA7322_IBC_IBTA_1_2_MASK | + IBA7322_IBC_MAX_SPEED_MASK); + + if (speed & (speed - 1)) /* multiple speeds */ + newctrlb |= (speed << IBA7322_IBC_SPEED_LSB) | + IBA7322_IBC_IBTA_1_2_MASK | + IBA7322_IBC_MAX_SPEED_MASK; + else + newctrlb |= speed == QIB_IB_QDR ? + IBA7322_IBC_SPEED_QDR | IBA7322_IBC_IBTA_1_2_MASK : + ((speed == QIB_IB_DDR ? + IBA7322_IBC_SPEED_DDR : IBA7322_IBC_SPEED_SDR)); + + if (newctrlb == ppd->cpspec->ibcctrl_b) + return; + + ppd->cpspec->ibcctrl_b = newctrlb; + qib_write_kreg_port(ppd, krp_ibcctrl_b, ppd->cpspec->ibcctrl_b); + qib_write_kreg(ppd->dd, kr_scratch, 0); +}
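The speed & (speed - 1) test in set_7322_ibspeed_fast() is the usual more-than-one-bit-set idiom: the speed argument is a mask of enabled rates, so a nonzero result means a multi-speed mask and the IBTA 1.2 path is selected. A tiny demonstration (the usage is ours; the QIB_IB_* masks are the driver's one-hot speed bits):

/* Illustrative only, not patch code. */
static void speed_mask_idiom_sketch(void)
{
	u32 one = QIB_IB_DDR;			/* one speed enabled  */
	u32 many = QIB_IB_SDR | QIB_IB_DDR;	/* multi-speed mask   */

	WARN_ON((one & (one - 1)) != 0);	/* single bit: zero   */
	WARN_ON((many & (many - 1)) == 0);	/* >1 bit: nonzero    */
}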
+ +/* + * This routine is only used when we are not talking to another + * IB 1.2-compliant device that we think can do DDR. + * (This includes all existing switch chips as of Oct 2007.) + * 1.2-compliant devices go directly to DDR prior to reaching INIT + */ +static void try_7322_autoneg(struct qib_pportdata *ppd) +{ + unsigned long flags; + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_AUTONEG_INPROG; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + qib_autoneg_7322_send(ppd, 0); + set_7322_ibspeed_fast(ppd, QIB_IB_DDR); + qib_7322_mini_pcs_reset(ppd); + /* 2 msec is minimum length of a poll cycle */ + queue_delayed_work(ib_wq, &ppd->cpspec->autoneg_work, + msecs_to_jiffies(2)); +} + +/* + * Handle the empirically determined mechanism for auto-negotiation + * of DDR speed with switches. + */ +static void autoneg_7322_work(struct work_struct *work) +{ + struct qib_pportdata *ppd; + struct qib_devdata *dd; + u64 startms; + u32 i; + unsigned long flags; + + ppd = container_of(work, struct qib_chippport_specific, + autoneg_work.work)->ppd; + dd = ppd->dd; + + startms = jiffies_to_msecs(jiffies); + + /* + * Busy wait for this first part; it should be at most a + * few hundred usec, since we scheduled ourselves for 2 msec. + */ + for (i = 0; i < 25; i++) { + if (SYM_FIELD(ppd->lastibcstat, IBCStatusA_0, LinkState) + == IB_7322_LT_STATE_POLLQUIET) { + qib_set_linkstate(ppd, QIB_IB_LINKDOWN_DISABLE); + break; + } + udelay(100); + } + + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) + goto done; /* we got there early or were told to stop */ + + /* we expect this to timeout */ + if (wait_event_timeout(ppd->cpspec->autoneg_wait, + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG), + msecs_to_jiffies(90))) + goto done; + qib_7322_mini_pcs_reset(ppd); + + /* we expect this to timeout */ + if (wait_event_timeout(ppd->cpspec->autoneg_wait, + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG), + msecs_to_jiffies(1700))) + goto done; + qib_7322_mini_pcs_reset(ppd); + + set_7322_ibspeed_fast(ppd, QIB_IB_SDR); + + /* + * Wait up to 250 msec for link to train and get to INIT at DDR; + * this should terminate early. + */ + wait_event_timeout(ppd->cpspec->autoneg_wait, + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG), + msecs_to_jiffies(250)); +done: + if (ppd->lflags & QIBL_IB_AUTONEG_INPROG) { + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_INPROG; + if (ppd->cpspec->autoneg_tries == AUTONEG_TRIES) { + ppd->lflags |= QIBL_IB_AUTONEG_FAILED; + ppd->cpspec->autoneg_tries = 0; + } + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled); + } +} + +/* + * This routine is used to request that the IPG be set in the QLogic switch. + * Only called if r1.
+ */ +static void try_7322_ipg(struct qib_pportdata *ppd) +{ + struct qib_ibport *ibp = &ppd->ibport_data; + struct ib_mad_send_buf *send_buf; + struct ib_mad_agent *agent; + struct ib_smp *smp; + unsigned delay; + int ret; + + agent = ibp->send_agent; + if (!agent) + goto retry; + + send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR, + IB_MGMT_MAD_DATA, GFP_ATOMIC); + if (IS_ERR(send_buf)) + goto retry; + + if (!ibp->smi_ah) { + struct ib_ah *ah; + + ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE)); + if (IS_ERR(ah)) + ret = PTR_ERR(ah); + else { + send_buf->ah = ah; + ibp->smi_ah = to_iah(ah); + ret = 0; + } + } else { + send_buf->ah = &ibp->smi_ah->ibah; + ret = 0; + } + + smp = send_buf->mad; + smp->base_version = IB_MGMT_BASE_VERSION; + smp->mgmt_class = IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE; + smp->class_version = 1; + smp->method = IB_MGMT_METHOD_SEND; + smp->hop_cnt = 1; + smp->attr_id = QIB_VENDOR_IPG; + smp->attr_mod = 0; + + if (!ret) + ret = ib_post_send_mad(send_buf, NULL); + if (ret) + ib_free_send_mad(send_buf); +retry: + delay = 2 << ppd->cpspec->ipg_tries; + queue_delayed_work(ib_wq, &ppd->cpspec->ipg_work, + msecs_to_jiffies(delay)); +} + +/* + * Timeout handler for setting IPG. + * Only called if r1. + */ +static void ipg_7322_work(struct work_struct *work) +{ + struct qib_pportdata *ppd; + + ppd = container_of(work, struct qib_chippport_specific, + ipg_work.work)->ppd; + if ((ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED | QIBL_LINKACTIVE)) + && ++ppd->cpspec->ipg_tries <= 10) + try_7322_ipg(ppd); +} + +static u32 qib_7322_iblink_state(u64 ibcs) +{ + u32 state = (u32)SYM_FIELD(ibcs, IBCStatusA_0, LinkState); + + switch (state) { + case IB_7322_L_STATE_INIT: + state = IB_PORT_INIT; + break; + case IB_7322_L_STATE_ARM: + state = IB_PORT_ARMED; + break; + case IB_7322_L_STATE_ACTIVE: + /* fall through */ + case IB_7322_L_STATE_ACT_DEFER: + state = IB_PORT_ACTIVE; + break; + default: /* fall through */ + case IB_7322_L_STATE_DOWN: + state = IB_PORT_DOWN; + break; + } + return state; +} + +/* returns the IBTA port state, rather than the IBC link training state */ +static u8 qib_7322_phys_portstate(u64 ibcs) +{ + u8 state = (u8)SYM_FIELD(ibcs, IBCStatusA_0, LinkTrainingState); + return qib_7322_physportstate[state]; +} + +static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs) +{ + int ret = 0, symadj = 0; + unsigned long flags; + int mult; + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_FORCE_NOTIFY; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + + /* Update our picture of width and speed from chip */ + if (ibcs & SYM_MASK(IBCStatusA_0, LinkSpeedQDR)) { + ppd->link_speed_active = QIB_IB_QDR; + mult = 4; + } else if (ibcs & SYM_MASK(IBCStatusA_0, LinkSpeedActive)) { + ppd->link_speed_active = QIB_IB_DDR; + mult = 2; + } else { + ppd->link_speed_active = QIB_IB_SDR; + mult = 1; + } + if (ibcs & SYM_MASK(IBCStatusA_0, LinkWidthActive)) { + ppd->link_width_active = IB_WIDTH_4X; + mult *= 4; + } else + ppd->link_width_active = IB_WIDTH_1X; + ppd->delay_mult = ib_rate_to_delay[mult_to_ib_rate(mult)]; + + if (!ibup) { + u64 clr; + + /* Link went down. 
*/ + /* do IPG MAD again after linkdown, even if last time failed */ + ppd->cpspec->ipg_tries = 0; + clr = qib_read_kreg_port(ppd, krp_ibcstatus_b) & + (SYM_MASK(IBCStatusB_0, heartbeat_timed_out) | + SYM_MASK(IBCStatusB_0, heartbeat_crosstalk)); + if (clr) + qib_write_kreg_port(ppd, krp_ibcstatus_b, clr); + if (!(ppd->lflags & (QIBL_IB_AUTONEG_FAILED | + QIBL_IB_AUTONEG_INPROG))) + set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled); + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { + struct qib_qsfp_data *qd = + &ppd->cpspec->qsfp_data; + /* unlock the Tx settings, speed may change */ + qib_write_kreg_port(ppd, krp_tx_deemph_override, + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + reset_tx_deemphasis_override)); + qib_cancel_sends(ppd); + /* on link down, ensure sane pcs state */ + qib_7322_mini_pcs_reset(ppd); + /* schedule the qsfp refresh which should turn the link + off */ + if (ppd->dd->flags & QIB_HAS_QSFP) { + qd->t_insert = jiffies; + queue_work(ib_wq, &qd->work); + } + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (__qib_sdma_running(ppd)) + __qib_sdma_process_event(ppd, + qib_sdma_event_e70_go_idle); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } + clr = read_7322_creg32_port(ppd, crp_iblinkdown); + if (clr == ppd->cpspec->iblnkdownsnap) + ppd->cpspec->iblnkdowndelta++; + } else { + if (qib_compat_ddr_negotiate && + !(ppd->lflags & (QIBL_IB_AUTONEG_FAILED | + QIBL_IB_AUTONEG_INPROG)) && + ppd->link_speed_active == QIB_IB_SDR && + (ppd->link_speed_enabled & QIB_IB_DDR) + && ppd->cpspec->autoneg_tries < AUTONEG_TRIES) { + /* we are SDR, and auto-negotiation enabled */ + ++ppd->cpspec->autoneg_tries; + if (!ppd->cpspec->ibdeltainprog) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymdelta += + read_7322_creg32_port(ppd, + crp_ibsymbolerr) - + ppd->cpspec->ibsymsnap; + ppd->cpspec->iblnkerrdelta += + read_7322_creg32_port(ppd, + crp_iblinkerrrecov) - + ppd->cpspec->iblnkerrsnap; + } + try_7322_autoneg(ppd); + ret = 1; /* no other IB status change processing */ + } else if ((ppd->lflags & QIBL_IB_AUTONEG_INPROG) && + ppd->link_speed_active == QIB_IB_SDR) { + qib_autoneg_7322_send(ppd, 1); + set_7322_ibspeed_fast(ppd, QIB_IB_DDR); + qib_7322_mini_pcs_reset(ppd); + udelay(2); + ret = 1; /* no other IB status change processing */ + } else if ((ppd->lflags & QIBL_IB_AUTONEG_INPROG) && + (ppd->link_speed_active & QIB_IB_DDR)) { + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~(QIBL_IB_AUTONEG_INPROG | + QIBL_IB_AUTONEG_FAILED); + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + ppd->cpspec->autoneg_tries = 0; + /* re-enable SDR, for next link down */ + set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled); + wake_up(&ppd->cpspec->autoneg_wait); + symadj = 1; + } else if (ppd->lflags & QIBL_IB_AUTONEG_FAILED) { + /* + * Clear autoneg failure flag, and do setup + * so we'll try next time link goes down and + * back to INIT (possibly connected to a + * different device). 
+ */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_IB_AUTONEG_FAILED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + ppd->cpspec->ibcctrl_b |= IBA7322_IBC_IBTA_1_2_MASK; + symadj = 1; + } + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { + symadj = 1; + if (ppd->dd->cspec->r1 && ppd->cpspec->ipg_tries <= 10) + try_7322_ipg(ppd); + if (!ppd->cpspec->recovery_init) + setup_7322_link_recovery(ppd, 0); + ppd->cpspec->qdr_dfe_time = jiffies + + msecs_to_jiffies(QDR_DFE_DISABLE_DELAY); + } + ppd->cpspec->ibmalfusesnap = 0; + ppd->cpspec->ibmalfsnap = read_7322_creg32_port(ppd, + crp_errlink); + } + if (symadj) { + ppd->cpspec->iblnkdownsnap = + read_7322_creg32_port(ppd, crp_iblinkdown); + if (ppd->cpspec->ibdeltainprog) { + ppd->cpspec->ibdeltainprog = 0; + ppd->cpspec->ibsymdelta += read_7322_creg32_port(ppd, + crp_ibsymbolerr) - ppd->cpspec->ibsymsnap; + ppd->cpspec->iblnkerrdelta += read_7322_creg32_port(ppd, + crp_iblinkerrrecov) - ppd->cpspec->iblnkerrsnap; + } + } else if (!ibup && qib_compat_ddr_negotiate && + !ppd->cpspec->ibdeltainprog && + !(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { + ppd->cpspec->ibdeltainprog = 1; + ppd->cpspec->ibsymsnap = read_7322_creg32_port(ppd, + crp_ibsymbolerr); + ppd->cpspec->iblnkerrsnap = read_7322_creg32_port(ppd, + crp_iblinkerrrecov); + } + + if (!ret) + qib_setup_7322_setextled(ppd, ibup); + return ret; +} + +/* + * Does read/modify/write to appropriate registers to + * set output and direction bits selected by mask. + * These are in their canonical positions (e.g. lsb of + * dir will end up in D48 of extctrl on existing chips). + * Returns contents of GP Inputs. + */ +static int gpio_7322_mod(struct qib_devdata *dd, u32 out, u32 dir, u32 mask) +{ + u64 read_val, new_out; + unsigned long flags; + + if (mask) { + /* some bits being written, lock access to GPIO */ + dir &= mask; + out &= mask; + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + dd->cspec->extctrl &= ~((u64)mask << SYM_LSB(EXTCtrl, GPIOOe)); + dd->cspec->extctrl |= ((u64) dir << SYM_LSB(EXTCtrl, GPIOOe)); + new_out = (dd->cspec->gpio_out & ~mask) | out; + + qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl); + qib_write_kreg(dd, kr_gpio_out, new_out); + dd->cspec->gpio_out = new_out; + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); + } + /* + * It is unlikely that a read at this time would get valid + * data on a pin whose direction line was set in the same + * call to this function. We include the read here because + * that allows us to potentially combine a change on one pin with + * a read on another, and because the old code did something like + * this. + */ + read_val = qib_read_kreg64(dd, kr_extstatus); + return SYM_FIELD(read_val, EXTStatus, GPIOIn); +} + +/* Enable writes to config EEPROM, if possible. Returns previous state */ +static int qib_7322_eeprom_wen(struct qib_devdata *dd, int wen) +{ + int prev_wen; + u32 mask; + + mask = 1 << QIB_EEPROM_WEN_NUM; + prev_wen = ~gpio_7322_mod(dd, 0, 0, 0) >> QIB_EEPROM_WEN_NUM; + gpio_7322_mod(dd, wen ? 0 : mask, mask, mask); + + return prev_wen & 1; +}
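Because gpio_7322_mod() skips the locked write entirely when mask is zero, it doubles as a pure read of the GPIO inputs, which is how qib_7322_eeprom_wen() above samples the current write-enable state. A hedged sketch of both call shapes (pin 5 is a made-up GPIO number):

/* Illustrative only, not patch code. */
static void gpio_7322_usage_sketch(struct qib_devdata *dd)
{
	u32 pins;

	/* mask == 0: nothing is written, just sample the GPIO inputs */
	pins = gpio_7322_mod(dd, 0, 0, 0);
	pr_debug("GPIO inputs: 0x%x\n", pins);

	/* drive hypothetical pin 5 high: out, dir and mask all select it */
	gpio_7322_mod(dd, 1 << 5, 1 << 5, 1 << 5);
}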
+ +/* + * Read fundamental info we need to use the chip. These are + * the registers that describe chip capabilities, and are + * saved in shadow registers. + */ +static void get_7322_chip_params(struct qib_devdata *dd) +{ + u64 val; + u32 piobufs; + int mtu; + + dd->palign = qib_read_kreg32(dd, kr_pagealign); + + dd->uregbase = qib_read_kreg32(dd, kr_userregbase); + + dd->rcvtidcnt = qib_read_kreg32(dd, kr_rcvtidcnt); + dd->rcvtidbase = qib_read_kreg32(dd, kr_rcvtidbase); + dd->rcvegrbase = qib_read_kreg32(dd, kr_rcvegrbase); + dd->piobufbase = qib_read_kreg64(dd, kr_sendpiobufbase); + dd->pio2k_bufbase = dd->piobufbase & 0xffffffff; + + val = qib_read_kreg64(dd, kr_sendpiobufcnt); + dd->piobcnt2k = val & ~0U; + dd->piobcnt4k = val >> 32; + val = qib_read_kreg64(dd, kr_sendpiosize); + dd->piosize2k = val & ~0U; + dd->piosize4k = val >> 32; + + mtu = ib_mtu_enum_to_int(qib_ibmtu); + if (mtu == -1) + mtu = QIB_DEFAULT_MTU; + dd->pport[0].ibmtu = (u32)mtu; + dd->pport[1].ibmtu = (u32)mtu; + + /* these may be adjusted in init_chip_wc_pat() */ + dd->pio2kbase = (u32 __iomem *) + ((char __iomem *) dd->kregbase + dd->pio2k_bufbase); + dd->pio4kbase = (u32 __iomem *) + ((char __iomem *) dd->kregbase + + (dd->piobufbase >> 32)); + /* + * 4K buffers take 2 pages; we use roundup just to be + * paranoid; we calculate it once here, rather than on + * every buffer allocation + */ + dd->align4k = ALIGN(dd->piosize4k, dd->palign); + + piobufs = dd->piobcnt4k + dd->piobcnt2k + NUM_VL15_BUFS; + + dd->pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) / + (sizeof(u64) * BITS_PER_BYTE / 2); +} + +/* + * The chip base addresses in cspec and cpspec have to be set + * after possible init_chip_wc_pat(), rather than in + * get_7322_chip_params(), so they are split out as a separate function + */ +static void qib_7322_set_baseaddrs(struct qib_devdata *dd) +{ + u32 cregbase; + cregbase = qib_read_kreg32(dd, kr_counterregbase); + + dd->cspec->cregbase = (u64 __iomem *)(cregbase + + (char __iomem *)dd->kregbase); + + dd->egrtidbase = (u64 __iomem *) + ((char __iomem *) dd->kregbase + dd->rcvegrbase); + + /* port registers are defined as relative to base of chip */ + dd->pport[0].cpspec->kpregbase = + (u64 __iomem *)((char __iomem *)dd->kregbase); + dd->pport[1].cpspec->kpregbase = + (u64 __iomem *)(dd->palign + + (char __iomem *)dd->kregbase); + dd->pport[0].cpspec->cpregbase = + (u64 __iomem *)(qib_read_kreg_port(&dd->pport[0], + kr_counterregbase) + (char __iomem *)dd->kregbase); + dd->pport[1].cpspec->cpregbase = + (u64 __iomem *)(qib_read_kreg_port(&dd->pport[1], + kr_counterregbase) + (char __iomem *)dd->kregbase); +} + +/* + * This is a fairly special-purpose observer, so we only support + * the port-specific parts of SendCtrl + */ + +#define SENDCTRL_SHADOWED (SYM_MASK(SendCtrl_0, SendEnable) | \ + SYM_MASK(SendCtrl_0, SDmaEnable) | \ + SYM_MASK(SendCtrl_0, SDmaIntEnable) | \ + SYM_MASK(SendCtrl_0, SDmaSingleDescriptor) | \ + SYM_MASK(SendCtrl_0, SDmaHalt) | \ + SYM_MASK(SendCtrl_0, IBVLArbiterEn) | \ + SYM_MASK(SendCtrl_0, ForceCreditUpToDate)) + +static int sendctrl_hook(struct qib_devdata *dd, + const struct diag_observer *op, u32 offs, + u64 *data, u64 mask, int only_32) +{ + unsigned long flags; + unsigned idx; + unsigned pidx; + struct qib_pportdata *ppd = NULL; + u64 local_data, all_bits; + + /* + * The fixed correspondence between Physical ports and pports is + * severed. We need to hunt for the ppd that corresponds + * to the offset we got. And we have to do that without admitting + * we know the stride, apparently.
+ */ + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + u64 __iomem *psptr; + u32 psoffs; + + ppd = dd->pport + pidx; + if (!ppd->cpspec->kpregbase) + continue; + + psptr = ppd->cpspec->kpregbase + krp_sendctrl; + psoffs = (u32) (psptr - dd->kregbase) * sizeof(*psptr); + if (psoffs == offs) + break; + } + + /* If pport is not being managed by driver, just avoid shadows. */ + if (pidx >= dd->num_pports) + ppd = NULL; + + /* In any case, "idx" is flat index in kreg space */ + idx = offs / sizeof(u64); + + all_bits = ~0ULL; + if (only_32) + all_bits >>= 32; + + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if (!ppd || (mask & all_bits) != all_bits) { + /* + * At least some mask bits are zero, so we need + * to read. The judgement call is whether from + * reg or shadow. First-cut: read reg, and complain + * if any bits which should be shadowed are different + * from their shadowed value. + */ + if (only_32) + local_data = (u64)qib_read_kreg32(dd, idx); + else + local_data = qib_read_kreg64(dd, idx); + *data = (local_data & ~mask) | (*data & mask); + } + if (mask) { + /* + * At least some mask bits are one, so we need + * to write, but only shadow some bits. + */ + u64 sval, tval; /* Shadowed, transient */ + + /* + * New shadow val is bits we don't want to touch, + * ORed with bits we do, that are intended for shadow. + */ + if (ppd) { + sval = ppd->p_sendctrl & ~mask; + sval |= *data & SENDCTRL_SHADOWED & mask; + ppd->p_sendctrl = sval; + } else + sval = *data & SENDCTRL_SHADOWED & mask; + tval = sval | (*data & ~SENDCTRL_SHADOWED & mask); + qib_write_kreg(dd, idx, tval); + qib_write_kreg(dd, kr_scratch, 0ULL); + } + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + return only_32 ? 4 : 8; +} + +static const struct diag_observer sendctrl_0_observer = { + sendctrl_hook, KREG_IDX(SendCtrl_0) * sizeof(u64), + KREG_IDX(SendCtrl_0) * sizeof(u64) +}; + +static const struct diag_observer sendctrl_1_observer = { + sendctrl_hook, KREG_IDX(SendCtrl_1) * sizeof(u64), + KREG_IDX(SendCtrl_1) * sizeof(u64) +}; + +static ushort sdma_fetch_prio = 8; +module_param_named(sdma_fetch_prio, sdma_fetch_prio, ushort, S_IRUGO); +MODULE_PARM_DESC(sdma_fetch_prio, "SDMA descriptor fetch priority"); + +/* Besides logging QSFP events, we set appropriate TxDDS values */ +static void init_txdds_table(struct qib_pportdata *ppd, int override); + +static void qsfp_7322_event(struct work_struct *work) +{ + struct qib_qsfp_data *qd; + struct qib_pportdata *ppd; + unsigned long pwrup; + unsigned long flags; + int ret; + u32 le2; + + qd = container_of(work, struct qib_qsfp_data, work); + ppd = qd->ppd; + pwrup = qd->t_insert + + msecs_to_jiffies(QSFP_PWR_LAG_MSEC - QSFP_MODPRS_LAG_MSEC); + + /* Delay for 20 msecs to allow ModPrs resistor to setup */ + mdelay(QSFP_MODPRS_LAG_MSEC); + + if (!qib_qsfp_mod_present(ppd)) { + ppd->cpspec->qsfp_data.modpresent = 0; + /* Set the physical link to disabled */ + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } else { + /* + * Some QSFPs not only do not respond until the full power-up + * time, but may behave badly if we try. So hold off responding + * to insertion.
+ */ + while (1) { + if (time_is_before_jiffies(pwrup)) + break; + msleep(20); + } + + ret = qib_refresh_qsfp_cache(ppd, &qd->cache); + + /* + * Need to change LE2 back to defaults if we couldn't + * read the cable type (to handle cable swaps), so do this + * even on failure to read cable information. We don't + * get here for QME, so IS_QME check not needed here. + */ + if (!ret && !ppd->dd->cspec->r1) { + if (QSFP_IS_ACTIVE_FAR(qd->cache.tech)) + le2 = LE2_QME; + else if (qd->cache.atten[1] >= qib_long_atten && + QSFP_IS_CU(qd->cache.tech)) + le2 = LE2_5m; + else + le2 = LE2_DEFAULT; + } else + le2 = LE2_DEFAULT; + ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7)); + /* + * We always change parameters, since we can choose + * values for cables without eeproms, and the cable may have + * changed from a cable with full or partial eeprom content + * to one with partial or no content. + */ + init_txdds_table(ppd, 0); + /* The physical link is being re-enabled only when the + * previous state was DISABLED and the VALID bit is not + * set. This should only happen when the cable has been + * physically pulled. */ + if (!ppd->cpspec->qsfp_data.modpresent && + (ppd->lflags & (QIBL_LINKV | QIBL_IB_LINK_DISABLED))) { + ppd->cpspec->qsfp_data.modpresent = 1; + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_SLEEP); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + } +} + +/* + * There is little we can do but complain to the user if QSFP + * initialization fails. + */ +static void qib_init_7322_qsfp(struct qib_pportdata *ppd) +{ + unsigned long flags; + struct qib_qsfp_data *qd = &ppd->cpspec->qsfp_data; + struct qib_devdata *dd = ppd->dd; + u64 mod_prs_bit = QSFP_GPIO_MOD_PRS_N; + + mod_prs_bit <<= (QSFP_GPIO_PORT2_SHIFT * ppd->hw_pidx); + qd->ppd = ppd; + qib_qsfp_init(qd, qsfp_7322_event); + spin_lock_irqsave(&dd->cspec->gpio_lock, flags); + dd->cspec->extctrl |= (mod_prs_bit << SYM_LSB(EXTCtrl, GPIOInvert)); + dd->cspec->gpio_mask |= mod_prs_bit; + qib_write_kreg(dd, kr_extctrl, dd->cspec->extctrl); + qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask); + spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags); +}
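The txselect module-parameter format documented in the comment below is easiest to see with a concrete value; the string and its indices here are invented for illustration:

/*
 * Hedged example of the txselect format parsed by set_no_qsfp_atten()
 * below (unit, port and table-index values are made up):
 *
 *   txselect="5 0,1=12 0,2=14,9"
 *
 *   5        - default TxDDS table index for all ports
 *   0,1=12   - unit 0, port 1 uses table entry 12
 *   0,2=14,9 - unit 0, port 2 uses entry 14, with H1 forced to 9
 */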
+ +/* + * Called at device initialization time, and also if the txselect + * module parameter is changed. This is used for cables that don't + * have valid QSFP EEPROMs (not present, or attenuation is zero). + * We initialize to the default, then if there is a specific + * unit,port match, we use that (and set it immediately, for the + * current speed, if the link is at INIT or better). + * String format is "default# unit#,port#=# ... u,p=#", separators must + * be a SPACE character. A newline terminates. The u,p=# tuples may + * optionally have "u,p=#,#", where the final # is the H1 value. + * The last specific match is used (actually, all are used, but the last + * one is the one that winds up set); if none match at all, fall back on + * the default. + */ +static void set_no_qsfp_atten(struct qib_devdata *dd, int change) +{ + char *nxt, *str; + u32 pidx, unit, port, deflt, h1; + unsigned long val; + int any = 0, seth1; + int txdds_size; + + str = txselect_list; + + /* default number is validated in setup_txselect() */ + deflt = simple_strtoul(str, &nxt, 0); + for (pidx = 0; pidx < dd->num_pports; ++pidx) + dd->pport[pidx].cpspec->no_eep = deflt; + + txdds_size = TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ; + if (IS_QME(dd) || IS_QMH(dd)) + txdds_size += TXDDS_MFG_SZ; + + while (*nxt && nxt[1]) { + str = ++nxt; + unit = simple_strtoul(str, &nxt, 0); + if (nxt == str || !*nxt || *nxt != ',') { + while (*nxt && *nxt++ != ' ') /* skip to next, if any */ + ; + continue; + } + str = ++nxt; + port = simple_strtoul(str, &nxt, 0); + if (nxt == str || *nxt != '=') { + while (*nxt && *nxt++ != ' ') /* skip to next, if any */ + ; + continue; + } + str = ++nxt; + val = simple_strtoul(str, &nxt, 0); + if (nxt == str) { + while (*nxt && *nxt++ != ' ') /* skip to next, if any */ + ; + continue; + } + if (val >= txdds_size) + continue; + seth1 = 0; + h1 = 0; /* gcc thinks it might be used uninitialized */ + if (*nxt == ',' && nxt[1]) { + str = ++nxt; + h1 = (u32)simple_strtoul(str, &nxt, 0); + if (nxt == str) + while (*nxt && *nxt++ != ' ') /* skip */ + ; + else + seth1 = 1; + } + for (pidx = 0; dd->unit == unit && pidx < dd->num_pports; + ++pidx) { + struct qib_pportdata *ppd = &dd->pport[pidx]; + + if (ppd->port != port || !ppd->link_speed_supported) + continue; + ppd->cpspec->no_eep = val; + if (seth1) + ppd->cpspec->h1_val = h1; + /* now change the IBC and serdes, overriding generic */ + init_txdds_table(ppd, 1); + /* Re-enable the physical state machine on mezz boards + * now that the correct settings have been set. + * QSFP boards are handled by the QSFP event handler */ + if (IS_QMH(dd) || IS_QME(dd)) + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_SLEEP); + any++; + } + if (*nxt == '\n') + break; /* done */ + } + if (change && !any) { + /* no specific setting, use the default. + * Change the IBC and serdes, but since it's + * general, don't override specific settings. + */ + for (pidx = 0; pidx < dd->num_pports; ++pidx) + if (dd->pport[pidx].link_speed_supported) + init_txdds_table(&dd->pport[pidx], 0); + } +} + +/* handle the txselect parameter changing */ +static int setup_txselect(const char *str, struct kernel_param *kp) +{ + struct qib_devdata *dd; + unsigned long val; + char *n; + if (strlen(str) >= MAX_ATTEN_LEN) { + pr_info("txselect_values string too long\n"); + return -ENOSPC; + } + val = simple_strtoul(str, &n, 0); + if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + + TXDDS_MFG_SZ)) { + pr_info("txselect_values must start with a number < %d\n", + TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); + return -EINVAL; + } + strcpy(txselect_list, str); + + list_for_each_entry(dd, &qib_dev_list, list) + if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) + set_no_qsfp_atten(dd, 1); + return 0; +} + +/* + * Write the final few registers that depend on some of the + * init setup. Done late in init, just before bringing up + * the serdes.
+ */ +static int qib_late_7322_initreg(struct qib_devdata *dd) +{ + int ret = 0, n; + u64 val; + + qib_write_kreg(dd, kr_rcvhdrentsize, dd->rcvhdrentsize); + qib_write_kreg(dd, kr_rcvhdrsize, dd->rcvhdrsize); + qib_write_kreg(dd, kr_rcvhdrcnt, dd->rcvhdrcnt); + qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); + val = qib_read_kreg64(dd, kr_sendpioavailaddr); + if (val != dd->pioavailregs_phys) { + qib_dev_err(dd, + "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", + (unsigned long) dd->pioavailregs_phys, + (unsigned long long) val); + ret = -EINVAL; + } + + n = dd->piobcnt2k + dd->piobcnt4k + NUM_VL15_BUFS; + qib_7322_txchk_change(dd, 0, n, TXCHK_CHG_TYPE_KERN, NULL); + /* driver sends get pkey, lid, etc. checking also, to catch bugs */ + qib_7322_txchk_change(dd, 0, n, TXCHK_CHG_TYPE_ENAB1, NULL); + + qib_register_observer(dd, &sendctrl_0_observer); + qib_register_observer(dd, &sendctrl_1_observer); + + dd->control &= ~QLOGIC_IB_C_SDMAFETCHPRIOEN; + qib_write_kreg(dd, kr_control, dd->control); + /* + * Set SendDmaFetchPriority and init Tx params, including + * QSFP handler on boards that have QSFP. + * First set our default attenuation entry for cables that + * don't have valid attenuation. + */ + set_no_qsfp_atten(dd, 0); + for (n = 0; n < dd->num_pports; ++n) { + struct qib_pportdata *ppd = dd->pport + n; + + qib_write_kreg_port(ppd, krp_senddmaprioritythld, + sdma_fetch_prio & 0xf); + /* Initialize qsfp if present on board. */ + if (dd->flags & QIB_HAS_QSFP) + qib_init_7322_qsfp(ppd); + } + dd->control |= QLOGIC_IB_C_SDMAFETCHPRIOEN; + qib_write_kreg(dd, kr_control, dd->control); + + return ret; +} + +/* per IB port errors. */ +#define SENDCTRL_PIBP (MASK_ACROSS(0, 1) | MASK_ACROSS(3, 3) | \ + MASK_ACROSS(8, 15)) +#define RCVCTRL_PIBP (MASK_ACROSS(0, 17) | MASK_ACROSS(39, 41)) +#define ERRS_PIBP (MASK_ACROSS(57, 58) | MASK_ACROSS(54, 54) | \ + MASK_ACROSS(36, 49) | MASK_ACROSS(29, 34) | MASK_ACROSS(14, 17) | \ + MASK_ACROSS(0, 11)) + +/* + * Write the initialization per-port registers that need to be done at + * driver load and after reset completes (i.e., that aren't done as part + * of other init procedures called from qib_init.c). + * Some of these should be redundant on reset, but play safe. + */ +static void write_7322_init_portregs(struct qib_pportdata *ppd) +{ + u64 val; + int i; + + if (!ppd->link_speed_supported) { + /* no buffer credits for this port */ + for (i = 1; i < 8; i++) + qib_write_kreg_port(ppd, krp_rxcreditvl0 + i, 0); + qib_write_kreg_port(ppd, krp_ibcctrl_b, 0); + qib_write_kreg(ppd->dd, kr_scratch, 0); + return; + } + + /* + * Set the number of supported virtual lanes in IBC, + * for flow control packet handling on unsupported VLs + */ + val = qib_read_kreg_port(ppd, krp_ibsdtestiftx); + val &= ~SYM_MASK(IB_SDTEST_IF_TX_0, VL_CAP); + val |= (u64)(ppd->vls_supported - 1) << + SYM_LSB(IB_SDTEST_IF_TX_0, VL_CAP); + qib_write_kreg_port(ppd, krp_ibsdtestiftx, val); + + qib_write_kreg_port(ppd, krp_rcvbthqp, QIB_KD_QP); + + /* enable tx header checking */ + qib_write_kreg_port(ppd, krp_sendcheckcontrol, IBA7322_SENDCHK_PKEY | + IBA7322_SENDCHK_BTHQP | IBA7322_SENDCHK_SLID | + IBA7322_SENDCHK_RAW_IPV6 | IBA7322_SENDCHK_MINSZ); + + qib_write_kreg_port(ppd, krp_ncmodectrl, + SYM_MASK(IBNCModeCtrl_0, ScrambleCapLocal)); + + /* + * Unconditionally clear the bufmask bits. If SDMA is + * enabled, we'll set them appropriately later. 
+ */ + qib_write_kreg_port(ppd, krp_senddmabufmask0, 0); + qib_write_kreg_port(ppd, krp_senddmabufmask1, 0); + qib_write_kreg_port(ppd, krp_senddmabufmask2, 0); + if (ppd->dd->cspec->r1) + ppd->p_sendctrl |= SYM_MASK(SendCtrl_0, ForceCreditUpToDate); +} + +/* + * Write the initialization per-device registers that need to be done at + * driver load and after reset completes (i.e., that aren't done as part + * of other init procedures called from qib_init.c). Also write per-port + * registers that are affected by overall device config, such as QP mapping. + * Some of these should be redundant on reset, but play safe. + */ +static void write_7322_initregs(struct qib_devdata *dd) +{ + struct qib_pportdata *ppd; + int i, pidx; + u64 val; + + /* Set Multicast QPs received by port 2 to map to context one. */ + qib_write_kreg(dd, KREG_IDX(RcvQPMulticastContext_1), 1); + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + unsigned n, regno; + unsigned long flags; + + if (dd->n_krcv_queues < 2 || + !dd->pport[pidx].link_speed_supported) + continue; + + ppd = &dd->pport[pidx]; + + /* be paranoid against later code motion, etc. */ + spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); + ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvQPMapEnable); + spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); + + /* Initialize QP to context mapping */ + regno = krp_rcvqpmaptable; + val = 0; + if (dd->num_pports > 1) + n = dd->first_user_ctxt / dd->num_pports; + else + n = dd->first_user_ctxt - 1; + for (i = 0; i < 32; ) { + unsigned ctxt; + + if (dd->num_pports > 1) + ctxt = (i % n) * dd->num_pports + pidx; + else if (i % n) + ctxt = (i % n) + 1; + else + ctxt = ppd->hw_pidx; + val |= ctxt << (5 * (i % 6)); + i++; + if (i % 6 == 0) { + qib_write_kreg_port(ppd, regno, val); + val = 0; + regno++; + } + } + qib_write_kreg_port(ppd, regno, val); + } + + /* + * Set up interrupt mitigation for kernel contexts, but + * not user contexts (user contexts use interrupts when + * stalled waiting for any packet, so want those interrupts + * right away). + */ + for (i = 0; i < dd->first_user_ctxt; i++) { + dd->cspec->rcvavail_timeout[i] = rcv_int_timeout; + qib_write_kreg(dd, kr_rcvavailtimeout + i, rcv_int_timeout); + } + + /* + * Initialize the (disabled) rcvflow tables. Application code + * will set up each flow as it uses the flow. + * Doesn't clear any of the error bits that might be set. + */ + val = TIDFLOW_ERRBITS; /* these are W1C */ + for (i = 0; i < dd->cfgctxts; i++) { + int flow; + for (flow = 0; flow < NUM_TIDFLOWS_CTXT; flow++) + qib_write_ureg(dd, ur_rcvflowtable+flow, val, i); + } + + /* + * dual cards init to dual port recovery, single port cards to + * the one port.
Dual port cards may later adjust to 1 port, + * and then back to dual port if both ports are connected + */ + if (dd->num_pports) + setup_7322_link_recovery(dd->pport, dd->num_pports > 1); +} + +static int qib_init_7322_variables(struct qib_devdata *dd) +{ + struct qib_pportdata *ppd; + unsigned features, pidx, sbufcnt; + int ret, mtu; + u32 sbufs, updthresh; + + /* pport structs are contiguous, allocated after devdata */ + ppd = (struct qib_pportdata *)(dd + 1); + dd->pport = ppd; + ppd[0].dd = dd; + ppd[1].dd = dd; + + dd->cspec = (struct qib_chip_specific *)(ppd + 2); + + ppd[0].cpspec = (struct qib_chippport_specific *)(dd->cspec + 1); + ppd[1].cpspec = &ppd[0].cpspec[1]; + ppd[0].cpspec->ppd = &ppd[0]; /* for autoneg_7322_work() */ + ppd[1].cpspec->ppd = &ppd[1]; /* for autoneg_7322_work() */ + + spin_lock_init(&dd->cspec->rcvmod_lock); + spin_lock_init(&dd->cspec->gpio_lock); + + /* we haven't yet set QIB_PRESENT, so use read directly */ + dd->revision = readq(&dd->kregbase[kr_revision]); + + if ((dd->revision & 0xffffffffU) == 0xffffffffU) { + qib_dev_err(dd, + "Revision register read failure, giving up initialization\n"); + ret = -ENODEV; + goto bail; + } + dd->flags |= QIB_PRESENT; /* now register routines work */ + + dd->majrev = (u8) SYM_FIELD(dd->revision, Revision_R, ChipRevMajor); + dd->minrev = (u8) SYM_FIELD(dd->revision, Revision_R, ChipRevMinor); + dd->cspec->r1 = dd->minrev == 1; + + get_7322_chip_params(dd); + features = qib_7322_boardname(dd); + + /* now that piobcnt2k and 4k set, we can allocate these */ + sbufcnt = dd->piobcnt2k + dd->piobcnt4k + + NUM_VL15_BUFS + BITS_PER_LONG - 1; + sbufcnt /= BITS_PER_LONG; + dd->cspec->sendchkenable = kmalloc(sbufcnt * + sizeof(*dd->cspec->sendchkenable), GFP_KERNEL); + dd->cspec->sendgrhchk = kmalloc(sbufcnt * + sizeof(*dd->cspec->sendgrhchk), GFP_KERNEL); + dd->cspec->sendibchk = kmalloc(sbufcnt * + sizeof(*dd->cspec->sendibchk), GFP_KERNEL); + if (!dd->cspec->sendchkenable || !dd->cspec->sendgrhchk || + !dd->cspec->sendibchk) { + qib_dev_err(dd, "Failed allocation for hdrchk bitmaps\n"); + ret = -ENOMEM; + goto bail; + } + + ppd = dd->pport; + + /* + * GPIO bits for TWSI data and clock, + * used for serial EEPROM. + */ + dd->gpio_sda_num = _QIB_GPIO_SDA_NUM; + dd->gpio_scl_num = _QIB_GPIO_SCL_NUM; + dd->twsi_eeprom_dev = QIB_TWSI_EEPROM_DEV; + + dd->flags |= QIB_HAS_INTX | QIB_HAS_LINK_LATENCY | + QIB_NODMA_RTAIL | QIB_HAS_VLSUPP | QIB_HAS_HDRSUPP | + QIB_HAS_THRESH_UPDATE | + (sdma_idle_cnt ? QIB_HAS_SDMA_TIMEOUT : 0); + dd->flags |= qib_special_trigger ? + QIB_USE_SPCL_TRIG : QIB_HAS_SEND_DMA; + + /* + * Set up initial values. These may change when PAT is enabled, but + * we need these to do initial chip register accesses.
+ */ + qib_7322_set_baseaddrs(dd); + + mtu = ib_mtu_enum_to_int(qib_ibmtu); + if (mtu == -1) + mtu = QIB_DEFAULT_MTU; + + dd->cspec->int_enable_mask = QIB_I_BITSEXTANT; + /* all hwerrors become interrupts, unless special purposed */ + dd->cspec->hwerrmask = ~0ULL; + /* link_recovery setup causes these errors, so ignore them, + * other than clearing them when they occur */ + dd->cspec->hwerrmask &= + ~(SYM_MASK(HwErrMask, IBSerdesPClkNotDetectMask_0) | + SYM_MASK(HwErrMask, IBSerdesPClkNotDetectMask_1) | + HWE_MASK(LATriggered)); + + for (pidx = 0; pidx < NUM_IB_PORTS; ++pidx) { + struct qib_chippport_specific *cp = ppd->cpspec; + ppd->link_speed_supported = features & PORT_SPD_CAP; + features >>= PORT_SPD_CAP_SHIFT; + if (!ppd->link_speed_supported) { + /* single port mode (7340, or configured) */ + dd->skip_kctxt_mask |= 1 << pidx; + if (pidx == 0) { + /* Make sure port is disabled. */ + qib_write_kreg_port(ppd, krp_rcvctrl, 0); + qib_write_kreg_port(ppd, krp_ibcctrl_a, 0); + ppd[0] = ppd[1]; + dd->cspec->hwerrmask &= ~(SYM_MASK(HwErrMask, + IBSerdesPClkNotDetectMask_0) + | SYM_MASK(HwErrMask, + SDmaMemReadErrMask_0)); + dd->cspec->int_enable_mask &= ~( + SYM_MASK(IntMask, SDmaCleanupDoneMask_0) | + SYM_MASK(IntMask, SDmaIdleIntMask_0) | + SYM_MASK(IntMask, SDmaProgressIntMask_0) | + SYM_MASK(IntMask, SDmaIntMask_0) | + SYM_MASK(IntMask, ErrIntMask_0) | + SYM_MASK(IntMask, SendDoneIntMask_0)); + } else { + /* Make sure port is disabled. */ + qib_write_kreg_port(ppd, krp_rcvctrl, 0); + qib_write_kreg_port(ppd, krp_ibcctrl_a, 0); + dd->cspec->hwerrmask &= ~(SYM_MASK(HwErrMask, + IBSerdesPClkNotDetectMask_1) + | SYM_MASK(HwErrMask, + SDmaMemReadErrMask_1)); + dd->cspec->int_enable_mask &= ~( + SYM_MASK(IntMask, SDmaCleanupDoneMask_1) | + SYM_MASK(IntMask, SDmaIdleIntMask_1) | + SYM_MASK(IntMask, SDmaProgressIntMask_1) | + SYM_MASK(IntMask, SDmaIntMask_1) | + SYM_MASK(IntMask, ErrIntMask_1) | + SYM_MASK(IntMask, SendDoneIntMask_1)); + } + continue; + } + + dd->num_pports++; + ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + if (ret) { + dd->num_pports--; + goto bail; + } + + ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; + ppd->link_width_enabled = IB_WIDTH_4X; + ppd->link_speed_enabled = ppd->link_speed_supported; + /* + * Set the initial values to reasonable default, will be set + * for real when link is up. + */ + ppd->link_width_active = IB_WIDTH_4X; + ppd->link_speed_active = QIB_IB_SDR; + ppd->delay_mult = ib_rate_to_delay[IB_RATE_10_GBPS]; + switch (qib_num_cfg_vls) { + case 1: + ppd->vls_supported = IB_VL_VL0; + break; + case 2: + ppd->vls_supported = IB_VL_VL0_1; + break; + default: + qib_devinfo(dd->pcidev, + "Invalid num_vls %u, using 4 VLs\n", + qib_num_cfg_vls); + qib_num_cfg_vls = 4; + /* fall through */ + case 4: + ppd->vls_supported = IB_VL_VL0_3; + break; + case 8: + if (mtu <= 2048) + ppd->vls_supported = IB_VL_VL0_7; + else { + qib_devinfo(dd->pcidev, + "Invalid num_vls %u for MTU %d " + ", using 4 VLs\n", + qib_num_cfg_vls, mtu); + ppd->vls_supported = IB_VL_VL0_3; + qib_num_cfg_vls = 4; + } + break; + } + ppd->vls_operational = ppd->vls_supported; + + init_waitqueue_head(&cp->autoneg_wait); + INIT_DELAYED_WORK(&cp->autoneg_work, + autoneg_7322_work); + if (ppd->dd->cspec->r1) + INIT_DELAYED_WORK(&cp->ipg_work, ipg_7322_work); + + /* + * For Mez and similar cards, no qsfp info, so do + * the "cable info" setup here. Can be overridden + * in adapter-specific routines. 
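Editor's note: the qib_num_cfg_vls switch earlier in this function validates the requested VL count against the MTU (eight data VLs only fit when the MTU is at most 2048) and falls back to four VLs otherwise. The same policy as a standalone helper, with a local enum standing in for the IB_VL_* capability encodings:

enum vl_cap { VL0, VL0_1, VL0_3, VL0_7 };	/* stand-ins for IB_VL_* */

static enum vl_cap map_num_vls(unsigned num_vls, int mtu)
{
	switch (num_vls) {
	case 1:
		return VL0;
	case 2:
		return VL0_1;
	case 8:
		if (mtu <= 2048)	/* 8 VLs only fit with MTU <= 2048 */
			return VL0_7;
		/* fall through: MTU too large, drop back to 4 VLs */
	case 4:
	default:
		return VL0_3;
	}
}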
+ */ + if (!(dd->flags & QIB_HAS_QSFP)) { + if (!IS_QMH(dd) && !IS_QME(dd)) + qib_devinfo(dd->pcidev, + "IB%u:%u: Unknown mezzanine card type\n", + dd->unit, ppd->port); + cp->h1_val = IS_QMH(dd) ? H1_FORCE_QMH : H1_FORCE_QME; + /* + * Choose center value as default tx serdes setting + * until changed through module parameter. + */ + ppd->cpspec->no_eep = IS_QMH(dd) ? + TXDDS_TABLE_SZ + 2 : TXDDS_TABLE_SZ + 4; + } else + cp->h1_val = H1_FORCE_VAL; + + /* Avoid writes to chip for mini_init */ + if (!qib_mini_init) + write_7322_init_portregs(ppd); + + init_timer(&cp->chase_timer); + cp->chase_timer.function = reenable_chase; + cp->chase_timer.data = (unsigned long)ppd; + + ppd++; + } + + dd->rcvhdrentsize = qib_rcvhdrentsize ? + qib_rcvhdrentsize : QIB_RCVHDR_ENTSIZE; + dd->rcvhdrsize = qib_rcvhdrsize ? + qib_rcvhdrsize : QIB_DFLT_RCVHDRSIZE; + dd->rhf_offset = dd->rcvhdrentsize - sizeof(u64) / sizeof(u32); + + /* we always allocate at least 2048 bytes for eager buffers */ + dd->rcvegrbufsize = max(mtu, 2048); + BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); + dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); + + qib_7322_tidtemplate(dd); + + /* + * We can request a receive interrupt for 1 or + * more packets from current offset. + */ + dd->rhdrhead_intr_off = + (u64) rcv_int_count << IBA7322_HDRHEAD_PKTINT_SHIFT; + + /* setup the stats timer; the add_timer is done at end of init */ + init_timer(&dd->stats_timer); + dd->stats_timer.function = qib_get_7322_faststats; + dd->stats_timer.data = (unsigned long) dd; + + dd->ureg_align = 0x10000; /* 64KB alignment */ + + dd->piosize2kmax_dwords = dd->piosize2k >> 2; + + qib_7322_config_ctxts(dd); + qib_set_ctxtcnt(dd); + + if (qib_wc_pat) { + resource_size_t vl15off; + /* + * We do not set WC on the VL15 buffers to avoid + * a rare problem with unaligned writes from + * interrupt-flushed store buffers, so we need + * to map those separately here. We can't solve + * this for the rarely used mtrr case. + */ + ret = init_chip_wc_pat(dd, 0); + if (ret) + goto bail; + + /* vl15 buffers start just after the 4k buffers */ + vl15off = dd->physaddr + (dd->piobufbase >> 32) + + dd->piobcnt4k * dd->align4k; + dd->piovl15base = ioremap_nocache(vl15off, + NUM_VL15_BUFS * dd->align4k); + if (!dd->piovl15base) { + ret = -ENOMEM; + goto bail; + } + } + qib_7322_set_baseaddrs(dd); /* set chip access pointers now */ + + ret = 0; + if (qib_mini_init) + goto bail; + if (!dd->num_pports) { + qib_dev_err(dd, "No ports enabled, giving up initialization\n"); + goto bail; /* no error, so can still figure out why err */ + } + + write_7322_initregs(dd); + ret = qib_create_ctxts(dd); + init_7322_cntrnames(dd); + + updthresh = 8U; /* update threshold */ + + /* use all of 4KB buffers for the kernel SDMA, zero if !SDMA. + * reserve the update threshold amount for other kernel use, such + * as sending SMI, MAD, and ACKs, or 3, whichever is greater, + * unless we aren't enabling SDMA, in which case we want to use + * all the 4k bufs for the kernel. + * if this was less than the update threshold, we could wait + * a long time for an update. Coded this way because we + * sometimes change the update threshold for various reasons, + * and we want this to remain robust. + */ + if (dd->flags & QIB_HAS_SEND_DMA) { + dd->cspec->sdmabufcnt = dd->piobcnt4k; + sbufs = updthresh > 3 ? 
updthresh : 3; + } else { + dd->cspec->sdmabufcnt = 0; + sbufs = dd->piobcnt4k; + } + dd->cspec->lastbuf_for_pio = dd->piobcnt2k + dd->piobcnt4k - + dd->cspec->sdmabufcnt; + dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs; + dd->cspec->lastbuf_for_pio--; /* range is <= , not < */ + dd->last_pio = dd->cspec->lastbuf_for_pio; + dd->pbufsctxt = (dd->cfgctxts > dd->first_user_ctxt) ? + dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt) : 0; + + /* + * If we have 16 user contexts, we will have 7 sbufs + * per context, so reduce the update threshold to match. We + * want to update before we actually run out, at low pbufs/ctxt + * so give ourselves some margin. + */ + if (dd->pbufsctxt >= 2 && dd->pbufsctxt - 2 < updthresh) + updthresh = dd->pbufsctxt - 2; + dd->cspec->updthresh_dflt = updthresh; + dd->cspec->updthresh = updthresh; + + /* before full enable, no interrupts, no locking needed */ + dd->sendctrl |= ((updthresh & SYM_RMASK(SendCtrl, AvailUpdThld)) + << SYM_LSB(SendCtrl, AvailUpdThld)) | + SYM_MASK(SendCtrl, SendBufAvailPad64Byte); + + dd->psxmitwait_supported = 1; + dd->psxmitwait_check_rate = QIB_7322_PSXMITWAIT_CHECK_RATE; +bail: + if (!dd->ctxtcnt) + dd->ctxtcnt = 1; /* for other initialization code */ + + return ret; +} + +static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *ppd, u64 pbc, + u32 *pbufnum) +{ + u32 first, last, plen = pbc & QIB_PBC_LENGTH_MASK; + struct qib_devdata *dd = ppd->dd; + + /* last is same for 2k and 4k, because we use 4k if all 2k busy */ + if (pbc & PBC_7322_VL15_SEND) { + first = dd->piobcnt2k + dd->piobcnt4k + ppd->hw_pidx; + last = first; + } else { + if ((plen + 1) > dd->piosize2kmax_dwords) + first = dd->piobcnt2k; + else + first = 0; + last = dd->cspec->lastbuf_for_pio; + } + return qib_getsendbuf_range(dd, pbufnum, first, last); +} + +static void qib_set_cntr_7322_sample(struct qib_pportdata *ppd, u32 intv, + u32 start) +{ + qib_write_kreg_port(ppd, krp_psinterval, intv); + qib_write_kreg_port(ppd, krp_psstart, start); +} + +/* + * Must be called with sdma_lock held, or before init finished. 
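Editor's note: the buffer accounting near the end of qib_init_7322_variables() carves the PIO buffer pool three ways: the 4 KB buffers go to SDMA, a small reserve ('sbufs') above the user range stays with the kernel for MADs, SMPs and ACKs, and the remainder is split evenly across user contexts. A worked example with illustrative counts (not real hardware values); note that 120 / 16 = 7 matches the "7 sbufs per context" remark in the comment above:

#include <stdio.h>

int main(void)
{
	unsigned piobcnt2k = 128, piobcnt4k = 32;	/* illustrative */
	unsigned first_user_ctxt = 2, cfgctxts = 18;	/* 16 user contexts */
	unsigned updthresh = 8, sbufs, sdmabufcnt;
	unsigned lastbuf_for_pio, lastctxt_piobuf, pbufsctxt;

	sdmabufcnt = piobcnt4k;			/* SDMA gets all 4 KB bufs */
	sbufs = updthresh > 3 ? updthresh : 3;	/* kernel PIO reserve */
	lastbuf_for_pio = piobcnt2k + piobcnt4k - sdmabufcnt;
	lastctxt_piobuf = lastbuf_for_pio - sbufs;
	pbufsctxt = lastctxt_piobuf / (cfgctxts - first_user_ctxt);
	printf("%u PIO bufs/context, kernel keeps %u\n", pbufsctxt, sbufs);
	return 0;
}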
+ */
+static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt)
+{
+	qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt);
+}
+
+/*
+ * sdma_lock should be acquired before calling this routine
+ */
+static void dump_sdma_7322_state(struct qib_pportdata *ppd)
+{
+	u64 reg, reg1, reg2;
+
+	reg = qib_read_kreg_port(ppd, krp_senddmastatus);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmastatus: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_sendctrl);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA sendctrl: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmabase);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmabase: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmabufmask0);
+	reg1 = qib_read_kreg_port(ppd, krp_senddmabufmask1);
+	reg2 = qib_read_kreg_port(ppd, krp_senddmabufmask2);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmabufmask 0:%llx 1:%llx 2:%llx\n",
+		reg, reg1, reg2);
+
+	/* get bufuse bits, clear them, and print them again if non-zero */
+	reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
+	qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg);
+	reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
+	qib_write_kreg_port(ppd, krp_senddmabuf_use1, reg1);
+	reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
+	qib_write_kreg_port(ppd, krp_senddmabuf_use2, reg2);
+	/* 0 and 1 should always be zero, so print as short form */
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA current senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
+		reg, reg1, reg2);
+	reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
+	reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
+	reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
+	/* 0 and 1 should always be zero, so print as short form */
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA cleared senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
+		reg, reg1, reg2);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmatail);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmatail: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmahead);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmahead: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmaheadaddr);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmaheadaddr: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmalengen);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmalengen: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmadesccnt);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmadesccnt: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmaidlecnt);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmaidlecnt: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmaprioritythld);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmaprioritythld: 0x%016llx\n", reg);
+
+	reg = qib_read_kreg_port(ppd, krp_senddmareloadcnt);
+	qib_dev_porterr(ppd->dd, ppd->port,
+		"SDMA senddmareloadcnt: 0x%016llx\n", reg);
+
+	dump_sdma_state(ppd);
+}
+
+static struct sdma_set_state_action sdma_7322_action_table[] = {
+	[qib_sdma_state_s00_hw_down] = {
+		.go_s99_running_tofalse = 1,
+		.op_enable = 0,
+		.op_intenable = 0,
+		.op_halt = 0,
+		.op_drain = 0,
+	},
+	[qib_sdma_state_s10_hw_start_up_wait] = {
+		.op_enable = 0,
+		.op_intenable = 1,
+		.op_halt = 1,
+		.op_drain = 0,
+	},
+	[qib_sdma_state_s20_idle] = {
+		.op_enable = 1,
+		.op_intenable = 1,
+		.op_halt = 1,
+		.op_drain = 0,
+	},
+	[qib_sdma_state_s30_sw_clean_up_wait] = {
+		.op_enable = 0,
+		.op_intenable = 1,
+		.op_halt = 1,
+
.op_drain = 0, + }, + [qib_sdma_state_s40_hw_clean_up_wait] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + .op_drain = 0, + }, + [qib_sdma_state_s50_hw_halt_wait] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 1, + .op_drain = 1, + }, + [qib_sdma_state_s99_running] = { + .op_enable = 1, + .op_intenable = 1, + .op_halt = 0, + .op_drain = 0, + .go_s99_running_totrue = 1, + }, +}; + +static void qib_7322_sdma_init_early(struct qib_pportdata *ppd) +{ + ppd->sdma_state.set_state_action = sdma_7322_action_table; +} + +static int init_sdma_7322_regs(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + unsigned lastbuf, erstbuf; + u64 senddmabufmask[3] = { 0 }; + int n, ret = 0; + + qib_write_kreg_port(ppd, krp_senddmabase, ppd->sdma_descq_phys); + qib_sdma_7322_setlengen(ppd); + qib_sdma_update_7322_tail(ppd, 0); /* Set SendDmaTail */ + qib_write_kreg_port(ppd, krp_senddmareloadcnt, sdma_idle_cnt); + qib_write_kreg_port(ppd, krp_senddmadesccnt, 0); + qib_write_kreg_port(ppd, krp_senddmaheadaddr, ppd->sdma_head_phys); + + if (dd->num_pports) + n = dd->cspec->sdmabufcnt / dd->num_pports; /* no remainder */ + else + n = dd->cspec->sdmabufcnt; /* failsafe for init */ + erstbuf = (dd->piobcnt2k + dd->piobcnt4k) - + ((dd->num_pports == 1 || ppd->port == 2) ? n : + dd->cspec->sdmabufcnt); + lastbuf = erstbuf + n; + + ppd->sdma_state.first_sendbuf = erstbuf; + ppd->sdma_state.last_sendbuf = lastbuf; + for (; erstbuf < lastbuf; ++erstbuf) { + unsigned word = erstbuf / BITS_PER_LONG; + unsigned bit = erstbuf & (BITS_PER_LONG - 1); + + BUG_ON(word >= 3); + senddmabufmask[word] |= 1ULL << bit; + } + qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]); + qib_write_kreg_port(ppd, krp_senddmabufmask1, senddmabufmask[1]); + qib_write_kreg_port(ppd, krp_senddmabufmask2, senddmabufmask[2]); + return ret; +} + +/* sdma_lock must be held */ +static u16 qib_sdma_7322_gethead(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + int sane; + int use_dmahead; + u16 swhead; + u16 swtail; + u16 cnt; + u16 hwhead; + + use_dmahead = __qib_sdma_running(ppd) && + (dd->flags & QIB_HAS_SDMA_TIMEOUT); +retry: + hwhead = use_dmahead ? + (u16) le64_to_cpu(*ppd->sdma_head_dma) : + (u16) qib_read_kreg_port(ppd, krp_senddmahead); + + swhead = ppd->sdma_descq_head; + swtail = ppd->sdma_descq_tail; + cnt = ppd->sdma_descq_cnt; + + if (swhead < swtail) + /* not wrapped */ + sane = (hwhead >= swhead) & (hwhead <= swtail); + else if (swhead > swtail) + /* wrapped around */ + sane = ((hwhead >= swhead) && (hwhead < cnt)) || + (hwhead <= swtail); + else + /* empty */ + sane = (hwhead == swhead); + + if (unlikely(!sane)) { + if (use_dmahead) { + /* try one more time, directly from the register */ + use_dmahead = 0; + goto retry; + } + /* proceed as if no progress */ + hwhead = swhead; + } + + return hwhead; +} + +static int qib_sdma_7322_busy(struct qib_pportdata *ppd) +{ + u64 hwstatus = qib_read_kreg_port(ppd, krp_senddmastatus); + + return (hwstatus & SYM_MASK(SendDmaStatus_0, ScoreBoardDrainInProg)) || + (hwstatus & SYM_MASK(SendDmaStatus_0, HaltInProg)) || + !(hwstatus & SYM_MASK(SendDmaStatus_0, InternalSDmaHalt)) || + !(hwstatus & SYM_MASK(SendDmaStatus_0, ScbEmpty)); +} + +/* + * Compute the amount of delay before sending the next packet if the + * port's send rate differs from the static rate set for the QP. + * The delay affects the next packet and the amount of the delay is + * based on the length of the this packet. 
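Editor's note: the comment above describes the static-rate mechanism implemented by qib_7322_setpbc_control() just below: when the requester's static rate (rcv_mult) is slower than the local link rate (snd_mult), the PBC delay field stretches the gap after this packet in proportion to roughly half its length in dwords. A worked example with illustrative multiplier values (larger multiplier means a slower rate):

#include <stdio.h>

int main(void)
{
	unsigned plen = 512;	/* packet length in dwords, illustrative */
	unsigned snd_mult = 2;	/* local port delay multiplier */
	unsigned rcv_mult = 8;	/* remote static-rate multiplier (slower) */
	unsigned delay = rcv_mult > snd_mult ?
		((plen + 1) >> 1) * snd_mult : 0;

	printf("PBC delay contribution: %u\n", delay);	/* 512 here */
	return 0;
}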
+ */ +static u32 qib_7322_setpbc_control(struct qib_pportdata *ppd, u32 plen, + u8 srate, u8 vl) +{ + u8 snd_mult = ppd->delay_mult; + u8 rcv_mult = ib_rate_to_delay[srate]; + u32 ret; + + ret = rcv_mult > snd_mult ? ((plen + 1) >> 1) * snd_mult : 0; + + /* Indicate VL15, else set the VL in the control word */ + if (vl == 15) + ret |= PBC_7322_VL15_SEND_CTRL; + else + ret |= vl << PBC_VL_NUM_LSB; + ret |= ((u32)(ppd->hw_pidx)) << PBC_PORT_SEL_LSB; + + return ret; +} + +/* + * Enable the per-port VL15 send buffers for use. + * They follow the rest of the buffers, without a config parameter. + * This was in initregs, but that is done before the shadow + * is set up, and this has to be done after the shadow is + * set up. + */ +static void qib_7322_initvl15_bufs(struct qib_devdata *dd) +{ + unsigned vl15bufs; + + vl15bufs = dd->piobcnt2k + dd->piobcnt4k; + qib_chg_pioavailkernel(dd, vl15bufs, NUM_VL15_BUFS, + TXCHK_CHG_TYPE_KERN, NULL); +} + +static void qib_7322_init_ctxt(struct qib_ctxtdata *rcd) +{ + if (rcd->ctxt < NUM_IB_PORTS) { + if (rcd->dd->num_pports > 1) { + rcd->rcvegrcnt = KCTXT0_EGRCNT / 2; + rcd->rcvegr_tid_base = rcd->ctxt ? rcd->rcvegrcnt : 0; + } else { + rcd->rcvegrcnt = KCTXT0_EGRCNT; + rcd->rcvegr_tid_base = 0; + } + } else { + rcd->rcvegrcnt = rcd->dd->cspec->rcvegrcnt; + rcd->rcvegr_tid_base = KCTXT0_EGRCNT + + (rcd->ctxt - NUM_IB_PORTS) * rcd->rcvegrcnt; + } +} + +#define QTXSLEEPS 5000 +static void qib_7322_txchk_change(struct qib_devdata *dd, u32 start, + u32 len, u32 which, struct qib_ctxtdata *rcd) +{ + int i; + const int last = start + len - 1; + const int lastr = last / BITS_PER_LONG; + u32 sleeps = 0; + int wait = rcd != NULL; + unsigned long flags; + + while (wait) { + unsigned long shadow; + int cstart, previ = -1; + + /* + * when flipping from kernel to user, we can't change + * the checking type if the buffer is allocated to the + * driver. It's OK the other direction, because it's + * from close, and we have just disarm'ed all the + * buffers. All the kernel to kernel changes are also + * OK. + */ + for (cstart = start; cstart <= last; cstart++) { + i = ((2 * cstart) + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT) + / BITS_PER_LONG; + if (i != previ) { + shadow = (unsigned long) + le64_to_cpu(dd->pioavailregs_dma[i]); + previ = i; + } + if (test_bit(((2 * cstart) + + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT) + % BITS_PER_LONG, &shadow)) + break; + } + + if (cstart > last) + break; + + if (sleeps == QTXSLEEPS) + break; + /* make sure we see an updated copy next time around */ + sendctrl_7322_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + sleeps++; + msleep(20); + } + + switch (which) { + case TXCHK_CHG_TYPE_DIS1: + /* + * disable checking on a range; used by diags; just + * one buffer, but still written generically + */ + for (i = start; i <= last; i++) + clear_bit(i, dd->cspec->sendchkenable); + break; + + case TXCHK_CHG_TYPE_ENAB1: + /* + * (re)enable checking on a range; used by diags; just + * one buffer, but still written generically; read + * scratch to be sure buffer actually triggered, not + * just flushed from processor. 
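Editor's note: the scratch-register read mentioned in the comment above is the standard fence for posted MMIO writes: PCIe writes may still be buffered on their way to the device, and a read from the same device cannot complete until they have landed. A generic sketch of the idiom, with a hypothetical register layout:

#include <stdint.h>

/* Force all posted MMIO writes to 'regs' to complete; the value read
 * back is irrelevant, the round trip is the point. */
static inline void chip_flush_writes(volatile uint64_t *regs,
				     unsigned scratch_idx)
{
	(void)regs[scratch_idx];
}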
+ */ + qib_read_kreg32(dd, kr_scratch); + for (i = start; i <= last; i++) + set_bit(i, dd->cspec->sendchkenable); + break; + + case TXCHK_CHG_TYPE_KERN: + /* usable by kernel */ + for (i = start; i <= last; i++) { + set_bit(i, dd->cspec->sendibchk); + clear_bit(i, dd->cspec->sendgrhchk); + } + spin_lock_irqsave(&dd->uctxt_lock, flags); + /* see if we need to raise avail update threshold */ + for (i = dd->first_user_ctxt; + dd->cspec->updthresh != dd->cspec->updthresh_dflt + && i < dd->cfgctxts; i++) + if (dd->rcd[i] && dd->rcd[i]->subctxt_cnt && + ((dd->rcd[i]->piocnt / dd->rcd[i]->subctxt_cnt) - 1) + < dd->cspec->updthresh_dflt) + break; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + if (i == dd->cfgctxts) { + spin_lock_irqsave(&dd->sendctrl_lock, flags); + dd->cspec->updthresh = dd->cspec->updthresh_dflt; + dd->sendctrl &= ~SYM_MASK(SendCtrl, AvailUpdThld); + dd->sendctrl |= (dd->cspec->updthresh & + SYM_RMASK(SendCtrl, AvailUpdThld)) << + SYM_LSB(SendCtrl, AvailUpdThld); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + sendctrl_7322_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + } + break; + + case TXCHK_CHG_TYPE_USER: + /* for user process */ + for (i = start; i <= last; i++) { + clear_bit(i, dd->cspec->sendibchk); + set_bit(i, dd->cspec->sendgrhchk); + } + spin_lock_irqsave(&dd->sendctrl_lock, flags); + if (rcd && rcd->subctxt_cnt && ((rcd->piocnt + / rcd->subctxt_cnt) - 1) < dd->cspec->updthresh) { + dd->cspec->updthresh = (rcd->piocnt / + rcd->subctxt_cnt) - 1; + dd->sendctrl &= ~SYM_MASK(SendCtrl, AvailUpdThld); + dd->sendctrl |= (dd->cspec->updthresh & + SYM_RMASK(SendCtrl, AvailUpdThld)) + << SYM_LSB(SendCtrl, AvailUpdThld); + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + sendctrl_7322_mod(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); + } else + spin_unlock_irqrestore(&dd->sendctrl_lock, flags); + break; + + default: + break; + } + + for (i = start / BITS_PER_LONG; which >= 2 && i <= lastr; ++i) + qib_write_kreg(dd, kr_sendcheckmask + i, + dd->cspec->sendchkenable[i]); + + for (i = start / BITS_PER_LONG; which < 2 && i <= lastr; ++i) { + qib_write_kreg(dd, kr_sendgrhcheckmask + i, + dd->cspec->sendgrhchk[i]); + qib_write_kreg(dd, kr_sendibpktmask + i, + dd->cspec->sendibchk[i]); + } + + /* + * Be sure whatever we did was seen by the chip and acted upon, + * before we return. Mostly important for which >= 2. + */ + qib_read_kreg32(dd, kr_scratch); +} + + +/* useful for trigger analyzers, etc. */ +static void writescratch(struct qib_devdata *dd, u32 val) +{ + qib_write_kreg(dd, kr_scratch, val); +} + +/* Dummy for now, use chip regs soon */ +static int qib_7322_tempsense_rd(struct qib_devdata *dd, int regnum) +{ + return -ENXIO; +} + +/** + * qib_init_iba7322_funcs - set up the chip-specific function pointers + * @dev: the pci_dev for qlogic_ib device + * @ent: pci_device_id struct for this dev + * + * Also allocates, inits, and returns the devdata struct for this + * device instance + * + * This is global, and is called directly at init to set up the + * chip-specific function pointers for later use. 
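Editor's note: qib_init_iba7322_funcs() below is the 7322's half of a classic ops-table design: device-independent qib code only ever calls through the dd->f_* pointers, and each chip generation binds its own implementations once at probe time. A minimal model of the pattern, with names invented for illustration:

struct chip_ops_demo {
	int (*reset)(void *dd);
	int (*init_ctxt)(void *rcd);
};

static int demo_7322_reset(void *dd) { (void)dd; return 0; }
static int demo_7322_init_ctxt(void *rcd) { (void)rcd; return 0; }

/* probe-time binding; common code then calls ops->reset() etc. */
static void bind_7322_ops(struct chip_ops_demo *ops)
{
	ops->reset = demo_7322_reset;
	ops->init_ctxt = demo_7322_init_ctxt;
}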
+ */ +struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct qib_devdata *dd; + int ret, i; + u32 tabsize, actual_cnt = 0; + + dd = qib_alloc_devdata(pdev, + NUM_IB_PORTS * sizeof(struct qib_pportdata) + + sizeof(struct qib_chip_specific) + + NUM_IB_PORTS * sizeof(struct qib_chippport_specific)); + if (IS_ERR(dd)) + goto bail; + + dd->f_bringup_serdes = qib_7322_bringup_serdes; + dd->f_cleanup = qib_setup_7322_cleanup; + dd->f_clear_tids = qib_7322_clear_tids; + dd->f_free_irq = qib_7322_free_irq; + dd->f_get_base_info = qib_7322_get_base_info; + dd->f_get_msgheader = qib_7322_get_msgheader; + dd->f_getsendbuf = qib_7322_getsendbuf; + dd->f_gpio_mod = gpio_7322_mod; + dd->f_eeprom_wen = qib_7322_eeprom_wen; + dd->f_hdrqempty = qib_7322_hdrqempty; + dd->f_ib_updown = qib_7322_ib_updown; + dd->f_init_ctxt = qib_7322_init_ctxt; + dd->f_initvl15_bufs = qib_7322_initvl15_bufs; + dd->f_intr_fallback = qib_7322_intr_fallback; + dd->f_late_initreg = qib_late_7322_initreg; + dd->f_setpbc_control = qib_7322_setpbc_control; + dd->f_portcntr = qib_portcntr_7322; + dd->f_put_tid = qib_7322_put_tid; + dd->f_quiet_serdes = qib_7322_mini_quiet_serdes; + dd->f_rcvctrl = rcvctrl_7322_mod; + dd->f_read_cntrs = qib_read_7322cntrs; + dd->f_read_portcntrs = qib_read_7322portcntrs; + dd->f_reset = qib_do_7322_reset; + dd->f_init_sdma_regs = init_sdma_7322_regs; + dd->f_sdma_busy = qib_sdma_7322_busy; + dd->f_sdma_gethead = qib_sdma_7322_gethead; + dd->f_sdma_sendctrl = qib_7322_sdma_sendctrl; + dd->f_sdma_set_desc_cnt = qib_sdma_set_7322_desc_cnt; + dd->f_sdma_update_tail = qib_sdma_update_7322_tail; + dd->f_sendctrl = sendctrl_7322_mod; + dd->f_set_armlaunch = qib_set_7322_armlaunch; + dd->f_set_cntr_sample = qib_set_cntr_7322_sample; + dd->f_iblink_state = qib_7322_iblink_state; + dd->f_ibphys_portstate = qib_7322_phys_portstate; + dd->f_get_ib_cfg = qib_7322_get_ib_cfg; + dd->f_set_ib_cfg = qib_7322_set_ib_cfg; + dd->f_set_ib_loopback = qib_7322_set_loopback; + dd->f_get_ib_table = qib_7322_get_ib_table; + dd->f_set_ib_table = qib_7322_set_ib_table; + dd->f_set_intr_state = qib_7322_set_intr_state; + dd->f_setextled = qib_setup_7322_setextled; + dd->f_txchk_change = qib_7322_txchk_change; + dd->f_update_usrhead = qib_update_7322_usrhead; + dd->f_wantpiobuf_intr = qib_wantpiobuf_7322_intr; + dd->f_xgxs_reset = qib_7322_mini_pcs_reset; + dd->f_sdma_hw_clean_up = qib_7322_sdma_hw_clean_up; + dd->f_sdma_hw_start_up = qib_7322_sdma_hw_start_up; + dd->f_sdma_init_early = qib_7322_sdma_init_early; + dd->f_writescratch = writescratch; + dd->f_tempsense_rd = qib_7322_tempsense_rd; +#ifdef CONFIG_INFINIBAND_QIB_DCA + dd->f_notify_dca = qib_7322_notify_dca; +#endif + /* + * Do remaining PCIe setup and save PCIe values in dd. + * Any error printing is already done by the init code. + * On return, we have the chip mapped, but chip registers + * are not set up until start of qib_init_7322_variables. + */ + ret = qib_pcie_ddinit(dd, pdev, ent); + if (ret < 0) + goto bail_free; + + /* initialize chip-specific variables */ + ret = qib_init_7322_variables(dd); + if (ret) + goto bail_cleanup; + + if (qib_mini_init || !dd->num_pports) + goto bail; + + /* + * Determine number of vectors we want; depends on port count + * and number of configured kernel receive queues actually used. + * Should also depend on whether sdma is enabled or not, but + * that's such a rare testing case it's not worth worrying about. 
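Editor's note: the counting loop that follows sizes the MSI-X table as the fixed per-device/per-port sources from irq_table plus one vector for each kernel receive context actually in use. A simplified standalone version; irq_src_demo and the context array are hypothetical stand-ins for the driver's irq_table and dd->rcd[]:

struct irq_src_demo { int port; };

static unsigned count_msix_vectors(const struct irq_src_demo *srcs,
				   unsigned nsrcs, unsigned num_pports,
				   const void *const *kctxts,
				   unsigned nkctxts)
{
	unsigned i, cnt = 0;

	for (i = 0; i < nsrcs; i++)	/* per-device/port sources */
		if (srcs[i].port <= (int)num_pports)
			cnt++;
	for (i = 0; i < nkctxts; i++)	/* one per active kernel context */
		if (kctxts[i])
			cnt++;
	return cnt;
}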
+ */
+	tabsize = dd->first_user_ctxt + ARRAY_SIZE(irq_table);
+	for (i = 0; i < tabsize; i++)
+		if ((i < ARRAY_SIZE(irq_table) &&
+		     irq_table[i].port <= dd->num_pports) ||
+		    (i >= ARRAY_SIZE(irq_table) &&
+		     dd->rcd[i - ARRAY_SIZE(irq_table)]))
+			actual_cnt++;
+	/* reduce by ctxt's < 2 */
+	if (qib_krcvq01_no_msi)
+		actual_cnt -= dd->num_pports;
+
+	tabsize = actual_cnt;
+	dd->cspec->msix_entries = kzalloc(tabsize *
+			sizeof(struct qib_msix_entry), GFP_KERNEL);
+	if (!dd->cspec->msix_entries) {
+		qib_dev_err(dd, "No memory for MSIx table\n");
+		tabsize = 0;
+	}
+	for (i = 0; i < tabsize; i++)
+		dd->cspec->msix_entries[i].msix.entry = i;
+
+	if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries))
+		qib_dev_err(dd,
+			"Failed to setup PCIe or interrupts; continuing anyway\n");
+	/* may be less than we wanted, if not enough available */
+	dd->cspec->num_msix_entries = tabsize;
+
+	/* setup interrupt handler */
+	qib_setup_7322_interrupt(dd, 1);
+
+	/* clear diagctrl register, in case diags were running and crashed */
+	qib_write_kreg(dd, kr_hwdiagctrl, 0);
+#ifdef CONFIG_INFINIBAND_QIB_DCA
+	if (!dca_add_requester(&pdev->dev)) {
+		qib_devinfo(dd->pcidev, "DCA enabled\n");
+		dd->flags |= QIB_DCA_ENABLED;
+		qib_setup_dca(dd);
+	}
+#endif
+	goto bail;
+
+bail_cleanup:
+	qib_pcie_ddcleanup(dd);
+bail_free:
+	qib_free_devdata(dd);
+	dd = ERR_PTR(ret);
+bail:
+	return dd;
+}
+
+/*
+ * Set the table entry at the specified index from the specified table.
+ * There are 3 * TXDDS_TABLE_SZ entries in all per port, with the first
+ * TXDDS_TABLE_SZ for SDR, the next for DDR, and the last for QDR.
+ * 'idx' below addresses the correct entry, while its 4 LSBs select the
+ * corresponding entry (one of TXDDS_TABLE_SZ) from the selected table.
+ */
+#define DDS_ENT_AMP_LSB 14
+#define DDS_ENT_MAIN_LSB 9
+#define DDS_ENT_POST_LSB 5
+#define DDS_ENT_PRE_XTRA_LSB 3
+#define DDS_ENT_PRE_LSB 0
+
+/*
+ * Set one entry in the TxDDS table for spec'd port
+ * ridx picks one of the entries, while tp points
+ * to the appropriate table entry.
+ */
+static void set_txdds(struct qib_pportdata *ppd, int ridx,
+		      const struct txdds_ent *tp)
+{
+	struct qib_devdata *dd = ppd->dd;
+	u32 pack_ent;
+	int regidx;
+
+	/* Get correct offset in chip-space, and in source table */
+	regidx = KREG_IBPORT_IDX(IBSD_DDS_MAP_TABLE) + ridx;
+	/*
+	 * We do not use qib_write_kreg_port() because it was intended
+	 * only for registers in the lower "port specific" pages.
+	 * So do index calculation by hand.
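Editor's note: putting the DDS_ENT_* shifts above to work, here is one packed TxDDS register value, using the loopback row {amp 2, pre 2, main 15, post 6} from the tables further down as sample data:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned amp = 2, pre = 2, main_c = 15, post = 6;	/* loopback row */
	uint32_t pack_ent = (amp << 14) | (main_c << 9) |
			    (post << 5) | (pre << 0);

	printf("packed TxDDS entry: 0x%05x\n", pack_ent);	/* 0x09ec2 */
	return 0;
}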
+ */ + if (ppd->hw_pidx) + regidx += (dd->palign / sizeof(u64)); + + pack_ent = tp->amp << DDS_ENT_AMP_LSB; + pack_ent |= tp->main << DDS_ENT_MAIN_LSB; + pack_ent |= tp->pre << DDS_ENT_PRE_LSB; + pack_ent |= tp->post << DDS_ENT_POST_LSB; + qib_write_kreg(dd, regidx, pack_ent); + /* Prevent back-to-back writes by hitting scratch */ + qib_write_kreg(ppd->dd, kr_scratch, 0); +} + +static const struct vendor_txdds_ent vendor_txdds[] = { + { /* Amphenol 1m 30awg NoEq */ + { 0x41, 0x50, 0x48 }, "584470002 ", + { 10, 0, 0, 5 }, { 10, 0, 0, 9 }, { 7, 1, 0, 13 }, + }, + { /* Amphenol 3m 28awg NoEq */ + { 0x41, 0x50, 0x48 }, "584470004 ", + { 0, 0, 0, 8 }, { 0, 0, 0, 11 }, { 0, 1, 7, 15 }, + }, + { /* Finisar 3m OM2 Optical */ + { 0x00, 0x90, 0x65 }, "FCBG410QB1C03-QL", + { 0, 0, 0, 3 }, { 0, 0, 0, 4 }, { 0, 0, 0, 13 }, + }, + { /* Finisar 30m OM2 Optical */ + { 0x00, 0x90, 0x65 }, "FCBG410QB1C30-QL", + { 0, 0, 0, 1 }, { 0, 0, 0, 5 }, { 0, 0, 0, 11 }, + }, + { /* Finisar Default OM2 Optical */ + { 0x00, 0x90, 0x65 }, NULL, + { 0, 0, 0, 2 }, { 0, 0, 0, 5 }, { 0, 0, 0, 12 }, + }, + { /* Gore 1m 30awg NoEq */ + { 0x00, 0x21, 0x77 }, "QSN3300-1 ", + { 0, 0, 0, 6 }, { 0, 0, 0, 9 }, { 0, 1, 0, 15 }, + }, + { /* Gore 2m 30awg NoEq */ + { 0x00, 0x21, 0x77 }, "QSN3300-2 ", + { 0, 0, 0, 8 }, { 0, 0, 0, 10 }, { 0, 1, 7, 15 }, + }, + { /* Gore 1m 28awg NoEq */ + { 0x00, 0x21, 0x77 }, "QSN3800-1 ", + { 0, 0, 0, 6 }, { 0, 0, 0, 8 }, { 0, 1, 0, 15 }, + }, + { /* Gore 3m 28awg NoEq */ + { 0x00, 0x21, 0x77 }, "QSN3800-3 ", + { 0, 0, 0, 9 }, { 0, 0, 0, 13 }, { 0, 1, 7, 15 }, + }, + { /* Gore 5m 24awg Eq */ + { 0x00, 0x21, 0x77 }, "QSN7000-5 ", + { 0, 0, 0, 7 }, { 0, 0, 0, 9 }, { 0, 1, 3, 15 }, + }, + { /* Gore 7m 24awg Eq */ + { 0x00, 0x21, 0x77 }, "QSN7000-7 ", + { 0, 0, 0, 9 }, { 0, 0, 0, 11 }, { 0, 2, 6, 15 }, + }, + { /* Gore 5m 26awg Eq */ + { 0x00, 0x21, 0x77 }, "QSN7600-5 ", + { 0, 0, 0, 8 }, { 0, 0, 0, 11 }, { 0, 1, 9, 13 }, + }, + { /* Gore 7m 26awg Eq */ + { 0x00, 0x21, 0x77 }, "QSN7600-7 ", + { 0, 0, 0, 8 }, { 0, 0, 0, 11 }, { 10, 1, 8, 15 }, + }, + { /* Intersil 12m 24awg Active */ + { 0x00, 0x30, 0xB4 }, "QLX4000CQSFP1224", + { 0, 0, 0, 2 }, { 0, 0, 0, 5 }, { 0, 3, 0, 9 }, + }, + { /* Intersil 10m 28awg Active */ + { 0x00, 0x30, 0xB4 }, "QLX4000CQSFP1028", + { 0, 0, 0, 6 }, { 0, 0, 0, 4 }, { 0, 2, 0, 2 }, + }, + { /* Intersil 7m 30awg Active */ + { 0x00, 0x30, 0xB4 }, "QLX4000CQSFP0730", + { 0, 0, 0, 6 }, { 0, 0, 0, 4 }, { 0, 1, 0, 3 }, + }, + { /* Intersil 5m 32awg Active */ + { 0x00, 0x30, 0xB4 }, "QLX4000CQSFP0532", + { 0, 0, 0, 6 }, { 0, 0, 0, 6 }, { 0, 2, 0, 8 }, + }, + { /* Intersil Default Active */ + { 0x00, 0x30, 0xB4 }, NULL, + { 0, 0, 0, 6 }, { 0, 0, 0, 5 }, { 0, 2, 0, 5 }, + }, + { /* Luxtera 20m Active Optical */ + { 0x00, 0x25, 0x63 }, NULL, + { 0, 0, 0, 5 }, { 0, 0, 0, 8 }, { 0, 2, 0, 12 }, + }, + { /* Molex 1M Cu loopback */ + { 0x00, 0x09, 0x3A }, "74763-0025 ", + { 2, 2, 6, 15 }, { 2, 2, 6, 15 }, { 2, 2, 6, 15 }, + }, + { /* Molex 2m 28awg NoEq */ + { 0x00, 0x09, 0x3A }, "74757-2201 ", + { 0, 0, 0, 6 }, { 0, 0, 0, 9 }, { 0, 1, 1, 15 }, + }, +}; + +static const struct txdds_ent txdds_sdr[TXDDS_TABLE_SZ] = { + /* amp, pre, main, post */ + { 2, 2, 15, 6 }, /* Loopback */ + { 0, 0, 0, 1 }, /* 2 dB */ + { 0, 0, 0, 2 }, /* 3 dB */ + { 0, 0, 0, 3 }, /* 4 dB */ + { 0, 0, 0, 4 }, /* 5 dB */ + { 0, 0, 0, 5 }, /* 6 dB */ + { 0, 0, 0, 6 }, /* 7 dB */ + { 0, 0, 0, 7 }, /* 8 dB */ + { 0, 0, 0, 8 }, /* 9 dB */ + { 0, 0, 0, 9 }, /* 10 dB */ + { 0, 0, 0, 10 }, /* 11 dB */ + { 0, 0, 0, 11 }, /* 12 dB */ 
+ { 0, 0, 0, 12 }, /* 13 dB */ + { 0, 0, 0, 13 }, /* 14 dB */ + { 0, 0, 0, 14 }, /* 15 dB */ + { 0, 0, 0, 15 }, /* 16 dB */ +}; + +static const struct txdds_ent txdds_ddr[TXDDS_TABLE_SZ] = { + /* amp, pre, main, post */ + { 2, 2, 15, 6 }, /* Loopback */ + { 0, 0, 0, 8 }, /* 2 dB */ + { 0, 0, 0, 8 }, /* 3 dB */ + { 0, 0, 0, 9 }, /* 4 dB */ + { 0, 0, 0, 9 }, /* 5 dB */ + { 0, 0, 0, 10 }, /* 6 dB */ + { 0, 0, 0, 10 }, /* 7 dB */ + { 0, 0, 0, 11 }, /* 8 dB */ + { 0, 0, 0, 11 }, /* 9 dB */ + { 0, 0, 0, 12 }, /* 10 dB */ + { 0, 0, 0, 12 }, /* 11 dB */ + { 0, 0, 0, 13 }, /* 12 dB */ + { 0, 0, 0, 13 }, /* 13 dB */ + { 0, 0, 0, 14 }, /* 14 dB */ + { 0, 0, 0, 14 }, /* 15 dB */ + { 0, 0, 0, 15 }, /* 16 dB */ +}; + +static const struct txdds_ent txdds_qdr[TXDDS_TABLE_SZ] = { + /* amp, pre, main, post */ + { 2, 2, 15, 6 }, /* Loopback */ + { 0, 1, 0, 7 }, /* 2 dB (also QMH7342) */ + { 0, 1, 0, 9 }, /* 3 dB (also QMH7342) */ + { 0, 1, 0, 11 }, /* 4 dB */ + { 0, 1, 0, 13 }, /* 5 dB */ + { 0, 1, 0, 15 }, /* 6 dB */ + { 0, 1, 3, 15 }, /* 7 dB */ + { 0, 1, 7, 15 }, /* 8 dB */ + { 0, 1, 7, 15 }, /* 9 dB */ + { 0, 1, 8, 15 }, /* 10 dB */ + { 0, 1, 9, 15 }, /* 11 dB */ + { 0, 1, 10, 15 }, /* 12 dB */ + { 0, 2, 6, 15 }, /* 13 dB */ + { 0, 2, 7, 15 }, /* 14 dB */ + { 0, 2, 8, 15 }, /* 15 dB */ + { 0, 2, 9, 15 }, /* 16 dB */ +}; + +/* + * extra entries for use with txselect, for indices >= TXDDS_TABLE_SZ. + * These are mostly used for mez cards going through connectors + * and backplane traces, but can be used to add other "unusual" + * table values as well. + */ +static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = { + /* amp, pre, main, post */ + { 0, 0, 0, 1 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 1 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 3 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 4 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ +}; + +static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { + /* amp, pre, main, post */ + { 0, 0, 0, 7 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 7 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 9 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 10 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 
1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ +}; + +static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { + /* amp, pre, main, post */ + { 0, 1, 0, 4 }, /* QMH7342 backplane settings */ + { 0, 1, 0, 5 }, /* QMH7342 backplane settings */ + { 0, 1, 0, 6 }, /* QMH7342 backplane settings */ + { 0, 1, 0, 8 }, /* QMH7342 backplane settings */ + { 0, 1, 0, 10 }, /* QMH7342 backplane settings */ + { 0, 1, 0, 12 }, /* QMH7342 backplane settings */ + { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */ + { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */ + { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */ +}; + +static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = { + /* amp, pre, main, post */ + { 0, 0, 0, 0 }, /* QME7342 mfg settings */ + { 0, 0, 0, 6 }, /* QME7342 P2 mfg settings */ +}; + +static const struct txdds_ent *get_atten_table(const struct txdds_ent *txdds, + unsigned atten) +{ + /* + * The attenuation table starts at 2dB for entry 1, + * with entry 0 being the loopback entry. + */ + if (atten <= 2) + atten = 1; + else if (atten > TXDDS_TABLE_SZ) + atten = TXDDS_TABLE_SZ - 1; + else + atten--; + return txdds + atten; +} + +/* + * if override is set, the module parameter txselect has a value + * for this specific port, so use it, rather than our normal mechanism. + */ +static void find_best_ent(struct qib_pportdata *ppd, + const struct txdds_ent **sdr_dds, + const struct txdds_ent **ddr_dds, + const struct txdds_ent **qdr_dds, int override) +{ + struct qib_qsfp_cache *qd = &ppd->cpspec->qsfp_data.cache; + int idx; + + /* Search table of known cables */ + for (idx = 0; !override && idx < ARRAY_SIZE(vendor_txdds); ++idx) { + const struct vendor_txdds_ent *v = vendor_txdds + idx; + + if (!memcmp(v->oui, qd->oui, QSFP_VOUI_LEN) && + (!v->partnum || + !memcmp(v->partnum, qd->partnum, QSFP_PN_LEN))) { + *sdr_dds = &v->sdr; + *ddr_dds = &v->ddr; + *qdr_dds = &v->qdr; + return; + } + } + + /* Active cables don't have attenuation so we only set SERDES + * settings to account for the attenuation of the board traces. */ + if (!override && QSFP_IS_ACTIVE(qd->tech)) { + *sdr_dds = txdds_sdr + ppd->dd->board_atten; + *ddr_dds = txdds_ddr + ppd->dd->board_atten; + *qdr_dds = txdds_qdr + ppd->dd->board_atten; + return; + } + + if (!override && QSFP_HAS_ATTEN(qd->tech) && (qd->atten[0] || + qd->atten[1])) { + *sdr_dds = get_atten_table(txdds_sdr, qd->atten[0]); + *ddr_dds = get_atten_table(txdds_ddr, qd->atten[0]); + *qdr_dds = get_atten_table(txdds_qdr, qd->atten[1]); + return; + } else if (ppd->cpspec->no_eep < TXDDS_TABLE_SZ) { + /* + * If we have no (or incomplete) data from the cable + * EEPROM, or no QSFP, or override is set, use the + * module parameter value to index into the attentuation + * table. 
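Editor's note: get_atten_table() above maps a cable's advertised attenuation in dB onto a table index: entry 0 is reserved for loopback, 2 dB lands on entry 1, and anything beyond the table is clamped to the last entry. The same mapping as a standalone function, with TABLE_SZ standing in for TXDDS_TABLE_SZ (16 in this driver):

#define TABLE_SZ 16

static unsigned atten_to_idx(unsigned atten)
{
	if (atten <= 2)			/* entry 0 is loopback; 2 dB -> 1 */
		return 1;
	if (atten > TABLE_SZ)		/* clamp very lossy cables */
		return TABLE_SZ - 1;
	return atten - 1;		/* N dB -> entry N - 1 */
}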
+ */ + idx = ppd->cpspec->no_eep; + *sdr_dds = &txdds_sdr[idx]; + *ddr_dds = &txdds_ddr[idx]; + *qdr_dds = &txdds_qdr[idx]; + } else if (ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ)) { + /* similar to above, but index into the "extra" table. */ + idx = ppd->cpspec->no_eep - TXDDS_TABLE_SZ; + *sdr_dds = &txdds_extra_sdr[idx]; + *ddr_dds = &txdds_extra_ddr[idx]; + *qdr_dds = &txdds_extra_qdr[idx]; + } else if ((IS_QME(ppd->dd) || IS_QMH(ppd->dd)) && + ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + + TXDDS_MFG_SZ)) { + idx = ppd->cpspec->no_eep - (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ); + pr_info("IB%u:%u use idx %u into txdds_mfg\n", + ppd->dd->unit, ppd->port, idx); + *sdr_dds = &txdds_extra_mfg[idx]; + *ddr_dds = &txdds_extra_mfg[idx]; + *qdr_dds = &txdds_extra_mfg[idx]; + } else { + /* this shouldn't happen, it's range checked */ + *sdr_dds = txdds_sdr + qib_long_atten; + *ddr_dds = txdds_ddr + qib_long_atten; + *qdr_dds = txdds_qdr + qib_long_atten; + } +} + +static void init_txdds_table(struct qib_pportdata *ppd, int override) +{ + const struct txdds_ent *sdr_dds, *ddr_dds, *qdr_dds; + struct txdds_ent *dds; + int idx; + int single_ent = 0; + + find_best_ent(ppd, &sdr_dds, &ddr_dds, &qdr_dds, override); + + /* for mez cards or override, use the selected value for all entries */ + if (!(ppd->dd->flags & QIB_HAS_QSFP) || override) + single_ent = 1; + + /* Fill in the first entry with the best entry found. */ + set_txdds(ppd, 0, sdr_dds); + set_txdds(ppd, TXDDS_TABLE_SZ, ddr_dds); + set_txdds(ppd, 2 * TXDDS_TABLE_SZ, qdr_dds); + if (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED | + QIBL_LINKACTIVE)) { + dds = (struct txdds_ent *)(ppd->link_speed_active == + QIB_IB_QDR ? qdr_dds : + (ppd->link_speed_active == + QIB_IB_DDR ? ddr_dds : sdr_dds)); + write_tx_serdes_param(ppd, dds); + } + + /* Fill in the remaining entries with the default table values. */ + for (idx = 1; idx < ARRAY_SIZE(txdds_sdr); ++idx) { + set_txdds(ppd, idx, single_ent ? sdr_dds : txdds_sdr + idx); + set_txdds(ppd, idx + TXDDS_TABLE_SZ, + single_ent ? ddr_dds : txdds_ddr + idx); + set_txdds(ppd, idx + 2 * TXDDS_TABLE_SZ, + single_ent ? qdr_dds : txdds_qdr + idx); + } +} + +#define KR_AHB_ACC KREG_IDX(ahb_access_ctrl) +#define KR_AHB_TRANS KREG_IDX(ahb_transaction_reg) +#define AHB_TRANS_RDY SYM_MASK(ahb_transaction_reg, ahb_rdy) +#define AHB_ADDR_LSB SYM_LSB(ahb_transaction_reg, ahb_address) +#define AHB_DATA_LSB SYM_LSB(ahb_transaction_reg, ahb_data) +#define AHB_WR SYM_MASK(ahb_transaction_reg, write_not_read) +#define AHB_TRANS_TRIES 10 + +/* + * The chan argument is 0=chan0, 1=chan1, 2=pll, 3=chan2, 4=chan4, + * 5=subsystem which is why most calls have "chan + chan >> 1" + * for the channel argument. + */ +static u32 ahb_mod(struct qib_devdata *dd, int quad, int chan, int addr, + u32 data, u32 mask) +{ + u32 rd_data, wr_data, sz_mask; + u64 trans, acc, prev_acc; + u32 ret = 0xBAD0BAD; + int tries; + + prev_acc = qib_read_kreg64(dd, KR_AHB_ACC); + /* From this point on, make sure we return access */ + acc = (quad << 1) | 1; + qib_write_kreg(dd, KR_AHB_ACC, acc); + + for (tries = 1; tries < AHB_TRANS_TRIES; ++tries) { + trans = qib_read_kreg64(dd, KR_AHB_TRANS); + if (trans & AHB_TRANS_RDY) + break; + } + if (tries >= AHB_TRANS_TRIES) { + qib_dev_err(dd, "No ahb_rdy in %d tries\n", AHB_TRANS_TRIES); + goto bail; + } + + /* If mask is not all 1s, we need to read, but different SerDes + * entities have different sizes + */ + sz_mask = (1UL << ((quad == 1) ? 
32 : 16)) - 1; + wr_data = data & mask & sz_mask; + if ((~mask & sz_mask) != 0) { + trans = ((chan << 6) | addr) << (AHB_ADDR_LSB + 1); + qib_write_kreg(dd, KR_AHB_TRANS, trans); + + for (tries = 1; tries < AHB_TRANS_TRIES; ++tries) { + trans = qib_read_kreg64(dd, KR_AHB_TRANS); + if (trans & AHB_TRANS_RDY) + break; + } + if (tries >= AHB_TRANS_TRIES) { + qib_dev_err(dd, "No Rd ahb_rdy in %d tries\n", + AHB_TRANS_TRIES); + goto bail; + } + /* Re-read in case host split reads and read data first */ + trans = qib_read_kreg64(dd, KR_AHB_TRANS); + rd_data = (uint32_t)(trans >> AHB_DATA_LSB); + wr_data |= (rd_data & ~mask & sz_mask); + } + + /* If mask is not zero, we need to write. */ + if (mask & sz_mask) { + trans = ((chan << 6) | addr) << (AHB_ADDR_LSB + 1); + trans |= ((uint64_t)wr_data << AHB_DATA_LSB); + trans |= AHB_WR; + qib_write_kreg(dd, KR_AHB_TRANS, trans); + + for (tries = 1; tries < AHB_TRANS_TRIES; ++tries) { + trans = qib_read_kreg64(dd, KR_AHB_TRANS); + if (trans & AHB_TRANS_RDY) + break; + } + if (tries >= AHB_TRANS_TRIES) { + qib_dev_err(dd, "No Wr ahb_rdy in %d tries\n", + AHB_TRANS_TRIES); + goto bail; + } + } + ret = wr_data; +bail: + qib_write_kreg(dd, KR_AHB_ACC, prev_acc); + return ret; +} + +static void ibsd_wr_allchans(struct qib_pportdata *ppd, int addr, unsigned data, + unsigned mask) +{ + struct qib_devdata *dd = ppd->dd; + int chan; + u32 rbc; + + for (chan = 0; chan < SERDES_CHANS; ++chan) { + ahb_mod(dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), addr, + data, mask); + rbc = ahb_mod(dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + addr, 0, 0); + } +} + +static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable) +{ + u64 data = qib_read_kreg_port(ppd, krp_serdesctrl); + u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN); + + if (enable && !state) { + pr_info("IB%u:%u Turning LOS on\n", + ppd->dd->unit, ppd->port); + data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN); + } else if (!enable && state) { + pr_info("IB%u:%u Turning LOS off\n", + ppd->dd->unit, ppd->port); + data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN); + } + qib_write_kreg_port(ppd, krp_serdesctrl, data); +} + +static int serdes_7322_init(struct qib_pportdata *ppd) +{ + int ret = 0; + if (ppd->dd->cspec->r1) + ret = serdes_7322_init_old(ppd); + else + ret = serdes_7322_init_new(ppd); + return ret; +} + +static int serdes_7322_init_old(struct qib_pportdata *ppd) +{ + u32 le_val; + + /* + * Initialize the Tx DDS tables. Also done every QSFP event, + * for adapters with QSFP + */ + init_txdds_table(ppd, 0); + + /* ensure no tx overrides from earlier driver loads */ + qib_write_kreg_port(ppd, krp_tx_deemph_override, + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + reset_tx_deemphasis_override)); + + /* Patch some SerDes defaults to "Better for IB" */ + /* Timing Loop Bandwidth: cdr_timing[11:9] = 0 */ + ibsd_wr_allchans(ppd, 2, 0, BMASK(11, 9)); + + /* Termination: rxtermctrl_r2d addr 11 bits [12:11] = 1 */ + ibsd_wr_allchans(ppd, 11, (1 << 11), BMASK(12, 11)); + /* Enable LE2: rxle2en_r2a addr 13 bit [6] = 1 */ + ibsd_wr_allchans(ppd, 13, (1 << 6), (1 << 6)); + + /* May be overridden in qsfp_7322_event */ + le_val = IS_QME(ppd->dd) ? LE2_QME : LE2_DEFAULT; + ibsd_wr_allchans(ppd, 13, (le_val << 7), BMASK(9, 7)); + + /* enable LE1 adaptation for all but QME, which is disabled */ + le_val = IS_QME(ppd->dd) ? 
0 : 1; + ibsd_wr_allchans(ppd, 13, (le_val << 5), (1 << 5)); + + /* Clear cmode-override, may be set from older driver */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14); + + /* Timing Recovery: rxtapsel addr 5 bits [9:8] = 0 */ + ibsd_wr_allchans(ppd, 5, (0 << 8), BMASK(9, 8)); + + /* setup LoS params; these are subsystem, so chan == 5 */ + /* LoS filter threshold_count on, ch 0-3, set to 8 */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 5, 8 << 11, BMASK(14, 11)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 8 << 4, BMASK(7, 4)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 8, 8 << 11, BMASK(14, 11)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 8 << 4, BMASK(7, 4)); + + /* LoS filter threshold_count off, ch 0-3, set to 4 */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 6, 4 << 0, BMASK(3, 0)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 4 << 8, BMASK(11, 8)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 4 << 0, BMASK(3, 0)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 4 << 8, BMASK(11, 8)); + + /* LoS filter select enabled */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 1 << 15, 1 << 15); + + /* LoS target data: SDR=4, DDR=2, QDR=1 */ + ibsd_wr_allchans(ppd, 14, (1 << 3), BMASK(5, 3)); /* QDR */ + ibsd_wr_allchans(ppd, 20, (2 << 10), BMASK(12, 10)); /* DDR */ + ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */ + + serdes_7322_los_enable(ppd, 1); + + /* rxbistena; set 0 to avoid effects of it switch later */ + ibsd_wr_allchans(ppd, 9, 0 << 15, 1 << 15); + + /* Configure 4 DFE taps, and only they adapt */ + ibsd_wr_allchans(ppd, 16, 0 << 0, BMASK(1, 0)); + + /* gain hi stop 32 (22) (6:1) lo stop 7 (10:7) target 22 (13) (15:11) */ + le_val = (ppd->dd->cspec->r1 || IS_QME(ppd->dd)) ? 0xb6c0 : 0x6bac; + ibsd_wr_allchans(ppd, 21, le_val, 0xfffe); + + /* + * Set receive adaptation mode. SDR and DDR adaptation are + * always on, and QDR is initially enabled; later disabled. + */ + qib_write_kreg_port(ppd, krp_static_adapt_dis(0), 0ULL); + qib_write_kreg_port(ppd, krp_static_adapt_dis(1), 0ULL); + qib_write_kreg_port(ppd, krp_static_adapt_dis(2), + ppd->dd->cspec->r1 ? 
+ QDR_STATIC_ADAPT_DOWN_R1 : QDR_STATIC_ADAPT_DOWN); + ppd->cpspec->qdr_dfe_on = 1; + + /* FLoop LOS gate: PPM filter enabled */ + ibsd_wr_allchans(ppd, 38, 0 << 10, 1 << 10); + + /* rx offset center enabled */ + ibsd_wr_allchans(ppd, 12, 1 << 4, 1 << 4); + + if (!ppd->dd->cspec->r1) { + ibsd_wr_allchans(ppd, 12, 1 << 12, 1 << 12); + ibsd_wr_allchans(ppd, 12, 2 << 8, 0x0f << 8); + } + + /* Set the frequency loop bandwidth to 15 */ + ibsd_wr_allchans(ppd, 2, 15 << 5, BMASK(8, 5)); + + return 0; +} + +static int serdes_7322_init_new(struct qib_pportdata *ppd) +{ + unsigned long tend; + u32 le_val, rxcaldone; + int chan, chan_done = (1 << SERDES_CHANS) - 1; + + /* Clear cmode-override, may be set from older driver */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14); + + /* ensure no tx overrides from earlier driver loads */ + qib_write_kreg_port(ppd, krp_tx_deemph_override, + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + reset_tx_deemphasis_override)); + + /* START OF LSI SUGGESTED SERDES BRINGUP */ + /* Reset - Calibration Setup */ + /* Stop DFE adaptaion */ + ibsd_wr_allchans(ppd, 1, 0, BMASK(9, 1)); + /* Disable LE1 */ + ibsd_wr_allchans(ppd, 13, 0, BMASK(5, 5)); + /* Disable autoadapt for LE1 */ + ibsd_wr_allchans(ppd, 1, 0, BMASK(15, 15)); + /* Disable LE2 */ + ibsd_wr_allchans(ppd, 13, 0, BMASK(6, 6)); + /* Disable VGA */ + ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0)); + /* Disable AFE Offset Cancel */ + ibsd_wr_allchans(ppd, 12, 0, BMASK(12, 12)); + /* Disable Timing Loop */ + ibsd_wr_allchans(ppd, 2, 0, BMASK(3, 3)); + /* Disable Frequency Loop */ + ibsd_wr_allchans(ppd, 2, 0, BMASK(4, 4)); + /* Disable Baseline Wander Correction */ + ibsd_wr_allchans(ppd, 13, 0, BMASK(13, 13)); + /* Disable RX Calibration */ + ibsd_wr_allchans(ppd, 4, 0, BMASK(10, 10)); + /* Disable RX Offset Calibration */ + ibsd_wr_allchans(ppd, 12, 0, BMASK(4, 4)); + /* Select BB CDR */ + ibsd_wr_allchans(ppd, 2, (1 << 15), BMASK(15, 15)); + /* CDR Step Size */ + ibsd_wr_allchans(ppd, 5, 0, BMASK(9, 8)); + /* Enable phase Calibration */ + ibsd_wr_allchans(ppd, 12, (1 << 5), BMASK(5, 5)); + /* DFE Bandwidth [2:14-12] */ + ibsd_wr_allchans(ppd, 2, (4 << 12), BMASK(14, 12)); + /* DFE Config (4 taps only) */ + ibsd_wr_allchans(ppd, 16, 0, BMASK(1, 0)); + /* Gain Loop Bandwidth */ + if (!ppd->dd->cspec->r1) { + ibsd_wr_allchans(ppd, 12, 1 << 12, BMASK(12, 12)); + ibsd_wr_allchans(ppd, 12, 2 << 8, BMASK(11, 8)); + } else { + ibsd_wr_allchans(ppd, 19, (3 << 11), BMASK(13, 11)); + } + /* Baseline Wander Correction Gain [13:4-0] (leave as default) */ + /* Baseline Wander Correction Gain [3:7-5] (leave as default) */ + /* Data Rate Select [5:7-6] (leave as default) */ + /* RX Parallel Word Width [3:10-8] (leave as default) */ + + /* RX REST */ + /* Single- or Multi-channel reset */ + /* RX Analog reset */ + /* RX Digital reset */ + ibsd_wr_allchans(ppd, 0, 0, BMASK(15, 13)); + msleep(20); + /* RX Analog reset */ + ibsd_wr_allchans(ppd, 0, (1 << 14), BMASK(14, 14)); + msleep(20); + /* RX Digital reset */ + ibsd_wr_allchans(ppd, 0, (1 << 13), BMASK(13, 13)); + msleep(20); + + /* setup LoS params; these are subsystem, so chan == 5 */ + /* LoS filter threshold_count on, ch 0-3, set to 8 */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 5, 8 << 11, BMASK(14, 11)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 8 << 4, BMASK(7, 4)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 8, 8 << 11, BMASK(14, 11)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 8 << 4, BMASK(7, 4)); + + /* LoS filter threshold_count off, ch 0-3, set to 4 */ + 
ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 6, 4 << 0, BMASK(3, 0)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 7, 4 << 8, BMASK(11, 8)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 4 << 0, BMASK(3, 0)); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 4 << 8, BMASK(11, 8)); + + /* LoS filter select enabled */ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 9, 1 << 15, 1 << 15); + + /* LoS target data: SDR=4, DDR=2, QDR=1 */ + ibsd_wr_allchans(ppd, 14, (1 << 3), BMASK(5, 3)); /* QDR */ + ibsd_wr_allchans(ppd, 20, (2 << 10), BMASK(12, 10)); /* DDR */ + ibsd_wr_allchans(ppd, 20, (4 << 13), BMASK(15, 13)); /* SDR */ + + /* Turn on LOS on initial SERDES init */ + serdes_7322_los_enable(ppd, 1); + /* FLoop LOS gate: PPM filter enabled */ + ibsd_wr_allchans(ppd, 38, 0 << 10, 1 << 10); + + /* RX LATCH CALIBRATION */ + /* Enable Eyefinder Phase Calibration latch */ + ibsd_wr_allchans(ppd, 15, 1, BMASK(0, 0)); + /* Enable RX Offset Calibration latch */ + ibsd_wr_allchans(ppd, 12, (1 << 4), BMASK(4, 4)); + msleep(20); + /* Start Calibration */ + ibsd_wr_allchans(ppd, 4, (1 << 10), BMASK(10, 10)); + tend = jiffies + msecs_to_jiffies(500); + while (chan_done && !time_is_before_jiffies(tend)) { + msleep(20); + for (chan = 0; chan < SERDES_CHANS; ++chan) { + rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), + (chan + (chan >> 1)), + 25, 0, 0); + if ((~rxcaldone & (u32)BMASK(9, 9)) == 0 && + (~chan_done & (1 << chan)) == 0) + chan_done &= ~(1 << chan); + } + } + if (chan_done) { + pr_info("Serdes %d calibration not done after .5 sec: 0x%x\n", + IBSD(ppd->hw_pidx), chan_done); + } else { + for (chan = 0; chan < SERDES_CHANS; ++chan) { + rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), + (chan + (chan >> 1)), + 25, 0, 0); + if ((~rxcaldone & (u32)BMASK(10, 10)) == 0) + pr_info("Serdes %d chan %d calibration failed\n", + IBSD(ppd->hw_pidx), chan); + } + } + + /* Turn off Calibration */ + ibsd_wr_allchans(ppd, 4, 0, BMASK(10, 10)); + msleep(20); + + /* BRING RX UP */ + /* Set LE2 value (May be overridden in qsfp_7322_event) */ + le_val = IS_QME(ppd->dd) ? LE2_QME : LE2_DEFAULT; + ibsd_wr_allchans(ppd, 13, (le_val << 7), BMASK(9, 7)); + /* Set LE2 Loop bandwidth */ + ibsd_wr_allchans(ppd, 3, (7 << 5), BMASK(7, 5)); + /* Enable LE2 */ + ibsd_wr_allchans(ppd, 13, (1 << 6), BMASK(6, 6)); + msleep(20); + /* Enable H0 only */ + ibsd_wr_allchans(ppd, 1, 1, BMASK(9, 1)); + /* gain hi stop 32 (22) (6:1) lo stop 7 (10:7) target 22 (13) (15:11) */ + le_val = (ppd->dd->cspec->r1 || IS_QME(ppd->dd)) ? 0xb6c0 : 0x6bac; + ibsd_wr_allchans(ppd, 21, le_val, 0xfffe); + /* Enable VGA */ + ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0)); + msleep(20); + /* Set Frequency Loop Bandwidth */ + ibsd_wr_allchans(ppd, 2, (15 << 5), BMASK(8, 5)); + /* Enable Frequency Loop */ + ibsd_wr_allchans(ppd, 2, (1 << 4), BMASK(4, 4)); + /* Set Timing Loop Bandwidth */ + ibsd_wr_allchans(ppd, 2, 0, BMASK(11, 9)); + /* Enable Timing Loop */ + ibsd_wr_allchans(ppd, 2, (1 << 3), BMASK(3, 3)); + msleep(50); + /* Enable DFE + * Set receive adaptation mode. SDR and DDR adaptation are + * always on, and QDR is initially enabled; later disabled. + */ + qib_write_kreg_port(ppd, krp_static_adapt_dis(0), 0ULL); + qib_write_kreg_port(ppd, krp_static_adapt_dis(1), 0ULL); + qib_write_kreg_port(ppd, krp_static_adapt_dis(2), + ppd->dd->cspec->r1 ? 
+ QDR_STATIC_ADAPT_DOWN_R1 : QDR_STATIC_ADAPT_DOWN); + ppd->cpspec->qdr_dfe_on = 1; + /* Disable LE1 */ + ibsd_wr_allchans(ppd, 13, (0 << 5), (1 << 5)); + /* Disable auto adapt for LE1 */ + ibsd_wr_allchans(ppd, 1, (0 << 15), BMASK(15, 15)); + msleep(20); + /* Enable AFE Offset Cancel */ + ibsd_wr_allchans(ppd, 12, (1 << 12), BMASK(12, 12)); + /* Enable Baseline Wander Correction */ + ibsd_wr_allchans(ppd, 12, (1 << 13), BMASK(13, 13)); + /* Termination: rxtermctrl_r2d addr 11 bits [12:11] = 1 */ + ibsd_wr_allchans(ppd, 11, (1 << 11), BMASK(12, 11)); + /* VGA output common mode */ + ibsd_wr_allchans(ppd, 12, (3 << 2), BMASK(3, 2)); + + /* + * Initialize the Tx DDS tables. Also done every QSFP event, + * for adapters with QSFP + */ + init_txdds_table(ppd, 0); + + return 0; +} + +/* start adjust QMH serdes parameters */ + +static void set_man_code(struct qib_pportdata *ppd, int chan, int code) +{ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 9, code << 9, 0x3f << 9); +} + +static void set_man_mode_h1(struct qib_pportdata *ppd, int chan, + int enable, u32 tapenable) +{ + if (enable) + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 1, 3 << 10, 0x1f << 10); + else + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 1, 0, 0x1f << 10); +} + +/* Set clock to 1, 0, 1, 0 */ +static void clock_man(struct qib_pportdata *ppd, int chan) +{ + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 4, 0x4000, 0x4000); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 4, 0, 0x4000); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 4, 0x4000, 0x4000); + ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), (chan + (chan >> 1)), + 4, 0, 0x4000); +} + +/* + * write the current Tx serdes pre,post,main,amp settings into the serdes. + * The caller must pass the settings appropriate for the current speed, + * or not care if they are correct for the current speed. + */ +static void write_tx_serdes_param(struct qib_pportdata *ppd, + struct txdds_ent *txdds) +{ + u64 deemph; + + deemph = qib_read_kreg_port(ppd, krp_tx_deemph_override); + /* field names for amp, main, post, pre, respectively */ + deemph &= ~(SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, txampcntl_d2a) | + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, txc0_ena) | + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, txcp1_ena) | + SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, txcn1_ena)); + + deemph |= SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + tx_override_deemphasis_select); + deemph |= (txdds->amp & SYM_RMASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txampcntl_d2a)) << SYM_LSB(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txampcntl_d2a); + deemph |= (txdds->main & SYM_RMASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txc0_ena)) << SYM_LSB(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txc0_ena); + deemph |= (txdds->post & SYM_RMASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txcp1_ena)) << SYM_LSB(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txcp1_ena); + deemph |= (txdds->pre & SYM_RMASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txcn1_ena)) << SYM_LSB(IBSD_TX_DEEMPHASIS_OVERRIDE_0, + txcn1_ena); + qib_write_kreg_port(ppd, krp_tx_deemph_override, deemph); +} + +/* + * Set the parameters for mez cards on link bounce, so they are + * always exactly what was requested. Similar logic to init_txdds + * but does just the serdes. + */ +static void adj_tx_serdes(struct qib_pportdata *ppd) +{ + const struct txdds_ent *sdr_dds, *ddr_dds, *qdr_dds; + struct txdds_ent *dds; + + find_best_ent(ppd, &sdr_dds, &ddr_dds, &qdr_dds, 1); + dds = (struct txdds_ent *)(ppd->link_speed_active == QIB_IB_QDR ? 
+ qdr_dds : (ppd->link_speed_active == QIB_IB_DDR ? + ddr_dds : sdr_dds)); + write_tx_serdes_param(ppd, dds); +} + +/* set QDR forced value for H1, if needed */ +static void force_h1(struct qib_pportdata *ppd) +{ + int chan; + + ppd->cpspec->qdr_reforce = 0; + if (!ppd->dd->cspec->r1) + return; + + for (chan = 0; chan < SERDES_CHANS; chan++) { + set_man_mode_h1(ppd, chan, 1, 0); + set_man_code(ppd, chan, ppd->cpspec->h1_val); + clock_man(ppd, chan); + set_man_mode_h1(ppd, chan, 0, 0); + } +} + +#define SJA_EN SYM_MASK(SPC_JTAG_ACCESS_REG, SPC_JTAG_ACCESS_EN) +#define BISTEN_LSB SYM_LSB(SPC_JTAG_ACCESS_REG, bist_en) + +#define R_OPCODE_LSB 3 +#define R_OP_NOP 0 +#define R_OP_SHIFT 2 +#define R_OP_UPDATE 3 +#define R_TDI_LSB 2 +#define R_TDO_LSB 1 +#define R_RDY 1 + +static int qib_r_grab(struct qib_devdata *dd) +{ + u64 val; + val = SJA_EN; + qib_write_kreg(dd, kr_r_access, val); + qib_read_kreg32(dd, kr_scratch); + return 0; +} + +/* qib_r_wait_for_rdy() not only waits for the ready bit, it + * returns the current state of R_TDO + */ +static int qib_r_wait_for_rdy(struct qib_devdata *dd) +{ + u64 val; + int timeout; + for (timeout = 0; timeout < 100 ; ++timeout) { + val = qib_read_kreg32(dd, kr_r_access); + if (val & R_RDY) + return (val >> R_TDO_LSB) & 1; + } + return -1; +} + +static int qib_r_shift(struct qib_devdata *dd, int bisten, + int len, u8 *inp, u8 *outp) +{ + u64 valbase, val; + int ret, pos; + + valbase = SJA_EN | (bisten << BISTEN_LSB) | + (R_OP_SHIFT << R_OPCODE_LSB); + ret = qib_r_wait_for_rdy(dd); + if (ret < 0) + goto bail; + for (pos = 0; pos < len; ++pos) { + val = valbase; + if (outp) { + outp[pos >> 3] &= ~(1 << (pos & 7)); + outp[pos >> 3] |= (ret << (pos & 7)); + } + if (inp) { + int tdi = inp[pos >> 3] >> (pos & 7); + val |= ((tdi & 1) << R_TDI_LSB); + } + qib_write_kreg(dd, kr_r_access, val); + qib_read_kreg32(dd, kr_scratch); + ret = qib_r_wait_for_rdy(dd); + if (ret < 0) + break; + } + /* Restore to NOP between operations. */ + val = SJA_EN | (bisten << BISTEN_LSB); + qib_write_kreg(dd, kr_r_access, val); + qib_read_kreg32(dd, kr_scratch); + ret = qib_r_wait_for_rdy(dd); + + if (ret >= 0) + ret = pos; +bail: + return ret; +} + +static int qib_r_update(struct qib_devdata *dd, int bisten) +{ + u64 val; + int ret; + + val = SJA_EN | (bisten << BISTEN_LSB) | (R_OP_UPDATE << R_OPCODE_LSB); + ret = qib_r_wait_for_rdy(dd); + if (ret >= 0) { + qib_write_kreg(dd, kr_r_access, val); + qib_read_kreg32(dd, kr_scratch); + } + return ret; +} + +#define BISTEN_PORT_SEL 15 +#define LEN_PORT_SEL 625 +#define BISTEN_AT 17 +#define LEN_AT 156 +#define BISTEN_ETM 16 +#define LEN_ETM 632 + +#define BIT2BYTE(x) (((x) + BITS_PER_BYTE - 1) / BITS_PER_BYTE) + +/* these are common for all IB port use cases. 
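For illustration: qib_r_shift() above streams len bits between the inp/outp byte arrays and the JTAG-style shift register, addressing bit pos as byte pos >> 3, bit pos & 7 (and BIT2BYTE is just a round-up division). A minimal sketch of that bit-array idiom, with helper names invented by the editor, not taken from the driver:

#include <stdint.h>

/* read bit 'pos' of a byte array, LSB-first within each byte */
static int bit_get(const uint8_t *buf, int pos)
{
	return (buf[pos >> 3] >> (pos & 7)) & 1;
}

/* write bit 'pos'; the same clear-then-set done for outp in qib_r_shift() */
static void bit_put(uint8_t *buf, int pos, int val)
{
	buf[pos >> 3] &= ~(1u << (pos & 7));
	buf[pos >> 3] |= (uint8_t)((val & 1) << (pos & 7));
}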
*/ +static u8 reset_at[BIT2BYTE(LEN_AT)] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, +}; +static u8 reset_atetm[BIT2BYTE(LEN_ETM)] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x80, 0xe3, 0x81, 0x73, 0x3c, 0x70, 0x8e, + 0x07, 0xce, 0xf1, 0xc0, 0x39, 0x1e, 0x38, 0xc7, 0x03, 0xe7, + 0x78, 0xe0, 0x1c, 0x0f, 0x9c, 0x7f, 0x80, 0x73, 0x0f, 0x70, + 0xde, 0x01, 0xce, 0x39, 0xc0, 0xf9, 0x06, 0x38, 0xd7, 0x00, + 0xe7, 0x19, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, +}; +static u8 at[BIT2BYTE(LEN_AT)] = { + 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, +}; + +/* used for IB1 or IB2, only one in use */ +static u8 atetm_1port[BIT2BYTE(LEN_ETM)] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x10, 0xf2, 0x80, 0x83, 0x1e, 0x38, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x50, 0xf4, 0x41, 0x00, 0x18, 0x78, 0xc8, 0x03, + 0x07, 0x7b, 0xa0, 0x3e, 0x00, 0x02, 0x00, 0x00, 0x18, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x4b, 0x00, 0x00, 0x00, +}; + +/* used when both IB1 and IB2 are in use */ +static u8 atetm_2port[BIT2BYTE(LEN_ETM)] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, + 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xf8, 0x80, 0x83, 0x1e, 0x38, 0xe0, 0x03, 0x05, + 0x7b, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, + 0xa2, 0x0f, 0x50, 0xf4, 0x41, 0x00, 0x18, 0x78, 0xd1, 0x07, + 0x02, 0x7c, 0x80, 0x3e, 0x00, 0x02, 0x00, 0x00, 0x3e, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, +}; + +/* used when only IB1 is in use */ +static u8 portsel_port1[BIT2BYTE(LEN_PORT_SEL)] = { + 0x32, 0x65, 0xa4, 0x7b, 0x10, 0x98, 0xdc, 0xfe, 0x13, 0x13, + 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x73, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x13, 0x78, 0x78, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x74, 0x32, + 0x32, 0x32, 0x32, 0x32, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, + 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, + 0x14, 0x14, 0x9f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +/* used when only IB2 is in use */ +static u8 portsel_port2[BIT2BYTE(LEN_PORT_SEL)] = { + 0x32, 0x65, 0xa4, 0x7b, 0x10, 0x98, 0xdc, 0xfe, 0x39, 0x39, + 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x73, 0x32, 0x32, 0x32, + 0x32, 0x32, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, + 0x39, 0x78, 0x78, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, + 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x74, 0x32, + 0x32, 0x32, 0x32, 0x32, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, + 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, + 0x3a, 0x3a, 0x9f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, +}; + +/* used when both IB1 and IB2 are in use */ +static u8 portsel_2port[BIT2BYTE(LEN_PORT_SEL)] = { + 0x32, 0xba, 0x54, 0x76, 0x10, 0x98, 0xdc, 0xfe, 0x13, 0x13, + 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x73, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 
0x13, 0x13, 0x13, + 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x74, 0x32, + 0x32, 0x32, 0x32, 0x32, 0x14, 0x14, 0x14, 0x14, 0x14, 0x3a, + 0x3a, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, + 0x14, 0x14, 0x9f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +/* + * Do setup to properly handle IB link recovery; if port is zero, we + * are initializing to cover both ports; otherwise we are initializing + * to cover a single port card, or the port has reached INIT and we may + * need to switch coverage types. + */ +static void setup_7322_link_recovery(struct qib_pportdata *ppd, u32 both) +{ + u8 *portsel, *etm; + struct qib_devdata *dd = ppd->dd; + + if (!ppd->dd->cspec->r1) + return; + if (!both) { + dd->cspec->recovery_ports_initted++; + ppd->cpspec->recovery_init = 1; + } + if (!both && dd->cspec->recovery_ports_initted == 1) { + portsel = ppd->port == 1 ? portsel_port1 : portsel_port2; + etm = atetm_1port; + } else { + portsel = portsel_2port; + etm = atetm_2port; + } + + if (qib_r_grab(dd) < 0 || + qib_r_shift(dd, BISTEN_ETM, LEN_ETM, reset_atetm, NULL) < 0 || + qib_r_update(dd, BISTEN_ETM) < 0 || + qib_r_shift(dd, BISTEN_AT, LEN_AT, reset_at, NULL) < 0 || + qib_r_update(dd, BISTEN_AT) < 0 || + qib_r_shift(dd, BISTEN_PORT_SEL, LEN_PORT_SEL, + portsel, NULL) < 0 || + qib_r_update(dd, BISTEN_PORT_SEL) < 0 || + qib_r_shift(dd, BISTEN_AT, LEN_AT, at, NULL) < 0 || + qib_r_update(dd, BISTEN_AT) < 0 || + qib_r_shift(dd, BISTEN_ETM, LEN_ETM, etm, NULL) < 0 || + qib_r_update(dd, BISTEN_ETM) < 0) + qib_dev_err(dd, "Failed IB link recovery setup\n"); +} + +static void check_7322_rxe_status(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + u64 fmask; + + if (dd->cspec->recovery_ports_initted != 1) + return; /* rest doesn't apply to dualport */ + qib_write_kreg(dd, kr_control, dd->control | + SYM_MASK(Control, FreezeMode)); + (void)qib_read_kreg64(dd, kr_scratch); + udelay(3); /* ibcreset asserted 400ns, be sure that's over */ + fmask = qib_read_kreg64(dd, kr_act_fmask); + if (!fmask) { + /* + * require a powercycle before we'll work again, and make + * sure we get no more interrupts, and don't turn off + * freeze. + */ + ppd->dd->cspec->stay_in_freeze = 1; + qib_7322_set_intr_state(ppd->dd, 0); + qib_write_kreg(dd, kr_fmask, 0ULL); + qib_dev_err(dd, "HCA unusable until powercycled\n"); + return; /* eventually reset */ + } + + qib_write_kreg(ppd->dd, kr_hwerrclear, + SYM_MASK(HwErrClear, IBSerdesPClkNotDetectClear_1)); + + /* don't do the full clear_freeze(), not needed for this */ + qib_write_kreg(dd, kr_control, dd->control); + qib_read_kreg32(dd, kr_scratch); + /* take IBC out of reset */ + if (ppd->link_speed_supported) { + ppd->cpspec->ibcctrl_a &= + ~SYM_MASK(IBCCtrlA_0, IBStatIntReductionEn); + qib_write_kreg_port(ppd, krp_ibcctrl_a, + ppd->cpspec->ibcctrl_a); + qib_read_kreg32(dd, kr_scratch); + if (ppd->lflags & QIBL_IB_LINK_DISABLED) + qib_set_ib_7322_lstate(ppd, 0, + QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); + } +} diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c new file mode 100644 index 00000000000..8d3c78ddc90 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -0,0 +1,1857 @@ +/* + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/vmalloc.h> +#include <linux/delay.h> +#include <linux/idr.h> +#include <linux/module.h> +#include <linux/printk.h> +#ifdef CONFIG_INFINIBAND_QIB_DCA +#include <linux/dca.h> +#endif + +#include "qib.h" +#include "qib_common.h" +#include "qib_mad.h" +#ifdef CONFIG_DEBUG_FS +#include "qib_debugfs.h" +#include "qib_verbs.h" +#endif + +#undef pr_fmt +#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt + +/* + * min buffers we want to have per context, after driver + */ +#define QIB_MIN_USER_CTXT_BUFCNT 7 + +#define QLOGIC_IB_R_SOFTWARE_MASK 0xFF +#define QLOGIC_IB_R_SOFTWARE_SHIFT 24 +#define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62) + +/* + * Number of ctxts we are configured to use (to allow for more pio + * buffers per ctxt, etc.) Zero means use chip value. + */ +ushort qib_cfgctxts; +module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); +MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); + +unsigned qib_numa_aware; +module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO); +MODULE_PARM_DESC(numa_aware, + "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process"); + +/* + * If set, do not write to any regs if avoidable, hack to allow + * check for deranged default register values. 
+ */ +ushort qib_mini_init; +module_param_named(mini_init, qib_mini_init, ushort, S_IRUGO); +MODULE_PARM_DESC(mini_init, "If set, do minimal diag init"); + +unsigned qib_n_krcv_queues; +module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO); +MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); + +unsigned qib_cc_table_size; +module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO); +MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984"); +/* + * qib_wc_pat parameter: + * 0 is WC via MTRR + * 1 is WC via PAT + * If PAT initialization fails, code reverts back to MTRR + */ +unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */ +module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); +MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); + +static void verify_interrupt(unsigned long); + +static struct idr qib_unit_table; +u32 qib_cpulist_count; +unsigned long *qib_cpulist; + +/* set number of contexts we'll actually use */ +void qib_set_ctxtcnt(struct qib_devdata *dd) +{ + if (!qib_cfgctxts) { + dd->cfgctxts = dd->first_user_ctxt + num_online_cpus(); + if (dd->cfgctxts > dd->ctxtcnt) + dd->cfgctxts = dd->ctxtcnt; + } else if (qib_cfgctxts < dd->num_pports) + dd->cfgctxts = dd->ctxtcnt; + else if (qib_cfgctxts <= dd->ctxtcnt) + dd->cfgctxts = qib_cfgctxts; + else + dd->cfgctxts = dd->ctxtcnt; + dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 0 : + dd->cfgctxts - dd->first_user_ctxt; +} + +/* + * Common code for creating the receive context array. + */ +int qib_create_ctxts(struct qib_devdata *dd) +{ + unsigned i; + int local_node_id = pcibus_to_node(dd->pcidev->bus); + + if (local_node_id < 0) + local_node_id = numa_node_id(); + dd->assigned_node_id = local_node_id; + + /* + * Allocate full ctxtcnt array, rather than just cfgctxts, because + * cleanup iterates across all possible ctxts. + */ + dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL); + if (!dd->rcd) { + qib_dev_err(dd, + "Unable to allocate ctxtdata array, failing\n"); + return -ENOMEM; + } + + /* create (one or more) kctxt */ + for (i = 0; i < dd->first_user_ctxt; ++i) { + struct qib_pportdata *ppd; + struct qib_ctxtdata *rcd; + + if (dd->skip_kctxt_mask & (1 << i)) + continue; + + ppd = dd->pport + (i % dd->num_pports); + + rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id); + if (!rcd) { + qib_dev_err(dd, + "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); + kfree(dd->rcd); + dd->rcd = NULL; + return -ENOMEM; + } + rcd->pkeys[0] = QIB_DEFAULT_P_KEY; + rcd->seq_cnt = 1; + } + return 0; +} + +/* + * Common code for user and kernel context setup. 
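The clamping rules coded in qib_set_ctxtcnt() above reduce to "a request of 0 means first_user_ctxt plus one context per online CPU, and every request is bounded by the chip's context count". A simplified sketch (editor's; it folds the num_pports special case into the general bound):

static unsigned clamp_ctxts(unsigned requested, unsigned chip_max,
			    unsigned first_user, unsigned ncpus)
{
	unsigned want = requested ? requested : first_user + ncpus;

	return want > chip_max ? chip_max : want;
}
/* e.g. chip_max = 18, first_user = 2, 16 CPUs, requested = 0 -> 18 */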
+ */ +struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, + int node_id) +{ + struct qib_devdata *dd = ppd->dd; + struct qib_ctxtdata *rcd; + + rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id); + if (rcd) { + INIT_LIST_HEAD(&rcd->qp_wait_list); + rcd->node_id = node_id; + rcd->ppd = ppd; + rcd->dd = dd; + rcd->cnt = 1; + rcd->ctxt = ctxt; + dd->rcd[ctxt] = rcd; +#ifdef CONFIG_DEBUG_FS + if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */ + rcd->opstats = kzalloc_node(sizeof(*rcd->opstats), + GFP_KERNEL, node_id); + if (!rcd->opstats) { + kfree(rcd); + qib_dev_err(dd, + "Unable to allocate per ctxt stats buffer\n"); + return NULL; + } + } +#endif + dd->f_init_ctxt(rcd); + + /* + * To avoid wasting a lot of memory, we allocate 32KB chunks + * of physically contiguous memory, advance through it until + * used up and then allocate more. Of course, we need + * memory to store those extra pointers, now. 32KB seems to + * be the most that is "safe" under memory pressure + * (creating large files and then copying them over + * NFS while doing lots of MPI jobs). The OOM killer can + * get invoked, even though we say we can sleep and this can + * cause significant system problems.... + */ + rcd->rcvegrbuf_size = 0x8000; + rcd->rcvegrbufs_perchunk = + rcd->rcvegrbuf_size / dd->rcvegrbufsize; + rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt + + rcd->rcvegrbufs_perchunk - 1) / + rcd->rcvegrbufs_perchunk; + BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk)); + rcd->rcvegrbufs_perchunk_shift = + ilog2(rcd->rcvegrbufs_perchunk); + } + return rcd; +} + +/* + * Common code for initializing the physical port structure. + */ +int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, + u8 hw_pidx, u8 port) +{ + int size; + ppd->dd = dd; + ppd->hw_pidx = hw_pidx; + ppd->port = port; /* IB port number, not index */ + + spin_lock_init(&ppd->sdma_lock); + spin_lock_init(&ppd->lflags_lock); + spin_lock_init(&ppd->cc_shadow_lock); + init_waitqueue_head(&ppd->state_wait); + + init_timer(&ppd->symerr_clear_timer); + ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup; + ppd->symerr_clear_timer.data = (unsigned long)ppd; + + ppd->qib_wq = NULL; + ppd->ibport_data.pmastats = + alloc_percpu(struct qib_pma_counters); + if (!ppd->ibport_data.pmastats) + return -ENOMEM; + + if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) + goto bail; + + ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size, + IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT); + + ppd->cc_max_table_entries = + ppd->cc_supported_table_entries/IB_CCT_ENTRIES; + + size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry) + * IB_CCT_ENTRIES; + ppd->ccti_entries = kzalloc(size, GFP_KERNEL); + if (!ppd->ccti_entries) { + qib_dev_err(dd, + "failed to allocate congestion control table for port %d!\n", + port); + goto bail; + } + + size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry); + ppd->congestion_entries = kzalloc(size, GFP_KERNEL); + if (!ppd->congestion_entries) { + qib_dev_err(dd, + "failed to allocate congestion setting list for port %d!\n", + port); + goto bail_1; + } + + size = sizeof(struct cc_table_shadow); + ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL); + if (!ppd->ccti_entries_shadow) { + qib_dev_err(dd, + "failed to allocate shadow ccti list for port %d!\n", + port); + goto bail_2; + } + + size = sizeof(struct ib_cc_congestion_setting_attr); + ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL); + if (!ppd->congestion_entries_shadow) { + 
qib_dev_err(dd, + "failed to allocate shadow congestion setting list for port %d!\n", + port); + goto bail_3; + } + + return 0; + +bail_3: + kfree(ppd->ccti_entries_shadow); + ppd->ccti_entries_shadow = NULL; +bail_2: + kfree(ppd->congestion_entries); + ppd->congestion_entries = NULL; +bail_1: + kfree(ppd->ccti_entries); + ppd->ccti_entries = NULL; +bail: + /* User is intentionally disabling the congestion control agent */ + if (!qib_cc_table_size) + return 0; + + if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) { + qib_cc_table_size = 0; + qib_dev_err(dd, + "Congestion Control table size %d less than minimum %d for port %d\n", + qib_cc_table_size, IB_CCT_MIN_ENTRIES, port); + } + + qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n", + port); + return 0; +} + +static int init_pioavailregs(struct qib_devdata *dd) +{ + int ret, pidx; + u64 *status_page; + + dd->pioavailregs_dma = dma_alloc_coherent( + &dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys, + GFP_KERNEL); + if (!dd->pioavailregs_dma) { + qib_dev_err(dd, + "failed to allocate PIOavail reg area in memory\n"); + ret = -ENOMEM; + goto done; + } + + /* + * We really want L2 cache aligned, but for current CPUs of + * interest, they are the same. + */ + status_page = (u64 *) + ((char *) dd->pioavailregs_dma + + ((2 * L1_CACHE_BYTES + + dd->pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES)); + /* device status comes first, for backwards compatibility */ + dd->devstatusp = status_page; + *status_page++ = 0; + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + dd->pport[pidx].statusp = status_page; + *status_page++ = 0; + } + + /* + * Setup buffer to hold freeze and other messages, accessible to + * apps, following statusp. This is per-unit, not per port. + */ + dd->freezemsg = (char *) status_page; + *dd->freezemsg = 0; + /* length of msg buffer is "whatever is left" */ + ret = (char *) status_page - (char *) dd->pioavailregs_dma; + dd->freezelen = PAGE_SIZE - ret; + + ret = 0; + +done: + return ret; +} + +/** + * init_shadow_tids - allocate the shadow TID array + * @dd: the qlogic_ib device + * + * allocate the shadow TID array, so we can qib_munlock previous + * entries. It may make more sense to move the pageshadow to the + * ctxt data structure, so we only allocate memory for ctxts actually + * in use, since we at 8k per ctxt, now. + * We don't want failures here to prevent use of the driver/chip, + * so no return value. + */ +static void init_shadow_tids(struct qib_devdata *dd) +{ + struct page **pages; + dma_addr_t *addrs; + + pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); + if (!pages) { + qib_dev_err(dd, + "failed to allocate shadow page * array, no expected sends!\n"); + goto bail; + } + + addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); + if (!addrs) { + qib_dev_err(dd, + "failed to allocate shadow dma handle array, no expected sends!\n"); + goto bail_free; + } + + dd->pageshadow = pages; + dd->physshadow = addrs; + return; + +bail_free: + vfree(pages); +bail: + dd->pageshadow = NULL; +} + +/* + * Do initialization for device that is only needed on + * first detect, not on resets. 
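Worked numbers for the eager-buffer chunking set up in qib_create_ctxtdata() above (editor's example; the 4 KB buffer size and 512-entry eager count are assumptions, not chip values):

	/* 32 KB chunks, as in rcd->rcvegrbuf_size = 0x8000 */
	perchunk = 0x8000 / 4096;	/* = 8 bufs per chunk          */
	chunks   = (512 + 8 - 1) / 8;	/* = 64, rounding up            */
	shift    = ilog2(8);		/* = 3; perchunk must be 2^n,   */
					/* hence the BUG_ON above       */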
+ */ +static int loadtime_init(struct qib_devdata *dd) +{ + int ret = 0; + + if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & + QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) { + qib_dev_err(dd, + "Driver only handles version %d, chip swversion is %d (%llx), failing\n", + QIB_CHIP_SWVERSION, + (int)(dd->revision >> + QLOGIC_IB_R_SOFTWARE_SHIFT) & + QLOGIC_IB_R_SOFTWARE_MASK, + (unsigned long long) dd->revision); + ret = -ENOSYS; + goto done; + } + + if (dd->revision & QLOGIC_IB_R_EMULATOR_MASK) + qib_devinfo(dd->pcidev, "%s", dd->boardversion); + + spin_lock_init(&dd->pioavail_lock); + spin_lock_init(&dd->sendctrl_lock); + spin_lock_init(&dd->uctxt_lock); + spin_lock_init(&dd->qib_diag_trans_lock); + spin_lock_init(&dd->eep_st_lock); + mutex_init(&dd->eep_lock); + + if (qib_mini_init) + goto done; + + ret = init_pioavailregs(dd); + init_shadow_tids(dd); + + qib_get_eeprom_info(dd); + + /* setup time (don't start yet) to verify we got interrupt */ + init_timer(&dd->intrchk_timer); + dd->intrchk_timer.function = verify_interrupt; + dd->intrchk_timer.data = (unsigned long) dd; + + ret = qib_cq_init(dd); +done: + return ret; +} + +/** + * init_after_reset - re-initialize after a reset + * @dd: the qlogic_ib device + * + * sanity check at least some of the values after reset, and + * ensure no receive or transmit (explicitly, in case reset + * failed). + */ +static int init_after_reset(struct qib_devdata *dd) +{ + int i; + + /* + * Ensure chip does no sends or receives, tail updates, or + * pioavail updates while we re-initialize. This is mostly + * for the driver data structures, not chip registers. + */ + for (i = 0; i < dd->num_pports; ++i) { + /* + * ctxt == -1 means "all contexts". Only really safe for + * _dis_abling things, as here. + */ + dd->f_rcvctrl(dd->pport + i, QIB_RCVCTRL_CTXT_DIS | + QIB_RCVCTRL_INTRAVAIL_DIS | + QIB_RCVCTRL_TAILUPD_DIS, -1); + /* Redundant across ports for some, but no big deal. */ + dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_DIS | + QIB_SENDCTRL_AVAIL_DIS); + } + + return 0; +} + +static void enable_chip(struct qib_devdata *dd) +{ + u64 rcvmask; + int i; + + /* + * Enable PIO send, and update of PIOavail regs to memory. + */ + for (i = 0; i < dd->num_pports; ++i) + dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_ENB | + QIB_SENDCTRL_AVAIL_ENB); + /* + * Enable kernel ctxts' receive and receive interrupt. + * Other ctxts done as user opens and inits them. + */ + rcvmask = QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_INTRAVAIL_ENB; + rcvmask |= (dd->flags & QIB_NODMA_RTAIL) ? + QIB_RCVCTRL_TAILUPD_DIS : QIB_RCVCTRL_TAILUPD_ENB; + for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) { + struct qib_ctxtdata *rcd = dd->rcd[i]; + + if (rcd) + dd->f_rcvctrl(rcd->ppd, rcvmask, i); + } +} + +static void verify_interrupt(unsigned long opaque) +{ + struct qib_devdata *dd = (struct qib_devdata *) opaque; + u64 int_counter; + + if (!dd) + return; /* being torn down */ + + /* + * If we don't have a lid or any interrupts, let the user know and + * don't bother checking again. + */ + int_counter = qib_int_counter(dd) - dd->z_int_counter; + if (int_counter == 0) { + if (!dd->f_intr_fallback(dd)) + dev_err(&dd->pcidev->dev, + "No interrupts detected, not usable.\n"); + else /* re-arm the timer to see if fallback works */ + mod_timer(&dd->intrchk_timer, jiffies + HZ/2); + } +} + +static void init_piobuf_state(struct qib_devdata *dd) +{ + int i, pidx; + u32 uctxts; + + /* + * Ensure all buffers are free, and fifos empty. Buffers + * are common, so only do once for port 0.
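The version gate in loadtime_init() above is a plain shift-and-mask field extraction. A self-contained sketch (editor's illustration; the example revision value is invented):

#include <stdint.h>

#define SW_SHIFT 24	/* stands in for QLOGIC_IB_R_SOFTWARE_SHIFT */
#define SW_MASK 0xFFu	/* stands in for QLOGIC_IB_R_SOFTWARE_MASK */

static unsigned sw_version(uint64_t revision)
{
	/* bits 31:24 carry the software interface version */
	return (unsigned)((revision >> SW_SHIFT) & SW_MASK);
}
/* sw_version(0x0000000007000000ULL) == 7 */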
+ * + * After enable and qib_chg_pioavailkernel so we can safely + * enable pioavail updates and PIOENABLE. After this, packets + * are ready and able to go out. + */ + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_ALL); + for (pidx = 0; pidx < dd->num_pports; ++pidx) + dd->f_sendctrl(dd->pport + pidx, QIB_SENDCTRL_FLUSH); + + /* + * If not all sendbufs are used, add the one to each of the lower + * numbered contexts. pbufsctxt and lastctxt_piobuf are + * calculated in chip-specific code because it may cause some + * chip-specific adjustments to be made. + */ + uctxts = dd->cfgctxts - dd->first_user_ctxt; + dd->ctxts_extrabuf = dd->pbufsctxt ? + dd->lastctxt_piobuf - (dd->pbufsctxt * uctxts) : 0; + + /* + * Set up the shadow copies of the piobufavail registers, + * which we compare against the chip registers for now, and + * the in memory DMA'ed copies of the registers. + * By now pioavail updates to memory should have occurred, so + * copy them into our working/shadow registers; this is in + * case something went wrong with abort, but mostly to get the + * initial values of the generation bit correct. + */ + for (i = 0; i < dd->pioavregs; i++) { + __le64 tmp; + + tmp = dd->pioavailregs_dma[i]; + /* + * Don't need to worry about pioavailkernel here + * because we will call qib_chg_pioavailkernel() later + * in initialization, to busy out buffers as needed. + */ + dd->pioavailshadow[i] = le64_to_cpu(tmp); + } + while (i < ARRAY_SIZE(dd->pioavailshadow)) + dd->pioavailshadow[i++] = 0; /* for debugging sanity */ + + /* after pioavailshadow is setup */ + qib_chg_pioavailkernel(dd, 0, dd->piobcnt2k + dd->piobcnt4k, + TXCHK_CHG_TYPE_KERN, NULL); + dd->f_initvl15_bufs(dd); +} + +/** + * qib_create_workqueues - create per port workqueues + * @dd: the qlogic_ib device + */ +static int qib_create_workqueues(struct qib_devdata *dd) +{ + int pidx; + struct qib_pportdata *ppd; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (!ppd->qib_wq) { + char wq_name[8]; /* 3 + 2 + 1 + 1 + 1 */ + snprintf(wq_name, sizeof(wq_name), "qib%d_%d", + dd->unit, pidx); + ppd->qib_wq = + create_singlethread_workqueue(wq_name); + if (!ppd->qib_wq) + goto wq_error; + } + } + return 0; +wq_error: + pr_err("create_singlethread_workqueue failed for port %d\n", + pidx + 1); + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (ppd->qib_wq) { + destroy_workqueue(ppd->qib_wq); + ppd->qib_wq = NULL; + } + } + return -ENOMEM; +} + +static void qib_free_pportdata(struct qib_pportdata *ppd) +{ + free_percpu(ppd->ibport_data.pmastats); + ppd->ibport_data.pmastats = NULL; +} + +/** + * qib_init - do the actual initialization sequence on the chip + * @dd: the qlogic_ib device + * @reinit: reinitializing, so don't allocate new memory + * + * Do the actual initialization sequence on the chip. This is done + * both from the init routine called from the PCI infrastructure, and + * when we reset the chip, or detect that it was reset internally, + * or it's administratively re-enabled. + * + * Memory allocation here and in called routines is only done in + * the first case (reinit == 0). We have to be careful, because even + * without memory allocation, we need to re-write all the chip registers + * TIDs, etc. after the reset or enable has completed. 
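qib_create_workqueues() above uses the usual allocate-all-or-unwind shape: on the first failure, every workqueue created so far is destroyed and its slot NULLed so teardown sees consistent state. A condensed sketch (editor's; struct demo_port and its wq field are invented stand-ins):

struct demo_port {
	struct workqueue_struct *wq;
};

static int alloc_port_wqs(struct demo_port *ports, int nports, int unit)
{
	int i;

	for (i = 0; i < nports; i++) {
		char name[8];	/* "qib" + unit + '_' + port + NUL */

		snprintf(name, sizeof(name), "qib%d_%d", unit, i);
		ports[i].wq = create_singlethread_workqueue(name);
		if (!ports[i].wq)
			goto unwind;
	}
	return 0;
unwind:
	while (--i >= 0) {	/* tear down only what succeeded */
		destroy_workqueue(ports[i].wq);
		ports[i].wq = NULL;
	}
	return -ENOMEM;
}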
+ */ +int qib_init(struct qib_devdata *dd, int reinit) +{ + int ret = 0, pidx, lastfail = 0; + u32 portok = 0; + unsigned i; + struct qib_ctxtdata *rcd; + struct qib_pportdata *ppd; + unsigned long flags; + + /* Set linkstate to unknown, so we can watch for a transition. */ + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~(QIBL_LINKACTIVE | QIBL_LINKARMED | + QIBL_LINKDOWN | QIBL_LINKINIT | + QIBL_LINKV); + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + } + + if (reinit) + ret = init_after_reset(dd); + else + ret = loadtime_init(dd); + if (ret) + goto done; + + /* Bypass most chip-init, to get to device creation */ + if (qib_mini_init) + return 0; + + ret = dd->f_late_initreg(dd); + if (ret) + goto done; + + /* dd->rcd can be NULL if early init failed */ + for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) { + /* + * Set up the (kernel) rcvhdr queue and egr TIDs. If doing + * re-init, the simplest way to handle this is to free + * existing, and re-allocate. + * Need to re-create rest of ctxt 0 ctxtdata as well. + */ + rcd = dd->rcd[i]; + if (!rcd) + continue; + + lastfail = qib_create_rcvhdrq(dd, rcd); + if (!lastfail) + lastfail = qib_setup_eagerbufs(rcd); + if (lastfail) { + qib_dev_err(dd, + "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); + continue; + } + } + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + int mtu; + if (lastfail) + ret = lastfail; + ppd = dd->pport + pidx; + mtu = ib_mtu_enum_to_int(qib_ibmtu); + if (mtu == -1) { + mtu = QIB_DEFAULT_MTU; + qib_ibmtu = 0; /* don't leave invalid value */ + } + /* set max we can ever have for this driver load */ + ppd->init_ibmaxlen = min(mtu > 2048 ? + dd->piosize4k : dd->piosize2k, + dd->rcvegrbufsize + + (dd->rcvhdrentsize << 2)); + /* + * Have to initialize ibmaxlen, but this will normally + * change immediately in qib_set_mtu(). + */ + ppd->ibmaxlen = ppd->init_ibmaxlen; + qib_set_mtu(ppd, mtu); + + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_IB_LINK_DISABLED; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + + lastfail = dd->f_bringup_serdes(ppd); + if (lastfail) { + qib_devinfo(dd->pcidev, + "Failed to bringup IB port %u\n", ppd->port); + lastfail = -ENETDOWN; + continue; + } + + portok++; + } + + if (!portok) { + /* none of the ports initialized */ + if (!ret && lastfail) + ret = lastfail; + else if (!ret) + ret = -ENETDOWN; + /* but continue on, so we can debug cause */ + } + + enable_chip(dd); + + init_piobuf_state(dd); + +done: + if (!ret) { + /* chip is OK for user apps; mark it as initialized */ + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + /* + * Set status even if port serdes is not initialized + * so that diags will work. + */ + *ppd->statusp |= QIB_STATUS_CHIP_PRESENT | + QIB_STATUS_INITTED; + if (!ppd->link_speed_enabled) + continue; + if (dd->flags & QIB_HAS_SEND_DMA) + ret = qib_setup_sdma(ppd); + init_timer(&ppd->hol_timer); + ppd->hol_timer.function = qib_hol_event; + ppd->hol_timer.data = (unsigned long)ppd; + ppd->hol_state = QIB_HOL_UP; + } + + /* now we can enable all interrupts from the chip */ + dd->f_set_intr_state(dd, 1); + + /* + * Setup to verify we get an interrupt, and fallback + * to an alternate if necessary and possible. 
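The MTU handling in qib_init() above sanitizes a module parameter: the IB MTU enum is converted to bytes, and an invalid value falls back to the driver default rather than propagating -1. A sketch (editor's; QIB_DEFAULT_MTU is the driver's own constant):

static int sane_mtu(int mtu_enum)
{
	int mtu = ib_mtu_enum_to_int(mtu_enum);

	return mtu == -1 ? QIB_DEFAULT_MTU : mtu;
}
/* sane_mtu(IB_MTU_2048) == 2048; an out-of-range enum yields the default */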
+ */ + mod_timer(&dd->intrchk_timer, jiffies + HZ/2); + /* start stats retrieval timer */ + mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER); + } + + /* if ret is non-zero, we probably should do some cleanup here... */ + return ret; +} + +/* + * These next two routines are placeholders in case we don't have per-arch + * code for controlling write combining. If explicit control of write + * combining is not available, performance will probably be awful. + */ + +int __attribute__((weak)) qib_enable_wc(struct qib_devdata *dd) +{ + return -EOPNOTSUPP; +} + +void __attribute__((weak)) qib_disable_wc(struct qib_devdata *dd) +{ +} + +static inline struct qib_devdata *__qib_lookup(int unit) +{ + return idr_find(&qib_unit_table, unit); +} + +struct qib_devdata *qib_lookup(int unit) +{ + struct qib_devdata *dd; + unsigned long flags; + + spin_lock_irqsave(&qib_devs_lock, flags); + dd = __qib_lookup(unit); + spin_unlock_irqrestore(&qib_devs_lock, flags); + + return dd; +} + +/* + * Stop the timers during unit shutdown, or after an error late + * in initialization. + */ +static void qib_stop_timers(struct qib_devdata *dd) +{ + struct qib_pportdata *ppd; + int pidx; + + if (dd->stats_timer.data) { + del_timer_sync(&dd->stats_timer); + dd->stats_timer.data = 0; + } + if (dd->intrchk_timer.data) { + del_timer_sync(&dd->intrchk_timer); + dd->intrchk_timer.data = 0; + } + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (ppd->hol_timer.data) + del_timer_sync(&ppd->hol_timer); + if (ppd->led_override_timer.data) { + del_timer_sync(&ppd->led_override_timer); + atomic_set(&ppd->led_override_timer_active, 0); + } + if (ppd->symerr_clear_timer.data) + del_timer_sync(&ppd->symerr_clear_timer); + } +} + +/** + * qib_shutdown_device - shut down a device + * @dd: the qlogic_ib device + * + * This is called to make the device quiet when we are about to + * unload the driver, and also when the device is administratively + * disabled. It does not free any data structures. + * Everything it does has to be setup again by qib_init(dd, 1) + */ +static void qib_shutdown_device(struct qib_devdata *dd) +{ + struct qib_pportdata *ppd; + unsigned pidx; + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + + spin_lock_irq(&ppd->lflags_lock); + ppd->lflags &= ~(QIBL_LINKDOWN | QIBL_LINKINIT | + QIBL_LINKARMED | QIBL_LINKACTIVE | + QIBL_LINKV); + spin_unlock_irq(&ppd->lflags_lock); + *ppd->statusp &= ~(QIB_STATUS_IB_CONF | QIB_STATUS_IB_READY); + } + dd->flags &= ~QIB_INITTED; + + /* mask interrupts, but not errors */ + dd->f_set_intr_state(dd, 0); + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + dd->f_rcvctrl(ppd, QIB_RCVCTRL_TAILUPD_DIS | + QIB_RCVCTRL_CTXT_DIS | + QIB_RCVCTRL_INTRAVAIL_DIS | + QIB_RCVCTRL_PKEY_ENB, -1); + /* + * Gracefully stop all sends allowing any in progress to + * trickle out first. + */ + dd->f_sendctrl(ppd, QIB_SENDCTRL_CLEAR); + } + + /* + * Enough for anything that's going to trickle out to have actually + * done so. + */ + udelay(20); + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + dd->f_setextled(ppd, 0); /* make sure LEDs are off */ + + if (dd->flags & QIB_HAS_SEND_DMA) + qib_teardown_sdma(ppd); + + dd->f_sendctrl(ppd, QIB_SENDCTRL_AVAIL_DIS | + QIB_SENDCTRL_SEND_DIS); + /* + * Clear SerdesEnable. + * We can't count on interrupts since we are stopping. 
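The __attribute__((weak)) definitions of qib_enable_wc()/qib_disable_wc() above are the standard "portable default, arch override" idiom: the weak body is used unless some other object file supplies a strong definition. A generic sketch with invented names:

/* default, in common code */
int __attribute__((weak)) arch_enable_fastpath(void)
{
	return -EOPNOTSUPP;	/* "no arch support" unless overridden */
}

/* a strong definition in an arch-specific object file wins at link time:
 * int arch_enable_fastpath(void) { ...real setup...; return 0; }
 */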
+ */ + dd->f_quiet_serdes(ppd); + + if (ppd->qib_wq) { + destroy_workqueue(ppd->qib_wq); + ppd->qib_wq = NULL; + } + qib_free_pportdata(ppd); + } + + qib_update_eeprom_log(dd); +} + +/** + * qib_free_ctxtdata - free a context's allocated data + * @dd: the qlogic_ib device + * @rcd: the ctxtdata structure + * + * free up any allocated data for a context + * This should not touch anything that would affect a simultaneous + * re-allocation of context data, because it is called after qib_mutex + * is released (and can be called from reinit as well). + * It should never change any chip state, or global driver state. + */ +void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd) +{ + if (!rcd) + return; + + if (rcd->rcvhdrq) { + dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size, + rcd->rcvhdrq, rcd->rcvhdrq_phys); + rcd->rcvhdrq = NULL; + if (rcd->rcvhdrtail_kvaddr) { + dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, + rcd->rcvhdrtail_kvaddr, + rcd->rcvhdrqtailaddr_phys); + rcd->rcvhdrtail_kvaddr = NULL; + } + } + if (rcd->rcvegrbuf) { + unsigned e; + + for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { + void *base = rcd->rcvegrbuf[e]; + size_t size = rcd->rcvegrbuf_size; + + dma_free_coherent(&dd->pcidev->dev, size, + base, rcd->rcvegrbuf_phys[e]); + } + kfree(rcd->rcvegrbuf); + rcd->rcvegrbuf = NULL; + kfree(rcd->rcvegrbuf_phys); + rcd->rcvegrbuf_phys = NULL; + rcd->rcvegrbuf_chunks = 0; + } + + kfree(rcd->tid_pg_list); + vfree(rcd->user_event_mask); + vfree(rcd->subctxt_uregbase); + vfree(rcd->subctxt_rcvegrbuf); + vfree(rcd->subctxt_rcvhdr_base); +#ifdef CONFIG_DEBUG_FS + kfree(rcd->opstats); + rcd->opstats = NULL; +#endif + kfree(rcd); +} + +/* + * Perform a PIO buffer bandwidth write test, to verify proper system + * configuration. Even when all the setup calls work, occasionally + * BIOS or other issues can prevent write combining from working, or + * can cause other bandwidth problems to the chip. + * + * This test simply writes the same buffer over and over again, and + * measures close to the peak bandwidth to the chip (not testing + * data bandwidth to the wire). On chips that use an address-based + * trigger to send packets to the wire, this is easy. On chips that + * use a count to trigger, we want to make sure that the packet doesn't + * go out on the wire, or trigger flow control checks. + */ +static void qib_verify_pioperf(struct qib_devdata *dd) +{ + u32 pbnum, cnt, lcnt; + u32 __iomem *piobuf; + u32 *addr; + u64 msecs, emsecs; + + piobuf = dd->f_getsendbuf(dd->pport, 0ULL, &pbnum); + if (!piobuf) { + qib_devinfo(dd->pcidev, + "No PIObufs for checking perf, skipping\n"); + return; + } + + /* + * Enough to give us a reasonable test, less than piobuf size, and + * likely multiple of store buffer length. + */ + cnt = 1024; + + addr = vmalloc(cnt); + if (!addr) { + qib_devinfo(dd->pcidev, + "Couldn't get memory for checking PIO perf," + " skipping\n"); + goto done; + } + + preempt_disable(); /* we want reasonably accurate elapsed time */ + msecs = 1 + jiffies_to_msecs(jiffies); + for (lcnt = 0; lcnt < 10000U; lcnt++) { + /* wait until we cross msec boundary */ + if (jiffies_to_msecs(jiffies) >= msecs) + break; + udelay(1); + } + + dd->f_set_armlaunch(dd, 0); + + /* + * length 0, no dwords actually sent + */ + writeq(0, piobuf); + qib_flush_wc(); + + /* + * This is only roughly accurate, since even with preempt we + * still take interrupts that could take a while. Running for + * >= 5 msec seems to get us "close enough" to accurate values. 
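qib_verify_pioperf() above first spins to a millisecond edge so the measurement starts on a tick boundary. A sketch of the idiom (editor's; kernel context assumed, so jiffies granularity applies):

	u64 edge = 1 + jiffies_to_msecs(jiffies);
	int spins;

	for (spins = 0; spins < 10000; spins++) {
		if (jiffies_to_msecs(jiffies) >= edge)
			break;		/* now aligned to a fresh msec */
		udelay(1);
	}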
+ */ + msecs = jiffies_to_msecs(jiffies); + for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) { + qib_pio_copy(piobuf + 64, addr, cnt >> 2); + emsecs = jiffies_to_msecs(jiffies) - msecs; + } + + /* 1 GiB/sec, slightly over IB SDR line rate */ + if (lcnt < (emsecs * 1024U)) + qib_dev_err(dd, + "Performance problem: bandwidth to PIO buffers is only %u MiB/sec\n", + lcnt / (u32) emsecs); + + preempt_enable(); + + vfree(addr); + +done: + /* disarm piobuf, so it's available again */ + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbnum)); + qib_sendbuf_done(dd, pbnum); + dd->f_set_armlaunch(dd, 1); +} + +void qib_free_devdata(struct qib_devdata *dd) +{ + unsigned long flags; + + spin_lock_irqsave(&qib_devs_lock, flags); + idr_remove(&qib_unit_table, dd->unit); + list_del(&dd->list); + spin_unlock_irqrestore(&qib_devs_lock, flags); + +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_exit(&dd->verbs_dev); +#endif + free_percpu(dd->int_counter); + ib_dealloc_device(&dd->verbs_dev.ibdev); +} + +u64 qib_int_counter(struct qib_devdata *dd) +{ + int cpu; + u64 int_counter = 0; + + for_each_possible_cpu(cpu) + int_counter += *per_cpu_ptr(dd->int_counter, cpu); + return int_counter; +} + +u64 qib_sps_ints(void) +{ + unsigned long flags; + struct qib_devdata *dd; + u64 sps_ints = 0; + + spin_lock_irqsave(&qib_devs_lock, flags); + list_for_each_entry(dd, &qib_dev_list, list) { + sps_ints += qib_int_counter(dd); + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + return sps_ints; +} + +/* + * Allocate our primary per-unit data structure. Must be done via verbs + * allocator, because the verbs cleanup process both does cleanup and + * free of the data structure. + * "extra" is for chip-specific data. + * + * Use the idr mechanism to get a unit number for this unit. + */ +struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) +{ + unsigned long flags; + struct qib_devdata *dd; + int ret; + + dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); + if (!dd) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&dd->list); + + idr_preload(GFP_KERNEL); + spin_lock_irqsave(&qib_devs_lock, flags); + + ret = idr_alloc(&qib_unit_table, dd, 0, 0, GFP_NOWAIT); + if (ret >= 0) { + dd->unit = ret; + list_add(&dd->list, &qib_dev_list); + } + + spin_unlock_irqrestore(&qib_devs_lock, flags); + idr_preload_end(); + + if (ret < 0) { + qib_early_err(&pdev->dev, + "Could not allocate unit ID: error %d\n", -ret); + goto bail; + } + dd->int_counter = alloc_percpu(u64); + if (!dd->int_counter) { + ret = -ENOMEM; + qib_early_err(&pdev->dev, + "Could not allocate per-cpu int_counter\n"); + goto bail; + } + + if (!qib_cpulist_count) { + u32 count = num_online_cpus(); + qib_cpulist = kzalloc(BITS_TO_LONGS(count) * + sizeof(long), GFP_KERNEL); + if (qib_cpulist) + qib_cpulist_count = count; + else + qib_early_err(&pdev->dev, + "Could not alloc cpulist info, cpu affinity might be wrong\n"); + } +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_init(&dd->verbs_dev); +#endif + return dd; +bail: + if (!list_empty(&dd->list)) + list_del_init(&dd->list); + ib_dealloc_device(&dd->verbs_dev.ibdev); + return ERR_PTR(ret); +} + +/* + * Called from freeze mode handlers, and from PCI error + * reporting code. Should be paranoid about state of + * system and data structures.
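qib_int_counter() above shows the read side of a per-CPU counter: writers bump their own CPU's slot lock-free, and readers sum every possible CPU, tolerating slight skew. A kernel-style sketch (editor's; error handling elided):

	u64 __percpu *ctr = alloc_percpu(u64);
	u64 total = 0;
	int cpu;

	this_cpu_inc(*ctr);		/* hot path: no lock, no atomics */
	for_each_possible_cpu(cpu)	/* read side: sum all slots */
		total += *per_cpu_ptr(ctr, cpu);
	free_percpu(ctr);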
+ */ +void qib_disable_after_error(struct qib_devdata *dd) +{ + if (dd->flags & QIB_INITTED) { + u32 pidx; + + dd->flags &= ~QIB_INITTED; + if (dd->pport) + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + struct qib_pportdata *ppd; + + ppd = dd->pport + pidx; + if (dd->flags & QIB_PRESENT) { + qib_set_linkstate(ppd, + QIB_IB_LINKDOWN_DISABLE); + dd->f_setextled(ppd, 0); + } + *ppd->statusp &= ~QIB_STATUS_IB_READY; + } + } + + /* + * Mark as having had an error for driver, and also + * for /sys and status word mapped to user programs. + * This marks unit as not usable, until reset. + */ + if (dd->devstatusp) + *dd->devstatusp |= QIB_STATUS_HWERROR; +} + +static void qib_remove_one(struct pci_dev *); +static int qib_init_one(struct pci_dev *, const struct pci_device_id *); + +#define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: " +#define PFX QIB_DRV_NAME ": " + +static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { + { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) }, + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) }, + { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) }, + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, qib_pci_tbl); + +static struct pci_driver qib_driver = { + .name = QIB_DRV_NAME, + .probe = qib_init_one, + .remove = qib_remove_one, + .id_table = qib_pci_tbl, + .err_handler = &qib_pci_err_handler, +}; + +#ifdef CONFIG_INFINIBAND_QIB_DCA + +static int qib_notify_dca(struct notifier_block *, unsigned long, void *); +static struct notifier_block dca_notifier = { + .notifier_call = qib_notify_dca, + .next = NULL, + .priority = 0 +}; + +static int qib_notify_dca_device(struct device *device, void *data) +{ + struct qib_devdata *dd = dev_get_drvdata(device); + unsigned long event = *(unsigned long *)data; + + return dd->f_notify_dca(dd, event); +} + +static int qib_notify_dca(struct notifier_block *nb, unsigned long event, + void *p) +{ + int rval; + + rval = driver_for_each_device(&qib_driver.driver, NULL, + &event, qib_notify_dca_device); + return rval ? NOTIFY_BAD : NOTIFY_DONE; +} + +#endif + +/* + * Do all the generic driver unit- and chip-independent memory + * allocation and initialization. + */ +static int __init qib_ib_init(void) +{ + int ret; + + ret = qib_dev_init(); + if (ret) + goto bail; + + /* + * These must be called before the driver is registered with + * the PCI subsystem. + */ + idr_init(&qib_unit_table); + +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_register_notify(&dca_notifier); +#endif +#ifdef CONFIG_DEBUG_FS + qib_dbg_init(); +#endif + ret = pci_register_driver(&qib_driver); + if (ret < 0) { + pr_err("Unable to register driver: error %d\n", -ret); + goto bail_dev; + } + + /* not fatal if it doesn't work */ + if (qib_init_qibfs()) + pr_err("Unable to register ipathfs\n"); + goto bail; /* all OK */ + +bail_dev: +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_unregister_notify(&dca_notifier); +#endif +#ifdef CONFIG_DEBUG_FS + qib_dbg_exit(); +#endif + idr_destroy(&qib_unit_table); + qib_dev_cleanup(); +bail: + return ret; +} + +module_init(qib_ib_init); + +/* + * Do the non-unit driver cleanup, memory free, etc. at unload. 
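qib_ib_init() above registers resources in order and its bail_dev path releases them in reverse, the standard module_init shape. A minimal sketch (editor's; setup_tables()/teardown_tables() and demo_driver are placeholders, not driver symbols):

static int __init demo_init(void)
{
	int ret = setup_tables();

	if (ret)
		return ret;
	ret = pci_register_driver(&demo_driver);
	if (ret)
		goto bail_tables;	/* undo only what succeeded */
	return 0;

bail_tables:
	teardown_tables();
	return ret;
}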
+ */ +static void __exit qib_ib_cleanup(void) +{ + int ret; + + ret = qib_exit_qibfs(); + if (ret) + pr_err( + "Unable to cleanup counter filesystem: error %d\n", + -ret); + +#ifdef CONFIG_INFINIBAND_QIB_DCA + dca_unregister_notify(&dca_notifier); +#endif + pci_unregister_driver(&qib_driver); +#ifdef CONFIG_DEBUG_FS + qib_dbg_exit(); +#endif + + qib_cpulist_count = 0; + kfree(qib_cpulist); + + idr_destroy(&qib_unit_table); + qib_dev_cleanup(); +} + +module_exit(qib_ib_cleanup); + +/* this can only be called after a successful initialization */ +static void cleanup_device_data(struct qib_devdata *dd) +{ + int ctxt; + int pidx; + struct qib_ctxtdata **tmp; + unsigned long flags; + + /* users can't do anything more with chip */ + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + if (dd->pport[pidx].statusp) + *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT; + + spin_lock(&dd->pport[pidx].cc_shadow_lock); + + kfree(dd->pport[pidx].congestion_entries); + dd->pport[pidx].congestion_entries = NULL; + kfree(dd->pport[pidx].ccti_entries); + dd->pport[pidx].ccti_entries = NULL; + kfree(dd->pport[pidx].ccti_entries_shadow); + dd->pport[pidx].ccti_entries_shadow = NULL; + kfree(dd->pport[pidx].congestion_entries_shadow); + dd->pport[pidx].congestion_entries_shadow = NULL; + + spin_unlock(&dd->pport[pidx].cc_shadow_lock); + } + + if (!qib_wc_pat) + qib_disable_wc(dd); + + if (dd->pioavailregs_dma) { + dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, + (void *) dd->pioavailregs_dma, + dd->pioavailregs_phys); + dd->pioavailregs_dma = NULL; + } + + if (dd->pageshadow) { + struct page **tmpp = dd->pageshadow; + dma_addr_t *tmpd = dd->physshadow; + int i; + + for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) { + int ctxt_tidbase = ctxt * dd->rcvtidcnt; + int maxtid = ctxt_tidbase + dd->rcvtidcnt; + + for (i = ctxt_tidbase; i < maxtid; i++) { + if (!tmpp[i]) + continue; + pci_unmap_page(dd->pcidev, tmpd[i], + PAGE_SIZE, PCI_DMA_FROMDEVICE); + qib_release_user_pages(&tmpp[i], 1); + tmpp[i] = NULL; + } + } + + dd->pageshadow = NULL; + vfree(tmpp); + dd->physshadow = NULL; + vfree(tmpd); + } + + /* + * Free any resources still in use (usually just kernel contexts) + * at unload; we do for ctxtcnt, because that's what we allocate. + * We acquire lock to be really paranoid that rcd isn't being + * accessed from some interrupt-related code (that should not happen, + * but best to be sure). + */ + spin_lock_irqsave(&dd->uctxt_lock, flags); + tmp = dd->rcd; + dd->rcd = NULL; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + for (ctxt = 0; tmp && ctxt < dd->ctxtcnt; ctxt++) { + struct qib_ctxtdata *rcd = tmp[ctxt]; + + tmp[ctxt] = NULL; /* debugging paranoia */ + qib_free_ctxtdata(dd, rcd); + } + kfree(tmp); + kfree(dd->boardname); + qib_cq_exit(dd); +} + +/* + * Clean up on unit shutdown, or error during unit load after + * successful initialization. + */ +static void qib_postinit_cleanup(struct qib_devdata *dd) +{ + /* + * Clean up chip-specific stuff. + * We check for NULL here, because it's outside + * the kregbase check, and we need to call it + * after the free_irq. Thus it's possible that + * the function pointers were never initialized. 
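cleanup_device_data() above detaches dd->rcd under uctxt_lock and only then walks and frees it, so an interrupt-side reader can never observe a half-freed array. The distilled pattern (editor's sketch, generic names):

	spin_lock_irqsave(&obj_lock, flags);
	victims = shared_ptr;		/* take ownership... */
	shared_ptr = NULL;		/* ...readers now see "empty" */
	spin_unlock_irqrestore(&obj_lock, flags);

	free_everything(victims);	/* slow work, no lock held */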
+ */ + if (dd->f_cleanup) + dd->f_cleanup(dd); + + qib_pcie_ddcleanup(dd); + + cleanup_device_data(dd); + + qib_free_devdata(dd); +} + +static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int ret, j, pidx, initfail; + struct qib_devdata *dd = NULL; + + ret = qib_pcie_init(pdev, ent); + if (ret) + goto bail; + + /* + * Do device-specific initialization, function table setup, dd + * allocation, etc. + */ + switch (ent->device) { + case PCI_DEVICE_ID_QLOGIC_IB_6120: +#ifdef CONFIG_PCI_MSI + dd = qib_init_iba6120_funcs(pdev, ent); +#else + qib_early_err(&pdev->dev, + "Intel PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", + ent->device); + dd = ERR_PTR(-ENODEV); +#endif + break; + + case PCI_DEVICE_ID_QLOGIC_IB_7220: + dd = qib_init_iba7220_funcs(pdev, ent); + break; + + case PCI_DEVICE_ID_QLOGIC_IB_7322: + dd = qib_init_iba7322_funcs(pdev, ent); + break; + + default: + qib_early_err(&pdev->dev, + "Failing on unknown Intel deviceid 0x%x\n", + ent->device); + ret = -ENODEV; + } + + if (IS_ERR(dd)) + ret = PTR_ERR(dd); + if (ret) + goto bail; /* error already printed */ + + ret = qib_create_workqueues(dd); + if (ret) + goto bail; + + /* do the generic initialization */ + initfail = qib_init(dd, 0); + + ret = qib_register_ib_device(dd); + + /* + * Now ready for use. This should be cleared whenever we + * detect a reset, or initiate one. If earlier failure, + * we still create devices, so diags, etc. can be used + * to determine cause of problem. + */ + if (!qib_mini_init && !initfail && !ret) + dd->flags |= QIB_INITTED; + + j = qib_device_create(dd); + if (j) + qib_dev_err(dd, "Failed to create /dev devices: %d\n", -j); + j = qibfs_add(dd); + if (j) + qib_dev_err(dd, "Failed filesystem setup for counters: %d\n", + -j); + + if (qib_mini_init || initfail || ret) { + qib_stop_timers(dd); + flush_workqueue(ib_wq); + for (pidx = 0; pidx < dd->num_pports; ++pidx) + dd->f_quiet_serdes(dd->pport + pidx); + if (qib_mini_init) + goto bail; + if (!j) { + (void) qibfs_remove(dd); + qib_device_remove(dd); + } + if (!ret) + qib_unregister_ib_device(dd); + qib_postinit_cleanup(dd); + if (initfail) + ret = initfail; + goto bail; + } + + if (!qib_wc_pat) { + ret = qib_enable_wc(dd); + if (ret) { + qib_dev_err(dd, + "Write combining not enabled (err %d): performance may be poor\n", + -ret); + ret = 0; + } + } + + qib_verify_pioperf(dd); +bail: + return ret; +} + +static void qib_remove_one(struct pci_dev *pdev) +{ + struct qib_devdata *dd = pci_get_drvdata(pdev); + int ret; + + /* unregister from IB core */ + qib_unregister_ib_device(dd); + + /* + * Disable the IB link, disable interrupts on the device, + * clear dma engines, etc. + */ + if (!qib_mini_init) + qib_shutdown_device(dd); + + qib_stop_timers(dd); + + /* wait until all of our (qsfp) queue_work() calls complete */ + flush_workqueue(ib_wq); + + ret = qibfs_remove(dd); + if (ret) + qib_dev_err(dd, "Failed counters filesystem cleanup: %d\n", + -ret); + + qib_device_remove(dd); + + qib_postinit_cleanup(dd); +} + +/** + * qib_create_rcvhdrq - create a receive header queue + * @dd: the qlogic_ib device + * @rcd: the context data + * + * This must be contiguous memory (from an i/o perspective), and must be + * DMA'able (which means for some systems, it will go through an IOMMU, + * or be forced into a low address range).
+ */ +int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) +{ + unsigned amt; + int old_node_id; + + if (!rcd->rcvhdrq) { + dma_addr_t phys_hdrqtail; + gfp_t gfp_flags; + + amt = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize * + sizeof(u32), PAGE_SIZE); + gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? + GFP_USER : GFP_KERNEL; + + old_node_id = dev_to_node(&dd->pcidev->dev); + set_dev_node(&dd->pcidev->dev, rcd->node_id); + rcd->rcvhdrq = dma_alloc_coherent( + &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, + gfp_flags | __GFP_COMP); + set_dev_node(&dd->pcidev->dev, old_node_id); + + if (!rcd->rcvhdrq) { + qib_dev_err(dd, + "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n", + amt, rcd->ctxt); + goto bail; + } + + if (rcd->ctxt >= dd->first_user_ctxt) { + rcd->user_event_mask = vmalloc_user(PAGE_SIZE); + if (!rcd->user_event_mask) + goto bail_free_hdrq; + } + + if (!(dd->flags & QIB_NODMA_RTAIL)) { + set_dev_node(&dd->pcidev->dev, rcd->node_id); + rcd->rcvhdrtail_kvaddr = dma_alloc_coherent( + &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, + gfp_flags); + set_dev_node(&dd->pcidev->dev, old_node_id); + if (!rcd->rcvhdrtail_kvaddr) + goto bail_free; + rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; + } + + rcd->rcvhdrq_size = amt; + } + + /* clear for security and sanity on each use */ + memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size); + if (rcd->rcvhdrtail_kvaddr) + memset(rcd->rcvhdrtail_kvaddr, 0, PAGE_SIZE); + return 0; + +bail_free: + qib_dev_err(dd, + "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", + rcd->ctxt); + vfree(rcd->user_event_mask); + rcd->user_event_mask = NULL; +bail_free_hdrq: + dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq, + rcd->rcvhdrq_phys); + rcd->rcvhdrq = NULL; +bail: + return -ENOMEM; +} + +/** + * allocate eager buffers, both kernel and user contexts. + * @rcd: the context we are setting up. + * + * Allocate the eager TID buffers and program them into the chip. + * They are no longer completely contiguous, we do multiple allocation + * calls. Otherwise we get the OOM code involved, by asking for too + * much per call, with disastrous results on some kernels. + */ +int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) +{ + struct qib_devdata *dd = rcd->dd; + unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; + size_t size; + gfp_t gfp_flags; + int old_node_id; + + /* + * GFP_USER, but without GFP_FS, so buffer cache can be + * coalesced (we hope); otherwise, even at order 4, + * heavy filesystem activity makes these fail, and we can + * use compound pages.
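qib_create_rcvhdrq() above pins DMA allocations to the context's NUMA node by temporarily rewriting the PCI device's node. A condensed sketch (editor's; pdev, size, and target_node are assumed inputs):

	void *buf;
	dma_addr_t bus_addr;
	int old_node = dev_to_node(&pdev->dev);

	set_dev_node(&pdev->dev, target_node);	/* steer the allocator */
	buf = dma_alloc_coherent(&pdev->dev, size, &bus_addr,
				 GFP_KERNEL | __GFP_COMP);
	set_dev_node(&pdev->dev, old_node);	/* always restore */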
+ */ + gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP; + + egrcnt = rcd->rcvegrcnt; + egroff = rcd->rcvegr_tid_base; + egrsize = dd->rcvegrbufsize; + + chunk = rcd->rcvegrbuf_chunks; + egrperchunk = rcd->rcvegrbufs_perchunk; + size = rcd->rcvegrbuf_size; + if (!rcd->rcvegrbuf) { + rcd->rcvegrbuf = + kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]), + GFP_KERNEL, rcd->node_id); + if (!rcd->rcvegrbuf) + goto bail; + } + if (!rcd->rcvegrbuf_phys) { + rcd->rcvegrbuf_phys = + kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]), + GFP_KERNEL, rcd->node_id); + if (!rcd->rcvegrbuf_phys) + goto bail_rcvegrbuf; + } + for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { + if (rcd->rcvegrbuf[e]) + continue; + + old_node_id = dev_to_node(&dd->pcidev->dev); + set_dev_node(&dd->pcidev->dev, rcd->node_id); + rcd->rcvegrbuf[e] = + dma_alloc_coherent(&dd->pcidev->dev, size, + &rcd->rcvegrbuf_phys[e], + gfp_flags); + set_dev_node(&dd->pcidev->dev, old_node_id); + if (!rcd->rcvegrbuf[e]) + goto bail_rcvegrbuf_phys; + } + + rcd->rcvegr_phys = rcd->rcvegrbuf_phys[0]; + + for (e = chunk = 0; chunk < rcd->rcvegrbuf_chunks; chunk++) { + dma_addr_t pa = rcd->rcvegrbuf_phys[chunk]; + unsigned i; + + /* clear for security and sanity on each use */ + memset(rcd->rcvegrbuf[chunk], 0, size); + + for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) { + dd->f_put_tid(dd, e + egroff + + (u64 __iomem *) + ((char __iomem *) + dd->kregbase + + dd->rcvegrbase), + RCVHQ_RCV_TYPE_EAGER, pa); + pa += egrsize; + } + cond_resched(); /* don't hog the cpu */ + } + + return 0; + +bail_rcvegrbuf_phys: + for (e = 0; e < rcd->rcvegrbuf_chunks && rcd->rcvegrbuf[e]; e++) + dma_free_coherent(&dd->pcidev->dev, size, + rcd->rcvegrbuf[e], rcd->rcvegrbuf_phys[e]); + kfree(rcd->rcvegrbuf_phys); + rcd->rcvegrbuf_phys = NULL; +bail_rcvegrbuf: + kfree(rcd->rcvegrbuf); + rcd->rcvegrbuf = NULL; +bail: + return -ENOMEM; +} + +/* + * Note: Changes to this routine should be mirrored + * for the diagnostics routine qib_remap_ioaddr32(). + * There is also related code for VL15 buffers in qib_init_7322_variables(). + * The teardown code that unmaps is in qib_pcie_ddcleanup() + */ +int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen) +{ + u64 __iomem *qib_kregbase = NULL; + void __iomem *qib_piobase = NULL; + u64 __iomem *qib_userbase = NULL; + u64 qib_kreglen; + u64 qib_pio2koffset = dd->piobufbase & 0xffffffff; + u64 qib_pio4koffset = dd->piobufbase >> 32; + u64 qib_pio2klen = dd->piobcnt2k * dd->palign; + u64 qib_pio4klen = dd->piobcnt4k * dd->align4k; + u64 qib_physaddr = dd->physaddr; + u64 qib_piolen; + u64 qib_userlen = 0; + + /* + * Free the old mapping because the kernel will try to reuse the + * old mapping and not create a new mapping with the + * write combining attribute. 
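The qib_pio2koffset/qib_pio4koffset lines above unpack dd->piobufbase, which carries two BAR offsets in one u64: the 2K-buffer offset in the low 32 bits and the 4K-buffer offset in the high 32. A sketch of that packing convention (editor's illustration):

	u64 packed = ((u64)off4k << 32) | off2k;

	u32 lo = packed & 0xffffffff;	/* 2K PIO buffer offset */
	u32 hi = packed >> 32;		/* 4K PIO buffer offset */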
+ */ + iounmap(dd->kregbase); + dd->kregbase = NULL; + + /* + * Assumes chip address space looks like: + * - kregs + sregs + cregs + uregs (in any order) + * - piobufs (2K and 4K bufs in either order) + * or: + * - kregs + sregs + cregs (in any order) + * - piobufs (2K and 4K bufs in either order) + * - uregs + */ + if (dd->piobcnt4k == 0) { + qib_kreglen = qib_pio2koffset; + qib_piolen = qib_pio2klen; + } else if (qib_pio2koffset < qib_pio4koffset) { + qib_kreglen = qib_pio2koffset; + qib_piolen = qib_pio4koffset + qib_pio4klen - qib_kreglen; + } else { + qib_kreglen = qib_pio4koffset; + qib_piolen = qib_pio2koffset + qib_pio2klen - qib_kreglen; + } + qib_piolen += vl15buflen; + /* Map just the configured ports (not all hw ports) */ + if (dd->uregbase > qib_kreglen) + qib_userlen = dd->ureg_align * dd->cfgctxts; + + /* Sanity checks passed, now create the new mappings */ + qib_kregbase = ioremap_nocache(qib_physaddr, qib_kreglen); + if (!qib_kregbase) + goto bail; + + qib_piobase = ioremap_wc(qib_physaddr + qib_kreglen, qib_piolen); + if (!qib_piobase) + goto bail_kregbase; + + if (qib_userlen) { + qib_userbase = ioremap_nocache(qib_physaddr + dd->uregbase, + qib_userlen); + if (!qib_userbase) + goto bail_piobase; + } + + dd->kregbase = qib_kregbase; + dd->kregend = (u64 __iomem *) + ((char __iomem *) qib_kregbase + qib_kreglen); + dd->piobase = qib_piobase; + dd->pio2kbase = (void __iomem *) + (((char __iomem *) dd->piobase) + + qib_pio2koffset - qib_kreglen); + if (dd->piobcnt4k) + dd->pio4kbase = (void __iomem *) + (((char __iomem *) dd->piobase) + + qib_pio4koffset - qib_kreglen); + if (qib_userlen) + /* ureg will now be accessed relative to dd->userbase */ + dd->userbase = qib_userbase; + return 0; + +bail_piobase: + iounmap(qib_piobase); +bail_kregbase: + iounmap(qib_kregbase); +bail: + return -ENOMEM; +} diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c new file mode 100644 index 00000000000..f4918f2165e --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/pci.h> +#include <linux/delay.h> + +#include "qib.h" +#include "qib_common.h" + +/** + * qib_format_hwmsg - format a single hwerror message + * @msg: message buffer + * @msgl: length of message buffer + * @hwmsg: message to add to message buffer + */ +static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) +{ + strlcat(msg, "[", msgl); + strlcat(msg, hwmsg, msgl); + strlcat(msg, "]", msgl); +} + +/** + * qib_format_hwerrors - format hardware error messages for display + * @hwerrs: hardware errors bit vector + * @hwerrmsgs: hardware error descriptions + * @nhwerrmsgs: number of hwerrmsgs + * @msg: message buffer + * @msgl: message buffer length + */ +void qib_format_hwerrors(u64 hwerrs, const struct qib_hwerror_msgs *hwerrmsgs, + size_t nhwerrmsgs, char *msg, size_t msgl) +{ + int i; + + for (i = 0; i < nhwerrmsgs; i++) + if (hwerrs & hwerrmsgs[i].mask) + qib_format_hwmsg(msg, msgl, hwerrmsgs[i].msg); +} + +static void signal_ib_event(struct qib_pportdata *ppd, enum ib_event_type ev) +{ + struct ib_event event; + struct qib_devdata *dd = ppd->dd; + + event.device = &dd->verbs_dev.ibdev; + event.element.port_num = ppd->port; + event.event = ev; + ib_dispatch_event(&event); +} + +void qib_handle_e_ibstatuschanged(struct qib_pportdata *ppd, u64 ibcs) +{ + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + u32 lstate; + u8 ltstate; + enum ib_event_type ev = 0; + + lstate = dd->f_iblink_state(ibcs); /* linkstate */ + ltstate = dd->f_ibphys_portstate(ibcs); + + /* + * If linkstate transitions into INIT from any of the various down + * states, or if it transitions from any of the up (INIT or better) + * states into any of the down states (except link recovery), then + * call the chip-specific code to take appropriate actions. + * + * ppd->lflags could be 0 if this is the first time the interrupt + * handler has been called but the link is already up.
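signal_ib_event() above is the entire glue between a chip interrupt and the verbs layer: fill in an ib_event and hand it to the core, which fans it out to every registered consumer. A standalone sketch of the same dispatch, with nothing qib-specific (ibdev, port_num and type are caller-supplied here):

	#include <rdma/ib_verbs.h>

	static void notify_port_event(struct ib_device *ibdev, u8 port_num,
				      enum ib_event_type type)
	{
		struct ib_event event = {
			.device			= ibdev,
			.event			= type,
			.element.port_num	= port_num,
		};

		ib_dispatch_event(&event);	/* fan out to registered handlers */
	}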
+ */ + if (lstate >= IB_PORT_INIT && + (!ppd->lflags || (ppd->lflags & QIBL_LINKDOWN)) && + ltstate == IB_PHYSPORTSTATE_LINKUP) { + /* transitioned to UP */ + if (dd->f_ib_updown(ppd, 1, ibcs)) + goto skip_ibchange; /* chip-code handled */ + } else if (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED | + QIBL_LINKACTIVE | QIBL_IB_FORCE_NOTIFY)) { + if (ltstate != IB_PHYSPORTSTATE_LINKUP && + ltstate <= IB_PHYSPORTSTATE_CFG_TRAIN && + dd->f_ib_updown(ppd, 0, ibcs)) + goto skip_ibchange; /* chip-code handled */ + qib_set_uevent_bits(ppd, _QIB_EVENT_LINKDOWN_BIT); + } + + if (lstate != IB_PORT_DOWN) { + /* lstate is INIT, ARMED, or ACTIVE */ + if (lstate != IB_PORT_ACTIVE) { + *ppd->statusp &= ~QIB_STATUS_IB_READY; + if (ppd->lflags & QIBL_LINKACTIVE) + ev = IB_EVENT_PORT_ERR; + spin_lock_irqsave(&ppd->lflags_lock, flags); + if (lstate == IB_PORT_ARMED) { + ppd->lflags |= QIBL_LINKARMED | QIBL_LINKV; + ppd->lflags &= ~(QIBL_LINKINIT | + QIBL_LINKDOWN | QIBL_LINKACTIVE); + } else { + ppd->lflags |= QIBL_LINKINIT | QIBL_LINKV; + ppd->lflags &= ~(QIBL_LINKARMED | + QIBL_LINKDOWN | QIBL_LINKACTIVE); + } + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + /* start a 75msec timer to clear symbol errors */ + mod_timer(&ppd->symerr_clear_timer, + msecs_to_jiffies(75)); + } else if (ltstate == IB_PHYSPORTSTATE_LINKUP && + !(ppd->lflags & QIBL_LINKACTIVE)) { + /* active, but not active defered */ + qib_hol_up(ppd); /* useful only for 6120 now */ + *ppd->statusp |= + QIB_STATUS_IB_READY | QIB_STATUS_IB_CONF; + qib_clear_symerror_on_linkup((unsigned long)ppd); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_LINKACTIVE | QIBL_LINKV; + ppd->lflags &= ~(QIBL_LINKINIT | + QIBL_LINKDOWN | QIBL_LINKARMED); + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + if (dd->flags & QIB_HAS_SEND_DMA) + qib_sdma_process_event(ppd, + qib_sdma_event_e30_go_running); + ev = IB_EVENT_PORT_ACTIVE; + dd->f_setextled(ppd, 1); + } + } else { /* down */ + if (ppd->lflags & QIBL_LINKACTIVE) + ev = IB_EVENT_PORT_ERR; + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags |= QIBL_LINKDOWN | QIBL_LINKV; + ppd->lflags &= ~(QIBL_LINKINIT | + QIBL_LINKACTIVE | QIBL_LINKARMED); + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + *ppd->statusp &= ~QIB_STATUS_IB_READY; + } + +skip_ibchange: + ppd->lastibcstat = ibcs; + if (ev) + signal_ib_event(ppd, ev); + return; +} + +void qib_clear_symerror_on_linkup(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + + if (ppd->lflags & QIBL_LINKACTIVE) + return; + + ppd->ibport_data.z_symbol_error_counter = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR); +} + +/* + * Handle receive interrupts for user ctxts; this means a user + * process was waiting for a packet to arrive, and didn't want + * to poll. 
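qib_clear_symerror_on_linkup() above "resets" the symbol error counter without touching hardware: it snapshots the current value into z_symbol_error_counter, and the PMA code later in this series subtracts the same z_ baselines before reporting. A generic sketch of that software-zero technique (read_hw_counter() is a hypothetical accessor for a free-running, monotonically increasing hardware counter):

	#include <linux/types.h>

	u64 read_hw_counter(void);	/* hypothetical HW accessor */

	struct soft_counter {
		u64 zero;		/* baseline captured at the last "reset" */
	};

	static void soft_counter_reset(struct soft_counter *c)
	{
		c->zero = read_hw_counter();
	}

	static u64 soft_counter_read(const struct soft_counter *c)
	{
		/* counter only increases, so the subtraction never underflows */
		return read_hw_counter() - c->zero;
	}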
+ */ +void qib_handle_urcv(struct qib_devdata *dd, u64 ctxtr) +{ + struct qib_ctxtdata *rcd; + unsigned long flags; + int i; + + spin_lock_irqsave(&dd->uctxt_lock, flags); + for (i = dd->first_user_ctxt; dd->rcd && i < dd->cfgctxts; i++) { + if (!(ctxtr & (1ULL << i))) + continue; + rcd = dd->rcd[i]; + if (!rcd || !rcd->cnt) + continue; + + if (test_and_clear_bit(QIB_CTXT_WAITING_RCV, &rcd->flag)) { + wake_up_interruptible(&rcd->wait); + dd->f_rcvctrl(rcd->ppd, QIB_RCVCTRL_INTRAVAIL_DIS, + rcd->ctxt); + } else if (test_and_clear_bit(QIB_CTXT_WAITING_URG, + &rcd->flag)) { + rcd->urgent++; + wake_up_interruptible(&rcd->wait); + } + } + spin_unlock_irqrestore(&dd->uctxt_lock, flags); +} + +void qib_bad_intrstatus(struct qib_devdata *dd) +{ + static int allbits; + + /* separate routine, for better optimization of qib_intr() */ + + /* + * We print the message and disable interrupts, in hope of + * having a better chance of debugging the problem. + */ + qib_dev_err(dd, + "Read of chip interrupt status failed disabling interrupts\n"); + if (allbits++) { + /* disable interrupt delivery, something is very wrong */ + if (allbits == 2) + dd->f_set_intr_state(dd, 0); + if (allbits == 3) { + qib_dev_err(dd, + "2nd bad interrupt status, unregistering interrupts\n"); + dd->flags |= QIB_BADINTR; + dd->flags &= ~QIB_INITTED; + dd->f_free_irq(dd); + } + } +} diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c new file mode 100644 index 00000000000..3b9afccaaad --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_keys.c @@ -0,0 +1,387 @@ +/* + * Copyright (c) 2006, 2007, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "qib.h" + +/** + * qib_alloc_lkey - allocate an lkey + * @mr: memory region that this lkey protects + * @dma_region: 0->normal key, 1->restricted DMA key + * + * Returns 0 if successful, otherwise returns -errno. + * + * Increments mr reference count as required. + * + * Sets the lkey field mr for non-dma regions. 
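The slot/generation packing that qib_alloc_lkey() performs below is easier to see in isolation. A sketch under the assumption ib_qib_lkey_table_size == 16 (the table size is configurable; the helper name here is hypothetical, and the real code also bumps the generation when it has to force a nonzero key):

	/* With table_bits == 16: lkey = (slot << 16) | ((gen & 0xff) << 8).
	 * A lookup recovers the slot with (lkey >> 16); the full compare
	 * against mr->lkey then rejects stale generations.
	 */
	static u32 make_lkey(u32 slot, u32 gen, u32 table_bits)
	{
		u32 lkey = (slot << (32 - table_bits)) |
			   ((((1u << (24 - table_bits)) - 1) & gen) << 8);

		/* 0 is reserved for the unrestricted (DMA) key; never publish it */
		return lkey ? lkey : 1u << 8;
	}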
+ * + */ + +int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) +{ + unsigned long flags; + u32 r; + u32 n; + int ret = 0; + struct qib_ibdev *dev = to_idev(mr->pd->device); + struct qib_lkey_table *rkt = &dev->lk_table; + + spin_lock_irqsave(&rkt->lock, flags); + + /* special case for dma_mr lkey == 0 */ + if (dma_region) { + struct qib_mregion *tmr; + + tmr = rcu_access_pointer(dev->dma_mr); + if (!tmr) { + qib_get_mr(mr); + rcu_assign_pointer(dev->dma_mr, mr); + mr->lkey_published = 1; + } + goto success; + } + + /* Find the next available LKEY */ + r = rkt->next; + n = r; + for (;;) { + if (rkt->table[r] == NULL) + break; + r = (r + 1) & (rkt->max - 1); + if (r == n) + goto bail; + } + rkt->next = (r + 1) & (rkt->max - 1); + /* + * Make sure lkey is never zero which is reserved to indicate an + * unrestricted LKEY. + */ + rkt->gen++; + mr->lkey = (r << (32 - ib_qib_lkey_table_size)) | + ((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen) + << 8); + if (mr->lkey == 0) { + mr->lkey |= 1 << 8; + rkt->gen++; + } + qib_get_mr(mr); + rcu_assign_pointer(rkt->table[r], mr); + mr->lkey_published = 1; +success: + spin_unlock_irqrestore(&rkt->lock, flags); +out: + return ret; +bail: + spin_unlock_irqrestore(&rkt->lock, flags); + ret = -ENOMEM; + goto out; +} + +/** + * qib_free_lkey - free an lkey + * @mr: mr to free from tables + */ +void qib_free_lkey(struct qib_mregion *mr) +{ + unsigned long flags; + u32 lkey = mr->lkey; + u32 r; + struct qib_ibdev *dev = to_idev(mr->pd->device); + struct qib_lkey_table *rkt = &dev->lk_table; + + spin_lock_irqsave(&rkt->lock, flags); + if (!mr->lkey_published) + goto out; + if (lkey == 0) + rcu_assign_pointer(dev->dma_mr, NULL); + else { + r = lkey >> (32 - ib_qib_lkey_table_size); + rcu_assign_pointer(rkt->table[r], NULL); + } + qib_put_mr(mr); + mr->lkey_published = 0; +out: + spin_unlock_irqrestore(&rkt->lock, flags); +} + +/** + * qib_lkey_ok - check IB SGE for validity and initialize + * @rkt: table containing lkey to check SGE against + * @pd: protection domain + * @isge: outgoing internal SGE + * @sge: SGE to check + * @acc: access flags + * + * Return 1 if valid and successful, otherwise returns 0. + * + * increments the reference count upon success + * + * Check the IB SGE for validity and initialize our internal version + * of it. + */ +int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, + struct qib_sge *isge, struct ib_sge *sge, int acc) +{ + struct qib_mregion *mr; + unsigned n, m; + size_t off; + + /* + * We use LKEY == zero for kernel virtual addresses + * (see qib_get_dma_mr and qib_dma.c). 
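The key lookups that follow all use the same RCU idiom: dereference the table entry under rcu_read_lock(), pin it with atomic_inc_not_zero() so a concurrent teardown cannot free it, then drop the RCU lock. A generic sketch of lookup-and-pin (struct obj and table are hypothetical stand-ins for qib_mregion and the lkey table):

	#include <linux/rcupdate.h>
	#include <linux/atomic.h>

	struct obj {
		atomic_t refcount;
	};

	static struct obj *lookup_and_pin(struct obj __rcu **table, unsigned idx)
	{
		struct obj *o;

		rcu_read_lock();
		o = rcu_dereference(table[idx]);
		if (o && !atomic_inc_not_zero(&o->refcount))
			o = NULL;	/* lost the race with teardown */
		rcu_read_unlock();
		return o;		/* caller owns a reference if non-NULL */
	}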
+ */ + rcu_read_lock(); + if (sge->lkey == 0) { + struct qib_ibdev *dev = to_idev(pd->ibpd.device); + + if (pd->user) + goto bail; + mr = rcu_dereference(dev->dma_mr); + if (!mr) + goto bail; + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); + + isge->mr = mr; + isge->vaddr = (void *) sge->addr; + isge->length = sge->length; + isge->sge_length = sge->length; + isge->m = 0; + isge->n = 0; + goto ok; + } + mr = rcu_dereference( + rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]); + if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) + goto bail; + + off = sge->addr - mr->user_base; + if (unlikely(sge->addr < mr->user_base || + off + sge->length > mr->length || + (mr->access_flags & acc) != acc)) + goto bail; + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); + + off += mr->offset; + if (mr->page_shift) { + /* + page sizes are uniform power of 2 so no loop is necessary + entries_spanned_by_off is the number of times the loop below + would have executed. + */ + size_t entries_spanned_by_off; + + entries_spanned_by_off = off >> mr->page_shift; + off -= (entries_spanned_by_off << mr->page_shift); + m = entries_spanned_by_off/QIB_SEGSZ; + n = entries_spanned_by_off%QIB_SEGSZ; + } else { + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= QIB_SEGSZ) { + m++; + n = 0; + } + } + } + isge->mr = mr; + isge->vaddr = mr->map[m]->segs[n].vaddr + off; + isge->length = mr->map[m]->segs[n].length - off; + isge->sge_length = sge->length; + isge->m = m; + isge->n = n; +ok: + return 1; +bail: + rcu_read_unlock(); + return 0; +} + +/** + * qib_rkey_ok - check the IB virtual address, length, and RKEY + * @qp: qp for validation + * @sge: SGE state + * @len: length of data + * @vaddr: virtual address to place data + * @rkey: rkey to check + * @acc: access flags + * + * Return 1 if successful, otherwise 0. + * + * increments the reference count upon success + */ +int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, + u32 len, u64 vaddr, u32 rkey, int acc) +{ + struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; + struct qib_mregion *mr; + unsigned n, m; + size_t off; + + /* + * We use RKEY == zero for kernel virtual addresses + * (see qib_get_dma_mr and qib_dma.c). + */ + rcu_read_lock(); + if (rkey == 0) { + struct qib_pd *pd = to_ipd(qp->ibqp.pd); + struct qib_ibdev *dev = to_idev(pd->ibpd.device); + + if (pd->user) + goto bail; + mr = rcu_dereference(dev->dma_mr); + if (!mr) + goto bail; + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); + + sge->mr = mr; + sge->vaddr = (void *) vaddr; + sge->length = len; + sge->sge_length = len; + sge->m = 0; + sge->n = 0; + goto ok; + } + + mr = rcu_dereference( + rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]); + if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) + goto bail; + + off = vaddr - mr->iova; + if (unlikely(vaddr < mr->iova || off + len > mr->length || + (mr->access_flags & acc) == 0)) + goto bail; + if (unlikely(!atomic_inc_not_zero(&mr->refcount))) + goto bail; + rcu_read_unlock(); + + off += mr->offset; + if (mr->page_shift) { + /* + page sizes are uniform power of 2 so no loop is necessary + entries_spanned_by_off is the number of times the loop below + would have executed. 
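A worked instance of the shortcut the comment above describes, assuming page_shift == 12 (4 KiB pages) and, purely for illustration, QIB_SEGSZ == 2:

	/* off = 0x5800:
	 *   entries_spanned_by_off = 0x5800 >> 12 = 5 whole entries;
	 *   off -= 5 << 12, leaving 0x800 within the sixth page;
	 *   m = 5 / 2 = 2, n = 5 % 2 = 1,
	 * so the SGE starts at mr->map[2]->segs[1].vaddr + 0x800 --
	 * one shift and one divmod instead of five loop iterations.
	 */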
+ */ + size_t entries_spanned_by_off; + + entries_spanned_by_off = off >> mr->page_shift; + off -= (entries_spanned_by_off << mr->page_shift); + m = entries_spanned_by_off/QIB_SEGSZ; + n = entries_spanned_by_off%QIB_SEGSZ; + } else { + m = 0; + n = 0; + while (off >= mr->map[m]->segs[n].length) { + off -= mr->map[m]->segs[n].length; + n++; + if (n >= QIB_SEGSZ) { + m++; + n = 0; + } + } + } + sge->mr = mr; + sge->vaddr = mr->map[m]->segs[n].vaddr + off; + sge->length = mr->map[m]->segs[n].length - off; + sge->sge_length = len; + sge->m = m; + sge->n = n; +ok: + return 1; +bail: + rcu_read_unlock(); + return 0; +} + +/* + * Initialize the memory region specified by the work reqeust. + */ +int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr) +{ + struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table; + struct qib_pd *pd = to_ipd(qp->ibqp.pd); + struct qib_mregion *mr; + u32 rkey = wr->wr.fast_reg.rkey; + unsigned i, n, m; + int ret = -EINVAL; + unsigned long flags; + u64 *page_list; + size_t ps; + + spin_lock_irqsave(&rkt->lock, flags); + if (pd->user || rkey == 0) + goto bail; + + mr = rcu_dereference_protected( + rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))], + lockdep_is_held(&rkt->lock)); + if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) + goto bail; + + if (wr->wr.fast_reg.page_list_len > mr->max_segs) + goto bail; + + ps = 1UL << wr->wr.fast_reg.page_shift; + if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len) + goto bail; + + mr->user_base = wr->wr.fast_reg.iova_start; + mr->iova = wr->wr.fast_reg.iova_start; + mr->lkey = rkey; + mr->length = wr->wr.fast_reg.length; + mr->access_flags = wr->wr.fast_reg.access_flags; + page_list = wr->wr.fast_reg.page_list->page_list; + m = 0; + n = 0; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + mr->map[m]->segs[n].vaddr = (void *) page_list[i]; + mr->map[m]->segs[n].length = ps; + if (++n == QIB_SEGSZ) { + m++; + n = 0; + } + } + + ret = 0; +bail: + spin_unlock_irqrestore(&rkt->lock, flags); + return ret; +} diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c new file mode 100644 index 00000000000..22c720e5740 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -0,0 +1,2533 @@ +/* + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/ib_smi.h> + +#include "qib.h" +#include "qib_mad.h" + +static int reply(struct ib_smp *smp) +{ + /* + * The verbs framework will handle the directed/LID route + * packet changes. + */ + smp->method = IB_MGMT_METHOD_GET_RESP; + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + smp->status |= IB_SMP_DIRECTION; + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +static int reply_failure(struct ib_smp *smp) +{ + /* + * The verbs framework will handle the directed/LID route + * packet changes. + */ + smp->method = IB_MGMT_METHOD_GET_RESP; + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + smp->status |= IB_SMP_DIRECTION; + return IB_MAD_RESULT_FAILURE | IB_MAD_RESULT_REPLY; +} + +static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) +{ + struct ib_mad_send_buf *send_buf; + struct ib_mad_agent *agent; + struct ib_smp *smp; + int ret; + unsigned long flags; + unsigned long timeout; + + agent = ibp->send_agent; + if (!agent) + return; + + /* o14-3.2.1 */ + if (!(ppd_from_ibp(ibp)->lflags & QIBL_LINKACTIVE)) + return; + + /* o14-2 */ + if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout)) + return; + + send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR, + IB_MGMT_MAD_DATA, GFP_ATOMIC); + if (IS_ERR(send_buf)) + return; + + smp = send_buf->mad; + smp->base_version = IB_MGMT_BASE_VERSION; + smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + smp->class_version = 1; + smp->method = IB_MGMT_METHOD_TRAP; + ibp->tid++; + smp->tid = cpu_to_be64(ibp->tid); + smp->attr_id = IB_SMP_ATTR_NOTICE; + /* o14-1: smp->mkey = 0; */ + memcpy(smp->data, data, len); + + spin_lock_irqsave(&ibp->lock, flags); + if (!ibp->sm_ah) { + if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { + struct ib_ah *ah; + + ah = qib_create_qp0_ah(ibp, ibp->sm_lid); + if (IS_ERR(ah)) + ret = PTR_ERR(ah); + else { + send_buf->ah = ah; + ibp->sm_ah = to_iah(ah); + ret = 0; + } + } else + ret = -EINVAL; + } else { + send_buf->ah = &ibp->sm_ah->ibah; + ret = 0; + } + spin_unlock_irqrestore(&ibp->lock, flags); + + if (!ret) + ret = ib_post_send_mad(send_buf, NULL); + if (!ret) { + /* 4.096 usec. */ + timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000; + ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout); + } else { + ib_free_send_mad(send_buf); + ibp->trap_timeout = 0; + } +} + +/* + * Send a bad [PQ]_Key trap (ch. 14.3.8). 
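qib_send_trap() above rate-limits repeat traps per IBTA o14-2: after a successful send, a new trap is suppressed until 4.096 us * 2^SubnetTimeOut has elapsed. The deadline arithmetic, isolated (hypothetical helper name; 4096 ns scaled, then converted to whole microseconds exactly as the code above does):

	#include <linux/jiffies.h>

	static unsigned long trap_backoff_deadline(u8 subnet_timeout)
	{
		/* 4096 ns * 2^subnet_timeout, expressed in microseconds */
		unsigned long usecs = (4096UL << subnet_timeout) / 1000;

		return jiffies + usecs_to_jiffies(usecs);
	}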
+ */ +void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl, + u32 qp1, u32 qp2, __be16 lid1, __be16 lid2) +{ + struct ib_mad_notice_attr data; + + if (trap_num == IB_NOTICE_TRAP_BAD_PKEY) + ibp->pkey_violations++; + else + ibp->qkey_violations++; + ibp->n_pkt_drops++; + + /* Send violation trap */ + data.generic_type = IB_NOTICE_TYPE_SECURITY; + data.prod_type_msb = 0; + data.prod_type_lsb = IB_NOTICE_PROD_CA; + data.trap_num = trap_num; + data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); + data.toggle_count = 0; + memset(&data.details, 0, sizeof data.details); + data.details.ntc_257_258.lid1 = lid1; + data.details.ntc_257_258.lid2 = lid2; + data.details.ntc_257_258.key = cpu_to_be32(key); + data.details.ntc_257_258.sl_qp1 = cpu_to_be32((sl << 28) | qp1); + data.details.ntc_257_258.qp2 = cpu_to_be32(qp2); + + qib_send_trap(ibp, &data, sizeof data); +} + +/* + * Send a bad M_Key trap (ch. 14.3.9). + */ +static void qib_bad_mkey(struct qib_ibport *ibp, struct ib_smp *smp) +{ + struct ib_mad_notice_attr data; + + /* Send violation trap */ + data.generic_type = IB_NOTICE_TYPE_SECURITY; + data.prod_type_msb = 0; + data.prod_type_lsb = IB_NOTICE_PROD_CA; + data.trap_num = IB_NOTICE_TRAP_BAD_MKEY; + data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); + data.toggle_count = 0; + memset(&data.details, 0, sizeof data.details); + data.details.ntc_256.lid = data.issuer_lid; + data.details.ntc_256.method = smp->method; + data.details.ntc_256.attr_id = smp->attr_id; + data.details.ntc_256.attr_mod = smp->attr_mod; + data.details.ntc_256.mkey = smp->mkey; + if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + u8 hop_cnt; + + data.details.ntc_256.dr_slid = smp->dr_slid; + data.details.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE; + hop_cnt = smp->hop_cnt; + if (hop_cnt > ARRAY_SIZE(data.details.ntc_256.dr_rtn_path)) { + data.details.ntc_256.dr_trunc_hop |= + IB_NOTICE_TRAP_DR_TRUNC; + hop_cnt = ARRAY_SIZE(data.details.ntc_256.dr_rtn_path); + } + data.details.ntc_256.dr_trunc_hop |= hop_cnt; + memcpy(data.details.ntc_256.dr_rtn_path, smp->return_path, + hop_cnt); + } + + qib_send_trap(ibp, &data, sizeof data); +} + +/* + * Send a Port Capability Mask Changed trap (ch. 14.3.11). + */ +void qib_cap_mask_chg(struct qib_ibport *ibp) +{ + struct ib_mad_notice_attr data; + + data.generic_type = IB_NOTICE_TYPE_INFO; + data.prod_type_msb = 0; + data.prod_type_lsb = IB_NOTICE_PROD_CA; + data.trap_num = IB_NOTICE_TRAP_CAP_MASK_CHG; + data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); + data.toggle_count = 0; + memset(&data.details, 0, sizeof data.details); + data.details.ntc_144.lid = data.issuer_lid; + data.details.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags); + + qib_send_trap(ibp, &data, sizeof data); +} + +/* + * Send a System Image GUID Changed trap (ch. 14.3.12). + */ +void qib_sys_guid_chg(struct qib_ibport *ibp) +{ + struct ib_mad_notice_attr data; + + data.generic_type = IB_NOTICE_TYPE_INFO; + data.prod_type_msb = 0; + data.prod_type_lsb = IB_NOTICE_PROD_CA; + data.trap_num = IB_NOTICE_TRAP_SYS_GUID_CHG; + data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); + data.toggle_count = 0; + memset(&data.details, 0, sizeof data.details); + data.details.ntc_145.lid = data.issuer_lid; + data.details.ntc_145.new_sys_guid = ib_qib_sys_image_guid; + + qib_send_trap(ibp, &data, sizeof data); +} + +/* + * Send a Node Description Changed trap (ch. 14.3.13). 
+ */ +void qib_node_desc_chg(struct qib_ibport *ibp) +{ + struct ib_mad_notice_attr data; + + data.generic_type = IB_NOTICE_TYPE_INFO; + data.prod_type_msb = 0; + data.prod_type_lsb = IB_NOTICE_PROD_CA; + data.trap_num = IB_NOTICE_TRAP_CAP_MASK_CHG; + data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid); + data.toggle_count = 0; + memset(&data.details, 0, sizeof data.details); + data.details.ntc_144.lid = data.issuer_lid; + data.details.ntc_144.local_changes = 1; + data.details.ntc_144.change_flags = IB_NOTICE_TRAP_NODE_DESC_CHG; + + qib_send_trap(ibp, &data, sizeof data); +} + +static int subn_get_nodedescription(struct ib_smp *smp, + struct ib_device *ibdev) +{ + if (smp->attr_mod) + smp->status |= IB_SMP_INVALID_FIELD; + + memcpy(smp->data, ibdev->node_desc, sizeof(smp->data)); + + return reply(smp); +} + +static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct ib_node_info *nip = (struct ib_node_info *)&smp->data; + struct qib_devdata *dd = dd_from_ibdev(ibdev); + u32 vendor, majrev, minrev; + unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */ + + /* GUID 0 is illegal */ + if (smp->attr_mod || pidx >= dd->num_pports || + dd->pport[pidx].guid == 0) + smp->status |= IB_SMP_INVALID_FIELD; + else + nip->port_guid = dd->pport[pidx].guid; + + nip->base_version = 1; + nip->class_version = 1; + nip->node_type = 1; /* channel adapter */ + nip->num_ports = ibdev->phys_port_cnt; + /* This is already in network order */ + nip->sys_guid = ib_qib_sys_image_guid; + nip->node_guid = dd->pport->guid; /* Use first-port GUID as node */ + nip->partition_cap = cpu_to_be16(qib_get_npkeys(dd)); + nip->device_id = cpu_to_be16(dd->deviceid); + majrev = dd->majrev; + minrev = dd->minrev; + nip->revision = cpu_to_be32((majrev << 16) | minrev); + nip->local_port_num = port; + vendor = dd->vendorid; + nip->vendor_id[0] = QIB_SRC_OUI_1; + nip->vendor_id[1] = QIB_SRC_OUI_2; + nip->vendor_id[2] = QIB_SRC_OUI_3; + + return reply(smp); +} + +static int subn_get_guidinfo(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + u32 startgx = 8 * be32_to_cpu(smp->attr_mod); + __be64 *p = (__be64 *) smp->data; + unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */ + + /* 32 blocks of 8 64-bit GUIDs per block */ + + memset(smp->data, 0, sizeof(smp->data)); + + if (startgx == 0 && pidx < dd->num_pports) { + struct qib_pportdata *ppd = dd->pport + pidx; + struct qib_ibport *ibp = &ppd->ibport_data; + __be64 g = ppd->guid; + unsigned i; + + /* GUID 0 is illegal */ + if (g == 0) + smp->status |= IB_SMP_INVALID_FIELD; + else { + /* The first is a copy of the read-only HW GUID. */ + p[0] = g; + for (i = 1; i < QIB_GUIDS_PER_PORT; i++) + p[i] = ibp->guids[i - 1]; + } + } else + smp->status |= IB_SMP_INVALID_FIELD; + + return reply(smp); +} + +static void set_link_width_enabled(struct qib_pportdata *ppd, u32 w) +{ + (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LWID_ENB, w); +} + +static void set_link_speed_enabled(struct qib_pportdata *ppd, u32 s) +{ + (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_SPD_ENB, s); +} + +static int get_overrunthreshold(struct qib_pportdata *ppd) +{ + return ppd->dd->f_get_ib_cfg(ppd, QIB_IB_CFG_OVERRUN_THRESH); +} + +/** + * set_overrunthreshold - set the overrun threshold + * @ppd: the physical port data + * @n: the new threshold + * + * Note that this will only take effect when the link state changes. 
+ */ +static int set_overrunthreshold(struct qib_pportdata *ppd, unsigned n) +{ + (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OVERRUN_THRESH, + (u32)n); + return 0; +} + +static int get_phyerrthreshold(struct qib_pportdata *ppd) +{ + return ppd->dd->f_get_ib_cfg(ppd, QIB_IB_CFG_PHYERR_THRESH); +} + +/** + * set_phyerrthreshold - set the physical error threshold + * @ppd: the physical port data + * @n: the new threshold + * + * Note that this will only take effect when the link state changes. + */ +static int set_phyerrthreshold(struct qib_pportdata *ppd, unsigned n) +{ + (void) ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PHYERR_THRESH, + (u32)n); + return 0; +} + +/** + * get_linkdowndefaultstate - get the default linkdown state + * @ppd: the physical port data + * + * Returns zero if the default is POLL, 1 if the default is SLEEP. + */ +static int get_linkdowndefaultstate(struct qib_pportdata *ppd) +{ + return ppd->dd->f_get_ib_cfg(ppd, QIB_IB_CFG_LINKDEFAULT) == + IB_LINKINITCMD_SLEEP; +} + +static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags) +{ + int valid_mkey = 0; + int ret = 0; + + /* Is the mkey in the process of expiring? */ + if (ibp->mkey_lease_timeout && + time_after_eq(jiffies, ibp->mkey_lease_timeout)) { + /* Clear timeout and mkey protection field. */ + ibp->mkey_lease_timeout = 0; + ibp->mkeyprot = 0; + } + + if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->mkey == 0 || + ibp->mkey == smp->mkey) + valid_mkey = 1; + + /* Unset lease timeout on any valid Get/Set/TrapRepress */ + if (valid_mkey && ibp->mkey_lease_timeout && + (smp->method == IB_MGMT_METHOD_GET || + smp->method == IB_MGMT_METHOD_SET || + smp->method == IB_MGMT_METHOD_TRAP_REPRESS)) + ibp->mkey_lease_timeout = 0; + + if (!valid_mkey) { + switch (smp->method) { + case IB_MGMT_METHOD_GET: + /* Bad mkey not a violation below level 2 */ + if (ibp->mkeyprot < 2) + break; + case IB_MGMT_METHOD_SET: + case IB_MGMT_METHOD_TRAP_REPRESS: + if (ibp->mkey_violations != 0xFFFF) + ++ibp->mkey_violations; + if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period) + ibp->mkey_lease_timeout = jiffies + + ibp->mkey_lease_period * HZ; + /* Generate a trap notice. */ + qib_bad_mkey(ibp, smp); + ret = 1; + } + } + + return ret; +} + +static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct qib_devdata *dd; + struct qib_pportdata *ppd; + struct qib_ibport *ibp; + struct ib_port_info *pip = (struct ib_port_info *)smp->data; + u8 mtu; + int ret; + u32 state; + u32 port_num = be32_to_cpu(smp->attr_mod); + + if (port_num == 0) + port_num = port; + else { + if (port_num > ibdev->phys_port_cnt) { + smp->status |= IB_SMP_INVALID_FIELD; + ret = reply(smp); + goto bail; + } + if (port_num != port) { + ibp = to_iport(ibdev, port_num); + ret = check_mkey(ibp, smp, 0); + if (ret) { + ret = IB_MAD_RESULT_FAILURE; + goto bail; + } + } + } + + dd = dd_from_ibdev(ibdev); + /* IB numbers ports from 1, hdw from 0 */ + ppd = dd->pport + (port_num - 1); + ibp = &ppd->ibport_data; + + /* Clear all fields. Only set the non-zero fields. */ + memset(smp->data, 0, sizeof(smp->data)); + + /* Only return the mkey if the protection field allows it. 
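The guard that follows the memset below implements the M_KeyProtectBits rules for Get (ch. 14.2.4.1): with protection level 1, a request whose M_Key does not match still succeeds, but the returned M_Key field is left at the zero the memset gave it; levels 2 and 3 were already rejected in check_mkey(). Schematically:

	/*
	 * if (!(method == GET && ibp->mkey != smp->mkey && ibp->mkeyprot == 1))
	 *	pip->mkey = ibp->mkey;	 otherwise keep the cleared 0
	 */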
*/ + if (!(smp->method == IB_MGMT_METHOD_GET && + ibp->mkey != smp->mkey && + ibp->mkeyprot == 1)) + pip->mkey = ibp->mkey; + pip->gid_prefix = ibp->gid_prefix; + pip->lid = cpu_to_be16(ppd->lid); + pip->sm_lid = cpu_to_be16(ibp->sm_lid); + pip->cap_mask = cpu_to_be32(ibp->port_cap_flags); + /* pip->diag_code; */ + pip->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period); + pip->local_port_num = port; + pip->link_width_enabled = ppd->link_width_enabled; + pip->link_width_supported = ppd->link_width_supported; + pip->link_width_active = ppd->link_width_active; + state = dd->f_iblink_state(ppd->lastibcstat); + pip->linkspeed_portstate = ppd->link_speed_supported << 4 | state; + + pip->portphysstate_linkdown = + (dd->f_ibphys_portstate(ppd->lastibcstat) << 4) | + (get_linkdowndefaultstate(ppd) ? 1 : 2); + pip->mkeyprot_resv_lmc = (ibp->mkeyprot << 6) | ppd->lmc; + pip->linkspeedactive_enabled = (ppd->link_speed_active << 4) | + ppd->link_speed_enabled; + switch (ppd->ibmtu) { + default: /* something is wrong; fall through */ + case 4096: + mtu = IB_MTU_4096; + break; + case 2048: + mtu = IB_MTU_2048; + break; + case 1024: + mtu = IB_MTU_1024; + break; + case 512: + mtu = IB_MTU_512; + break; + case 256: + mtu = IB_MTU_256; + break; + } + pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->sm_sl; + pip->vlcap_inittype = ppd->vls_supported << 4; /* InitType = 0 */ + pip->vl_high_limit = ibp->vl_high_limit; + pip->vl_arb_high_cap = + dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_CAP); + pip->vl_arb_low_cap = + dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_LOW_CAP); + /* InitTypeReply = 0 */ + pip->inittypereply_mtucap = qib_ibmtu ? qib_ibmtu : IB_MTU_4096; + /* HCAs ignore VLStallCount and HOQLife */ + /* pip->vlstallcnt_hoqlife; */ + pip->operationalvl_pei_peo_fpi_fpo = + dd->f_get_ib_cfg(ppd, QIB_IB_CFG_OP_VLS) << 4; + pip->mkey_violations = cpu_to_be16(ibp->mkey_violations); + /* P_KeyViolations are counted by hardware. */ + pip->pkey_violations = cpu_to_be16(ibp->pkey_violations); + pip->qkey_violations = cpu_to_be16(ibp->qkey_violations); + /* Only the hardware GUID is supported for now */ + pip->guid_cap = QIB_GUIDS_PER_PORT; + pip->clientrereg_resv_subnetto = ibp->subnet_timeout; + /* 32.768 usec. response time (guessing) */ + pip->resv_resptimevalue = 3; + pip->localphyerrors_overrunerrors = + (get_phyerrthreshold(ppd) << 4) | + get_overrunthreshold(ppd); + /* pip->max_credit_hint; */ + if (ibp->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) { + u32 v; + + v = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_LINKLATENCY); + pip->link_roundtrip_latency[0] = v >> 16; + pip->link_roundtrip_latency[1] = v >> 8; + pip->link_roundtrip_latency[2] = v; + } + + ret = reply(smp); + +bail: + return ret; +} + +/** + * get_pkeys - return the PKEY table + * @dd: the qlogic_ib device + * @port: the IB port number + * @pkeys: the pkey table is placed here + */ +static int get_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys) +{ + struct qib_pportdata *ppd = dd->pport + port - 1; + /* + * always a kernel context, no locking needed. + * If we get here with ppd setup, no need to check + * that pd is valid. 
+ */ + struct qib_ctxtdata *rcd = dd->rcd[ppd->hw_pidx]; + + memcpy(pkeys, rcd->pkeys, sizeof(rcd->pkeys)); + + return 0; +} + +static int subn_get_pkeytable(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); + u16 *p = (u16 *) smp->data; + __be16 *q = (__be16 *) smp->data; + + /* 64 blocks of 32 16-bit P_Key entries */ + + memset(smp->data, 0, sizeof(smp->data)); + if (startpx == 0) { + struct qib_devdata *dd = dd_from_ibdev(ibdev); + unsigned i, n = qib_get_npkeys(dd); + + get_pkeys(dd, port, p); + + for (i = 0; i < n; i++) + q[i] = cpu_to_be16(p[i]); + } else + smp->status |= IB_SMP_INVALID_FIELD; + + return reply(smp); +} + +static int subn_set_guidinfo(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + u32 startgx = 8 * be32_to_cpu(smp->attr_mod); + __be64 *p = (__be64 *) smp->data; + unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */ + + /* 32 blocks of 8 64-bit GUIDs per block */ + + if (startgx == 0 && pidx < dd->num_pports) { + struct qib_pportdata *ppd = dd->pport + pidx; + struct qib_ibport *ibp = &ppd->ibport_data; + unsigned i; + + /* The first entry is read-only. */ + for (i = 1; i < QIB_GUIDS_PER_PORT; i++) + ibp->guids[i - 1] = p[i]; + } else + smp->status |= IB_SMP_INVALID_FIELD; + + /* The only GUID we support is the first read-only entry. */ + return subn_get_guidinfo(smp, ibdev, port); +} + +/** + * subn_set_portinfo - set port information + * @smp: the incoming SM packet + * @ibdev: the infiniband device + * @port: the port on the device + * + * Set Portinfo (see ch. 14.2.5.6). + */ +static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct ib_port_info *pip = (struct ib_port_info *)smp->data; + struct ib_event event; + struct qib_devdata *dd; + struct qib_pportdata *ppd; + struct qib_ibport *ibp; + u8 clientrereg = (pip->clientrereg_resv_subnetto & 0x80); + unsigned long flags; + u16 lid, smlid; + u8 lwe; + u8 lse; + u8 state; + u8 vls; + u8 msl; + u16 lstate; + int ret, ore, mtu; + u32 port_num = be32_to_cpu(smp->attr_mod); + + if (port_num == 0) + port_num = port; + else { + if (port_num > ibdev->phys_port_cnt) + goto err; + /* Port attributes can only be set on the receiving port */ + if (port_num != port) + goto get_only; + } + + dd = dd_from_ibdev(ibdev); + /* IB numbers ports from 1, hdw from 0 */ + ppd = dd->pport + (port_num - 1); + ibp = &ppd->ibport_data; + event.device = ibdev; + event.element.port_num = port; + + ibp->mkey = pip->mkey; + ibp->gid_prefix = pip->gid_prefix; + ibp->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period); + + lid = be16_to_cpu(pip->lid); + /* Must be a valid unicast LID address. */ + if (lid == 0 || lid >= QIB_MULTICAST_LID_BASE) + smp->status |= IB_SMP_INVALID_FIELD; + else if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) { + if (ppd->lid != lid) + qib_set_uevent_bits(ppd, _QIB_EVENT_LID_CHANGE_BIT); + if (ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) + qib_set_uevent_bits(ppd, _QIB_EVENT_LMC_CHANGE_BIT); + qib_set_lid(ppd, lid, pip->mkeyprot_resv_lmc & 7); + event.event = IB_EVENT_LID_CHANGE; + ib_dispatch_event(&event); + } + + smlid = be16_to_cpu(pip->sm_lid); + msl = pip->neighbormtu_mastersmsl & 0xF; + /* Must be a valid unicast LID address. 
*/ + if (smlid == 0 || smlid >= QIB_MULTICAST_LID_BASE) + smp->status |= IB_SMP_INVALID_FIELD; + else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) { + spin_lock_irqsave(&ibp->lock, flags); + if (ibp->sm_ah) { + if (smlid != ibp->sm_lid) + ibp->sm_ah->attr.dlid = smlid; + if (msl != ibp->sm_sl) + ibp->sm_ah->attr.sl = msl; + } + spin_unlock_irqrestore(&ibp->lock, flags); + if (smlid != ibp->sm_lid) + ibp->sm_lid = smlid; + if (msl != ibp->sm_sl) + ibp->sm_sl = msl; + event.event = IB_EVENT_SM_CHANGE; + ib_dispatch_event(&event); + } + + /* Allow 1x or 4x to be set (see 14.2.6.6). */ + lwe = pip->link_width_enabled; + if (lwe) { + if (lwe == 0xFF) + set_link_width_enabled(ppd, ppd->link_width_supported); + else if (lwe >= 16 || (lwe & ~ppd->link_width_supported)) + smp->status |= IB_SMP_INVALID_FIELD; + else if (lwe != ppd->link_width_enabled) + set_link_width_enabled(ppd, lwe); + } + + lse = pip->linkspeedactive_enabled & 0xF; + if (lse) { + /* + * The IB 1.2 spec. only allows link speed values + * 1, 3, 5, 7, 15. 1.2.1 extended to allow specific + * speeds. + */ + if (lse == 15) + set_link_speed_enabled(ppd, + ppd->link_speed_supported); + else if (lse >= 8 || (lse & ~ppd->link_speed_supported)) + smp->status |= IB_SMP_INVALID_FIELD; + else if (lse != ppd->link_speed_enabled) + set_link_speed_enabled(ppd, lse); + } + + /* Set link down default state. */ + switch (pip->portphysstate_linkdown & 0xF) { + case 0: /* NOP */ + break; + case 1: /* SLEEP */ + (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LINKDEFAULT, + IB_LINKINITCMD_SLEEP); + break; + case 2: /* POLL */ + (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LINKDEFAULT, + IB_LINKINITCMD_POLL); + break; + default: + smp->status |= IB_SMP_INVALID_FIELD; + } + + ibp->mkeyprot = pip->mkeyprot_resv_lmc >> 6; + ibp->vl_high_limit = pip->vl_high_limit; + (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_LIMIT, + ibp->vl_high_limit); + + mtu = ib_mtu_enum_to_int((pip->neighbormtu_mastersmsl >> 4) & 0xF); + if (mtu == -1) + smp->status |= IB_SMP_INVALID_FIELD; + else + qib_set_mtu(ppd, mtu); + + /* Set operational VLs */ + vls = (pip->operationalvl_pei_peo_fpi_fpo >> 4) & 0xF; + if (vls) { + if (vls > ppd->vls_supported) + smp->status |= IB_SMP_INVALID_FIELD; + else + (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_OP_VLS, vls); + } + + if (pip->mkey_violations == 0) + ibp->mkey_violations = 0; + + if (pip->pkey_violations == 0) + ibp->pkey_violations = 0; + + if (pip->qkey_violations == 0) + ibp->qkey_violations = 0; + + ore = pip->localphyerrors_overrunerrors; + if (set_phyerrthreshold(ppd, (ore >> 4) & 0xF)) + smp->status |= IB_SMP_INVALID_FIELD; + + if (set_overrunthreshold(ppd, (ore & 0xF))) + smp->status |= IB_SMP_INVALID_FIELD; + + ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; + + /* + * Do the port state change now that the other link parameters + * have been set. + * Changing the port physical state only makes sense if the link + * is down or is being set to down. + */ + state = pip->linkspeed_portstate & 0xF; + lstate = (pip->portphysstate_linkdown >> 4) & 0xF; + if (lstate && !(state == IB_PORT_DOWN || state == IB_PORT_NOP)) + smp->status |= IB_SMP_INVALID_FIELD; + + /* + * Only state changes of DOWN, ARM, and ACTIVE are valid + * and must be in the correct state to take effect (see 7.2.6). 
+ */ + switch (state) { + case IB_PORT_NOP: + if (lstate == 0) + break; + /* FALLTHROUGH */ + case IB_PORT_DOWN: + if (lstate == 0) + lstate = QIB_IB_LINKDOWN_ONLY; + else if (lstate == 1) + lstate = QIB_IB_LINKDOWN_SLEEP; + else if (lstate == 2) + lstate = QIB_IB_LINKDOWN; + else if (lstate == 3) + lstate = QIB_IB_LINKDOWN_DISABLE; + else { + smp->status |= IB_SMP_INVALID_FIELD; + break; + } + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + qib_set_linkstate(ppd, lstate); + /* + * Don't send a reply if the response would be sent + * through the disabled port. + */ + if (lstate == QIB_IB_LINKDOWN_DISABLE && smp->hop_cnt) { + ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + goto done; + } + qib_wait_linkstate(ppd, QIBL_LINKV, 10); + break; + case IB_PORT_ARMED: + qib_set_linkstate(ppd, QIB_IB_LINKARM); + break; + case IB_PORT_ACTIVE: + qib_set_linkstate(ppd, QIB_IB_LINKACTIVE); + break; + default: + smp->status |= IB_SMP_INVALID_FIELD; + } + + if (clientrereg) { + event.event = IB_EVENT_CLIENT_REREGISTER; + ib_dispatch_event(&event); + } + + ret = subn_get_portinfo(smp, ibdev, port); + + /* restore re-reg bit per o14-12.2.1 */ + pip->clientrereg_resv_subnetto |= clientrereg; + + goto get_only; + +err: + smp->status |= IB_SMP_INVALID_FIELD; +get_only: + ret = subn_get_portinfo(smp, ibdev, port); +done: + return ret; +} + +/** + * rm_pkey - decrement the reference count for the given PKEY + * @ppd: the physical port data + * @key: the PKEY value + * + * Return true if this was the last reference and the hardware table entry + * needs to be changed. + */ +static int rm_pkey(struct qib_pportdata *ppd, u16 key) +{ + int i; + int ret; + + for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) { + if (ppd->pkeys[i] != key) + continue; + if (atomic_dec_and_test(&ppd->pkeyrefs[i])) { + ppd->pkeys[i] = 0; + ret = 1; + goto bail; + } + break; + } + + ret = 0; + +bail: + return ret; +} + +/** + * add_pkey - add the given PKEY to the hardware table + * @ppd: the physical port data + * @key: the PKEY + * + * Return an error code if unable to add the entry, zero if no change, + * or 1 if the hardware PKEY register needs to be updated. + */ +static int add_pkey(struct qib_pportdata *ppd, u16 key) +{ + int i; + u16 lkey = key & 0x7FFF; + int any = 0; + int ret; + + if (lkey == 0x7FFF) { + ret = 0; + goto bail; + } + + /* Look for an empty slot or a matching PKEY. */ + for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) { + if (!ppd->pkeys[i]) { + any++; + continue; + } + /* If it matches exactly, try to increment the ref count */ + if (ppd->pkeys[i] == key) { + if (atomic_inc_return(&ppd->pkeyrefs[i]) > 1) { + ret = 0; + goto bail; + } + /* Lost the race. Look for an empty slot below. */ + atomic_dec(&ppd->pkeyrefs[i]); + any++; + } + /* + * It makes no sense to have both the limited and unlimited + * PKEY set at the same time since the unlimited one will + * disable the limited one. + */ + if ((ppd->pkeys[i] & 0x7FFF) == lkey) { + ret = -EEXIST; + goto bail; + } + } + if (!any) { + ret = -EBUSY; + goto bail; + } + for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) { + if (!ppd->pkeys[i] && + atomic_inc_return(&ppd->pkeyrefs[i]) == 1) { + /* for qibstats, etc.
*/ + ppd->pkeys[i] = key; + ret = 1; + goto bail; + } + } + ret = -EBUSY; + +bail: + return ret; +} + +/** + * set_pkeys - set the PKEY table for ctxt 0 + * @dd: the qlogic_ib device + * @port: the IB port number + * @pkeys: the PKEY table + */ +static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys) +{ + struct qib_pportdata *ppd; + struct qib_ctxtdata *rcd; + int i; + int changed = 0; + + /* + * IB port one/two always maps to context zero/one, + * always a kernel context, no locking needed + * If we get here with ppd setup, no need to check + * that rcd is valid. + */ + ppd = dd->pport + (port - 1); + rcd = dd->rcd[ppd->hw_pidx]; + + for (i = 0; i < ARRAY_SIZE(rcd->pkeys); i++) { + u16 key = pkeys[i]; + u16 okey = rcd->pkeys[i]; + + if (key == okey) + continue; + /* + * The value of this PKEY table entry is changing. + * Remove the old entry in the hardware's array of PKEYs. + */ + if (okey & 0x7FFF) + changed |= rm_pkey(ppd, okey); + if (key & 0x7FFF) { + int ret = add_pkey(ppd, key); + + if (ret < 0) + key = 0; + else + changed |= ret; + } + rcd->pkeys[i] = key; + } + if (changed) { + struct ib_event event; + + (void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0); + + event.event = IB_EVENT_PKEY_CHANGE; + event.device = &dd->verbs_dev.ibdev; + event.element.port_num = port; + ib_dispatch_event(&event); + } + return 0; +} + +static int subn_set_pkeytable(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); + __be16 *p = (__be16 *) smp->data; + u16 *q = (u16 *) smp->data; + struct qib_devdata *dd = dd_from_ibdev(ibdev); + unsigned i, n = qib_get_npkeys(dd); + + for (i = 0; i < n; i++) + q[i] = be16_to_cpu(p[i]); + + if (startpx != 0 || set_pkeys(dd, port, q) != 0) + smp->status |= IB_SMP_INVALID_FIELD; + + return subn_get_pkeytable(smp, ibdev, port); +} + +static int subn_get_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct qib_ibport *ibp = to_iport(ibdev, port); + u8 *p = (u8 *) smp->data; + unsigned i; + + memset(smp->data, 0, sizeof(smp->data)); + + if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP)) + smp->status |= IB_SMP_UNSUP_METHOD; + else + for (i = 0; i < ARRAY_SIZE(ibp->sl_to_vl); i += 2) + *p++ = (ibp->sl_to_vl[i] << 4) | ibp->sl_to_vl[i + 1]; + + return reply(smp); +} + +static int subn_set_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + struct qib_ibport *ibp = to_iport(ibdev, port); + u8 *p = (u8 *) smp->data; + unsigned i; + + if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP)) { + smp->status |= IB_SMP_UNSUP_METHOD; + return reply(smp); + } + + for (i = 0; i < ARRAY_SIZE(ibp->sl_to_vl); i += 2, p++) { + ibp->sl_to_vl[i] = *p >> 4; + ibp->sl_to_vl[i + 1] = *p & 0xF; + } + qib_set_uevent_bits(ppd_from_ibp(to_iport(ibdev, port)), + _QIB_EVENT_SL2VL_CHANGE_BIT); + + return subn_get_sl_to_vl(smp, ibdev, port); +} + +static int subn_get_vl_arb(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + unsigned which = be32_to_cpu(smp->attr_mod) >> 16; + struct qib_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port)); + + memset(smp->data, 0, sizeof(smp->data)); + + if (ppd->vls_supported == IB_VL_VL0) + smp->status |= IB_SMP_UNSUP_METHOD; + else if (which == IB_VLARB_LOWPRI_0_31) + (void) ppd->dd->f_get_ib_table(ppd, QIB_IB_TBL_VL_LOW_ARB, + smp->data); + else if (which == IB_VLARB_HIGHPRI_0_31) + (void) ppd->dd->f_get_ib_table(ppd, QIB_IB_TBL_VL_HIGH_ARB, + smp->data); + else + smp->status |= IB_SMP_INVALID_FIELD; + + return reply(smp); +} + +static int 
subn_set_vl_arb(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + unsigned which = be32_to_cpu(smp->attr_mod) >> 16; + struct qib_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port)); + + if (ppd->vls_supported == IB_VL_VL0) + smp->status |= IB_SMP_UNSUP_METHOD; + else if (which == IB_VLARB_LOWPRI_0_31) + (void) ppd->dd->f_set_ib_table(ppd, QIB_IB_TBL_VL_LOW_ARB, + smp->data); + else if (which == IB_VLARB_HIGHPRI_0_31) + (void) ppd->dd->f_set_ib_table(ppd, QIB_IB_TBL_VL_HIGH_ARB, + smp->data); + else + smp->status |= IB_SMP_INVALID_FIELD; + + return subn_get_vl_arb(smp, ibdev, port); +} + +static int subn_trap_repress(struct ib_smp *smp, struct ib_device *ibdev, + u8 port) +{ + /* + * For now, we only send the trap once so no need to process this. + * o13-6, o13-7, + * o14-3.a4 The SMA shall not send any message in response to a valid + * SubnTrapRepress() message. + */ + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; +} + +static int pma_get_classportinfo(struct ib_pma_mad *pmp, + struct ib_device *ibdev) +{ + struct ib_class_port_info *p = + (struct ib_class_port_info *)pmp->data; + struct qib_devdata *dd = dd_from_ibdev(ibdev); + + memset(pmp->data, 0, sizeof(pmp->data)); + + if (pmp->mad_hdr.attr_mod != 0) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + + /* Note that AllPortSelect is not valid */ + p->base_version = 1; + p->class_version = 1; + p->capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH; + /* + * Set the most significant bit of CM2 to indicate support for + * congestion statistics + */ + p->reserved[0] = dd->psxmitwait_supported << 7; + /* + * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. + */ + p->resp_time_value = 18; + + return reply((struct ib_smp *) pmp); +} + +static int pma_get_portsamplescontrol(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portsamplescontrol *p = + (struct ib_pma_portsamplescontrol *)pmp->data; + struct qib_ibdev *dev = to_idev(ibdev); + struct qib_devdata *dd = dd_from_dev(dev); + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + unsigned long flags; + u8 port_select = p->port_select; + + memset(pmp->data, 0, sizeof(pmp->data)); + + p->port_select = port_select; + if (pmp->mad_hdr.attr_mod != 0 || port_select != port) { + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + goto bail; + } + spin_lock_irqsave(&ibp->lock, flags); + p->tick = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_PMA_TICKS); + p->sample_status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT); + p->counter_width = 4; /* 32 bit counters */ + p->counter_mask0_9 = COUNTER_MASK0_9; + p->sample_start = cpu_to_be32(ibp->pma_sample_start); + p->sample_interval = cpu_to_be32(ibp->pma_sample_interval); + p->tag = cpu_to_be16(ibp->pma_tag); + p->counter_select[0] = ibp->pma_counter_select[0]; + p->counter_select[1] = ibp->pma_counter_select[1]; + p->counter_select[2] = ibp->pma_counter_select[2]; + p->counter_select[3] = ibp->pma_counter_select[3]; + p->counter_select[4] = ibp->pma_counter_select[4]; + spin_unlock_irqrestore(&ibp->lock, flags); + +bail: + return reply((struct ib_smp *) pmp); +} + +static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portsamplescontrol *p = + (struct ib_pma_portsamplescontrol *)pmp->data; + struct qib_ibdev *dev = to_idev(ibdev); + struct qib_devdata *dd = dd_from_dev(dev); + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + unsigned long flags; + u8 
status, xmit_flags; + int ret; + + if (pmp->mad_hdr.attr_mod != 0 || p->port_select != port) { + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + ret = reply((struct ib_smp *) pmp); + goto bail; + } + + spin_lock_irqsave(&ibp->lock, flags); + + /* Port Sampling code owns the PS* HW counters */ + xmit_flags = ppd->cong_stats.flags; + ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_SAMPLE; + status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT); + if (status == IB_PMA_SAMPLE_STATUS_DONE || + (status == IB_PMA_SAMPLE_STATUS_RUNNING && + xmit_flags == IB_PMA_CONG_HW_CONTROL_TIMER)) { + ibp->pma_sample_start = be32_to_cpu(p->sample_start); + ibp->pma_sample_interval = be32_to_cpu(p->sample_interval); + ibp->pma_tag = be16_to_cpu(p->tag); + ibp->pma_counter_select[0] = p->counter_select[0]; + ibp->pma_counter_select[1] = p->counter_select[1]; + ibp->pma_counter_select[2] = p->counter_select[2]; + ibp->pma_counter_select[3] = p->counter_select[3]; + ibp->pma_counter_select[4] = p->counter_select[4]; + dd->f_set_cntr_sample(ppd, ibp->pma_sample_interval, + ibp->pma_sample_start); + } + spin_unlock_irqrestore(&ibp->lock, flags); + + ret = pma_get_portsamplescontrol(pmp, ibdev, port); + +bail: + return ret; +} + +static u64 get_counter(struct qib_ibport *ibp, struct qib_pportdata *ppd, + __be16 sel) +{ + u64 ret; + + switch (sel) { + case IB_PMA_PORT_XMIT_DATA: + ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSXMITDATA); + break; + case IB_PMA_PORT_RCV_DATA: + ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSRCVDATA); + break; + case IB_PMA_PORT_XMIT_PKTS: + ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSXMITPKTS); + break; + case IB_PMA_PORT_RCV_PKTS: + ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSRCVPKTS); + break; + case IB_PMA_PORT_XMIT_WAIT: + ret = ppd->dd->f_portcntr(ppd, QIBPORTCNTR_PSXMITWAIT); + break; + default: + ret = 0; + } + + return ret; +} + +/* This function assumes that the xmit_wait lock is already held */ +static u64 xmit_wait_get_value_delta(struct qib_pportdata *ppd) +{ + u32 delta; + + delta = get_counter(&ppd->ibport_data, ppd, + IB_PMA_PORT_XMIT_WAIT); + return ppd->cong_stats.counter + delta; +} + +static void cache_hw_sample_counters(struct qib_pportdata *ppd) +{ + struct qib_ibport *ibp = &ppd->ibport_data; + + ppd->cong_stats.counter_cache.psxmitdata = + get_counter(ibp, ppd, IB_PMA_PORT_XMIT_DATA); + ppd->cong_stats.counter_cache.psrcvdata = + get_counter(ibp, ppd, IB_PMA_PORT_RCV_DATA); + ppd->cong_stats.counter_cache.psxmitpkts = + get_counter(ibp, ppd, IB_PMA_PORT_XMIT_PKTS); + ppd->cong_stats.counter_cache.psrcvpkts = + get_counter(ibp, ppd, IB_PMA_PORT_RCV_PKTS); + ppd->cong_stats.counter_cache.psxmitwait = + get_counter(ibp, ppd, IB_PMA_PORT_XMIT_WAIT); +} + +static u64 get_cache_hw_sample_counters(struct qib_pportdata *ppd, + __be16 sel) +{ + u64 ret; + + switch (sel) { + case IB_PMA_PORT_XMIT_DATA: + ret = ppd->cong_stats.counter_cache.psxmitdata; + break; + case IB_PMA_PORT_RCV_DATA: + ret = ppd->cong_stats.counter_cache.psrcvdata; + break; + case IB_PMA_PORT_XMIT_PKTS: + ret = ppd->cong_stats.counter_cache.psxmitpkts; + break; + case IB_PMA_PORT_RCV_PKTS: + ret = ppd->cong_stats.counter_cache.psrcvpkts; + break; + case IB_PMA_PORT_XMIT_WAIT: + ret = ppd->cong_stats.counter_cache.psxmitwait; + break; + default: + ret = 0; + } + + return ret; +} + +static int pma_get_portsamplesresult(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portsamplesresult *p = + (struct ib_pma_portsamplesresult *)pmp->data; + struct qib_ibdev *dev = to_idev(ibdev); + 
struct qib_devdata *dd = dd_from_dev(dev); + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + unsigned long flags; + u8 status; + int i; + + memset(pmp->data, 0, sizeof(pmp->data)); + spin_lock_irqsave(&ibp->lock, flags); + p->tag = cpu_to_be16(ibp->pma_tag); + if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER) + p->sample_status = IB_PMA_SAMPLE_STATUS_DONE; + else { + status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT); + p->sample_status = cpu_to_be16(status); + if (status == IB_PMA_SAMPLE_STATUS_DONE) { + cache_hw_sample_counters(ppd); + ppd->cong_stats.counter = + xmit_wait_get_value_delta(ppd); + dd->f_set_cntr_sample(ppd, + QIB_CONG_TIMER_PSINTERVAL, 0); + ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER; + } + } + for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++) + p->counter[i] = cpu_to_be32( + get_cache_hw_sample_counters( + ppd, ibp->pma_counter_select[i])); + spin_unlock_irqrestore(&ibp->lock, flags); + + return reply((struct ib_smp *) pmp); +} + +static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portsamplesresult_ext *p = + (struct ib_pma_portsamplesresult_ext *)pmp->data; + struct qib_ibdev *dev = to_idev(ibdev); + struct qib_devdata *dd = dd_from_dev(dev); + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + unsigned long flags; + u8 status; + int i; + + /* Port Sampling code owns the PS* HW counters */ + memset(pmp->data, 0, sizeof(pmp->data)); + spin_lock_irqsave(&ibp->lock, flags); + p->tag = cpu_to_be16(ibp->pma_tag); + if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER) + p->sample_status = IB_PMA_SAMPLE_STATUS_DONE; + else { + status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT); + p->sample_status = cpu_to_be16(status); + /* 64 bits */ + p->extended_width = cpu_to_be32(0x80000000); + if (status == IB_PMA_SAMPLE_STATUS_DONE) { + cache_hw_sample_counters(ppd); + ppd->cong_stats.counter = + xmit_wait_get_value_delta(ppd); + dd->f_set_cntr_sample(ppd, + QIB_CONG_TIMER_PSINTERVAL, 0); + ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER; + } + } + for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++) + p->counter[i] = cpu_to_be64( + get_cache_hw_sample_counters( + ppd, ibp->pma_counter_select[i])); + spin_unlock_irqrestore(&ibp->lock, flags); + + return reply((struct ib_smp *) pmp); +} + +static int pma_get_portcounters(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) + pmp->data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_verbs_counters cntrs; + u8 port_select = p->port_select; + + qib_get_counters(ppd, &cntrs); + + /* Adjust counters for any resets done. 
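+ * Counter "clears" are emulated in software: pma_set_portcounters() + * snapshots the running hardware totals into the ibp->z_* fields, and + * each Get subtracts that baseline from the current totals.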
*/ + cntrs.symbol_error_counter -= ibp->z_symbol_error_counter; + cntrs.link_error_recovery_counter -= + ibp->z_link_error_recovery_counter; + cntrs.link_downed_counter -= ibp->z_link_downed_counter; + cntrs.port_rcv_errors -= ibp->z_port_rcv_errors; + cntrs.port_rcv_remphys_errors -= ibp->z_port_rcv_remphys_errors; + cntrs.port_xmit_discards -= ibp->z_port_xmit_discards; + cntrs.port_xmit_data -= ibp->z_port_xmit_data; + cntrs.port_rcv_data -= ibp->z_port_rcv_data; + cntrs.port_xmit_packets -= ibp->z_port_xmit_packets; + cntrs.port_rcv_packets -= ibp->z_port_rcv_packets; + cntrs.local_link_integrity_errors -= + ibp->z_local_link_integrity_errors; + cntrs.excessive_buffer_overrun_errors -= + ibp->z_excessive_buffer_overrun_errors; + cntrs.vl15_dropped -= ibp->z_vl15_dropped; + cntrs.vl15_dropped += ibp->n_vl15_dropped; + + memset(pmp->data, 0, sizeof(pmp->data)); + + p->port_select = port_select; + if (pmp->mad_hdr.attr_mod != 0 || port_select != port) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + + if (cntrs.symbol_error_counter > 0xFFFFUL) + p->symbol_error_counter = cpu_to_be16(0xFFFF); + else + p->symbol_error_counter = + cpu_to_be16((u16)cntrs.symbol_error_counter); + if (cntrs.link_error_recovery_counter > 0xFFUL) + p->link_error_recovery_counter = 0xFF; + else + p->link_error_recovery_counter = + (u8)cntrs.link_error_recovery_counter; + if (cntrs.link_downed_counter > 0xFFUL) + p->link_downed_counter = 0xFF; + else + p->link_downed_counter = (u8)cntrs.link_downed_counter; + if (cntrs.port_rcv_errors > 0xFFFFUL) + p->port_rcv_errors = cpu_to_be16(0xFFFF); + else + p->port_rcv_errors = + cpu_to_be16((u16) cntrs.port_rcv_errors); + if (cntrs.port_rcv_remphys_errors > 0xFFFFUL) + p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF); + else + p->port_rcv_remphys_errors = + cpu_to_be16((u16)cntrs.port_rcv_remphys_errors); + if (cntrs.port_xmit_discards > 0xFFFFUL) + p->port_xmit_discards = cpu_to_be16(0xFFFF); + else + p->port_xmit_discards = + cpu_to_be16((u16)cntrs.port_xmit_discards); + if (cntrs.local_link_integrity_errors > 0xFUL) + cntrs.local_link_integrity_errors = 0xFUL; + if (cntrs.excessive_buffer_overrun_errors > 0xFUL) + cntrs.excessive_buffer_overrun_errors = 0xFUL; + p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) | + cntrs.excessive_buffer_overrun_errors; + if (cntrs.vl15_dropped > 0xFFFFUL) + p->vl15_dropped = cpu_to_be16(0xFFFF); + else + p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped); + if (cntrs.port_xmit_data > 0xFFFFFFFFUL) + p->port_xmit_data = cpu_to_be32(0xFFFFFFFF); + else + p->port_xmit_data = cpu_to_be32((u32)cntrs.port_xmit_data); + if (cntrs.port_rcv_data > 0xFFFFFFFFUL) + p->port_rcv_data = cpu_to_be32(0xFFFFFFFF); + else + p->port_rcv_data = cpu_to_be32((u32)cntrs.port_rcv_data); + if (cntrs.port_xmit_packets > 0xFFFFFFFFUL) + p->port_xmit_packets = cpu_to_be32(0xFFFFFFFF); + else + p->port_xmit_packets = + cpu_to_be32((u32)cntrs.port_xmit_packets); + if (cntrs.port_rcv_packets > 0xFFFFFFFFUL) + p->port_rcv_packets = cpu_to_be32(0xFFFFFFFF); + else + p->port_rcv_packets = + cpu_to_be32((u32) cntrs.port_rcv_packets); + + return reply((struct ib_smp *) pmp); +} + +static int pma_get_portcounters_cong(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + /* Congestion PMA packets start at offset 24 not 64 */ + struct ib_pma_portcounters_cong *p = + (struct ib_pma_portcounters_cong *)pmp->reserved; + struct qib_verbs_counters cntrs; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = 
ppd_from_ibp(ibp); + struct qib_devdata *dd = dd_from_ppd(ppd); + u32 port_select = be32_to_cpu(pmp->mad_hdr.attr_mod) & 0xFF; + u64 xmit_wait_counter; + unsigned long flags; + + /* + * This check is performed only in the GET method because the + * SET method ends up calling this anyway. + */ + if (!dd->psxmitwait_supported) + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; + if (port_select != port) + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + + qib_get_counters(ppd, &cntrs); + spin_lock_irqsave(&ppd->ibport_data.lock, flags); + xmit_wait_counter = xmit_wait_get_value_delta(ppd); + spin_unlock_irqrestore(&ppd->ibport_data.lock, flags); + + /* Adjust counters for any resets done. */ + cntrs.symbol_error_counter -= ibp->z_symbol_error_counter; + cntrs.link_error_recovery_counter -= + ibp->z_link_error_recovery_counter; + cntrs.link_downed_counter -= ibp->z_link_downed_counter; + cntrs.port_rcv_errors -= ibp->z_port_rcv_errors; + cntrs.port_rcv_remphys_errors -= + ibp->z_port_rcv_remphys_errors; + cntrs.port_xmit_discards -= ibp->z_port_xmit_discards; + cntrs.local_link_integrity_errors -= + ibp->z_local_link_integrity_errors; + cntrs.excessive_buffer_overrun_errors -= + ibp->z_excessive_buffer_overrun_errors; + cntrs.vl15_dropped -= ibp->z_vl15_dropped; + cntrs.vl15_dropped += ibp->n_vl15_dropped; + cntrs.port_xmit_data -= ibp->z_port_xmit_data; + cntrs.port_rcv_data -= ibp->z_port_rcv_data; + cntrs.port_xmit_packets -= ibp->z_port_xmit_packets; + cntrs.port_rcv_packets -= ibp->z_port_rcv_packets; + + memset(pmp->reserved, 0, sizeof(pmp->reserved) + + sizeof(pmp->data)); + + /* + * Set top 3 bits to indicate interval in picoseconds in + * remaining bits. + */ + p->port_check_rate = + cpu_to_be16((QIB_XMIT_RATE_PICO << 13) | + (dd->psxmitwait_check_rate & + ~(QIB_XMIT_RATE_PICO << 13))); + p->port_adr_events = cpu_to_be64(0); + p->port_xmit_wait = cpu_to_be64(xmit_wait_counter); + p->port_xmit_data = cpu_to_be64(cntrs.port_xmit_data); + p->port_rcv_data = cpu_to_be64(cntrs.port_rcv_data); + p->port_xmit_packets = + cpu_to_be64(cntrs.port_xmit_packets); + p->port_rcv_packets = + cpu_to_be64(cntrs.port_rcv_packets); + if (cntrs.symbol_error_counter > 0xFFFFUL) + p->symbol_error_counter = cpu_to_be16(0xFFFF); + else + p->symbol_error_counter = + cpu_to_be16( + (u16)cntrs.symbol_error_counter); + if (cntrs.link_error_recovery_counter > 0xFFUL) + p->link_error_recovery_counter = 0xFF; + else + p->link_error_recovery_counter = + (u8)cntrs.link_error_recovery_counter; + if (cntrs.link_downed_counter > 0xFFUL) + p->link_downed_counter = 0xFF; + else + p->link_downed_counter = + (u8)cntrs.link_downed_counter; + if (cntrs.port_rcv_errors > 0xFFFFUL) + p->port_rcv_errors = cpu_to_be16(0xFFFF); + else + p->port_rcv_errors = + cpu_to_be16((u16) cntrs.port_rcv_errors); + if (cntrs.port_rcv_remphys_errors > 0xFFFFUL) + p->port_rcv_remphys_errors = cpu_to_be16(0xFFFF); + else + p->port_rcv_remphys_errors = + cpu_to_be16( + (u16)cntrs.port_rcv_remphys_errors); + if (cntrs.port_xmit_discards > 0xFFFFUL) + p->port_xmit_discards = cpu_to_be16(0xFFFF); + else + p->port_xmit_discards = + cpu_to_be16((u16)cntrs.port_xmit_discards); + if (cntrs.local_link_integrity_errors > 0xFUL) + cntrs.local_link_integrity_errors = 0xFUL; + if (cntrs.excessive_buffer_overrun_errors > 0xFUL) + cntrs.excessive_buffer_overrun_errors = 0xFUL; + p->link_overrun_errors = (cntrs.local_link_integrity_errors << 4) | + cntrs.excessive_buffer_overrun_errors; + if (cntrs.vl15_dropped > 0xFFFFUL) + p->vl15_dropped = cpu_to_be16(0xFFFF); + 
else + p->vl15_dropped = cpu_to_be16((u16)cntrs.vl15_dropped); + + return reply((struct ib_smp *)pmp); +} + +static void qib_snapshot_pmacounters( + struct qib_ibport *ibp, + struct qib_pma_counters *pmacounters) +{ + struct qib_pma_counters *p; + int cpu; + + memset(pmacounters, 0, sizeof(*pmacounters)); + for_each_possible_cpu(cpu) { + p = per_cpu_ptr(ibp->pmastats, cpu); + pmacounters->n_unicast_xmit += p->n_unicast_xmit; + pmacounters->n_unicast_rcv += p->n_unicast_rcv; + pmacounters->n_multicast_xmit += p->n_multicast_xmit; + pmacounters->n_multicast_rcv += p->n_multicast_rcv; + } +} + +static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters_ext *p = + (struct ib_pma_portcounters_ext *)pmp->data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; + u8 port_select = p->port_select; + + memset(pmp->data, 0, sizeof(pmp->data)); + + p->port_select = port_select; + if (pmp->mad_hdr.attr_mod != 0 || port_select != port) { + pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; + goto bail; + } + + qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait); + + /* Adjust counters for any resets done. */ + swords -= ibp->z_port_xmit_data; + rwords -= ibp->z_port_rcv_data; + spkts -= ibp->z_port_xmit_packets; + rpkts -= ibp->z_port_rcv_packets; + + p->port_xmit_data = cpu_to_be64(swords); + p->port_rcv_data = cpu_to_be64(rwords); + p->port_xmit_packets = cpu_to_be64(spkts); + p->port_rcv_packets = cpu_to_be64(rpkts); + + qib_snapshot_pmacounters(ibp, &pma); + + p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit + - ibp->z_unicast_xmit); + p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv + - ibp->z_unicast_rcv); + p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit + - ibp->z_multicast_xmit); + p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv + - ibp->z_multicast_rcv); + +bail: + return reply((struct ib_smp *) pmp); +} + +static int pma_set_portcounters(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) + pmp->data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_verbs_counters cntrs; + + /* + * Since the HW doesn't support clearing counters, we save the + * current count and subtract it from future responses. 
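+ * For example, a Set with only IB_PMA_SEL_SYMBOL_ERROR in counter_select + * rebaselines that one counter and leaves the other z_* snapshots alone.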
+ */ + qib_get_counters(ppd, &cntrs); + + if (p->counter_select & IB_PMA_SEL_SYMBOL_ERROR) + ibp->z_symbol_error_counter = cntrs.symbol_error_counter; + + if (p->counter_select & IB_PMA_SEL_LINK_ERROR_RECOVERY) + ibp->z_link_error_recovery_counter = + cntrs.link_error_recovery_counter; + + if (p->counter_select & IB_PMA_SEL_LINK_DOWNED) + ibp->z_link_downed_counter = cntrs.link_downed_counter; + + if (p->counter_select & IB_PMA_SEL_PORT_RCV_ERRORS) + ibp->z_port_rcv_errors = cntrs.port_rcv_errors; + + if (p->counter_select & IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS) + ibp->z_port_rcv_remphys_errors = + cntrs.port_rcv_remphys_errors; + + if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DISCARDS) + ibp->z_port_xmit_discards = cntrs.port_xmit_discards; + + if (p->counter_select & IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS) + ibp->z_local_link_integrity_errors = + cntrs.local_link_integrity_errors; + + if (p->counter_select & IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS) + ibp->z_excessive_buffer_overrun_errors = + cntrs.excessive_buffer_overrun_errors; + + if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) { + ibp->n_vl15_dropped = 0; + ibp->z_vl15_dropped = cntrs.vl15_dropped; + } + + if (p->counter_select & IB_PMA_SEL_PORT_XMIT_DATA) + ibp->z_port_xmit_data = cntrs.port_xmit_data; + + if (p->counter_select & IB_PMA_SEL_PORT_RCV_DATA) + ibp->z_port_rcv_data = cntrs.port_rcv_data; + + if (p->counter_select & IB_PMA_SEL_PORT_XMIT_PACKETS) + ibp->z_port_xmit_packets = cntrs.port_xmit_packets; + + if (p->counter_select & IB_PMA_SEL_PORT_RCV_PACKETS) + ibp->z_port_rcv_packets = cntrs.port_rcv_packets; + + return pma_get_portcounters(pmp, ibdev, port); +} + +static int pma_set_portcounters_cong(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_devdata *dd = dd_from_ppd(ppd); + struct qib_verbs_counters cntrs; + u32 counter_select = (be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24) & 0xFF; + int ret = 0; + unsigned long flags; + + qib_get_counters(ppd, &cntrs); + /* Get counter values before we save them */ + ret = pma_get_portcounters_cong(pmp, ibdev, port); + + if (counter_select & IB_PMA_SEL_CONG_XMIT) { + spin_lock_irqsave(&ppd->ibport_data.lock, flags); + ppd->cong_stats.counter = 0; + dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL, + 0x0); + spin_unlock_irqrestore(&ppd->ibport_data.lock, flags); + } + if (counter_select & IB_PMA_SEL_CONG_PORT_DATA) { + ibp->z_port_xmit_data = cntrs.port_xmit_data; + ibp->z_port_rcv_data = cntrs.port_rcv_data; + ibp->z_port_xmit_packets = cntrs.port_xmit_packets; + ibp->z_port_rcv_packets = cntrs.port_rcv_packets; + } + if (counter_select & IB_PMA_SEL_CONG_ALL) { + ibp->z_symbol_error_counter = + cntrs.symbol_error_counter; + ibp->z_link_error_recovery_counter = + cntrs.link_error_recovery_counter; + ibp->z_link_downed_counter = + cntrs.link_downed_counter; + ibp->z_port_rcv_errors = cntrs.port_rcv_errors; + ibp->z_port_rcv_remphys_errors = + cntrs.port_rcv_remphys_errors; + ibp->z_port_xmit_discards = + cntrs.port_xmit_discards; + ibp->z_local_link_integrity_errors = + cntrs.local_link_integrity_errors; + ibp->z_excessive_buffer_overrun_errors = + cntrs.excessive_buffer_overrun_errors; + ibp->n_vl15_dropped = 0; + ibp->z_vl15_dropped = cntrs.vl15_dropped; + } + + return ret; +} + +static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, + struct ib_device *ibdev, u8 port) +{ + struct ib_pma_portcounters *p = (struct ib_pma_portcounters *) + pmp->data; + 
struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; + + qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait); + + if (p->counter_select & IB_PMA_SELX_PORT_XMIT_DATA) + ibp->z_port_xmit_data = swords; + + if (p->counter_select & IB_PMA_SELX_PORT_RCV_DATA) + ibp->z_port_rcv_data = rwords; + + if (p->counter_select & IB_PMA_SELX_PORT_XMIT_PACKETS) + ibp->z_port_xmit_packets = spkts; + + if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS) + ibp->z_port_rcv_packets = rpkts; + + qib_snapshot_pmacounters(ibp, &pma); + + if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS) + ibp->z_unicast_xmit = pma.n_unicast_xmit; + + if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS) + ibp->z_unicast_rcv = pma.n_unicast_rcv; + + if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS) + ibp->z_multicast_xmit = pma.n_multicast_xmit; + + if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS) + ibp->z_multicast_rcv = pma.n_multicast_rcv; + + return pma_get_portcounters_ext(pmp, ibdev, port); +} + +static int process_subn(struct ib_device *ibdev, int mad_flags, + u8 port, struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + struct ib_smp *smp = (struct ib_smp *)out_mad; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + int ret; + + *out_mad = *in_mad; + if (smp->class_version != 1) { + smp->status |= IB_SMP_UNSUP_VERSION; + ret = reply(smp); + goto bail; + } + + ret = check_mkey(ibp, smp, mad_flags); + if (ret) { + u32 port_num = be32_to_cpu(smp->attr_mod); + + /* + * If this is a get/set portinfo, we already check the + * M_Key if the MAD is for another port and the M_Key + * is OK on the receiving port. This check is needed + * to increment the error counters when the M_Key + * fails to match on *both* ports. 
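+ * (For Get/Set(PortInfo) the attribute modifier carries the target + * port number, which is why attr_mod is consulted here.)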
+ */ + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO && + (smp->method == IB_MGMT_METHOD_GET || + smp->method == IB_MGMT_METHOD_SET) && + port_num && port_num <= ibdev->phys_port_cnt && + port != port_num) + (void) check_mkey(to_iport(ibdev, port_num), smp, 0); + ret = IB_MAD_RESULT_FAILURE; + goto bail; + } + + switch (smp->method) { + case IB_MGMT_METHOD_GET: + switch (smp->attr_id) { + case IB_SMP_ATTR_NODE_DESC: + ret = subn_get_nodedescription(smp, ibdev); + goto bail; + case IB_SMP_ATTR_NODE_INFO: + ret = subn_get_nodeinfo(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_GUID_INFO: + ret = subn_get_guidinfo(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_PORT_INFO: + ret = subn_get_portinfo(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_PKEY_TABLE: + ret = subn_get_pkeytable(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_SL_TO_VL_TABLE: + ret = subn_get_sl_to_vl(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_VL_ARB_TABLE: + ret = subn_get_vl_arb(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_SM_INFO: + if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) { + ret = IB_MAD_RESULT_SUCCESS | + IB_MAD_RESULT_CONSUMED; + goto bail; + } + if (ibp->port_cap_flags & IB_PORT_SM) { + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + } + /* FALLTHROUGH */ + default: + smp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply(smp); + goto bail; + } + + case IB_MGMT_METHOD_SET: + switch (smp->attr_id) { + case IB_SMP_ATTR_GUID_INFO: + ret = subn_set_guidinfo(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_PORT_INFO: + ret = subn_set_portinfo(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_PKEY_TABLE: + ret = subn_set_pkeytable(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_SL_TO_VL_TABLE: + ret = subn_set_sl_to_vl(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_VL_ARB_TABLE: + ret = subn_set_vl_arb(smp, ibdev, port); + goto bail; + case IB_SMP_ATTR_SM_INFO: + if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) { + ret = IB_MAD_RESULT_SUCCESS | + IB_MAD_RESULT_CONSUMED; + goto bail; + } + if (ibp->port_cap_flags & IB_PORT_SM) { + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + } + /* FALLTHROUGH */ + default: + smp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply(smp); + goto bail; + } + + case IB_MGMT_METHOD_TRAP_REPRESS: + if (smp->attr_id == IB_SMP_ATTR_NOTICE) + ret = subn_trap_repress(smp, ibdev, port); + else { + smp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply(smp); + } + goto bail; + + case IB_MGMT_METHOD_TRAP: + case IB_MGMT_METHOD_REPORT: + case IB_MGMT_METHOD_REPORT_RESP: + case IB_MGMT_METHOD_GET_RESP: + /* + * The ib_mad module will call us to process responses + * before checking for other consumers. + * Just tell the caller to process it normally. 
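+ * (Returning IB_MAD_RESULT_SUCCESS without IB_MAD_RESULT_CONSUMED or + * IB_MAD_RESULT_REPLY leaves the response to be matched to the agent + * that sent the original request.)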
+ */ + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + + case IB_MGMT_METHOD_SEND: + if (ib_get_smp_direction(smp) && + smp->attr_id == QIB_VENDOR_IPG) { + ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PORT, + smp->data[0]); + ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; + } else + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + + default: + smp->status |= IB_SMP_UNSUP_METHOD; + ret = reply(smp); + } + +bail: + return ret; +} + +static int process_perf(struct ib_device *ibdev, u8 port, + struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + struct ib_pma_mad *pmp = (struct ib_pma_mad *)out_mad; + int ret; + + *out_mad = *in_mad; + if (pmp->mad_hdr.class_version != 1) { + pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION; + ret = reply((struct ib_smp *) pmp); + goto bail; + } + + switch (pmp->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + switch (pmp->mad_hdr.attr_id) { + case IB_PMA_CLASS_PORT_INFO: + ret = pma_get_classportinfo(pmp, ibdev); + goto bail; + case IB_PMA_PORT_SAMPLES_CONTROL: + ret = pma_get_portsamplescontrol(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_SAMPLES_RESULT: + ret = pma_get_portsamplesresult(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_SAMPLES_RESULT_EXT: + ret = pma_get_portsamplesresult_ext(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_COUNTERS: + ret = pma_get_portcounters(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_COUNTERS_EXT: + ret = pma_get_portcounters_ext(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_COUNTERS_CONG: + ret = pma_get_portcounters_cong(pmp, ibdev, port); + goto bail; + default: + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) pmp); + goto bail; + } + + case IB_MGMT_METHOD_SET: + switch (pmp->mad_hdr.attr_id) { + case IB_PMA_PORT_SAMPLES_CONTROL: + ret = pma_set_portsamplescontrol(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_COUNTERS: + ret = pma_set_portcounters(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_COUNTERS_EXT: + ret = pma_set_portcounters_ext(pmp, ibdev, port); + goto bail; + case IB_PMA_PORT_COUNTERS_CONG: + ret = pma_set_portcounters_cong(pmp, ibdev, port); + goto bail; + default: + pmp->mad_hdr.status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) pmp); + goto bail; + } + + case IB_MGMT_METHOD_TRAP: + case IB_MGMT_METHOD_GET_RESP: + /* + * The ib_mad module will call us to process responses + * before checking for other consumers. + * Just tell the caller to process it normally. + */ + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + + default: + pmp->mad_hdr.status |= IB_SMP_UNSUP_METHOD; + ret = reply((struct ib_smp *) pmp); + } + +bail: + return ret; +} + +static int cc_get_classportinfo(struct ib_cc_mad *ccp, + struct ib_device *ibdev) +{ + struct ib_cc_classportinfo_attr *p = + (struct ib_cc_classportinfo_attr *)ccp->mgmt_data; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + p->base_version = 1; + p->class_version = 1; + p->cap_mask = 0; + + /* + * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. 
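+ * (resp_time_value holds the exponent N of 4.096 usec. * 2^N, so the + * value 18 below advertises roughly a one second response time.)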
+ */ + p->resp_time_value = 18; + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_info(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_info_attr *p = + (struct ib_cc_info_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + p->congestion_info = 0; + p->control_table_cap = ppd->cc_max_table_entries; + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_setting(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + int i; + struct ib_cc_congestion_setting_attr *p = + (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct ib_cc_congestion_entry_shadow *entries; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + spin_lock(&ppd->cc_shadow_lock); + + entries = ppd->congestion_entries_shadow->entries; + p->port_control = cpu_to_be16( + ppd->congestion_entries_shadow->port_control); + p->control_map = cpu_to_be16( + ppd->congestion_entries_shadow->control_map); + for (i = 0; i < IB_CC_CCS_ENTRIES; i++) { + p->entries[i].ccti_increase = entries[i].ccti_increase; + p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer); + p->entries[i].trigger_threshold = entries[i].trigger_threshold; + p->entries[i].ccti_min = entries[i].ccti_min; + } + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); +} + +static int cc_get_congestion_control_table(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_table_attr *p = + (struct ib_cc_table_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u32 cct_block_index = be32_to_cpu(ccp->attr_mod); + u32 max_cct_block; + u32 cct_entry; + struct ib_cc_table_entry_shadow *entries; + int i; + + /* Is the table index more than what is supported? */ + if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) + goto bail; + + memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data)); + + spin_lock(&ppd->cc_shadow_lock); + + max_cct_block = + (ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES; + max_cct_block = max_cct_block ? 
max_cct_block - 1 : 0; + + if (cct_block_index > max_cct_block) { + spin_unlock(&ppd->cc_shadow_lock); + goto bail; + } + + ccp->attr_mod = cpu_to_be32(cct_block_index); + + cct_entry = IB_CCT_ENTRIES * (cct_block_index + 1); + + cct_entry--; + + p->ccti_limit = cpu_to_be16(cct_entry); + + entries = &ppd->ccti_entries_shadow-> + entries[IB_CCT_ENTRIES * cct_block_index]; + cct_entry %= IB_CCT_ENTRIES; + + for (i = 0; i <= cct_entry; i++) + p->ccti_entries[i].entry = cpu_to_be16(entries[i].entry); + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); + +bail: + return reply_failure((struct ib_smp *) ccp); +} + +static int cc_set_congestion_setting(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_congestion_setting_attr *p = + (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + int i; + + ppd->cc_sl_control_map = be16_to_cpu(p->control_map); + + for (i = 0; i < IB_CC_CCS_ENTRIES; i++) { + ppd->congestion_entries[i].ccti_increase = + p->entries[i].ccti_increase; + + ppd->congestion_entries[i].ccti_timer = + be16_to_cpu(p->entries[i].ccti_timer); + + ppd->congestion_entries[i].trigger_threshold = + p->entries[i].trigger_threshold; + + ppd->congestion_entries[i].ccti_min = + p->entries[i].ccti_min; + } + + return reply((struct ib_smp *) ccp); +} + +static int cc_set_congestion_control_table(struct ib_cc_mad *ccp, + struct ib_device *ibdev, u8 port) +{ + struct ib_cc_table_attr *p = + (struct ib_cc_table_attr *)ccp->mgmt_data; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u32 cct_block_index = be32_to_cpu(ccp->attr_mod); + u32 cct_entry; + struct ib_cc_table_entry_shadow *entries; + int i; + + /* Is the table index more than what is supported? */ + if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1) + goto bail; + + /* If this packet is the first in the sequence then + * zero the total table entry count. 
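+ * (ccti_limit is the index of the overall last valid entry; the + * modulo-IB_CCT_ENTRIES arithmetic below reduces it to an index + * within the current 64-entry block.)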
+ */ + if (be16_to_cpu(p->ccti_limit) < IB_CCT_ENTRIES) + ppd->total_cct_entry = 0; + + cct_entry = (be16_to_cpu(p->ccti_limit))%IB_CCT_ENTRIES; + + /* ccti_limit is 0 to 63 */ + ppd->total_cct_entry += (cct_entry + 1); + + if (ppd->total_cct_entry > ppd->cc_supported_table_entries) + goto bail; + + ppd->ccti_limit = be16_to_cpu(p->ccti_limit); + + entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index); + + for (i = 0; i <= cct_entry; i++) + entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry); + + spin_lock(&ppd->cc_shadow_lock); + + ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1; + memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries, + (ppd->total_cct_entry * sizeof(struct ib_cc_table_entry))); + + ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED; + ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map; + memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries, + IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry)); + + spin_unlock(&ppd->cc_shadow_lock); + + return reply((struct ib_smp *) ccp); + +bail: + return reply_failure((struct ib_smp *) ccp); +} + +static int check_cc_key(struct qib_ibport *ibp, + struct ib_cc_mad *ccp, int mad_flags) +{ + return 0; +} + +static int process_cc(struct ib_device *ibdev, int mad_flags, + u8 port, struct ib_mad *in_mad, + struct ib_mad *out_mad) +{ + struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; + struct qib_ibport *ibp = to_iport(ibdev, port); + int ret; + + *out_mad = *in_mad; + + if (ccp->class_version != 2) { + ccp->status |= IB_SMP_UNSUP_VERSION; + ret = reply((struct ib_smp *)ccp); + goto bail; + } + + ret = check_cc_key(ibp, ccp, mad_flags); + if (ret) + goto bail; + + switch (ccp->method) { + case IB_MGMT_METHOD_GET: + switch (ccp->attr_id) { + case IB_CC_ATTR_CLASSPORTINFO: + ret = cc_get_classportinfo(ccp, ibdev); + goto bail; + + case IB_CC_ATTR_CONGESTION_INFO: + ret = cc_get_congestion_info(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CA_CONGESTION_SETTING: + ret = cc_get_congestion_setting(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: + ret = cc_get_congestion_control_table(ccp, ibdev, port); + goto bail; + + default: + ccp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) ccp); + goto bail; + } + + case IB_MGMT_METHOD_SET: + switch (ccp->attr_id) { + case IB_CC_ATTR_CA_CONGESTION_SETTING: + ret = cc_set_congestion_setting(ccp, ibdev, port); + goto bail; + + case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: + ret = cc_set_congestion_control_table(ccp, ibdev, port); + goto bail; + + default: + ccp->status |= IB_SMP_UNSUP_METH_ATTR; + ret = reply((struct ib_smp *) ccp); + goto bail; + } + + case IB_MGMT_METHOD_GET_RESP: + /* + * The ib_mad module will call us to process responses + * before checking for other consumers. + * Just tell the caller to process it normally. 
+ */ + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + + case IB_MGMT_METHOD_TRAP: + default: + ccp->status |= IB_SMP_UNSUP_METHOD; + ret = reply((struct ib_smp *) ccp); + } + +bail: + return ret; +} + +/** + * qib_process_mad - process an incoming MAD packet + * @ibdev: the infiniband device this packet came in on + * @mad_flags: MAD flags + * @port: the port number this packet came in on + * @in_wc: the work completion entry for this packet + * @in_grh: the global route header for this packet + * @in_mad: the incoming MAD + * @out_mad: any outgoing MAD reply + * + * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not + * interested in processing. + * + * Note that the verbs framework has already done the MAD sanity checks, + * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE + * MADs. + * + * This is called by the ib_mad module. + */ +int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + int ret; + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + switch (in_mad->mad_hdr.mgmt_class) { + case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: + case IB_MGMT_CLASS_SUBN_LID_ROUTED: + ret = process_subn(ibdev, mad_flags, port, in_mad, out_mad); + goto bail; + + case IB_MGMT_CLASS_PERF_MGMT: + ret = process_perf(ibdev, port, in_mad, out_mad); + goto bail; + + case IB_MGMT_CLASS_CONG_MGMT: + if (!ppd->congestion_entries_shadow || + !qib_cc_table_size) { + ret = IB_MAD_RESULT_SUCCESS; + goto bail; + } + ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad); + goto bail; + + default: + ret = IB_MAD_RESULT_SUCCESS; + } + +bail: + return ret; +} + +static void send_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc) +{ + ib_free_send_mad(mad_send_wc->send_buf); +} + +static void xmit_wait_timer_func(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + struct qib_devdata *dd = dd_from_ppd(ppd); + unsigned long flags; + u8 status; + + spin_lock_irqsave(&ppd->ibport_data.lock, flags); + if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_SAMPLE) { + status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT); + if (status == IB_PMA_SAMPLE_STATUS_DONE) { + /* save counter cache */ + cache_hw_sample_counters(ppd); + ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER; + } else + goto done; + } + ppd->cong_stats.counter = xmit_wait_get_value_delta(ppd); + dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL, 0x0); +done: + spin_unlock_irqrestore(&ppd->ibport_data.lock, flags); + mod_timer(&ppd->cong_stats.timer, jiffies + HZ); +} + +int qib_create_agents(struct qib_ibdev *dev) +{ + struct qib_devdata *dd = dd_from_dev(dev); + struct ib_mad_agent *agent; + struct qib_ibport *ibp; + int p; + int ret; + + for (p = 0; p < dd->num_pports; p++) { + ibp = &dd->pport[p].ibport_data; + agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI, + NULL, 0, send_handler, + NULL, NULL); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + goto err; + } + + /* Initialize xmit_wait structure */ + dd->pport[p].cong_stats.counter = 0; + init_timer(&dd->pport[p].cong_stats.timer); + dd->pport[p].cong_stats.timer.function = xmit_wait_timer_func; + dd->pport[p].cong_stats.timer.data = + (unsigned long)(&dd->pport[p]); + dd->pport[p].cong_stats.timer.expires = 0; + add_timer(&dd->pport[p].cong_stats.timer); + + ibp->send_agent = agent; + } + + return 0; + +err: + for (p = 0; p < dd->num_pports; p++) 
{ + ibp = &dd->pport[p].ibport_data; + if (ibp->send_agent) { + agent = ibp->send_agent; + ibp->send_agent = NULL; + ib_unregister_mad_agent(agent); + } + } + + return ret; +} + +void qib_free_agents(struct qib_ibdev *dev) +{ + struct qib_devdata *dd = dd_from_dev(dev); + struct ib_mad_agent *agent; + struct qib_ibport *ibp; + int p; + + for (p = 0; p < dd->num_pports; p++) { + ibp = &dd->pport[p].ibport_data; + if (ibp->send_agent) { + agent = ibp->send_agent; + ibp->send_agent = NULL; + ib_unregister_mad_agent(agent); + } + if (ibp->sm_ah) { + ib_destroy_ah(&ibp->sm_ah->ibah); + ibp->sm_ah = NULL; + } + if (dd->pport[p].cong_stats.timer.data) + del_timer_sync(&dd->pport[p].cong_stats.timer); + } +} diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h new file mode 100644 index 00000000000..941d4d50d8e --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_mad.h @@ -0,0 +1,431 @@ +/* + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef _QIB_MAD_H +#define _QIB_MAD_H + +#include <rdma/ib_pma.h> + +#define IB_SMP_UNSUP_VERSION cpu_to_be16(0x0004) +#define IB_SMP_UNSUP_METHOD cpu_to_be16(0x0008) +#define IB_SMP_UNSUP_METH_ATTR cpu_to_be16(0x000C) +#define IB_SMP_INVALID_FIELD cpu_to_be16(0x001C) + +struct ib_node_info { + u8 base_version; + u8 class_version; + u8 node_type; + u8 num_ports; + __be64 sys_guid; + __be64 node_guid; + __be64 port_guid; + __be16 partition_cap; + __be16 device_id; + __be32 revision; + u8 local_port_num; + u8 vendor_id[3]; +} __packed; + +struct ib_mad_notice_attr { + u8 generic_type; + u8 prod_type_msb; + __be16 prod_type_lsb; + __be16 trap_num; + __be16 issuer_lid; + __be16 toggle_count; + + union { + struct { + u8 details[54]; + } raw_data; + + struct { + __be16 reserved; + __be16 lid; /* where violation happened */ + u8 port_num; /* where violation happened */ + } __packed ntc_129_131; + + struct { + __be16 reserved; + __be16 lid; /* LID where change occurred */ + u8 reserved2; + u8 local_changes; /* low bit - local changes */ + __be32 new_cap_mask; /* new capability mask */ + u8 reserved3; + u8 change_flags; /* low 3 bits only */ + } __packed ntc_144; + + struct { + __be16 reserved; + __be16 lid; /* lid where sys guid changed */ + __be16 reserved2; + __be64 new_sys_guid; + } __packed ntc_145; + + struct { + __be16 reserved; + __be16 lid; + __be16 dr_slid; + u8 method; + u8 reserved2; + __be16 attr_id; + __be32 attr_mod; + __be64 mkey; + u8 reserved3; + u8 dr_trunc_hop; + u8 dr_rtn_path[30]; + } __packed ntc_256; + + struct { + __be16 reserved; + __be16 lid1; + __be16 lid2; + __be32 key; + __be32 sl_qp1; /* SL: high 4 bits */ + __be32 qp2; /* high 8 bits reserved */ + union ib_gid gid1; + union ib_gid gid2; + } __packed ntc_257_258; + + } details; +}; + +/* + * Generic trap/notice types + */ +#define IB_NOTICE_TYPE_FATAL 0x80 +#define IB_NOTICE_TYPE_URGENT 0x81 +#define IB_NOTICE_TYPE_SECURITY 0x82 +#define IB_NOTICE_TYPE_SM 0x83 +#define IB_NOTICE_TYPE_INFO 0x84 + +/* + * Generic trap/notice producers + */ +#define IB_NOTICE_PROD_CA cpu_to_be16(1) +#define IB_NOTICE_PROD_SWITCH cpu_to_be16(2) +#define IB_NOTICE_PROD_ROUTER cpu_to_be16(3) +#define IB_NOTICE_PROD_CLASS_MGR cpu_to_be16(4) + +/* + * Generic trap/notice numbers + */ +#define IB_NOTICE_TRAP_LLI_THRESH cpu_to_be16(129) +#define IB_NOTICE_TRAP_EBO_THRESH cpu_to_be16(130) +#define IB_NOTICE_TRAP_FLOW_UPDATE cpu_to_be16(131) +#define IB_NOTICE_TRAP_CAP_MASK_CHG cpu_to_be16(144) +#define IB_NOTICE_TRAP_SYS_GUID_CHG cpu_to_be16(145) +#define IB_NOTICE_TRAP_BAD_MKEY cpu_to_be16(256) +#define IB_NOTICE_TRAP_BAD_PKEY cpu_to_be16(257) +#define IB_NOTICE_TRAP_BAD_QKEY cpu_to_be16(258) + +/* + * Repress trap/notice flags + */ +#define IB_NOTICE_REPRESS_LLI_THRESH (1 << 0) +#define IB_NOTICE_REPRESS_EBO_THRESH (1 << 1) +#define IB_NOTICE_REPRESS_FLOW_UPDATE (1 << 2) +#define IB_NOTICE_REPRESS_CAP_MASK_CHG (1 << 3) +#define IB_NOTICE_REPRESS_SYS_GUID_CHG (1 << 4) +#define IB_NOTICE_REPRESS_BAD_MKEY (1 << 5) +#define IB_NOTICE_REPRESS_BAD_PKEY (1 << 6) +#define IB_NOTICE_REPRESS_BAD_QKEY (1 << 7) + +/* + * Generic trap/notice other local changes flags (trap 144). + */ +#define IB_NOTICE_TRAP_LSE_CHG 0x04 /* Link Speed Enable changed */ +#define IB_NOTICE_TRAP_LWE_CHG 0x02 /* Link Width Enable changed */ +#define IB_NOTICE_TRAP_NODE_DESC_CHG 0x01 + +/* + * Generic trap/notice M_Key violation flags in dr_trunc_hop (trap 256). 
+ */ +#define IB_NOTICE_TRAP_DR_NOTICE 0x80 +#define IB_NOTICE_TRAP_DR_TRUNC 0x40 + +struct ib_vl_weight_elem { + u8 vl; /* Only low 4 bits, upper 4 bits reserved */ + u8 weight; +}; + +#define IB_VLARB_LOWPRI_0_31 1 +#define IB_VLARB_LOWPRI_32_63 2 +#define IB_VLARB_HIGHPRI_0_31 3 +#define IB_VLARB_HIGHPRI_32_63 4 + +#define IB_PMA_PORT_COUNTERS_CONG cpu_to_be16(0xFF00) + +struct ib_pma_portcounters_cong { + u8 reserved; + u8 reserved1; + __be16 port_check_rate; + __be16 symbol_error_counter; + u8 link_error_recovery_counter; + u8 link_downed_counter; + __be16 port_rcv_errors; + __be16 port_rcv_remphys_errors; + __be16 port_rcv_switch_relay_errors; + __be16 port_xmit_discards; + u8 port_xmit_constraint_errors; + u8 port_rcv_constraint_errors; + u8 reserved2; + u8 link_overrun_errors; /* LocalLink: 7:4, BufferOverrun: 3:0 */ + __be16 reserved3; + __be16 vl15_dropped; + __be64 port_xmit_data; + __be64 port_rcv_data; + __be64 port_xmit_packets; + __be64 port_rcv_packets; + __be64 port_xmit_wait; + __be64 port_adr_events; +} __packed; + +#define IB_PMA_CONG_HW_CONTROL_TIMER 0x00 +#define IB_PMA_CONG_HW_CONTROL_SAMPLE 0x01 + +#define QIB_XMIT_RATE_UNSUPPORTED 0x0 +#define QIB_XMIT_RATE_PICO 0x7 +/* number of 4nsec cycles equaling 2secs */ +#define QIB_CONG_TIMER_PSINTERVAL 0x1DCD64EC + +#define IB_PMA_SEL_CONG_ALL 0x01 +#define IB_PMA_SEL_CONG_PORT_DATA 0x02 +#define IB_PMA_SEL_CONG_XMIT 0x04 +#define IB_PMA_SEL_CONG_ROUTING 0x08 + +/* + * Congestion control class attributes + */ +#define IB_CC_ATTR_CLASSPORTINFO cpu_to_be16(0x0001) +#define IB_CC_ATTR_NOTICE cpu_to_be16(0x0002) +#define IB_CC_ATTR_CONGESTION_INFO cpu_to_be16(0x0011) +#define IB_CC_ATTR_CONGESTION_KEY_INFO cpu_to_be16(0x0012) +#define IB_CC_ATTR_CONGESTION_LOG cpu_to_be16(0x0013) +#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING cpu_to_be16(0x0014) +#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING cpu_to_be16(0x0015) +#define IB_CC_ATTR_CA_CONGESTION_SETTING cpu_to_be16(0x0016) +#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0017) +#define IB_CC_ATTR_TIME_STAMP cpu_to_be16(0x0018) + +/* generalizations for threshold values */ +#define IB_CC_THRESHOLD_NONE 0x0 +#define IB_CC_THRESHOLD_MIN 0x1 +#define IB_CC_THRESHOLD_MAX 0xf + +/* CCA MAD header constants */ +#define IB_CC_MAD_LOGDATA_LEN 32 +#define IB_CC_MAD_MGMTDATA_LEN 192 + +struct ib_cc_mad { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + __be16 class_specific; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + __be64 cckey; + + /* For CongestionLog attribute only */ + u8 log_data[IB_CC_MAD_LOGDATA_LEN]; + + u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN]; +} __packed; + +/* + * Congestion Control class portinfo capability mask bits + */ +#define IB_CC_CPI_CM_TRAP_GEN cpu_to_be16(1 << 0) +#define IB_CC_CPI_CM_GET_SET_NOTICE cpu_to_be16(1 << 1) +#define IB_CC_CPI_CM_CAP2 cpu_to_be16(1 << 2) +#define IB_CC_CPI_CM_ENHANCEDPORT0_CC cpu_to_be16(1 << 8) + +struct ib_cc_classportinfo_attr { + u8 base_version; + u8 class_version; + __be16 cap_mask; + u8 reserved[3]; + u8 resp_time_value; /* only lower 5 bits */ + union ib_gid redirect_gid; + __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */ + __be16 redirect_lid; + __be16 redirect_pkey; + __be32 redirect_qp; /* only lower 24 bits */ + __be32 redirect_qkey; + union ib_gid trap_gid; + __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */ + __be16 trap_lid; + __be16 trap_pkey; + __be32 trap_hl_qp; /* 8, 24 bits respectively */ + __be32 trap_qkey; +} __packed; + +/* 
Congestion control traps */ +#define IB_CC_TRAP_KEY_VIOLATION 0x0000 + +struct ib_cc_trap_key_violation_attr { + __be16 source_lid; + u8 method; + u8 reserved1; + __be16 attrib_id; + __be32 attrib_mod; + __be32 qp; + __be64 cckey; + u8 sgid[16]; + u8 padding[24]; +} __packed; + +/* Congestion info flags */ +#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1 +#define IB_CC_TABLE_CAP_DEFAULT 31 + +struct ib_cc_info_attr { + __be16 congestion_info; + u8 control_table_cap; /* Multiple of 64 entry unit CCTs */ +} __packed; + +struct ib_cc_key_info_attr { + __be64 cckey; + u8 protect; + __be16 lease_period; + __be16 violations; +} __packed; + +#define IB_CC_CL_CA_LOGEVENTS_LEN 208 + +struct ib_cc_log_attr { + u8 log_type; + u8 congestion_flags; + __be16 threshold_event_counter; + __be16 threshold_congestion_event_map; + __be16 current_time_stamp; + u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN]; +} __packed; + +#define IB_CC_CLEC_SERVICETYPE_RC 0x0 +#define IB_CC_CLEC_SERVICETYPE_UC 0x1 +#define IB_CC_CLEC_SERVICETYPE_RD 0x2 +#define IB_CC_CLEC_SERVICETYPE_UD 0x3 + +struct ib_cc_log_event { + u8 local_qp_cn_entry; + u8 remote_qp_number_cn_entry[3]; + u8 sl_cn_entry:4; + u8 service_type_cn_entry:4; + __be32 remote_lid_cn_entry; + __be32 timestamp_cn_entry; +} __packed; + +/* Sixteen congestion entries */ +#define IB_CC_CCS_ENTRIES 16 + +/* Port control flags */ +#define IB_CC_CCS_PC_SL_BASED 0x01 + +struct ib_cc_congestion_entry { + u8 ccti_increase; + __be16 ccti_timer; + u8 trigger_threshold; + u8 ccti_min; /* min CCTI for cc table */ +} __packed; + +struct ib_cc_congestion_entry_shadow { + u8 ccti_increase; + u16 ccti_timer; + u8 trigger_threshold; + u8 ccti_min; /* min CCTI for cc table */ +} __packed; + +struct ib_cc_congestion_setting_attr { + __be16 port_control; + __be16 control_map; + struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES]; +} __packed; + +struct ib_cc_congestion_setting_attr_shadow { + u16 port_control; + u16 control_map; + struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES]; +} __packed; + +#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1 +#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1 + +/* 64 Congestion Control table entries in a single MAD */ +#define IB_CCT_ENTRIES 64 +#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2) + +struct ib_cc_table_entry { + __be16 entry; /* shift:2, multiplier:14 */ +}; + +struct ib_cc_table_entry_shadow { + u16 entry; /* shift:2, multiplier:14 */ +}; + +struct ib_cc_table_attr { + __be16 ccti_limit; /* max CCTI for cc table */ + struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES]; +} __packed; + +struct ib_cc_table_attr_shadow { + u16 ccti_limit; /* max CCTI for cc table */ + struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES]; +} __packed; + +#define CC_TABLE_SHADOW_MAX \ + (IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES) + +struct cc_table_shadow { + u16 ccti_last_entry; + struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX]; +} __packed; + +/* + * The PortSamplesControl.CounterMasks field is an array of 3 bit fields + * which specify the N'th counter's capabilities. See ch. 16.1.3.2. + * We support 5 counters which only count the mandatory quantities. 
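+ * + * COUNTER_MASK(q, n) places the 3-bit capability value q of counter n + * at bit position (9 - n) * 3; e.g. COUNTER_MASK(1, 0) evaluates to + * 1 << 27, so COUNTER_MASK0_9 marks counters 0-4 with capability 1.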
+ */ +#define COUNTER_MASK(q, n) (q << ((9 - n) * 3)) +#define COUNTER_MASK0_9 \ + cpu_to_be32(COUNTER_MASK(1, 0) | \ + COUNTER_MASK(1, 1) | \ + COUNTER_MASK(1, 2) | \ + COUNTER_MASK(1, 3) | \ + COUNTER_MASK(1, 4)) + +#endif /* _QIB_MAD_H */ diff --git a/drivers/infiniband/hw/qib/qib_mmap.c b/drivers/infiniband/hw/qib/qib_mmap.c new file mode 100644 index 00000000000..8b73a11d571 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_mmap.c @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/errno.h> +#include <asm/pgtable.h> + +#include "qib_verbs.h" + +/** + * qib_release_mmap_info - free mmap info structure + * @ref: a pointer to the kref within struct qib_mmap_info + */ +void qib_release_mmap_info(struct kref *ref) +{ + struct qib_mmap_info *ip = + container_of(ref, struct qib_mmap_info, ref); + struct qib_ibdev *dev = to_idev(ip->context->device); + + spin_lock_irq(&dev->pending_lock); + list_del(&ip->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + + vfree(ip->obj); + kfree(ip); +} + +/* + * open and close keep track of how many times the CQ is mapped, + * to avoid releasing it. + */ +static void qib_vma_open(struct vm_area_struct *vma) +{ + struct qib_mmap_info *ip = vma->vm_private_data; + + kref_get(&ip->ref); +} + +static void qib_vma_close(struct vm_area_struct *vma) +{ + struct qib_mmap_info *ip = vma->vm_private_data; + + kref_put(&ip->ref, qib_release_mmap_info); +} + +static struct vm_operations_struct qib_vm_ops = { + .open = qib_vma_open, + .close = qib_vma_close, +}; + +/** + * qib_mmap - create a new mmap region + * @context: the IB user context of the process making the mmap() call + * @vma: the VMA to be initialized + * Return zero if the mmap is OK. Otherwise, return an errno. 
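+ * + * Only objects previously queued on the device's pending_mmaps list by + * the CQ/QP/SRQ creation paths can be mapped, and only by the context + * that created them.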
+ */ +int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct qib_ibdev *dev = to_idev(context->device); + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long size = vma->vm_end - vma->vm_start; + struct qib_mmap_info *ip, *pp; + int ret = -EINVAL; + + /* + * Search the device's list of objects waiting for a mmap call. + * Normally, this list is very short since a call to create a + * CQ, QP, or SRQ is soon followed by a call to mmap(). + */ + spin_lock_irq(&dev->pending_lock); + list_for_each_entry_safe(ip, pp, &dev->pending_mmaps, + pending_mmaps) { + /* Only the creator is allowed to mmap the object */ + if (context != ip->context || (__u64) offset != ip->offset) + continue; + /* Don't allow a mmap larger than the object. */ + if (size > ip->size) + break; + + list_del_init(&ip->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + + ret = remap_vmalloc_range(vma, ip->obj, 0); + if (ret) + goto done; + vma->vm_ops = &qib_vm_ops; + vma->vm_private_data = ip; + qib_vma_open(vma); + goto done; + } + spin_unlock_irq(&dev->pending_lock); +done: + return ret; +} + +/* + * Allocate information for qib_mmap + */ +struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, + u32 size, + struct ib_ucontext *context, + void *obj) { + struct qib_mmap_info *ip; + + ip = kmalloc(sizeof *ip, GFP_KERNEL); + if (!ip) + goto bail; + + size = PAGE_ALIGN(size); + + spin_lock_irq(&dev->mmap_offset_lock); + if (dev->mmap_offset == 0) + dev->mmap_offset = PAGE_SIZE; + ip->offset = dev->mmap_offset; + dev->mmap_offset += size; + spin_unlock_irq(&dev->mmap_offset_lock); + + INIT_LIST_HEAD(&ip->pending_mmaps); + ip->size = size; + ip->context = context; + ip->obj = obj; + kref_init(&ip->ref); + +bail: + return ip; +} + +void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip, + u32 size, void *obj) +{ + size = PAGE_ALIGN(size); + + spin_lock_irq(&dev->mmap_offset_lock); + if (dev->mmap_offset == 0) + dev->mmap_offset = PAGE_SIZE; + ip->offset = dev->mmap_offset; + dev->mmap_offset += size; + spin_unlock_irq(&dev->mmap_offset_lock); + + ip->size = size; + ip->obj = obj; +} diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c new file mode 100644 index 00000000000..9bbb55347cc --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -0,0 +1,532 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/ib_umem.h> +#include <rdma/ib_smi.h> + +#include "qib.h" + +/* Fast memory region */ +struct qib_fmr { + struct ib_fmr ibfmr; + struct qib_mregion mr; /* must be last */ +}; + +static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr) +{ + return container_of(ibfmr, struct qib_fmr, ibfmr); +} + +static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd, + int count) +{ + int m, i = 0; + int rval = 0; + + m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; + for (; i < m; i++) { + mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL); + if (!mr->map[i]) + goto bail; + } + mr->mapsz = m; + init_completion(&mr->comp); + /* count returning the ptr to user */ + atomic_set(&mr->refcount, 1); + mr->pd = pd; + mr->max_segs = count; +out: + return rval; +bail: + while (i) + kfree(mr->map[--i]); + rval = -ENOMEM; + goto out; +} + +static void deinit_qib_mregion(struct qib_mregion *mr) +{ + int i = mr->mapsz; + + mr->mapsz = 0; + while (i) + kfree(mr->map[--i]); +} + + +/** + * qib_get_dma_mr - get a DMA memory region + * @pd: protection domain for this memory region + * @acc: access flags + * + * Returns the memory region on success, otherwise returns an errno. + * Note that all DMA addresses should be created via the + * struct ib_dma_mapping_ops functions (see qib_dma.c). + */ +struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct qib_mr *mr = NULL; + struct ib_mr *ret; + int rval; + + if (to_ipd(pd)->user) { + ret = ERR_PTR(-EPERM); + goto bail; + } + + mr = kzalloc(sizeof *mr, GFP_KERNEL); + if (!mr) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + rval = init_qib_mregion(&mr->mr, pd, 0); + if (rval) { + ret = ERR_PTR(rval); + goto bail; + } + + + rval = qib_alloc_lkey(&mr->mr, 1); + if (rval) { + ret = ERR_PTR(rval); + goto bail_mregion; + } + + mr->mr.access_flags = acc; + ret = &mr->ibmr; +done: + return ret; + +bail_mregion: + deinit_qib_mregion(&mr->mr); +bail: + kfree(mr); + goto done; +} + +static struct qib_mr *alloc_mr(int count, struct ib_pd *pd) +{ + struct qib_mr *mr; + int rval = -ENOMEM; + int m; + + /* Allocate struct plus pointers to first level page tables. */ + m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; + mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); + if (!mr) + goto bail; + + rval = init_qib_mregion(&mr->mr, pd, count); + if (rval) + goto bail; + /* + * ib_reg_phys_mr() will initialize mr->ibmr except for + * lkey and rkey. 
+ */ + rval = qib_alloc_lkey(&mr->mr, 0); + if (rval) + goto bail_mregion; + mr->ibmr.lkey = mr->mr.lkey; + mr->ibmr.rkey = mr->mr.lkey; +done: + return mr; + +bail_mregion: + deinit_qib_mregion(&mr->mr); +bail: + kfree(mr); + mr = ERR_PTR(rval); + goto done; +} + +/** + * qib_reg_phys_mr - register a physical memory region + * @pd: protection domain for this memory region + * @buffer_list: pointer to the list of physical buffers to register + * @num_phys_buf: the number of physical buffers to register + * @acc: access flags for this memory region + * @iova_start: the starting address passed over IB which maps to this MR + * + * Returns the memory region on success, otherwise returns an errno. + */ +struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, int acc, u64 *iova_start) +{ + struct qib_mr *mr; + int n, m, i; + struct ib_mr *ret; + + mr = alloc_mr(num_phys_buf, pd); + if (IS_ERR(mr)) { + ret = (struct ib_mr *)mr; + goto bail; + } + + mr->mr.user_base = *iova_start; + mr->mr.iova = *iova_start; + mr->mr.access_flags = acc; + + m = 0; + n = 0; + for (i = 0; i < num_phys_buf; i++) { + mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr; + mr->mr.map[m]->segs[n].length = buffer_list[i].size; + mr->mr.length += buffer_list[i].size; + n++; + if (n == QIB_SEGSZ) { + m++; + n = 0; + } + } + + ret = &mr->ibmr; + +bail: + return ret; +} + +/** + * qib_reg_user_mr - register a userspace memory region + * @pd: protection domain for this memory region + * @start: starting userspace address + * @length: length of region to register + * @virt_addr: the virtual address passed over IB which maps to this MR + * @mr_access_flags: access flags for this memory region + * @udata: unused by the QLogic_IB driver + * + * Returns the memory region on success, otherwise returns an errno. + */ +struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_udata *udata) +{ + struct qib_mr *mr; + struct ib_umem *umem; + struct scatterlist *sg; + int n, m, entry; + struct ib_mr *ret; + + if (length == 0) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + umem = ib_umem_get(pd->uobject->context, start, length, + mr_access_flags, 0); + if (IS_ERR(umem)) + return (void *) umem; + + n = umem->nmap; + + mr = alloc_mr(n, pd); + if (IS_ERR(mr)) { + ret = (struct ib_mr *)mr; + ib_umem_release(umem); + goto bail; + } + + mr->mr.user_base = start; + mr->mr.iova = virt_addr; + mr->mr.length = length; + mr->mr.offset = umem->offset; + mr->mr.access_flags = mr_access_flags; + mr->umem = umem; + + if (is_power_of_2(umem->page_size)) + mr->mr.page_shift = ilog2(umem->page_size); + m = 0; + n = 0; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + void *vaddr; + + vaddr = page_address(sg_page(sg)); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; + mr->mr.map[m]->segs[n].length = umem->page_size; + n++; + if (n == QIB_SEGSZ) { + m++; + n = 0; + } + } + ret = &mr->ibmr; + +bail: + return ret; +} + +/** + * qib_dereg_mr - unregister and free a memory region + * @ibmr: the memory region to free + * + * Returns 0 on success. + * + * Note that this is called to free MRs created by qib_get_dma_mr() + * or qib_reg_user_mr(). 
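+ * + * Waits up to five seconds for outstanding references to drain; if the + * region is still in use, the reference is reacquired and -EBUSY is + * returned.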
+ */ +int qib_dereg_mr(struct ib_mr *ibmr) +{ + struct qib_mr *mr = to_imr(ibmr); + int ret = 0; + unsigned long timeout; + + qib_free_lkey(&mr->mr); + + qib_put_mr(&mr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&mr->mr.comp, + 5 * HZ); + if (!timeout) { + qib_get_mr(&mr->mr); + ret = -EBUSY; + goto out; + } + deinit_qib_mregion(&mr->mr); + if (mr->umem) + ib_umem_release(mr->umem); + kfree(mr); +out: + return ret; +} + +/* + * Allocate a memory region usable with the + * IB_WR_FAST_REG_MR send work request. + * + * Return the memory region on success, otherwise return an errno. + */ +struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) +{ + struct qib_mr *mr; + + mr = alloc_mr(max_page_list_len, pd); + if (IS_ERR(mr)) + return (struct ib_mr *)mr; + + return &mr->ibmr; +} + +struct ib_fast_reg_page_list * +qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len) +{ + unsigned size = page_list_len * sizeof(u64); + struct ib_fast_reg_page_list *pl; + + if (size > PAGE_SIZE) + return ERR_PTR(-EINVAL); + + pl = kzalloc(sizeof *pl, GFP_KERNEL); + if (!pl) + return ERR_PTR(-ENOMEM); + + pl->page_list = kzalloc(size, GFP_KERNEL); + if (!pl->page_list) + goto err_free; + + return pl; + +err_free: + kfree(pl); + return ERR_PTR(-ENOMEM); +} + +void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl) +{ + kfree(pl->page_list); + kfree(pl); +} + +/** + * qib_alloc_fmr - allocate a fast memory region + * @pd: the protection domain for this memory region + * @mr_access_flags: access flags for this memory region + * @fmr_attr: fast memory region attributes + * + * Returns the memory region on success, otherwise returns an errno. + */ +struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct qib_fmr *fmr; + int m; + struct ib_fmr *ret; + int rval = -ENOMEM; + + /* Allocate struct plus pointers to first level page tables. */ + m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ; + fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); + if (!fmr) + goto bail; + + rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages); + if (rval) + goto bail; + + /* + * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & + * rkey. + */ + rval = qib_alloc_lkey(&fmr->mr, 0); + if (rval) + goto bail_mregion; + fmr->ibfmr.rkey = fmr->mr.lkey; + fmr->ibfmr.lkey = fmr->mr.lkey; + /* + * Resources are allocated but no valid mapping (RKEY can't be + * used). + */ + fmr->mr.access_flags = mr_access_flags; + fmr->mr.max_segs = fmr_attr->max_pages; + fmr->mr.page_shift = fmr_attr->page_shift; + + ret = &fmr->ibfmr; +done: + return ret; + +bail_mregion: + deinit_qib_mregion(&fmr->mr); +bail: + kfree(fmr); + ret = ERR_PTR(rval); + goto done; +} + +/** + * qib_map_phys_fmr - set up a fast memory region + * @ibmfr: the fast memory region to set up + * @page_list: the list of pages to associate with the fast memory region + * @list_len: the number of pages to associate with the fast memory region + * @iova: the virtual address of the start of the fast memory region + * + * This may be called from interrupt context. 
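 *
 * [Editor's sketch, not part of this commit] Consumers reach this entry
 * point through the core verbs wrapper; dma_addr_a, dma_addr_b and iova
 * below are hypothetical values:
 *
 *	u64 pages[2] = { dma_addr_a, dma_addr_b };
 *	int err = ib_map_phys_fmr(fmr, pages, 2, iova);
 *
 *	if (err)
 *		pr_err("fmr map failed: %d\n", err);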
+ */ + +int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova) +{ + struct qib_fmr *fmr = to_ifmr(ibfmr); + struct qib_lkey_table *rkt; + unsigned long flags; + int m, n, i; + u32 ps; + int ret; + + i = atomic_read(&fmr->mr.refcount); + if (i > 2) + return -EBUSY; + + if (list_len > fmr->mr.max_segs) { + ret = -EINVAL; + goto bail; + } + rkt = &to_idev(ibfmr->device)->lk_table; + spin_lock_irqsave(&rkt->lock, flags); + fmr->mr.user_base = iova; + fmr->mr.iova = iova; + ps = 1 << fmr->mr.page_shift; + fmr->mr.length = list_len * ps; + m = 0; + n = 0; + for (i = 0; i < list_len; i++) { + fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i]; + fmr->mr.map[m]->segs[n].length = ps; + if (++n == QIB_SEGSZ) { + m++; + n = 0; + } + } + spin_unlock_irqrestore(&rkt->lock, flags); + ret = 0; + +bail: + return ret; +} + +/** + * qib_unmap_fmr - unmap fast memory regions + * @fmr_list: the list of fast memory regions to unmap + * + * Returns 0 on success. + */ +int qib_unmap_fmr(struct list_head *fmr_list) +{ + struct qib_fmr *fmr; + struct qib_lkey_table *rkt; + unsigned long flags; + + list_for_each_entry(fmr, fmr_list, ibfmr.list) { + rkt = &to_idev(fmr->ibfmr.device)->lk_table; + spin_lock_irqsave(&rkt->lock, flags); + fmr->mr.user_base = 0; + fmr->mr.iova = 0; + fmr->mr.length = 0; + spin_unlock_irqrestore(&rkt->lock, flags); + } + return 0; +} + +/** + * qib_dealloc_fmr - deallocate a fast memory region + * @ibfmr: the fast memory region to deallocate + * + * Returns 0 on success. + */ +int qib_dealloc_fmr(struct ib_fmr *ibfmr) +{ + struct qib_fmr *fmr = to_ifmr(ibfmr); + int ret = 0; + unsigned long timeout; + + qib_free_lkey(&fmr->mr); + qib_put_mr(&fmr->mr); /* will set completion if last */ + timeout = wait_for_completion_timeout(&fmr->mr.comp, + 5 * HZ); + if (!timeout) { + qib_get_mr(&fmr->mr); + ret = -EBUSY; + goto out; + } + deinit_qib_mregion(&fmr->mr); + kfree(fmr); +out: + return ret; +} + +void mr_rcu_callback(struct rcu_head *list) +{ + struct qib_mregion *mr = container_of(list, struct qib_mregion, list); + + complete(&mr->comp); +} diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c new file mode 100644 index 00000000000..61a0046efb7 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -0,0 +1,715 @@ +/* + * Copyright (c) 2008, 2009 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/pci.h> +#include <linux/io.h> +#include <linux/delay.h> +#include <linux/vmalloc.h> +#include <linux/aer.h> +#include <linux/module.h> + +#include "qib.h" + +/* + * This file contains PCIe utility routines that are common to the + * various QLogic InfiniPath adapters + */ + +/* + * Code to adjust PCIe capabilities. + * To minimize the change footprint, we call it + * from qib_pcie_params, which every chip-specific + * file calls, even though this violates some + * expectations of harmlessness. + */ +static void qib_tune_pcie_caps(struct qib_devdata *); +static void qib_tune_pcie_coalesce(struct qib_devdata *); + +/* + * Do all the common PCIe setup and initialization. + * devdata is not yet allocated, and is not allocated until after this + * routine returns success. Therefore qib_dev_err() can't be used for error + * printing. + */ +int qib_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int ret; + + ret = pci_enable_device(pdev); + if (ret) { + /* + * This can happen (in theory) iff: + * We did a chip reset, and then failed to reprogram the + * BAR, or the chip reset due to an internal error. We then + * unloaded the driver and reloaded it. + * + * Both reset cases set the BAR back to initial state. For + * the latter case, the AER sticky error bit at offset 0x718 + * should be set, but the Linux kernel doesn't yet know + * about that, it appears. If the original BAR was retained + * in the kernel data structures, this may be OK. + */ + qib_early_err(&pdev->dev, "pci enable failed: error %d\n", + -ret); + goto done; + } + + ret = pci_request_regions(pdev, QIB_DRV_NAME); + if (ret) { + qib_devinfo(pdev, "pci_request_regions fails: err %d\n", -ret); + goto bail; + } + + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (ret) { + /* + * If the 64 bit setup fails, try 32 bit. Some systems + * do not setup 64 bit maps on systems with 2GB or less + * memory installed. 
+ */ + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (ret) { + qib_devinfo(pdev, "Unable to set DMA mask: %d\n", ret); + goto bail; + } + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + } else + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (ret) { + qib_early_err(&pdev->dev, + "Unable to set DMA consistent mask: %d\n", ret); + goto bail; + } + + pci_set_master(pdev); + ret = pci_enable_pcie_error_reporting(pdev); + if (ret) { + qib_early_err(&pdev->dev, + "Unable to enable pcie error reporting: %d\n", + ret); + ret = 0; + } + goto done; + +bail: + pci_disable_device(pdev); + pci_release_regions(pdev); +done: + return ret; +} + +/* + * Do remaining PCIe setup, once dd is allocated, and save away + * fields required to re-initialize after a chip reset, or for + * various other purposes + */ +int qib_pcie_ddinit(struct qib_devdata *dd, struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + unsigned long len; + resource_size_t addr; + + dd->pcidev = pdev; + pci_set_drvdata(pdev, dd); + + addr = pci_resource_start(pdev, 0); + len = pci_resource_len(pdev, 0); + +#if defined(__powerpc__) + /* There isn't a generic way to specify writethrough mappings */ + dd->kregbase = __ioremap(addr, len, _PAGE_NO_CACHE | _PAGE_WRITETHRU); +#else + dd->kregbase = ioremap_nocache(addr, len); +#endif + + if (!dd->kregbase) + return -ENOMEM; + + dd->kregend = (u64 __iomem *)((void __iomem *) dd->kregbase + len); + dd->physaddr = addr; /* used for io_remap, etc. */ + + /* + * Save BARs to rewrite after device reset. Save all 64 bits of + * BAR, just in case. + */ + dd->pcibar0 = addr; + dd->pcibar1 = addr >> 32; + dd->deviceid = ent->device; /* save for later use */ + dd->vendorid = ent->vendor; + + return 0; +} + +/* + * Do PCIe cleanup, after chip-specific cleanup, etc. Just prior + * to releasing the dd memory. + * void because none of the core pcie cleanup returns are void + */ +void qib_pcie_ddcleanup(struct qib_devdata *dd) +{ + u64 __iomem *base = (void __iomem *) dd->kregbase; + + dd->kregbase = NULL; + iounmap(base); + if (dd->piobase) + iounmap(dd->piobase); + if (dd->userbase) + iounmap(dd->userbase); + if (dd->piovl15base) + iounmap(dd->piovl15base); + + pci_disable_device(dd->pcidev); + pci_release_regions(dd->pcidev); + + pci_set_drvdata(dd->pcidev, NULL); +} + +static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, + struct qib_msix_entry *qib_msix_entry) +{ + int ret; + int nvec = *msixcnt; + struct msix_entry *msix_entry; + int i; + + ret = pci_msix_vec_count(dd->pcidev); + if (ret < 0) + goto do_intx; + + nvec = min(nvec, ret); + + /* We can't pass qib_msix_entry array to qib_msix_setup + * so use a dummy msix_entry array and copy the allocated + * irq back to the qib_msix_entry array. 
 */ + msix_entry = kmalloc(nvec * sizeof(*msix_entry), GFP_KERNEL); + if (!msix_entry) + goto do_intx; + + for (i = 0; i < nvec; i++) + msix_entry[i] = qib_msix_entry[i].msix; + + ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec); + if (ret < 0) + goto free_msix_entry; + else + nvec = ret; + + for (i = 0; i < nvec; i++) + qib_msix_entry[i].msix = msix_entry[i]; + + kfree(msix_entry); + *msixcnt = nvec; + return; + +free_msix_entry: + kfree(msix_entry); + +do_intx: + qib_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, " + "falling back to INTx\n", nvec, ret); + *msixcnt = 0; + qib_enable_intx(dd->pcidev); +} + +/** + * We save the msi lo and hi values, so we can restore them after + * chip reset (the kernel PCI infrastructure doesn't yet handle that + * correctly). + */ +static int qib_msi_setup(struct qib_devdata *dd, int pos) +{ + struct pci_dev *pdev = dd->pcidev; + u16 control; + int ret; + + ret = pci_enable_msi(pdev); + if (ret) + qib_dev_err(dd, + "pci_enable_msi failed: %d, interrupts may not work\n", + ret); + /* continue even if it fails, we may still be OK... */ + + pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, + &dd->msi_lo); + pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI, + &dd->msi_hi); + pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &control); + /* now save the data (vector) info */ + pci_read_config_word(pdev, pos + ((control & PCI_MSI_FLAGS_64BIT) + ? 12 : 8), + &dd->msi_data); + return ret; +} + +int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, + struct qib_msix_entry *entry) +{ + u16 linkstat, speed; + int pos = 0, ret = 1; + + if (!pci_is_pcie(dd->pcidev)) { + qib_dev_err(dd, "Can't find PCI Express capability!\n"); + /* set up something... */ + dd->lbus_width = 1; + dd->lbus_speed = 2500; /* Gen1, 2.5GHz */ + goto bail; + } + + pos = dd->pcidev->msix_cap; + if (nent && *nent && pos) { + qib_msix_setup(dd, pos, nent, entry); + ret = 0; /* did it, either MSIx or INTx */ + } else { + pos = dd->pcidev->msi_cap; + if (pos) + ret = qib_msi_setup(dd, pos); + else + qib_dev_err(dd, "No PCI MSI or MSIx capability!\n"); + } + if (!pos) + qib_enable_intx(dd->pcidev); + + pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); + /* + * speed is bits 0-3, linkwidth is bits 4-8 + * no defines for them in headers + */ + speed = linkstat & 0xf; + linkstat >>= 4; + linkstat &= 0x1f; + dd->lbus_width = linkstat; + + switch (speed) { + case 1: + dd->lbus_speed = 2500; /* Gen1, 2.5GHz */ + break; + case 2: + dd->lbus_speed = 5000; /* Gen2, 5GHz */ + break; + default: /* not defined, assume gen1 */ + dd->lbus_speed = 2500; + break; + } + + /* + * Check against expected pcie width and complain if "wrong" + * on first initialization, not afterwards (i.e., reset). + */ + if (minw && linkstat < minw) + qib_dev_err(dd, + "PCIe width %u (x%u HCA), performance reduced\n", + linkstat, minw); + + qib_tune_pcie_caps(dd); + + qib_tune_pcie_coalesce(dd); + +bail: + /* fill in string, even on errors */ + snprintf(dd->lbus_info, sizeof(dd->lbus_info), + "PCIe,%uMHz,x%u\n", dd->lbus_speed, dd->lbus_width); + return ret; +} + +/* + * Setup pcie interrupt stuff again after a reset. I'd like to just call + * pci_enable_msi() again for msi, but when I do that, + * the MSI enable bit doesn't get set in the command word, and + * we switch to a different interrupt vector, which is confusing, + * so I instead just do it all inline.
Perhaps we can somehow tie this + * into the PCIe hotplug support at some point + */ +int qib_reinit_intr(struct qib_devdata *dd) +{ + int pos; + u16 control; + int ret = 0; + + /* If we aren't using MSI, don't restore it */ + if (!dd->msi_lo) + goto bail; + + pos = dd->pcidev->msi_cap; + if (!pos) { + qib_dev_err(dd, + "Can't find MSI capability, can't restore MSI settings\n"); + ret = 0; + /* nothing special for MSIx, just MSI */ + goto bail; + } + pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_LO, + dd->msi_lo); + pci_write_config_dword(dd->pcidev, pos + PCI_MSI_ADDRESS_HI, + dd->msi_hi); + pci_read_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, &control); + if (!(control & PCI_MSI_FLAGS_ENABLE)) { + control |= PCI_MSI_FLAGS_ENABLE; + pci_write_config_word(dd->pcidev, pos + PCI_MSI_FLAGS, + control); + } + /* now rewrite the data (vector) info */ + pci_write_config_word(dd->pcidev, pos + + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8), + dd->msi_data); + ret = 1; +bail: + if (!ret && (dd->flags & QIB_HAS_INTX)) { + qib_enable_intx(dd->pcidev); + ret = 1; + } + + /* and now set the pci master bit again */ + pci_set_master(dd->pcidev); + + return ret; +} + +/* + * Disable msi interrupt if enabled, and clear msi_lo. + * This is used primarily for the fallback to INTx, but + * is also used in reinit after reset, and during cleanup. + */ +void qib_nomsi(struct qib_devdata *dd) +{ + dd->msi_lo = 0; + pci_disable_msi(dd->pcidev); +} + +/* + * Same as qib_nomsi, but for MSIx. + */ +void qib_nomsix(struct qib_devdata *dd) +{ + pci_disable_msix(dd->pcidev); +} + +/* + * Similar to pci_intx(pdev, 1), except that we make sure + * msi(x) is off. + */ +void qib_enable_intx(struct pci_dev *pdev) +{ + u16 cw, new; + int pos; + + /* first, turn on INTx */ + pci_read_config_word(pdev, PCI_COMMAND, &cw); + new = cw & ~PCI_COMMAND_INTX_DISABLE; + if (new != cw) + pci_write_config_word(pdev, PCI_COMMAND, new); + + pos = pdev->msi_cap; + if (pos) { + /* then turn off MSI */ + pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &cw); + new = cw & ~PCI_MSI_FLAGS_ENABLE; + if (new != cw) + pci_write_config_word(pdev, pos + PCI_MSI_FLAGS, new); + } + pos = pdev->msix_cap; + if (pos) { + /* then turn off MSIx */ + pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &cw); + new = cw & ~PCI_MSIX_FLAGS_ENABLE; + if (new != cw) + pci_write_config_word(pdev, pos + PCI_MSIX_FLAGS, new); + } +} + +/* + * These two routines are helper routines for the device reset code + * to move all the pcie code out of the chip-specific driver code.
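 *
 * [Editor's sketch, illustrative only] A chip-reset path would bracket
 * the reset roughly like this:
 *
 *	u16 cmd;
 *	u8 iline, cline;
 *
 *	qib_pcie_getcmd(dd, &cmd, &iline, &cline);
 *	... assert and release the chip reset, wait for it to settle ...
 *	qib_pcie_reenable(dd, cmd, iline, cline);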
+ */ +void qib_pcie_getcmd(struct qib_devdata *dd, u16 *cmd, u8 *iline, u8 *cline) +{ + pci_read_config_word(dd->pcidev, PCI_COMMAND, cmd); + pci_read_config_byte(dd->pcidev, PCI_INTERRUPT_LINE, iline); + pci_read_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline); +} + +void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline) +{ + int r; + r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, + dd->pcibar0); + if (r) + qib_dev_err(dd, "rewrite of BAR0 failed: %d\n", r); + r = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, + dd->pcibar1); + if (r) + qib_dev_err(dd, "rewrite of BAR1 failed: %d\n", r); + /* now re-enable memory access, and restore cosmetic settings */ + pci_write_config_word(dd->pcidev, PCI_COMMAND, cmd); + pci_write_config_byte(dd->pcidev, PCI_INTERRUPT_LINE, iline); + pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline); + r = pci_enable_device(dd->pcidev); + if (r) + qib_dev_err(dd, + "pci_enable_device failed after reset: %d\n", r); +} + + +static int qib_pcie_coalesce; +module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO); +MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets"); + +/* + * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300 + * chipsets. This is known to be unsafe for some revisions of some + * of these chipsets, with some BIOS settings, and enabling it on those + * systems may result in the system crashing, and/or data corruption. + */ +static void qib_tune_pcie_coalesce(struct qib_devdata *dd) +{ + int r; + struct pci_dev *parent; + u16 devid; + u32 mask, bits, val; + + if (!qib_pcie_coalesce) + return; + + /* Find out supported and configured values for parent (root) */ + parent = dd->pcidev->bus->self; + if (parent->bus->parent) { + qib_devinfo(dd->pcidev, "Parent not root\n"); + return; + } + if (!pci_is_pcie(parent)) + return; + if (parent->vendor != 0x8086) + return; + + /* + * - bit 12: Max_rdcmp_Imt_EN: need to set to 1 + * - bit 11: COALESCE_FORCE: need to set to 0 + * - bit 10: COALESCE_EN: need to set to 1 + * (but with limitations on some chipsets) + * + * On the Intel 5000, 5100, and 7300 chipsets, there is + * also: - bit 25:24: COALESCE_MODE, need to set to 0 + */ + devid = parent->device; + if (devid >= 0x25e2 && devid <= 0x25fa) { + /* 5000 P/V/X/Z */ + if (parent->revision <= 0xb2) + bits = 1U << 10; + else + bits = 7U << 10; + mask = (3U << 24) | (7U << 10); + } else if (devid >= 0x65e2 && devid <= 0x65fa) { + /* 5100 */ + bits = 1U << 10; + mask = (3U << 24) | (7U << 10); + } else if (devid >= 0x4021 && devid <= 0x402e) { + /* 5400 */ + bits = 7U << 10; + mask = 7U << 10; + } else if (devid >= 0x3604 && devid <= 0x360a) { + /* 7300 */ + bits = 7U << 10; + mask = (3U << 24) | (7U << 10); + } else { + /* not one of the chipsets that we know about */ + return; + } + pci_read_config_dword(parent, 0x48, &val); + val &= ~mask; + val |= bits; + r = pci_write_config_dword(parent, 0x48, val); +} + +/* + * BIOS may not set PCIe bus-utilization parameters for best performance. + * Check and optionally adjust them to maximize our throughput.
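 *
 * [Editor's note] The payload-size arithmetic below leans on the PCIe
 * encoding size = 128 << code. For example, pcie_get_mps() returning
 * 256 bytes gives ffs(256) - 8 = 9 - 8 = code 1, and 128 << 1 = 256
 * bytes round-trips. The read-request ceiling of 4096 bytes is likewise
 * code 5, since 128 << 5 = 4096.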
+ */ +static int qib_pcie_caps; +module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO); +MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); + +static void qib_tune_pcie_caps(struct qib_devdata *dd) +{ + struct pci_dev *parent; + u16 rc_mpss, rc_mps, ep_mpss, ep_mps; + u16 rc_mrrs, ep_mrrs, max_mrrs; + + /* Find out supported and configured values for parent (root) */ + parent = dd->pcidev->bus->self; + if (!pci_is_root_bus(parent->bus)) { + qib_devinfo(dd->pcidev, "Parent not root\n"); + return; + } + + if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev)) + return; + + rc_mpss = parent->pcie_mpss; + rc_mps = ffs(pcie_get_mps(parent)) - 8; + /* Find out supported and configured values for endpoint (us) */ + ep_mpss = dd->pcidev->pcie_mpss; + ep_mps = ffs(pcie_get_mps(dd->pcidev)) - 8; + + /* Find max payload supported by root, endpoint */ + if (rc_mpss > ep_mpss) + rc_mpss = ep_mpss; + + /* If Supported greater than limit in module param, limit it */ + if (rc_mpss > (qib_pcie_caps & 7)) + rc_mpss = qib_pcie_caps & 7; + /* If less than (allowed, supported), bump root payload */ + if (rc_mpss > rc_mps) { + rc_mps = rc_mpss; + pcie_set_mps(parent, 128 << rc_mps); + } + /* If less than (allowed, supported), bump endpoint payload */ + if (rc_mpss > ep_mps) { + ep_mps = rc_mpss; + pcie_set_mps(dd->pcidev, 128 << ep_mps); + } + + /* + * Now the Read Request size. + * No field for max supported, but PCIe spec limits it to 4096, + * which is code '5' (log2(4096) - 7) + */ + max_mrrs = 5; + if (max_mrrs > ((qib_pcie_caps >> 4) & 7)) + max_mrrs = (qib_pcie_caps >> 4) & 7; + + max_mrrs = 128 << max_mrrs; + rc_mrrs = pcie_get_readrq(parent); + ep_mrrs = pcie_get_readrq(dd->pcidev); + + if (max_mrrs > rc_mrrs) { + rc_mrrs = max_mrrs; + pcie_set_readrq(parent, rc_mrrs); + } + if (max_mrrs > ep_mrrs) { + ep_mrrs = max_mrrs; + pcie_set_readrq(dd->pcidev, ep_mrrs); + } +} +/* End of PCIe capability tuning */ + +/* + * From here through qib_pci_err_handler definition is invoked via + * PCI error infrastructure, registered via pci + */ +static pci_ers_result_t +qib_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) +{ + struct qib_devdata *dd = pci_get_drvdata(pdev); + pci_ers_result_t ret = PCI_ERS_RESULT_RECOVERED; + + switch (state) { + case pci_channel_io_normal: + qib_devinfo(pdev, "State Normal, ignoring\n"); + break; + + case pci_channel_io_frozen: + qib_devinfo(pdev, "State Frozen, requesting reset\n"); + pci_disable_device(pdev); + ret = PCI_ERS_RESULT_NEED_RESET; + break; + + case pci_channel_io_perm_failure: + qib_devinfo(pdev, "State Permanent Failure, disabling\n"); + if (dd) { + /* no more register accesses! 
*/ + dd->flags &= ~QIB_PRESENT; + qib_disable_after_error(dd); + } + /* else early, or other problem */ + ret = PCI_ERS_RESULT_DISCONNECT; + break; + + default: /* shouldn't happen */ + qib_devinfo(pdev, "QIB PCI errors detected (state %d)\n", + state); + break; + } + return ret; +} + +static pci_ers_result_t +qib_pci_mmio_enabled(struct pci_dev *pdev) +{ + u64 words = 0U; + struct qib_devdata *dd = pci_get_drvdata(pdev); + pci_ers_result_t ret = PCI_ERS_RESULT_RECOVERED; + + if (dd && dd->pport) { + words = dd->f_portcntr(dd->pport, QIBPORTCNTR_WORDRCV); + if (words == ~0ULL) + ret = PCI_ERS_RESULT_NEED_RESET; + } + qib_devinfo(pdev, + "QIB mmio_enabled function called, read wordscntr %Lx, returning %d\n", + words, ret); + return ret; +} + +static pci_ers_result_t +qib_pci_slot_reset(struct pci_dev *pdev) +{ + qib_devinfo(pdev, "QIB slot_reset function called, ignored\n"); + return PCI_ERS_RESULT_CAN_RECOVER; +} + +static pci_ers_result_t +qib_pci_link_reset(struct pci_dev *pdev) +{ + qib_devinfo(pdev, "QIB link_reset function called, ignored\n"); + return PCI_ERS_RESULT_CAN_RECOVER; +} + +static void +qib_pci_resume(struct pci_dev *pdev) +{ + struct qib_devdata *dd = pci_get_drvdata(pdev); + qib_devinfo(pdev, "QIB resume function called\n"); + pci_cleanup_aer_uncorrect_error_status(pdev); + /* + * Running jobs will fail, since it's asynchronous + * unlike sysfs-requested reset. Better than + * doing nothing. + */ + qib_init(dd, 1); /* same as re-init after reset */ +} + +const struct pci_error_handlers qib_pci_err_handler = { + .error_detected = qib_pci_error_detected, + .mmio_enabled = qib_pci_mmio_enabled, + .link_reset = qib_pci_link_reset, + .slot_reset = qib_pci_slot_reset, + .resume = qib_pci_resume, +}; diff --git a/drivers/infiniband/hw/qib/qib_pio_copy.c b/drivers/infiniband/hw/qib/qib_pio_copy.c new file mode 100644 index 00000000000..10b8c444dd3 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_pio_copy.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2009 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "qib.h" + +/** + * qib_pio_copy - copy data to MMIO space, in multiples of 32-bits + * @to: destination, in MMIO space (must be 64-bit aligned) + * @from: source (must be 64-bit aligned) + * @count: number of 32-bit quantities to copy + * + * Copy data from kernel space to MMIO space, in multiples of 32 bits at a + * time. Order of access is not guaranteed, nor is a memory barrier + * performed afterwards. + */ +void qib_pio_copy(void __iomem *to, const void *from, size_t count) +{ +#ifdef CONFIG_64BIT + u64 __iomem *dst = to; + const u64 *src = from; + const u64 *end = src + (count >> 1); + + while (src < end) + __raw_writeq(*src++, dst++); + if (count & 1) + __raw_writel(*(const u32 *)src, dst); +#else + u32 __iomem *dst = to; + const u32 *src = from; + const u32 *end = src + count; + + while (src < end) + __raw_writel(*src++, dst++); +#endif +} diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c new file mode 100644 index 00000000000..7fcc150d603 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -0,0 +1,1384 @@ +/* + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/err.h> +#include <linux/vmalloc.h> +#include <linux/jhash.h> +#ifdef CONFIG_DEBUG_FS +#include <linux/seq_file.h> +#endif + +#include "qib.h" + +#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) +#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) + +static inline unsigned mk_qpn(struct qib_qpn_table *qpt, + struct qpn_map *map, unsigned off) +{ + return (map - qpt->map) * BITS_PER_PAGE + off; +} + +static inline unsigned find_next_offset(struct qib_qpn_table *qpt, + struct qpn_map *map, unsigned off, + unsigned n) +{ + if (qpt->mask) { + off++; + if (((off & qpt->mask) >> 1) >= n) + off = (off | qpt->mask) + 2; + } else + off = find_next_zero_bit(map->page, BITS_PER_PAGE, off); + return off; +} + +/* + * Convert the AETH credit code into the number of credits. 
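 *
 * [Editor's sketch, not part of this commit] Decoding an incoming AETH
 * therefore amounts to:
 *
 *	u32 code = (be32_to_cpu(aeth) >> QIB_AETH_CREDIT_SHIFT) &
 *		   QIB_AETH_CREDIT_MASK;
 *	u32 credits = credit_table[code];	(code 7 maps to 12 RWQEs)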
+ */ +static u32 credit_table[31] = { + 0, /* 0 */ + 1, /* 1 */ + 2, /* 2 */ + 3, /* 3 */ + 4, /* 4 */ + 6, /* 5 */ + 8, /* 6 */ + 12, /* 7 */ + 16, /* 8 */ + 24, /* 9 */ + 32, /* A */ + 48, /* B */ + 64, /* C */ + 96, /* D */ + 128, /* E */ + 192, /* F */ + 256, /* 10 */ + 384, /* 11 */ + 512, /* 12 */ + 768, /* 13 */ + 1024, /* 14 */ + 1536, /* 15 */ + 2048, /* 16 */ + 3072, /* 17 */ + 4096, /* 18 */ + 6144, /* 19 */ + 8192, /* 1A */ + 12288, /* 1B */ + 16384, /* 1C */ + 24576, /* 1D */ + 32768 /* 1E */ +}; + +static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) +{ + unsigned long page = get_zeroed_page(GFP_KERNEL); + + /* + * Free the page if someone raced with us installing it. + */ + + spin_lock(&qpt->lock); + if (map->page) + free_page(page); + else + map->page = (void *)page; + spin_unlock(&qpt->lock); +} + +/* + * Allocate the next available QPN or + * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. + */ +static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, + enum ib_qp_type type, u8 port) +{ + u32 i, offset, max_scan, qpn; + struct qpn_map *map; + u32 ret; + + if (type == IB_QPT_SMI || type == IB_QPT_GSI) { + unsigned n; + + ret = type == IB_QPT_GSI; + n = 1 << (ret + 2 * (port - 1)); + spin_lock(&qpt->lock); + if (qpt->flags & n) + ret = -EINVAL; + else + qpt->flags |= n; + spin_unlock(&qpt->lock); + goto bail; + } + + qpn = qpt->last + 2; + if (qpn >= QPN_MAX) + qpn = 2; + if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues) + qpn = (qpn | qpt->mask) + 2; + offset = qpn & BITS_PER_PAGE_MASK; + map = &qpt->map[qpn / BITS_PER_PAGE]; + max_scan = qpt->nmaps - !offset; + for (i = 0;;) { + if (unlikely(!map->page)) { + get_map_page(qpt, map); + if (unlikely(!map->page)) + break; + } + do { + if (!test_and_set_bit(offset, map->page)) { + qpt->last = qpn; + ret = qpn; + goto bail; + } + offset = find_next_offset(qpt, map, offset, + dd->n_krcv_queues); + qpn = mk_qpn(qpt, map, offset); + /* + * This test differs from alloc_pidmap(). + * If find_next_offset() does find a zero + * bit, we don't need to check for QPN + * wrapping around past our starting QPN. + * We just need to be sure we don't loop + * forever. + */ + } while (offset < BITS_PER_PAGE && qpn < QPN_MAX); + /* + * In order to keep the number of pages allocated to a + * minimum, we scan the all existing pages before increasing + * the size of the bitmap table. + */ + if (++i > max_scan) { + if (qpt->nmaps == QPNMAP_ENTRIES) + break; + map = &qpt->map[qpt->nmaps++]; + offset = 0; + } else if (map < &qpt->map[qpt->nmaps]) { + ++map; + offset = 0; + } else { + map = &qpt->map[0]; + offset = 2; + } + qpn = mk_qpn(qpt, map, offset); + } + + ret = -ENOMEM; + +bail: + return ret; +} + +static void free_qpn(struct qib_qpn_table *qpt, u32 qpn) +{ + struct qpn_map *map; + + map = qpt->map + qpn / BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); +} + +static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn) +{ + return jhash_1word(qpn, dev->qp_rnd) & + (dev->qp_table_size - 1); +} + + +/* + * Put the QP into the hash table. + * The hash table holds a reference to the QP. 
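 *
 * [Editor's note, illustrative] The bucket comes from qpn_hash() above,
 * a Jenkins hash salted with a per-device random value and masked to
 * the power-of-two table size:
 *
 *	n = jhash_1word(qpn, dev->qp_rnd) & (dev->qp_table_size - 1);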
+ */ +static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) +{ + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + unsigned long flags; + unsigned n = qpn_hash(dev, qp->ibqp.qp_num); + + atomic_inc(&qp->refcount); + spin_lock_irqsave(&dev->qpt_lock, flags); + + if (qp->ibqp.qp_num == 0) + rcu_assign_pointer(ibp->qp0, qp); + else if (qp->ibqp.qp_num == 1) + rcu_assign_pointer(ibp->qp1, qp); + else { + qp->next = dev->qp_table[n]; + rcu_assign_pointer(dev->qp_table[n], qp); + } + + spin_unlock_irqrestore(&dev->qpt_lock, flags); +} + +/* + * Remove the QP from the table so it can't be found asynchronously by + * the receive interrupt routine. + */ +static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) +{ + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + unsigned n = qpn_hash(dev, qp->ibqp.qp_num); + unsigned long flags; + int removed = 1; + + spin_lock_irqsave(&dev->qpt_lock, flags); + + if (rcu_dereference_protected(ibp->qp0, + lockdep_is_held(&dev->qpt_lock)) == qp) { + rcu_assign_pointer(ibp->qp0, NULL); + } else if (rcu_dereference_protected(ibp->qp1, + lockdep_is_held(&dev->qpt_lock)) == qp) { + rcu_assign_pointer(ibp->qp1, NULL); + } else { + struct qib_qp *q; + struct qib_qp __rcu **qpp; + + removed = 0; + qpp = &dev->qp_table[n]; + for (; (q = rcu_dereference_protected(*qpp, + lockdep_is_held(&dev->qpt_lock))) != NULL; + qpp = &q->next) + if (q == qp) { + rcu_assign_pointer(*qpp, + rcu_dereference_protected(qp->next, + lockdep_is_held(&dev->qpt_lock))); + removed = 1; + break; + } + } + + spin_unlock_irqrestore(&dev->qpt_lock, flags); + if (removed) { + synchronize_rcu(); + atomic_dec(&qp->refcount); + } +} + +/** + * qib_free_all_qps - check for QPs still in use + * @qpt: the QP table to empty + * + * There should not be any QPs still in use. + * Free memory for table. + */ +unsigned qib_free_all_qps(struct qib_devdata *dd) +{ + struct qib_ibdev *dev = &dd->verbs_dev; + unsigned long flags; + struct qib_qp *qp; + unsigned n, qp_inuse = 0; + + for (n = 0; n < dd->num_pports; n++) { + struct qib_ibport *ibp = &dd->pport[n].ibport_data; + + if (!qib_mcast_tree_empty(ibp)) + qp_inuse++; + rcu_read_lock(); + if (rcu_dereference(ibp->qp0)) + qp_inuse++; + if (rcu_dereference(ibp->qp1)) + qp_inuse++; + rcu_read_unlock(); + } + + spin_lock_irqsave(&dev->qpt_lock, flags); + for (n = 0; n < dev->qp_table_size; n++) { + qp = rcu_dereference_protected(dev->qp_table[n], + lockdep_is_held(&dev->qpt_lock)); + rcu_assign_pointer(dev->qp_table[n], NULL); + + for (; qp; qp = rcu_dereference_protected(qp->next, + lockdep_is_held(&dev->qpt_lock))) + qp_inuse++; + } + spin_unlock_irqrestore(&dev->qpt_lock, flags); + synchronize_rcu(); + + return qp_inuse; +} + +/** + * qib_lookup_qpn - return the QP with the given QPN + * @qpt: the QP table + * @qpn: the QP number to look up + * + * The caller is responsible for decrementing the QP reference count + * when done. 
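 *
 * [Editor's sketch, not part of this commit] The usual caller pattern,
 * releasing the reference taken here:
 *
 *	qp = qib_lookup_qpn(ibp, qpn);
 *	if (qp) {
 *		... process the packet ...
 *		if (atomic_dec_and_test(&qp->refcount))
 *			wake_up(&qp->wait);
 *	}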
+ */ +struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) +{ + struct qib_qp *qp = NULL; + + rcu_read_lock(); + if (unlikely(qpn <= 1)) { + if (qpn == 0) + qp = rcu_dereference(ibp->qp0); + else + qp = rcu_dereference(ibp->qp1); + if (qp) + atomic_inc(&qp->refcount); + } else { + struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; + unsigned n = qpn_hash(dev, qpn); + + for (qp = rcu_dereference(dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) + if (qp->ibqp.qp_num == qpn) { + atomic_inc(&qp->refcount); + break; + } + } + rcu_read_unlock(); + return qp; +} + +/** + * qib_reset_qp - initialize the QP state to the reset state + * @qp: the QP to reset + * @type: the QP type + */ +static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type) +{ + qp->remote_qpn = 0; + qp->qkey = 0; + qp->qp_access_flags = 0; + atomic_set(&qp->s_dma_busy, 0); + qp->s_flags &= QIB_S_SIGNAL_REQ_WR; + qp->s_hdrwords = 0; + qp->s_wqe = NULL; + qp->s_draining = 0; + qp->s_next_psn = 0; + qp->s_last_psn = 0; + qp->s_sending_psn = 0; + qp->s_sending_hpsn = 0; + qp->s_psn = 0; + qp->r_psn = 0; + qp->r_msn = 0; + if (type == IB_QPT_RC) { + qp->s_state = IB_OPCODE_RC_SEND_LAST; + qp->r_state = IB_OPCODE_RC_SEND_LAST; + } else { + qp->s_state = IB_OPCODE_UC_SEND_LAST; + qp->r_state = IB_OPCODE_UC_SEND_LAST; + } + qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; + qp->r_nak_state = 0; + qp->r_aflags = 0; + qp->r_flags = 0; + qp->s_head = 0; + qp->s_tail = 0; + qp->s_cur = 0; + qp->s_acked = 0; + qp->s_last = 0; + qp->s_ssn = 1; + qp->s_lsn = 0; + qp->s_mig_state = IB_MIG_MIGRATED; + memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + qp->r_head_ack_queue = 0; + qp->s_tail_ack_queue = 0; + qp->s_num_rd_atomic = 0; + if (qp->r_rq.wq) { + qp->r_rq.wq->head = 0; + qp->r_rq.wq->tail = 0; + } + qp->r_sge.num_sge = 0; +} + +static void clear_mr_refs(struct qib_qp *qp, int clr_sends) +{ + unsigned n; + + if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) + qib_put_ss(&qp->s_rdma_read_sge); + + qib_put_ss(&qp->r_sge); + + if (clr_sends) { + while (qp->s_last != qp->s_head) { + struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_last); + unsigned i; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + + qib_put_mr(sge->mr); + } + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI) + atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } + if (qp->s_rdma_mr) { + qib_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + } + + if (qp->ibqp.qp_type != IB_QPT_RC) + return; + + for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + struct qib_ack_entry *e = &qp->s_ack_queue[n]; + + if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && + e->rdma_sge.mr) { + qib_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + } +} + +/** + * qib_error_qp - put a QP into the error state + * @qp: the QP to put into the error state + * @err: the receive completion error to signal if a RWQE is active + * + * Flushes both send and receive work queues. + * Returns true if last WQE event should be generated. + * The QP r_lock and s_lock should be held and interrupts disabled. + * If we are already in error state, just return. 
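 *
 * [Editor's sketch, illustrative only] A caller honoring the locking
 * contract looks like:
 *
 *	spin_lock_irqsave(&qp->r_lock, flags);
 *	spin_lock(&qp->s_lock);
 *	lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 *	spin_unlock(&qp->s_lock);
 *	spin_unlock_irqrestore(&qp->r_lock, flags);
 *	if (lastwqe)
 *		... post the IB_EVENT_QP_LAST_WQE_REACHED event ...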
+ */ +int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err) +{ + struct qib_ibdev *dev = to_idev(qp->ibqp.device); + struct ib_wc wc; + int ret = 0; + + if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) + goto bail; + + qp->state = IB_QPS_ERR; + + if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) { + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR); + del_timer(&qp->s_timer); + } + + if (qp->s_flags & QIB_S_ANY_WAIT_SEND) + qp->s_flags &= ~QIB_S_ANY_WAIT_SEND; + + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) { + qp->s_flags &= ~QIB_S_ANY_WAIT_IO; + list_del_init(&qp->iowait); + } + spin_unlock(&dev->pending_lock); + + if (!(qp->s_flags & QIB_S_BUSY)) { + qp->s_hdrwords = 0; + if (qp->s_rdma_mr) { + qib_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + if (qp->s_tx) { + qib_put_txreq(qp->s_tx); + qp->s_tx = NULL; + } + } + + /* Schedule the sending tasklet to drain the send work queue. */ + if (qp->s_last != qp->s_head) + qib_schedule_send(qp); + + clear_mr_refs(qp, 0); + + memset(&wc, 0, sizeof(wc)); + wc.qp = &qp->ibqp; + wc.opcode = IB_WC_RECV; + + if (test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) { + wc.wr_id = qp->r_wr_id; + wc.status = err; + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + } + wc.status = IB_WC_WR_FLUSH_ERR; + + if (qp->r_rq.wq) { + struct qib_rwq *wq; + u32 head; + u32 tail; + + spin_lock(&qp->r_rq.lock); + + /* sanity check pointers before trusting them */ + wq = qp->r_rq.wq; + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + while (tail != head) { + wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; + if (++tail >= qp->r_rq.size) + tail = 0; + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + } + wq->tail = tail; + + spin_unlock(&qp->r_rq.lock); + } else if (qp->ibqp.event_handler) + ret = 1; + +bail: + return ret; +} + +/** + * qib_modify_qp - modify the attributes of a queue pair + * @ibqp: the queue pair who's attributes we're modifying + * @attr: the new attributes + * @attr_mask: the mask of attributes to modify + * @udata: user data for libibverbs.so + * + * Returns 0 on success, otherwise returns an errno. + */ +int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct qib_ibdev *dev = to_idev(ibqp->device); + struct qib_qp *qp = to_iqp(ibqp); + enum ib_qp_state cur_state, new_state; + struct ib_event ev; + int lastwqe = 0; + int mig = 0; + int ret; + u32 pmtu = 0; /* for gcc warning only */ + + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_lock); + + cur_state = attr_mask & IB_QP_CUR_STATE ? + attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? 
attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) + goto inval; + + if (attr_mask & IB_QP_AV) { + if (attr->ah_attr.dlid >= QIB_MULTICAST_LID_BASE) + goto inval; + if (qib_check_ah(qp->ibqp.device, &attr->ah_attr)) + goto inval; + } + + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_ah_attr.dlid >= QIB_MULTICAST_LID_BASE) + goto inval; + if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) + goto inval; + if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev))) + goto inval; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + if (attr->pkey_index >= qib_get_npkeys(dd_from_dev(dev))) + goto inval; + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + if (attr->min_rnr_timer > 31) + goto inval; + + if (attr_mask & IB_QP_PORT) + if (qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI || + attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) + goto inval; + + if (attr_mask & IB_QP_DEST_QPN) + if (attr->dest_qp_num > QIB_QPN_MASK) + goto inval; + + if (attr_mask & IB_QP_RETRY_CNT) + if (attr->retry_cnt > 7) + goto inval; + + if (attr_mask & IB_QP_RNR_RETRY) + if (attr->rnr_retry > 7) + goto inval; + + /* + * Don't allow invalid path_mtu values. OK to set greater + * than the active mtu (or even the max_cap, if we have tuned + * that to a small mtu. We'll set qp->path_mtu + * to the lesser of requested attribute mtu and active, + * for packetizing messages. + * Note that the QP port has to be set in INIT and MTU in RTR. + */ + if (attr_mask & IB_QP_PATH_MTU) { + struct qib_devdata *dd = dd_from_dev(dev); + int mtu, pidx = qp->port_num - 1; + + mtu = ib_mtu_enum_to_int(attr->path_mtu); + if (mtu == -1) + goto inval; + if (mtu > dd->pport[pidx].ibmtu) { + switch (dd->pport[pidx].ibmtu) { + case 4096: + pmtu = IB_MTU_4096; + break; + case 2048: + pmtu = IB_MTU_2048; + break; + case 1024: + pmtu = IB_MTU_1024; + break; + case 512: + pmtu = IB_MTU_512; + break; + case 256: + pmtu = IB_MTU_256; + break; + default: + pmtu = IB_MTU_2048; + } + } else + pmtu = attr->path_mtu; + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + if (attr->path_mig_state == IB_MIG_REARM) { + if (qp->s_mig_state == IB_MIG_ARMED) + goto inval; + if (new_state != IB_QPS_RTS) + goto inval; + } else if (attr->path_mig_state == IB_MIG_MIGRATED) { + if (qp->s_mig_state == IB_MIG_REARM) + goto inval; + if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD) + goto inval; + if (qp->s_mig_state == IB_MIG_ARMED) + mig = 1; + } else + goto inval; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > QIB_MAX_RDMA_ATOMIC) + goto inval; + + switch (new_state) { + case IB_QPS_RESET: + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->iowait)) + list_del_init(&qp->iowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT); + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + /* Stop the sending work queue and retry timer */ + cancel_work_sync(&qp->s_work); + del_timer_sync(&qp->s_timer); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + if (qp->s_tx) { + qib_put_txreq(qp->s_tx); + qp->s_tx = NULL; + } + remove_qp(dev, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_lock); + clear_mr_refs(qp, 1); + qib_reset_qp(qp, ibqp->qp_type); + } + break; + + case IB_QPS_RTR: + /* Allow event to retrigger if QP set to RTR more than once */ + 
qp->r_flags &= ~QIB_R_COMM_EST; + qp->state = new_state; + break; + + case IB_QPS_SQD: + qp->s_draining = qp->s_last != qp->s_cur; + qp->state = new_state; + break; + + case IB_QPS_SQE: + if (qp->ibqp.qp_type == IB_QPT_RC) + goto inval; + qp->state = new_state; + break; + + case IB_QPS_ERR: + lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + break; + + default: + qp->state = new_state; + break; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + qp->s_pkey_index = attr->pkey_index; + + if (attr_mask & IB_QP_PORT) + qp->port_num = attr->port_num; + + if (attr_mask & IB_QP_DEST_QPN) + qp->remote_qpn = attr->dest_qp_num; + + if (attr_mask & IB_QP_SQ_PSN) { + qp->s_next_psn = attr->sq_psn & QIB_PSN_MASK; + qp->s_psn = qp->s_next_psn; + qp->s_sending_psn = qp->s_next_psn; + qp->s_last_psn = qp->s_next_psn - 1; + qp->s_sending_hpsn = qp->s_last_psn; + } + + if (attr_mask & IB_QP_RQ_PSN) + qp->r_psn = attr->rq_psn & QIB_PSN_MASK; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->qp_access_flags = attr->qp_access_flags; + + if (attr_mask & IB_QP_AV) { + qp->remote_ah_attr = attr->ah_attr; + qp->s_srate = attr->ah_attr.static_rate; + } + + if (attr_mask & IB_QP_ALT_PATH) { + qp->alt_ah_attr = attr->alt_ah_attr; + qp->s_alt_pkey_index = attr->alt_pkey_index; + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + qp->s_mig_state = attr->path_mig_state; + if (mig) { + qp->remote_ah_attr = qp->alt_ah_attr; + qp->port_num = qp->alt_ah_attr.port_num; + qp->s_pkey_index = qp->s_alt_pkey_index; + } + } + + if (attr_mask & IB_QP_PATH_MTU) { + qp->path_mtu = pmtu; + qp->pmtu = ib_mtu_enum_to_int(pmtu); + } + + if (attr_mask & IB_QP_RETRY_CNT) { + qp->s_retry_cnt = attr->retry_cnt; + qp->s_retry = attr->retry_cnt; + } + + if (attr_mask & IB_QP_RNR_RETRY) { + qp->s_rnr_retry_cnt = attr->rnr_retry; + qp->s_rnr_retry = attr->rnr_retry; + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + qp->r_min_rnr_timer = attr->min_rnr_timer; + + if (attr_mask & IB_QP_TIMEOUT) { + qp->timeout = attr->timeout; + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); + } + + if (attr_mask & IB_QP_QKEY) + qp->qkey = attr->qkey; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->r_max_rd_atomic = attr->max_dest_rd_atomic; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + qp->s_max_rd_atomic = attr->max_rd_atomic; + + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + insert_qp(dev, qp); + + if (lastwqe) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + if (mig) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_PATH_MIG; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + ret = 0; + goto bail; + +inval: + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + ret = -EINVAL; + +bail: + return ret; +} + +int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr) +{ + struct qib_qp *qp = to_iqp(ibqp); + + attr->qp_state = qp->state; + attr->cur_qp_state = attr->qp_state; + attr->path_mtu = qp->path_mtu; + attr->path_mig_state = qp->s_mig_state; + attr->qkey = qp->qkey; + attr->rq_psn = qp->r_psn & QIB_PSN_MASK; + attr->sq_psn = qp->s_next_psn & QIB_PSN_MASK; + attr->dest_qp_num = qp->remote_qpn; + attr->qp_access_flags = qp->qp_access_flags; + attr->cap.max_send_wr = qp->s_size - 1; + attr->cap.max_recv_wr = qp->ibqp.srq ? 
0 : qp->r_rq.size - 1; + attr->cap.max_send_sge = qp->s_max_sge; + attr->cap.max_recv_sge = qp->r_rq.max_sge; + attr->cap.max_inline_data = 0; + attr->ah_attr = qp->remote_ah_attr; + attr->alt_ah_attr = qp->alt_ah_attr; + attr->pkey_index = qp->s_pkey_index; + attr->alt_pkey_index = qp->s_alt_pkey_index; + attr->en_sqd_async_notify = 0; + attr->sq_draining = qp->s_draining; + attr->max_rd_atomic = qp->s_max_rd_atomic; + attr->max_dest_rd_atomic = qp->r_max_rd_atomic; + attr->min_rnr_timer = qp->r_min_rnr_timer; + attr->port_num = qp->port_num; + attr->timeout = qp->timeout; + attr->retry_cnt = qp->s_retry_cnt; + attr->rnr_retry = qp->s_rnr_retry_cnt; + attr->alt_port_num = qp->alt_ah_attr.port_num; + attr->alt_timeout = qp->alt_timeout; + + init_attr->event_handler = qp->ibqp.event_handler; + init_attr->qp_context = qp->ibqp.qp_context; + init_attr->send_cq = qp->ibqp.send_cq; + init_attr->recv_cq = qp->ibqp.recv_cq; + init_attr->srq = qp->ibqp.srq; + init_attr->cap = attr->cap; + if (qp->s_flags & QIB_S_SIGNAL_REQ_WR) + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; + else + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->qp_type = qp->ibqp.qp_type; + init_attr->port_num = qp->port_num; + return 0; +} + +/** + * qib_compute_aeth - compute the AETH (syndrome + MSN) + * @qp: the queue pair to compute the AETH for + * + * Returns the AETH. + */ +__be32 qib_compute_aeth(struct qib_qp *qp) +{ + u32 aeth = qp->r_msn & QIB_MSN_MASK; + + if (qp->ibqp.srq) { + /* + * Shared receive queues don't generate credits. + * Set the credit field to the invalid value. + */ + aeth |= QIB_AETH_CREDIT_INVAL << QIB_AETH_CREDIT_SHIFT; + } else { + u32 min, max, x; + u32 credits; + struct qib_rwq *wq = qp->r_rq.wq; + u32 head; + u32 tail; + + /* sanity check pointers before trusting them */ + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + /* + * Compute the number of credits available (RWQEs). + * XXX Not holding the r_rq.lock here so there is a small + * chance that the pair of reads are not atomic. + */ + credits = head - tail; + if ((int)credits < 0) + credits += qp->r_rq.size; + /* + * Binary search the credit table to find the code to + * use. + */ + min = 0; + max = 31; + for (;;) { + x = (min + max) / 2; + if (credit_table[x] == credits) + break; + if (credit_table[x] > credits) + max = x; + else if (min == x) + break; + else + min = x; + } + aeth |= x << QIB_AETH_CREDIT_SHIFT; + } + return cpu_to_be32(aeth); +} + +/** + * qib_create_qp - create a queue pair for a device + * @ibpd: the protection domain who's device we create the queue pair for + * @init_attr: the attributes of the queue pair + * @udata: user data for libibverbs.so + * + * Returns the queue pair on success, otherwise returns an errno. + * + * Called by the ib_create_qp() core verbs function. + */ +struct ib_qp *qib_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct qib_qp *qp; + int err; + struct qib_swqe *swq = NULL; + struct qib_ibdev *dev; + struct qib_devdata *dd; + size_t sz; + size_t sg_list_sz; + struct ib_qp *ret; + + if (init_attr->cap.max_send_sge > ib_qib_max_sges || + init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || + init_attr->create_flags) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + /* Check receive queue parameters if no SRQ is specified. 
*/ + if (!init_attr->srq) { + if (init_attr->cap.max_recv_sge > ib_qib_max_sges || + init_attr->cap.max_recv_wr > ib_qib_max_qp_wrs) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + if (init_attr->cap.max_send_sge + + init_attr->cap.max_send_wr + + init_attr->cap.max_recv_sge + + init_attr->cap.max_recv_wr == 0) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + } + + switch (init_attr->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + if (init_attr->port_num == 0 || + init_attr->port_num > ibpd->device->phys_port_cnt) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + case IB_QPT_UC: + case IB_QPT_RC: + case IB_QPT_UD: + sz = sizeof(struct qib_sge) * + init_attr->cap.max_send_sge + + sizeof(struct qib_swqe); + swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + if (swq == NULL) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + sz = sizeof(*qp); + sg_list_sz = 0; + if (init_attr->srq) { + struct qib_srq *srq = to_isrq(init_attr->srq); + + if (srq->rq.max_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (srq->rq.max_sge - 1); + } else if (init_attr->cap.max_recv_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (init_attr->cap.max_recv_sge - 1); + qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + if (!qp) { + ret = ERR_PTR(-ENOMEM); + goto bail_swq; + } + RCU_INIT_POINTER(qp->next, NULL); + qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); + if (!qp->s_hdr) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); + if (init_attr->srq) + sz = 0; + else { + qp->r_rq.size = init_attr->cap.max_recv_wr + 1; + qp->r_rq.max_sge = init_attr->cap.max_recv_sge; + sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + + sizeof(struct qib_rwqe); + qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) + + qp->r_rq.size * sz); + if (!qp->r_rq.wq) { + ret = ERR_PTR(-ENOMEM); + goto bail_qp; + } + } + + /* + * ib_create_qp() will initialize qp->ibqp + * except for qp->ibqp.qp_num. + */ + spin_lock_init(&qp->r_lock); + spin_lock_init(&qp->s_lock); + spin_lock_init(&qp->r_rq.lock); + atomic_set(&qp->refcount, 0); + init_waitqueue_head(&qp->wait); + init_waitqueue_head(&qp->wait_dma); + init_timer(&qp->s_timer); + qp->s_timer.data = (unsigned long)qp; + INIT_WORK(&qp->s_work, qib_do_send); + INIT_LIST_HEAD(&qp->iowait); + INIT_LIST_HEAD(&qp->rspwait); + qp->state = IB_QPS_RESET; + qp->s_wq = swq; + qp->s_size = init_attr->cap.max_send_wr + 1; + qp->s_max_sge = init_attr->cap.max_send_sge; + if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) + qp->s_flags = QIB_S_SIGNAL_REQ_WR; + dev = to_idev(ibpd->device); + dd = dd_from_dev(dev); + err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type, + init_attr->port_num); + if (err < 0) { + ret = ERR_PTR(err); + vfree(qp->r_rq.wq); + goto bail_qp; + } + qp->ibqp.qp_num = err; + qp->port_num = init_attr->port_num; + qib_reset_qp(qp, init_attr->qp_type); + break; + + default: + /* Don't support raw QPs */ + ret = ERR_PTR(-ENOSYS); + goto bail; + } + + init_attr->cap.max_inline_data = 0; + + /* + * Return the address of the RWQ as the offset to mmap. + * See qib_mmap() for details. 
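 *
 * [Editor's sketch, hypothetical userspace side] The user library that
 * receives this offset in its create-QP response would map the RWQ with
 * something like the following (resp.offset and cmd_fd are illustrative
 * names, not defined by this commit):
 *
 *	wq = mmap(NULL, wq_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		  context->cmd_fd, resp.offset);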
+ */ + if (udata && udata->outlen >= sizeof(__u64)) { + if (!qp->r_rq.wq) { + __u64 offset = 0; + + err = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else { + u32 s = sizeof(struct qib_rwq) + qp->r_rq.size * sz; + + qp->ip = qib_create_mmap_info(dev, s, + ibpd->uobject->context, + qp->r_rq.wq); + if (!qp->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + err = ib_copy_to_udata(udata, &(qp->ip->offset), + sizeof(qp->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } + } + + spin_lock(&dev->n_qps_lock); + if (dev->n_qps_allocated == ib_qib_max_qps) { + spin_unlock(&dev->n_qps_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_qps_allocated++; + spin_unlock(&dev->n_qps_lock); + + if (qp->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + ret = &qp->ibqp; + goto bail; + +bail_ip: + if (qp->ip) + kref_put(&qp->ip->ref, qib_release_mmap_info); + else + vfree(qp->r_rq.wq); + free_qpn(&dev->qpn_table, qp->ibqp.qp_num); +bail_qp: + kfree(qp->s_hdr); + kfree(qp); +bail_swq: + vfree(swq); +bail: + return ret; +} + +/** + * qib_destroy_qp - destroy a queue pair + * @ibqp: the queue pair to destroy + * + * Returns 0 on success. + * + * Note that this can be called while the QP is actively sending or + * receiving! + */ +int qib_destroy_qp(struct ib_qp *ibqp) +{ + struct qib_qp *qp = to_iqp(ibqp); + struct qib_ibdev *dev = to_idev(ibqp->device); + + /* Make sure HW and driver activity is stopped. */ + spin_lock_irq(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + spin_lock(&dev->pending_lock); + if (!list_empty(&qp->iowait)) + list_del_init(&qp->iowait); + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT); + spin_unlock_irq(&qp->s_lock); + cancel_work_sync(&qp->s_work); + del_timer_sync(&qp->s_timer); + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); + if (qp->s_tx) { + qib_put_txreq(qp->s_tx); + qp->s_tx = NULL; + } + remove_qp(dev, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + clear_mr_refs(qp, 1); + } else + spin_unlock_irq(&qp->s_lock); + + /* all user's cleaned up, mark it available */ + free_qpn(&dev->qpn_table, qp->ibqp.qp_num); + spin_lock(&dev->n_qps_lock); + dev->n_qps_allocated--; + spin_unlock(&dev->n_qps_lock); + + if (qp->ip) + kref_put(&qp->ip->ref, qib_release_mmap_info); + else + vfree(qp->r_rq.wq); + vfree(qp->s_wq); + kfree(qp->s_hdr); + kfree(qp); + return 0; +} + +/** + * qib_init_qpn_table - initialize the QP number table for a device + * @qpt: the QPN table + */ +void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt) +{ + spin_lock_init(&qpt->lock); + qpt->last = 1; /* start with QPN 2 */ + qpt->nmaps = 1; + qpt->mask = dd->qpn_mask; +} + +/** + * qib_free_qpn_table - free the QP number table for a device + * @qpt: the QPN table + */ +void qib_free_qpn_table(struct qib_qpn_table *qpt) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(qpt->map); i++) + if (qpt->map[i].page) + free_page((unsigned long) qpt->map[i].page); +} + +/** + * qib_get_credit - flush the send work queue of a QP + * @qp: the qp who's send work queue to flush + * @aeth: the Acknowledge Extended Transport Header + * + * The QP s_lock should be held. 
+ */ +void qib_get_credit(struct qib_qp *qp, u32 aeth) +{ + u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK; + + /* + * If the credit is invalid, we can send + * as many packets as we like. Otherwise, we have to + * honor the credit field. + */ + if (credit == QIB_AETH_CREDIT_INVAL) { + if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) { + qp->s_flags |= QIB_S_UNLIMITED_CREDIT; + if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) { + qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT; + qib_schedule_send(qp); + } + } + } else if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) { + /* Compute new LSN (i.e., MSN + credit) */ + credit = (aeth + credit_table[credit]) & QIB_MSN_MASK; + if (qib_cmp24(credit, qp->s_lsn) > 0) { + qp->s_lsn = credit; + if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) { + qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT; + qib_schedule_send(qp); + } + } + } +} + +#ifdef CONFIG_DEBUG_FS + +struct qib_qp_iter { + struct qib_ibdev *dev; + struct qib_qp *qp; + int n; +}; + +struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) +{ + struct qib_qp_iter *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return NULL; + + iter->dev = dev; + if (qib_qp_iter_next(iter)) { + kfree(iter); + return NULL; + } + + return iter; +} + +int qib_qp_iter_next(struct qib_qp_iter *iter) +{ + struct qib_ibdev *dev = iter->dev; + int n = iter->n; + int ret = 1; + struct qib_qp *pqp = iter->qp; + struct qib_qp *qp; + + rcu_read_lock(); + for (; n < dev->qp_table_size; n++) { + if (pqp) + qp = rcu_dereference(pqp->next); + else + qp = rcu_dereference(dev->qp_table[n]); + pqp = qp; + if (qp) { + if (iter->qp) + atomic_dec(&iter->qp->refcount); + atomic_inc(&qp->refcount); + rcu_read_unlock(); + iter->qp = qp; + iter->n = n; + return 0; + } + } + rcu_read_unlock(); + if (iter->qp) + atomic_dec(&iter->qp->refcount); + return ret; +} + +static const char * const qp_type_str[] = { + "SMI", "GSI", "RC", "UC", "UD", +}; + +void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter) +{ + struct qib_swqe *wqe; + struct qib_qp *qp = iter->qp; + + wqe = get_swqe_ptr(qp, qp->s_last); + seq_printf(s, + "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n", + iter->n, + qp->ibqp.qp_num, + qp_type_str[qp->ibqp.qp_type], + qp->state, + wqe->wr.opcode, + qp->s_hdrwords, + qp->s_flags, + atomic_read(&qp->s_dma_busy), + !list_empty(&qp->iowait), + qp->timeout, + wqe->ssn, + qp->s_lsn, + qp->s_last_psn, + qp->s_psn, qp->s_next_psn, + qp->s_sending_psn, qp->s_sending_hpsn, + qp->s_last, qp->s_acked, qp->s_cur, + qp->s_tail, qp->s_head, qp->s_size, + qp->remote_qpn, + qp->remote_ah_attr.dlid); +} + +#endif diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c new file mode 100644 index 00000000000..fa71b1e666c --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_qsfp.c @@ -0,0 +1,556 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/delay.h> +#include <linux/pci.h> +#include <linux/vmalloc.h> + +#include "qib.h" +#include "qib_qsfp.h" + +/* + * QSFP support for ib_qib driver, using "Two Wire Serial Interface" driver + * in qib_twsi.c + */ +#define QSFP_MAX_RETRY 4 + +static int qsfp_read(struct qib_pportdata *ppd, int addr, void *bp, int len) +{ + struct qib_devdata *dd = ppd->dd; + u32 out, mask; + int ret, cnt, pass = 0; + int stuck = 0; + u8 *buff = bp; + + ret = mutex_lock_interruptible(&dd->eep_lock); + if (ret) + goto no_unlock; + + if (dd->twsi_eeprom_dev == QIB_TWSI_NO_DEV) { + ret = -ENXIO; + goto bail; + } + + /* + * We presume, if we are called at all, that this board has + * QSFP. This is on the same i2c chain as the legacy parts, + * but only responds if the module is selected via GPIO pins. + * Further, there are very long setup and hold requirements + * on MODSEL. + */ + mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE; + out = QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE; + if (ppd->hw_pidx) { + mask <<= QSFP_GPIO_PORT2_SHIFT; + out <<= QSFP_GPIO_PORT2_SHIFT; + } + + dd->f_gpio_mod(dd, out, mask, mask); + + /* + * Module could take up to 2 Msec to respond to MOD_SEL, and there + * is no way to tell if it is ready, so we must wait. + */ + msleep(2); + + /* Make sure TWSI bus is in sane state. */ + ret = qib_twsi_reset(dd); + if (ret) { + qib_dev_porterr(dd, ppd->port, + "QSFP interface Reset for read failed\n"); + ret = -EIO; + stuck = 1; + goto deselect; + } + + /* All QSFP modules are at A0 */ + + cnt = 0; + while (cnt < len) { + unsigned in_page; + int wlen = len - cnt; + in_page = addr % QSFP_PAGESIZE; + if ((in_page + wlen) > QSFP_PAGESIZE) + wlen = QSFP_PAGESIZE - in_page; + ret = qib_twsi_blk_rd(dd, QSFP_DEV, addr, buff + cnt, wlen); + /* Some QSFP's fail first try. Retry as experiment */ + if (ret && cnt == 0 && ++pass < QSFP_MAX_RETRY) + continue; + if (ret) { + /* qib_twsi_blk_rd() 1 for error, else 0 */ + ret = -EIO; + goto deselect; + } + addr += wlen; + cnt += wlen; + } + ret = cnt; + +deselect: + /* + * Module could take up to 10 uSec after transfer before + * ready to respond to MOD_SEL negation, and there is no way + * to tell if it is ready, so we must wait. + */ + udelay(10); + /* set QSFP MODSEL, RST. 
LP all high */ + dd->f_gpio_mod(dd, mask, mask, mask); + + /* + * Module could take up to 2 Msec to respond to MOD_SEL + * going away, and there is no way to tell if it is ready. + * so we must wait. + */ + if (stuck) + qib_dev_err(dd, "QSFP interface bus stuck non-idle\n"); + + if (pass >= QSFP_MAX_RETRY && ret) + qib_dev_porterr(dd, ppd->port, "QSFP failed even retrying\n"); + else if (pass) + qib_dev_porterr(dd, ppd->port, "QSFP retries: %d\n", pass); + + msleep(2); + +bail: + mutex_unlock(&dd->eep_lock); + +no_unlock: + return ret; +} + +/* + * qsfp_write + * We do not ordinarily write the QSFP, but this is needed to select + * the page on non-flat QSFPs, and possibly later unusual cases + */ +static int qib_qsfp_write(struct qib_pportdata *ppd, int addr, void *bp, + int len) +{ + struct qib_devdata *dd = ppd->dd; + u32 out, mask; + int ret, cnt; + u8 *buff = bp; + + ret = mutex_lock_interruptible(&dd->eep_lock); + if (ret) + goto no_unlock; + + if (dd->twsi_eeprom_dev == QIB_TWSI_NO_DEV) { + ret = -ENXIO; + goto bail; + } + + /* + * We presume, if we are called at all, that this board has + * QSFP. This is on the same i2c chain as the legacy parts, + * but only responds if the module is selected via GPIO pins. + * Further, there are very long setup and hold requirements + * on MODSEL. + */ + mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE; + out = QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE; + if (ppd->hw_pidx) { + mask <<= QSFP_GPIO_PORT2_SHIFT; + out <<= QSFP_GPIO_PORT2_SHIFT; + } + dd->f_gpio_mod(dd, out, mask, mask); + + /* + * Module could take up to 2 Msec to respond to MOD_SEL, + * and there is no way to tell if it is ready, so we must wait. + */ + msleep(2); + + /* Make sure TWSI bus is in sane state. */ + ret = qib_twsi_reset(dd); + if (ret) { + qib_dev_porterr(dd, ppd->port, + "QSFP interface Reset for write failed\n"); + ret = -EIO; + goto deselect; + } + + /* All QSFP modules are at A0 */ + + cnt = 0; + while (cnt < len) { + unsigned in_page; + int wlen = len - cnt; + in_page = addr % QSFP_PAGESIZE; + if ((in_page + wlen) > QSFP_PAGESIZE) + wlen = QSFP_PAGESIZE - in_page; + ret = qib_twsi_blk_wr(dd, QSFP_DEV, addr, buff + cnt, wlen); + if (ret) { + /* qib_twsi_blk_wr() 1 for error, else 0 */ + ret = -EIO; + goto deselect; + } + addr += wlen; + cnt += wlen; + } + ret = cnt; + +deselect: + /* + * Module could take up to 10 uSec after transfer before + * ready to respond to MOD_SEL negation, and there is no way + * to tell if it is ready, so we must wait. + */ + udelay(10); + /* set QSFP MODSEL, RST, LP high */ + dd->f_gpio_mod(dd, mask, mask, mask); + /* + * Module could take up to 2 Msec to respond to MOD_SEL + * going away, and there is no way to tell if it is ready. + * so we must wait. + */ + msleep(2); + +bail: + mutex_unlock(&dd->eep_lock); + +no_unlock: + return ret; +} + +/* + * For validation, we want to check the checksums, even of the + * fields we do not otherwise use. 
This function reads the bytes from + <first> to <next-1> and returns the 8 LSBs of the sum, or <0 for errors + */ +static int qsfp_cks(struct qib_pportdata *ppd, int first, int next) +{ + int ret; + u16 cks; + u8 bval; + + cks = 0; + while (first < next) { + ret = qsfp_read(ppd, first, &bval, 1); + if (ret < 0) + goto bail; + cks += bval; + ++first; + } + ret = cks & 0xFF; +bail: + return ret; + +} + +int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp) +{ + int ret; + int idx; + u16 cks; + u8 peek[4]; + + /* ensure sane contents on invalid reads, for cable swaps */ + memset(cp, 0, sizeof(*cp)); + + if (!qib_qsfp_mod_present(ppd)) { + ret = -ENODEV; + goto bail; + } + + ret = qsfp_read(ppd, 0, peek, 3); + if (ret < 0) + goto bail; + if ((peek[0] & 0xFE) != 0x0C) + qib_dev_porterr(ppd->dd, ppd->port, + "QSFP byte0 is 0x%02X, S/B 0x0C/D\n", peek[0]); + + if ((peek[2] & 2) == 0) { + /* + * If cable is paged, rather than "flat memory", we need to + * set the page to zero, even if it already appears to be zero. + */ + u8 poke = 0; + ret = qib_qsfp_write(ppd, 127, &poke, 1); + udelay(50); + if (ret != 1) { + qib_dev_porterr(ppd->dd, ppd->port, + "Failed QSFP Page set\n"); + goto bail; + } + } + + ret = qsfp_read(ppd, QSFP_MOD_ID_OFFS, &cp->id, 1); + if (ret < 0) + goto bail; + if ((cp->id & 0xFE) != 0x0C) + qib_dev_porterr(ppd->dd, ppd->port, + "QSFP ID byte is 0x%02X, S/B 0x0C/D\n", cp->id); + cks = cp->id; + + ret = qsfp_read(ppd, QSFP_MOD_PWR_OFFS, &cp->pwr, 1); + if (ret < 0) + goto bail; + cks += cp->pwr; + + ret = qsfp_cks(ppd, QSFP_MOD_PWR_OFFS + 1, QSFP_MOD_LEN_OFFS); + if (ret < 0) + goto bail; + cks += ret; + + ret = qsfp_read(ppd, QSFP_MOD_LEN_OFFS, &cp->len, 1); + if (ret < 0) + goto bail; + cks += cp->len; + + ret = qsfp_read(ppd, QSFP_MOD_TECH_OFFS, &cp->tech, 1); + if (ret < 0) + goto bail; + cks += cp->tech; + + ret = qsfp_read(ppd, QSFP_VEND_OFFS, &cp->vendor, QSFP_VEND_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_VEND_LEN; ++idx) + cks += cp->vendor[idx]; + + ret = qsfp_read(ppd, QSFP_IBXCV_OFFS, &cp->xt_xcv, 1); + if (ret < 0) + goto bail; + cks += cp->xt_xcv; + + ret = qsfp_read(ppd, QSFP_VOUI_OFFS, &cp->oui, QSFP_VOUI_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_VOUI_LEN; ++idx) + cks += cp->oui[idx]; + + ret = qsfp_read(ppd, QSFP_PN_OFFS, &cp->partnum, QSFP_PN_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_PN_LEN; ++idx) + cks += cp->partnum[idx]; + + ret = qsfp_read(ppd, QSFP_REV_OFFS, &cp->rev, QSFP_REV_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_REV_LEN; ++idx) + cks += cp->rev[idx]; + + ret = qsfp_read(ppd, QSFP_ATTEN_OFFS, &cp->atten, QSFP_ATTEN_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_ATTEN_LEN; ++idx) + cks += cp->atten[idx]; + + ret = qsfp_cks(ppd, QSFP_ATTEN_OFFS + QSFP_ATTEN_LEN, QSFP_CC_OFFS); + if (ret < 0) + goto bail; + cks += ret; + + cks &= 0xFF; + ret = qsfp_read(ppd, QSFP_CC_OFFS, &cp->cks1, 1); + if (ret < 0) + goto bail; + if (cks != cp->cks1) + qib_dev_porterr(ppd->dd, ppd->port, + "QSFP cks1 is %02X, computed %02X\n", cp->cks1, + cks); + + /* Second checksum covers 192 to (serial, date, lot) */ + ret = qsfp_cks(ppd, QSFP_CC_OFFS + 1, QSFP_SN_OFFS); + if (ret < 0) + goto bail; + cks = ret; + + ret = qsfp_read(ppd, QSFP_SN_OFFS, &cp->serial, QSFP_SN_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_SN_LEN; ++idx) + cks += cp->serial[idx]; + + ret = qsfp_read(ppd, QSFP_DATE_OFFS, &cp->date, QSFP_DATE_LEN); + if (ret < 0) + goto bail;
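+ /* Illustrative (hypothetical cable): each byte of every field in + * this region feeds the running 8-bit checksum, so a date code of + * "120430" adds the six ASCII values 0x31 0x32 0x30 0x34 0x33 0x30 + * to cks before the compare against cks2 below. */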
+ for (idx = 0; idx < QSFP_DATE_LEN; ++idx) + cks += cp->date[idx]; + + ret = qsfp_read(ppd, QSFP_LOT_OFFS, &cp->lot, QSFP_LOT_LEN); + if (ret < 0) + goto bail; + for (idx = 0; idx < QSFP_LOT_LEN; ++idx) + cks += cp->lot[idx]; + + ret = qsfp_cks(ppd, QSFP_LOT_OFFS + QSFP_LOT_LEN, QSFP_CC_EXT_OFFS); + if (ret < 0) + goto bail; + cks += ret; + + ret = qsfp_read(ppd, QSFP_CC_EXT_OFFS, &cp->cks2, 1); + if (ret < 0) + goto bail; + cks &= 0xFF; + if (cks != cp->cks2) + qib_dev_porterr(ppd->dd, ppd->port, + "QSFP cks2 is %02X, computed %02X\n", cp->cks2, + cks); + return 0; + +bail: + cp->id = 0; + return ret; +} + +const char * const qib_qsfp_devtech[16] = { + "850nm VCSEL", "1310nm VCSEL", "1550nm VCSEL", "1310nm FP", + "1310nm DFB", "1550nm DFB", "1310nm EML", "1550nm EML", + "Cu Misc", "1490nm DFB", "Cu NoEq", "Cu Eq", + "Undef", "Cu Active BothEq", "Cu FarEq", "Cu NearEq" +}; + +#define QSFP_DUMP_CHUNK 16 /* Holds longest string */ +#define QSFP_DEFAULT_HDR_CNT 224 + +static const char *pwr_codes = "1.5W2.0W2.5W3.5W"; + +int qib_qsfp_mod_present(struct qib_pportdata *ppd) +{ + u32 mask; + int ret; + + mask = QSFP_GPIO_MOD_PRS_N << + (ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT); + ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0); + + return !((ret & mask) >> + ((ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT) + 3)); +} + +/* + * Initialize structures that control access to QSFP. Called once per port + * on cards that support QSFP. + */ +void qib_qsfp_init(struct qib_qsfp_data *qd, + void (*fevent)(struct work_struct *)) +{ + u32 mask, highs; + + struct qib_devdata *dd = qd->ppd->dd; + + /* Initialize work struct for later QSFP events */ + INIT_WORK(&qd->work, fevent); + + /* + * Later, we may want more validation. For now, just set up pins and + * blip reset. If module is present, call qib_refresh_qsfp_cache(), + * to do further init. + */ + mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE; + highs = mask - QSFP_GPIO_MOD_RST_N; + if (qd->ppd->hw_pidx) { + mask <<= QSFP_GPIO_PORT2_SHIFT; + highs <<= QSFP_GPIO_PORT2_SHIFT; + } + dd->f_gpio_mod(dd, highs, mask, mask); + udelay(20); /* Generous RST dwell */ + + dd->f_gpio_mod(dd, mask, mask, mask); + return; +} + +void qib_qsfp_deinit(struct qib_qsfp_data *qd) +{ + /* + * There is nothing to do here for now. our work is scheduled + * with queue_work(), and flush_workqueue() from remove_one + * will block until all work setup with queue_work() + * completes. 
+ */ +} + +int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len) +{ + struct qib_qsfp_cache cd; + u8 bin_buff[QSFP_DUMP_CHUNK]; + char lenstr[6]; + int sofar, ret; + int bidx = 0; + + sofar = 0; + ret = qib_refresh_qsfp_cache(ppd, &cd); + if (ret < 0) + goto bail; + + lenstr[0] = ' '; + lenstr[1] = '\0'; + if (QSFP_IS_CU(cd.tech)) + sprintf(lenstr, "%dM ", cd.len); + + sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n", pwr_codes + + (QSFP_PWR(cd.pwr) * 4)); + + sofar += scnprintf(buf + sofar, len - sofar, "TECH:%s%s\n", lenstr, + qib_qsfp_devtech[cd.tech >> 4]); + + sofar += scnprintf(buf + sofar, len - sofar, "Vendor:%.*s\n", + QSFP_VEND_LEN, cd.vendor); + + sofar += scnprintf(buf + sofar, len - sofar, "OUI:%06X\n", + QSFP_OUI(cd.oui)); + + sofar += scnprintf(buf + sofar, len - sofar, "Part#:%.*s\n", + QSFP_PN_LEN, cd.partnum); + sofar += scnprintf(buf + sofar, len - sofar, "Rev:%.*s\n", + QSFP_REV_LEN, cd.rev); + if (QSFP_IS_CU(cd.tech)) + sofar += scnprintf(buf + sofar, len - sofar, "Atten:%d, %d\n", + QSFP_ATTEN_SDR(cd.atten), + QSFP_ATTEN_DDR(cd.atten)); + sofar += scnprintf(buf + sofar, len - sofar, "Serial:%.*s\n", + QSFP_SN_LEN, cd.serial); + sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n", + QSFP_DATE_LEN, cd.date); + sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n", + QSFP_LOT_LEN, cd.lot); + + while (bidx < QSFP_DEFAULT_HDR_CNT) { + int iidx; + ret = qsfp_read(ppd, bidx, bin_buff, QSFP_DUMP_CHUNK); + if (ret < 0) + goto bail; + for (iidx = 0; iidx < ret; ++iidx) { + sofar += scnprintf(buf + sofar, len-sofar, " %02X", + bin_buff[iidx]); + } + sofar += scnprintf(buf + sofar, len - sofar, "\n"); + bidx += QSFP_DUMP_CHUNK; + } + ret = sofar; +bail: + return ret; +} diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h new file mode 100644 index 00000000000..91908f533a2 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_qsfp.h @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +/* QSFP support common definitions, for ib_qib driver */ + +#define QSFP_DEV 0xA0 +#define QSFP_PWR_LAG_MSEC 2000 +#define QSFP_MODPRS_LAG_MSEC 20 + +/* + * Below are masks for various QSFP signals, for Port 1. + * Port2 equivalents are shifted by QSFP_GPIO_PORT2_SHIFT. + * _N means asserted low + */ +#define QSFP_GPIO_MOD_SEL_N (4) +#define QSFP_GPIO_MOD_PRS_N (8) +#define QSFP_GPIO_INT_N (0x10) +#define QSFP_GPIO_MOD_RST_N (0x20) +#define QSFP_GPIO_LP_MODE (0x40) +#define QSFP_GPIO_PORT2_SHIFT 5 + +#define QSFP_PAGESIZE 128 +/* Defined fields that QLogic requires of qualified cables */ +/* Byte 0 is Identifier, not checked */ +/* Byte 1 is reserved "status MSB" */ +/* Byte 2 is "status LSB". We only care that D2 "Flat Mem" is set. */ +/* + * Rest of first 128 not used, although 127 is reserved for page select + * if module is not "Flat memory". + */ +/* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */ +#define QSFP_MOD_ID_OFFS 128 +/* + * Byte 129 is "Extended Identifier". We only care about D7,D6: Power class + * 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W + */ +#define QSFP_MOD_PWR_OFFS 129 +/* Byte 130 is Connector type. Not QLogic req'd */ +/* Bytes 131..138 are Transceiver types, bit maps for various tech, none IB */ +/* Byte 139 is encoding. code 0x01 is 8b10b. Not QLogic req'd */ +/* Byte 140 is nominal bit-rate, in units of 100Mbits/sec. Not QLogic req'd */ +/* Byte 141 is Extended Rate Select. Not QLogic req'd */ +/* Bytes 142..145 are lengths for various fiber types. Not QLogic req'd */ +/* Byte 146 is length for Copper. Units of 1 meter */ +#define QSFP_MOD_LEN_OFFS 146 +/* + * Byte 147 is Device technology. D0..3 not QLogic req'd + * D4..7 select from 15 choices, translated by table: + */ +#define QSFP_MOD_TECH_OFFS 147 +extern const char *const qib_qsfp_devtech[16]; +/* Active Equalization includes fiber, copper full EQ, and copper near Eq */ +#define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1) +/* Active Equalization includes fiber, copper full EQ, and copper far Eq */ +#define QSFP_IS_ACTIVE_FAR(tech) ((0x32FF >> ((tech) >> 4)) & 1) +/* Attenuation should be valid for copper other than full/near Eq */ +#define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1) +/* Length is only valid if technology is "copper" */ +#define QSFP_IS_CU(tech) ((0xED00 >> ((tech) >> 4)) & 1) +#define QSFP_TECH_1490 9 + +#define QSFP_OUI(oui) (((unsigned)oui[0] << 16) | ((unsigned)oui[1] << 8) | \ + oui[2]) +#define QSFP_OUI_AMPHENOL 0x415048 +#define QSFP_OUI_FINISAR 0x009065 +#define QSFP_OUI_GORE 0x002177 + +/* Bytes 148..163 are Vendor Name, Left-justified Blank-filled */ +#define QSFP_VEND_OFFS 148 +#define QSFP_VEND_LEN 16 +/* Byte 164 is IB Extended transceiver codes Bits D0..3 are SDR,DDR,QDR,EDR */ +#define QSFP_IBXCV_OFFS 164 +/* Bytes 165..167 are Vendor OUI number */ +#define QSFP_VOUI_OFFS 165 +#define QSFP_VOUI_LEN 3 +/* Bytes 168..183 are Vendor Part Number, string */ +#define QSFP_PN_OFFS 168 +#define QSFP_PN_LEN 16 +/* Bytes 184,185 are Vendor Rev. Left Justified, Blank-filled */ +#define QSFP_REV_OFFS 184 +#define QSFP_REV_LEN 2 +/* + * Bytes 186,187 are Wavelength, if Optical. Not QLogic req'd + * If copper, they are attenuation in dB: + * Byte 186 is at 2.5Gb/sec (SDR), Byte 187 at 5.0Gb/sec (DDR) + */ +#define QSFP_ATTEN_OFFS 186 +#define QSFP_ATTEN_LEN 2 +/* Bytes 188,189 are Wavelength tolerance, not QLogic req'd */ +/* Byte 190 is Max Case Temp. Not QLogic req'd */ +/* Byte 191 is LSB of sum of bytes 128..190.
Not QLogic req'd */ +#define QSFP_CC_OFFS 191 +/* Bytes 192..195 are Options implemented in qsfp. Not QLogic req'd */ +/* Bytes 196..211 are Serial Number, String */ +#define QSFP_SN_OFFS 196 +#define QSFP_SN_LEN 16 +/* Bytes 212..217 are date-code YYMMDD (MM==1 for Jan) */ +#define QSFP_DATE_OFFS 212 +#define QSFP_DATE_LEN 6 +/* Bytes 218,219 are optional lot-code, string */ +#define QSFP_LOT_OFFS 218 +#define QSFP_LOT_LEN 2 +/* Bytes 220, 221 indicate monitoring options. Not QLogic req'd */ +/* Byte 223 is LSB of sum of bytes 192..222 */ +#define QSFP_CC_EXT_OFFS 223 + +/* + * struct qib_qsfp_data encapsulates state of QSFP device for one port. + * It will be part of port-chip-specific data if a board supports QSFP. + * + * Since multiple board-types use QSFP, and their pport_data structs + * differ (in the chip-specific section), we need a pointer to its head. + * + * Avoiding premature optimization, we will have one work_struct per port, + * and let the (increasingly inaccurately named) eep_lock arbitrate + * access to common resources. + * + */ + +/* + * Hold the parts of the onboard EEPROM that we care about, so we aren't + * constantly bit-banging the interface + */ +struct qib_qsfp_cache { + u8 id; /* must be 0x0C or 0x0D; 0 indicates invalid EEPROM read */ + u8 pwr; /* in D6,7 */ + u8 len; /* in meters, Cu only */ + u8 tech; + char vendor[QSFP_VEND_LEN]; + u8 xt_xcv; /* Ext. transceiver codes, 4 lsbs are IB speed supported */ + u8 oui[QSFP_VOUI_LEN]; + u8 partnum[QSFP_PN_LEN]; + u8 rev[QSFP_REV_LEN]; + u8 atten[QSFP_ATTEN_LEN]; + u8 cks1; /* Checksum of bytes 128..190 */ + u8 serial[QSFP_SN_LEN]; + u8 date[QSFP_DATE_LEN]; + u8 lot[QSFP_LOT_LEN]; + u8 cks2; /* Checksum of bytes 192..222 */ +}; + +#define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3) +#define QSFP_ATTEN_SDR(attenarray) (attenarray[0]) +#define QSFP_ATTEN_DDR(attenarray) (attenarray[1]) + +struct qib_qsfp_data { + /* Helps to find our way */ + struct qib_pportdata *ppd; + struct work_struct work; + struct qib_qsfp_cache cache; + unsigned long t_insert; + u8 modpresent; +}; + +extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, + struct qib_qsfp_cache *cp); +extern int qib_qsfp_mod_present(struct qib_pportdata *ppd); +extern void qib_qsfp_init(struct qib_qsfp_data *qd, + void (*fevent)(struct work_struct *)); +extern void qib_qsfp_deinit(struct qib_qsfp_data *qd); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c new file mode 100644 index 00000000000..2f2501890c4 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -0,0 +1,2290 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer.
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/io.h> + +#include "qib.h" + +/* cut down ridiculously long IB macro names */ +#define OP(x) IB_OPCODE_RC_##x + +static void rc_timeout(unsigned long arg); + +static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe, + u32 psn, u32 pmtu) +{ + u32 len; + + len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu; + ss->sge = wqe->sg_list[0]; + ss->sg_list = wqe->sg_list + 1; + ss->num_sge = wqe->wr.num_sge; + ss->total_len = wqe->length; + qib_skip_sge(ss, len, 0); + return wqe->length - len; +} + +static void start_timer(struct qib_qp *qp) +{ + qp->s_flags |= QIB_S_TIMER; + qp->s_timer.function = rc_timeout; + /* 4.096 usec. * (1 << qp->timeout) */ + qp->s_timer.expires = jiffies + qp->timeout_jiffies; + add_timer(&qp->s_timer); +} + +/** + * qib_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read) + * @dev: the device for this QP + * @qp: a pointer to the QP + * @ohdr: a pointer to the IB header being constructed + * @pmtu: the path MTU + * + * Return 1 if constructed; otherwise, return 0. + * Note that we are in the responder's side of the QP context. + * Note the QP s_lock must be held. + */ +static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp, + struct qib_other_headers *ohdr, u32 pmtu) +{ + struct qib_ack_entry *e; + u32 hwords; + u32 len; + u32 bth0; + u32 bth2; + + /* Don't send an ACK if we aren't supposed to. */ + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + goto bail; + + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + + switch (qp->s_ack_state) { + case OP(RDMA_READ_RESPONSE_LAST): + case OP(RDMA_READ_RESPONSE_ONLY): + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + if (e->rdma_sge.mr) { + qib_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + /* FALLTHROUGH */ + case OP(ATOMIC_ACKNOWLEDGE): + /* + * We can increment the tail pointer now that the last + * response has been sent instead of only being + * constructed. + */ + if (++qp->s_tail_ack_queue > QIB_MAX_RDMA_ATOMIC) + qp->s_tail_ack_queue = 0; + /* FALLTHROUGH */ + case OP(SEND_ONLY): + case OP(ACKNOWLEDGE): + /* Check for no next entry in the queue. */ + if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { + if (qp->s_flags & QIB_S_ACK_PENDING) + goto normal; + goto bail; + } + + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST)) { + /* + * If a RDMA read response is being resent and + * we haven't seen the duplicate request yet, + * then stop sending the remaining responses the + * responder has seen until the requester resends it. 
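+ * (That situation is recognized just below as a non-zero sge_length + * whose MR reference has already been dropped; descriptive note, + * not new behavior.)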
+ */ + len = e->rdma_sge.sge_length; + if (len && !e->rdma_sge.mr) { + qp->s_tail_ack_queue = qp->r_head_ack_queue; + goto bail; + } + /* Copy SGE state in case we need to resend */ + qp->s_rdma_mr = e->rdma_sge.mr; + if (qp->s_rdma_mr) + qib_get_mr(qp->s_rdma_mr); + qp->s_ack_rdma_sge.sge = e->rdma_sge; + qp->s_ack_rdma_sge.num_sge = 1; + qp->s_cur_sge = &qp->s_ack_rdma_sge; + if (len > pmtu) { + len = pmtu; + qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); + } else { + qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); + e->sent = 1; + } + ohdr->u.aeth = qib_compute_aeth(qp); + hwords++; + qp->s_ack_rdma_psn = e->psn; + bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK; + } else { + /* COMPARE_SWAP or FETCH_ADD */ + qp->s_cur_sge = NULL; + len = 0; + qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); + ohdr->u.at.aeth = qib_compute_aeth(qp); + ohdr->u.at.atomic_ack_eth[0] = + cpu_to_be32(e->atomic_data >> 32); + ohdr->u.at.atomic_ack_eth[1] = + cpu_to_be32(e->atomic_data); + hwords += sizeof(ohdr->u.at) / sizeof(u32); + bth2 = e->psn & QIB_PSN_MASK; + e->sent = 1; + } + bth0 = qp->s_ack_state << 24; + break; + + case OP(RDMA_READ_RESPONSE_FIRST): + qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); + /* FALLTHROUGH */ + case OP(RDMA_READ_RESPONSE_MIDDLE): + qp->s_cur_sge = &qp->s_ack_rdma_sge; + qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; + if (qp->s_rdma_mr) + qib_get_mr(qp->s_rdma_mr); + len = qp->s_ack_rdma_sge.sge.sge_length; + if (len > pmtu) + len = pmtu; + else { + ohdr->u.aeth = qib_compute_aeth(qp); + hwords++; + qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); + e = &qp->s_ack_queue[qp->s_tail_ack_queue]; + e->sent = 1; + } + bth0 = qp->s_ack_state << 24; + bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK; + break; + + default: +normal: + /* + * Send a regular ACK. + * Set the s_ack_state so we wait until after sending + * the ACK before setting s_ack_state to ACKNOWLEDGE + * (see above). + */ + qp->s_ack_state = OP(SEND_ONLY); + qp->s_flags &= ~QIB_S_ACK_PENDING; + qp->s_cur_sge = NULL; + if (qp->s_nak_state) + ohdr->u.aeth = + cpu_to_be32((qp->r_msn & QIB_MSN_MASK) | + (qp->s_nak_state << + QIB_AETH_CREDIT_SHIFT)); + else + ohdr->u.aeth = qib_compute_aeth(qp); + hwords++; + len = 0; + bth0 = OP(ACKNOWLEDGE) << 24; + bth2 = qp->s_ack_psn & QIB_PSN_MASK; + } + qp->s_rdma_ack_cnt++; + qp->s_hdrwords = hwords; + qp->s_cur_size = len; + qib_make_ruc_header(qp, ohdr, bth0, bth2); + return 1; + +bail: + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_flags &= ~(QIB_S_RESP_PENDING | QIB_S_ACK_PENDING); + return 0; +} + +/** + * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) + * @qp: a pointer to the QP + * + * Return 1 if constructed; otherwise, return 0. + */ +int qib_make_rc_req(struct qib_qp *qp) +{ + struct qib_ibdev *dev = to_idev(qp->ibqp.device); + struct qib_other_headers *ohdr; + struct qib_sge_state *ss; + struct qib_swqe *wqe; + u32 hwords; + u32 len; + u32 bth0; + u32 bth2; + u32 pmtu = qp->pmtu; + char newreq; + unsigned long flags; + int ret = 0; + int delta; + + ohdr = &qp->s_hdr->u.oth; + if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + ohdr = &qp->s_hdr->u.l.oth; + + /* + * The lock is needed to synchronize between the sending tasklet, + * the receive interrupt handler, and timeout resends. + */ + spin_lock_irqsave(&qp->s_lock, flags); + + /* Sending responses has higher priority over sending requests. 
*/ + if ((qp->s_flags & QIB_S_RESP_PENDING) && + qib_make_rc_ack(dev, qp, ohdr, pmtu)) + goto done; + + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) { + if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= QIB_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ? + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); + /* will get called again */ + goto done; + } + + if (qp->s_flags & (QIB_S_WAIT_RNR | QIB_S_WAIT_ACK)) + goto bail; + + if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) { + if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) { + qp->s_flags |= QIB_S_WAIT_PSN; + goto bail; + } + qp->s_sending_psn = qp->s_psn; + qp->s_sending_hpsn = qp->s_psn - 1; + } + + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + bth0 = 0; + + /* Send a request. */ + wqe = get_swqe_ptr(qp, qp->s_cur); + switch (qp->s_state) { + default: + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) + goto bail; + /* + * Resend an old request or start a new one. + * + * We keep track of the current SWQE so that + * we don't reset the "furthest progress" state + * if we need to back up. + */ + newreq = 0; + if (qp->s_cur == qp->s_tail) { + /* Check if send work queue is empty. */ + if (qp->s_tail == qp->s_head) + goto bail; + /* + * If a fence is requested, wait for previous + * RDMA read and atomic operations to finish. + */ + if ((wqe->wr.send_flags & IB_SEND_FENCE) && + qp->s_num_rd_atomic) { + qp->s_flags |= QIB_S_WAIT_FENCE; + goto bail; + } + wqe->psn = qp->s_next_psn; + newreq = 1; + } + /* + * Note that we have to be careful not to modify the + * original work request since we may need to resend + * it. + */ + len = wqe->length; + ss = &qp->s_sge; + bth2 = qp->s_psn & QIB_PSN_MASK; + switch (wqe->wr.opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + /* If no credit, return. */ + if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) && + qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= QIB_S_WAIT_SSN_CREDIT; + goto bail; + } + wqe->lpsn = wqe->psn; + if (len > pmtu) { + wqe->lpsn += (len - 1) / pmtu; + qp->s_state = OP(SEND_FIRST); + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_SEND) + qp->s_state = OP(SEND_ONLY); + else { + qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + } + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + bth2 |= IB_BTH_REQ_ACK; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + + case IB_WR_RDMA_WRITE: + if (newreq && !(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) + qp->s_lsn++; + /* FALLTHROUGH */ + case IB_WR_RDMA_WRITE_WITH_IMM: + /* If no credit, return. 
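+ * A WQE may start only while its SSN is within the advertised limit, + * i.e. ssn <= s_lsn + 1 in 24-bit arithmetic; e.g. with s_lsn = 5 the + * WQE with ssn 6 may go out while ssn 7 must wait (hypothetical + * numbers).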
*/ + if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) && + qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { + qp->s_flags |= QIB_S_WAIT_SSN_CREDIT; + goto bail; + } + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(len); + hwords += sizeof(struct ib_reth) / sizeof(u32); + wqe->lpsn = wqe->psn; + if (len > pmtu) { + wqe->lpsn += (len - 1) / pmtu; + qp->s_state = OP(RDMA_WRITE_FIRST); + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + qp->s_state = OP(RDMA_WRITE_ONLY); + else { + qp->s_state = + OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); + /* Immediate data comes after RETH */ + ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + } + bth2 |= IB_BTH_REQ_ACK; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + + case IB_WR_RDMA_READ: + /* + * Don't allow more operations to be started + * than the QP limits allow. + */ + if (newreq) { + if (qp->s_num_rd_atomic >= + qp->s_max_rd_atomic) { + qp->s_flags |= QIB_S_WAIT_RDMAR; + goto bail; + } + qp->s_num_rd_atomic++; + if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) + qp->s_lsn++; + /* + * Adjust s_next_psn to count the + * expected number of responses. + */ + if (len > pmtu) + qp->s_next_psn += (len - 1) / pmtu; + wqe->lpsn = qp->s_next_psn++; + } + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(len); + qp->s_state = OP(RDMA_READ_REQUEST); + hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); + ss = NULL; + len = 0; + bth2 |= IB_BTH_REQ_ACK; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + /* + * Don't allow more operations to be started + * than the QP limits allow. 
+ */ + if (newreq) { + if (qp->s_num_rd_atomic >= + qp->s_max_rd_atomic) { + qp->s_flags |= QIB_S_WAIT_RDMAR; + goto bail; + } + qp->s_num_rd_atomic++; + if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) + qp->s_lsn++; + wqe->lpsn = wqe->psn; + } + if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { + qp->s_state = OP(COMPARE_SWAP); + ohdr->u.atomic_eth.swap_data = cpu_to_be64( + wqe->wr.wr.atomic.swap); + ohdr->u.atomic_eth.compare_data = cpu_to_be64( + wqe->wr.wr.atomic.compare_add); + } else { + qp->s_state = OP(FETCH_ADD); + ohdr->u.atomic_eth.swap_data = cpu_to_be64( + wqe->wr.wr.atomic.compare_add); + ohdr->u.atomic_eth.compare_data = 0; + } + ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( + wqe->wr.wr.atomic.remote_addr >> 32); + ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( + wqe->wr.wr.atomic.remote_addr); + ohdr->u.atomic_eth.rkey = cpu_to_be32( + wqe->wr.wr.atomic.rkey); + hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); + ss = NULL; + len = 0; + bth2 |= IB_BTH_REQ_ACK; + if (++qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + + default: + goto bail; + } + qp->s_sge.sge = wqe->sg_list[0]; + qp->s_sge.sg_list = wqe->sg_list + 1; + qp->s_sge.num_sge = wqe->wr.num_sge; + qp->s_sge.total_len = wqe->length; + qp->s_len = wqe->length; + if (newreq) { + qp->s_tail++; + if (qp->s_tail >= qp->s_size) + qp->s_tail = 0; + } + if (wqe->wr.opcode == IB_WR_RDMA_READ) + qp->s_psn = wqe->lpsn + 1; + else { + qp->s_psn++; + if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) + qp->s_next_psn = qp->s_psn; + } + break; + + case OP(RDMA_READ_RESPONSE_FIRST): + /* + * qp->s_state is normally set to the opcode of the + * last packet constructed for new requests and therefore + * is never set to RDMA read response. + * RDMA_READ_RESPONSE_FIRST is used by the ACK processing + * thread to indicate a SEND needs to be restarted from an + * earlier PSN without interfering with the sending thread. + * See qib_restart_rc(). + */ + qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); + /* FALLTHROUGH */ + case OP(SEND_FIRST): + qp->s_state = OP(SEND_MIDDLE); + /* FALLTHROUGH */ + case OP(SEND_MIDDLE): + bth2 = qp->s_psn++ & QIB_PSN_MASK; + if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) + qp->s_next_psn = qp->s_psn; + ss = &qp->s_sge; + len = qp->s_len; + if (len > pmtu) { + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_SEND) + qp->s_state = OP(SEND_LAST); + else { + qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + } + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + bth2 |= IB_BTH_REQ_ACK; + qp->s_cur++; + if (qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + case OP(RDMA_READ_RESPONSE_LAST): + /* + * qp->s_state is normally set to the opcode of the + * last packet constructed for new requests and therefore + * is never set to RDMA read response. + * RDMA_READ_RESPONSE_LAST is used by the ACK processing + * thread to indicate an RDMA write needs to be restarted from + * an earlier PSN without interfering with the sending thread. + * See qib_restart_rc().
+ */ + qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); + /* FALLTHROUGH */ + case OP(RDMA_WRITE_FIRST): + qp->s_state = OP(RDMA_WRITE_MIDDLE); + /* FALLTHROUGH */ + case OP(RDMA_WRITE_MIDDLE): + bth2 = qp->s_psn++ & QIB_PSN_MASK; + if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) + qp->s_next_psn = qp->s_psn; + ss = &qp->s_sge; + len = qp->s_len; + if (len > pmtu) { + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + qp->s_state = OP(RDMA_WRITE_LAST); + else { + qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + } + bth2 |= IB_BTH_REQ_ACK; + qp->s_cur++; + if (qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + case OP(RDMA_READ_RESPONSE_MIDDLE): + /* + * qp->s_state is normally set to the opcode of the + * last packet constructed for new requests and therefore + * is never set to RDMA read response. + * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing + * thread to indicate an RDMA read needs to be restarted from + * an earlier PSN without interfering with the sending thread. + * See qib_restart_rc(). + */ + len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu; + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); + qp->s_state = OP(RDMA_READ_REQUEST); + hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); + bth2 = (qp->s_psn & QIB_PSN_MASK) | IB_BTH_REQ_ACK; + qp->s_psn = wqe->lpsn + 1; + ss = NULL; + len = 0; + qp->s_cur++; + if (qp->s_cur == qp->s_size) + qp->s_cur = 0; + break; + } + qp->s_sending_hpsn = bth2; + delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8; + if (delta && delta % QIB_PSN_CREDIT == 0) + bth2 |= IB_BTH_REQ_ACK; + if (qp->s_flags & QIB_S_SEND_ONE) { + qp->s_flags &= ~QIB_S_SEND_ONE; + qp->s_flags |= QIB_S_WAIT_ACK; + bth2 |= IB_BTH_REQ_ACK; + } + qp->s_len -= len; + qp->s_hdrwords = hwords; + qp->s_cur_sge = ss; + qp->s_cur_size = len; + qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2); +done: + ret = 1; + goto unlock; + +bail: + qp->s_flags &= ~QIB_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +/** + * qib_send_rc_ack - Construct an ACK packet and send it + * @qp: a pointer to the QP + * + * This is called from qib_rc_rcv() and qib_kreceive(). + * Note that RDMA reads and atomics are handled in the + * send side QP state and tasklet. + */ +void qib_send_rc_ack(struct qib_qp *qp) +{ + struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + u64 pbc; + u16 lrh0; + u32 bth0; + u32 hwords; + u32 pbufn; + u32 __iomem *piobuf; + struct qib_ib_header hdr; + struct qib_other_headers *ohdr; + u32 control; + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + goto unlock; + + /* Don't send ACK or NAK if an RDMA read or atomic is pending. */ + if ((qp->s_flags & QIB_S_RESP_PENDING) || qp->s_rdma_ack_cnt) + goto queue_ack; + + /* Construct the header with s_lock held so APM doesn't change it. */ + ohdr = &hdr.u.oth; + lrh0 = QIB_LRH_BTH; + /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4.
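+ * That is 6; a GRH, when present, contributes additional words just + * below.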
*/ + hwords = 6; + if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { + hwords += qib_make_grh(ibp, &hdr.u.l.grh, + &qp->remote_ah_attr.grh, hwords, 0); + ohdr = &hdr.u.l.oth; + lrh0 = QIB_LRH_GRH; + } + /* read pkey_index w/o lock (it's atomic) */ + bth0 = qib_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24); + if (qp->s_mig_state == IB_MIG_MIGRATED) + bth0 |= IB_BTH_MIG_REQ; + if (qp->r_nak_state) + ohdr->u.aeth = cpu_to_be32((qp->r_msn & QIB_MSN_MASK) | + (qp->r_nak_state << + QIB_AETH_CREDIT_SHIFT)); + else + ohdr->u.aeth = qib_compute_aeth(qp); + lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 | + qp->remote_ah_attr.sl << 4; + hdr.lrh[0] = cpu_to_be16(lrh0); + hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); + hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits); + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); + ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & QIB_PSN_MASK); + + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Don't try to send ACKs if the link isn't ACTIVE */ + if (!(ppd->lflags & QIBL_LINKACTIVE)) + goto done; + + control = dd->f_setpbc_control(ppd, hwords + SIZE_OF_CRC, + qp->s_srate, lrh0 >> 12); + /* length is + 1 for the control dword */ + pbc = ((u64) control << 32) | (hwords + 1); + + piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn); + if (!piobuf) { + /* + * We are out of PIO buffers at the moment. + * Pass responsibility for sending the ACK to the + * send tasklet so that when a PIO buffer becomes + * available, the ACK is sent ahead of other outgoing + * packets. + */ + spin_lock_irqsave(&qp->s_lock, flags); + goto queue_ack; + } + + /* + * Write the pbc. + * We have to flush after the PBC for correctness + * on some cpus or WC buffer can be written out of order. + */ + writeq(pbc, piobuf); + + if (dd->flags & QIB_PIO_FLUSH_WC) { + u32 *hdrp = (u32 *) &hdr; + + qib_flush_wc(); + qib_pio_copy(piobuf + 2, hdrp, hwords - 1); + qib_flush_wc(); + __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1); + } else + qib_pio_copy(piobuf + 2, (u32 *) &hdr, hwords); + + if (dd->flags & QIB_USE_SPCL_TRIG) { + u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; + + qib_flush_wc(); + __raw_writel(0xaebecede, piobuf + spcl_off); + } + + qib_flush_wc(); + qib_sendbuf_done(dd, pbufn); + + this_cpu_inc(ibp->pmastats->n_unicast_xmit); + goto done; + +queue_ack: + if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { + ibp->n_rc_qacks++; + qp->s_flags |= QIB_S_ACK_PENDING | QIB_S_RESP_PENDING; + qp->s_nak_state = qp->r_nak_state; + qp->s_ack_psn = qp->r_ack_psn; + + /* Schedule the send tasklet. */ + qib_schedule_send(qp); + } +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); +done: + return; +} + +/** + * reset_psn - reset the QP state to send starting from PSN + * @qp: the QP + * @psn: the packet sequence number to restart at + * + * This is called from qib_rc_rcv() to process an incoming RC ACK + * for the given QP. + * Called at interrupt level with the QP s_lock held. + */ +static void reset_psn(struct qib_qp *qp, u32 psn) +{ + u32 n = qp->s_acked; + struct qib_swqe *wqe = get_swqe_ptr(qp, n); + u32 opcode; + + qp->s_cur = n; + + /* + * If we are starting the request from the beginning, + * let the normal send code handle initialization. + */ + if (qib_cmp24(psn, wqe->psn) <= 0) { + qp->s_state = OP(SEND_LAST); + goto done; + } + + /* Find the work request opcode corresponding to the given PSN.
*/ + opcode = wqe->wr.opcode; + for (;;) { + int diff; + + if (++n == qp->s_size) + n = 0; + if (n == qp->s_tail) + break; + wqe = get_swqe_ptr(qp, n); + diff = qib_cmp24(psn, wqe->psn); + if (diff < 0) + break; + qp->s_cur = n; + /* + * If we are starting the request from the beginning, + * let the normal send code handle initialization. + */ + if (diff == 0) { + qp->s_state = OP(SEND_LAST); + goto done; + } + opcode = wqe->wr.opcode; + } + + /* + * Set the state to restart in the middle of a request. + * Don't change the s_sge, s_cur_sge, or s_cur_size. + * See qib_make_rc_req(). + */ + switch (opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + qp->s_state = OP(RDMA_READ_RESPONSE_FIRST); + break; + + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + qp->s_state = OP(RDMA_READ_RESPONSE_LAST); + break; + + case IB_WR_RDMA_READ: + qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE); + break; + + default: + /* + * This case shouldn't happen since it's only + * one PSN per req. + */ + qp->s_state = OP(SEND_LAST); + } +done: + qp->s_psn = psn; + /* + * Set QIB_S_WAIT_PSN as qib_rc_send_complete() may start the timer + * asynchronously before the send tasklet can get scheduled. + * Doing it in qib_make_rc_req() is too late. + */ + if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) && + (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) + qp->s_flags |= QIB_S_WAIT_PSN; +} + +/* + * Back up requester to resend the last un-ACKed request. + * The QP r_lock and s_lock should be held and interrupts disabled. + */ +static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait) +{ + struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_acked); + struct qib_ibport *ibp; + + if (qp->s_retry == 0) { + if (qp->s_mig_state == IB_MIG_ARMED) { + qib_migrate_qp(qp); + qp->s_retry = qp->s_retry_cnt; + } else if (qp->s_last == qp->s_acked) { + qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); + qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + return; + } else /* XXX need to handle delayed completion */ + return; + } else + qp->s_retry--; + + ibp = to_iport(qp->ibqp.device, qp->port_num); + if (wqe->wr.opcode == IB_WR_RDMA_READ) + ibp->n_rc_resends++; + else + ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK; + + qp->s_flags &= ~(QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | + QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_PSN | + QIB_S_WAIT_ACK); + if (wait) + qp->s_flags |= QIB_S_SEND_ONE; + reset_psn(qp, psn); +} + +/* + * This is called from s_timer for missing responses. + */ +static void rc_timeout(unsigned long arg) +{ + struct qib_qp *qp = (struct qib_qp *)arg; + struct qib_ibport *ibp; + unsigned long flags; + + spin_lock_irqsave(&qp->r_lock, flags); + spin_lock(&qp->s_lock); + if (qp->s_flags & QIB_S_TIMER) { + ibp = to_iport(qp->ibqp.device, qp->port_num); + ibp->n_rc_timeouts++; + qp->s_flags &= ~QIB_S_TIMER; + del_timer(&qp->s_timer); + qib_restart_rc(qp, qp->s_last_psn + 1, 1); + qib_schedule_send(qp); + } + spin_unlock(&qp->s_lock); + spin_unlock_irqrestore(&qp->r_lock, flags); +} + +/* + * This is called from s_timer for RNR timeouts. + */ +void qib_rc_rnr_retry(unsigned long arg) +{ + struct qib_qp *qp = (struct qib_qp *)arg; + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); + if (qp->s_flags & QIB_S_WAIT_RNR) { + qp->s_flags &= ~QIB_S_WAIT_RNR; + del_timer(&qp->s_timer); + qib_schedule_send(qp); + } + spin_unlock_irqrestore(&qp->s_lock, flags); +} + +/* + * Set qp->s_sending_psn to the next PSN after the given one. + * This would be psn+1 except when RDMA reads are present.
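+ * (e.g. if an RDMA read request occupies PSNs 10..13 for its expected + * responses, completing it advances s_sending_psn to lpsn + 1 = 14; + * hypothetical numbers)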
+ */ +static void reset_sending_psn(struct qib_qp *qp, u32 psn) +{ + struct qib_swqe *wqe; + u32 n = qp->s_last; + + /* Find the work request corresponding to the given PSN. */ + for (;;) { + wqe = get_swqe_ptr(qp, n); + if (qib_cmp24(psn, wqe->lpsn) <= 0) { + if (wqe->wr.opcode == IB_WR_RDMA_READ) + qp->s_sending_psn = wqe->lpsn + 1; + else + qp->s_sending_psn = psn + 1; + break; + } + if (++n == qp->s_size) + n = 0; + if (n == qp->s_tail) + break; + } +} + +/* + * This should be called with the QP s_lock held and interrupts disabled. + */ +void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr) +{ + struct qib_other_headers *ohdr; + struct qib_swqe *wqe; + struct ib_wc wc; + unsigned i; + u32 opcode; + u32 psn; + + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND)) + return; + + /* Find out where the BTH is */ + if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH) + ohdr = &hdr->u.oth; + else + ohdr = &hdr->u.l.oth; + + opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && + opcode <= OP(ATOMIC_ACKNOWLEDGE)) { + WARN_ON(!qp->s_rdma_ack_cnt); + qp->s_rdma_ack_cnt--; + return; + } + + psn = be32_to_cpu(ohdr->bth[2]); + reset_sending_psn(qp, psn); + + /* + * Start timer after a packet requesting an ACK has been sent and + * there are still requests that haven't been acked. + */ + if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail && + !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) && + (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + start_timer(qp); + + while (qp->s_last != qp->s_acked) { + wqe = get_swqe_ptr(qp, qp->s_last); + if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 && + qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) + break; + for (i = 0; i < wqe->wr.num_sge; i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + + qib_put_mr(sge->mr); + } + /* Post a send completion queue entry if requested. */ + if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED)) { + memset(&wc, 0, sizeof wc); + wc.wr_id = wqe->wr.wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; + wc.byte_len = wqe->length; + wc.qp = &qp->ibqp; + qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + } + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } + /* + * If we were waiting for sends to complete before resending, + * and they are now complete, restart sending. + */ + if (qp->s_flags & QIB_S_WAIT_PSN && + qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { + qp->s_flags &= ~QIB_S_WAIT_PSN; + qp->s_sending_psn = qp->s_psn; + qp->s_sending_hpsn = qp->s_psn - 1; + qib_schedule_send(qp); + } +} + +static inline void update_last_psn(struct qib_qp *qp, u32 psn) +{ + qp->s_last_psn = psn; +} + +/* + * Generate a SWQE completion. + * This is similar to qib_send_complete but has to check to be sure + * that the SGEs are not being referenced if the SWQE is being resent. + */ +static struct qib_swqe *do_rc_completion(struct qib_qp *qp, + struct qib_swqe *wqe, + struct qib_ibport *ibp) +{ + struct ib_wc wc; + unsigned i; + + /* + * Don't decrement refcount and don't generate a + * completion if the SWQE is being resent until the send + * is finished. + */ + if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 || + qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { + for (i = 0; i < wqe->wr.num_sge; i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + + qib_put_mr(sge->mr); + } + /* Post a send completion queue entry if requested. 
*/ + if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED)) { + memset(&wc, 0, sizeof wc); + wc.wr_id = wqe->wr.wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; + wc.byte_len = wqe->length; + wc.qp = &qp->ibqp; + qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); + } + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } else + ibp->n_rc_delayed_comp++; + + qp->s_retry = qp->s_retry_cnt; + update_last_psn(qp, wqe->lpsn); + + /* + * If we are completing a request which is in the process of + * being resent, we can stop resending it since we know the + * responder has already seen it. + */ + if (qp->s_acked == qp->s_cur) { + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + qp->s_acked = qp->s_cur; + wqe = get_swqe_ptr(qp, qp->s_cur); + if (qp->s_acked != qp->s_tail) { + qp->s_state = OP(SEND_LAST); + qp->s_psn = wqe->psn; + } + } else { + if (++qp->s_acked >= qp->s_size) + qp->s_acked = 0; + if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur) + qp->s_draining = 0; + wqe = get_swqe_ptr(qp, qp->s_acked); + } + return wqe; +} + +/** + * do_rc_ack - process an incoming RC ACK + * @qp: the QP the ACK came in on + * @psn: the packet sequence number of the ACK + * @opcode: the opcode of the request that resulted in the ACK + * + * This is called from qib_rc_rcv_resp() to process an incoming RC ACK + * for the given QP. + * Called at interrupt level with the QP s_lock held. + * Returns 1 if OK, 0 if current operation should be aborted (NAK). + */ +static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode, + u64 val, struct qib_ctxtdata *rcd) +{ + struct qib_ibport *ibp; + enum ib_wc_status status; + struct qib_swqe *wqe; + int ret = 0; + u32 ack_psn; + int diff; + + /* Remove QP from retry timer */ + if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) { + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR); + del_timer(&qp->s_timer); + } + + /* + * Note that NAKs implicitly ACK outstanding SEND and RDMA write + * requests and implicitly NAK RDMA read and atomic requests issued + * before the NAK'ed request. The MSN won't include the NAK'ed + * request but will include an ACK'ed request(s). + */ + ack_psn = psn; + if (aeth >> 29) + ack_psn--; + wqe = get_swqe_ptr(qp, qp->s_acked); + ibp = to_iport(qp->ibqp.device, qp->port_num); + + /* + * The MSN might be for a later WQE than the PSN indicates so + * only complete WQEs that the PSN finishes. + */ + while ((diff = qib_cmp24(ack_psn, wqe->lpsn)) >= 0) { + /* + * RDMA_READ_RESPONSE_ONLY is a special case since + * we want to generate completion events for everything + * before the RDMA read, copy the data, then generate + * the completion for the read. + */ + if (wqe->wr.opcode == IB_WR_RDMA_READ && + opcode == OP(RDMA_READ_RESPONSE_ONLY) && + diff == 0) { + ret = 1; + goto bail; + } + /* + * If this request is a RDMA read or atomic, and the ACK is + * for a later operation, this ACK NAKs the RDMA read or + * atomic. In other words, only a RDMA_READ_LAST or ONLY + * can ACK a RDMA read and likewise for atomic ops. Note + * that the NAK case can only happen if relaxed ordering is + * used and requests are sent after an RDMA read or atomic + * is sent but before the response is received. + */ + if ((wqe->wr.opcode == IB_WR_RDMA_READ && + (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) || + ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && + (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) { + /* Retry this request. 
*/ + if (!(qp->r_flags & QIB_R_RDMAR_SEQ)) { + qp->r_flags |= QIB_R_RDMAR_SEQ; + qib_restart_rc(qp, qp->s_last_psn + 1, 0); + if (list_empty(&qp->rspwait)) { + qp->r_flags |= QIB_R_RSP_SEND; + atomic_inc(&qp->refcount); + list_add_tail(&qp->rspwait, + &rcd->qp_wait_list); + } + } + /* + * No need to process the ACK/NAK since we are + * restarting an earlier request. + */ + goto bail; + } + if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { + u64 *vaddr = wqe->sg_list[0].vaddr; + *vaddr = val; + } + if (qp->s_num_rd_atomic && + (wqe->wr.opcode == IB_WR_RDMA_READ || + wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { + qp->s_num_rd_atomic--; + /* Restart sending task if fence is complete */ + if ((qp->s_flags & QIB_S_WAIT_FENCE) && + !qp->s_num_rd_atomic) { + qp->s_flags &= ~(QIB_S_WAIT_FENCE | + QIB_S_WAIT_ACK); + qib_schedule_send(qp); + } else if (qp->s_flags & QIB_S_WAIT_RDMAR) { + qp->s_flags &= ~(QIB_S_WAIT_RDMAR | + QIB_S_WAIT_ACK); + qib_schedule_send(qp); + } + } + wqe = do_rc_completion(qp, wqe, ibp); + if (qp->s_acked == qp->s_tail) + break; + } + + switch (aeth >> 29) { + case 0: /* ACK */ + ibp->n_rc_acks++; + if (qp->s_acked != qp->s_tail) { + /* + * We are expecting more ACKs so + * reset the retransmit timer. + */ + start_timer(qp); + /* + * We can stop resending the earlier packets and + * continue with the next packet the receiver wants. + */ + if (qib_cmp24(qp->s_psn, psn) <= 0) + reset_psn(qp, psn + 1); + } else if (qib_cmp24(qp->s_psn, psn) <= 0) { + qp->s_state = OP(SEND_LAST); + qp->s_psn = psn + 1; + } + if (qp->s_flags & QIB_S_WAIT_ACK) { + qp->s_flags &= ~QIB_S_WAIT_ACK; + qib_schedule_send(qp); + } + qib_get_credit(qp, aeth); + qp->s_rnr_retry = qp->s_rnr_retry_cnt; + qp->s_retry = qp->s_retry_cnt; + update_last_psn(qp, psn); + ret = 1; + goto bail; + + case 1: /* RNR NAK */ + ibp->n_rnr_naks++; + if (qp->s_acked == qp->s_tail) + goto bail; + if (qp->s_flags & QIB_S_WAIT_RNR) + goto bail; + if (qp->s_rnr_retry == 0) { + status = IB_WC_RNR_RETRY_EXC_ERR; + goto class_b; + } + if (qp->s_rnr_retry_cnt < 7) + qp->s_rnr_retry--; + + /* The last valid PSN is the previous PSN. */ + update_last_psn(qp, psn - 1); + + ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK; + + reset_psn(qp, psn); + + qp->s_flags &= ~(QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_ACK); + qp->s_flags |= QIB_S_WAIT_RNR; + qp->s_timer.function = qib_rc_rnr_retry; + qp->s_timer.expires = jiffies + usecs_to_jiffies( + ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) & + QIB_AETH_CREDIT_MASK]); + add_timer(&qp->s_timer); + goto bail; + + case 3: /* NAK */ + if (qp->s_acked == qp->s_tail) + goto bail; + /* The last valid PSN is the previous PSN. */ + update_last_psn(qp, psn - 1); + switch ((aeth >> QIB_AETH_CREDIT_SHIFT) & + QIB_AETH_CREDIT_MASK) { + case 0: /* PSN sequence error */ + ibp->n_seq_naks++; + /* + * Back up to the responder's expected PSN. + * Note that we might get a NAK in the middle of an + * RDMA READ response which terminates the RDMA + * READ. 
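The RNR arm just above follows the IBA convention that a retry count of 7 means retry forever. That accounting, pulled out as a sketch (hypothetical helper, kernel types assumed):

        #include <linux/types.h>

        static inline int rnr_retries_exhausted(u8 *retry, u8 retry_cnt)
        {
                if (*retry == 0)
                        return 1;       /* fail with RNR_RETRY_EXC_ERR */
                if (retry_cnt < 7)
                        (*retry)--;     /* 7 == infinite, never decrement */
                return 0;
        }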
+ */ + qib_restart_rc(qp, psn, 0); + qib_schedule_send(qp); + break; + + case 1: /* Invalid Request */ + status = IB_WC_REM_INV_REQ_ERR; + ibp->n_other_naks++; + goto class_b; + + case 2: /* Remote Access Error */ + status = IB_WC_REM_ACCESS_ERR; + ibp->n_other_naks++; + goto class_b; + + case 3: /* Remote Operation Error */ + status = IB_WC_REM_OP_ERR; + ibp->n_other_naks++; +class_b: + if (qp->s_last == qp->s_acked) { + qib_send_complete(qp, wqe, status); + qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + } + break; + + default: + /* Ignore other reserved NAK error codes */ + goto reserved; + } + qp->s_retry = qp->s_retry_cnt; + qp->s_rnr_retry = qp->s_rnr_retry_cnt; + goto bail; + + default: /* 2: reserved */ +reserved: + /* Ignore reserved NAK codes. */ + goto bail; + } + +bail: + return ret; +} + +/* + * We have seen an out of sequence RDMA read middle or last packet. + * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE. + */ +static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn, + struct qib_ctxtdata *rcd) +{ + struct qib_swqe *wqe; + + /* Remove QP from retry timer */ + if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) { + qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR); + del_timer(&qp->s_timer); + } + + wqe = get_swqe_ptr(qp, qp->s_acked); + + while (qib_cmp24(psn, wqe->lpsn) > 0) { + if (wqe->wr.opcode == IB_WR_RDMA_READ || + wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + break; + wqe = do_rc_completion(qp, wqe, ibp); + } + + ibp->n_rdma_seq++; + qp->r_flags |= QIB_R_RDMAR_SEQ; + qib_restart_rc(qp, qp->s_last_psn + 1, 0); + if (list_empty(&qp->rspwait)) { + qp->r_flags |= QIB_R_RSP_SEND; + atomic_inc(&qp->refcount); + list_add_tail(&qp->rspwait, &rcd->qp_wait_list); + } +} + +/** + * qib_rc_rcv_resp - process an incoming RC response packet + * @ibp: the port this packet came in on + * @ohdr: the other headers for this packet + * @data: the packet data + * @tlen: the packet length + * @qp: the QP for this packet + * @opcode: the opcode for this packet + * @psn: the packet sequence number for this packet + * @hdrsize: the header length + * @pmtu: the path MTU + * + * This is called from qib_rc_rcv() to process an incoming RC response + * packet for the given QP. + * Called at interrupt level. + */ +static void qib_rc_rcv_resp(struct qib_ibport *ibp, + struct qib_other_headers *ohdr, + void *data, u32 tlen, + struct qib_qp *qp, + u32 opcode, + u32 psn, u32 hdrsize, u32 pmtu, + struct qib_ctxtdata *rcd) +{ + struct qib_swqe *wqe; + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + enum ib_wc_status status; + unsigned long flags; + int diff; + u32 pad; + u32 aeth; + u64 val; + + if (opcode != OP(RDMA_READ_RESPONSE_MIDDLE)) { + /* + * If ACK'd PSN on SDMA busy list try to make progress to + * reclaim SDMA credits. + */ + if ((qib_cmp24(psn, qp->s_sending_psn) >= 0) && + (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) { + + /* + * If send tasklet not running attempt to progress + * SDMA queue. + */ + if (!(qp->s_flags & QIB_S_BUSY)) { + /* Acquire SDMA Lock */ + spin_lock_irqsave(&ppd->sdma_lock, flags); + /* Invoke sdma make progress */ + qib_sdma_make_progress(ppd); + /* Release SDMA Lock */ + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } + } + } + + spin_lock_irqsave(&qp->s_lock, flags); + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) + goto ack_done; + + /* Ignore invalid responses. 
*/ + if (qib_cmp24(psn, qp->s_next_psn) >= 0) + goto ack_done; + + /* Ignore duplicate responses. */ + diff = qib_cmp24(psn, qp->s_last_psn); + if (unlikely(diff <= 0)) { + /* Update credits for "ghost" ACKs */ + if (diff == 0 && opcode == OP(ACKNOWLEDGE)) { + aeth = be32_to_cpu(ohdr->u.aeth); + if ((aeth >> 29) == 0) + qib_get_credit(qp, aeth); + } + goto ack_done; + } + + /* + * Skip everything other than the PSN we expect, if we are waiting + * for a reply to a restarted RDMA read or atomic op. + */ + if (qp->r_flags & QIB_R_RDMAR_SEQ) { + if (qib_cmp24(psn, qp->s_last_psn + 1) != 0) + goto ack_done; + qp->r_flags &= ~QIB_R_RDMAR_SEQ; + } + + if (unlikely(qp->s_acked == qp->s_tail)) + goto ack_done; + wqe = get_swqe_ptr(qp, qp->s_acked); + status = IB_WC_SUCCESS; + + switch (opcode) { + case OP(ACKNOWLEDGE): + case OP(ATOMIC_ACKNOWLEDGE): + case OP(RDMA_READ_RESPONSE_FIRST): + aeth = be32_to_cpu(ohdr->u.aeth); + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { + __be32 *p = ohdr->u.at.atomic_ack_eth; + + val = ((u64) be32_to_cpu(p[0]) << 32) | + be32_to_cpu(p[1]); + } else + val = 0; + if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || + opcode != OP(RDMA_READ_RESPONSE_FIRST)) + goto ack_done; + hdrsize += 4; + wqe = get_swqe_ptr(qp, qp->s_acked); + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + /* + * If this is a response to a resent RDMA read, we + * have to be careful to copy the data to the right + * location. + */ + qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, + wqe, psn, pmtu); + goto read_middle; + + case OP(RDMA_READ_RESPONSE_MIDDLE): + /* no AETH, no ACK */ + if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1))) + goto ack_seq_err; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; +read_middle: + if (unlikely(tlen != (hdrsize + pmtu + 4))) + goto ack_len_err; + if (unlikely(pmtu >= qp->s_rdma_read_len)) + goto ack_len_err; + + /* + * We got a response so update the timeout. + * 4.096 usec. * (1 << qp->timeout) + */ + qp->s_flags |= QIB_S_TIMER; + mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); + if (qp->s_flags & QIB_S_WAIT_ACK) { + qp->s_flags &= ~QIB_S_WAIT_ACK; + qib_schedule_send(qp); + } + + if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE)) + qp->s_retry = qp->s_retry_cnt; + + /* + * Update the RDMA receive state but do the copy w/o + * holding the locks and blocking interrupts. + */ + qp->s_rdma_read_len -= pmtu; + update_last_psn(qp, psn); + spin_unlock_irqrestore(&qp->s_lock, flags); + qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0); + goto bail; + + case OP(RDMA_READ_RESPONSE_ONLY): + aeth = be32_to_cpu(ohdr->u.aeth); + if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd)) + goto ack_done; + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* + * Check that the data size is >= 0 && <= pmtu. + * Remember to account for the AETH header (4) and + * ICRC (4). + */ + if (unlikely(tlen < (hdrsize + pad + 8))) + goto ack_len_err; + /* + * If this is a response to a resent RDMA read, we + * have to be careful to copy the data to the right + * location. + */ + wqe = get_swqe_ptr(qp, qp->s_acked); + qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, + wqe, psn, pmtu); + goto read_last; + + case OP(RDMA_READ_RESPONSE_LAST): + /* ACKs READ req. */ + if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1))) + goto ack_seq_err; + if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) + goto ack_op_err; + /* Get the number of bytes the message was padded by. 
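The read_middle path above rearms the retransmit timer from qp->timeout_jiffies, the cached form of the standard IB local ACK timeout of 4.096 usec * 2^timeout. One way such a value can be derived from the 5-bit exponent (a sketch; the driver precomputes this when the QP attributes are set):

        #include <linux/jiffies.h>
        #include <linux/math64.h>

        static unsigned long rc_timeout_jiffies(u8 timeout)     /* 0..31 */
        {
                u64 ns = 4096ULL << timeout;    /* 4.096 usec * 2^timeout, in ns */

                return usecs_to_jiffies(div_u64(ns, 1000));
        }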
*/ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* + * Check that the data size is >= 1 && <= pmtu. + * Remember to account for the AETH header (4) and + * ICRC (4). + */ + if (unlikely(tlen <= (hdrsize + pad + 8))) + goto ack_len_err; +read_last: + tlen -= hdrsize + pad + 8; + if (unlikely(tlen != qp->s_rdma_read_len)) + goto ack_len_err; + aeth = be32_to_cpu(ohdr->u.aeth); + qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0); + WARN_ON(qp->s_rdma_read_sge.num_sge); + (void) do_rc_ack(qp, aeth, psn, + OP(RDMA_READ_RESPONSE_LAST), 0, rcd); + goto ack_done; + } + +ack_op_err: + status = IB_WC_LOC_QP_OP_ERR; + goto ack_err; + +ack_seq_err: + rdma_seq_err(qp, ibp, psn, rcd); + goto ack_done; + +ack_len_err: + status = IB_WC_LOC_LEN_ERR; +ack_err: + if (qp->s_last == qp->s_acked) { + qib_send_complete(qp, wqe, status); + qib_error_qp(qp, IB_WC_WR_FLUSH_ERR); + } +ack_done: + spin_unlock_irqrestore(&qp->s_lock, flags); +bail: + return; +} + +/** + * qib_rc_rcv_error - process an incoming duplicate or error RC packet + * @ohdr: the other headers for this packet + * @data: the packet data + * @qp: the QP for this packet + * @opcode: the opcode for this packet + * @psn: the packet sequence number for this packet + * @diff: the difference between the PSN and the expected PSN + * + * This is called from qib_rc_rcv() to process an unexpected + * incoming RC packet for the given QP. + * Called at interrupt level. + * Return 1 if no more processing is needed; otherwise return 0 to + * schedule a response to be sent. + */ +static int qib_rc_rcv_error(struct qib_other_headers *ohdr, + void *data, + struct qib_qp *qp, + u32 opcode, + u32 psn, + int diff, + struct qib_ctxtdata *rcd) +{ + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct qib_ack_entry *e; + unsigned long flags; + u8 i, prev; + int old_req; + + if (diff > 0) { + /* + * Packet sequence error. + * A NAK will ACK earlier sends and RDMA writes. + * Don't queue the NAK if we already sent one. + */ + if (!qp->r_nak_state) { + ibp->n_rc_seqnak++; + qp->r_nak_state = IB_NAK_PSN_ERROR; + /* Use the expected PSN. */ + qp->r_ack_psn = qp->r_psn; + /* + * Wait to send the sequence NAK until all packets + * in the receive queue have been processed. + * Otherwise, we end up propagating congestion. + */ + if (list_empty(&qp->rspwait)) { + qp->r_flags |= QIB_R_RSP_NAK; + atomic_inc(&qp->refcount); + list_add_tail(&qp->rspwait, &rcd->qp_wait_list); + } + } + goto done; + } + + /* + * Handle a duplicate request. Don't re-execute SEND, RDMA + * write or atomic op. Don't NAK errors, just silently drop + * the duplicate request. Note that r_sge, r_len, and + * r_rcv_len may be in use so don't modify them. + * + * We are supposed to ACK the earliest duplicate PSN but we + * can coalesce an outstanding duplicate ACK. We have to + * send the earliest so that RDMA reads can be restarted at + * the requester's expected PSN. + * + * First, find where this duplicate PSN falls within the + * ACKs previously sent. + * old_req is true if there is an older response that is scheduled + * to be sent before sending this one. 
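The duplicate-request search that follows walks the ACK queue backwards from r_head_ack_queue; since the queue has QIB_MAX_RDMA_ATOMIC + 1 slots, the decrement wraps explicitly rather than by masking. In isolation (sketch, reusing the driver's constant):

        static inline u8 ack_queue_prev(u8 i)
        {
                return i ? i - 1 : QIB_MAX_RDMA_ATOMIC;
        }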
+ */ + e = NULL; + old_req = 1; + ibp->n_rc_dupreq++; + + spin_lock_irqsave(&qp->s_lock, flags); + + for (i = qp->r_head_ack_queue; ; i = prev) { + if (i == qp->s_tail_ack_queue) + old_req = 0; + if (i) + prev = i - 1; + else + prev = QIB_MAX_RDMA_ATOMIC; + if (prev == qp->r_head_ack_queue) { + e = NULL; + break; + } + e = &qp->s_ack_queue[prev]; + if (!e->opcode) { + e = NULL; + break; + } + if (qib_cmp24(psn, e->psn) >= 0) { + if (prev == qp->s_tail_ack_queue && + qib_cmp24(psn, e->lpsn) <= 0) + old_req = 0; + break; + } + } + switch (opcode) { + case OP(RDMA_READ_REQUEST): { + struct ib_reth *reth; + u32 offset; + u32 len; + + /* + * If we didn't find the RDMA read request in the ack queue, + * we can ignore this request. + */ + if (!e || e->opcode != OP(RDMA_READ_REQUEST)) + goto unlock_done; + /* RETH comes after BTH */ + reth = &ohdr->u.rc.reth; + /* + * Address range must be a subset of the original + * request and start on pmtu boundaries. + * We reuse the old ack_queue slot since the requester + * should not back up and request an earlier PSN for the + * same request. + */ + offset = ((psn - e->psn) & QIB_PSN_MASK) * + qp->pmtu; + len = be32_to_cpu(reth->length); + if (unlikely(offset + len != e->rdma_sge.sge_length)) + goto unlock_done; + if (e->rdma_sge.mr) { + qib_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + if (len != 0) { + u32 rkey = be32_to_cpu(reth->rkey); + u64 vaddr = be64_to_cpu(reth->vaddr); + int ok; + + ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, + IB_ACCESS_REMOTE_READ); + if (unlikely(!ok)) + goto unlock_done; + } else { + e->rdma_sge.vaddr = NULL; + e->rdma_sge.length = 0; + e->rdma_sge.sge_length = 0; + } + e->psn = psn; + if (old_req) + goto unlock_done; + qp->s_tail_ack_queue = prev; + break; + } + + case OP(COMPARE_SWAP): + case OP(FETCH_ADD): { + /* + * If we didn't find the atomic request in the ack queue + * or the send tasklet is already backed up to send an + * earlier entry, we can ignore this request. + */ + if (!e || e->opcode != (u8) opcode || old_req) + goto unlock_done; + qp->s_tail_ack_queue = prev; + break; + } + + default: + /* + * Ignore this operation if it doesn't request an ACK + * or an earlier RDMA read or atomic is going to be resent. + */ + if (!(psn & IB_BTH_REQ_ACK) || old_req) + goto unlock_done; + /* + * Resend the most recent ACK if this request is + * after all the previous RDMA reads and atomics. + */ + if (i == qp->r_head_ack_queue) { + spin_unlock_irqrestore(&qp->s_lock, flags); + qp->r_nak_state = 0; + qp->r_ack_psn = qp->r_psn - 1; + goto send_ack; + } + /* + * Try to send a simple ACK to work around a Mellanox bug + * which doesn't accept a RDMA read response or atomic + * response as an ACK for earlier SENDs or RDMA writes. + */ + if (!(qp->s_flags & QIB_S_RESP_PENDING)) { + spin_unlock_irqrestore(&qp->s_lock, flags); + qp->r_nak_state = 0; + qp->r_ack_psn = qp->s_ack_queue[i].psn - 1; + goto send_ack; + } + /* + * Resend the RDMA read or atomic op which + * ACKs this duplicate request. 
+ */ + qp->s_tail_ack_queue = i; + break; + } + qp->s_ack_state = OP(ACKNOWLEDGE); + qp->s_flags |= QIB_S_RESP_PENDING; + qp->r_nak_state = 0; + qib_schedule_send(qp); + +unlock_done: + spin_unlock_irqrestore(&qp->s_lock, flags); +done: + return 1; + +send_ack: + return 0; +} + +void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err) +{ + unsigned long flags; + int lastwqe; + + spin_lock_irqsave(&qp->s_lock, flags); + lastwqe = qib_error_qp(qp, err); + spin_unlock_irqrestore(&qp->s_lock, flags); + + if (lastwqe) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } +} + +static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n) +{ + unsigned next; + + next = n + 1; + if (next > QIB_MAX_RDMA_ATOMIC) + next = 0; + qp->s_tail_ack_queue = next; + qp->s_ack_state = OP(ACKNOWLEDGE); +} + +/** + * qib_rc_rcv - process an incoming RC packet + * @rcd: the context pointer + * @hdr: the header of this packet + * @has_grh: true if the header has a GRH + * @data: the packet data + * @tlen: the packet length + * @qp: the QP for this packet + * + * This is called from qib_qp_rcv() to process an incoming RC packet + * for the given QP. + * Called at interrupt level. + */ +void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp) +{ + struct qib_ibport *ibp = &rcd->ppd->ibport_data; + struct qib_other_headers *ohdr; + u32 opcode; + u32 hdrsize; + u32 psn; + u32 pad; + struct ib_wc wc; + u32 pmtu = qp->pmtu; + int diff; + struct ib_reth *reth; + unsigned long flags; + int ret; + + /* Check for GRH */ + if (!has_grh) { + ohdr = &hdr->u.oth; + hdrsize = 8 + 12; /* LRH + BTH */ + } else { + ohdr = &hdr->u.l.oth; + hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ + } + + opcode = be32_to_cpu(ohdr->bth[0]); + if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) + return; + + psn = be32_to_cpu(ohdr->bth[2]); + opcode >>= 24; + + /* + * Process responses (ACKs) before anything else. Note that the + * packet sequence number will be for something in the send work + * queue rather than the expected receive packet sequence number. + * In other words, this QP is the requester. + */ + if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && + opcode <= OP(ATOMIC_ACKNOWLEDGE)) { + qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn, + hdrsize, pmtu, rcd); + return; + } + + /* Compute 24 bits worth of difference. */ + diff = qib_cmp24(psn, qp->r_psn); + if (unlikely(diff)) { + if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd)) + return; + goto send_ack; + } + + /* Check for opcode sequence errors. */ + switch (qp->r_state) { + case OP(SEND_FIRST): + case OP(SEND_MIDDLE): + if (opcode == OP(SEND_MIDDLE) || + opcode == OP(SEND_LAST) || + opcode == OP(SEND_LAST_WITH_IMMEDIATE)) + break; + goto nack_inv; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_MIDDLE): + if (opcode == OP(RDMA_WRITE_MIDDLE) || + opcode == OP(RDMA_WRITE_LAST) || + opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) + break; + goto nack_inv; + + default: + if (opcode == OP(SEND_MIDDLE) || + opcode == OP(SEND_LAST) || + opcode == OP(SEND_LAST_WITH_IMMEDIATE) || + opcode == OP(RDMA_WRITE_MIDDLE) || + opcode == OP(RDMA_WRITE_LAST) || + opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) + goto nack_inv; + /* + * Note that it is up to the requester to not send a new + * RDMA read or atomic operation before receiving an ACK + * for the previous operation. 
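qib_rc_rcv() above takes the opcode from the top byte of the first BTH word and the PSN from the third. As stand-alone helpers (sketch; note the driver deliberately leaves the upper PSN bits in place, e.g. the ACK-request flag tested as psn & IB_BTH_REQ_ACK, and masks to 24 bits only inside qib_cmp24()):

        #include <linux/types.h>

        static inline u32 bth_opcode(__be32 bth0)
        {
                return be32_to_cpu(bth0) >> 24;
        }

        static inline u32 bth_psn(__be32 bth2)
        {
                return be32_to_cpu(bth2);       /* PSN proper is bits 23:0 */
        }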
+ */ + break; + } + + if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) { + qp->r_flags |= QIB_R_COMM_EST; + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_COMM_EST; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + } + + /* OK, process the packet. */ + switch (opcode) { + case OP(SEND_FIRST): + ret = qib_get_rwqe(qp, 0); + if (ret < 0) + goto nack_op_err; + if (!ret) + goto rnr_nak; + qp->r_rcv_len = 0; + /* FALLTHROUGH */ + case OP(SEND_MIDDLE): + case OP(RDMA_WRITE_MIDDLE): +send_middle: + /* Check for invalid length PMTU or posted rwqe len. */ + if (unlikely(tlen != (hdrsize + pmtu + 4))) + goto nack_inv; + qp->r_rcv_len += pmtu; + if (unlikely(qp->r_rcv_len > qp->r_len)) + goto nack_inv; + qib_copy_sge(&qp->r_sge, data, pmtu, 1); + break; + + case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): + /* consume RWQE */ + ret = qib_get_rwqe(qp, 1); + if (ret < 0) + goto nack_op_err; + if (!ret) + goto rnr_nak; + goto send_last_imm; + + case OP(SEND_ONLY): + case OP(SEND_ONLY_WITH_IMMEDIATE): + ret = qib_get_rwqe(qp, 0); + if (ret < 0) + goto nack_op_err; + if (!ret) + goto rnr_nak; + qp->r_rcv_len = 0; + if (opcode == OP(SEND_ONLY)) + goto no_immediate_data; + /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ + case OP(SEND_LAST_WITH_IMMEDIATE): +send_last_imm: + wc.ex.imm_data = ohdr->u.imm_data; + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + goto send_last; + case OP(SEND_LAST): + case OP(RDMA_WRITE_LAST): +no_immediate_data: + wc.wc_flags = 0; + wc.ex.imm_data = 0; +send_last: + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* Check for invalid length. */ + /* XXX LAST len should be >= 1 */ + if (unlikely(tlen < (hdrsize + pad + 4))) + goto nack_inv; + /* Don't count the CRC. */ + tlen -= (hdrsize + pad + 4); + wc.byte_len = tlen + qp->r_rcv_len; + if (unlikely(wc.byte_len > qp->r_len)) + goto nack_inv; + qib_copy_sge(&qp->r_sge, data, tlen, 1); + qib_put_ss(&qp->r_sge); + qp->r_msn++; + if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + break; + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || + opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + else + wc.opcode = IB_WC_RECV; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + /* zero fields that are N/A */ + wc.vendor_err = 0; + wc.pkey_index = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + /* Signal completion event if the solicited bit is set. 
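The send_last arithmetic above strips the header, the 0-3 BTH pad bytes, and the 4-byte ICRC from the on-wire length before crediting the payload to the receive WQE. As a stand-alone helper (sketch):

        static inline u32 rc_payload_bytes(u32 tlen, u32 hdrsize, __be32 bth0)
        {
                u32 pad = (be32_to_cpu(bth0) >> 20) & 3;        /* BTH PadCnt */

                return tlen - (hdrsize + pad + 4);              /* 4 = ICRC */
        }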
*/ + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + (ohdr->bth[0] & + cpu_to_be32(IB_BTH_SOLICITED)) != 0); + break; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_ONLY): + case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto nack_inv; + /* consume RWQE */ + reth = &ohdr->u.rc.reth; + hdrsize += sizeof(*reth); + qp->r_len = be32_to_cpu(reth->length); + qp->r_rcv_len = 0; + qp->r_sge.sg_list = NULL; + if (qp->r_len != 0) { + u32 rkey = be32_to_cpu(reth->rkey); + u64 vaddr = be64_to_cpu(reth->vaddr); + int ok; + + /* Check rkey & NAK */ + ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr, + rkey, IB_ACCESS_REMOTE_WRITE); + if (unlikely(!ok)) + goto nack_acc; + qp->r_sge.num_sge = 1; + } else { + qp->r_sge.num_sge = 0; + qp->r_sge.sge.mr = NULL; + qp->r_sge.sge.vaddr = NULL; + qp->r_sge.sge.length = 0; + qp->r_sge.sge.sge_length = 0; + } + if (opcode == OP(RDMA_WRITE_FIRST)) + goto send_middle; + else if (opcode == OP(RDMA_WRITE_ONLY)) + goto no_immediate_data; + ret = qib_get_rwqe(qp, 1); + if (ret < 0) + goto nack_op_err; + if (!ret) + goto rnr_nak; + wc.ex.imm_data = ohdr->u.rc.imm_data; + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + goto send_last; + + case OP(RDMA_READ_REQUEST): { + struct qib_ack_entry *e; + u32 len; + u8 next; + + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) + goto nack_inv; + next = qp->r_head_ack_queue + 1; + /* s_ack_queue is size QIB_MAX_RDMA_ATOMIC+1 so use > not >= */ + if (next > QIB_MAX_RDMA_ATOMIC) + next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + if (unlikely(next == qp->s_tail_ack_queue)) { + if (!qp->s_ack_queue[next].sent) + goto nack_inv_unlck; + qib_update_ack_queue(qp, next); + } + e = &qp->s_ack_queue[qp->r_head_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + qib_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + reth = &ohdr->u.rc.reth; + len = be32_to_cpu(reth->length); + if (len) { + u32 rkey = be32_to_cpu(reth->rkey); + u64 vaddr = be64_to_cpu(reth->vaddr); + int ok; + + /* Check rkey & NAK */ + ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, + rkey, IB_ACCESS_REMOTE_READ); + if (unlikely(!ok)) + goto nack_acc_unlck; + /* + * Update the next expected PSN. We add 1 later + * below, so only add the remainder here. + */ + if (len > pmtu) + qp->r_psn += (len - 1) / pmtu; + } else { + e->rdma_sge.mr = NULL; + e->rdma_sge.vaddr = NULL; + e->rdma_sge.length = 0; + e->rdma_sge.sge_length = 0; + } + e->opcode = opcode; + e->sent = 0; + e->psn = psn; + e->lpsn = qp->r_psn; + /* + * We need to increment the MSN here instead of when we + * finish sending the result since a duplicate request would + * increment it more than once. + */ + qp->r_msn++; + qp->r_psn++; + qp->r_state = opcode; + qp->r_nak_state = 0; + qp->r_head_ack_queue = next; + + /* Schedule the send tasklet. 
*/ + qp->s_flags |= QIB_S_RESP_PENDING; + qib_schedule_send(qp); + + goto sunlock; + } + + case OP(COMPARE_SWAP): + case OP(FETCH_ADD): { + struct ib_atomic_eth *ateth; + struct qib_ack_entry *e; + u64 vaddr; + atomic64_t *maddr; + u64 sdata; + u32 rkey; + u8 next; + + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + goto nack_inv; + next = qp->r_head_ack_queue + 1; + if (next > QIB_MAX_RDMA_ATOMIC) + next = 0; + spin_lock_irqsave(&qp->s_lock, flags); + if (unlikely(next == qp->s_tail_ack_queue)) { + if (!qp->s_ack_queue[next].sent) + goto nack_inv_unlck; + qib_update_ack_queue(qp, next); + } + e = &qp->s_ack_queue[qp->r_head_ack_queue]; + if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { + qib_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + ateth = &ohdr->u.atomic_eth; + vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | + be32_to_cpu(ateth->vaddr[1]); + if (unlikely(vaddr & (sizeof(u64) - 1))) + goto nack_inv_unlck; + rkey = be32_to_cpu(ateth->rkey); + /* Check rkey & NAK */ + if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), + vaddr, rkey, + IB_ACCESS_REMOTE_ATOMIC))) + goto nack_acc_unlck; + /* Perform atomic OP and save result. */ + maddr = (atomic64_t *) qp->r_sge.sge.vaddr; + sdata = be64_to_cpu(ateth->swap_data); + e->atomic_data = (opcode == OP(FETCH_ADD)) ? + (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + be64_to_cpu(ateth->compare_data), + sdata); + qib_put_mr(qp->r_sge.sge.mr); + qp->r_sge.num_sge = 0; + e->opcode = opcode; + e->sent = 0; + e->psn = psn; + e->lpsn = psn; + qp->r_msn++; + qp->r_psn++; + qp->r_state = opcode; + qp->r_nak_state = 0; + qp->r_head_ack_queue = next; + + /* Schedule the send tasklet. */ + qp->s_flags |= QIB_S_RESP_PENDING; + qib_schedule_send(qp); + + goto sunlock; + } + + default: + /* NAK unknown opcodes. */ + goto nack_inv; + } + qp->r_psn++; + qp->r_state = opcode; + qp->r_ack_psn = psn; + qp->r_nak_state = 0; + /* Send an ACK if requested or required. 
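The atomic executed above is compact; spelled out as a sketch (assuming a validated, 8-byte-aligned target and an architecture providing 64-bit cmpxchg): FETCH_ADD returns the value before the add, COMPARE_SWAP returns whatever was found and swaps only on a compare match.

        #include <linux/atomic.h>
        #include <linux/types.h>

        static u64 exec_rc_atomic(bool fetch_add, u64 *vaddr,
                                  u64 swap_or_add, u64 compare)
        {
                if (fetch_add)
                        return (u64)atomic64_add_return(swap_or_add,
                                        (atomic64_t *)vaddr) - swap_or_add;
                return cmpxchg(vaddr, compare, swap_or_add);
        }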
*/ + if (psn & (1 << 31)) + goto send_ack; + return; + +rnr_nak: + qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer; + qp->r_ack_psn = qp->r_psn; + /* Queue RNR NAK for later */ + if (list_empty(&qp->rspwait)) { + qp->r_flags |= QIB_R_RSP_NAK; + atomic_inc(&qp->refcount); + list_add_tail(&qp->rspwait, &rcd->qp_wait_list); + } + return; + +nack_op_err: + qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR; + qp->r_ack_psn = qp->r_psn; + /* Queue NAK for later */ + if (list_empty(&qp->rspwait)) { + qp->r_flags |= QIB_R_RSP_NAK; + atomic_inc(&qp->refcount); + list_add_tail(&qp->rspwait, &rcd->qp_wait_list); + } + return; + +nack_inv_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); +nack_inv: + qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + qp->r_nak_state = IB_NAK_INVALID_REQUEST; + qp->r_ack_psn = qp->r_psn; + /* Queue NAK for later */ + if (list_empty(&qp->rspwait)) { + qp->r_flags |= QIB_R_RSP_NAK; + atomic_inc(&qp->refcount); + list_add_tail(&qp->rspwait, &rcd->qp_wait_list); + } + return; + +nack_acc_unlck: + spin_unlock_irqrestore(&qp->s_lock, flags); +nack_acc: + qib_rc_error(qp, IB_WC_LOC_PROT_ERR); + qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; + qp->r_ack_psn = qp->r_psn; +send_ack: + qib_send_rc_ack(qp); + return; + +sunlock: + spin_unlock_irqrestore(&qp->s_lock, flags); +} diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c new file mode 100644 index 00000000000..4c07a8b34ff --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -0,0 +1,819 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/spinlock.h> + +#include "qib.h" +#include "qib_mad.h" + +/* + * Convert the AETH RNR timeout code into the number of microseconds. 
+ */ +const u32 ib_qib_rnr_table[32] = { + 655360, /* 00: 655.36 */ + 10, /* 01: .01 */ + 20, /* 02 .02 */ + 30, /* 03: .03 */ + 40, /* 04: .04 */ + 60, /* 05: .06 */ + 80, /* 06: .08 */ + 120, /* 07: .12 */ + 160, /* 08: .16 */ + 240, /* 09: .24 */ + 320, /* 0A: .32 */ + 480, /* 0B: .48 */ + 640, /* 0C: .64 */ + 960, /* 0D: .96 */ + 1280, /* 0E: 1.28 */ + 1920, /* 0F: 1.92 */ + 2560, /* 10: 2.56 */ + 3840, /* 11: 3.84 */ + 5120, /* 12: 5.12 */ + 7680, /* 13: 7.68 */ + 10240, /* 14: 10.24 */ + 15360, /* 15: 15.36 */ + 20480, /* 16: 20.48 */ + 30720, /* 17: 30.72 */ + 40960, /* 18: 40.96 */ + 61440, /* 19: 61.44 */ + 81920, /* 1A: 81.92 */ + 122880, /* 1B: 122.88 */ + 163840, /* 1C: 163.84 */ + 245760, /* 1D: 245.76 */ + 327680, /* 1E: 327.68 */ + 491520 /* 1F: 491.52 */ +}; + +/* + * Validate a RWQE and fill in the SGE state. + * Return 1 if OK. + */ +static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe) +{ + int i, j, ret; + struct ib_wc wc; + struct qib_lkey_table *rkt; + struct qib_pd *pd; + struct qib_sge_state *ss; + + rkt = &to_idev(qp->ibqp.device)->lk_table; + pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); + ss = &qp->r_sge; + ss->sg_list = qp->r_sg_list; + qp->r_len = 0; + for (i = j = 0; i < wqe->num_sge; i++) { + if (wqe->sg_list[i].length == 0) + continue; + /* Check LKEY */ + if (!qib_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, + &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE)) + goto bad_lkey; + qp->r_len += wqe->sg_list[i].length; + j++; + } + ss->num_sge = j; + ss->total_len = qp->r_len; + ret = 1; + goto bail; + +bad_lkey: + while (j) { + struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; + + qib_put_mr(sge->mr); + } + ss->num_sge = 0; + memset(&wc, 0, sizeof(wc)); + wc.wr_id = wqe->wr_id; + wc.status = IB_WC_LOC_PROT_ERR; + wc.opcode = IB_WC_RECV; + wc.qp = &qp->ibqp; + /* Signal solicited completion event. */ + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); + ret = 0; +bail: + return ret; +} + +/** + * qib_get_rwqe - copy the next RWQE into the QP's RWQE + * @qp: the QP + * @wr_id_only: update qp->r_wr_id only, not qp->r_sge + * + * Return -1 if there is a local error, 0 if no RWQE is available, + * otherwise return 1. + * + * Can be called from interrupt level. + */ +int qib_get_rwqe(struct qib_qp *qp, int wr_id_only) +{ + unsigned long flags; + struct qib_rq *rq; + struct qib_rwq *wq; + struct qib_srq *srq; + struct qib_rwqe *wqe; + void (*handler)(struct ib_event *, void *); + u32 tail; + int ret; + + if (qp->ibqp.srq) { + srq = to_isrq(qp->ibqp.srq); + handler = srq->ibsrq.event_handler; + rq = &srq->rq; + } else { + srq = NULL; + handler = NULL; + rq = &qp->r_rq; + } + + spin_lock_irqsave(&rq->lock, flags); + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { + ret = 0; + goto unlock; + } + + wq = rq->wq; + tail = wq->tail; + /* Validate tail before using it since it is user writable. */ + if (tail >= rq->size) + tail = 0; + if (unlikely(tail == wq->head)) { + ret = 0; + goto unlock; + } + /* Make sure entry is read after head index is read. */ + smp_rmb(); + wqe = get_rwqe_ptr(rq, tail); + /* + * Even though we update the tail index in memory, the verbs + * consumer is not supposed to post more entries until a + * completion is generated. 
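After consuming an entry, qib_get_rwqe() below computes how many WQEs remain in the user-writable ring so it can raise an SRQ limit event. The circular arithmetic in isolation (sketch; head and tail have already been clamped to [0, size)):

        static inline u32 rq_entries(u32 head, u32 tail, u32 size)
        {
                return head >= tail ? head - tail : head + size - tail;
        }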
+ */ + if (++tail >= rq->size) + tail = 0; + wq->tail = tail; + if (!wr_id_only && !qib_init_sge(qp, wqe)) { + ret = -1; + goto unlock; + } + qp->r_wr_id = wqe->wr_id; + + ret = 1; + set_bit(QIB_R_WRID_VALID, &qp->r_aflags); + if (handler) { + u32 n; + + /* + * Validate head pointer value and compute + * the number of remaining WQEs. + */ + n = wq->head; + if (n >= rq->size) + n = 0; + if (n < tail) + n += rq->size - tail; + else + n -= tail; + if (n < srq->limit) { + struct ib_event ev; + + srq->limit = 0; + spin_unlock_irqrestore(&rq->lock, flags); + ev.device = qp->ibqp.device; + ev.element.srq = qp->ibqp.srq; + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; + handler(&ev, srq->ibsrq.srq_context); + goto bail; + } + } +unlock: + spin_unlock_irqrestore(&rq->lock, flags); +bail: + return ret; +} + +/* + * Switch to alternate path. + * The QP s_lock should be held and interrupts disabled. + */ +void qib_migrate_qp(struct qib_qp *qp) +{ + struct ib_event ev; + + qp->s_mig_state = IB_MIG_MIGRATED; + qp->remote_ah_attr = qp->alt_ah_attr; + qp->port_num = qp->alt_ah_attr.port_num; + qp->s_pkey_index = qp->s_alt_pkey_index; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_PATH_MIG; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); +} + +static __be64 get_sguid(struct qib_ibport *ibp, unsigned index) +{ + if (!index) { + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + return ppd->guid; + } else + return ibp->guids[index - 1]; +} + +static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) +{ + return (gid->global.interface_id == id && + (gid->global.subnet_prefix == gid_prefix || + gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX)); +} + +/* + * + * This should be called with the QP r_lock held. + * + * The s_lock will be acquired around the qib_migrate_qp() call. + */ +int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, struct qib_qp *qp, u32 bth0) +{ + __be64 guid; + unsigned long flags; + + if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) { + if (!has_grh) { + if (qp->alt_ah_attr.ah_flags & IB_AH_GRH) + goto err; + } else { + if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH)) + goto err; + guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index); + if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid)) + goto err; + if (!gid_ok(&hdr->u.l.grh.sgid, + qp->alt_ah_attr.grh.dgid.global.subnet_prefix, + qp->alt_ah_attr.grh.dgid.global.interface_id)) + goto err; + } + if (!qib_pkey_ok((u16)bth0, + qib_get_pkey(ibp, qp->s_alt_pkey_index))) { + qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, + (u16)bth0, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + 0, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); + goto err; + } + /* Validate the SLID. See Ch. 
9.6.1.5 and 17.2.8 */ + if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid || + ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num) + goto err; + spin_lock_irqsave(&qp->s_lock, flags); + qib_migrate_qp(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + } else { + if (!has_grh) { + if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + goto err; + } else { + if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) + goto err; + guid = get_sguid(ibp, + qp->remote_ah_attr.grh.sgid_index); + if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid)) + goto err; + if (!gid_ok(&hdr->u.l.grh.sgid, + qp->remote_ah_attr.grh.dgid.global.subnet_prefix, + qp->remote_ah_attr.grh.dgid.global.interface_id)) + goto err; + } + if (!qib_pkey_ok((u16)bth0, + qib_get_pkey(ibp, qp->s_pkey_index))) { + qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, + (u16)bth0, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + 0, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); + goto err; + } + /* Validate the SLID. See Ch. 9.6.1.5 */ + if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid || + ppd_from_ibp(ibp)->port != qp->port_num) + goto err; + if (qp->s_mig_state == IB_MIG_REARM && + !(bth0 & IB_BTH_MIG_REQ)) + qp->s_mig_state = IB_MIG_ARMED; + } + + return 0; + +err: + return 1; +} + +/** + * qib_ruc_loopback - handle UC and RC lookback requests + * @sqp: the sending QP + * + * This is called from qib_do_send() to + * forward a WQE addressed to the same HCA. + * Note that although we are single threaded due to the tasklet, we still + * have to protect against post_send(). We don't have to worry about + * receive interrupts since this is a connected protocol and all packets + * will pass through here. + */ +static void qib_ruc_loopback(struct qib_qp *sqp) +{ + struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); + struct qib_qp *qp; + struct qib_swqe *wqe; + struct qib_sge *sge; + unsigned long flags; + struct ib_wc wc; + u64 sdata; + atomic64_t *maddr; + enum ib_wc_status send_status; + int release; + int ret; + + /* + * Note that we check the responder QP state after + * checking the requester's state. + */ + qp = qib_lookup_qpn(ibp, sqp->remote_qpn); + + spin_lock_irqsave(&sqp->s_lock, flags); + + /* Return if we are already busy processing a work request. */ + if ((sqp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT)) || + !(ib_qib_state_ops[sqp->state] & QIB_PROCESS_OR_FLUSH_SEND)) + goto unlock; + + sqp->s_flags |= QIB_S_BUSY; + +again: + if (sqp->s_last == sqp->s_head) + goto clr_busy; + wqe = get_swqe_ptr(sqp, sqp->s_last); + + /* Return if it is not OK to start a new work reqeust. */ + if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_NEXT_SEND_OK)) { + if (!(ib_qib_state_ops[sqp->state] & QIB_FLUSH_SEND)) + goto clr_busy; + /* We are in the error state, flush the work request. */ + send_status = IB_WC_WR_FLUSH_ERR; + goto flush_send; + } + + /* + * We can rely on the entry not changing without the s_lock + * being held until we update s_last. + * We increment s_cur to indicate s_last is in progress. + */ + if (sqp->s_last == sqp->s_cur) { + if (++sqp->s_cur >= sqp->s_size) + sqp->s_cur = 0; + } + spin_unlock_irqrestore(&sqp->s_lock, flags); + + if (!qp || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) || + qp->ibqp.qp_type != sqp->ibqp.qp_type) { + ibp->n_pkt_drops++; + /* + * For RC, the requester would timeout and retry so + * shortcut the timeouts and just signal too many retries. 
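The P_Key tests in qib_ruc_check_hdr() above follow the usual IB matching rule: the low 15 bits must agree and be non-zero, and at least one of the two keys must carry the full-membership bit. An illustrative comparison (not the driver's qib_pkey_ok(), which is defined elsewhere):

        static inline int pkeys_match(u16 a, u16 b)
        {
                u16 lo = a & 0x7fff;

                return lo && lo == (b & 0x7fff) && ((a | b) & 0x8000);
        }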
+ */ + if (sqp->ibqp.qp_type == IB_QPT_RC) + send_status = IB_WC_RETRY_EXC_ERR; + else + send_status = IB_WC_SUCCESS; + goto serr; + } + + memset(&wc, 0, sizeof wc); + send_status = IB_WC_SUCCESS; + + release = 1; + sqp->s_sge.sge = wqe->sg_list[0]; + sqp->s_sge.sg_list = wqe->sg_list + 1; + sqp->s_sge.num_sge = wqe->wr.num_sge; + sqp->s_len = wqe->length; + switch (wqe->wr.opcode) { + case IB_WR_SEND_WITH_IMM: + wc.wc_flags = IB_WC_WITH_IMM; + wc.ex.imm_data = wqe->wr.ex.imm_data; + /* FALLTHROUGH */ + case IB_WR_SEND: + ret = qib_get_rwqe(qp, 0); + if (ret < 0) + goto op_err; + if (!ret) + goto rnr_nak; + break; + + case IB_WR_RDMA_WRITE_WITH_IMM: + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto inv_err; + wc.wc_flags = IB_WC_WITH_IMM; + wc.ex.imm_data = wqe->wr.ex.imm_data; + ret = qib_get_rwqe(qp, 1); + if (ret < 0) + goto op_err; + if (!ret) + goto rnr_nak; + /* FALLTHROUGH */ + case IB_WR_RDMA_WRITE: + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) + goto inv_err; + if (wqe->length == 0) + break; + if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length, + wqe->wr.wr.rdma.remote_addr, + wqe->wr.wr.rdma.rkey, + IB_ACCESS_REMOTE_WRITE))) + goto acc_err; + qp->r_sge.sg_list = NULL; + qp->r_sge.num_sge = 1; + qp->r_sge.total_len = wqe->length; + break; + + case IB_WR_RDMA_READ: + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) + goto inv_err; + if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, + wqe->wr.wr.rdma.remote_addr, + wqe->wr.wr.rdma.rkey, + IB_ACCESS_REMOTE_READ))) + goto acc_err; + release = 0; + sqp->s_sge.sg_list = NULL; + sqp->s_sge.num_sge = 1; + qp->r_sge.sge = wqe->sg_list[0]; + qp->r_sge.sg_list = wqe->sg_list + 1; + qp->r_sge.num_sge = wqe->wr.num_sge; + qp->r_sge.total_len = wqe->length; + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + goto inv_err; + if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), + wqe->wr.wr.atomic.remote_addr, + wqe->wr.wr.atomic.rkey, + IB_ACCESS_REMOTE_ATOMIC))) + goto acc_err; + /* Perform atomic OP and save result. */ + maddr = (atomic64_t *) qp->r_sge.sge.vaddr; + sdata = wqe->wr.wr.atomic.compare_add; + *(u64 *) sqp->s_sge.sge.vaddr = + (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? 
+ (u64) atomic64_add_return(sdata, maddr) - sdata : + (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, + sdata, wqe->wr.wr.atomic.swap); + qib_put_mr(qp->r_sge.sge.mr); + qp->r_sge.num_sge = 0; + goto send_comp; + + default: + send_status = IB_WC_LOC_QP_OP_ERR; + goto serr; + } + + sge = &sqp->s_sge.sge; + while (sqp->s_len) { + u32 len = sqp->s_len; + + if (len > sge->length) + len = sge->length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + qib_copy_sge(&qp->r_sge, sge->vaddr, len, release); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (!release) + qib_put_mr(sge->mr); + if (--sqp->s_sge.num_sge) + *sge = *sqp->s_sge.sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + sqp->s_len -= len; + } + if (release) + qib_put_ss(&qp->r_sge); + + if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + goto send_comp; + + if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + else + wc.opcode = IB_WC_RECV; + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + wc.byte_len = wqe->length; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + wc.port_num = 1; + /* Signal completion event if the solicited bit is set. */ + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + wqe->wr.send_flags & IB_SEND_SOLICITED); + +send_comp: + spin_lock_irqsave(&sqp->s_lock, flags); + ibp->n_loop_pkts++; +flush_send: + sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; + qib_send_complete(sqp, wqe, send_status); + goto again; + +rnr_nak: + /* Handle RNR NAK */ + if (qp->ibqp.qp_type == IB_QPT_UC) + goto send_comp; + ibp->n_rnr_naks++; + /* + * Note: we don't need the s_lock held since the BUSY flag + * makes this single threaded. 
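Each pass of the copy loop earlier in qib_ruc_loopback() transfers at most the bytes left in the request, in the current memory-region segment, and in the current SGE. The per-iteration clamp in isolation (sketch):

        static inline u32 copy_chunk(u32 remaining, u32 seg_len, u32 sge_len)
        {
                u32 len = remaining;

                if (len > seg_len)
                        len = seg_len;
                if (len > sge_len)
                        len = sge_len;
                return len;
        }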
+ */ + if (sqp->s_rnr_retry == 0) { + send_status = IB_WC_RNR_RETRY_EXC_ERR; + goto serr; + } + if (sqp->s_rnr_retry_cnt < 7) + sqp->s_rnr_retry--; + spin_lock_irqsave(&sqp->s_lock, flags); + if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_RECV_OK)) + goto clr_busy; + sqp->s_flags |= QIB_S_WAIT_RNR; + sqp->s_timer.function = qib_rc_rnr_retry; + sqp->s_timer.expires = jiffies + + usecs_to_jiffies(ib_qib_rnr_table[qp->r_min_rnr_timer]); + add_timer(&sqp->s_timer); + goto clr_busy; + +op_err: + send_status = IB_WC_REM_OP_ERR; + wc.status = IB_WC_LOC_QP_OP_ERR; + goto err; + +inv_err: + send_status = IB_WC_REM_INV_REQ_ERR; + wc.status = IB_WC_LOC_QP_OP_ERR; + goto err; + +acc_err: + send_status = IB_WC_REM_ACCESS_ERR; + wc.status = IB_WC_LOC_PROT_ERR; +err: + /* responder goes to error state */ + qib_rc_error(qp, wc.status); + +serr: + spin_lock_irqsave(&sqp->s_lock, flags); + qib_send_complete(sqp, wqe, send_status); + if (sqp->ibqp.qp_type == IB_QPT_RC) { + int lastwqe = qib_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + + sqp->s_flags &= ~QIB_S_BUSY; + spin_unlock_irqrestore(&sqp->s_lock, flags); + if (lastwqe) { + struct ib_event ev; + + ev.device = sqp->ibqp.device; + ev.element.qp = &sqp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); + } + goto done; + } +clr_busy: + sqp->s_flags &= ~QIB_S_BUSY; +unlock: + spin_unlock_irqrestore(&sqp->s_lock, flags); +done: + if (qp && atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +/** + * qib_make_grh - construct a GRH header + * @ibp: a pointer to the IB port + * @hdr: a pointer to the GRH header being constructed + * @grh: the global route address to send to + * @hwords: the number of 32 bit words of header being sent + * @nwords: the number of 32 bit words of data being sent + * + * Return the size of the header in 32 bit words. + */ +u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, + struct ib_global_route *grh, u32 hwords, u32 nwords) +{ + hdr->version_tclass_flow = + cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) | + (grh->traffic_class << IB_GRH_TCLASS_SHIFT) | + (grh->flow_label << IB_GRH_FLOW_SHIFT)); + hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2); + /* next_hdr is defined by C8-7 in ch. 8.4.1 */ + hdr->next_hdr = IB_GRH_NEXT_HDR; + hdr->hop_limit = grh->hop_limit; + /* The SGID is 32-bit aligned. */ + hdr->sgid.global.subnet_prefix = ibp->gid_prefix; + hdr->sgid.global.interface_id = grh->sgid_index ? + ibp->guids[grh->sgid_index - 1] : ppd_from_ibp(ibp)->guid; + hdr->dgid = grh->dgid; + + /* GRH header size in 32-bit words. */ + return sizeof(struct ib_grh) / sizeof(u32); +} + +void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, + u32 bth0, u32 bth2) +{ + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + u16 lrh0; + u32 nwords; + u32 extra_bytes; + + /* Construct the header. 
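The header construction that follows pads the payload to a 32-bit boundary with extra_bytes = -qp->s_cur_size & 3, using the identity that (-len) & 3 is the count of bytes needed to round len up to a multiple of 4. In isolation (sketch):

        static inline u32 pad_to_dword(u32 len)
        {
                return -len & 3;        /* 5 -> 3, 8 -> 0 */
        }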
*/ + extra_bytes = -qp->s_cur_size & 3; + nwords = (qp->s_cur_size + extra_bytes) >> 2; + lrh0 = QIB_LRH_BTH; + if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { + qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, + &qp->remote_ah_attr.grh, + qp->s_hdrwords, nwords); + lrh0 = QIB_LRH_GRH; + } + lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 | + qp->remote_ah_attr.sl << 4; + qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); + qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | + qp->remote_ah_attr.src_path_bits); + bth0 |= qib_get_pkey(ibp, qp->s_pkey_index); + bth0 |= extra_bytes << 20; + if (qp->s_mig_state == IB_MIG_MIGRATED) + bth0 |= IB_BTH_MIG_REQ; + ohdr->bth[0] = cpu_to_be32(bth0); + ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); + ohdr->bth[2] = cpu_to_be32(bth2); + this_cpu_inc(ibp->pmastats->n_unicast_xmit); +} + +/** + * qib_do_send - perform a send on a QP + * @work: contains a pointer to the QP + * + * Process entries in the send work queue until credit or queue is + * exhausted. Only allow one CPU to send a packet per QP (tasklet). + * Otherwise, two threads could send packets out of order. + */ +void qib_do_send(struct work_struct *work) +{ + struct qib_qp *qp = container_of(work, struct qib_qp, s_work); + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + int (*make_req)(struct qib_qp *qp); + unsigned long flags; + + if ((qp->ibqp.qp_type == IB_QPT_RC || + qp->ibqp.qp_type == IB_QPT_UC) && + (qp->remote_ah_attr.dlid & ~((1 << ppd->lmc) - 1)) == ppd->lid) { + qib_ruc_loopback(qp); + return; + } + + if (qp->ibqp.qp_type == IB_QPT_RC) + make_req = qib_make_rc_req; + else if (qp->ibqp.qp_type == IB_QPT_UC) + make_req = qib_make_uc_req; + else + make_req = qib_make_ud_req; + + spin_lock_irqsave(&qp->s_lock, flags); + + /* Return if we are already busy processing a work request. */ + if (!qib_send_ok(qp)) { + spin_unlock_irqrestore(&qp->s_lock, flags); + return; + } + + qp->s_flags |= QIB_S_BUSY; + + spin_unlock_irqrestore(&qp->s_lock, flags); + + do { + /* Check for a constructed packet to be sent. */ + if (qp->s_hdrwords != 0) { + /* + * If the packet cannot be sent now, return and + * the send tasklet will be woken up later. + */ + if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords, + qp->s_cur_sge, qp->s_cur_size)) + break; + /* Record that s_hdr is empty. */ + qp->s_hdrwords = 0; + } + } while (make_req(qp)); +} + +/* + * This should be called with s_lock held. + */ +void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, + enum ib_wc_status status) +{ + u32 old_last, last; + unsigned i; + + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND)) + return; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct qib_sge *sge = &wqe->sg_list[i]; + + qib_put_mr(sge->mr); + } + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI) + atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount); + + /* See ch. 
11.2.4.1 and 10.7.3.1 */ + if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED) || + status != IB_WC_SUCCESS) { + struct ib_wc wc; + + memset(&wc, 0, sizeof wc); + wc.wr_id = wqe->wr.wr_id; + wc.status = status; + wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode]; + wc.qp = &qp->ibqp; + if (status == IB_WC_SUCCESS) + wc.byte_len = wqe->length; + qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, + status != IB_WC_SUCCESS); + } + + last = qp->s_last; + old_last = last; + if (++last >= qp->s_size) + last = 0; + qp->s_last = last; + if (qp->s_acked == old_last) + qp->s_acked = last; + if (qp->s_cur == old_last) + qp->s_cur = last; + if (qp->s_tail == old_last) + qp->s_tail = last; + if (qp->state == IB_QPS_SQD && last == qp->s_cur) + qp->s_draining = 0; +} diff --git a/drivers/infiniband/hw/ipath/ipath_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index aa47eb54952..911205d3d5a 100644 --- a/drivers/infiniband/hw/ipath/ipath_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. + * Copyright (c) 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -32,22 +33,45 @@ */ /* * This file contains all of the code that is specific to the SerDes - * on the InfiniPath 7220 chip. + * on the QLogic_IB 7220 chip. */ #include <linux/pci.h> #include <linux/delay.h> +#include <linux/module.h> +#include <linux/firmware.h> -#include "ipath_kernel.h" -#include "ipath_registers.h" -#include "ipath_7220.h" +#include "qib.h" +#include "qib_7220.h" + +#define SD7220_FW_NAME "qlogic/sd7220.fw" +MODULE_FIRMWARE(SD7220_FW_NAME); + +/* + * Same as in qib_iba7220.c, but just the registers needed here. + * Could move whole set to qib_7220.h, but decided better to keep + * local. + */ +#define KREG_IDX(regname) (QIB_7220_##regname##_OFFS / sizeof(u64)) +#define kr_hwerrclear KREG_IDX(HwErrClear) +#define kr_hwerrmask KREG_IDX(HwErrMask) +#define kr_hwerrstatus KREG_IDX(HwErrStatus) +#define kr_ibcstatus KREG_IDX(IBCStatus) +#define kr_ibserdesctrl KREG_IDX(IBSerDesCtrl) +#define kr_scratch KREG_IDX(Scratch) +#define kr_xgxs_cfg KREG_IDX(XGXSCfg) +/* these are used only here, not in qib_iba7220.c */ +#define kr_ibsd_epb_access_ctrl KREG_IDX(ibsd_epb_access_ctrl) +#define kr_ibsd_epb_transaction_reg KREG_IDX(ibsd_epb_transaction_reg) +#define kr_pciesd_epb_transaction_reg KREG_IDX(pciesd_epb_transaction_reg) +#define kr_pciesd_epb_access_ctrl KREG_IDX(pciesd_epb_access_ctrl) +#define kr_serdes_ddsrxeq0 KREG_IDX(SerDes_DDSRXEQ0) /* * The IBSerDesMappTable is a memory that holds values to be stored in - * various SerDes registers by IBC. It is not part of the normal kregs - * map and is used in exactly one place, hence the #define below. + * various SerDes registers by IBC. */ -#define KR_IBSerDesMappTable (0x94000 / (sizeof(uint64_t))) +#define kr_serdes_maptable KREG_IDX(IBSerDesMappTable) /* * Below used for sdnum parameter, selecting one of the two sections @@ -71,42 +95,44 @@ #define EPB_GLOBAL_WR (1U << (EPB_ADDR_SHF + 8)) /* Forward declarations. 
*/ -static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc, - u32 data, u32 mask); -static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val, +static int qib_sd7220_reg_mod(struct qib_devdata *dd, int sdnum, u32 loc, + u32 data, u32 mask); +static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, int mask); -static int ipath_sd_trimdone_poll(struct ipath_devdata *dd); -static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd, - const char *where); -static int ipath_sd_setvals(struct ipath_devdata *dd); -static int ipath_sd_early(struct ipath_devdata *dd); -static int ipath_sd_dactrim(struct ipath_devdata *dd); -/* Set the registers that IBC may muck with to their default "preset" values */ -int ipath_sd7220_presets(struct ipath_devdata *dd); -static int ipath_internal_presets(struct ipath_devdata *dd); +static int qib_sd_trimdone_poll(struct qib_devdata *dd); +static void qib_sd_trimdone_monitor(struct qib_devdata *dd, const char *where); +static int qib_sd_setvals(struct qib_devdata *dd); +static int qib_sd_early(struct qib_devdata *dd); +static int qib_sd_dactrim(struct qib_devdata *dd); +static int qib_internal_presets(struct qib_devdata *dd); /* Tweak the register (CMUCTRL5) that contains the TRIMSELF controls */ -static int ipath_sd_trimself(struct ipath_devdata *dd, int val); -static int epb_access(struct ipath_devdata *dd, int sdnum, int claim); - -void ipath_set_relock_poll(struct ipath_devdata *dd, int ibup); +static int qib_sd_trimself(struct qib_devdata *dd, int val); +static int epb_access(struct qib_devdata *dd, int sdnum, int claim); +static int qib_sd7220_ib_load(struct qib_devdata *dd, + const struct firmware *fw); +static int qib_sd7220_ib_vfy(struct qib_devdata *dd, + const struct firmware *fw); /* * Below keeps track of whether the "once per power-on" initialization has * been done, because uC code Version 1.32.17 or higher allows the uC to * be reset at will, and Automatic Equalization may require it. So the - * state of the reset "pin", as reflected in was_reset parameter to - * ipath_sd7220_init() is no longer valid. Instead, we check for the + * state of the reset "pin", is no longer valid. Instead, we check for the * actual uC code having been loaded. */ -static int ipath_ibsd_ucode_loaded(struct ipath_devdata *dd) +static int qib_ibsd_ucode_loaded(struct qib_pportdata *ppd, + const struct firmware *fw) { - if (!dd->serdes_first_init_done && (ipath_sd7220_ib_vfy(dd) > 0)) - dd->serdes_first_init_done = 1; - return dd->serdes_first_init_done; + struct qib_devdata *dd = ppd->dd; + + if (!dd->cspec->serdes_first_init_done && + qib_sd7220_ib_vfy(dd, fw) > 0) + dd->cspec->serdes_first_init_done = 1; + return dd->cspec->serdes_first_init_done; } -/* repeat #define for local use. "Real" #define is in ipath_iba7220.c */ -#define INFINIPATH_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL +/* repeat #define for local use. 
"Real" #define is in qib_iba7220.c */ +#define QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL #define IB_MPREG5 (EPB_LOC(6, 0, 0xE) | (1L << EPB_IB_UC_CS_SHF)) #define IB_MPREG6 (EPB_LOC(6, 0, 0xF) | (1U << EPB_IB_UC_CS_SHF)) #define UC_PAR_CLR_D 8 @@ -114,25 +140,25 @@ static int ipath_ibsd_ucode_loaded(struct ipath_devdata *dd) #define IB_CTRL2(chn) (EPB_LOC(chn, 7, 3) | EPB_IB_QUAD0_CS) #define START_EQ1(chan) EPB_LOC(chan, 7, 0x27) -void ipath_sd7220_clr_ibpar(struct ipath_devdata *dd) +void qib_sd7220_clr_ibpar(struct qib_devdata *dd) { int ret; /* clear, then re-enable parity errs */ - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, UC_PAR_CLR_D, UC_PAR_CLR_M); if (ret < 0) { - ipath_dev_err(dd, "Failed clearing IBSerDes Parity err\n"); + qib_dev_err(dd, "Failed clearing IBSerDes Parity err\n"); goto bail; } - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0, + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0, UC_PAR_CLR_M); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + qib_read_kreg32(dd, kr_scratch); udelay(4); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, - INFINIPATH_HWE_IB_UC_MEMORYPARITYERR); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + qib_write_kreg(dd, kr_hwerrclear, + QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR); + qib_read_kreg32(dd, kr_scratch); bail: return; } @@ -146,7 +172,7 @@ bail: #define IB_PGUDP(chn) (EPB_LOC((chn), 2, 1) | EPB_IB_QUAD0_CS) #define IB_CMUDONE(chn) (EPB_LOC((chn), 7, 0xF) | EPB_IB_QUAD0_CS) -static int ipath_resync_ibepb(struct ipath_devdata *dd) +static int qib_resync_ibepb(struct qib_devdata *dd) { int ret, pat, tries, chn; u32 loc; @@ -155,43 +181,42 @@ static int ipath_resync_ibepb(struct ipath_devdata *dd) chn = 0; for (tries = 0; tries < (4 * IBSD_RESYNC_TRIES); ++tries) { loc = IB_PGUDP(chn); - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0); if (ret < 0) { - ipath_dev_err(dd, "Failed read in resync\n"); + qib_dev_err(dd, "Failed read in resync\n"); continue; } if (ret != 0xF0 && ret != 0x55 && tries == 0) - ipath_dev_err(dd, "unexpected pattern in resync\n"); + qib_dev_err(dd, "unexpected pattern in resync\n"); pat = ret ^ 0xA5; /* alternate F0 and 55 */ - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, pat, 0xFF); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, pat, 0xFF); if (ret < 0) { - ipath_dev_err(dd, "Failed write in resync\n"); + qib_dev_err(dd, "Failed write in resync\n"); continue; } - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0); if (ret < 0) { - ipath_dev_err(dd, "Failed re-read in resync\n"); + qib_dev_err(dd, "Failed re-read in resync\n"); continue; } if (ret != pat) { - ipath_dev_err(dd, "Failed compare1 in resync\n"); + qib_dev_err(dd, "Failed compare1 in resync\n"); continue; } loc = IB_CMUDONE(chn); - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0); if (ret < 0) { - ipath_dev_err(dd, "Failed CMUDONE rd in resync\n"); + qib_dev_err(dd, "Failed CMUDONE rd in resync\n"); continue; } if ((ret & 0x70) != ((chn << 4) | 0x40)) { - ipath_dev_err(dd, "Bad CMUDONE value %02X, chn %d\n", - ret, chn); + qib_dev_err(dd, "Bad CMUDONE value %02X, chn %d\n", + ret, chn); continue; } if (++chn == 4) break; /* Success */ } - ipath_cdbg(VERBOSE, "Resync in %d tries\n", tries); return (ret > 0) ? 
0 : ret; } @@ -199,32 +224,32 @@ static int ipath_resync_ibepb(struct ipath_devdata *dd) * Localize the stuff that should be done to change IB uC reset * returns <0 for errors. */ -static int ipath_ibsd_reset(struct ipath_devdata *dd, int assert_rst) +static int qib_ibsd_reset(struct qib_devdata *dd, int assert_rst) { u64 rst_val; int ret = 0; unsigned long flags; - rst_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl); + rst_val = qib_read_kreg64(dd, kr_ibserdesctrl); if (assert_rst) { /* * Vendor recommends "interrupting" uC before reset, to * minimize possible glitches. */ - spin_lock_irqsave(&dd->ipath_sdepb_lock, flags); + spin_lock_irqsave(&dd->cspec->sdepb_lock, flags); epb_access(dd, IB_7220_SERDES, 1); rst_val |= 1ULL; /* Squelch possible parity error from _asserting_ reset */ - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask & - ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR); - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibserdesctrl, rst_val); + qib_write_kreg(dd, kr_hwerrmask, + dd->cspec->hwerrmask & + ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR); + qib_write_kreg(dd, kr_ibserdesctrl, rst_val); /* flush write, delay to ensure it took effect */ - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + qib_read_kreg32(dd, kr_scratch); udelay(2); /* once it's reset, can remove interrupt */ epb_access(dd, IB_7220_SERDES, -1); - spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags); + spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags); } else { /* * Before we de-assert reset, we need to deal with @@ -235,47 +260,47 @@ static int ipath_ibsd_reset(struct ipath_devdata *dd, int assert_rst) */ u64 val; rst_val &= ~(1ULL); - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask & - ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR); + qib_write_kreg(dd, kr_hwerrmask, + dd->cspec->hwerrmask & + ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR); - ret = ipath_resync_ibepb(dd); + ret = qib_resync_ibepb(dd); if (ret < 0) - ipath_dev_err(dd, "unable to re-sync IB EPB\n"); + qib_dev_err(dd, "unable to re-sync IB EPB\n"); /* set uC control regs to suppress parity errs */ - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG5, 1, 1); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG5, 1, 1); if (ret < 0) goto bail; /* IB uC code past Version 1.32.17 allow suppression of wdog */ - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, 0x80); if (ret < 0) { - ipath_dev_err(dd, "Failed to set WDOG disable\n"); + qib_dev_err(dd, "Failed to set WDOG disable\n"); goto bail; } - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibserdesctrl, rst_val); + qib_write_kreg(dd, kr_ibserdesctrl, rst_val); /* flush write, delay for startup */ - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + qib_read_kreg32(dd, kr_scratch); udelay(1); /* clear, then re-enable parity errs */ - ipath_sd7220_clr_ibpar(dd); - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); - if (val & INFINIPATH_HWE_IB_UC_MEMORYPARITYERR) { - ipath_dev_err(dd, "IBUC Parity still set after RST\n"); - dd->ipath_hwerrmask &= - ~INFINIPATH_HWE_IB_UC_MEMORYPARITYERR; + qib_sd7220_clr_ibpar(dd); + val = qib_read_kreg64(dd, kr_hwerrstatus); + if (val & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) { + qib_dev_err(dd, "IBUC Parity still set after RST\n"); + dd->cspec->hwerrmask &= + ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR; } - ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, - dd->ipath_hwerrmask); + qib_write_kreg(dd, kr_hwerrmask, + 
dd->cspec->hwerrmask); } bail: return ret; } -static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd, - const char *where) +static void qib_sd_trimdone_monitor(struct qib_devdata *dd, + const char *where) { int ret, chn, baduns; u64 val; @@ -286,70 +311,74 @@ static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd, /* give time for reset to settle out in EPB */ udelay(2); - ret = ipath_resync_ibepb(dd); + ret = qib_resync_ibepb(dd); if (ret < 0) - ipath_dev_err(dd, "not able to re-sync IB EPB (%s)\n", where); + qib_dev_err(dd, "not able to re-sync IB EPB (%s)\n", where); /* Do "sacrificial read" to get EPB in sane state after reset */ - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(0), 0, 0); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(0), 0, 0); if (ret < 0) - ipath_dev_err(dd, "Failed TRIMDONE 1st read, (%s)\n", where); + qib_dev_err(dd, "Failed TRIMDONE 1st read, (%s)\n", where); /* Check/show "summary" Trim-done bit in IBCStatus */ - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); - if (val & (1ULL << 11)) - ipath_cdbg(VERBOSE, "IBCS TRIMDONE set (%s)\n", where); - else - ipath_dev_err(dd, "IBCS TRIMDONE clear (%s)\n", where); - + val = qib_read_kreg64(dd, kr_ibcstatus); + if (!(val & (1ULL << 11))) + qib_dev_err(dd, "IBCS TRIMDONE clear (%s)\n", where); + /* + * Do "dummy read/mod/wr" to get EPB in sane state after reset + * The default value for MPREG6 is 0. + */ udelay(2); - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, 0x80); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, 0x80); if (ret < 0) - ipath_dev_err(dd, "Failed Dummy RMW, (%s)\n", where); + qib_dev_err(dd, "Failed Dummy RMW, (%s)\n", where); udelay(10); baduns = 0; for (chn = 3; chn >= 0; --chn) { /* Read CTRL reg for each channel to check TRIMDONE */ - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0, 0); if (ret < 0) - ipath_dev_err(dd, "Failed checking TRIMDONE, chn %d" - " (%s)\n", chn, where); + qib_dev_err(dd, + "Failed checking TRIMDONE, chn %d (%s)\n", + chn, where); if (!(ret & 0x10)) { int probe; + baduns |= (1 << chn); - ipath_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)." - " (%s)\n", chn, ret, where); - probe = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, + qib_dev_err(dd, + "TRIMDONE cleared on chn %d (%02X). 
(%s)\n", + chn, ret, where); + probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_PGUDP(0), 0, 0); - ipath_dev_err(dd, "probe is %d (%02X)\n", + qib_dev_err(dd, "probe is %d (%02X)\n", probe, probe); - probe = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, + probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0, 0); - ipath_dev_err(dd, "re-read: %d (%02X)\n", + qib_dev_err(dd, "re-read: %d (%02X)\n", probe, probe); - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0x10, 0x10); if (ret < 0) - ipath_dev_err(dd, + qib_dev_err(dd, "Err on TRIMDONE rewrite1\n"); } } for (chn = 3; chn >= 0; --chn) { /* Read CTRL reg for each channel to check TRIMDONE */ if (baduns & (1 << chn)) { - ipath_dev_err(dd, - "Reseting TRIMDONE on chn %d (%s)\n", + qib_dev_err(dd, + "Resetting TRIMDONE on chn %d (%s)\n", chn, where); - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(chn), 0x10, 0x10); if (ret < 0) - ipath_dev_err(dd, "Failed re-setting " - "TRIMDONE, chn %d (%s)\n", + qib_dev_err(dd, + "Failed re-setting TRIMDONE, chn %d (%s)\n", chn, where); } } @@ -361,96 +390,94 @@ static void ipath_sd_trimdone_monitor(struct ipath_devdata *dd, * Post IB uC code version 1.32.17, was_reset being 1 is not really * informative, so we double-check. */ -int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset) +int qib_sd7220_init(struct qib_devdata *dd) { + const struct firmware *fw; int ret = 1; /* default to failure */ - int first_reset; - int val_stat; + int first_reset, was_reset; + /* SERDES MPU reset recorded in D0 */ + was_reset = (qib_read_kreg64(dd, kr_ibserdesctrl) & 1); if (!was_reset) { /* entered with reset not asserted, we need to do it */ - ipath_ibsd_reset(dd, 1); - ipath_sd_trimdone_monitor(dd, "Driver-reload"); + qib_ibsd_reset(dd, 1); + qib_sd_trimdone_monitor(dd, "Driver-reload"); } - /* Substitute our deduced value for was_reset */ - ret = ipath_ibsd_ucode_loaded(dd); - if (ret < 0) { - ret = 1; + ret = request_firmware(&fw, SD7220_FW_NAME, &dd->pcidev->dev); + if (ret) { + qib_dev_err(dd, "Failed to load IB SERDES image\n"); goto done; } - first_reset = !ret; /* First reset if IBSD uCode not yet loaded */ + /* Substitute our deduced value for was_reset */ + ret = qib_ibsd_ucode_loaded(dd->pport, fw); + if (ret < 0) + goto bail; + + first_reset = !ret; /* First reset if IBSD uCode not yet loaded */ /* * Alter some regs per vendor latest doc, reset-defaults * are not right for IB. */ - ret = ipath_sd_early(dd); + ret = qib_sd_early(dd); if (ret < 0) { - ipath_dev_err(dd, "Failed to set IB SERDES early defaults\n"); - ret = 1; - goto done; + qib_dev_err(dd, "Failed to set IB SERDES early defaults\n"); + goto bail; } - /* * Set DAC manual trim IB. * We only do this once after chip has been reset (usually * same as once per system boot). */ if (first_reset) { - ret = ipath_sd_dactrim(dd); + ret = qib_sd_dactrim(dd); if (ret < 0) { - ipath_dev_err(dd, "Failed IB SERDES DAC trim\n"); - ret = 1; - goto done; + qib_dev_err(dd, "Failed IB SERDES DAC trim\n"); + goto bail; } } - /* * Set various registers (DDS and RXEQ) that will be * controlled by IBC (in 1.2 mode) to reasonable preset values * Calling the "internal" version avoids the "check for needed" * and "trimdone monitor" that might be counter-productive. 
*/ - ret = ipath_internal_presets(dd); + ret = qib_internal_presets(dd); if (ret < 0) { - ipath_dev_err(dd, "Failed to set IB SERDES presets\n"); - ret = 1; - goto done; + qib_dev_err(dd, "Failed to set IB SERDES presets\n"); + goto bail; } - ret = ipath_sd_trimself(dd, 0x80); + ret = qib_sd_trimself(dd, 0x80); if (ret < 0) { - ipath_dev_err(dd, "Failed to set IB SERDES TRIMSELF\n"); - ret = 1; - goto done; + qib_dev_err(dd, "Failed to set IB SERDES TRIMSELF\n"); + goto bail; } /* Load image, then try to verify */ - ret = 0; /* Assume success */ + ret = 0; /* Assume success */ if (first_reset) { int vfy; int trim_done; - ipath_dbg("SerDes uC was reset, reloading PRAM\n"); - ret = ipath_sd7220_ib_load(dd); + + ret = qib_sd7220_ib_load(dd, fw); if (ret < 0) { - ipath_dev_err(dd, "Failed to load IB SERDES image\n"); - ret = 1; - goto done; - } + qib_dev_err(dd, "Failed to load IB SERDES image\n"); + goto bail; + } else { + /* Loaded image, try to verify */ + vfy = qib_sd7220_ib_vfy(dd, fw); + if (vfy != ret) { + qib_dev_err(dd, "SERDES PRAM VFY failed\n"); + goto bail; + } /* end if verified */ + } /* end if loaded */ - /* Loaded image, try to verify */ - vfy = ipath_sd7220_ib_vfy(dd); - if (vfy != ret) { - ipath_dev_err(dd, "SERDES PRAM VFY failed\n"); - ret = 1; - goto done; - } /* * Loaded and verified. Almost good... * hold "success" in ret */ ret = 0; - /* * Prev steps all worked, continue bringup * De-assert RESET to uC, only in first reset, to allow @@ -461,45 +488,49 @@ int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset) */ ret = ibsd_mod_allchnls(dd, START_EQ1(0), 0, 0x38); if (ret < 0) { - ipath_dev_err(dd, "Failed clearing START_EQ1\n"); - ret = 1; - goto done; + qib_dev_err(dd, "Failed clearing START_EQ1\n"); + goto bail; } - ipath_ibsd_reset(dd, 0); + qib_ibsd_reset(dd, 0); /* * If this is not the first reset, trimdone should be set - * already. + * already. We may need to check about this. */ - trim_done = ipath_sd_trimdone_poll(dd); + trim_done = qib_sd_trimdone_poll(dd); /* * Whether or not trimdone succeeded, we need to put the * uC back into reset to avoid a possible fight with the * IBC state-machine. */ - ipath_ibsd_reset(dd, 1); + qib_ibsd_reset(dd, 1); if (!trim_done) { - ipath_dev_err(dd, "No TRIMDONE seen\n"); - ret = 1; - goto done; + qib_dev_err(dd, "No TRIMDONE seen\n"); + goto bail; } - - ipath_sd_trimdone_monitor(dd, "First-reset"); + /* + * DEBUG: check each time we reset if trimdone bits have + * gotten cleared, and re-set them. + */ + qib_sd_trimdone_monitor(dd, "First-reset"); /* Remember so we do not re-do the load, dactrim, etc. */ - dd->serdes_first_init_done = 1; + dd->cspec->serdes_first_init_done = 1; } /* - * Setup for channel training and load values for + * setup for channel training and load values for * RxEq and DDS in tables used by IBC in IB1.2 mode */ - - val_stat = ipath_sd_setvals(dd); - if (val_stat < 0) - ret = 1; + ret = 0; + if (qib_sd_setvals(dd) >= 0) + goto done; +bail: + ret = 1; done: /* start relock timer regardless, but start at 1 second */ - ipath_set_relock_poll(dd, -1); + set_7220_relock_poll(dd, -1); + + release_firmware(fw); return ret; } @@ -517,7 +548,7 @@ done: * the "claim" parameter is >0 to claim, <0 to release, 0 to query. * Returns <0 for errors, >0 if we had ownership, else 0. 
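[Annotation] epb_access(), whose doc comment precedes this, implements a request/grant handshake against the chip's EPB arbiter. A stripped-down sketch of the claim side under the convention described (>0 claim, <0 release), with rd/wr as stand-in register accessors; note the throwaway first read, which the driver treats as untrustworthy immediately after a write:

	#include <linux/delay.h>
	#include <linux/types.h>

	#define MY_ACC_REQ (1ULL << 0)	/* stand-ins for EPB_ACC_REQ/GNT */
	#define MY_ACC_GNT (1ULL << 1)

	static int my_claim_epb(u64 (*rd)(u16 reg), void (*wr)(u16 reg, u64 v),
				u16 acc)
	{
		u64 pollval;

		wr(acc, MY_ACC_REQ);	/* raise the request bit */
		pollval = rd(acc);	/* first read after write: stale */
		udelay(5);
		pollval = rd(acc);	/* this one reflects the grant */
		return (pollval & MY_ACC_GNT) ? 1 : -1;
	}
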
*/ -static int epb_access(struct ipath_devdata *dd, int sdnum, int claim) +static int epb_access(struct qib_devdata *dd, int sdnum, int claim) { u16 acc; u64 accval; @@ -525,28 +556,30 @@ static int epb_access(struct ipath_devdata *dd, int sdnum, int claim) u64 oct_sel = 0; switch (sdnum) { - case IB_7220_SERDES : + case IB_7220_SERDES: /* * The IB SERDES "ownership" is fairly simple. A single each * request/grant. */ - acc = dd->ipath_kregs->kr_ib_epbacc; + acc = kr_ibsd_epb_access_ctrl; break; - case PCIE_SERDES0 : - case PCIE_SERDES1 : + + case PCIE_SERDES0: + case PCIE_SERDES1: /* PCIe SERDES has two "octants", need to select which */ - acc = dd->ipath_kregs->kr_pcie_epbacc; + acc = kr_pciesd_epb_access_ctrl; oct_sel = (2 << (sdnum - PCIE_SERDES0)); break; - default : + + default: return 0; } /* Make sure any outstanding transaction was seen */ - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + qib_read_kreg32(dd, kr_scratch); udelay(15); - accval = ipath_read_kreg32(dd, acc); + accval = qib_read_kreg32(dd, acc); owned = !!(accval & EPB_ACC_GNT); if (claim < 0) { @@ -557,22 +590,22 @@ static int epb_access(struct ipath_devdata *dd, int sdnum, int claim) * Both should be clear */ u64 newval = 0; - ipath_write_kreg(dd, acc, newval); + qib_write_kreg(dd, acc, newval); /* First read after write is not trustworthy */ - pollval = ipath_read_kreg32(dd, acc); + pollval = qib_read_kreg32(dd, acc); udelay(5); - pollval = ipath_read_kreg32(dd, acc); + pollval = qib_read_kreg32(dd, acc); if (pollval & EPB_ACC_GNT) owned = -1; } else if (claim > 0) { /* Need to claim */ u64 pollval; u64 newval = EPB_ACC_REQ | oct_sel; - ipath_write_kreg(dd, acc, newval); + qib_write_kreg(dd, acc, newval); /* First read after write is not trustworthy */ - pollval = ipath_read_kreg32(dd, acc); + pollval = qib_read_kreg32(dd, acc); udelay(5); - pollval = ipath_read_kreg32(dd, acc); + pollval = qib_read_kreg32(dd, acc); if (!(pollval & EPB_ACC_GNT)) owned = -1; } @@ -582,18 +615,17 @@ static int epb_access(struct ipath_devdata *dd, int sdnum, int claim) /* * Lemma to deal with race condition of write..read to epb regs */ -static int epb_trans(struct ipath_devdata *dd, u16 reg, u64 i_val, u64 *o_vp) +static int epb_trans(struct qib_devdata *dd, u16 reg, u64 i_val, u64 *o_vp) { int tries; u64 transval; - - ipath_write_kreg(dd, reg, i_val); + qib_write_kreg(dd, reg, i_val); /* Throw away first read, as RDY bit may be stale */ - transval = ipath_read_kreg64(dd, reg); + transval = qib_read_kreg64(dd, reg); for (tries = EPB_TRANS_TRIES; tries; --tries) { - transval = ipath_read_kreg32(dd, reg); + transval = qib_read_kreg32(dd, reg); if (transval & EPB_TRANS_RDY) break; udelay(5); @@ -606,9 +638,8 @@ static int epb_trans(struct ipath_devdata *dd, u16 reg, u64 i_val, u64 *o_vp) } /** - * - * ipath_sd7220_reg_mod - modify SERDES register - * @dd: the infinipath device + * qib_sd7220_reg_mod - modify SERDES register + * @dd: the qlogic_ib device * @sdnum: which SERDES to access * @loc: location - channel, element, register, as packed by EPB_LOC() macro. * @wd: Write Data - value to set in register @@ -619,8 +650,8 @@ static int epb_trans(struct ipath_devdata *dd, u16 reg, u64 i_val, u64 *o_vp) * returns current (presumed, if a write was done) contents of selected * register, or <0 if errors. 
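[Annotation] The epb_trans() "lemma" above exists because a read issued immediately after a write can still show a stale RDY bit. A self-contained sketch of the same write-then-bounded-poll shape, with the register access abstracted behind function pointers and the bit/count values as stand-ins:

	#include <linux/delay.h>
	#include <linux/types.h>

	#define MY_TRANS_RDY   (1ULL << 31)	/* stand-in for EPB_TRANS_RDY */
	#define MY_TRANS_TRIES 5		/* stand-in for EPB_TRANS_TRIES */

	static int my_epb_trans(void (*wr)(u64 v), u64 (*rd)(void),
				u64 i_val, u64 *o_vp)
	{
		int tries;
		u64 transval = 0;

		wr(i_val);		/* start the transaction */
		(void)rd();		/* throw away: RDY may be stale */
		for (tries = MY_TRANS_TRIES; tries; --tries) {
			transval = rd();
			if (transval & MY_TRANS_RDY)
				break;
			udelay(5);
		}
		if (tries > 0)
			*o_vp = transval;
		return tries;		/* 0: bus never signalled ready */
	}
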
*/ -static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc, - u32 wd, u32 mask) +static int qib_sd7220_reg_mod(struct qib_devdata *dd, int sdnum, u32 loc, + u32 wd, u32 mask) { u16 trans; u64 transval; @@ -629,14 +660,16 @@ static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc, unsigned long flags; switch (sdnum) { - case IB_7220_SERDES : - trans = dd->ipath_kregs->kr_ib_epbtrans; + case IB_7220_SERDES: + trans = kr_ibsd_epb_transaction_reg; break; - case PCIE_SERDES0 : - case PCIE_SERDES1 : - trans = dd->ipath_kregs->kr_pcie_epbtrans; + + case PCIE_SERDES0: + case PCIE_SERDES1: + trans = kr_pciesd_epb_transaction_reg; break; - default : + + default: return -1; } @@ -644,23 +677,23 @@ static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc, * All access is locked in software (vs other host threads) and * hardware (vs uC access). */ - spin_lock_irqsave(&dd->ipath_sdepb_lock, flags); + spin_lock_irqsave(&dd->cspec->sdepb_lock, flags); owned = epb_access(dd, sdnum, 1); if (owned < 0) { - spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags); + spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags); return -1; } ret = 0; for (tries = EPB_TRANS_TRIES; tries; --tries) { - transval = ipath_read_kreg32(dd, trans); + transval = qib_read_kreg32(dd, trans); if (transval & EPB_TRANS_RDY) break; udelay(5); } if (tries > 0) { - tries = 1; /* to make read-skip work */ + tries = 1; /* to make read-skip work */ if (mask != 0xFF) { /* * Not a pure write, so need to read. @@ -688,7 +721,7 @@ static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc, else ret = transval & EPB_DATA_MASK; - spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags); + spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags); if (tries <= 0) ret = -1; return ret; @@ -707,7 +740,7 @@ static int ipath_sd7220_reg_mod(struct ipath_devdata *dd, int sdnum, u32 loc, #define EPB_RAMDATA EPB_LOC(6, 0, 5) /* Transfer date to/from uC Program RAM of IB or PCIe SerDes */ -static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc, +static int qib_sd7220_ram_xfer(struct qib_devdata *dd, int sdnum, u32 loc, u8 *buf, int cnt, int rd_notwr) { u16 trans; @@ -723,29 +756,28 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc, /* Pick appropriate transaction reg and "Chip select" for this serdes */ switch (sdnum) { - case IB_7220_SERDES : + case IB_7220_SERDES: csbit = 1ULL << EPB_IB_UC_CS_SHF; - trans = dd->ipath_kregs->kr_ib_epbtrans; + trans = kr_ibsd_epb_transaction_reg; break; - case PCIE_SERDES0 : - case PCIE_SERDES1 : + + case PCIE_SERDES0: + case PCIE_SERDES1: /* PCIe SERDES has uC "chip select" in different bit, too */ csbit = 1ULL << EPB_PCIE_UC_CS_SHF; - trans = dd->ipath_kregs->kr_pcie_epbtrans; + trans = kr_pciesd_epb_transaction_reg; break; - default : + + default: return -1; } op = rd_notwr ? "Rd" : "Wr"; - spin_lock_irqsave(&dd->ipath_sdepb_lock, flags); + spin_lock_irqsave(&dd->cspec->sdepb_lock, flags); owned = epb_access(dd, sdnum, 1); if (owned < 0) { - spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags); - ipath_dbg("Could not get %s access to %s EPB: %X, loc %X\n", - op, (sdnum == IB_7220_SERDES) ? 
"IB" : "PCIe", - owned, loc); + spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags); return -1; } @@ -758,16 +790,14 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc, */ addr = loc & 0x1FFF; for (tries = EPB_TRANS_TRIES; tries; --tries) { - transval = ipath_read_kreg32(dd, trans); + transval = qib_read_kreg32(dd, trans); if (transval & EPB_TRANS_RDY) break; udelay(5); } sofar = 0; - if (tries <= 0) - ipath_dbg("No initial RDY on EPB access request\n"); - else { + if (tries > 0) { /* * Every "memory" access is doubly-indirect. * We set two bytes of address, then read/write @@ -778,8 +808,6 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc, transval = csbit | EPB_UC_CTL | (rd_notwr ? EPB_ROM_R : EPB_ROM_W); tries = epb_trans(dd, trans, transval, &transval); - if (tries <= 0) - ipath_dbg("No EPB response to uC %s cmd\n", op); while (tries > 0 && sofar < cnt) { if (!sofar) { /* Only set address at start of chunk */ @@ -787,18 +815,14 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc, transval = csbit | EPB_MADDRH | addrbyte; tries = epb_trans(dd, trans, transval, &transval); - if (tries <= 0) { - ipath_dbg("No EPB response ADDRH\n"); + if (tries <= 0) break; - } addrbyte = (addr + sofar) & 0xFF; transval = csbit | EPB_MADDRL | addrbyte; tries = epb_trans(dd, trans, transval, &transval); - if (tries <= 0) { - ipath_dbg("No EPB response ADDRL\n"); + if (tries <= 0) break; - } } if (rd_notwr) @@ -806,10 +830,8 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc, else transval = csbit | EPB_ROMDATA | buf[sofar]; tries = epb_trans(dd, trans, transval, &transval); - if (tries <= 0) { - ipath_dbg("No EPB response DATA\n"); + if (tries <= 0) break; - } if (rd_notwr) buf[sofar] = transval & EPB_DATA_MASK; ++sofar; @@ -817,8 +839,6 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc, /* Finally, clear control-bit for Read or Write */ transval = csbit | EPB_UC_CTL; tries = epb_trans(dd, trans, transval, &transval); - if (tries <= 0) - ipath_dbg("No EPB response to drop of uC %s cmd\n", op); } ret = sofar; @@ -826,18 +846,16 @@ static int ipath_sd7220_ram_xfer(struct ipath_devdata *dd, int sdnum, u32 loc, if (epb_access(dd, sdnum, -1) < 0) ret = -1; - spin_unlock_irqrestore(&dd->ipath_sdepb_lock, flags); - if (tries <= 0) { - ipath_dbg("SERDES PRAM %s failed after %d bytes\n", op, sofar); + spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags); + if (tries <= 0) ret = -1; - } return ret; } #define PROG_CHUNK 64 -int ipath_sd7220_prog_ld(struct ipath_devdata *dd, int sdnum, - u8 *img, int len, int offset) +static int qib_sd7220_prog_ld(struct qib_devdata *dd, int sdnum, + const u8 *img, int len, int offset) { int cnt, sofar, req; @@ -846,8 +864,8 @@ int ipath_sd7220_prog_ld(struct ipath_devdata *dd, int sdnum, req = len - sofar; if (req > PROG_CHUNK) req = PROG_CHUNK; - cnt = ipath_sd7220_ram_xfer(dd, sdnum, offset + sofar, - img + sofar, req, 0); + cnt = qib_sd7220_ram_xfer(dd, sdnum, offset + sofar, + (u8 *)img + sofar, req, 0); if (cnt < req) { sofar = -1; break; @@ -860,8 +878,8 @@ int ipath_sd7220_prog_ld(struct ipath_devdata *dd, int sdnum, #define VFY_CHUNK 64 #define SD_PRAM_ERROR_LIMIT 42 -int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum, - const u8 *img, int len, int offset) +static int qib_sd7220_prog_vfy(struct qib_devdata *dd, int sdnum, + const u8 *img, int len, int offset) { int cnt, sofar, req, idx, errors; unsigned char 
readback[VFY_CHUNK]; @@ -872,7 +890,7 @@ int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum, req = len - sofar; if (req > VFY_CHUNK) req = VFY_CHUNK; - cnt = ipath_sd7220_ram_xfer(dd, sdnum, sofar + offset, + cnt = qib_sd7220_ram_xfer(dd, sdnum, sofar + offset, readback, req, 1); if (cnt < req) { /* failed in read itself */ @@ -888,11 +906,25 @@ int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum, return errors ? -errors : sofar; } -/* IRQ not set up at this point in init, so we poll. */ +static int +qib_sd7220_ib_load(struct qib_devdata *dd, const struct firmware *fw) +{ + return qib_sd7220_prog_ld(dd, IB_7220_SERDES, fw->data, fw->size, 0); +} + +static int +qib_sd7220_ib_vfy(struct qib_devdata *dd, const struct firmware *fw) +{ + return qib_sd7220_prog_vfy(dd, IB_7220_SERDES, fw->data, fw->size, 0); +} + +/* + * IRQ not set up at this point in init, so we poll. + */ #define IB_SERDES_TRIM_DONE (1ULL << 11) #define TRIM_TMO (30) -static int ipath_sd_trimdone_poll(struct ipath_devdata *dd) +static int qib_sd_trimdone_poll(struct qib_devdata *dd) { int trim_tmo, ret; uint64_t val; @@ -903,16 +935,15 @@ static int ipath_sd_trimdone_poll(struct ipath_devdata *dd) */ ret = 0; for (trim_tmo = 0; trim_tmo < TRIM_TMO; ++trim_tmo) { - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); + val = qib_read_kreg64(dd, kr_ibcstatus); if (val & IB_SERDES_TRIM_DONE) { - ipath_cdbg(VERBOSE, "TRIMDONE after %d\n", trim_tmo); ret = 1; break; } msleep(10); } if (trim_tmo >= TRIM_TMO) { - ipath_dev_err(dd, "No TRIMDONE in %d tries\n", trim_tmo); + qib_dev_err(dd, "No TRIMDONE in %d tries\n", trim_tmo); ret = 0; } return ret; @@ -964,8 +995,7 @@ static struct dds_init { }; /* - * Next, values related to Receive Equalization. - * In comments, FDR (Full) is IB DDR, HDR (Half) is IB SDR + * Now the RXEQ section of the table. */ /* Hardware packs an element number and register address thus: */ #define RXEQ_INIT_RDESC(elt, addr) (((elt) & 0xF) | ((addr) << 4)) @@ -981,23 +1011,23 @@ static struct dds_init { #define RXEQ_SDR_ZCNT 23 static struct rxeq_init { - u16 rdesc; /* in form used in SerDesDDSRXEQ */ + u16 rdesc; /* in form used in SerDesDDSRXEQ */ u8 rdata[4]; } rxeq_init_vals[] = { /* Set Rcv Eq. 
to Preset node */ RXEQ_VAL_ALL(7, 0x27, 0x10), /* Set DFELTHFDR/HDR thresholds */ - RXEQ_VAL(7, 8, 0, 0, 0, 0), /* FDR */ + RXEQ_VAL(7, 8, 0, 0, 0, 0), /* FDR, was 0, 1, 2, 3 */ RXEQ_VAL(7, 0x21, 0, 0, 0, 0), /* HDR */ /* Set TLTHFDR/HDR theshold */ - RXEQ_VAL(7, 9, 2, 2, 2, 2), /* FDR */ - RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR */ + RXEQ_VAL(7, 9, 2, 2, 2, 2), /* FDR, was 0, 2, 4, 6 */ + RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR, was 0, 1, 2, 3 */ /* Set Preamp setting 2 (ZFR/ZCNT) */ - RXEQ_VAL(7, 0x1B, 12, 12, 12, 12), /* FDR */ - RXEQ_VAL(7, 0x1C, 12, 12, 12, 12), /* HDR */ + RXEQ_VAL(7, 0x1B, 12, 12, 12, 12), /* FDR, was 12, 16, 20, 24 */ + RXEQ_VAL(7, 0x1C, 12, 12, 12, 12), /* HDR, was 12, 16, 20, 24 */ /* Set Preamp DC gain and Setting 1 (GFR/GHR) */ - RXEQ_VAL(7, 0x1E, 0x10, 0x10, 0x10, 0x10), /* FDR */ - RXEQ_VAL(7, 0x1F, 0x10, 0x10, 0x10, 0x10), /* HDR */ + RXEQ_VAL(7, 0x1E, 16, 16, 16, 16), /* FDR, was 16, 17, 18, 20 */ + RXEQ_VAL(7, 0x1F, 16, 16, 16, 16), /* HDR, was 16, 17, 18, 20 */ /* Toggle RELOCK (in VCDL_CTRL0) to lock to data */ RXEQ_VAL_ALL(6, 6, 0x20), /* Set D5 High */ RXEQ_VAL_ALL(6, 6, 0), /* Set D5 Low */ @@ -1007,27 +1037,27 @@ static struct rxeq_init { #define DDS_ROWS (16) #define RXEQ_ROWS ARRAY_SIZE(rxeq_init_vals) -static int ipath_sd_setvals(struct ipath_devdata *dd) +static int qib_sd_setvals(struct qib_devdata *dd) { int idx, midx; - int min_idx; /* Minimum index for this portion of table */ + int min_idx; /* Minimum index for this portion of table */ uint32_t dds_reg_map; u64 __iomem *taddr, *iaddr; uint64_t data; uint64_t sdctl; - taddr = dd->ipath_kregbase + KR_IBSerDesMappTable; - iaddr = dd->ipath_kregbase + dd->ipath_kregs->kr_ib_ddsrxeq; + taddr = dd->kregbase + kr_serdes_maptable; + iaddr = dd->kregbase + kr_serdes_ddsrxeq0; /* * Init the DDS section of the table. * Each "row" of the table provokes NUM_DDS_REG writes, to the * registers indicated in DDS_REG_MAP. */ - sdctl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibserdesctrl); + sdctl = qib_read_kreg64(dd, kr_ibserdesctrl); sdctl = (sdctl & ~(0x1f << 8)) | (NUM_DDS_REGS << 8); sdctl = (sdctl & ~(0x1f << 13)) | (RXEQ_ROWS << 13); - ipath_write_kreg(dd, dd->ipath_kregs->kr_ibserdesctrl, sdctl); + qib_write_kreg(dd, kr_ibserdesctrl, sdctl); /* * Iterate down table within loop for each register to store. @@ -1037,21 +1067,21 @@ static int ipath_sd_setvals(struct ipath_devdata *dd) data = ((dds_reg_map & 0xF) << 4) | TX_FAST_ELT; writeq(data, iaddr + idx); mmiowb(); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + qib_read_kreg32(dd, kr_scratch); dds_reg_map >>= 4; for (midx = 0; midx < DDS_ROWS; ++midx) { u64 __iomem *daddr = taddr + ((midx << 4) + idx); data = dds_init_vals[midx].reg_vals[idx]; writeq(data, daddr); mmiowb(); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + qib_read_kreg32(dd, kr_scratch); } /* End inner for (vals for this reg, each row) */ } /* end outer for (regs to be stored) */ /* - * Init the RXEQ section of the table. As explained above the table - * rxeq_init_vals[], this runs in a different order, as the pattern - * of register references is more complex, but there are only + * Init the RXEQ section of the table. + * This runs in a different order, as the pattern of + * register references is more complex, but there are only * four "data" values per register. 
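[Annotation] Each table store in qib_sd_setvals() is a writeq() chased by mmiowb() and a read of the scratch register, so the posted write is known to have reached the chip before the next store is issued. A minimal sketch of that flush idiom (offsets are placeholders; mmiowb() is the MMIO-ordering primitive of this kernel era):

	#include <linux/io.h>

	static void my_store_flushed(void __iomem *base, unsigned long off,
				     unsigned long scratch, u64 data)
	{
		writeq(data, base + off);	/* posted MMIO write */
		mmiowb();			/* order it before the read below */
		(void)readl(base + scratch);	/* non-posted read forces it out */
	}
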
*/ min_idx = idx; /* RXEQ indices pick up where DDS left off */ @@ -1066,13 +1096,13 @@ static int ipath_sd_setvals(struct ipath_devdata *dd) /* Store the next RXEQ register address */ writeq(rxeq_init_vals[idx].rdesc, iaddr + didx); mmiowb(); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + qib_read_kreg32(dd, kr_scratch); /* Iterate through RXEQ values */ for (vidx = 0; vidx < 4; vidx++) { data = rxeq_init_vals[idx].rdata[vidx]; writeq(data, taddr + (vidx << 6) + idx); mmiowb(); - ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch); + qib_read_kreg32(dd, kr_scratch); } } /* end outer for (Reg-writes for RXEQ) */ return 0; @@ -1085,33 +1115,18 @@ static int ipath_sd_setvals(struct ipath_devdata *dd) #define VCDL_CTRL2(chan) EPB_LOC(chan, 6, 8) #define START_EQ2(chan) EPB_LOC(chan, 7, 0x28) -static int ibsd_sto_noisy(struct ipath_devdata *dd, int loc, int val, int mask) -{ - int ret = -1; - int sloc; /* shifted loc, for messages */ - - loc |= (1U << EPB_IB_QUAD0_CS_SHF); - sloc = loc >> EPB_ADDR_SHF; - - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, mask); - if (ret < 0) - ipath_dev_err(dd, "Write failed: elt %d," - " addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n", - (sloc & 0xF), (sloc >> 9) & 0x3f, (sloc >> 4) & 7, - val & 0xFF, mask & 0xFF); - return ret; -} - /* * Repeat a "store" across all channels of the IB SerDes. * Although nominally it inherits the "read value" of the last * channel it modified, the only really useful return is <0 for * failure, >= 0 for success. The parameter 'loc' is assumed to - * be the location for the channel-0 copy of the register to - * be modified. + * be the location in some channel of the register to be modified + * The caller can specify use of the "gang write" option of EPB, + * in which case we use the specified channel data for any fields + * not explicitely written. 
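[Annotation] ibsd_mod_allchnls(), defined next, follows the mask convention used throughout this file: mask 0xFF is a pure write, mask 0 is a pure read, and anything in between forces a pre-read followed by a merge. A sketch of just that merge step, with reg_mod standing in for qib_sd7220_reg_mod():

	#include <linux/types.h>

	static int my_rmw_byte(int (*reg_mod)(u32 loc, u32 wd, u32 mask),
			       u32 loc, u32 val, u32 mask)
	{
		if (mask != 0xFF) {
			int cur = reg_mod(loc, 0, 0);	/* mask 0: read-only probe */

			if (cur < 0)
				return cur;
			val = (cur & ~mask) | (val & mask);
		}
		return reg_mod(loc, val, 0xFF);		/* write the merged byte */
	}
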
*/ -static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val, - int mask) +static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val, + int mask) { int ret = -1; int chnl; @@ -1126,23 +1141,26 @@ static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val, loc |= (1U << EPB_IB_QUAD0_CS_SHF); chnl = (loc >> (4 + EPB_ADDR_SHF)) & 7; if (mask != 0xFF) { - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, - loc & ~EPB_GLOBAL_WR, 0, 0); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, + loc & ~EPB_GLOBAL_WR, 0, 0); if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - ipath_dev_err(dd, "pre-read failed: elt %d," - " addr 0x%X, chnl %d\n", (sloc & 0xF), + + qib_dev_err(dd, + "pre-read failed: elt %d, addr 0x%X, chnl %d\n", + (sloc & 0xF), (sloc >> 9) & 0x3f, chnl); return ret; } val = (ret & ~mask) | (val & mask); } loc &= ~(7 << (4+EPB_ADDR_SHF)); - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF); + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF); if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - ipath_dev_err(dd, "Global WR failed: elt %d," - " addr 0x%X, val %02X\n", + + qib_dev_err(dd, + "Global WR failed: elt %d, addr 0x%X, val %02X\n", (sloc & 0xF), (sloc >> 9) & 0x3f, val); } return ret; @@ -1151,14 +1169,14 @@ static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val, loc &= ~(7 << (4+EPB_ADDR_SHF)); loc |= (1U << EPB_IB_QUAD0_CS_SHF); for (chnl = 0; chnl < 4; ++chnl) { - int cloc; - cloc = loc | (chnl << (4+EPB_ADDR_SHF)); - ret = ipath_sd7220_reg_mod(dd, IB_7220_SERDES, cloc, val, mask); + int cloc = loc | (chnl << (4+EPB_ADDR_SHF)); + + ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, cloc, val, mask); if (ret < 0) { int sloc = loc >> EPB_ADDR_SHF; - ipath_dev_err(dd, "Write failed: elt %d," - " addr 0x%X, chnl %d, val 0x%02X," - " mask 0x%02X\n", + + qib_dev_err(dd, + "Write failed: elt %d, addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n", (sloc & 0xF), (sloc >> 9) & 0x3f, chnl, val & 0xFF, mask & 0xFF); break; @@ -1171,7 +1189,7 @@ static int ibsd_mod_allchnls(struct ipath_devdata *dd, int loc, int val, * Set the Tx values normally modified by IBC in IB1.2 mode to default * values, as gotten from first row of init table. */ -static int set_dds_vals(struct ipath_devdata *dd, struct dds_init *ddi) +static int set_dds_vals(struct qib_devdata *dd, struct dds_init *ddi) { int ret; int idx, reg, data; @@ -1194,7 +1212,7 @@ static int set_dds_vals(struct ipath_devdata *dd, struct dds_init *ddi) * Set the Rx values normally modified by IBC in IB1.2 mode to default * values, as gotten from selected column of init table. */ -static int set_rxeq_vals(struct ipath_devdata *dd, int vsel) +static int set_rxeq_vals(struct qib_devdata *dd, int vsel) { int ret; int ridx; @@ -1202,6 +1220,7 @@ static int set_rxeq_vals(struct ipath_devdata *dd, int vsel) for (ridx = 0; ridx < cnt; ++ridx) { int elt, reg, val, loc; + elt = rxeq_init_vals[ridx].rdesc & 0xF; reg = rxeq_init_vals[ridx].rdesc >> 4; loc = EPB_LOC(0, elt, reg); @@ -1217,83 +1236,66 @@ static int set_rxeq_vals(struct ipath_devdata *dd, int vsel) /* * Set the default values (row 0) for DDR Driver Demphasis. * we do this initially and whenever we turn off IB-1.2 + * * The "default" values for Rx equalization are also stored to * SerDes registers. Formerly (and still default), we used set 2. * For experimenting with cables and link-partners, we allow changing * that via a module parameter. 
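[Annotation] The runtime-tunable default mentioned above uses the standard module-parameter plumbing, where the internal symbol and the user-visible name may differ and S_IWUSR makes the value writable through sysfs after load. A sketch with hypothetical names; the read side masks the value (& 3) so an out-of-range write from userspace stays harmless:

	#include <linux/module.h>
	#include <linux/moduleparam.h>
	#include <linux/stat.h>

	static unsigned my_rxeq_set = 2;	/* default; root can retune via sysfs */
	module_param_named(rxeq_default_set, my_rxeq_set, uint, S_IWUSR | S_IRUGO);
	MODULE_PARM_DESC(rxeq_default_set,
			 "Which set [0..3] of Rx Equalization values is default");

	static int my_apply_presets(int (*set_rxeq)(int vsel))
	{
		return set_rxeq(my_rxeq_set & 3);	/* clamp to the 4 valid sets */
	}
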
*/ -static unsigned ipath_rxeq_set = 2; -module_param_named(rxeq_default_set, ipath_rxeq_set, uint, - S_IWUSR | S_IRUGO); +static unsigned qib_rxeq_set = 2; +module_param_named(rxeq_default_set, qib_rxeq_set, uint, + S_IWUSR | S_IRUGO); MODULE_PARM_DESC(rxeq_default_set, - "Which set [0..3] of Rx Equalization values is default"); + "Which set [0..3] of Rx Equalization values is default"); -static int ipath_internal_presets(struct ipath_devdata *dd) +static int qib_internal_presets(struct qib_devdata *dd) { int ret = 0; ret = set_dds_vals(dd, dds_init_vals + DDS_3M); if (ret < 0) - ipath_dev_err(dd, "Failed to set default DDS values\n"); - ret = set_rxeq_vals(dd, ipath_rxeq_set & 3); + qib_dev_err(dd, "Failed to set default DDS values\n"); + ret = set_rxeq_vals(dd, qib_rxeq_set & 3); if (ret < 0) - ipath_dev_err(dd, "Failed to set default RXEQ values\n"); + qib_dev_err(dd, "Failed to set default RXEQ values\n"); return ret; } -int ipath_sd7220_presets(struct ipath_devdata *dd) +int qib_sd7220_presets(struct qib_devdata *dd) { int ret = 0; - if (!dd->ipath_presets_needed) + if (!dd->cspec->presets_needed) return ret; - dd->ipath_presets_needed = 0; + dd->cspec->presets_needed = 0; /* Assert uC reset, so we don't clash with it. */ - ipath_ibsd_reset(dd, 1); + qib_ibsd_reset(dd, 1); udelay(2); - ipath_sd_trimdone_monitor(dd, "link-down"); + qib_sd_trimdone_monitor(dd, "link-down"); - ret = ipath_internal_presets(dd); -return ret; + ret = qib_internal_presets(dd); + return ret; } -static int ipath_sd_trimself(struct ipath_devdata *dd, int val) +static int qib_sd_trimself(struct qib_devdata *dd, int val) { - return ibsd_sto_noisy(dd, CMUCTRL5, val, 0xFF); + int loc = CMUCTRL5 | (1U << EPB_IB_QUAD0_CS_SHF); + + return qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF); } -static int ipath_sd_early(struct ipath_devdata *dd) +static int qib_sd_early(struct qib_devdata *dd) { - int ret = -1; /* Default failed */ - int chnl; + int ret; - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, RXHSCTRL0(chnl), 0xD4, 0xFF); - if (ret < 0) - goto bail; - } - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, VCDL_DAC2(chnl), 0x2D, 0xFF); - if (ret < 0) - goto bail; - } - /* more fine-tuning of what will be default */ - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, VCDL_CTRL2(chnl), 3, 0xF); - if (ret < 0) - goto bail; - } - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, START_EQ1(chnl), 0x10, 0xFF); - if (ret < 0) - goto bail; - } - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, START_EQ2(chnl), 0x30, 0xFF); - if (ret < 0) - goto bail; - } + ret = ibsd_mod_allchnls(dd, RXHSCTRL0(0) | EPB_GLOBAL_WR, 0xD4, 0xFF); + if (ret < 0) + goto bail; + ret = ibsd_mod_allchnls(dd, START_EQ1(0) | EPB_GLOBAL_WR, 0x10, 0xFF); + if (ret < 0) + goto bail; + ret = ibsd_mod_allchnls(dd, START_EQ2(0) | EPB_GLOBAL_WR, 0x30, 0xFF); bail: return ret; } @@ -1302,50 +1304,53 @@ bail: #define LDOUTCTRL1(chnl) EPB_LOC(chnl, 7, 6) #define RXHSSTATUS(chnl) EPB_LOC(chnl, 6, 0xF) -static int ipath_sd_dactrim(struct ipath_devdata *dd) +static int qib_sd_dactrim(struct qib_devdata *dd) { - int ret = -1; /* Default failed */ - int chnl; + int ret; + + ret = ibsd_mod_allchnls(dd, VCDL_DAC2(0) | EPB_GLOBAL_WR, 0x2D, 0xFF); + if (ret < 0) + goto bail; + + /* more fine-tuning of what will be default */ + ret = ibsd_mod_allchnls(dd, VCDL_CTRL2(0), 3, 0xF); + if (ret < 0) + goto bail; + + ret = ibsd_mod_allchnls(dd, BACTRL(0) | EPB_GLOBAL_WR, 0x40, 0xFF); + if (ret < 0) + goto 
bail; + + ret = ibsd_mod_allchnls(dd, LDOUTCTRL1(0) | EPB_GLOBAL_WR, 0x04, 0xFF); + if (ret < 0) + goto bail; + + ret = ibsd_mod_allchnls(dd, RXHSSTATUS(0) | EPB_GLOBAL_WR, 0x04, 0xFF); + if (ret < 0) + goto bail; - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, BACTRL(chnl), 0x40, 0xFF); - if (ret < 0) - goto bail; - } - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, LDOUTCTRL1(chnl), 0x04, 0xFF); - if (ret < 0) - goto bail; - } - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, RXHSSTATUS(chnl), 0x04, 0xFF); - if (ret < 0) - goto bail; - } /* - * delay for max possible number of steps, with slop. + * Delay for max possible number of steps, with slop. * Each step is about 4usec. */ udelay(415); - for (chnl = 0; chnl < 4; ++chnl) { - ret = ibsd_sto_noisy(dd, LDOUTCTRL1(chnl), 0x00, 0xFF); - if (ret < 0) - goto bail; - } + + ret = ibsd_mod_allchnls(dd, LDOUTCTRL1(0) | EPB_GLOBAL_WR, 0x00, 0xFF); + bail: return ret; } #define RELOCK_FIRST_MS 3 #define RXLSPPM(chan) EPB_LOC(chan, 0, 2) -void ipath_toggle_rclkrls(struct ipath_devdata *dd) +void toggle_7220_rclkrls(struct qib_devdata *dd) { int loc = RXLSPPM(0) | EPB_GLOBAL_WR; int ret; ret = ibsd_mod_allchnls(dd, loc, 0, 0x80); if (ret < 0) - ipath_dev_err(dd, "RCLKRLS failed to clear D7\n"); + qib_dev_err(dd, "RCLKRLS failed to clear D7\n"); else { udelay(1); ibsd_mod_allchnls(dd, loc, 0x80, 0x80); @@ -1354,109 +1359,91 @@ void ipath_toggle_rclkrls(struct ipath_devdata *dd) udelay(1); ret = ibsd_mod_allchnls(dd, loc, 0, 0x80); if (ret < 0) - ipath_dev_err(dd, "RCLKRLS failed to clear D7\n"); + qib_dev_err(dd, "RCLKRLS failed to clear D7\n"); else { udelay(1); ibsd_mod_allchnls(dd, loc, 0x80, 0x80); } /* Now reset xgxs and IBC to complete the recovery */ - dd->ipath_f_xgxs_reset(dd); + dd->f_xgxs_reset(dd->pport); } /* * Shut down the timer that polls for relock occasions, if needed - * this is "hooked" from ipath_7220_quiet_serdes(), which is called - * just before ipath_shutdown_device() in ipath_driver.c shuts down all + * this is "hooked" from qib_7220_quiet_serdes(), which is called + * just before qib_shutdown_device() in qib_driver.c shuts down all * the other timers */ -void ipath_shutdown_relock_poll(struct ipath_devdata *dd) +void shutdown_7220_relock_poll(struct qib_devdata *dd) { - struct ipath_relock *irp = &dd->ipath_relock_singleton; - if (atomic_read(&irp->ipath_relock_timer_active)) { - del_timer_sync(&irp->ipath_relock_timer); - atomic_set(&irp->ipath_relock_timer_active, 0); - } + if (dd->cspec->relock_timer_active) + del_timer_sync(&dd->cspec->relock_timer); } -static unsigned ipath_relock_by_timer = 1; -module_param_named(relock_by_timer, ipath_relock_by_timer, uint, - S_IWUSR | S_IRUGO); +static unsigned qib_relock_by_timer = 1; +module_param_named(relock_by_timer, qib_relock_by_timer, uint, + S_IWUSR | S_IRUGO); MODULE_PARM_DESC(relock_by_timer, "Allow relock attempt if link not up"); -static void ipath_run_relock(unsigned long opaque) +static void qib_run_relock(unsigned long opaque) { - struct ipath_devdata *dd = (struct ipath_devdata *)opaque; - struct ipath_relock *irp = &dd->ipath_relock_singleton; - u64 val, ltstate; - - if (!(dd->ipath_flags & IPATH_INITTED)) { - /* Not yet up, just reenable the timer for later */ - irp->ipath_relock_interval = HZ; - mod_timer(&irp->ipath_relock_timer, jiffies + HZ); - return; - } + struct qib_devdata *dd = (struct qib_devdata *)opaque; + struct qib_pportdata *ppd = dd->pport; + struct qib_chip_specific *cs = dd->cspec; + int timeoff; /* 
- * Check link-training state for "stuck" state. + * Check link-training state for "stuck" state, when down. * if found, try relock and schedule another try at * exponentially growing delay, maxed at one second. * if not stuck, our work is done. */ - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); - ltstate = ipath_ib_linktrstate(dd, val); - - if (ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT - && ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) { - int timeoff; - /* Not up yet. Try again, if allowed by module-param */ - if (ipath_relock_by_timer) { - if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) - ipath_cdbg(VERBOSE, "Skip RELOCK in AUTONEG\n"); - else if (!(dd->ipath_flags & IPATH_IB_LINK_DISABLED)) { - ipath_cdbg(VERBOSE, "RELOCK\n"); - ipath_toggle_rclkrls(dd); - } + if ((dd->flags & QIB_INITTED) && !(ppd->lflags & + (QIBL_IB_AUTONEG_INPROG | QIBL_LINKINIT | QIBL_LINKARMED | + QIBL_LINKACTIVE))) { + if (qib_relock_by_timer) { + if (!(ppd->lflags & QIBL_IB_LINK_DISABLED)) + toggle_7220_rclkrls(dd); } /* re-set timer for next check */ - timeoff = irp->ipath_relock_interval << 1; + timeoff = cs->relock_interval << 1; if (timeoff > HZ) timeoff = HZ; - irp->ipath_relock_interval = timeoff; - - mod_timer(&irp->ipath_relock_timer, jiffies + timeoff); - } else { - /* Up, so no more need to check so often */ - mod_timer(&irp->ipath_relock_timer, jiffies + HZ); - } + cs->relock_interval = timeoff; + } else + timeoff = HZ; + mod_timer(&cs->relock_timer, jiffies + timeoff); } -void ipath_set_relock_poll(struct ipath_devdata *dd, int ibup) +void set_7220_relock_poll(struct qib_devdata *dd, int ibup) { - struct ipath_relock *irp = &dd->ipath_relock_singleton; + struct qib_chip_specific *cs = dd->cspec; - if (ibup > 0) { - /* we are now up, so relax timer to 1 second interval */ - if (atomic_read(&irp->ipath_relock_timer_active)) - mod_timer(&irp->ipath_relock_timer, jiffies + HZ); + if (ibup) { + /* We are now up, relax timer to 1 second interval */ + if (cs->relock_timer_active) { + cs->relock_interval = HZ; + mod_timer(&cs->relock_timer, jiffies + HZ); + } } else { /* Transition to down, (re-)set timer to short interval. */ - int timeout; - timeout = (HZ * ((ibup == -1) ? 1000 : RELOCK_FIRST_MS))/1000; + unsigned int timeout; + + timeout = msecs_to_jiffies(RELOCK_FIRST_MS); if (timeout == 0) timeout = 1; /* If timer has not yet been started, do so. */ - if (atomic_inc_return(&irp->ipath_relock_timer_active) == 1) { - init_timer(&irp->ipath_relock_timer); - irp->ipath_relock_timer.function = ipath_run_relock; - irp->ipath_relock_timer.data = (unsigned long) dd; - irp->ipath_relock_interval = timeout; - irp->ipath_relock_timer.expires = jiffies + timeout; - add_timer(&irp->ipath_relock_timer); + if (!cs->relock_timer_active) { + cs->relock_timer_active = 1; + init_timer(&cs->relock_timer); + cs->relock_timer.function = qib_run_relock; + cs->relock_timer.data = (unsigned long) dd; + cs->relock_interval = timeout; + cs->relock_timer.expires = jiffies + timeout; + add_timer(&cs->relock_timer); } else { - irp->ipath_relock_interval = timeout; - mod_timer(&irp->ipath_relock_timer, jiffies + timeout); - atomic_dec(&irp->ipath_relock_timer_active); + cs->relock_interval = timeout; + mod_timer(&cs->relock_timer, jiffies + timeout); } } } - diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c new file mode 100644 index 00000000000..c6d6a54d2e1 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -0,0 +1,1039 @@ +/* + * Copyright (c) 2012 Intel Corporation. 
All rights reserved. + * Copyright (c) 2007 - 2012 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/spinlock.h> +#include <linux/netdevice.h> +#include <linux/moduleparam.h> + +#include "qib.h" +#include "qib_common.h" + +/* default pio off, sdma on */ +static ushort sdma_descq_cnt = 256; +module_param_named(sdma_descq_cnt, sdma_descq_cnt, ushort, S_IRUGO); +MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries"); + +/* + * Bits defined in the send DMA descriptor. 
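[Annotation] The bit positions defined just below describe a two-quadword send DMA descriptor: the 48-bit physical address is split across both words, and the generation, dword count, and buffer offset are packed into the low word. A sketch of the packing, mirroring what make_sdma_desc() later in this file does:

	#include <linux/types.h>

	#define MY_DESC_COUNT_LSB 16	/* mirrors SDMA_DESC_COUNT_LSB */
	#define MY_DESC_GEN_LSB   30	/* mirrors SDMA_DESC_GEN_LSB */

	static void my_pack_desc(u64 desc[2], u64 addr, u64 gen,
				 u64 dwlen, u64 dwoffset)
	{
		desc[1] = addr >> 32;				/* SDmaPhyAddr[47:32] */
		desc[0] = (addr & 0xfffffffcULL) << 32;		/* SDmaPhyAddr[31:0] */
		desc[0] |= (gen & 3ULL) << MY_DESC_GEN_LSB;	/* SDmaGeneration[1:0] */
		desc[0] |= (dwlen & 0x7ffULL) << MY_DESC_COUNT_LSB; /* dword count */
		desc[0] |= dwoffset & 0x7ffULL;			/* SDmaBufOffset[12:2] */
	}
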
+ */ +#define SDMA_DESC_LAST (1ULL << 11) +#define SDMA_DESC_FIRST (1ULL << 12) +#define SDMA_DESC_DMA_HEAD (1ULL << 13) +#define SDMA_DESC_USE_LARGE_BUF (1ULL << 14) +#define SDMA_DESC_INTR (1ULL << 15) +#define SDMA_DESC_COUNT_LSB 16 +#define SDMA_DESC_GEN_LSB 30 + +char *qib_sdma_state_names[] = { + [qib_sdma_state_s00_hw_down] = "s00_HwDown", + [qib_sdma_state_s10_hw_start_up_wait] = "s10_HwStartUpWait", + [qib_sdma_state_s20_idle] = "s20_Idle", + [qib_sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait", + [qib_sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait", + [qib_sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait", + [qib_sdma_state_s99_running] = "s99_Running", +}; + +char *qib_sdma_event_names[] = { + [qib_sdma_event_e00_go_hw_down] = "e00_GoHwDown", + [qib_sdma_event_e10_go_hw_start] = "e10_GoHwStart", + [qib_sdma_event_e20_hw_started] = "e20_HwStarted", + [qib_sdma_event_e30_go_running] = "e30_GoRunning", + [qib_sdma_event_e40_sw_cleaned] = "e40_SwCleaned", + [qib_sdma_event_e50_hw_cleaned] = "e50_HwCleaned", + [qib_sdma_event_e60_hw_halted] = "e60_HwHalted", + [qib_sdma_event_e70_go_idle] = "e70_GoIdle", + [qib_sdma_event_e7220_err_halted] = "e7220_ErrHalted", + [qib_sdma_event_e7322_err_halted] = "e7322_ErrHalted", + [qib_sdma_event_e90_timer_tick] = "e90_TimerTick", +}; + +/* declare all statics here rather than keep sorting */ +static int alloc_sdma(struct qib_pportdata *); +static void sdma_complete(struct kref *); +static void sdma_finalput(struct qib_sdma_state *); +static void sdma_get(struct qib_sdma_state *); +static void sdma_put(struct qib_sdma_state *); +static void sdma_set_state(struct qib_pportdata *, enum qib_sdma_states); +static void sdma_start_sw_clean_up(struct qib_pportdata *); +static void sdma_sw_clean_up_task(unsigned long); +static void unmap_desc(struct qib_pportdata *, unsigned); + +static void sdma_get(struct qib_sdma_state *ss) +{ + kref_get(&ss->kref); +} + +static void sdma_complete(struct kref *kref) +{ + struct qib_sdma_state *ss = + container_of(kref, struct qib_sdma_state, kref); + + complete(&ss->comp); +} + +static void sdma_put(struct qib_sdma_state *ss) +{ + kref_put(&ss->kref, sdma_complete); +} + +static void sdma_finalput(struct qib_sdma_state *ss) +{ + sdma_put(ss); + wait_for_completion(&ss->comp); +} + +/* + * Complete all the sdma requests on the active list, in the correct + * order, and with appropriate processing. Called when cleaning up + * after sdma shutdown, and when new sdma requests are submitted for + * a link that is down. This matches what is done for requests + * that complete normally, it's just the full list. + * + * Must be called with sdma_lock held + */ +static void clear_sdma_activelist(struct qib_pportdata *ppd) +{ + struct qib_sdma_txreq *txp, *txp_next; + + list_for_each_entry_safe(txp, txp_next, &ppd->sdma_activelist, list) { + list_del_init(&txp->list); + if (txp->flags & QIB_SDMA_TXREQ_F_FREEDESC) { + unsigned idx; + + idx = txp->start_idx; + while (idx != txp->next_descq_idx) { + unmap_desc(ppd, idx); + if (++idx == ppd->sdma_descq_cnt) + idx = 0; + } + } + if (txp->callback) + (*txp->callback)(txp, QIB_SDMA_TXREQ_S_ABORTED); + } +} + +static void sdma_sw_clean_up_task(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *) opaque; + unsigned long flags; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + + /* + * At this point, the following should always be true: + * - We are halted, so no more descriptors are getting retired. 
+ * - We are not running, so no one is submitting new work. + * - Only we can send the e40_sw_cleaned, so we can't start + * running again until we say so. So, the active list and + * descq are ours to play with. + */ + + /* Process all retired requests. */ + qib_sdma_make_progress(ppd); + + clear_sdma_activelist(ppd); + + /* + * Resync count of added and removed. It is VERY important that + * sdma_descq_removed NEVER decrement - user_sdma depends on it. + */ + ppd->sdma_descq_removed = ppd->sdma_descq_added; + + /* + * Reset our notion of head and tail. + * Note that the HW registers will be reset when switching states + * due to calling __qib_sdma_process_event() below. + */ + ppd->sdma_descq_tail = 0; + ppd->sdma_descq_head = 0; + ppd->sdma_head_dma[0] = 0; + ppd->sdma_generation = 0; + + __qib_sdma_process_event(ppd, qib_sdma_event_e40_sw_cleaned); + + spin_unlock_irqrestore(&ppd->sdma_lock, flags); +} + +/* + * This is called when changing to state qib_sdma_state_s10_hw_start_up_wait + * as a result of send buffer errors or send DMA descriptor errors. + * We want to disarm the buffers in these cases. + */ +static void sdma_hw_start_up(struct qib_pportdata *ppd) +{ + struct qib_sdma_state *ss = &ppd->sdma_state; + unsigned bufno; + + for (bufno = ss->first_sendbuf; bufno < ss->last_sendbuf; ++bufno) + ppd->dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_BUF(bufno)); + + ppd->dd->f_sdma_hw_start_up(ppd); +} + +static void sdma_sw_tear_down(struct qib_pportdata *ppd) +{ + struct qib_sdma_state *ss = &ppd->sdma_state; + + /* Releasing this reference means the state machine has stopped. */ + sdma_put(ss); +} + +static void sdma_start_sw_clean_up(struct qib_pportdata *ppd) +{ + tasklet_hi_schedule(&ppd->sdma_sw_clean_up_task); +} + +static void sdma_set_state(struct qib_pportdata *ppd, + enum qib_sdma_states next_state) +{ + struct qib_sdma_state *ss = &ppd->sdma_state; + struct sdma_set_state_action *action = ss->set_state_action; + unsigned op = 0; + + /* debugging bookkeeping */ + ss->previous_state = ss->current_state; + ss->previous_op = ss->current_op; + + ss->current_state = next_state; + + if (action[next_state].op_enable) + op |= QIB_SDMA_SENDCTRL_OP_ENABLE; + + if (action[next_state].op_intenable) + op |= QIB_SDMA_SENDCTRL_OP_INTENABLE; + + if (action[next_state].op_halt) + op |= QIB_SDMA_SENDCTRL_OP_HALT; + + if (action[next_state].op_drain) + op |= QIB_SDMA_SENDCTRL_OP_DRAIN; + + if (action[next_state].go_s99_running_tofalse) + ss->go_s99_running = 0; + + if (action[next_state].go_s99_running_totrue) + ss->go_s99_running = 1; + + ss->current_op = op; + + ppd->dd->f_sdma_sendctrl(ppd, ss->current_op); +} + +static void unmap_desc(struct qib_pportdata *ppd, unsigned head) +{ + __le64 *descqp = &ppd->sdma_descq[head].qw[0]; + u64 desc[2]; + dma_addr_t addr; + size_t len; + + desc[0] = le64_to_cpu(descqp[0]); + desc[1] = le64_to_cpu(descqp[1]); + + addr = (desc[1] << 32) | (desc[0] >> 32); + len = (desc[0] >> 14) & (0x7ffULL << 2); + dma_unmap_single(&ppd->dd->pcidev->dev, addr, len, DMA_TO_DEVICE); +} + +static int alloc_sdma(struct qib_pportdata *ppd) +{ + ppd->sdma_descq_cnt = sdma_descq_cnt; + if (!ppd->sdma_descq_cnt) + ppd->sdma_descq_cnt = 256; + + /* Allocate memory for SendDMA descriptor FIFO */ + ppd->sdma_descq = dma_alloc_coherent(&ppd->dd->pcidev->dev, + ppd->sdma_descq_cnt * sizeof(u64[2]), &ppd->sdma_descq_phys, + GFP_KERNEL); + + if (!ppd->sdma_descq) { + qib_dev_err(ppd->dd, + "failed to allocate SendDMA descriptor FIFO memory\n"); + goto bail; + } + + /* Allocate memory 
for DMA of head register to memory */ + ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev, + PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL); + if (!ppd->sdma_head_dma) { + qib_dev_err(ppd->dd, + "failed to allocate SendDMA head memory\n"); + goto cleanup_descq; + } + ppd->sdma_head_dma[0] = 0; + return 0; + +cleanup_descq: + dma_free_coherent(&ppd->dd->pcidev->dev, + ppd->sdma_descq_cnt * sizeof(u64[2]), (void *)ppd->sdma_descq, + ppd->sdma_descq_phys); + ppd->sdma_descq = NULL; + ppd->sdma_descq_phys = 0; +bail: + ppd->sdma_descq_cnt = 0; + return -ENOMEM; +} + +static void free_sdma(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + + if (ppd->sdma_head_dma) { + dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, + (void *)ppd->sdma_head_dma, + ppd->sdma_head_phys); + ppd->sdma_head_dma = NULL; + ppd->sdma_head_phys = 0; + } + + if (ppd->sdma_descq) { + dma_free_coherent(&dd->pcidev->dev, + ppd->sdma_descq_cnt * sizeof(u64[2]), + ppd->sdma_descq, ppd->sdma_descq_phys); + ppd->sdma_descq = NULL; + ppd->sdma_descq_phys = 0; + } +} + +static inline void make_sdma_desc(struct qib_pportdata *ppd, + u64 *sdmadesc, u64 addr, u64 dwlen, + u64 dwoffset) +{ + + WARN_ON(addr & 3); + /* SDmaPhyAddr[47:32] */ + sdmadesc[1] = addr >> 32; + /* SDmaPhyAddr[31:0] */ + sdmadesc[0] = (addr & 0xfffffffcULL) << 32; + /* SDmaGeneration[1:0] */ + sdmadesc[0] |= (ppd->sdma_generation & 3ULL) << + SDMA_DESC_GEN_LSB; + /* SDmaDwordCount[10:0] */ + sdmadesc[0] |= (dwlen & 0x7ffULL) << SDMA_DESC_COUNT_LSB; + /* SDmaBufOffset[12:2] */ + sdmadesc[0] |= dwoffset & 0x7ffULL; +} + +/* sdma_lock must be held */ +int qib_sdma_make_progress(struct qib_pportdata *ppd) +{ + struct list_head *lp = NULL; + struct qib_sdma_txreq *txp = NULL; + struct qib_devdata *dd = ppd->dd; + int progress = 0; + u16 hwhead; + u16 idx = 0; + + hwhead = dd->f_sdma_gethead(ppd); + + /* The reason for some of the complexity of this code is that + * not all descriptors have corresponding txps. So, we have to + * be able to skip over descs until we wander into the range of + * the next txp on the list. + */ + + if (!list_empty(&ppd->sdma_activelist)) { + lp = ppd->sdma_activelist.next; + txp = list_entry(lp, struct qib_sdma_txreq, list); + idx = txp->start_idx; + } + + while (ppd->sdma_descq_head != hwhead) { + /* if desc is part of this txp, unmap if needed */ + if (txp && (txp->flags & QIB_SDMA_TXREQ_F_FREEDESC) && + (idx == ppd->sdma_descq_head)) { + unmap_desc(ppd, ppd->sdma_descq_head); + if (++idx == ppd->sdma_descq_cnt) + idx = 0; + } + + /* increment dequed desc count */ + ppd->sdma_descq_removed++; + + /* advance head, wrap if needed */ + if (++ppd->sdma_descq_head == ppd->sdma_descq_cnt) + ppd->sdma_descq_head = 0; + + /* if now past this txp's descs, do the callback */ + if (txp && txp->next_descq_idx == ppd->sdma_descq_head) { + /* remove from active list */ + list_del_init(&txp->list); + if (txp->callback) + (*txp->callback)(txp, QIB_SDMA_TXREQ_S_OK); + /* see if there is another txp */ + if (list_empty(&ppd->sdma_activelist)) + txp = NULL; + else { + lp = ppd->sdma_activelist.next; + txp = list_entry(lp, struct qib_sdma_txreq, + list); + idx = txp->start_idx; + } + } + progress = 1; + } + if (progress) + qib_verbs_sdma_desc_avail(ppd, qib_sdma_descq_freecnt(ppd)); + return progress; +} + +/* + * This is called from interrupt context. 
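[Annotation] qib_sdma_intr(), next, is the locked entry point; the double-underscore variant assumes sdma_lock is already held by the caller. A minimal sketch of that naming-and-locking convention, with stand-in types:

	#include <linux/spinlock.h>

	struct my_port {
		spinlock_t lock;	/* stand-in for ppd->sdma_lock */
	};

	static void __my_sdma_intr(struct my_port *p)
	{
		/* caller must hold p->lock; real work goes here */
	}

	static void my_sdma_intr(struct my_port *p)
	{
		unsigned long flags;

		spin_lock_irqsave(&p->lock, flags);	/* safe from hard irq */
		__my_sdma_intr(p);
		spin_unlock_irqrestore(&p->lock, flags);
	}
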
+ */ +void qib_sdma_intr(struct qib_pportdata *ppd) +{ + unsigned long flags; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + + __qib_sdma_intr(ppd); + + spin_unlock_irqrestore(&ppd->sdma_lock, flags); +} + +void __qib_sdma_intr(struct qib_pportdata *ppd) +{ + if (__qib_sdma_running(ppd)) { + qib_sdma_make_progress(ppd); + if (!list_empty(&ppd->sdma_userpending)) + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + } +} + +int qib_setup_sdma(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + int ret = 0; + + ret = alloc_sdma(ppd); + if (ret) + goto bail; + + /* set consistent sdma state */ + ppd->dd->f_sdma_init_early(ppd); + spin_lock_irqsave(&ppd->sdma_lock, flags); + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + + /* set up reference counting */ + kref_init(&ppd->sdma_state.kref); + init_completion(&ppd->sdma_state.comp); + + ppd->sdma_generation = 0; + ppd->sdma_descq_head = 0; + ppd->sdma_descq_removed = 0; + ppd->sdma_descq_added = 0; + + ppd->sdma_intrequest = 0; + INIT_LIST_HEAD(&ppd->sdma_userpending); + + INIT_LIST_HEAD(&ppd->sdma_activelist); + + tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task, + (unsigned long)ppd); + + ret = dd->f_init_sdma_regs(ppd); + if (ret) + goto bail_alloc; + + qib_sdma_process_event(ppd, qib_sdma_event_e10_go_hw_start); + + return 0; + +bail_alloc: + qib_teardown_sdma(ppd); +bail: + return ret; +} + +void qib_teardown_sdma(struct qib_pportdata *ppd) +{ + qib_sdma_process_event(ppd, qib_sdma_event_e00_go_hw_down); + + /* + * This waits for the state machine to exit so it is not + * necessary to kill the sdma_sw_clean_up_task to make sure + * it is not running. + */ + sdma_finalput(&ppd->sdma_state); + + free_sdma(ppd); +} + +int qib_sdma_running(struct qib_pportdata *ppd) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + ret = __qib_sdma_running(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + + return ret; +} + +/* + * Complete a request when sdma not running; likely only request + * but to simplify the code, always queue it, then process the full + * activelist. We process the entire list to ensure that this particular + * request does get it's callback, but in the correct order. + * Must be called with sdma_lock held + */ +static void complete_sdma_err_req(struct qib_pportdata *ppd, + struct qib_verbs_txreq *tx) +{ + atomic_inc(&tx->qp->s_dma_busy); + /* no sdma descriptors, so no unmap_desc */ + tx->txreq.start_idx = 0; + tx->txreq.next_descq_idx = 0; + list_add_tail(&tx->txreq.list, &ppd->sdma_activelist); + clear_sdma_activelist(ppd); +} + +/* + * This function queues one IB packet onto the send DMA queue per call. + * The caller is responsible for checking: + * 1) The number of send DMA descriptor entries is less than the size of + * the descriptor queue. + * 2) The IB SGE addresses and lengths are 32-bit aligned + * (except possibly the last SGE's length) + * 3) The SGE addresses are suitable for passing to dma_map_single(). 
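[Annotation] Requirement 3 in the list above exists because qib_sdma_verbs_send() hands each SGE's virtual address straight to dma_map_single() and, on a mapping failure, unwinds through its "unmap" label. A sketch of that map-and-check step:

	#include <linux/dma-mapping.h>
	#include <linux/errno.h>

	static int my_map_sge(struct device *dev, void *vaddr, size_t len,
			      dma_addr_t *out)
	{
		dma_addr_t addr;

		addr = dma_map_single(dev, vaddr, len, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, addr))
			return -ENOMEM;	/* caller unwinds earlier mappings */
		*out = addr;
		return 0;
	}
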
+ */ +int qib_sdma_verbs_send(struct qib_pportdata *ppd, + struct qib_sge_state *ss, u32 dwords, + struct qib_verbs_txreq *tx) +{ + unsigned long flags; + struct qib_sge *sge; + struct qib_qp *qp; + int ret = 0; + u16 tail; + __le64 *descqp; + u64 sdmadesc[2]; + u32 dwoffset; + dma_addr_t addr; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + +retry: + if (unlikely(!__qib_sdma_running(ppd))) { + complete_sdma_err_req(ppd, tx); + goto unlock; + } + + if (tx->txreq.sg_count > qib_sdma_descq_freecnt(ppd)) { + if (qib_sdma_make_progress(ppd)) + goto retry; + if (ppd->dd->flags & QIB_HAS_SDMA_TIMEOUT) + ppd->dd->f_sdma_set_desc_cnt(ppd, + ppd->sdma_descq_cnt / 2); + goto busy; + } + + dwoffset = tx->hdr_dwords; + make_sdma_desc(ppd, sdmadesc, (u64) tx->txreq.addr, dwoffset, 0); + + sdmadesc[0] |= SDMA_DESC_FIRST; + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_USELARGEBUF) + sdmadesc[0] |= SDMA_DESC_USE_LARGE_BUF; + + /* write to the descq */ + tail = ppd->sdma_descq_tail; + descqp = &ppd->sdma_descq[tail].qw[0]; + *descqp++ = cpu_to_le64(sdmadesc[0]); + *descqp++ = cpu_to_le64(sdmadesc[1]); + + /* increment the tail */ + if (++tail == ppd->sdma_descq_cnt) { + tail = 0; + descqp = &ppd->sdma_descq[0].qw[0]; + ++ppd->sdma_generation; + } + + tx->txreq.start_idx = tail; + + sge = &ss->sge; + while (dwords) { + u32 dw; + u32 len; + + len = dwords << 2; + if (len > sge->length) + len = sge->length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + dw = (len + 3) >> 2; + addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr, + dw << 2, DMA_TO_DEVICE); + if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) + goto unmap; + sdmadesc[0] = 0; + make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset); + /* SDmaUseLargeBuf has to be set in every descriptor */ + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_USELARGEBUF) + sdmadesc[0] |= SDMA_DESC_USE_LARGE_BUF; + /* write to the descq */ + *descqp++ = cpu_to_le64(sdmadesc[0]); + *descqp++ = cpu_to_le64(sdmadesc[1]); + + /* increment the tail */ + if (++tail == ppd->sdma_descq_cnt) { + tail = 0; + descqp = &ppd->sdma_descq[0].qw[0]; + ++ppd->sdma_generation; + } + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + + dwoffset += dw; + dwords -= dw; + } + + if (!tail) + descqp = &ppd->sdma_descq[ppd->sdma_descq_cnt].qw[0]; + descqp -= 2; + descqp[0] |= cpu_to_le64(SDMA_DESC_LAST); + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_HEADTOHOST) + descqp[0] |= cpu_to_le64(SDMA_DESC_DMA_HEAD); + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_INTREQ) + descqp[0] |= cpu_to_le64(SDMA_DESC_INTR); + + atomic_inc(&tx->qp->s_dma_busy); + tx->txreq.next_descq_idx = tail; + ppd->dd->f_sdma_update_tail(ppd, tail); + ppd->sdma_descq_added += tx->txreq.sg_count; + list_add_tail(&tx->txreq.list, &ppd->sdma_activelist); + goto unlock; + +unmap: + for (;;) { + if (!tail) + tail = ppd->sdma_descq_cnt - 1; + else + tail--; + if (tail == ppd->sdma_descq_tail) + break; + unmap_desc(ppd, tail); + } + qp = tx->qp; + qib_put_txreq(tx); + spin_lock(&qp->r_lock); + spin_lock(&qp->s_lock); + if (qp->ibqp.qp_type == IB_QPT_RC) { + /* XXX what about error sending RDMA read responses? 
*/ + if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) + qib_error_qp(qp, IB_WC_GENERAL_ERR); + } else if (qp->s_wqe) + qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); + spin_unlock(&qp->s_lock); + spin_unlock(&qp->r_lock); + /* return zero to process the next send work request */ + goto unlock; + +busy: + qp = tx->qp; + spin_lock(&qp->s_lock); + if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { + struct qib_ibdev *dev; + + /* + * If we couldn't queue the DMA request, save the info + * and try again later rather than destroying the + * buffer and undoing the side effects of the copy. + */ + tx->ss = ss; + tx->dwords = dwords; + qp->s_tx = tx; + dev = &ppd->dd->verbs_dev; + spin_lock(&dev->pending_lock); + if (list_empty(&qp->iowait)) { + struct qib_ibport *ibp; + + ibp = &ppd->ibport_data; + ibp->n_dmawait++; + qp->s_flags |= QIB_S_WAIT_DMA_DESC; + list_add_tail(&qp->iowait, &dev->dmawait); + } + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~QIB_S_BUSY; + spin_unlock(&qp->s_lock); + ret = -EBUSY; + } else { + spin_unlock(&qp->s_lock); + qib_put_txreq(tx); + } +unlock: + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return ret; +} + +/* + * sdma_lock should be acquired before calling this routine + */ +void dump_sdma_state(struct qib_pportdata *ppd) +{ + struct qib_sdma_desc *descq; + struct qib_sdma_txreq *txp, *txpnext; + __le64 *descqp; + u64 desc[2]; + u64 addr; + u16 gen, dwlen, dwoffset; + u16 head, tail, cnt; + + head = ppd->sdma_descq_head; + tail = ppd->sdma_descq_tail; + cnt = qib_sdma_descq_freecnt(ppd); + descq = ppd->sdma_descq; + + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA ppd->sdma_descq_head: %u\n", head); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA ppd->sdma_descq_tail: %u\n", tail); + qib_dev_porterr(ppd->dd, ppd->port, + "SDMA sdma_descq_freecnt: %u\n", cnt); + + /* print info for each entry in the descriptor queue */ + while (head != tail) { + char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 }; + + descqp = &descq[head].qw[0]; + desc[0] = le64_to_cpu(descqp[0]); + desc[1] = le64_to_cpu(descqp[1]); + flags[0] = (desc[0] & 1<<15) ? 'I' : '-'; + flags[1] = (desc[0] & 1<<14) ? 'L' : 'S'; + flags[2] = (desc[0] & 1<<13) ? 'H' : '-'; + flags[3] = (desc[0] & 1<<12) ? 'F' : '-'; + flags[4] = (desc[0] & 1<<11) ? 
'L' : '-';
+		addr = (desc[1] << 32) | ((desc[0] >> 32) & 0xfffffffcULL);
+		gen = (desc[0] >> 30) & 3ULL;
+		dwlen = (desc[0] >> 14) & (0x7ffULL << 2);
+		dwoffset = (desc[0] & 0x7ffULL) << 2;
+		qib_dev_porterr(ppd->dd, ppd->port,
+			"SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n",
+			head, flags, addr, gen, dwlen, dwoffset);
+		if (++head == ppd->sdma_descq_cnt)
+			head = 0;
+	}
+
+	/* print dma descriptor indices from the TX requests */
+	list_for_each_entry_safe(txp, txpnext, &ppd->sdma_activelist,
+				 list)
+		qib_dev_porterr(ppd->dd, ppd->port,
+			"SDMA txp->start_idx: %u txp->next_descq_idx: %u\n",
+			txp->start_idx, txp->next_descq_idx);
+}
+
+void qib_sdma_process_event(struct qib_pportdata *ppd,
+	enum qib_sdma_events event)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+	__qib_sdma_process_event(ppd, event);
+
+	if (ppd->sdma_state.current_state == qib_sdma_state_s99_running)
+		qib_verbs_sdma_desc_avail(ppd, qib_sdma_descq_freecnt(ppd));
+
+	spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+void __qib_sdma_process_event(struct qib_pportdata *ppd,
+	enum qib_sdma_events event)
+{
+	struct qib_sdma_state *ss = &ppd->sdma_state;
+
+	switch (ss->current_state) {
+	case qib_sdma_state_s00_hw_down:
+		switch (event) {
+		case qib_sdma_event_e00_go_hw_down:
+			break;
+		case qib_sdma_event_e30_go_running:
+			/*
+			 * If down, but running requested (usually the
+			 * result of link up), then we need to start up.
+			 * This can happen when hw down is requested while
+			 * bringing the link up with traffic active on
+			 * the 7220, e.g.
+			 */
+			ss->go_s99_running = 1;
+			/* fall through and start dma engine */
+		case qib_sdma_event_e10_go_hw_start:
+			/* This reference means the state machine is started */
+			sdma_get(&ppd->sdma_state);
+			sdma_set_state(ppd,
+				       qib_sdma_state_s10_hw_start_up_wait);
+			break;
+		case qib_sdma_event_e20_hw_started:
+			break;
+		case qib_sdma_event_e40_sw_cleaned:
+			sdma_sw_tear_down(ppd);
+			break;
+		case qib_sdma_event_e50_hw_cleaned:
+			break;
+		case qib_sdma_event_e60_hw_halted:
+			break;
+		case qib_sdma_event_e70_go_idle:
+			break;
+		case qib_sdma_event_e7220_err_halted:
+			break;
+		case qib_sdma_event_e7322_err_halted:
+			break;
+		case qib_sdma_event_e90_timer_tick:
+			break;
+		}
+		break;
+
+	case qib_sdma_state_s10_hw_start_up_wait:
+		switch (event) {
+		case qib_sdma_event_e00_go_hw_down:
+			sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+			sdma_sw_tear_down(ppd);
+			break;
+		case qib_sdma_event_e10_go_hw_start:
+			break;
+		case qib_sdma_event_e20_hw_started:
+			sdma_set_state(ppd, ss->go_s99_running ?
+ qib_sdma_state_s99_running : + qib_sdma_state_s20_idle); + break; + case qib_sdma_event_e30_go_running: + ss->go_s99_running = 1; + break; + case qib_sdma_event_e40_sw_cleaned: + break; + case qib_sdma_event_e50_hw_cleaned: + break; + case qib_sdma_event_e60_hw_halted: + break; + case qib_sdma_event_e70_go_idle: + ss->go_s99_running = 0; + break; + case qib_sdma_event_e7220_err_halted: + break; + case qib_sdma_event_e7322_err_halted: + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + + case qib_sdma_state_s20_idle: + switch (event) { + case qib_sdma_event_e00_go_hw_down: + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + sdma_sw_tear_down(ppd); + break; + case qib_sdma_event_e10_go_hw_start: + break; + case qib_sdma_event_e20_hw_started: + break; + case qib_sdma_event_e30_go_running: + sdma_set_state(ppd, qib_sdma_state_s99_running); + ss->go_s99_running = 1; + break; + case qib_sdma_event_e40_sw_cleaned: + break; + case qib_sdma_event_e50_hw_cleaned: + break; + case qib_sdma_event_e60_hw_halted: + break; + case qib_sdma_event_e70_go_idle: + break; + case qib_sdma_event_e7220_err_halted: + break; + case qib_sdma_event_e7322_err_halted: + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + + case qib_sdma_state_s30_sw_clean_up_wait: + switch (event) { + case qib_sdma_event_e00_go_hw_down: + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + break; + case qib_sdma_event_e10_go_hw_start: + break; + case qib_sdma_event_e20_hw_started: + break; + case qib_sdma_event_e30_go_running: + ss->go_s99_running = 1; + break; + case qib_sdma_event_e40_sw_cleaned: + sdma_set_state(ppd, + qib_sdma_state_s10_hw_start_up_wait); + sdma_hw_start_up(ppd); + break; + case qib_sdma_event_e50_hw_cleaned: + break; + case qib_sdma_event_e60_hw_halted: + break; + case qib_sdma_event_e70_go_idle: + ss->go_s99_running = 0; + break; + case qib_sdma_event_e7220_err_halted: + break; + case qib_sdma_event_e7322_err_halted: + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + + case qib_sdma_state_s40_hw_clean_up_wait: + switch (event) { + case qib_sdma_event_e00_go_hw_down: + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + sdma_start_sw_clean_up(ppd); + break; + case qib_sdma_event_e10_go_hw_start: + break; + case qib_sdma_event_e20_hw_started: + break; + case qib_sdma_event_e30_go_running: + ss->go_s99_running = 1; + break; + case qib_sdma_event_e40_sw_cleaned: + break; + case qib_sdma_event_e50_hw_cleaned: + sdma_set_state(ppd, + qib_sdma_state_s30_sw_clean_up_wait); + sdma_start_sw_clean_up(ppd); + break; + case qib_sdma_event_e60_hw_halted: + break; + case qib_sdma_event_e70_go_idle: + ss->go_s99_running = 0; + break; + case qib_sdma_event_e7220_err_halted: + break; + case qib_sdma_event_e7322_err_halted: + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + + case qib_sdma_state_s50_hw_halt_wait: + switch (event) { + case qib_sdma_event_e00_go_hw_down: + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + sdma_start_sw_clean_up(ppd); + break; + case qib_sdma_event_e10_go_hw_start: + break; + case qib_sdma_event_e20_hw_started: + break; + case qib_sdma_event_e30_go_running: + ss->go_s99_running = 1; + break; + case qib_sdma_event_e40_sw_cleaned: + break; + case qib_sdma_event_e50_hw_cleaned: + break; + case qib_sdma_event_e60_hw_halted: + sdma_set_state(ppd, + qib_sdma_state_s40_hw_clean_up_wait); + ppd->dd->f_sdma_hw_clean_up(ppd); + break; + case qib_sdma_event_e70_go_idle: + ss->go_s99_running = 0; + break; + case 
qib_sdma_event_e7220_err_halted: + break; + case qib_sdma_event_e7322_err_halted: + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + + case qib_sdma_state_s99_running: + switch (event) { + case qib_sdma_event_e00_go_hw_down: + sdma_set_state(ppd, qib_sdma_state_s00_hw_down); + sdma_start_sw_clean_up(ppd); + break; + case qib_sdma_event_e10_go_hw_start: + break; + case qib_sdma_event_e20_hw_started: + break; + case qib_sdma_event_e30_go_running: + break; + case qib_sdma_event_e40_sw_cleaned: + break; + case qib_sdma_event_e50_hw_cleaned: + break; + case qib_sdma_event_e60_hw_halted: + sdma_set_state(ppd, + qib_sdma_state_s30_sw_clean_up_wait); + sdma_start_sw_clean_up(ppd); + break; + case qib_sdma_event_e70_go_idle: + sdma_set_state(ppd, qib_sdma_state_s50_hw_halt_wait); + ss->go_s99_running = 0; + break; + case qib_sdma_event_e7220_err_halted: + sdma_set_state(ppd, + qib_sdma_state_s30_sw_clean_up_wait); + sdma_start_sw_clean_up(ppd); + break; + case qib_sdma_event_e7322_err_halted: + sdma_set_state(ppd, qib_sdma_state_s50_hw_halt_wait); + break; + case qib_sdma_event_e90_timer_tick: + break; + } + break; + } + + ss->last_event = event; +} diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c new file mode 100644 index 00000000000..d6235931a1b --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_srq.c @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#include "qib_verbs.h" + +/** + * qib_post_srq_receive - post a receive on a shared receive queue + * @ibsrq: the SRQ to post the receive on + * @wr: the list of work requests to post + * @bad_wr: A pointer to the first WR to cause a problem is put here + * + * This may be called from interrupt context. 
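+ *
+ * Editor's sketch (hypothetical caller, not part of the original
+ * commit): a consumer reaches this entry point through the generic
+ * verbs wrapper, e.g.:
+ *
+ *	struct ib_sge sge = { .addr = dma_addr, .length = len,
+ *			      .lkey = mr->lkey };
+ *	struct ib_recv_wr wr = { .wr_id = cookie, .sg_list = &sge,
+ *				 .num_sge = 1 };
+ *	struct ib_recv_wr *bad_wr;
+ *	int ret = ib_post_srq_recv(srq, &wr, &bad_wr);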
+ */ +int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct qib_srq *srq = to_isrq(ibsrq); + struct qib_rwq *wq; + unsigned long flags; + int ret; + + for (; wr; wr = wr->next) { + struct qib_rwqe *wqe; + u32 next; + int i; + + if ((unsigned) wr->num_sge > srq->rq.max_sge) { + *bad_wr = wr; + ret = -EINVAL; + goto bail; + } + + spin_lock_irqsave(&srq->rq.lock, flags); + wq = srq->rq.wq; + next = wq->head + 1; + if (next >= srq->rq.size) + next = 0; + if (next == wq->tail) { + spin_unlock_irqrestore(&srq->rq.lock, flags); + *bad_wr = wr; + ret = -ENOMEM; + goto bail; + } + + wqe = get_rwqe_ptr(&srq->rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); + wq->head = next; + spin_unlock_irqrestore(&srq->rq.lock, flags); + } + ret = 0; + +bail: + return ret; +} + +/** + * qib_create_srq - create a shared receive queue + * @ibpd: the protection domain of the SRQ to create + * @srq_init_attr: the attributes of the SRQ + * @udata: data from libibverbs when creating a user SRQ + */ +struct ib_srq *qib_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) +{ + struct qib_ibdev *dev = to_idev(ibpd->device); + struct qib_srq *srq; + u32 sz; + struct ib_srq *ret; + + if (srq_init_attr->srq_type != IB_SRQT_BASIC) { + ret = ERR_PTR(-ENOSYS); + goto done; + } + + if (srq_init_attr->attr.max_sge == 0 || + srq_init_attr->attr.max_sge > ib_qib_max_srq_sges || + srq_init_attr->attr.max_wr == 0 || + srq_init_attr->attr.max_wr > ib_qib_max_srq_wrs) { + ret = ERR_PTR(-EINVAL); + goto done; + } + + srq = kmalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) { + ret = ERR_PTR(-ENOMEM); + goto done; + } + + /* + * Need to use vmalloc() if we want to support large #s of entries. + */ + srq->rq.size = srq_init_attr->attr.max_wr + 1; + srq->rq.max_sge = srq_init_attr->attr.max_sge; + sz = sizeof(struct ib_sge) * srq->rq.max_sge + + sizeof(struct qib_rwqe); + srq->rq.wq = vmalloc_user(sizeof(struct qib_rwq) + srq->rq.size * sz); + if (!srq->rq.wq) { + ret = ERR_PTR(-ENOMEM); + goto bail_srq; + } + + /* + * Return the address of the RWQ as the offset to mmap. + * See qib_mmap() for details. + */ + if (udata && udata->outlen >= sizeof(__u64)) { + int err; + u32 s = sizeof(struct qib_rwq) + srq->rq.size * sz; + + srq->ip = + qib_create_mmap_info(dev, s, ibpd->uobject->context, + srq->rq.wq); + if (!srq->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wq; + } + + err = ib_copy_to_udata(udata, &srq->ip->offset, + sizeof(srq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else + srq->ip = NULL; + + /* + * ib_create_srq() will initialize srq->ibsrq. 
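+	 *
+	 * Editor's note: rq.size was set to max_wr + 1 above because the
+	 * ring keeps one slot deliberately empty, so head == tail always
+	 * means "empty" and never "full"; qib_post_srq_receive() refuses
+	 * a post when advancing head would land on tail:
+	 *
+	 *	next = (head + 1 == size) ? 0 : head + 1;
+	 *	if (next == tail)
+	 *		return -ENOMEM;		(ring is full)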
+ */ + spin_lock_init(&srq->rq.lock); + srq->rq.wq->head = 0; + srq->rq.wq->tail = 0; + srq->limit = srq_init_attr->attr.srq_limit; + + spin_lock(&dev->n_srqs_lock); + if (dev->n_srqs_allocated == ib_qib_max_srqs) { + spin_unlock(&dev->n_srqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_srqs_allocated++; + spin_unlock(&dev->n_srqs_lock); + + if (srq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + ret = &srq->ibsrq; + goto done; + +bail_ip: + kfree(srq->ip); +bail_wq: + vfree(srq->rq.wq); +bail_srq: + kfree(srq); +done: + return ret; +} + +/** + * qib_modify_srq - modify a shared receive queue + * @ibsrq: the SRQ to modify + * @attr: the new attributes of the SRQ + * @attr_mask: indicates which attributes to modify + * @udata: user data for libibverbs.so + */ +int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata) +{ + struct qib_srq *srq = to_isrq(ibsrq); + struct qib_rwq *wq; + int ret = 0; + + if (attr_mask & IB_SRQ_MAX_WR) { + struct qib_rwq *owq; + struct qib_rwqe *p; + u32 sz, size, n, head, tail; + + /* Check that the requested sizes are below the limits. */ + if ((attr->max_wr > ib_qib_max_srq_wrs) || + ((attr_mask & IB_SRQ_LIMIT) ? + attr->srq_limit : srq->limit) > attr->max_wr) { + ret = -EINVAL; + goto bail; + } + + sz = sizeof(struct qib_rwqe) + + srq->rq.max_sge * sizeof(struct ib_sge); + size = attr->max_wr + 1; + wq = vmalloc_user(sizeof(struct qib_rwq) + size * sz); + if (!wq) { + ret = -ENOMEM; + goto bail; + } + + /* Check that we can write the offset to mmap. */ + if (udata && udata->inlen >= sizeof(__u64)) { + __u64 offset_addr; + __u64 offset = 0; + + ret = ib_copy_from_udata(&offset_addr, udata, + sizeof(offset_addr)); + if (ret) + goto bail_free; + udata->outbuf = + (void __user *) (unsigned long) offset_addr; + ret = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (ret) + goto bail_free; + } + + spin_lock_irq(&srq->rq.lock); + /* + * validate head and tail pointer values and compute + * the number of remaining WQEs. + */ + owq = srq->rq.wq; + head = owq->head; + tail = owq->tail; + if (head >= srq->rq.size || tail >= srq->rq.size) { + ret = -EINVAL; + goto bail_unlock; + } + n = head; + if (n < tail) + n += srq->rq.size - tail; + else + n -= tail; + if (size <= n) { + ret = -EINVAL; + goto bail_unlock; + } + n = 0; + p = wq->wq; + while (tail != head) { + struct qib_rwqe *wqe; + int i; + + wqe = get_rwqe_ptr(&srq->rq, tail); + p->wr_id = wqe->wr_id; + p->num_sge = wqe->num_sge; + for (i = 0; i < wqe->num_sge; i++) + p->sg_list[i] = wqe->sg_list[i]; + n++; + p = (struct qib_rwqe *)((char *) p + sz); + if (++tail >= srq->rq.size) + tail = 0; + } + srq->rq.wq = wq; + srq->rq.size = size; + wq->head = n; + wq->tail = 0; + if (attr_mask & IB_SRQ_LIMIT) + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + + vfree(owq); + + if (srq->ip) { + struct qib_mmap_info *ip = srq->ip; + struct qib_ibdev *dev = to_idev(srq->ibsrq.device); + u32 s = sizeof(struct qib_rwq) + size * sz; + + qib_update_mmap_info(dev, ip, s, wq); + + /* + * Return the offset to mmap. + * See qib_mmap() for details. + */ + if (udata && udata->inlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + goto bail; + } + + /* + * Put user mapping info onto the pending list + * unless it already is on the list. 
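+		 *
+		 * Editor's note on the resize path above (illustrative
+		 * values): the occupancy check computes the number of live
+		 * WQEs without a counter; with size = 8, head = 2 and
+		 * tail = 6 the live entries are 6, 7, 0 and 1, so:
+		 *
+		 *	n = head;			n = 2
+		 *	if (n < tail)
+		 *		n += srq->rq.size - tail;	n = 2 + (8 - 6) = 4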
+ */ + spin_lock_irq(&dev->pending_lock); + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, + &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + } else if (attr_mask & IB_SRQ_LIMIT) { + spin_lock_irq(&srq->rq.lock); + if (attr->srq_limit >= srq->rq.size) + ret = -EINVAL; + else + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + } + goto bail; + +bail_unlock: + spin_unlock_irq(&srq->rq.lock); +bail_free: + vfree(wq); +bail: + return ret; +} + +int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + struct qib_srq *srq = to_isrq(ibsrq); + + attr->max_wr = srq->rq.size - 1; + attr->max_sge = srq->rq.max_sge; + attr->srq_limit = srq->limit; + return 0; +} + +/** + * qib_destroy_srq - destroy a shared receive queue + * @ibsrq: the SRQ to destroy + */ +int qib_destroy_srq(struct ib_srq *ibsrq) +{ + struct qib_srq *srq = to_isrq(ibsrq); + struct qib_ibdev *dev = to_idev(ibsrq->device); + + spin_lock(&dev->n_srqs_lock); + dev->n_srqs_allocated--; + spin_unlock(&dev->n_srqs_lock); + if (srq->ip) + kref_put(&srq->ip->ref, qib_release_mmap_info); + else + vfree(srq->rq.wq); + kfree(srq); + + return 0; +} diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c new file mode 100644 index 00000000000..3c8e4e3caca --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -0,0 +1,842 @@ +/* + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. + * Copyright (c) 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/ctype.h> + +#include "qib.h" +#include "qib_mad.h" + +/* start of per-port functions */ +/* + * Get/Set heartbeat enable. 
OR of 1=enabled, 2=auto + */ +static ssize_t show_hrtbt_enb(struct qib_pportdata *ppd, char *buf) +{ + struct qib_devdata *dd = ppd->dd; + int ret; + + ret = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT); + ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret); + return ret; +} + +static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf, + size_t count) +{ + struct qib_devdata *dd = ppd->dd; + int ret; + u16 val; + + ret = kstrtou16(buf, 0, &val); + if (ret) { + qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n"); + return ret; + } + + /* + * Set the "intentional" heartbeat enable per either of + * "Enable" and "Auto", as these are normally set together. + * This bit is consulted when leaving loopback mode, + * because entering loopback mode overrides it and automatically + * disables heartbeat. + */ + ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val); + return ret < 0 ? ret : count; +} + +static ssize_t store_loopback(struct qib_pportdata *ppd, const char *buf, + size_t count) +{ + struct qib_devdata *dd = ppd->dd; + int ret = count, r; + + r = dd->f_set_ib_loopback(ppd, buf); + if (r < 0) + ret = r; + + return ret; +} + +static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf, + size_t count) +{ + struct qib_devdata *dd = ppd->dd; + int ret; + u16 val; + + ret = kstrtou16(buf, 0, &val); + if (ret) { + qib_dev_err(dd, "attempt to set invalid LED override\n"); + return ret; + } + + qib_set_led_override(ppd, val); + return count; +} + +static ssize_t show_status(struct qib_pportdata *ppd, char *buf) +{ + ssize_t ret; + + if (!ppd->statusp) + ret = -EINVAL; + else + ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n", + (unsigned long long) *(ppd->statusp)); + return ret; +} + +/* + * For userland compatibility, these offsets must remain fixed. + * They are strings for QIB_STATUS_* + */ +static const char * const qib_status_str[] = { + "Initted", + "", + "", + "", + "", + "Present", + "IB_link_up", + "IB_configured", + "", + "Fatal_Hardware_Error", + NULL, +}; + +static ssize_t show_status_str(struct qib_pportdata *ppd, char *buf) +{ + int i, any; + u64 s; + ssize_t ret; + + if (!ppd->statusp) { + ret = -EINVAL; + goto bail; + } + + s = *(ppd->statusp); + *buf = '\0'; + for (any = i = 0; s && qib_status_str[i]; i++) { + if (s & 1) { + /* if overflow */ + if (any && strlcat(buf, " ", PAGE_SIZE) >= PAGE_SIZE) + break; + if (strlcat(buf, qib_status_str[i], PAGE_SIZE) >= + PAGE_SIZE) + break; + any = 1; + } + s >>= 1; + } + if (any) + strlcat(buf, "\n", PAGE_SIZE); + + ret = strlen(buf); + +bail: + return ret; +} + +/* end of per-port functions */ + +/* + * Start of per-port file structures and support code + * Because we are fitting into other infrastructure, we have to supply the + * full set of kobject/sysfs_ops structures and routines. 
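+ *
+ * Editor's sketch (hypothetical attribute, not part of the original
+ * commit): the QIB_PORT_ATTR() macro below lets a per-port file be
+ * declared in one line; sysfs calls qib_portattr_show(), which
+ * recovers the qib_pportdata from the embedded kobject and dispatches
+ * to the typed handler:
+ *
+ *	static ssize_t show_foo(struct qib_pportdata *ppd, char *buf)
+ *	{
+ *		return scnprintf(buf, PAGE_SIZE, "%u\n", ppd->port);
+ *	}
+ *	QIB_PORT_ATTR(foo, S_IRUGO, show_foo, NULL);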
+ */ +#define QIB_PORT_ATTR(name, mode, show, store) \ + static struct qib_port_attr qib_port_attr_##name = \ + __ATTR(name, mode, show, store) + +struct qib_port_attr { + struct attribute attr; + ssize_t (*show)(struct qib_pportdata *, char *); + ssize_t (*store)(struct qib_pportdata *, const char *, size_t); +}; + +QIB_PORT_ATTR(loopback, S_IWUSR, NULL, store_loopback); +QIB_PORT_ATTR(led_override, S_IWUSR, NULL, store_led_override); +QIB_PORT_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb, + store_hrtbt_enb); +QIB_PORT_ATTR(status, S_IRUGO, show_status, NULL); +QIB_PORT_ATTR(status_str, S_IRUGO, show_status_str, NULL); + +static struct attribute *port_default_attributes[] = { + &qib_port_attr_loopback.attr, + &qib_port_attr_led_override.attr, + &qib_port_attr_hrtbt_enable.attr, + &qib_port_attr_status.attr, + &qib_port_attr_status_str.attr, + NULL +}; + +/* + * Start of per-port congestion control structures and support code + */ + +/* + * Congestion control table size followed by table entries + */ +static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) +{ + int ret; + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_cc_kobj); + + if (!qib_cc_table_size || !ppd->ccti_entries_shadow) + return -EINVAL; + + ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow) + + sizeof(__be16); + + if (pos > ret) + return -EINVAL; + + if (count > ret - pos) + count = ret - pos; + + if (!count) + return count; + + spin_lock(&ppd->cc_shadow_lock); + memcpy(buf, ppd->ccti_entries_shadow, count); + spin_unlock(&ppd->cc_shadow_lock); + + return count; +} + +static void qib_port_release(struct kobject *kobj) +{ + /* nothing to do since memory is freed by qib_free_devdata() */ +} + +static struct kobj_type qib_port_cc_ktype = { + .release = qib_port_release, +}; + +static struct bin_attribute cc_table_bin_attr = { + .attr = {.name = "cc_table_bin", .mode = 0444}, + .read = read_cc_table_bin, + .size = PAGE_SIZE, +}; + +/* + * Congestion settings: port control, control map and an array of 16 + * entries for the congestion entries - increase, timer, event log + * trigger threshold and the minimum injection rate delay. 
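+ *
+ * Editor's sketch (userspace side; the sysfs path is illustrative):
+ * the shadow is exported as a read-only binary file, so a management
+ * tool can snapshot it with an ordinary read, e.g.:
+ *
+ *	struct ib_cc_congestion_setting_attr_shadow s;
+ *	int fd = open("/sys/class/infiniband/qib0/ports/1/CCMgtA/"
+ *		      "cc_settings_bin", O_RDONLY);
+ *	if (fd >= 0 && read(fd, &s, sizeof(s)) == sizeof(s))
+ *		parse_settings(&s);	(parse_settings is hypothetical)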
+ */ +static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) +{ + int ret; + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_cc_kobj); + + if (!qib_cc_table_size || !ppd->congestion_entries_shadow) + return -EINVAL; + + ret = sizeof(struct ib_cc_congestion_setting_attr_shadow); + + if (pos > ret) + return -EINVAL; + if (count > ret - pos) + count = ret - pos; + + if (!count) + return count; + + spin_lock(&ppd->cc_shadow_lock); + memcpy(buf, ppd->congestion_entries_shadow, count); + spin_unlock(&ppd->cc_shadow_lock); + + return count; +} + +static struct bin_attribute cc_setting_bin_attr = { + .attr = {.name = "cc_settings_bin", .mode = 0444}, + .read = read_cc_setting_bin, + .size = PAGE_SIZE, +}; + + +static ssize_t qib_portattr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct qib_port_attr *pattr = + container_of(attr, struct qib_port_attr, attr); + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_kobj); + + return pattr->show(ppd, buf); +} + +static ssize_t qib_portattr_store(struct kobject *kobj, + struct attribute *attr, const char *buf, size_t len) +{ + struct qib_port_attr *pattr = + container_of(attr, struct qib_port_attr, attr); + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, pport_kobj); + + return pattr->store(ppd, buf, len); +} + + +static const struct sysfs_ops qib_port_ops = { + .show = qib_portattr_show, + .store = qib_portattr_store, +}; + +static struct kobj_type qib_port_ktype = { + .release = qib_port_release, + .sysfs_ops = &qib_port_ops, + .default_attrs = port_default_attributes +}; + +/* Start sl2vl */ + +#define QIB_SL2VL_ATTR(N) \ + static struct qib_sl2vl_attr qib_sl2vl_attr_##N = { \ + .attr = { .name = __stringify(N), .mode = 0444 }, \ + .sl = N \ + } + +struct qib_sl2vl_attr { + struct attribute attr; + int sl; +}; + +QIB_SL2VL_ATTR(0); +QIB_SL2VL_ATTR(1); +QIB_SL2VL_ATTR(2); +QIB_SL2VL_ATTR(3); +QIB_SL2VL_ATTR(4); +QIB_SL2VL_ATTR(5); +QIB_SL2VL_ATTR(6); +QIB_SL2VL_ATTR(7); +QIB_SL2VL_ATTR(8); +QIB_SL2VL_ATTR(9); +QIB_SL2VL_ATTR(10); +QIB_SL2VL_ATTR(11); +QIB_SL2VL_ATTR(12); +QIB_SL2VL_ATTR(13); +QIB_SL2VL_ATTR(14); +QIB_SL2VL_ATTR(15); + +static struct attribute *sl2vl_default_attributes[] = { + &qib_sl2vl_attr_0.attr, + &qib_sl2vl_attr_1.attr, + &qib_sl2vl_attr_2.attr, + &qib_sl2vl_attr_3.attr, + &qib_sl2vl_attr_4.attr, + &qib_sl2vl_attr_5.attr, + &qib_sl2vl_attr_6.attr, + &qib_sl2vl_attr_7.attr, + &qib_sl2vl_attr_8.attr, + &qib_sl2vl_attr_9.attr, + &qib_sl2vl_attr_10.attr, + &qib_sl2vl_attr_11.attr, + &qib_sl2vl_attr_12.attr, + &qib_sl2vl_attr_13.attr, + &qib_sl2vl_attr_14.attr, + &qib_sl2vl_attr_15.attr, + NULL +}; + +static ssize_t sl2vl_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct qib_sl2vl_attr *sattr = + container_of(attr, struct qib_sl2vl_attr, attr); + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, sl2vl_kobj); + struct qib_ibport *qibp = &ppd->ibport_data; + + return sprintf(buf, "%u\n", qibp->sl_to_vl[sattr->sl]); +} + +static const struct sysfs_ops qib_sl2vl_ops = { + .show = sl2vl_attr_show, +}; + +static struct kobj_type qib_sl2vl_ktype = { + .release = qib_port_release, + .sysfs_ops = &qib_sl2vl_ops, + .default_attrs = sl2vl_default_attributes +}; + +/* End sl2vl */ + +/* Start diag_counters */ + +#define QIB_DIAGC_ATTR(N) \ + static struct qib_diagc_attr qib_diagc_attr_##N = { \ + .attr 
= { .name = __stringify(N), .mode = 0664 }, \ + .counter = offsetof(struct qib_ibport, n_##N) \ + } + +struct qib_diagc_attr { + struct attribute attr; + size_t counter; +}; + +QIB_DIAGC_ATTR(rc_resends); +QIB_DIAGC_ATTR(rc_acks); +QIB_DIAGC_ATTR(rc_qacks); +QIB_DIAGC_ATTR(rc_delayed_comp); +QIB_DIAGC_ATTR(seq_naks); +QIB_DIAGC_ATTR(rdma_seq); +QIB_DIAGC_ATTR(rnr_naks); +QIB_DIAGC_ATTR(other_naks); +QIB_DIAGC_ATTR(rc_timeouts); +QIB_DIAGC_ATTR(loop_pkts); +QIB_DIAGC_ATTR(pkt_drops); +QIB_DIAGC_ATTR(dmawait); +QIB_DIAGC_ATTR(unaligned); +QIB_DIAGC_ATTR(rc_dupreq); +QIB_DIAGC_ATTR(rc_seqnak); + +static struct attribute *diagc_default_attributes[] = { + &qib_diagc_attr_rc_resends.attr, + &qib_diagc_attr_rc_acks.attr, + &qib_diagc_attr_rc_qacks.attr, + &qib_diagc_attr_rc_delayed_comp.attr, + &qib_diagc_attr_seq_naks.attr, + &qib_diagc_attr_rdma_seq.attr, + &qib_diagc_attr_rnr_naks.attr, + &qib_diagc_attr_other_naks.attr, + &qib_diagc_attr_rc_timeouts.attr, + &qib_diagc_attr_loop_pkts.attr, + &qib_diagc_attr_pkt_drops.attr, + &qib_diagc_attr_dmawait.attr, + &qib_diagc_attr_unaligned.attr, + &qib_diagc_attr_rc_dupreq.attr, + &qib_diagc_attr_rc_seqnak.attr, + NULL +}; + +static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct qib_diagc_attr *dattr = + container_of(attr, struct qib_diagc_attr, attr); + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, diagc_kobj); + struct qib_ibport *qibp = &ppd->ibport_data; + + return sprintf(buf, "%u\n", *(u32 *)((char *)qibp + dattr->counter)); +} + +static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t size) +{ + struct qib_diagc_attr *dattr = + container_of(attr, struct qib_diagc_attr, attr); + struct qib_pportdata *ppd = + container_of(kobj, struct qib_pportdata, diagc_kobj); + struct qib_ibport *qibp = &ppd->ibport_data; + u32 val; + int ret; + + ret = kstrtou32(buf, 0, &val); + if (ret) + return ret; + *(u32 *)((char *) qibp + dattr->counter) = val; + return size; +} + +static const struct sysfs_ops qib_diagc_ops = { + .show = diagc_attr_show, + .store = diagc_attr_store, +}; + +static struct kobj_type qib_diagc_ktype = { + .release = qib_port_release, + .sysfs_ops = &qib_diagc_ops, + .default_attrs = diagc_default_attributes +}; + +/* End diag_counters */ + +/* end of per-port file structures and support code */ + +/* + * Start of per-unit (or driver, in some cases, but replicated + * per unit) functions (these get a device *) + */ +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + + return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); +} + +static ssize_t show_hca(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + int ret; + + if (!dd->boardname) + ret = -EINVAL; + else + ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); + return ret; +} + +static ssize_t show_version(struct device *device, + struct device_attribute *attr, char *buf) +{ + /* The string printed here is already newline-terminated. 
*/ + return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version); +} + +static ssize_t show_boardversion(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + + /* The string printed here is already newline-terminated. */ + return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); +} + + +static ssize_t show_localbus_info(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + + /* The string printed here is already newline-terminated. */ + return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info); +} + + +static ssize_t show_nctxts(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + + /* Return the number of user ports (contexts) available. */ + /* The calculation below deals with a special case where + * cfgctxts is set to 1 on a single-port board. */ + return scnprintf(buf, PAGE_SIZE, "%u\n", + (dd->first_user_ctxt > dd->cfgctxts) ? 0 : + (dd->cfgctxts - dd->first_user_ctxt)); +} + +static ssize_t show_nfreectxts(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + + /* Return the number of free user ports (contexts) available. */ + return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); +} + +static ssize_t show_serial(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + + buf[sizeof dd->serial] = '\0'; + memcpy(buf, dd->serial, sizeof dd->serial); + strcat(buf, "\n"); + return strlen(buf); +} + +static ssize_t store_chip_reset(struct device *device, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + int ret; + + if (count < 5 || memcmp(buf, "reset", 5) || !dd->diag_client) { + ret = -EINVAL; + goto bail; + } + + ret = qib_reset_device(dd->unit); +bail: + return ret < 0 ? ret : count; +} + +static ssize_t show_logged_errs(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + int idx, count; + + /* force consistency with actual EEPROM */ + if (qib_update_eeprom_log(dd) != 0) + return -ENXIO; + + count = 0; + for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) { + count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c", + dd->eep_st_errs[idx], + idx == (QIB_EEP_LOG_CNT - 1) ? '\n' : ' '); + } + + return count; +} + +/* + * Dump tempsense regs. in decimal, to ease shell-scripts. 
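+ *
+ * Editor's note: the scnprintf() below emits six space-separated
+ * fields, e.g. a hypothetical "52 40 D2 A8 54 50" - regs 0, 1, 5 and 7
+ * decoded as signed decimal, regs 2 and 3 left in hex.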
+ */ +static ssize_t show_tempsense(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct qib_ibdev *dev = + container_of(device, struct qib_ibdev, ibdev.dev); + struct qib_devdata *dd = dd_from_dev(dev); + int ret; + int idx; + u8 regvals[8]; + + ret = -ENXIO; + for (idx = 0; idx < 8; ++idx) { + if (idx == 6) + continue; + ret = dd->f_tempsense_rd(dd, idx); + if (ret < 0) + break; + regvals[idx] = ret; + } + if (idx == 8) + ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n", + *(signed char *)(regvals), + *(signed char *)(regvals + 1), + regvals[2], regvals[3], + *(signed char *)(regvals + 5), + *(signed char *)(regvals + 7)); + return ret; +} + +/* + * end of per-unit (or driver, in some cases, but replicated + * per unit) functions + */ + +/* start of per-unit file structures and support code */ +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL); +static DEVICE_ATTR(version, S_IRUGO, show_version, NULL); +static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL); +static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL); +static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); +static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); +static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); +static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); +static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL); +static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); + +static struct device_attribute *qib_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_hca_type, + &dev_attr_board_id, + &dev_attr_version, + &dev_attr_nctxts, + &dev_attr_nfreectxts, + &dev_attr_serial, + &dev_attr_boardversion, + &dev_attr_logged_errors, + &dev_attr_tempsense, + &dev_attr_localbus_info, + &dev_attr_chip_reset, +}; + +int qib_create_port_files(struct ib_device *ibdev, u8 port_num, + struct kobject *kobj) +{ + struct qib_pportdata *ppd; + struct qib_devdata *dd = dd_from_ibdev(ibdev); + int ret; + + if (!port_num || port_num > dd->num_pports) { + qib_dev_err(dd, + "Skipping infiniband class with invalid port %u\n", + port_num); + ret = -ENODEV; + goto bail; + } + ppd = &dd->pport[port_num - 1]; + + ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj, + "linkcontrol"); + if (ret) { + qib_dev_err(dd, + "Skipping linkcontrol sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail; + } + kobject_uevent(&ppd->pport_kobj, KOBJ_ADD); + + ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj, + "sl2vl"); + if (ret) { + qib_dev_err(dd, + "Skipping sl2vl sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_link; + } + kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD); + + ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj, + "diag_counters"); + if (ret) { + qib_dev_err(dd, + "Skipping diag_counters sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_sl; + } + kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD); + + if (!qib_cc_table_size || !ppd->congestion_entries_shadow) + return 0; + + ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype, + kobj, "CCMgtA"); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_diagc; + } + + kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); + + ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, + &cc_setting_bin_attr); + if (ret) { + qib_dev_err(dd, + 
"Skipping Congestion Control setting sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_cc; + } + + ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, + &cc_table_bin_attr); + if (ret) { + qib_dev_err(dd, + "Skipping Congestion Control table sysfs info, (err %d) port %u\n", + ret, port_num); + goto bail_cc_entry_bin; + } + + qib_devinfo(dd->pcidev, + "IB%u: Congestion Control Agent enabled for port %d\n", + dd->unit, port_num); + + return 0; + +bail_cc_entry_bin: + sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr); +bail_cc: + kobject_put(&ppd->pport_cc_kobj); +bail_diagc: + kobject_put(&ppd->diagc_kobj); +bail_sl: + kobject_put(&ppd->sl2vl_kobj); +bail_link: + kobject_put(&ppd->pport_kobj); +bail: + return ret; +} + +/* + * Register and create our files in /sys/class/infiniband. + */ +int qib_verbs_register_sysfs(struct qib_devdata *dd) +{ + struct ib_device *dev = &dd->verbs_dev.ibdev; + int i, ret; + + for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) { + ret = device_create_file(&dev->dev, qib_attributes[i]); + if (ret) + goto bail; + } + + return 0; +bail: + for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) + device_remove_file(&dev->dev, qib_attributes[i]); + return ret; +} + +/* + * Unregister and remove our files in /sys/class/infiniband. + */ +void qib_verbs_unregister_sysfs(struct qib_devdata *dd) +{ + struct qib_pportdata *ppd; + int i; + + for (i = 0; i < dd->num_pports; i++) { + ppd = &dd->pport[i]; + if (qib_cc_table_size && + ppd->congestion_entries_shadow) { + sysfs_remove_bin_file(&ppd->pport_cc_kobj, + &cc_setting_bin_attr); + sysfs_remove_bin_file(&ppd->pport_cc_kobj, + &cc_table_bin_attr); + kobject_put(&ppd->pport_cc_kobj); + } + kobject_put(&ppd->sl2vl_kobj); + kobject_put(&ppd->pport_kobj); + } +} diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c new file mode 100644 index 00000000000..647f7beb1b0 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_twsi.c @@ -0,0 +1,500 @@ +/* + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/delay.h> +#include <linux/pci.h> +#include <linux/vmalloc.h> + +#include "qib.h" + +/* + * QLogic_IB "Two Wire Serial Interface" driver. + * Originally written for a not-quite-i2c serial eeprom, which is + * still used on some supported boards. Later boards have added a + * variety of other uses, most board-specific, so the bit-boffing + * part has been split off to this file, while the other parts + * have been moved to chip-specific files. + * + * We have also dropped all pretense of fully generic (e.g. pretend + * we don't know whether '1' is the higher voltage) interface, as + * the restrictions of the generic i2c interface (e.g. no access from + * driver itself) make it unsuitable for this use. + */ + +#define READ_CMD 1 +#define WRITE_CMD 0 + +/** + * i2c_wait_for_writes - wait for a write + * @dd: the qlogic_ib device + * + * We use this instead of udelay directly, so we can make sure + * that previous register writes have been flushed all the way + * to the chip. Since we are delaying anyway, the cost doesn't + * hurt, and makes the bit twiddling more regular + */ +static void i2c_wait_for_writes(struct qib_devdata *dd) +{ + /* + * implicit read of EXTStatus is as good as explicit + * read of scratch, if all we want to do is flush + * writes. + */ + dd->f_gpio_mod(dd, 0, 0, 0); + rmb(); /* inlined, so prevent compiler reordering */ +} + +/* + * QSFP modules are allowed to hold SCL low for 500uSec. Allow twice that + * for "almost compliant" modules + */ +#define SCL_WAIT_USEC 1000 + +/* BUF_WAIT is time bus must be free between STOP or ACK and to next START. + * Should be 20, but some chips need more. + */ +#define TWSI_BUF_WAIT_USEC 60 + +static void scl_out(struct qib_devdata *dd, u8 bit) +{ + u32 mask; + + udelay(1); + + mask = 1UL << dd->gpio_scl_num; + + /* SCL is meant to be bare-drain, so never set "OUT", just DIR */ + dd->f_gpio_mod(dd, 0, bit ? 0 : mask, mask); + + /* + * Allow for slow slaves by simple + * delay for falling edge, sampling on rise. + */ + if (!bit) + udelay(2); + else { + int rise_usec; + for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) { + if (mask & dd->f_gpio_mod(dd, 0, 0, 0)) + break; + udelay(2); + } + if (rise_usec <= 0) + qib_dev_err(dd, "SCL interface stuck low > %d uSec\n", + SCL_WAIT_USEC); + } + i2c_wait_for_writes(dd); +} + +static void sda_out(struct qib_devdata *dd, u8 bit) +{ + u32 mask; + + mask = 1UL << dd->gpio_sda_num; + + /* SDA is meant to be bare-drain, so never set "OUT", just DIR */ + dd->f_gpio_mod(dd, 0, bit ? 
0 : mask, mask); + + i2c_wait_for_writes(dd); + udelay(2); +} + +static u8 sda_in(struct qib_devdata *dd, int wait) +{ + int bnum; + u32 read_val, mask; + + bnum = dd->gpio_sda_num; + mask = (1UL << bnum); + /* SDA is meant to be bare-drain, so never set "OUT", just DIR */ + dd->f_gpio_mod(dd, 0, 0, mask); + read_val = dd->f_gpio_mod(dd, 0, 0, 0); + if (wait) + i2c_wait_for_writes(dd); + return (read_val & mask) >> bnum; +} + +/** + * i2c_ackrcv - see if ack following write is true + * @dd: the qlogic_ib device + */ +static int i2c_ackrcv(struct qib_devdata *dd) +{ + u8 ack_received; + + /* AT ENTRY SCL = LOW */ + /* change direction, ignore data */ + ack_received = sda_in(dd, 1); + scl_out(dd, 1); + ack_received = sda_in(dd, 1) == 0; + scl_out(dd, 0); + return ack_received; +} + +static void stop_cmd(struct qib_devdata *dd); + +/** + * rd_byte - read a byte, sending STOP on last, else ACK + * @dd: the qlogic_ib device + * + * Returns byte shifted out of device + */ +static int rd_byte(struct qib_devdata *dd, int last) +{ + int bit_cntr, data; + + data = 0; + + for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) { + data <<= 1; + scl_out(dd, 1); + data |= sda_in(dd, 0); + scl_out(dd, 0); + } + if (last) { + scl_out(dd, 1); + stop_cmd(dd); + } else { + sda_out(dd, 0); + scl_out(dd, 1); + scl_out(dd, 0); + sda_out(dd, 1); + } + return data; +} + +/** + * wr_byte - write a byte, one bit at a time + * @dd: the qlogic_ib device + * @data: the byte to write + * + * Returns 0 if we got the following ack, otherwise 1 + */ +static int wr_byte(struct qib_devdata *dd, u8 data) +{ + int bit_cntr; + u8 bit; + + for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) { + bit = (data >> bit_cntr) & 1; + sda_out(dd, bit); + scl_out(dd, 1); + scl_out(dd, 0); + } + return (!i2c_ackrcv(dd)) ? 1 : 0; +} + +/* + * issue TWSI start sequence: + * (both clock/data high, clock high, data low while clock is high) + */ +static void start_seq(struct qib_devdata *dd) +{ + sda_out(dd, 1); + scl_out(dd, 1); + sda_out(dd, 0); + udelay(1); + scl_out(dd, 0); +} + +/** + * stop_seq - transmit the stop sequence + * @dd: the qlogic_ib device + * + * (both clock/data low, clock high, data high while clock is high) + */ +static void stop_seq(struct qib_devdata *dd) +{ + scl_out(dd, 0); + sda_out(dd, 0); + scl_out(dd, 1); + sda_out(dd, 1); +} + +/** + * stop_cmd - transmit the stop condition + * @dd: the qlogic_ib device + * + * (both clock/data low, clock high, data high while clock is high) + */ +static void stop_cmd(struct qib_devdata *dd) +{ + stop_seq(dd); + udelay(TWSI_BUF_WAIT_USEC); +} + +/** + * qib_twsi_reset - reset I2C communication + * @dd: the qlogic_ib device + */ + +int qib_twsi_reset(struct qib_devdata *dd) +{ + int clock_cycles_left = 9; + int was_high = 0; + u32 pins, mask; + + /* Both SCL and SDA should be high. If not, there + * is something wrong. + */ + mask = (1UL << dd->gpio_scl_num) | (1UL << dd->gpio_sda_num); + + /* + * Force pins to desired innocuous state. + * This is the default power-on state with out=0 and dir=0, + * So tri-stated and should be floating high (barring HW problems) + */ + dd->f_gpio_mod(dd, 0, 0, mask); + + /* + * Clock nine times to get all listeners into a sane state. + * If SDA does not go high at any point, we are wedged. + * One vendor recommends then issuing START followed by STOP. 
+ * We cannot use our "normal" functions to do that, because
+ * if SCL drops between them, another vendor's part will
+ * wedge, dropping SDA and keeping it low forever, at the end of
+ * the next transaction (even if it was not the device addressed).
+ * So our START and STOP take place with SCL held high.
+ */
+	while (clock_cycles_left--) {
+		scl_out(dd, 0);
+		scl_out(dd, 1);
+		/* Note if SDA is high, but keep clocking to sync slave */
+		was_high |= sda_in(dd, 0);
+	}
+
+	if (was_high) {
+		/*
+		 * We saw a high, which we hope means the slave is sync'd.
+		 * Issue START, STOP, pause for T_BUF.
+		 */
+
+		pins = dd->f_gpio_mod(dd, 0, 0, 0);
+		if ((pins & mask) != mask)
+			qib_dev_err(dd, "GPIO pins not at rest: %d\n",
+				    pins & mask);
+		/* Drop SDA to issue START */
+		udelay(1); /* Guarantee .6 uSec setup */
+		sda_out(dd, 0);
+		udelay(1); /* Guarantee .6 uSec hold */
+		/* At this point, SCL is high, SDA low. Raise SDA for STOP */
+		sda_out(dd, 1);
+		udelay(TWSI_BUF_WAIT_USEC);
+	}
+
+	return !was_high;
+}
+
+#define QIB_TWSI_START 0x100
+#define QIB_TWSI_STOP 0x200
+
+/* Write byte to TWSI, optionally prefixed with START or suffixed with
+ * STOP.
+ * returns 0 if OK (ACK received), else != 0
+ */
+static int qib_twsi_wr(struct qib_devdata *dd, int data, int flags)
+{
+	int ret = 1;
+	if (flags & QIB_TWSI_START)
+		start_seq(dd);
+
+	ret = wr_byte(dd, data); /* Leaves SCL low (from i2c_ackrcv()) */
+
+	if (flags & QIB_TWSI_STOP)
+		stop_cmd(dd);
+	return ret;
+}
+
+/* Added functionality for IBA7220-based cards */
+#define QIB_TEMP_DEV 0x98
+
+/*
+ * qib_twsi_blk_rd
+ * Formerly called qib_eeprom_internal_read, and only used for eeprom,
+ * but now the general interface for data transfer from twsi devices.
+ * One vestige of its former role is that it recognizes a device
+ * QIB_TWSI_NO_DEV and does the correct operation for the legacy part,
+ * which responded to all TWSI device codes, interpreting them as
+ * address within device.  On all other devices found on boards handled
+ * by this driver, the device is followed by a one-byte "address" which
+ * selects the "register" or "offset" within the device from which data
+ * should be read.
+ */
+int qib_twsi_blk_rd(struct qib_devdata *dd, int dev, int addr,
+		    void *buffer, int len)
+{
+	int ret;
+	u8 *bp = buffer;
+
+	ret = 1;
+
+	if (dev == QIB_TWSI_NO_DEV) {
+		/* legacy not-really-I2C */
+		addr = (addr << 1) | READ_CMD;
+		ret = qib_twsi_wr(dd, addr, QIB_TWSI_START);
+	} else {
+		/* Actual I2C */
+		ret = qib_twsi_wr(dd, dev | WRITE_CMD, QIB_TWSI_START);
+		if (ret) {
+			stop_cmd(dd);
+			ret = 1;
+			goto bail;
+		}
+		/*
+		 * SFF spec claims we do _not_ stop after the addr
+		 * but simply issue a start with the "read" dev-addr.
+		 * Since we are implicitly waiting for ACK here,
+		 * we need t_buf (nominally 20uSec) before that start,
+		 * and cannot rely on the delay built in to the STOP
+		 */
+		ret = qib_twsi_wr(dd, addr, 0);
+		udelay(TWSI_BUF_WAIT_USEC);
+
+		if (ret) {
+			qib_dev_err(dd,
+				"Failed to write interface read addr %02X\n",
+				addr);
+			ret = 1;
+			goto bail;
+		}
+		ret = qib_twsi_wr(dd, dev | READ_CMD, QIB_TWSI_START);
+	}
+	if (ret) {
+		stop_cmd(dd);
+		ret = 1;
+		goto bail;
+	}
+
+	/*
+	 * block devices keep clocking data out as long as we ack,
+	 * automatically incrementing the address.  Some have "pages"
+	 * whose boundaries will not be crossed, but the handling
+	 * of these is left to the caller, who is in a better
+	 * position to know.
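+	 *
+	 * Editor's illustration (hypothetical transfer, not part of the
+	 * original commit): reading two bytes at offset 4 from an i2c
+	 * device at address 0xA0 therefore clocks, in order:
+	 *
+	 *	START, 0xA0 (dev|WRITE_CMD), ACK, 0x04, ACK,
+	 *	START, 0xA1 (dev|READ_CMD), ACK,
+	 *	data0, ACK, data1, no ACK, STOP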
+ */ + while (len-- > 0) { + /* + * Get and store data, sending ACK if length remaining, + * else STOP + */ + *bp++ = rd_byte(dd, !len); + } + + ret = 0; + +bail: + return ret; +} + +/* + * qib_twsi_blk_wr + * Formerly called qib_eeprom_internal_write, and only used for eeprom, + * but now the general interface for data transfer to twsi devices. + * One vestige of its former role is that it recognizes a device + * QIB_TWSI_NO_DEV and does the correct operation for the legacy part, + * which responded to all TWSI device codes, interpreting them as + * address within device. On all other devices found on board handled by + * this driver, the device is followed by a one-byte "address" which selects + * the "register" or "offset" within the device to which data should + * be written. + */ +int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr, + const void *buffer, int len) +{ + int sub_len; + const u8 *bp = buffer; + int max_wait_time, i; + int ret; + ret = 1; + + while (len > 0) { + if (dev == QIB_TWSI_NO_DEV) { + if (qib_twsi_wr(dd, (addr << 1) | WRITE_CMD, + QIB_TWSI_START)) { + goto failed_write; + } + } else { + /* Real I2C */ + if (qib_twsi_wr(dd, dev | WRITE_CMD, QIB_TWSI_START)) + goto failed_write; + ret = qib_twsi_wr(dd, addr, 0); + if (ret) { + qib_dev_err(dd, + "Failed to write interface write addr %02X\n", + addr); + goto failed_write; + } + } + + sub_len = min(len, 4); + addr += sub_len; + len -= sub_len; + + for (i = 0; i < sub_len; i++) + if (qib_twsi_wr(dd, *bp++, 0)) + goto failed_write; + + stop_cmd(dd); + + /* + * Wait for write complete by waiting for a successful + * read (the chip replies with a zero after the write + * cmd completes, and before it writes to the eeprom. + * The startcmd for the read will fail the ack until + * the writes have completed. We do this inline to avoid + * the debug prints that are in the real read routine + * if the startcmd fails. + * We also use the proper device address, so it doesn't matter + * whether we have real eeprom_dev. Legacy likes any address. + */ + max_wait_time = 100; + while (qib_twsi_wr(dd, dev | READ_CMD, QIB_TWSI_START)) { + stop_cmd(dd); + if (!--max_wait_time) + goto failed_write; + } + /* now read (and ignore) the resulting byte */ + rd_byte(dd, 1); + } + + ret = 0; + goto bail; + +failed_write: + stop_cmd(dd); + ret = 1; + +bail: + return ret; +} diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c new file mode 100644 index 00000000000..31d3561400a --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/spinlock.h> +#include <linux/pci.h> +#include <linux/io.h> +#include <linux/delay.h> +#include <linux/netdevice.h> +#include <linux/vmalloc.h> +#include <linux/moduleparam.h> + +#include "qib.h" + +static unsigned qib_hol_timeout_ms = 3000; +module_param_named(hol_timeout_ms, qib_hol_timeout_ms, uint, S_IRUGO); +MODULE_PARM_DESC(hol_timeout_ms, + "duration of user app suspension after link failure"); + +unsigned qib_sdma_fetch_arb = 1; +module_param_named(fetch_arb, qib_sdma_fetch_arb, uint, S_IRUGO); +MODULE_PARM_DESC(fetch_arb, "IBA7220: change SDMA descriptor arbitration"); + +/** + * qib_disarm_piobufs - cancel a range of PIO buffers + * @dd: the qlogic_ib device + * @first: the first PIO buffer to cancel + * @cnt: the number of PIO buffers to cancel + * + * Cancel a range of PIO buffers. Used at user process close, + * in case it died while writing to a PIO buffer. + */ +void qib_disarm_piobufs(struct qib_devdata *dd, unsigned first, unsigned cnt) +{ + unsigned long flags; + unsigned i; + unsigned last; + + last = first + cnt; + spin_lock_irqsave(&dd->pioavail_lock, flags); + for (i = first; i < last; i++) { + __clear_bit(i, dd->pio_need_disarm); + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(i)); + } + spin_unlock_irqrestore(&dd->pioavail_lock, flags); +} + +/* + * This is called by a user process when it sees the DISARM_BUFS event + * bit is set. + */ +int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd) +{ + struct qib_devdata *dd = rcd->dd; + unsigned i; + unsigned last; + unsigned n = 0; + + last = rcd->pio_base + rcd->piocnt; + /* + * Don't need uctxt_lock here, since user has called in to us. + * Clear at start in case more interrupts set bits while we + * are disarming + */ + if (rcd->user_event_mask) { + /* + * subctxt_cnt is 0 if not shared, so do base + * separately, first, then remaining subctxt, if any + */ + clear_bit(_QIB_EVENT_DISARM_BUFS_BIT, &rcd->user_event_mask[0]); + for (i = 1; i < rcd->subctxt_cnt; i++) + clear_bit(_QIB_EVENT_DISARM_BUFS_BIT, + &rcd->user_event_mask[i]); + } + spin_lock_irq(&dd->pioavail_lock); + for (i = rcd->pio_base; i < last; i++) { + if (__test_and_clear_bit(i, dd->pio_need_disarm)) { + n++; + dd->f_sendctrl(rcd->ppd, QIB_SENDCTRL_DISARM_BUF(i)); + } + } + spin_unlock_irq(&dd->pioavail_lock); + return 0; +} + +static struct qib_pportdata *is_sdma_buf(struct qib_devdata *dd, unsigned i) +{ + struct qib_pportdata *ppd; + unsigned pidx; + + for (pidx = 0; pidx < dd->num_pports; pidx++) { + ppd = dd->pport + pidx; + if (i >= ppd->sdma_state.first_sendbuf && + i < ppd->sdma_state.last_sendbuf) + return ppd; + } + return NULL; +} + +/* + * Return true if send buffer is being used by a user context. 
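+ * A buffer bufn belongs to a context exactly when (illustrative
+ * restatement of the scan below):
+ *
+ *      rcd->pio_base <= bufn && bufn < rcd->pio_base + rcd->piocnt
+ *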
+ * Sets _QIB_EVENT_DISARM_BUFS_BIT in user_event_mask as a side effect
+ */
+static int find_ctxt(struct qib_devdata *dd, unsigned bufn)
+{
+        struct qib_ctxtdata *rcd;
+        unsigned ctxt;
+        int ret = 0;
+
+        spin_lock(&dd->uctxt_lock);
+        for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) {
+                rcd = dd->rcd[ctxt];
+                if (!rcd || bufn < rcd->pio_base ||
+                    bufn >= rcd->pio_base + rcd->piocnt)
+                        continue;
+                if (rcd->user_event_mask) {
+                        int i;
+                        /*
+                         * subctxt_cnt is 0 if not shared, so do base
+                         * separately, first, then remaining subctxt, if any
+                         */
+                        set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+                                &rcd->user_event_mask[0]);
+                        for (i = 1; i < rcd->subctxt_cnt; i++)
+                                set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+                                        &rcd->user_event_mask[i]);
+                }
+                ret = 1;
+                break;
+        }
+        spin_unlock(&dd->uctxt_lock);
+
+        return ret;
+}
+
+/*
+ * Disarm a set of send buffers. If the buffer might be actively being
+ * written to, mark the buffer to be disarmed later when it is not being
+ * written to.
+ *
+ * This should only be called from the IRQ error handler.
+ */
+void qib_disarm_piobufs_set(struct qib_devdata *dd, unsigned long *mask,
+                            unsigned cnt)
+{
+        struct qib_pportdata *ppd, *pppd[QIB_MAX_IB_PORTS];
+        unsigned i;
+        unsigned long flags;
+
+        for (i = 0; i < dd->num_pports; i++)
+                pppd[i] = NULL;
+
+        for (i = 0; i < cnt; i++) {
+                int which;
+                if (!test_bit(i, mask))
+                        continue;
+                /*
+                 * If the buffer is owned by the DMA hardware,
+                 * reset the DMA engine.
+                 */
+                ppd = is_sdma_buf(dd, i);
+                if (ppd) {
+                        pppd[ppd->port] = ppd;
+                        continue;
+                }
+                /*
+                 * If the kernel is writing the buffer or the buffer is
+                 * owned by a user process, we can't clear it yet.
+                 */
+                spin_lock_irqsave(&dd->pioavail_lock, flags);
+                if (test_bit(i, dd->pio_writing) ||
+                    (!test_bit(i << 1, dd->pioavailkernel) &&
+                     find_ctxt(dd, i))) {
+                        __set_bit(i, dd->pio_need_disarm);
+                        which = 0;
+                } else {
+                        which = 1;
+                        dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(i));
+                }
+                spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+        }
+
+        /* do cancel_sends once per port that had sdma piobufs in error */
+        for (i = 0; i < dd->num_pports; i++)
+                if (pppd[i])
+                        qib_cancel_sends(pppd[i]);
+}
+
+/**
+ * update_send_bufs - update shadow copy of the PIO availability map
+ * @dd: the qlogic_ib device
+ *
+ * called whenever our local copy indicates we have run out of send buffers
+ */
+static void update_send_bufs(struct qib_devdata *dd)
+{
+        unsigned long flags;
+        unsigned i;
+        const unsigned piobregs = dd->pioavregs;
+
+        /*
+         * If the generation (check) bits have changed, then we update the
+         * busy bit for the corresponding PIO buffer. This algorithm will
+         * modify positions to the value they already have in some cases
+         * (i.e., no change), but it's faster than changing only the bits
+         * that have changed.
+         *
+         * We would like to do this atomically, to avoid spinlocks in the
+         * critical send path, but that's not really possible, given the
+         * type of changes, and that this routine could be called on
+         * multiple CPUs simultaneously, so we lock in this routine only,
+         * to avoid conflicting updates; all we change is the shadow, and
+         * it's a single 64-bit memory location, so by definition the update
+         * is atomic in terms of what other CPUs can see in testing the
+         * bits. The spin_lock overhead isn't too bad, since it only
+         * happens when all buffers are in use, so only CPU overhead, not
+         * latency or bandwidth, is affected.
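+         *
+         * Per 64-bit word, the loop below reduces to this update rule
+         * (sketch; shadow/dma/kern are the shadow, DMA and kernel-map
+         * words, BUSY_SHIFT is QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT):
+         *
+         *      u64 chg  = kern & ~(shadow ^ dma);  (gen bits that now agree)
+         *      u64 busy = chg << BUSY_SHIFT;       (their busy partners)
+         *
+         *      if (chg && (busy & shadow))
+         *              shadow = (shadow & ~busy) | (dma & busy);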
+ */ + if (!dd->pioavailregs_dma) + return; + spin_lock_irqsave(&dd->pioavail_lock, flags); + for (i = 0; i < piobregs; i++) { + u64 pchbusy, pchg, piov, pnew; + + piov = le64_to_cpu(dd->pioavailregs_dma[i]); + pchg = dd->pioavailkernel[i] & + ~(dd->pioavailshadow[i] ^ piov); + pchbusy = pchg << QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT; + if (pchg && (pchbusy & dd->pioavailshadow[i])) { + pnew = dd->pioavailshadow[i] & ~pchbusy; + pnew |= piov & pchbusy; + dd->pioavailshadow[i] = pnew; + } + } + spin_unlock_irqrestore(&dd->pioavail_lock, flags); +} + +/* + * Debugging code and stats updates if no pio buffers available. + */ +static noinline void no_send_bufs(struct qib_devdata *dd) +{ + dd->upd_pio_shadow = 1; + + /* not atomic, but if we lose a stat count in a while, that's OK */ + qib_stats.sps_nopiobufs++; +} + +/* + * Common code for normal driver send buffer allocation, and reserved + * allocation. + * + * Do appropriate marking as busy, etc. + * Returns buffer pointer if one is found, otherwise NULL. + */ +u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum, + u32 first, u32 last) +{ + unsigned i, j, updated = 0; + unsigned nbufs; + unsigned long flags; + unsigned long *shadow = dd->pioavailshadow; + u32 __iomem *buf; + + if (!(dd->flags & QIB_PRESENT)) + return NULL; + + nbufs = last - first + 1; /* number in range to check */ + if (dd->upd_pio_shadow) { +update_shadow: + /* + * Minor optimization. If we had no buffers on last call, + * start out by doing the update; continue and do scan even + * if no buffers were updated, to be paranoid. + */ + update_send_bufs(dd); + updated++; + } + i = first; + /* + * While test_and_set_bit() is atomic, we do that and then the + * change_bit(), and the pair is not. See if this is the cause + * of the remaining armlaunch errors. + */ + spin_lock_irqsave(&dd->pioavail_lock, flags); + if (dd->last_pio >= first && dd->last_pio <= last) + i = dd->last_pio + 1; + if (!first) + /* adjust to min possible */ + nbufs = last - dd->min_kernel_pio + 1; + for (j = 0; j < nbufs; j++, i++) { + if (i > last) + i = !first ? dd->min_kernel_pio : first; + if (__test_and_set_bit((2 * i) + 1, shadow)) + continue; + /* flip generation bit */ + __change_bit(2 * i, shadow); + /* remember that the buffer can be written to now */ + __set_bit(i, dd->pio_writing); + if (!first && first != last) /* first == last on VL15, avoid */ + dd->last_pio = i; + break; + } + spin_unlock_irqrestore(&dd->pioavail_lock, flags); + + if (j == nbufs) { + if (!updated) + /* + * First time through; shadow exhausted, but may be + * buffers available, try an update and then rescan. + */ + goto update_shadow; + no_send_bufs(dd); + buf = NULL; + } else { + if (i < dd->piobcnt2k) + buf = (u32 __iomem *)(dd->pio2kbase + + i * dd->palign); + else if (i < dd->piobcnt2k + dd->piobcnt4k || !dd->piovl15base) + buf = (u32 __iomem *)(dd->pio4kbase + + (i - dd->piobcnt2k) * dd->align4k); + else + buf = (u32 __iomem *)(dd->piovl15base + + (i - (dd->piobcnt2k + dd->piobcnt4k)) * + dd->align4k); + if (pbufnum) + *pbufnum = i; + dd->upd_pio_shadow = 0; + } + + return buf; +} + +/* + * Record that the caller is finished writing to the buffer so we don't + * disarm it while it is being written and disarm it now if needed. 
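+ *
+ * Typical pairing with the allocator above (illustrative, error
+ * handling elided):
+ *
+ *      u32 n;
+ *      u32 __iomem *piobuf = qib_getsendbuf_range(dd, &n, first, last);
+ *
+ *      if (piobuf) {
+ *              ... copy PBC and payload into the chip buffer ...
+ *              qib_sendbuf_done(dd, n);
+ *      }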
+ */ +void qib_sendbuf_done(struct qib_devdata *dd, unsigned n) +{ + unsigned long flags; + + spin_lock_irqsave(&dd->pioavail_lock, flags); + __clear_bit(n, dd->pio_writing); + if (__test_and_clear_bit(n, dd->pio_need_disarm)) + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(n)); + spin_unlock_irqrestore(&dd->pioavail_lock, flags); +} + +/** + * qib_chg_pioavailkernel - change which send buffers are available for kernel + * @dd: the qlogic_ib device + * @start: the starting send buffer number + * @len: the number of send buffers + * @avail: true if the buffers are available for kernel use, false otherwise + */ +void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start, + unsigned len, u32 avail, struct qib_ctxtdata *rcd) +{ + unsigned long flags; + unsigned end; + unsigned ostart = start; + + /* There are two bits per send buffer (busy and generation) */ + start *= 2; + end = start + len * 2; + + spin_lock_irqsave(&dd->pioavail_lock, flags); + /* Set or clear the busy bit in the shadow. */ + while (start < end) { + if (avail) { + unsigned long dma; + int i; + + /* + * The BUSY bit will never be set, because we disarm + * the user buffers before we hand them back to the + * kernel. We do have to make sure the generation + * bit is set correctly in shadow, since it could + * have changed many times while allocated to user. + * We can't use the bitmap functions on the full + * dma array because it is always little-endian, so + * we have to flip to host-order first. + * BITS_PER_LONG is slightly wrong, since it's + * always 64 bits per register in chip... + * We only work on 64 bit kernels, so that's OK. + */ + i = start / BITS_PER_LONG; + __clear_bit(QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT + start, + dd->pioavailshadow); + dma = (unsigned long) + le64_to_cpu(dd->pioavailregs_dma[i]); + if (test_bit((QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT + + start) % BITS_PER_LONG, &dma)) + __set_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT + + start, dd->pioavailshadow); + else + __clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT + + start, dd->pioavailshadow); + __set_bit(start, dd->pioavailkernel); + if ((start >> 1) < dd->min_kernel_pio) + dd->min_kernel_pio = start >> 1; + } else { + __set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT, + dd->pioavailshadow); + __clear_bit(start, dd->pioavailkernel); + if ((start >> 1) > dd->min_kernel_pio) + dd->min_kernel_pio = start >> 1; + } + start += 2; + } + + if (dd->min_kernel_pio > 0 && dd->last_pio < dd->min_kernel_pio - 1) + dd->last_pio = dd->min_kernel_pio - 1; + spin_unlock_irqrestore(&dd->pioavail_lock, flags); + + dd->f_txchk_change(dd, ostart, len, avail, rcd); +} + +/* + * Flush all sends that might be in the ready to send state, as well as any + * that are in the process of being sent. Used whenever we need to be + * sure the send side is idle. Cleans up all buffer state by canceling + * all pio buffers, and issuing an abort, which cleans up anything in the + * launch fifo. The cancel is superfluous on some chip versions, but + * it's safer to always do it. + * PIOAvail bits are updated by the chip as if a normal send had happened. + */ +void qib_cancel_sends(struct qib_pportdata *ppd) +{ + struct qib_devdata *dd = ppd->dd; + struct qib_ctxtdata *rcd; + unsigned long flags; + unsigned ctxt; + unsigned i; + unsigned last; + + /* + * Tell PSM to disarm buffers again before trying to reuse them. + * We need to be sure the rcd doesn't change out from under us + * while we do so. We hold the two locks sequentially. 
We might + * needlessly set some need_disarm bits as a result, if the + * context is closed after we release the uctxt_lock, but that's + * fairly benign, and safer than nesting the locks. + */ + for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) { + spin_lock_irqsave(&dd->uctxt_lock, flags); + rcd = dd->rcd[ctxt]; + if (rcd && rcd->ppd == ppd) { + last = rcd->pio_base + rcd->piocnt; + if (rcd->user_event_mask) { + /* + * subctxt_cnt is 0 if not shared, so do base + * separately, first, then remaining subctxt, + * if any + */ + set_bit(_QIB_EVENT_DISARM_BUFS_BIT, + &rcd->user_event_mask[0]); + for (i = 1; i < rcd->subctxt_cnt; i++) + set_bit(_QIB_EVENT_DISARM_BUFS_BIT, + &rcd->user_event_mask[i]); + } + i = rcd->pio_base; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + spin_lock_irqsave(&dd->pioavail_lock, flags); + for (; i < last; i++) + __set_bit(i, dd->pio_need_disarm); + spin_unlock_irqrestore(&dd->pioavail_lock, flags); + } else + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + } + + if (!(dd->flags & QIB_HAS_SEND_DMA)) + dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_ALL | + QIB_SENDCTRL_FLUSH); +} + +/* + * Force an update of in-memory copy of the pioavail registers, when + * needed for any of a variety of reasons. + * If already off, this routine is a nop, on the assumption that the + * caller (or set of callers) will "do the right thing". + * This is a per-device operation, so just the first port. + */ +void qib_force_pio_avail_update(struct qib_devdata *dd) +{ + dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP); +} + +void qib_hol_down(struct qib_pportdata *ppd) +{ + /* + * Cancel sends when the link goes DOWN so that we aren't doing it + * at INIT when we might be trying to send SMI packets. + */ + if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) + qib_cancel_sends(ppd); +} + +/* + * Link is at INIT. + * We start the HoL timer so we can detect stuck packets blocking SMP replies. + * Timer may already be running, so use mod_timer, not add_timer. + */ +void qib_hol_init(struct qib_pportdata *ppd) +{ + if (ppd->hol_state != QIB_HOL_INIT) { + ppd->hol_state = QIB_HOL_INIT; + mod_timer(&ppd->hol_timer, + jiffies + msecs_to_jiffies(qib_hol_timeout_ms)); + } +} + +/* + * Link is up, continue any user processes, and ensure timer + * is a nop, if running. Let timer keep running, if set; it + * will nop when it sees the link is up. + */ +void qib_hol_up(struct qib_pportdata *ppd) +{ + ppd->hol_state = QIB_HOL_UP; +} + +/* + * This is only called via the timer. + */ +void qib_hol_event(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + + /* If hardware error, etc, skip. */ + if (!(ppd->dd->flags & QIB_INITTED)) + return; + + if (ppd->hol_state != QIB_HOL_UP) { + /* + * Try to flush sends in case a stuck packet is blocking + * SMP replies. + */ + qib_hol_down(ppd); + mod_timer(&ppd->hol_timer, + jiffies + msecs_to_jiffies(qib_hol_timeout_ms)); + } +} diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c new file mode 100644 index 00000000000..aa3a8035bb6 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -0,0 +1,536 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation. + * All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "qib.h" + +/* cut down ridiculously long IB macro names */ +#define OP(x) IB_OPCODE_UC_##x + +/** + * qib_make_uc_req - construct a request packet (SEND, RDMA write) + * @qp: a pointer to the QP + * + * Return 1 if constructed; otherwise, return 0. + */ +int qib_make_uc_req(struct qib_qp *qp) +{ + struct qib_other_headers *ohdr; + struct qib_swqe *wqe; + unsigned long flags; + u32 hwords; + u32 bth0; + u32 len; + u32 pmtu = qp->pmtu; + int ret = 0; + + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) { + if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= QIB_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } + + ohdr = &qp->s_hdr->u.oth; + if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) + ohdr = &qp->s_hdr->u.l.oth; + + /* header size in 32-bit words LRH+BTH = (8+12)/4. */ + hwords = 5; + bth0 = 0; + + /* Get the next send request. */ + wqe = get_swqe_ptr(qp, qp->s_cur); + qp->s_wqe = NULL; + switch (qp->s_state) { + default: + if (!(ib_qib_state_ops[qp->state] & + QIB_PROCESS_NEXT_SEND_OK)) + goto bail; + /* Check if send work queue is empty. */ + if (qp->s_cur == qp->s_head) + goto bail; + /* + * Start a new request. 
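+                 * E.g. a 9000-byte SEND at pmtu 4096 goes out as
+                 * SEND_FIRST (4096) -> SEND_MIDDLE (4096) ->
+                 * SEND_LAST (808).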
+ */ + wqe->psn = qp->s_next_psn; + qp->s_psn = qp->s_next_psn; + qp->s_sge.sge = wqe->sg_list[0]; + qp->s_sge.sg_list = wqe->sg_list + 1; + qp->s_sge.num_sge = wqe->wr.num_sge; + qp->s_sge.total_len = wqe->length; + len = wqe->length; + qp->s_len = len; + switch (wqe->wr.opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + if (len > pmtu) { + qp->s_state = OP(SEND_FIRST); + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_SEND) + qp->s_state = OP(SEND_ONLY); + else { + qp->s_state = + OP(SEND_ONLY_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + } + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + ohdr->u.rc.reth.vaddr = + cpu_to_be64(wqe->wr.wr.rdma.remote_addr); + ohdr->u.rc.reth.rkey = + cpu_to_be32(wqe->wr.wr.rdma.rkey); + ohdr->u.rc.reth.length = cpu_to_be32(len); + hwords += sizeof(struct ib_reth) / 4; + if (len > pmtu) { + qp->s_state = OP(RDMA_WRITE_FIRST); + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + qp->s_state = OP(RDMA_WRITE_ONLY); + else { + qp->s_state = + OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); + /* Immediate data comes after the RETH */ + ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + } + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + default: + goto bail; + } + break; + + case OP(SEND_FIRST): + qp->s_state = OP(SEND_MIDDLE); + /* FALLTHROUGH */ + case OP(SEND_MIDDLE): + len = qp->s_len; + if (len > pmtu) { + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_SEND) + qp->s_state = OP(SEND_LAST); + else { + qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + } + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + + case OP(RDMA_WRITE_FIRST): + qp->s_state = OP(RDMA_WRITE_MIDDLE); + /* FALLTHROUGH */ + case OP(RDMA_WRITE_MIDDLE): + len = qp->s_len; + if (len > pmtu) { + len = pmtu; + break; + } + if (wqe->wr.opcode == IB_WR_RDMA_WRITE) + qp->s_state = OP(RDMA_WRITE_LAST); + else { + qp->s_state = + OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); + /* Immediate data comes after the BTH */ + ohdr->u.imm_data = wqe->wr.ex.imm_data; + hwords += 1; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + } + qp->s_wqe = wqe; + if (++qp->s_cur >= qp->s_size) + qp->s_cur = 0; + break; + } + qp->s_len -= len; + qp->s_hdrwords = hwords; + qp->s_cur_sge = &qp->s_sge; + qp->s_cur_size = len; + qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), + qp->s_next_psn++ & QIB_PSN_MASK); +done: + ret = 1; + goto unlock; + +bail: + qp->s_flags &= ~QIB_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +/** + * qib_uc_rcv - handle an incoming UC packet + * @ibp: the port the packet came in on + * @hdr: the header of the packet + * @has_grh: true if the packet has a GRH + * @data: the packet data + * @tlen: the length of the packet + * @qp: the QP for this packet. + * + * This is called from qib_qp_rcv() to process an incoming UC packet + * for the given QP. + * Called at interrupt level. 
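+ *
+ * The fixed header cost computed below is LRH (8) + BTH (12) = 20
+ * bytes without a GRH, or 8 + 40 + 12 = 60 bytes with one.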
+ */ +void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp) +{ + struct qib_other_headers *ohdr; + u32 opcode; + u32 hdrsize; + u32 psn; + u32 pad; + struct ib_wc wc; + u32 pmtu = qp->pmtu; + struct ib_reth *reth; + int ret; + + /* Check for GRH */ + if (!has_grh) { + ohdr = &hdr->u.oth; + hdrsize = 8 + 12; /* LRH + BTH */ + } else { + ohdr = &hdr->u.l.oth; + hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */ + } + + opcode = be32_to_cpu(ohdr->bth[0]); + if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) + return; + + psn = be32_to_cpu(ohdr->bth[2]); + opcode >>= 24; + + /* Compare the PSN verses the expected PSN. */ + if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) { + /* + * Handle a sequence error. + * Silently drop any current message. + */ + qp->r_psn = psn; +inv: + if (qp->r_state == OP(SEND_FIRST) || + qp->r_state == OP(SEND_MIDDLE)) { + set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); + qp->r_sge.num_sge = 0; + } else + qib_put_ss(&qp->r_sge); + qp->r_state = OP(SEND_LAST); + switch (opcode) { + case OP(SEND_FIRST): + case OP(SEND_ONLY): + case OP(SEND_ONLY_WITH_IMMEDIATE): + goto send_first; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_ONLY): + case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): + goto rdma_first; + + default: + goto drop; + } + } + + /* Check for opcode sequence errors. */ + switch (qp->r_state) { + case OP(SEND_FIRST): + case OP(SEND_MIDDLE): + if (opcode == OP(SEND_MIDDLE) || + opcode == OP(SEND_LAST) || + opcode == OP(SEND_LAST_WITH_IMMEDIATE)) + break; + goto inv; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_MIDDLE): + if (opcode == OP(RDMA_WRITE_MIDDLE) || + opcode == OP(RDMA_WRITE_LAST) || + opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) + break; + goto inv; + + default: + if (opcode == OP(SEND_FIRST) || + opcode == OP(SEND_ONLY) || + opcode == OP(SEND_ONLY_WITH_IMMEDIATE) || + opcode == OP(RDMA_WRITE_FIRST) || + opcode == OP(RDMA_WRITE_ONLY) || + opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) + break; + goto inv; + } + + if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) { + qp->r_flags |= QIB_R_COMM_EST; + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_COMM_EST; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + } + + /* OK, process the packet. */ + switch (opcode) { + case OP(SEND_FIRST): + case OP(SEND_ONLY): + case OP(SEND_ONLY_WITH_IMMEDIATE): +send_first: + if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) + qp->r_sge = qp->s_rdma_read_sge; + else { + ret = qib_get_rwqe(qp, 0); + if (ret < 0) + goto op_err; + if (!ret) + goto drop; + /* + * qp->s_rdma_read_sge will be the owner + * of the mr references. + */ + qp->s_rdma_read_sge = qp->r_sge; + } + qp->r_rcv_len = 0; + if (opcode == OP(SEND_ONLY)) + goto no_immediate_data; + else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) + goto send_last_imm; + /* FALLTHROUGH */ + case OP(SEND_MIDDLE): + /* Check for invalid length PMTU or posted rwqe len. 
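+                 * A MIDDLE packet must carry exactly one PMTU of
+                 * payload, i.e. tlen == hdrsize + pmtu + 4 (ICRC);
+                 * e.g. 20 + 4096 + 4 = 4120 bytes for a GRH-less
+                 * packet at pmtu 4096.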
*/ + if (unlikely(tlen != (hdrsize + pmtu + 4))) + goto rewind; + qp->r_rcv_len += pmtu; + if (unlikely(qp->r_rcv_len > qp->r_len)) + goto rewind; + qib_copy_sge(&qp->r_sge, data, pmtu, 0); + break; + + case OP(SEND_LAST_WITH_IMMEDIATE): +send_last_imm: + wc.ex.imm_data = ohdr->u.imm_data; + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + goto send_last; + case OP(SEND_LAST): +no_immediate_data: + wc.ex.imm_data = 0; + wc.wc_flags = 0; +send_last: + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* Check for invalid length. */ + /* XXX LAST len should be >= 1 */ + if (unlikely(tlen < (hdrsize + pad + 4))) + goto rewind; + /* Don't count the CRC. */ + tlen -= (hdrsize + pad + 4); + wc.byte_len = tlen + qp->r_rcv_len; + if (unlikely(wc.byte_len > qp->r_len)) + goto rewind; + wc.opcode = IB_WC_RECV; + qib_copy_sge(&qp->r_sge, data, tlen, 0); + qib_put_ss(&qp->s_rdma_read_sge); +last_imm: + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + wc.qp = &qp->ibqp; + wc.src_qp = qp->remote_qpn; + wc.slid = qp->remote_ah_attr.dlid; + wc.sl = qp->remote_ah_attr.sl; + /* zero fields that are N/A */ + wc.vendor_err = 0; + wc.pkey_index = 0; + wc.dlid_path_bits = 0; + wc.port_num = 0; + /* Signal completion event if the solicited bit is set. */ + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + (ohdr->bth[0] & + cpu_to_be32(IB_BTH_SOLICITED)) != 0); + break; + + case OP(RDMA_WRITE_FIRST): + case OP(RDMA_WRITE_ONLY): + case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */ +rdma_first: + if (unlikely(!(qp->qp_access_flags & + IB_ACCESS_REMOTE_WRITE))) { + goto drop; + } + reth = &ohdr->u.rc.reth; + hdrsize += sizeof(*reth); + qp->r_len = be32_to_cpu(reth->length); + qp->r_rcv_len = 0; + qp->r_sge.sg_list = NULL; + if (qp->r_len != 0) { + u32 rkey = be32_to_cpu(reth->rkey); + u64 vaddr = be64_to_cpu(reth->vaddr); + int ok; + + /* Check rkey */ + ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, + vaddr, rkey, IB_ACCESS_REMOTE_WRITE); + if (unlikely(!ok)) + goto drop; + qp->r_sge.num_sge = 1; + } else { + qp->r_sge.num_sge = 0; + qp->r_sge.sge.mr = NULL; + qp->r_sge.sge.vaddr = NULL; + qp->r_sge.sge.length = 0; + qp->r_sge.sge.sge_length = 0; + } + if (opcode == OP(RDMA_WRITE_ONLY)) + goto rdma_last; + else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) { + wc.ex.imm_data = ohdr->u.rc.imm_data; + goto rdma_last_imm; + } + /* FALLTHROUGH */ + case OP(RDMA_WRITE_MIDDLE): + /* Check for invalid length PMTU or posted rwqe len. */ + if (unlikely(tlen != (hdrsize + pmtu + 4))) + goto drop; + qp->r_rcv_len += pmtu; + if (unlikely(qp->r_rcv_len > qp->r_len)) + goto drop; + qib_copy_sge(&qp->r_sge, data, pmtu, 1); + break; + + case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): + wc.ex.imm_data = ohdr->u.imm_data; +rdma_last_imm: + hdrsize += 4; + wc.wc_flags = IB_WC_WITH_IMM; + + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* Check for invalid length. */ + /* XXX LAST len should be >= 1 */ + if (unlikely(tlen < (hdrsize + pad + 4))) + goto drop; + /* Don't count the CRC. 
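+                 * E.g. a single-packet send with a 7-byte payload is
+                 * padded to 8 on the wire: pad == 1, and subtracting
+                 * hdrsize + pad + 4 leaves byte_len == 7.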
*/ + tlen -= (hdrsize + pad + 4); + if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) + goto drop; + if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) + qib_put_ss(&qp->s_rdma_read_sge); + else { + ret = qib_get_rwqe(qp, 1); + if (ret < 0) + goto op_err; + if (!ret) + goto drop; + } + wc.byte_len = qp->r_len; + wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; + qib_copy_sge(&qp->r_sge, data, tlen, 1); + qib_put_ss(&qp->r_sge); + goto last_imm; + + case OP(RDMA_WRITE_LAST): +rdma_last: + /* Get the number of bytes the message was padded by. */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + /* Check for invalid length. */ + /* XXX LAST len should be >= 1 */ + if (unlikely(tlen < (hdrsize + pad + 4))) + goto drop; + /* Don't count the CRC. */ + tlen -= (hdrsize + pad + 4); + if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) + goto drop; + qib_copy_sge(&qp->r_sge, data, tlen, 1); + qib_put_ss(&qp->r_sge); + break; + + default: + /* Drop packet for unknown opcodes. */ + goto drop; + } + qp->r_psn++; + qp->r_state = opcode; + return; + +rewind: + set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); + qp->r_sge.num_sge = 0; +drop: + ibp->n_pkt_drops++; + return; + +op_err: + qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + return; + +} diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c new file mode 100644 index 00000000000..aaf7039f8ed --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -0,0 +1,590 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <rdma/ib_smi.h> + +#include "qib.h" +#include "qib_mad.h" + +/** + * qib_ud_loopback - handle send on loopback QPs + * @sqp: the sending QP + * @swqe: the send work request + * + * This is called from qib_make_ud_req() to forward a WQE addressed + * to the same HCA. + * Note that the receive interrupt handler may be calling qib_ud_rcv() + * while this is being called. 
+ */ +static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe) +{ + struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); + struct qib_pportdata *ppd; + struct qib_qp *qp; + struct ib_ah_attr *ah_attr; + unsigned long flags; + struct qib_sge_state ssge; + struct qib_sge *sge; + struct ib_wc wc; + u32 length; + enum ib_qp_type sqptype, dqptype; + + qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn); + if (!qp) { + ibp->n_pkt_drops++; + return; + } + + sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ? + IB_QPT_UD : sqp->ibqp.qp_type; + dqptype = qp->ibqp.qp_type == IB_QPT_GSI ? + IB_QPT_UD : qp->ibqp.qp_type; + + if (dqptype != sqptype || + !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { + ibp->n_pkt_drops++; + goto drop; + } + + ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr; + ppd = ppd_from_ibp(ibp); + + if (qp->ibqp.qp_num > 1) { + u16 pkey1; + u16 pkey2; + u16 lid; + + pkey1 = qib_get_pkey(ibp, sqp->s_pkey_index); + pkey2 = qib_get_pkey(ibp, qp->s_pkey_index); + if (unlikely(!qib_pkey_ok(pkey1, pkey2))) { + lid = ppd->lid | (ah_attr->src_path_bits & + ((1 << ppd->lmc) - 1)); + qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey1, + ah_attr->sl, + sqp->ibqp.qp_num, qp->ibqp.qp_num, + cpu_to_be16(lid), + cpu_to_be16(ah_attr->dlid)); + goto drop; + } + } + + /* + * Check that the qkey matches (except for QP0, see 9.6.1.4.1). + * Qkeys with the high order bit set mean use the + * qkey from the QP context instead of the WR (see 10.2.5). + */ + if (qp->ibqp.qp_num) { + u32 qkey; + + qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ? + sqp->qkey : swqe->wr.wr.ud.remote_qkey; + if (unlikely(qkey != qp->qkey)) { + u16 lid; + + lid = ppd->lid | (ah_attr->src_path_bits & + ((1 << ppd->lmc) - 1)); + qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey, + ah_attr->sl, + sqp->ibqp.qp_num, qp->ibqp.qp_num, + cpu_to_be16(lid), + cpu_to_be16(ah_attr->dlid)); + goto drop; + } + } + + /* + * A GRH is expected to precede the data even if not + * present on the wire. + */ + length = swqe->length; + memset(&wc, 0, sizeof wc); + wc.byte_len = length + sizeof(struct ib_grh); + + if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { + wc.wc_flags = IB_WC_WITH_IMM; + wc.ex.imm_data = swqe->wr.ex.imm_data; + } + + spin_lock_irqsave(&qp->r_lock, flags); + + /* + * Get the next work request entry to find where to put the data. + */ + if (qp->r_flags & QIB_R_REUSE_SGE) + qp->r_flags &= ~QIB_R_REUSE_SGE; + else { + int ret; + + ret = qib_get_rwqe(qp, 0); + if (ret < 0) { + qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + goto bail_unlock; + } + if (!ret) { + if (qp->ibqp.qp_num == 0) + ibp->n_vl15_dropped++; + goto bail_unlock; + } + } + /* Silently drop packets which are too big. 
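+         * (wc.byte_len includes the 40-byte GRH pseudo-header, so an
+         * N-byte payload needs a posted receive of at least N + 40
+         * bytes.)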
*/ + if (unlikely(wc.byte_len > qp->r_len)) { + qp->r_flags |= QIB_R_REUSE_SGE; + ibp->n_pkt_drops++; + goto bail_unlock; + } + + if (ah_attr->ah_flags & IB_AH_GRH) { + qib_copy_sge(&qp->r_sge, &ah_attr->grh, + sizeof(struct ib_grh), 1); + wc.wc_flags |= IB_WC_GRH; + } else + qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); + ssge.sg_list = swqe->sg_list + 1; + ssge.sge = *swqe->sg_list; + ssge.num_sge = swqe->wr.num_sge; + sge = &ssge.sge; + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + qib_copy_sge(&qp->r_sge, sge->vaddr, len, 1); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--ssge.num_sge) + *sge = *ssge.sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + length -= len; + } + qib_put_ss(&qp->r_sge); + if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + goto bail_unlock; + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = IB_WC_RECV; + wc.qp = &qp->ibqp; + wc.src_qp = sqp->ibqp.qp_num; + wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ? + swqe->wr.wr.ud.pkey_index : 0; + wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1)); + wc.sl = ah_attr->sl; + wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1); + wc.port_num = qp->port_num; + /* Signal completion event if the solicited bit is set. */ + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + swqe->wr.send_flags & IB_SEND_SOLICITED); + ibp->n_loop_pkts++; +bail_unlock: + spin_unlock_irqrestore(&qp->r_lock, flags); +drop: + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); +} + +/** + * qib_make_ud_req - construct a UD request packet + * @qp: the QP + * + * Return 1 if constructed; otherwise, return 0. + */ +int qib_make_ud_req(struct qib_qp *qp) +{ + struct qib_other_headers *ohdr; + struct ib_ah_attr *ah_attr; + struct qib_pportdata *ppd; + struct qib_ibport *ibp; + struct qib_swqe *wqe; + unsigned long flags; + u32 nwords; + u32 extra_bytes; + u32 bth0; + u16 lrh0; + u16 lid; + int ret = 0; + int next_cur; + + spin_lock_irqsave(&qp->s_lock, flags); + + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) { + if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND)) + goto bail; + /* We are in the error state, flush the work request. */ + if (qp->s_last == qp->s_head) + goto bail; + /* If DMAs are in progress, we can't flush immediately. */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= QIB_S_WAIT_DMA; + goto bail; + } + wqe = get_swqe_ptr(qp, qp->s_last); + qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + goto done; + } + + if (qp->s_cur == qp->s_head) + goto bail; + + wqe = get_swqe_ptr(qp, qp->s_cur); + next_cur = qp->s_cur + 1; + if (next_cur >= qp->s_size) + next_cur = 0; + + /* Construct the header. 
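+         * The base UD header built here is 7 32-bit words (LRH 8 +
+         * BTH 12 + DETH 8 bytes); qib_make_grh() adds 10 more words
+         * when the AH carries a GRH.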
*/ + ibp = to_iport(qp->ibqp.device, qp->port_num); + ppd = ppd_from_ibp(ibp); + ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; + if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { + if (ah_attr->dlid != QIB_PERMISSIVE_LID) + this_cpu_inc(ibp->pmastats->n_multicast_xmit); + else + this_cpu_inc(ibp->pmastats->n_unicast_xmit); + } else { + this_cpu_inc(ibp->pmastats->n_unicast_xmit); + lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); + if (unlikely(lid == ppd->lid)) { + /* + * If DMAs are in progress, we can't generate + * a completion for the loopback packet since + * it would be out of order. + * XXX Instead of waiting, we could queue a + * zero length descriptor so we get a callback. + */ + if (atomic_read(&qp->s_dma_busy)) { + qp->s_flags |= QIB_S_WAIT_DMA; + goto bail; + } + qp->s_cur = next_cur; + spin_unlock_irqrestore(&qp->s_lock, flags); + qib_ud_loopback(qp, wqe); + spin_lock_irqsave(&qp->s_lock, flags); + qib_send_complete(qp, wqe, IB_WC_SUCCESS); + goto done; + } + } + + qp->s_cur = next_cur; + extra_bytes = -wqe->length & 3; + nwords = (wqe->length + extra_bytes) >> 2; + + /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */ + qp->s_hdrwords = 7; + qp->s_cur_size = wqe->length; + qp->s_cur_sge = &qp->s_sge; + qp->s_srate = ah_attr->static_rate; + qp->s_wqe = wqe; + qp->s_sge.sge = wqe->sg_list[0]; + qp->s_sge.sg_list = wqe->sg_list + 1; + qp->s_sge.num_sge = wqe->wr.num_sge; + qp->s_sge.total_len = wqe->length; + + if (ah_attr->ah_flags & IB_AH_GRH) { + /* Header size in 32-bit words. */ + qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, + &ah_attr->grh, + qp->s_hdrwords, nwords); + lrh0 = QIB_LRH_GRH; + ohdr = &qp->s_hdr->u.l.oth; + /* + * Don't worry about sending to locally attached multicast + * QPs. It is unspecified by the spec. what happens. + */ + } else { + /* Header size in 32-bit words. */ + lrh0 = QIB_LRH_BTH; + ohdr = &qp->s_hdr->u.oth; + } + if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { + qp->s_hdrwords++; + ohdr->u.ud.imm_data = wqe->wr.ex.imm_data; + bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24; + } else + bth0 = IB_OPCODE_UD_SEND_ONLY << 24; + lrh0 |= ah_attr->sl << 4; + if (qp->ibqp.qp_type == IB_QPT_SMI) + lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ + else + lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12; + qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); + qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ + qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); + lid = ppd->lid; + if (lid) { + lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1); + qp->s_hdr->lrh[3] = cpu_to_be16(lid); + } else + qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE; + if (wqe->wr.send_flags & IB_SEND_SOLICITED) + bth0 |= IB_BTH_SOLICITED; + bth0 |= extra_bytes << 20; + bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY : + qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ? + wqe->wr.wr.ud.pkey_index : qp->s_pkey_index); + ohdr->bth[0] = cpu_to_be32(bth0); + /* + * Use the multicast QP if the destination LID is a multicast LID. + */ + ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE && + ah_attr->dlid != QIB_PERMISSIVE_LID ? + cpu_to_be32(QIB_MULTICAST_QPN) : + cpu_to_be32(wqe->wr.wr.ud.remote_qpn); + ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK); + /* + * Qkeys with the high order bit set mean use the + * qkey from the QP context instead of the WR (see 10.2.5). + */ + ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ? 
+ qp->qkey : wqe->wr.wr.ud.remote_qkey); + ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); + +done: + ret = 1; + goto unlock; + +bail: + qp->s_flags &= ~QIB_S_BUSY; +unlock: + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey) +{ + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_devdata *dd = ppd->dd; + unsigned ctxt = ppd->hw_pidx; + unsigned i; + + pkey &= 0x7fff; /* remove limited/full membership bit */ + + for (i = 0; i < ARRAY_SIZE(dd->rcd[ctxt]->pkeys); ++i) + if ((dd->rcd[ctxt]->pkeys[i] & 0x7fff) == pkey) + return i; + + /* + * Should not get here, this means hardware failed to validate pkeys. + * Punt and return index 0. + */ + return 0; +} + +/** + * qib_ud_rcv - receive an incoming UD packet + * @ibp: the port the packet came in on + * @hdr: the packet header + * @has_grh: true if the packet has a GRH + * @data: the packet data + * @tlen: the packet length + * @qp: the QP the packet came on + * + * This is called from qib_qp_rcv() to process an incoming UD packet + * for the given QP. + * Called at interrupt level. + */ +void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp) +{ + struct qib_other_headers *ohdr; + int opcode; + u32 hdrsize; + u32 pad; + struct ib_wc wc; + u32 qkey; + u32 src_qp; + u16 dlid; + + /* Check for GRH */ + if (!has_grh) { + ohdr = &hdr->u.oth; + hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */ + } else { + ohdr = &hdr->u.l.oth; + hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */ + } + qkey = be32_to_cpu(ohdr->u.ud.deth[0]); + src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK; + + /* + * Get the number of bytes the message was padded by + * and drop incomplete packets. + */ + pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; + if (unlikely(tlen < (hdrsize + pad + 4))) + goto drop; + + tlen -= hdrsize + pad + 4; + + /* + * Check that the permissive LID is only used on QP0 + * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1). + */ + if (qp->ibqp.qp_num) { + if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE || + hdr->lrh[3] == IB_LID_PERMISSIVE)) + goto drop; + if (qp->ibqp.qp_num > 1) { + u16 pkey1, pkey2; + + pkey1 = be32_to_cpu(ohdr->bth[0]); + pkey2 = qib_get_pkey(ibp, qp->s_pkey_index); + if (unlikely(!qib_pkey_ok(pkey1, pkey2))) { + qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, + pkey1, + (be16_to_cpu(hdr->lrh[0]) >> 4) & + 0xF, + src_qp, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); + return; + } + } + if (unlikely(qkey != qp->qkey)) { + qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey, + (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF, + src_qp, qp->ibqp.qp_num, + hdr->lrh[3], hdr->lrh[1]); + return; + } + /* Drop invalid MAD packets (see 13.5.3.1). */ + if (unlikely(qp->ibqp.qp_num == 1 && + (tlen != 256 || + (be16_to_cpu(hdr->lrh[0]) >> 12) == 15))) + goto drop; + } else { + struct ib_smp *smp; + + /* Drop invalid MAD packets (see 13.5.3.1). */ + if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15) + goto drop; + smp = (struct ib_smp *) data; + if ((hdr->lrh[1] == IB_LID_PERMISSIVE || + hdr->lrh[3] == IB_LID_PERMISSIVE) && + smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + goto drop; + } + + /* + * The opcode is in the low byte when its in network order + * (top byte when in host order). 
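+         * E.g. with bth[0] = 0x64xxxxxx on the wire, be32_to_cpu()
+         * followed by ">> 24" yields 0x64, IB_OPCODE_UD_SEND_ONLY.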
+ */ + opcode = be32_to_cpu(ohdr->bth[0]) >> 24; + if (qp->ibqp.qp_num > 1 && + opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { + wc.ex.imm_data = ohdr->u.ud.imm_data; + wc.wc_flags = IB_WC_WITH_IMM; + tlen -= sizeof(u32); + } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { + wc.ex.imm_data = 0; + wc.wc_flags = 0; + } else + goto drop; + + /* + * A GRH is expected to precede the data even if not + * present on the wire. + */ + wc.byte_len = tlen + sizeof(struct ib_grh); + + /* + * Get the next work request entry to find where to put the data. + */ + if (qp->r_flags & QIB_R_REUSE_SGE) + qp->r_flags &= ~QIB_R_REUSE_SGE; + else { + int ret; + + ret = qib_get_rwqe(qp, 0); + if (ret < 0) { + qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); + return; + } + if (!ret) { + if (qp->ibqp.qp_num == 0) + ibp->n_vl15_dropped++; + return; + } + } + /* Silently drop packets which are too big. */ + if (unlikely(wc.byte_len > qp->r_len)) { + qp->r_flags |= QIB_R_REUSE_SGE; + goto drop; + } + if (has_grh) { + qib_copy_sge(&qp->r_sge, &hdr->u.l.grh, + sizeof(struct ib_grh), 1); + wc.wc_flags |= IB_WC_GRH; + } else + qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); + qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); + qib_put_ss(&qp->r_sge); + if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) + return; + wc.wr_id = qp->r_wr_id; + wc.status = IB_WC_SUCCESS; + wc.opcode = IB_WC_RECV; + wc.vendor_err = 0; + wc.qp = &qp->ibqp; + wc.src_qp = src_qp; + wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ? + qib_lookup_pkey(ibp, be32_to_cpu(ohdr->bth[0])) : 0; + wc.slid = be16_to_cpu(hdr->lrh[3]); + wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF; + dlid = be16_to_cpu(hdr->lrh[1]); + /* + * Save the LMC lower bits if the destination LID is a unicast LID. + */ + wc.dlid_path_bits = dlid >= QIB_MULTICAST_LID_BASE ? 0 : + dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); + wc.port_num = qp->port_num; + /* Signal completion event if the solicited bit is set. */ + qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, + (ohdr->bth[0] & + cpu_to_be32(IB_BTH_SOLICITED)) != 0); + return; + +drop: + ibp->n_pkt_drops++; +} diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c new file mode 100644 index 00000000000..2bc1d2b9629 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mm.h> +#include <linux/device.h> + +#include "qib.h" + +static void __qib_release_user_pages(struct page **p, size_t num_pages, + int dirty) +{ + size_t i; + + for (i = 0; i < num_pages; i++) { + if (dirty) + set_page_dirty_lock(p[i]); + put_page(p[i]); + } +} + +/* + * Call with current->mm->mmap_sem held. + */ +static int __qib_get_user_pages(unsigned long start_page, size_t num_pages, + struct page **p, struct vm_area_struct **vma) +{ + unsigned long lock_limit; + size_t got; + int ret; + + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + if (num_pages > lock_limit && !capable(CAP_IPC_LOCK)) { + ret = -ENOMEM; + goto bail; + } + + for (got = 0; got < num_pages; got += ret) { + ret = get_user_pages(current, current->mm, + start_page + got * PAGE_SIZE, + num_pages - got, 1, 1, + p + got, vma); + if (ret < 0) + goto bail_release; + } + + current->mm->pinned_vm += num_pages; + + ret = 0; + goto bail; + +bail_release: + __qib_release_user_pages(p, got, 0); +bail: + return ret; +} + +/** + * qib_map_page - a safety wrapper around pci_map_page() + * + * A dma_addr of all 0's is interpreted by the chip as "disabled". + * Unfortunately, it can also be a valid dma_addr returned on some + * architectures. + * + * The powerpc iommu assigns dma_addrs in ascending order, so we don't + * have to bother with retries or mapping a dummy page to insure we + * don't just get the same mapping again. + * + * I'm sure we won't be so lucky with other iommu's, so FIXME. + */ +dma_addr_t qib_map_page(struct pci_dev *hwdev, struct page *page, + unsigned long offset, size_t size, int direction) +{ + dma_addr_t phys; + + phys = pci_map_page(hwdev, page, offset, size, direction); + + if (phys == 0) { + pci_unmap_page(hwdev, phys, size, direction); + phys = pci_map_page(hwdev, page, offset, size, direction); + /* + * FIXME: If we get 0 again, we should keep this page, + * map another, then free the 0 page. + */ + } + + return phys; +} + +/** + * qib_get_user_pages - lock user pages into memory + * @start_page: the start page + * @num_pages: the number of pages + * @p: the output page structures + * + * This function takes a given start page (page aligned user virtual + * address) and pins it and the following specified number of pages. For + * now, num_pages is always 1, but that will probably change at some point + * (because caller is doing expected sends on a single virtually contiguous + * buffer, so we can do all pages at once). 
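+ *
+ * Illustrative call (hypothetical caller; error handling elided,
+ * 0 is returned on success):
+ *
+ *      struct page *pages[1];
+ *
+ *      if (!qib_get_user_pages(uaddr & PAGE_MASK, 1, pages)) {
+ *              ... DMA-map pages[0] and hand it to the chip ...
+ *              qib_release_user_pages(pages, 1);
+ *      }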
+ */
+int qib_get_user_pages(unsigned long start_page, size_t num_pages,
+                       struct page **p)
+{
+        int ret;
+
+        down_write(&current->mm->mmap_sem);
+
+        ret = __qib_get_user_pages(start_page, num_pages, p, NULL);
+
+        up_write(&current->mm->mmap_sem);
+
+        return ret;
+}
+
+void qib_release_user_pages(struct page **p, size_t num_pages)
+{
+        if (current->mm) /* during close after signal, mm can be NULL */
+                down_write(&current->mm->mmap_sem);
+
+        __qib_release_user_pages(p, num_pages, 1);
+
+        if (current->mm) {
+                current->mm->pinned_vm -= num_pages;
+                up_write(&current->mm->mmap_sem);
+        }
+}
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
new file mode 100644
index 00000000000..d2806cae234
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -0,0 +1,1465 @@
+/*
+ * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/uio.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+
+#include "qib.h"
+#include "qib_user_sdma.h"
+
+/* minimum size of header */
+#define QIB_USER_SDMA_MIN_HEADER_LENGTH 64
+/* expected size of headers (for dma_pool) */
+#define QIB_USER_SDMA_EXP_HEADER_LENGTH 64
+/* attempt to drain the queue for 5 secs */
+#define QIB_USER_SDMA_DRAIN_TIMEOUT 500
+
+/*
+ * track how many times a process opens this driver.
+ */
+static struct rb_root qib_user_sdma_rb_root = RB_ROOT;
+
+struct qib_user_sdma_rb_node {
+        struct rb_node node;
+        int refcount;
+        pid_t pid;
+};
+
+struct qib_user_sdma_pkt {
+        struct list_head list;  /* list element */
+
+        u8  tiddma;             /* if this is NEW tid-sdma */
+        u8  largepkt;           /* this is large pkt from kmalloc */
+        u16 frag_size;          /* frag size used by PSM */
+        u16 index;              /* last header index or push index */
+        u16 naddr;              /* dimension of addr (1..3) ...
*/ + u16 addrlimit; /* addr array size */ + u16 tidsmidx; /* current tidsm index */ + u16 tidsmcount; /* tidsm array item count */ + u16 payload_size; /* payload size so far for header */ + u32 bytes_togo; /* bytes for processing */ + u32 counter; /* sdma pkts queued counter for this entry */ + struct qib_tid_session_member *tidsm; /* tid session member array */ + struct qib_user_sdma_queue *pq; /* which pq this pkt belongs to */ + u64 added; /* global descq number of entries */ + + struct { + u16 offset; /* offset for kvaddr, addr */ + u16 length; /* length in page */ + u16 first_desc; /* first desc */ + u16 last_desc; /* last desc */ + u16 put_page; /* should we put_page? */ + u16 dma_mapped; /* is page dma_mapped? */ + u16 dma_length; /* for dma_unmap_page() */ + u16 padding; + struct page *page; /* may be NULL (coherent mem) */ + void *kvaddr; /* FIXME: only for pio hack */ + dma_addr_t addr; + } addr[4]; /* max pages, any more and we coalesce */ +}; + +struct qib_user_sdma_queue { + /* + * pkts sent to dma engine are queued on this + * list head. the type of the elements of this + * list are struct qib_user_sdma_pkt... + */ + struct list_head sent; + + /* + * Because above list will be accessed by both process and + * signal handler, we need a spinlock for it. + */ + spinlock_t sent_lock ____cacheline_aligned_in_smp; + + /* headers with expected length are allocated from here... */ + char header_cache_name[64]; + struct dma_pool *header_cache; + + /* packets are allocated from the slab cache... */ + char pkt_slab_name[64]; + struct kmem_cache *pkt_slab; + + /* as packets go on the queued queue, they are counted... */ + u32 counter; + u32 sent_counter; + /* pending packets, not sending yet */ + u32 num_pending; + /* sending packets, not complete yet */ + u32 num_sending; + /* global descq number of entry of last sending packet */ + u64 added; + + /* dma page table */ + struct rb_root dma_pages_root; + + struct qib_user_sdma_rb_node *sdma_rb_node; + + /* protect everything above... 
*/ + struct mutex lock; +}; + +static struct qib_user_sdma_rb_node * +qib_user_sdma_rb_search(struct rb_root *root, pid_t pid) +{ + struct qib_user_sdma_rb_node *sdma_rb_node; + struct rb_node *node = root->rb_node; + + while (node) { + sdma_rb_node = container_of(node, + struct qib_user_sdma_rb_node, node); + if (pid < sdma_rb_node->pid) + node = node->rb_left; + else if (pid > sdma_rb_node->pid) + node = node->rb_right; + else + return sdma_rb_node; + } + return NULL; +} + +static int +qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new) +{ + struct rb_node **node = &(root->rb_node); + struct rb_node *parent = NULL; + struct qib_user_sdma_rb_node *got; + + while (*node) { + got = container_of(*node, struct qib_user_sdma_rb_node, node); + parent = *node; + if (new->pid < got->pid) + node = &((*node)->rb_left); + else if (new->pid > got->pid) + node = &((*node)->rb_right); + else + return 0; + } + + rb_link_node(&new->node, parent, node); + rb_insert_color(&new->node, root); + return 1; +} + +struct qib_user_sdma_queue * +qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) +{ + struct qib_user_sdma_queue *pq = + kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL); + struct qib_user_sdma_rb_node *sdma_rb_node; + + if (!pq) + goto done; + + pq->counter = 0; + pq->sent_counter = 0; + pq->num_pending = 0; + pq->num_sending = 0; + pq->added = 0; + pq->sdma_rb_node = NULL; + + INIT_LIST_HEAD(&pq->sent); + spin_lock_init(&pq->sent_lock); + mutex_init(&pq->lock); + + snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name), + "qib-user-sdma-pkts-%u-%02u.%02u", unit, ctxt, sctxt); + pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name, + sizeof(struct qib_user_sdma_pkt), + 0, 0, NULL); + + if (!pq->pkt_slab) + goto err_kfree; + + snprintf(pq->header_cache_name, sizeof(pq->header_cache_name), + "qib-user-sdma-headers-%u-%02u.%02u", unit, ctxt, sctxt); + pq->header_cache = dma_pool_create(pq->header_cache_name, + dev, + QIB_USER_SDMA_EXP_HEADER_LENGTH, + 4, 0); + if (!pq->header_cache) + goto err_slab; + + pq->dma_pages_root = RB_ROOT; + + sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root, + current->pid); + if (sdma_rb_node) { + sdma_rb_node->refcount++; + } else { + int ret; + sdma_rb_node = kmalloc(sizeof( + struct qib_user_sdma_rb_node), GFP_KERNEL); + if (!sdma_rb_node) + goto err_rb; + + sdma_rb_node->refcount = 1; + sdma_rb_node->pid = current->pid; + + ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, + sdma_rb_node); + BUG_ON(ret == 0); + } + pq->sdma_rb_node = sdma_rb_node; + + goto done; + +err_rb: + dma_pool_destroy(pq->header_cache); +err_slab: + kmem_cache_destroy(pq->pkt_slab); +err_kfree: + kfree(pq); + pq = NULL; + +done: + return pq; +} + +static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt, + int i, u16 offset, u16 len, + u16 first_desc, u16 last_desc, + u16 put_page, u16 dma_mapped, + struct page *page, void *kvaddr, + dma_addr_t dma_addr, u16 dma_length) +{ + pkt->addr[i].offset = offset; + pkt->addr[i].length = len; + pkt->addr[i].first_desc = first_desc; + pkt->addr[i].last_desc = last_desc; + pkt->addr[i].put_page = put_page; + pkt->addr[i].dma_mapped = dma_mapped; + pkt->addr[i].page = page; + pkt->addr[i].kvaddr = kvaddr; + pkt->addr[i].addr = dma_addr; + pkt->addr[i].dma_length = dma_length; +} + +static void *qib_user_sdma_alloc_header(struct qib_user_sdma_queue *pq, + size_t len, dma_addr_t *dma_addr) +{ + void *hdr; + + if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH) + hdr = 
dma_pool_alloc(pq->header_cache, GFP_KERNEL, + dma_addr); + else + hdr = NULL; + + if (!hdr) { + hdr = kmalloc(len, GFP_KERNEL); + if (!hdr) + return NULL; + + *dma_addr = 0; + } + + return hdr; +} + +static int qib_user_sdma_page_to_frags(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + struct page *page, u16 put, + u16 offset, u16 len, void *kvaddr) +{ + __le16 *pbc16; + void *pbcvaddr; + struct qib_message_header *hdr; + u16 newlen, pbclen, lastdesc, dma_mapped; + u32 vcto; + union qib_seqnum seqnum; + dma_addr_t pbcdaddr; + dma_addr_t dma_addr = + dma_map_page(&dd->pcidev->dev, + page, offset, len, DMA_TO_DEVICE); + int ret = 0; + + if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) { + /* + * dma mapping error; the pkt has not managed + * this page yet, so return the page here and + * the caller can ignore this page. + */ + if (put) { + put_page(page); + } else { + /* coalesce case */ + kunmap(page); + __free_page(page); + } + ret = -ENOMEM; + goto done; + } + offset = 0; + dma_mapped = 1; + + +next_fragment: + + /* + * In tid-sdma, the transfer length is restricted by + * the receiver side's current tid page length. + */ + if (pkt->tiddma && len > pkt->tidsm[pkt->tidsmidx].length) + newlen = pkt->tidsm[pkt->tidsmidx].length; + else + newlen = len; + + /* + * Then the transfer length is restricted by MTU. + * The last descriptor flag is determined by: + * 1. the current packet is at frag size length. + * 2. the current tid page is done if tid-sdma. + * 3. there are no more bytes to go if sdma. + */ + lastdesc = 0; + if ((pkt->payload_size + newlen) >= pkt->frag_size) { + newlen = pkt->frag_size - pkt->payload_size; + lastdesc = 1; + } else if (pkt->tiddma) { + if (newlen == pkt->tidsm[pkt->tidsmidx].length) + lastdesc = 1; + } else { + if (newlen == pkt->bytes_togo) + lastdesc = 1; + } + + /* fill the next fragment in this page */ + qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */ + offset, newlen, /* offset, len */ + 0, lastdesc, /* first last desc */ + put, dma_mapped, /* put page, dma mapped */ + page, kvaddr, /* struct page, virt addr */ + dma_addr, len); /* dma addr, dma length */ + pkt->bytes_togo -= newlen; + pkt->payload_size += newlen; + pkt->naddr++; + if (pkt->naddr == pkt->addrlimit) { + ret = -EFAULT; + goto done; + } + + /* If there are no more bytes to go (lastdesc==1). */ + if (pkt->bytes_togo == 0) { + /* The packet is done; the header is not dma mapped yet. + * It should be from kmalloc. */ + if (!pkt->addr[pkt->index].addr) { + pkt->addr[pkt->index].addr = + dma_map_single(&dd->pcidev->dev, + pkt->addr[pkt->index].kvaddr, + pkt->addr[pkt->index].dma_length, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + pkt->addr[pkt->index].addr)) { + ret = -ENOMEM; + goto done; + } + pkt->addr[pkt->index].dma_mapped = 1; + } + + goto done; + } + + /* If tid-sdma, advance tid info. */ + if (pkt->tiddma) { + pkt->tidsm[pkt->tidsmidx].length -= newlen; + if (pkt->tidsm[pkt->tidsmidx].length) { + pkt->tidsm[pkt->tidsmidx].offset += newlen; + } else { + pkt->tidsmidx++; + if (pkt->tidsmidx == pkt->tidsmcount) { + ret = -EFAULT; + goto done; + } + } + } + + /* + * If this is NOT the last descriptor (newlen==len), + * the current packet is not done yet, but the current + * send side page is done. + */ + if (lastdesc == 0) + goto done; + + /* + * If running this driver under PSM with message size + * fitting into one transfer unit, it is not possible + * to pass this line. Otherwise, it is a bug. 
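+ *
+ * What follows splits one user packet in two: the header of
+ * the fragment just completed keeps only the words already
+ * queued, and a fresh copy of that header describes the rest.
+ * In host byte order the two PBC word-count fix-ups below
+ * amount to (a sketch; "orig" is the count before the split):
+ *
+ *   old header: pbc16[0] = orig - (pkt->bytes_togo >> 2);
+ *   new header: pbc16[0] = orig - (pkt->payload_size >> 2);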
+ */ + + /* + * Since the current packet is done, and there are more + * bytes to go, we need to create a new sdma header, copying + * it from the previous sdma header and modifying both. + */ + pbclen = pkt->addr[pkt->index].length; + pbcvaddr = qib_user_sdma_alloc_header(pq, pbclen, &pbcdaddr); + if (!pbcvaddr) { + ret = -ENOMEM; + goto done; + } + /* Copy the previous sdma header to the new sdma header */ + pbc16 = (__le16 *)pkt->addr[pkt->index].kvaddr; + memcpy(pbcvaddr, pbc16, pbclen); + + /* Modify the previous sdma header */ + hdr = (struct qib_message_header *)&pbc16[4]; + + /* New pbc length */ + pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->bytes_togo>>2)); + + /* New packet length */ + hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0])); + + if (pkt->tiddma) { + /* turn on the header suppression */ + hdr->iph.pkt_flags = + cpu_to_le16(le16_to_cpu(hdr->iph.pkt_flags)|0x2); + /* turn off ACK_REQ: 0x04 and EXPECTED_DONE: 0x20 */ + hdr->flags &= ~(0x04|0x20); + } else { + /* turn off extra bytes: 20-21 bits */ + hdr->bth[0] = cpu_to_be32(be32_to_cpu(hdr->bth[0])&0xFFCFFFFF); + /* turn off ACK_REQ: 0x04 */ + hdr->flags &= ~(0x04); + } + + /* New kdeth checksum */ + vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset); + hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH + + be16_to_cpu(hdr->lrh[2]) - + ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) - + le16_to_cpu(hdr->iph.pkt_flags)); + + /* The packet is done; the header is not dma mapped yet. + * It should be from kmalloc. */ + if (!pkt->addr[pkt->index].addr) { + pkt->addr[pkt->index].addr = + dma_map_single(&dd->pcidev->dev, + pkt->addr[pkt->index].kvaddr, + pkt->addr[pkt->index].dma_length, + DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + pkt->addr[pkt->index].addr)) { + ret = -ENOMEM; + goto done; + } + pkt->addr[pkt->index].dma_mapped = 1; + } + + /* Modify the new sdma header */ + pbc16 = (__le16 *)pbcvaddr; + hdr = (struct qib_message_header *)&pbc16[4]; + + /* New pbc length */ + pbc16[0] = cpu_to_le16(le16_to_cpu(pbc16[0])-(pkt->payload_size>>2)); + + /* New packet length */ + hdr->lrh[2] = cpu_to_be16(le16_to_cpu(pbc16[0])); + + if (pkt->tiddma) { + /* Set new tid and offset for new sdma header */ + hdr->iph.ver_ctxt_tid_offset = cpu_to_le32( + (le32_to_cpu(hdr->iph.ver_ctxt_tid_offset)&0xFF000000) + + (pkt->tidsm[pkt->tidsmidx].tid<<QLOGIC_IB_I_TID_SHIFT) + + (pkt->tidsm[pkt->tidsmidx].offset>>2)); + } else { + /* Middle protocol new packet offset */ + hdr->uwords[2] += pkt->payload_size; + } + + /* New kdeth checksum */ + vcto = le32_to_cpu(hdr->iph.ver_ctxt_tid_offset); + hdr->iph.chksum = cpu_to_le16(QIB_LRH_BTH + + be16_to_cpu(hdr->lrh[2]) - + ((vcto>>16)&0xFFFF) - (vcto&0xFFFF) - + le16_to_cpu(hdr->iph.pkt_flags)); + + /* Next sequence number in new sdma header */ + seqnum.val = be32_to_cpu(hdr->bth[2]); + if (pkt->tiddma) + seqnum.seq++; + else + seqnum.pkt++; + hdr->bth[2] = cpu_to_be32(seqnum.val); + + /* Init the new sdma header. 
*/ + qib_user_sdma_init_frag(pkt, pkt->naddr, /* index */ + 0, pbclen, /* offset, len */ + 1, 0, /* first last desc */ + 0, 0, /* put page, dma mapped */ + NULL, pbcvaddr, /* struct page, virt addr */ + pbcdaddr, pbclen); /* dma addr, dma length */ + pkt->index = pkt->naddr; + pkt->payload_size = 0; + pkt->naddr++; + if (pkt->naddr == pkt->addrlimit) { + ret = -EFAULT; + goto done; + } + + /* Prepare for next fragment in this page */ + if (newlen != len) { + if (dma_mapped) { + put = 0; + dma_mapped = 0; + page = NULL; + kvaddr = NULL; + } + len -= newlen; + offset += newlen; + + goto next_fragment; + } + +done: + return ret; +} + +/* we've too many pages in the iovec, coalesce to a single page */ +static int qib_user_sdma_coalesce(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + const struct iovec *iov, + unsigned long niov) +{ + int ret = 0; + struct page *page = alloc_page(GFP_KERNEL); + void *mpage_save; + char *mpage; + int i; + int len = 0; + + if (!page) { + ret = -ENOMEM; + goto done; + } + + mpage = kmap(page); + mpage_save = mpage; + for (i = 0; i < niov; i++) { + int cfur; + + cfur = copy_from_user(mpage, + iov[i].iov_base, iov[i].iov_len); + if (cfur) { + ret = -EFAULT; + goto free_unmap; + } + + mpage += iov[i].iov_len; + len += iov[i].iov_len; + } + + ret = qib_user_sdma_page_to_frags(dd, pq, pkt, + page, 0, 0, len, mpage_save); + goto done; + +free_unmap: + kunmap(page); + __free_page(page); +done: + return ret; +} + +/* + * How many pages in this iovec element? + */ +static int qib_user_sdma_num_pages(const struct iovec *iov) +{ + const unsigned long addr = (unsigned long) iov->iov_base; + const unsigned long len = iov->iov_len; + const unsigned long spage = addr & PAGE_MASK; + const unsigned long epage = (addr + len - 1) & PAGE_MASK; + + return 1 + ((epage - spage) >> PAGE_SHIFT); +} + +static void qib_user_sdma_free_pkt_frag(struct device *dev, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + int frag) +{ + const int i = frag; + + if (pkt->addr[i].page) { + /* only user data has page */ + if (pkt->addr[i].dma_mapped) + dma_unmap_page(dev, + pkt->addr[i].addr, + pkt->addr[i].dma_length, + DMA_TO_DEVICE); + + if (pkt->addr[i].kvaddr) + kunmap(pkt->addr[i].page); + + if (pkt->addr[i].put_page) + put_page(pkt->addr[i].page); + else + __free_page(pkt->addr[i].page); + } else if (pkt->addr[i].kvaddr) { + /* for headers */ + if (pkt->addr[i].dma_mapped) { + /* from kmalloc & dma mapped */ + dma_unmap_single(dev, + pkt->addr[i].addr, + pkt->addr[i].dma_length, + DMA_TO_DEVICE); + kfree(pkt->addr[i].kvaddr); + } else if (pkt->addr[i].addr) { + /* free coherent mem from cache... */ + dma_pool_free(pq->header_cache, + pkt->addr[i].kvaddr, pkt->addr[i].addr); + } else { + /* from kmalloc but not dma mapped */ + kfree(pkt->addr[i].kvaddr); + } + } +} + +/* return number of pages pinned... */ +static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + unsigned long addr, int tlen, int npages) +{ + struct page *pages[8]; + int i, j; + int ret = 0; + + while (npages) { + if (npages > 8) + j = 8; + else + j = npages; + + ret = get_user_pages_fast(addr, j, 0, pages); + if (ret != j) { + i = 0; + j = ret; + ret = -ENOMEM; + goto free_pages; + } + + for (i = 0; i < j; i++) { + /* map the pages... */ + unsigned long fofs = addr & ~PAGE_MASK; + int flen = ((fofs + tlen) > PAGE_SIZE) ? 
+ (PAGE_SIZE - fofs) : tlen; + + ret = qib_user_sdma_page_to_frags(dd, pq, pkt, + pages[i], 1, fofs, flen, NULL); + if (ret < 0) { + /* the current page has been taken + * care of inside the above call. + */ + i++; + goto free_pages; + } + + addr += flen; + tlen -= flen; + } + + npages -= j; + } + + goto done; + + /* if error, return all pages not managed by pkt */ +free_pages: + while (i < j) + put_page(pages[i++]); + +done: + return ret; +} + +static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + const struct iovec *iov, + unsigned long niov) +{ + int ret = 0; + unsigned long idx; + + for (idx = 0; idx < niov; idx++) { + const int npages = qib_user_sdma_num_pages(iov + idx); + const unsigned long addr = (unsigned long) iov[idx].iov_base; + + ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr, + iov[idx].iov_len, npages); + if (ret < 0) + goto free_pkt; + } + + goto done; + +free_pkt: + /* we need to ignore the first entry here */ + for (idx = 1; idx < pkt->naddr; idx++) + qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx); + + /* We need to dma unmap the first entry, to restore it to + * the original state so that the caller can free the memory + * in the error condition. The caller does not know whether + * it was dma mapped or not. */ + if (pkt->addr[0].dma_mapped) { + dma_unmap_single(&dd->pcidev->dev, + pkt->addr[0].addr, + pkt->addr[0].dma_length, + DMA_TO_DEVICE); + pkt->addr[0].addr = 0; + pkt->addr[0].dma_mapped = 0; + } + +done: + return ret; +} + +static int qib_user_sdma_init_payload(const struct qib_devdata *dd, + struct qib_user_sdma_queue *pq, + struct qib_user_sdma_pkt *pkt, + const struct iovec *iov, + unsigned long niov, int npages) +{ + int ret = 0; + + if (pkt->frag_size == pkt->bytes_togo && + npages >= ARRAY_SIZE(pkt->addr)) + ret = qib_user_sdma_coalesce(dd, pq, pkt, iov, niov); + else + ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov); + + return ret; +} + +/* free a packet list */ +static void qib_user_sdma_free_pkt_list(struct device *dev, + struct qib_user_sdma_queue *pq, + struct list_head *list) +{ + struct qib_user_sdma_pkt *pkt, *pkt_next; + + list_for_each_entry_safe(pkt, pkt_next, list, list) { + int i; + + for (i = 0; i < pkt->naddr; i++) + qib_user_sdma_free_pkt_frag(dev, pq, pkt, i); + + if (pkt->largepkt) + kfree(pkt); + else + kmem_cache_free(pq->pkt_slab, pkt); + } + INIT_LIST_HEAD(list); +} + +/* + * copy headers, coalesce etc -- pq->lock must be held + * + * we queue all the packets on the list, returning the + * number of iovec entries consumed. The list must be empty + * initially, since, if there is an error, we clean it... 
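+ *
+ * The per-packet layout expected from user space is roughly
+ * (a sketch; every size below is checked in the code):
+ *
+ *   iov[0]           PBC + headers, word aligned, at most
+ *                    PAGE_SIZE
+ *   iov[1..nfrags]   payload pieces, until the 32-bit word
+ *                    count announced in the PBC is reached
+ *   iov[nfrags+1]    tid session member array, tid-sdma only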
+ */ +static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, + struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq, + const struct iovec *iov, + unsigned long niov, + struct list_head *list, + int *maxpkts, int *ndesc) +{ + unsigned long idx = 0; + int ret = 0; + int npkts = 0; + __le32 *pbc; + dma_addr_t dma_addr; + struct qib_user_sdma_pkt *pkt = NULL; + size_t len; + size_t nw; + u32 counter = pq->counter; + u16 frag_size; + + while (idx < niov && npkts < *maxpkts) { + const unsigned long addr = (unsigned long) iov[idx].iov_base; + const unsigned long idx_save = idx; + unsigned pktnw; + unsigned pktnwc; + int nfrags = 0; + int npages = 0; + int bytes_togo = 0; + int tiddma = 0; + int cfur; + + len = iov[idx].iov_len; + nw = len >> 2; + + if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH || + len > PAGE_SIZE || len & 3 || addr & 3) { + ret = -EINVAL; + goto free_list; + } + + pbc = qib_user_sdma_alloc_header(pq, len, &dma_addr); + if (!pbc) { + ret = -ENOMEM; + goto free_list; + } + + cfur = copy_from_user(pbc, iov[idx].iov_base, len); + if (cfur) { + ret = -EFAULT; + goto free_pbc; + } + + /* + * This assignment is a bit strange. It's because + * the pbc counts the number of 32 bit words in the full + * packet _except_ the first word of the pbc itself... + */ + pktnwc = nw - 1; + + /* + * pktnw computation yields the number of 32 bit words + * that the caller has indicated in the PBC. Note that + * this is one less than the total number of words that + * go to the send DMA engine, as the first 32 bit word + * of the PBC itself is not counted. Armed with this count, + * we can verify that the packet is consistent with the + * iovec lengths. + */ + pktnw = le32_to_cpu(*pbc) & 0xFFFF; + if (pktnw < pktnwc) { + ret = -EINVAL; + goto free_pbc; + } + + idx++; + while (pktnwc < pktnw && idx < niov) { + const size_t slen = iov[idx].iov_len; + const unsigned long faddr = + (unsigned long) iov[idx].iov_base; + + if (slen & 3 || faddr & 3 || !slen) { + ret = -EINVAL; + goto free_pbc; + } + + npages += qib_user_sdma_num_pages(&iov[idx]); + + bytes_togo += slen; + pktnwc += slen >> 2; + idx++; + nfrags++; + } + + if (pktnwc != pktnw) { + ret = -EINVAL; + goto free_pbc; + } + + frag_size = ((le32_to_cpu(*pbc))>>16) & 0xFFFF; + if (((frag_size ? frag_size : bytes_togo) + len) > + ppd->ibmaxlen) { + ret = -EINVAL; + goto free_pbc; + } + + if (frag_size) { + int pktsize, tidsmsize, n; + + n = npages*((2*PAGE_SIZE/frag_size)+1); + pktsize = sizeof(*pkt) + sizeof(pkt->addr[0])*n; + + /* + * Determine if this is tid-sdma or just sdma. + */ + tiddma = (((le32_to_cpu(pbc[7])>> + QLOGIC_IB_I_TID_SHIFT)& + QLOGIC_IB_I_TID_MASK) != + QLOGIC_IB_I_TID_MASK); + + if (tiddma) + tidsmsize = iov[idx].iov_len; + else + tidsmsize = 0; + + pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL); + if (!pkt) { + ret = -ENOMEM; + goto free_pbc; + } + pkt->largepkt = 1; + pkt->frag_size = frag_size; + pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); + + if (tiddma) { + char *tidsm = (char *)pkt + pktsize; + cfur = copy_from_user(tidsm, + iov[idx].iov_base, tidsmsize); + if (cfur) { + ret = -EFAULT; + goto free_pkt; + } + pkt->tidsm = + (struct qib_tid_session_member *)tidsm; + pkt->tidsmcount = tidsmsize/ + sizeof(struct qib_tid_session_member); + pkt->tidsmidx = 0; + idx++; + } + + /* + * The pbc 'fill1' field is borrowed to pass the frag + * size; we need to clear it after reading the frag + * size, since the hardware requires this field to be zero. 
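+ *
+ * With "pbc0" standing for the first PBC word in host byte
+ * order, the read above and the clear below amount to:
+ *
+ *   frag_size = (pbc0 >> 16) & 0xFFFF;     read fill1
+ *   pbc0 &= 0x0000FFFF;                    then zero it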
+ */ + *pbc = cpu_to_le32(le32_to_cpu(*pbc) & 0x0000FFFF); + } else { + pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL); + if (!pkt) { + ret = -ENOMEM; + goto free_pbc; + } + pkt->largepkt = 0; + pkt->frag_size = bytes_togo; + pkt->addrlimit = ARRAY_SIZE(pkt->addr); + } + pkt->bytes_togo = bytes_togo; + pkt->payload_size = 0; + pkt->counter = counter; + pkt->tiddma = tiddma; + + /* setup the first header */ + qib_user_sdma_init_frag(pkt, 0, /* index */ + 0, len, /* offset, len */ + 1, 0, /* first last desc */ + 0, 0, /* put page, dma mapped */ + NULL, pbc, /* struct page, virt addr */ + dma_addr, len); /* dma addr, dma length */ + pkt->index = 0; + pkt->naddr = 1; + + if (nfrags) { + ret = qib_user_sdma_init_payload(dd, pq, pkt, + iov + idx_save + 1, + nfrags, npages); + if (ret < 0) + goto free_pkt; + } else { + /* since there is no payload, mark the + * header as the last desc. */ + pkt->addr[0].last_desc = 1; + + if (dma_addr == 0) { + /* + * The header is not dma mapped yet; + * it should be from kmalloc. + */ + dma_addr = dma_map_single(&dd->pcidev->dev, + pbc, len, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, + dma_addr)) { + ret = -ENOMEM; + goto free_pkt; + } + pkt->addr[0].addr = dma_addr; + pkt->addr[0].dma_mapped = 1; + } + } + + counter++; + npkts++; + pkt->pq = pq; + pkt->index = 0; /* reset index for push on hw */ + *ndesc += pkt->naddr; + + list_add_tail(&pkt->list, list); + } + + *maxpkts = npkts; + ret = idx; + goto done; + +free_pkt: + if (pkt->largepkt) + kfree(pkt); + else + kmem_cache_free(pq->pkt_slab, pkt); +free_pbc: + if (dma_addr) + dma_pool_free(pq->header_cache, pbc, dma_addr); + else + kfree(pbc); +free_list: + qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list); +done: + return ret; +} + +static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq, + u32 c) +{ + pq->sent_counter = c; +} + +/* try to clean out queue -- needs pq->lock */ +static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq) +{ + struct qib_devdata *dd = ppd->dd; + struct list_head free_list; + struct qib_user_sdma_pkt *pkt; + struct qib_user_sdma_pkt *pkt_prev; + unsigned long flags; + int ret = 0; + + if (!pq->num_sending) + return 0; + + INIT_LIST_HEAD(&free_list); + + /* + * We need this spin lock here because the interrupt handler + * might modify this list in qib_user_sdma_send_desc(); also, + * we must not be interrupted while holding it, otherwise we + * would deadlock. 
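+ *
+ * Hence the _irqsave variant on this side; the other taker,
+ * qib_user_sdma_send_desc(), is only called in this file
+ * under ppd->sdma_lock with interrupts already off, so there
+ * it is just:
+ *
+ *   spin_lock(&pkt->pq->sent_lock);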
+ */ + spin_lock_irqsave(&pq->sent_lock, flags); + list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) { + s64 descd = ppd->sdma_descq_removed - pkt->added; + + if (descd < 0) + break; + + list_move_tail(&pkt->list, &free_list); + + /* one more packet cleaned */ + ret++; + pq->num_sending--; + } + spin_unlock_irqrestore(&pq->sent_lock, flags); + + if (!list_empty(&free_list)) { + u32 counter; + + pkt = list_entry(free_list.prev, + struct qib_user_sdma_pkt, list); + counter = pkt->counter; + + qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); + qib_user_sdma_set_complete_counter(pq, counter); + } + + return ret; +} + +void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq) +{ + if (!pq) + return; + + pq->sdma_rb_node->refcount--; + if (pq->sdma_rb_node->refcount == 0) { + rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root); + kfree(pq->sdma_rb_node); + } + dma_pool_destroy(pq->header_cache); + kmem_cache_destroy(pq->pkt_slab); + kfree(pq); +} + +/* clean descriptor queue, returns > 0 if some elements cleaned */ +static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&ppd->sdma_lock, flags); + ret = qib_sdma_make_progress(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + + return ret; +} + +/* we're in close, drain packets so that we can cleanup successfully... */ +void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq) +{ + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + int i; + + if (!pq) + return; + + for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) { + mutex_lock(&pq->lock); + if (!pq->num_pending && !pq->num_sending) { + mutex_unlock(&pq->lock); + break; + } + qib_user_sdma_hwqueue_clean(ppd); + qib_user_sdma_queue_clean(ppd, pq); + mutex_unlock(&pq->lock); + msleep(10); + } + + if (pq->num_pending || pq->num_sending) { + struct qib_user_sdma_pkt *pkt; + struct qib_user_sdma_pkt *pkt_prev; + struct list_head free_list; + + mutex_lock(&pq->lock); + spin_lock_irqsave(&ppd->sdma_lock, flags); + /* + * Since we hold sdma_lock, it is safe without sent_lock. 
+ */ + if (pq->num_pending) { + list_for_each_entry_safe(pkt, pkt_prev, + &ppd->sdma_userpending, list) { + if (pkt->pq == pq) { + list_move_tail(&pkt->list, &pq->sent); + pq->num_pending--; + pq->num_sending++; + } + } + } + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + + qib_dev_err(dd, "user sdma lists not empty: forcing!\n"); + INIT_LIST_HEAD(&free_list); + list_splice_init(&pq->sent, &free_list); + pq->num_sending = 0; + qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list); + mutex_unlock(&pq->lock); + } +} + +static inline __le64 qib_sdma_make_desc0(u8 gen, + u64 addr, u64 dwlen, u64 dwoffset) +{ + return cpu_to_le64(/* SDmaPhyAddr[31:0] */ + ((addr & 0xfffffffcULL) << 32) | + /* SDmaGeneration[1:0] */ + ((gen & 3ULL) << 30) | + /* SDmaDwordCount[10:0] */ + ((dwlen & 0x7ffULL) << 16) | + /* SDmaBufOffset[12:2] */ + (dwoffset & 0x7ffULL)); +} + +static inline __le64 qib_sdma_make_first_desc0(__le64 descq) +{ + return descq | cpu_to_le64(1ULL << 12); +} + +static inline __le64 qib_sdma_make_last_desc0(__le64 descq) +{ + /* last */ /* dma head */ + return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13); +} + +static inline __le64 qib_sdma_make_desc1(u64 addr) +{ + /* SDmaPhyAddr[47:32] */ + return cpu_to_le64(addr >> 32); +} + +static void qib_user_sdma_send_frag(struct qib_pportdata *ppd, + struct qib_user_sdma_pkt *pkt, int idx, + unsigned ofs, u16 tail, u8 gen) +{ + const u64 addr = (u64) pkt->addr[idx].addr + + (u64) pkt->addr[idx].offset; + const u64 dwlen = (u64) pkt->addr[idx].length / 4; + __le64 *descqp; + __le64 descq0; + + descqp = &ppd->sdma_descq[tail].qw[0]; + + descq0 = qib_sdma_make_desc0(gen, addr, dwlen, ofs); + if (pkt->addr[idx].first_desc) + descq0 = qib_sdma_make_first_desc0(descq0); + if (pkt->addr[idx].last_desc) { + descq0 = qib_sdma_make_last_desc0(descq0); + if (ppd->sdma_intrequest) { + descq0 |= cpu_to_le64(1ULL << 15); + ppd->sdma_intrequest = 0; + } + } + + descqp[0] = descq0; + descqp[1] = qib_sdma_make_desc1(addr); +} + +void qib_user_sdma_send_desc(struct qib_pportdata *ppd, + struct list_head *pktlist) +{ + struct qib_devdata *dd = ppd->dd; + u16 nfree, nsent; + u16 tail, tail_c; + u8 gen, gen_c; + + nfree = qib_sdma_descq_freecnt(ppd); + if (!nfree) + return; + +retry: + nsent = 0; + tail_c = tail = ppd->sdma_descq_tail; + gen_c = gen = ppd->sdma_generation; + while (!list_empty(pktlist)) { + struct qib_user_sdma_pkt *pkt = + list_entry(pktlist->next, struct qib_user_sdma_pkt, + list); + int i, j, c = 0; + unsigned ofs = 0; + u16 dtail = tail; + + for (i = pkt->index; i < pkt->naddr && nfree; i++) { + qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail, gen); + ofs += pkt->addr[i].length >> 2; + + if (++tail == ppd->sdma_descq_cnt) { + tail = 0; + ++gen; + ppd->sdma_intrequest = 1; + } else if (tail == (ppd->sdma_descq_cnt>>1)) { + ppd->sdma_intrequest = 1; + } + nfree--; + if (pkt->addr[i].last_desc == 0) + continue; + + /* + * If the packet is >= 2KB mtu equivalent, we + * have to use the large buffers, and have to + * mark each descriptor as part of a large + * buffer packet. 
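+ *
+ * In descriptor qword 0, the helpers above use bit 11 for
+ * "last", bit 12 for "first", bit 13 for the dma-head update,
+ * and bit 15 for an interrupt request; "large buffer" is bit
+ * 14, which the walk below ORs into every descriptor of the
+ * packet:
+ *
+ *   ppd->sdma_descq[dtail].qw[0] |= cpu_to_le64(1ULL << 14);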
+ */ + if (ofs > dd->piosize2kmax_dwords) { + for (j = pkt->index; j <= i; j++) { + ppd->sdma_descq[dtail].qw[0] |= + cpu_to_le64(1ULL << 14); + if (++dtail == ppd->sdma_descq_cnt) + dtail = 0; + } + } + c += i + 1 - pkt->index; + pkt->index = i + 1; /* index for next first */ + tail_c = dtail = tail; + gen_c = gen; + ofs = 0; /* reset for next packet */ + } + + ppd->sdma_descq_added += c; + nsent += c; + if (pkt->index == pkt->naddr) { + pkt->added = ppd->sdma_descq_added; + pkt->pq->added = pkt->added; + pkt->pq->num_pending--; + spin_lock(&pkt->pq->sent_lock); + pkt->pq->num_sending++; + list_move_tail(&pkt->list, &pkt->pq->sent); + spin_unlock(&pkt->pq->sent_lock); + } + if (!nfree || (nsent<<2) > ppd->sdma_descq_cnt) + break; + } + + /* advance the tail on the chip if necessary */ + if (ppd->sdma_descq_tail != tail_c) { + ppd->sdma_generation = gen_c; + dd->f_sdma_update_tail(ppd, tail_c); + } + + if (nfree && !list_empty(pktlist)) + goto retry; + + return; +} + +/* pq->lock must be held, get packets on the wire... */ +static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq, + struct list_head *pktlist, int count) +{ + unsigned long flags; + + if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) + return -ECOMM; + + /* non-blocking mode */ + if (pq->sdma_rb_node->refcount > 1) { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + pq->num_pending += count; + list_splice_tail_init(pktlist, &ppd->sdma_userpending); + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return 0; + } + + /* In this case, descriptors from this process are not + * linked to the ppd pending queue, and the interrupt handler + * will not touch this process's packets, so it is OK to + * modify them without the sdma lock. + */ + + + pq->num_pending += count; + /* + * Blocking mode for a single rail process; we must + * release/regain the sdma_lock to give other processes + * a chance to make progress. This is important for + * performance. + */ + do { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + qib_user_sdma_send_desc(ppd, pktlist); + if (!list_empty(pktlist)) + qib_sdma_make_progress(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } while (!list_empty(pktlist)); + + return 0; +} + +int qib_user_sdma_writev(struct qib_ctxtdata *rcd, + struct qib_user_sdma_queue *pq, + const struct iovec *iov, + unsigned long dim) +{ + struct qib_devdata *dd = rcd->dd; + struct qib_pportdata *ppd = rcd->ppd; + int ret = 0; + struct list_head list; + int npkts = 0; + + INIT_LIST_HEAD(&list); + + mutex_lock(&pq->lock); + + /* why not -ECOMM like qib_user_sdma_push_pkts() below? */ + if (!qib_sdma_running(ppd)) + goto done_unlock; + + /* if I have packets not complete yet */ + if (pq->added > ppd->sdma_descq_removed) + qib_user_sdma_hwqueue_clean(ppd); + /* if I have complete packets to be freed */ + if (pq->num_sending) + qib_user_sdma_queue_clean(ppd, pq); + + while (dim) { + int mxp = 1; + int ndesc = 0; + + ret = qib_user_sdma_queue_pkts(dd, ppd, pq, + iov, dim, &list, &mxp, &ndesc); + if (ret < 0) + goto done_unlock; + else { + dim -= ret; + iov += ret; + } + + /* force packets onto the sdma hw queue... */ + if (!list_empty(&list)) { + /* + * Lazily clean hw queue. 
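+ *
+ * "Lazily" means the engine is only polled, and completed
+ * packets reaped, when the free descriptor count cannot
+ * cover this batch; i.e. the check just below:
+ *
+ *   if (qib_sdma_descq_freecnt(ppd) < ndesc) {
+ *           qib_user_sdma_hwqueue_clean(ppd);
+ *           ...
+ *   }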
+ */ + if (qib_sdma_descq_freecnt(ppd) < ndesc) { + qib_user_sdma_hwqueue_clean(ppd); + if (pq->num_sending) + qib_user_sdma_queue_clean(ppd, pq); + } + + ret = qib_user_sdma_push_pkts(ppd, pq, &list, mxp); + if (ret < 0) + goto done_unlock; + else { + npkts += mxp; + pq->counter += mxp; + } + } + } + +done_unlock: + if (!list_empty(&list)) + qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list); + mutex_unlock(&pq->lock); + + return (ret < 0) ? ret : npkts; +} + +int qib_user_sdma_make_progress(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq) +{ + int ret = 0; + + mutex_lock(&pq->lock); + qib_user_sdma_hwqueue_clean(ppd); + ret = qib_user_sdma_queue_clean(ppd, pq); + mutex_unlock(&pq->lock); + + return ret; +} + +u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq) +{ + return pq ? pq->sent_counter : 0; +} + +u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq) +{ + return pq ? pq->counter : 0; +} diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.h b/drivers/infiniband/hw/qib/qib_user_sdma.h new file mode 100644 index 00000000000..ce8cbaf6a5c --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_user_sdma.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include <linux/device.h> + +struct qib_user_sdma_queue; + +struct qib_user_sdma_queue * +qib_user_sdma_queue_create(struct device *dev, int unit, int port, int sport); +void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq); + +int qib_user_sdma_writev(struct qib_ctxtdata *pd, + struct qib_user_sdma_queue *pq, + const struct iovec *iov, + unsigned long dim); + +int qib_user_sdma_make_progress(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq); + +void qib_user_sdma_queue_drain(struct qib_pportdata *ppd, + struct qib_user_sdma_queue *pq); + +u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq); +u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq); diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c new file mode 100644 index 00000000000..9bcfbd84298 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -0,0 +1,2336 @@ +/* + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <rdma/ib_mad.h> +#include <rdma/ib_user_verbs.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/utsname.h> +#include <linux/rculist.h> +#include <linux/mm.h> +#include <linux/random.h> + +#include "qib.h" +#include "qib_common.h" + +static unsigned int ib_qib_qp_table_size = 256; +module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO); +MODULE_PARM_DESC(qp_table_size, "QP table size"); + +unsigned int ib_qib_lkey_table_size = 16; +module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint, + S_IRUGO); +MODULE_PARM_DESC(lkey_table_size, + "LKEY table size in bits (2^n, 1 <= n <= 23)"); + +static unsigned int ib_qib_max_pds = 0xFFFF; +module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO); +MODULE_PARM_DESC(max_pds, + "Maximum number of protection domains to support"); + +static unsigned int ib_qib_max_ahs = 0xFFFF; +module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO); +MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); + +unsigned int ib_qib_max_cqes = 0x2FFFF; +module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO); +MODULE_PARM_DESC(max_cqes, + "Maximum number of completion queue entries to support"); + +unsigned int ib_qib_max_cqs = 0x1FFFF; +module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO); +MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support"); + +unsigned int ib_qib_max_qp_wrs = 0x3FFF; +module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO); +MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); + +unsigned int ib_qib_max_qps = 16384; +module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO); +MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); + +unsigned int ib_qib_max_sges = 0x60; +module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO); +MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support"); + +unsigned int ib_qib_max_mcast_grps = 16384; +module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO); +MODULE_PARM_DESC(max_mcast_grps, + "Maximum number of multicast groups to support"); + +unsigned int ib_qib_max_mcast_qp_attached = 16; +module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached, + uint, S_IRUGO); +MODULE_PARM_DESC(max_mcast_qp_attached, + "Maximum number of attached QPs to support"); + +unsigned int ib_qib_max_srqs = 1024; +module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO); +MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support"); + +unsigned int ib_qib_max_srq_sges = 128; +module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO); +MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support"); + +unsigned int ib_qib_max_srq_wrs = 0x1FFFF; +module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO); +MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support"); + +static unsigned int ib_qib_disable_sma; +module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(disable_sma, "Disable the SMA"); + +/* + * Note that it is OK to post send work requests in the SQE and ERR + * states; qib_do_send() will process them and generate error + * completions as per IB 1.2 C10-96. 
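+ *
+ * The table below is consulted as a bitmask; e.g. the
+ * post-send path further down gates on it with (a sketch of
+ * the same check qib_post_one_send() uses):
+ *
+ *   if (!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK))
+ *           goto bail_inval;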
+ */ +const int ib_qib_state_ops[IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = 0, + [IB_QPS_INIT] = QIB_POST_RECV_OK, + [IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK, + [IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK | + QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK | + QIB_PROCESS_NEXT_SEND_OK, + [IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK | + QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK, + [IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK | + QIB_POST_SEND_OK | QIB_FLUSH_SEND, + [IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV | + QIB_POST_SEND_OK | QIB_FLUSH_SEND, +}; + +struct qib_ucontext { + struct ib_ucontext ibucontext; +}; + +static inline struct qib_ucontext *to_iucontext(struct ib_ucontext + *ibucontext) +{ + return container_of(ibucontext, struct qib_ucontext, ibucontext); +} + +/* + * Translate ib_wr_opcode into ib_wc_opcode. + */ +const enum ib_wc_opcode ib_qib_wc_opcode[] = { + [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [IB_WR_SEND] = IB_WC_SEND, + [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, + [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD +}; + +/* + * System image GUID. + */ +__be64 ib_qib_sys_image_guid; + +/** + * qib_copy_sge - copy data to SGE memory + * @ss: the SGE state + * @data: the data to copy + * @length: the length of the data + */ +void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release) +{ + struct qib_sge *sge = &ss->sge; + + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + memcpy(sge->vaddr, data, len); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (release) + qib_put_mr(sge->mr); + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + data += len; + length -= len; + } +} + +/** + * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func + * @ss: the SGE state + * @length: the number of bytes to skip + */ +void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release) +{ + struct qib_sge *sge = &ss->sge; + + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (release) + qib_put_mr(sge->mr); + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + length -= len; + } +} + +/* + * Count the number of DMA descriptors needed to send length bytes of data. + * Don't modify the qib_sge_state to get the count. + * Return zero if any of the segments is not aligned. 
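+ *
+ * "Not aligned" here means dword alignment; a segment fails
+ * the test below if (sketch of the condition used):
+ *
+ *   ((long) sge.vaddr & (sizeof(u32) - 1)) ||
+ *   (len != length && (len & (sizeof(u32) - 1)))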
+ */ +static u32 qib_count_sge(struct qib_sge_state *ss, u32 length) +{ + struct qib_sge *sg_list = ss->sg_list; + struct qib_sge sge = ss->sge; + u8 num_sge = ss->num_sge; + u32 ndesc = 1; /* count the header */ + + while (length) { + u32 len = sge.length; + + if (len > length) + len = length; + if (len > sge.sge_length) + len = sge.sge_length; + BUG_ON(len == 0); + if (((long) sge.vaddr & (sizeof(u32) - 1)) || + (len != length && (len & (sizeof(u32) - 1)))) { + ndesc = 0; + break; + } + ndesc++; + sge.vaddr += len; + sge.length -= len; + sge.sge_length -= len; + if (sge.sge_length == 0) { + if (--num_sge) + sge = *sg_list++; + } else if (sge.length == 0 && sge.mr->lkey) { + if (++sge.n >= QIB_SEGSZ) { + if (++sge.m >= sge.mr->mapsz) + break; + sge.n = 0; + } + sge.vaddr = + sge.mr->map[sge.m]->segs[sge.n].vaddr; + sge.length = + sge.mr->map[sge.m]->segs[sge.n].length; + } + length -= len; + } + return ndesc; +} + +/* + * Copy from the SGEs to the data buffer. + */ +static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length) +{ + struct qib_sge *sge = &ss->sge; + + while (length) { + u32 len = sge->length; + + if (len > length) + len = length; + if (len > sge->sge_length) + len = sge->sge_length; + BUG_ON(len == 0); + memcpy(data, sge->vaddr, len); + sge->vaddr += len; + sge->length -= len; + sge->sge_length -= len; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + break; + sge->n = 0; + } + sge->vaddr = + sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = + sge->mr->map[sge->m]->segs[sge->n].length; + } + data += len; + length -= len; + } +} + +/** + * qib_post_one_send - post one RC, UC, or UD send work request + * @qp: the QP to post on + * @wr: the work request to send + */ +static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, + int *scheduled) +{ + struct qib_swqe *wqe; + u32 next; + int i; + int j; + int acc; + int ret; + unsigned long flags; + struct qib_lkey_table *rkt; + struct qib_pd *pd; + + spin_lock_irqsave(&qp->s_lock, flags); + + /* Check that state is OK to post send. */ + if (unlikely(!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK))) + goto bail_inval; + + /* IB spec says that num_sge == 0 is OK. */ + if (wr->num_sge > qp->s_max_sge) + goto bail_inval; + + /* + * Don't allow RDMA reads or atomic operations on UC or + * undefined operations. + * Make sure buffer is large enough to hold the result for atomics. 
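+ *
+ * For atomics "large enough" means a first SGE of at least
+ * sizeof(u64) bytes, u64-aligned; paraphrasing the check a
+ * few lines down, the request is valid only if:
+ *
+ *   wr->sg_list[0].length >= sizeof(u64) &&
+ *   !(wr->sg_list[0].addr & (sizeof(u64) - 1))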
+ */ + if (wr->opcode == IB_WR_FAST_REG_MR) { + if (qib_fast_reg_mr(qp, wr)) + goto bail_inval; + } else if (qp->ibqp.qp_type == IB_QPT_UC) { + if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) + goto bail_inval; + } else if (qp->ibqp.qp_type != IB_QPT_RC) { + /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ + if (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM) + goto bail_inval; + /* Check UD destination address PD */ + if (qp->ibqp.pd != wr->wr.ud.ah->pd) + goto bail_inval; + } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) + goto bail_inval; + else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && + (wr->num_sge == 0 || + wr->sg_list[0].length < sizeof(u64) || + wr->sg_list[0].addr & (sizeof(u64) - 1))) + goto bail_inval; + else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) + goto bail_inval; + + next = qp->s_head + 1; + if (next >= qp->s_size) + next = 0; + if (next == qp->s_last) { + ret = -ENOMEM; + goto bail; + } + + rkt = &to_idev(qp->ibqp.device)->lk_table; + pd = to_ipd(qp->ibqp.pd); + wqe = get_swqe_ptr(qp, qp->s_head); + wqe->wr = *wr; + wqe->length = 0; + j = 0; + if (wr->num_sge) { + acc = wr->opcode >= IB_WR_RDMA_READ ? + IB_ACCESS_LOCAL_WRITE : 0; + for (i = 0; i < wr->num_sge; i++) { + u32 length = wr->sg_list[i].length; + int ok; + + if (length == 0) + continue; + ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j], + &wr->sg_list[i], acc); + if (!ok) + goto bail_inval_free; + wqe->length += length; + j++; + } + wqe->wr.num_sge = j; + } + if (qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_RC) { + if (wqe->length > 0x80000000U) + goto bail_inval_free; + } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport + + qp->port_num - 1)->ibmtu) + goto bail_inval_free; + else + atomic_inc(&to_iah(wr->wr.ud.ah)->refcount); + wqe->ssn = qp->s_ssn++; + qp->s_head = next; + + ret = 0; + goto bail; + +bail_inval_free: + while (j) { + struct qib_sge *sge = &wqe->sg_list[--j]; + + qib_put_mr(sge->mr); + } +bail_inval: + ret = -EINVAL; +bail: + if (!ret && !wr->next && + !qib_sdma_empty( + dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) { + qib_schedule_send(qp); + *scheduled = 1; + } + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +/** + * qib_post_send - post a send on a QP + * @ibqp: the QP to post the send on + * @wr: the list of work requests to post + * @bad_wr: the first bad WR is put here + * + * This may be called from interrupt context. + */ +static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct qib_qp *qp = to_iqp(ibqp); + int err = 0; + int scheduled = 0; + + for (; wr; wr = wr->next) { + err = qib_post_one_send(qp, wr, &scheduled); + if (err) { + *bad_wr = wr; + goto bail; + } + } + + /* Try to do the send work in the caller's context. */ + if (!scheduled) + qib_do_send(&qp->s_work); + +bail: + return err; +} + +/** + * qib_post_receive - post a receive on a QP + * @ibqp: the QP to post the receive on + * @wr: the WR to post + * @bad_wr: the first bad WR is put here + * + * This may be called from interrupt context. + */ +static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct qib_qp *qp = to_iqp(ibqp); + struct qib_rwq *wq = qp->r_rq.wq; + unsigned long flags; + int ret; + + /* Check that state is OK to post receive. 
*/ + if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) { + *bad_wr = wr; + ret = -EINVAL; + goto bail; + } + + for (; wr; wr = wr->next) { + struct qib_rwqe *wqe; + u32 next; + int i; + + if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { + *bad_wr = wr; + ret = -EINVAL; + goto bail; + } + + spin_lock_irqsave(&qp->r_rq.lock, flags); + next = wq->head + 1; + if (next >= qp->r_rq.size) + next = 0; + if (next == wq->tail) { + spin_unlock_irqrestore(&qp->r_rq.lock, flags); + *bad_wr = wr; + ret = -ENOMEM; + goto bail; + } + + wqe = get_rwqe_ptr(&qp->r_rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); + wq->head = next; + spin_unlock_irqrestore(&qp->r_rq.lock, flags); + } + ret = 0; + +bail: + return ret; +} + +/** + * qib_qp_rcv - processing an incoming packet on a QP + * @rcd: the context pointer + * @hdr: the packet header + * @has_grh: true if the packet has a GRH + * @data: the packet data + * @tlen: the packet length + * @qp: the QP the packet came on + * + * This is called from qib_ib_rcv() to process an incoming packet + * for the given QP. + * Called at interrupt level. + */ +static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp) +{ + struct qib_ibport *ibp = &rcd->ppd->ibport_data; + + spin_lock(&qp->r_lock); + + /* Check for valid receive state. */ + if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) { + ibp->n_pkt_drops++; + goto unlock; + } + + switch (qp->ibqp.qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + if (ib_qib_disable_sma) + break; + /* FALLTHROUGH */ + case IB_QPT_UD: + qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp); + break; + + case IB_QPT_RC: + qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp); + break; + + case IB_QPT_UC: + qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp); + break; + + default: + break; + } + +unlock: + spin_unlock(&qp->r_lock); +} + +/** + * qib_ib_rcv - process an incoming packet + * @rcd: the context pointer + * @rhdr: the header of the packet + * @data: the packet payload + * @tlen: the packet length + * + * This is called from qib_kreceive() to process an incoming packet at + * interrupt level. Tlen is the length of the header + data + CRC in bytes. + */ +void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) +{ + struct qib_pportdata *ppd = rcd->ppd; + struct qib_ibport *ibp = &ppd->ibport_data; + struct qib_ib_header *hdr = rhdr; + struct qib_other_headers *ohdr; + struct qib_qp *qp; + u32 qp_num; + int lnh; + u8 opcode; + u16 lid; + + /* 24 == LRH+BTH+CRC */ + if (unlikely(tlen < 24)) + goto drop; + + /* Check for a valid destination LID (see ch. 7.11.1). 
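+ *
+ * With a nonzero LMC the port answers to 2^lmc consecutive
+ * LIDs, so the low lmc bits are masked off before comparing
+ * against ppd->lid (the masking done just below):
+ *
+ *   lid &= ~((1 << ppd->lmc) - 1);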
*/ + lid = be16_to_cpu(hdr->lrh[1]); + if (lid < QIB_MULTICAST_LID_BASE) { + lid &= ~((1 << ppd->lmc) - 1); + if (unlikely(lid != ppd->lid)) + goto drop; + } + + /* Check for GRH */ + lnh = be16_to_cpu(hdr->lrh[0]) & 3; + if (lnh == QIB_LRH_BTH) + ohdr = &hdr->u.oth; + else if (lnh == QIB_LRH_GRH) { + u32 vtf; + + ohdr = &hdr->u.l.oth; + if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + goto drop; + vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); + if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) + goto drop; + } else + goto drop; + + opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f; +#ifdef CONFIG_DEBUG_FS + rcd->opstats->stats[opcode].n_bytes += tlen; + rcd->opstats->stats[opcode].n_packets++; +#endif + + /* Get the destination QP number. */ + qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; + if (qp_num == QIB_MULTICAST_QPN) { + struct qib_mcast *mcast; + struct qib_mcast_qp *p; + + if (lnh != QIB_LRH_GRH) + goto drop; + mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid); + if (mcast == NULL) + goto drop; + this_cpu_inc(ibp->pmastats->n_multicast_rcv); + list_for_each_entry_rcu(p, &mcast->qp_list, list) + qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); + /* + * Notify qib_multicast_detach() if it is waiting for us + * to finish. + */ + if (atomic_dec_return(&mcast->refcount) <= 1) + wake_up(&mcast->wait); + } else { + if (rcd->lookaside_qp) { + if (rcd->lookaside_qpn != qp_num) { + if (atomic_dec_and_test( + &rcd->lookaside_qp->refcount)) + wake_up( + &rcd->lookaside_qp->wait); + rcd->lookaside_qp = NULL; + } + } + if (!rcd->lookaside_qp) { + qp = qib_lookup_qpn(ibp, qp_num); + if (!qp) + goto drop; + rcd->lookaside_qp = qp; + rcd->lookaside_qpn = qp_num; + } else + qp = rcd->lookaside_qp; + this_cpu_inc(ibp->pmastats->n_unicast_rcv); + qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); + } + return; + +drop: + ibp->n_pkt_drops++; +} + +/* + * This is called from a timer to check for QPs + * which need kernel memory in order to send a packet. 
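+ *
+ * Only the first waiter is woken per expiry; if more QPs
+ * remain on dev->memwait the timer re-arms itself one jiffy
+ * out, as the body does:
+ *
+ *   mod_timer(&dev->mem_timer, jiffies + 1);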
+ */ +static void mem_timer(unsigned long data) +{ + struct qib_ibdev *dev = (struct qib_ibdev *) data; + struct list_head *list = &dev->memwait; + struct qib_qp *qp = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev->pending_lock, flags); + if (!list_empty(list)) { + qp = list_entry(list->next, struct qib_qp, iowait); + list_del_init(&qp->iowait); + atomic_inc(&qp->refcount); + if (!list_empty(list)) + mod_timer(&dev->mem_timer, jiffies + 1); + } + spin_unlock_irqrestore(&dev->pending_lock, flags); + + if (qp) { + spin_lock_irqsave(&qp->s_lock, flags); + if (qp->s_flags & QIB_S_WAIT_KMEM) { + qp->s_flags &= ~QIB_S_WAIT_KMEM; + qib_schedule_send(qp); + } + spin_unlock_irqrestore(&qp->s_lock, flags); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} + +static void update_sge(struct qib_sge_state *ss, u32 length) +{ + struct qib_sge *sge = &ss->sge; + + sge->vaddr += length; + sge->length -= length; + sge->sge_length -= length; + if (sge->sge_length == 0) { + if (--ss->num_sge) + *sge = *ss->sg_list++; + } else if (sge->length == 0 && sge->mr->lkey) { + if (++sge->n >= QIB_SEGSZ) { + if (++sge->m >= sge->mr->mapsz) + return; + sge->n = 0; + } + sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; + sge->length = sge->mr->map[sge->m]->segs[sge->n].length; + } +} + +#ifdef __LITTLE_ENDIAN +static inline u32 get_upper_bits(u32 data, u32 shift) +{ + return data >> shift; +} + +static inline u32 set_upper_bits(u32 data, u32 shift) +{ + return data << shift; +} + +static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) +{ + data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); + data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); + return data; +} +#else +static inline u32 get_upper_bits(u32 data, u32 shift) +{ + return data << shift; +} + +static inline u32 set_upper_bits(u32 data, u32 shift) +{ + return data >> shift; +} + +static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) +{ + data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); + data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); + return data; +} +#endif + +static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss, + u32 length, unsigned flush_wc) +{ + u32 extra = 0; + u32 data = 0; + u32 last; + + while (1) { + u32 len = ss->sge.length; + u32 off; + + if (len > length) + len = length; + if (len > ss->sge.sge_length) + len = ss->sge.sge_length; + BUG_ON(len == 0); + /* If the source address is not aligned, try to align it. */ + off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); + if (off) { + u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & + ~(sizeof(u32) - 1)); + u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); + u32 y; + + y = sizeof(u32) - off; + if (len > y) + len = y; + if (len + extra >= sizeof(u32)) { + data |= set_upper_bits(v, extra * + BITS_PER_BYTE); + len = sizeof(u32) - extra; + if (len == length) { + last = data; + break; + } + __raw_writel(data, piobuf); + piobuf++; + extra = 0; + data = 0; + } else { + /* Clear unused upper bytes */ + data |= clear_upper_bytes(v, len, extra); + if (len == length) { + last = data; + break; + } + extra += len; + } + } else if (extra) { + /* Source address is aligned. */ + u32 *addr = (u32 *) ss->sge.vaddr; + int shift = extra * BITS_PER_BYTE; + int ushift = 32 - shift; + u32 l = len; + + while (l >= sizeof(u32)) { + u32 v = *addr; + + data |= set_upper_bits(v, shift); + __raw_writel(data, piobuf); + data = get_upper_bits(v, ushift); + piobuf++; + addr++; + l -= sizeof(u32); + } + /* + * We still have 'extra' number of bytes leftover. 
+ */ + if (l) { + u32 v = *addr; + + if (l + extra >= sizeof(u32)) { + data |= set_upper_bits(v, shift); + len -= l + extra - sizeof(u32); + if (len == length) { + last = data; + break; + } + __raw_writel(data, piobuf); + piobuf++; + extra = 0; + data = 0; + } else { + /* Clear unused upper bytes */ + data |= clear_upper_bytes(v, l, extra); + if (len == length) { + last = data; + break; + } + extra += l; + } + } else if (len == length) { + last = data; + break; + } + } else if (len == length) { + u32 w; + + /* + * Need to round up for the last dword in the + * packet. + */ + w = (len + 3) >> 2; + qib_pio_copy(piobuf, ss->sge.vaddr, w - 1); + piobuf += w - 1; + last = ((u32 *) ss->sge.vaddr)[w - 1]; + break; + } else { + u32 w = len >> 2; + + qib_pio_copy(piobuf, ss->sge.vaddr, w); + piobuf += w; + + extra = len & (sizeof(u32) - 1); + if (extra) { + u32 v = ((u32 *) ss->sge.vaddr)[w]; + + /* Clear unused upper bytes */ + data = clear_upper_bytes(v, extra, 0); + } + } + update_sge(ss, len); + length -= len; + } + /* Update address before sending packet. */ + update_sge(ss, length); + if (flush_wc) { + /* must flush early everything before trigger word */ + qib_flush_wc(); + __raw_writel(last, piobuf); + /* be sure trigger word is written */ + qib_flush_wc(); + } else + __raw_writel(last, piobuf); +} + +static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, + struct qib_qp *qp) +{ + struct qib_verbs_txreq *tx; + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); + spin_lock(&dev->pending_lock); + + if (!list_empty(&dev->txreq_free)) { + struct list_head *l = dev->txreq_free.next; + + list_del(l); + spin_unlock(&dev->pending_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); + tx = list_entry(l, struct qib_verbs_txreq, txreq.list); + } else { + if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK && + list_empty(&qp->iowait)) { + dev->n_txwait++; + qp->s_flags |= QIB_S_WAIT_TX; + list_add_tail(&qp->iowait, &dev->txwait); + } + qp->s_flags &= ~QIB_S_BUSY; + spin_unlock(&dev->pending_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); + tx = ERR_PTR(-EBUSY); + } + return tx; +} + +static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, + struct qib_qp *qp) +{ + struct qib_verbs_txreq *tx; + unsigned long flags; + + spin_lock_irqsave(&dev->pending_lock, flags); + /* assume the list non empty */ + if (likely(!list_empty(&dev->txreq_free))) { + struct list_head *l = dev->txreq_free.next; + + list_del(l); + spin_unlock_irqrestore(&dev->pending_lock, flags); + tx = list_entry(l, struct qib_verbs_txreq, txreq.list); + } else { + /* call slow path to get the extra lock */ + spin_unlock_irqrestore(&dev->pending_lock, flags); + tx = __get_txreq(dev, qp); + } + return tx; +} + +void qib_put_txreq(struct qib_verbs_txreq *tx) +{ + struct qib_ibdev *dev; + struct qib_qp *qp; + unsigned long flags; + + qp = tx->qp; + dev = to_idev(qp->ibqp.device); + + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + if (tx->mr) { + qib_put_mr(tx->mr); + tx->mr = NULL; + } + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) { + tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF; + dma_unmap_single(&dd_from_dev(dev)->pcidev->dev, + tx->txreq.addr, tx->hdr_dwords << 2, + DMA_TO_DEVICE); + kfree(tx->align_buf); + } + + spin_lock_irqsave(&dev->pending_lock, flags); + + /* Put struct back on free list */ + list_add(&tx->txreq.list, &dev->txreq_free); + + if (!list_empty(&dev->txwait)) { + /* Wake up first QP wanting a free struct */ + qp = list_entry(dev->txwait.next, 
struct qib_qp, iowait); + list_del_init(&qp->iowait); + atomic_inc(&qp->refcount); + spin_unlock_irqrestore(&dev->pending_lock, flags); + + spin_lock_irqsave(&qp->s_lock, flags); + if (qp->s_flags & QIB_S_WAIT_TX) { + qp->s_flags &= ~QIB_S_WAIT_TX; + qib_schedule_send(qp); + } + spin_unlock_irqrestore(&qp->s_lock, flags); + + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } else + spin_unlock_irqrestore(&dev->pending_lock, flags); +} + +/* + * This is called when there are send DMA descriptors that might be + * available. + * + * This is called with ppd->sdma_lock held. + */ +void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail) +{ + struct qib_qp *qp, *nqp; + struct qib_qp *qps[20]; + struct qib_ibdev *dev; + unsigned i, n; + + n = 0; + dev = &ppd->dd->verbs_dev; + spin_lock(&dev->pending_lock); + + /* Search wait list for first QP wanting DMA descriptors. */ + list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) { + if (qp->port_num != ppd->port) + continue; + if (n == ARRAY_SIZE(qps)) + break; + if (qp->s_tx->txreq.sg_count > avail) + break; + avail -= qp->s_tx->txreq.sg_count; + list_del_init(&qp->iowait); + atomic_inc(&qp->refcount); + qps[n++] = qp; + } + + spin_unlock(&dev->pending_lock); + + for (i = 0; i < n; i++) { + qp = qps[i]; + spin_lock(&qp->s_lock); + if (qp->s_flags & QIB_S_WAIT_DMA_DESC) { + qp->s_flags &= ~QIB_S_WAIT_DMA_DESC; + qib_schedule_send(qp); + } + spin_unlock(&qp->s_lock); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} + +/* + * This is called with ppd->sdma_lock held. + */ +static void sdma_complete(struct qib_sdma_txreq *cookie, int status) +{ + struct qib_verbs_txreq *tx = + container_of(cookie, struct qib_verbs_txreq, txreq); + struct qib_qp *qp = tx->qp; + + spin_lock(&qp->s_lock); + if (tx->wqe) + qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS); + else if (qp->ibqp.qp_type == IB_QPT_RC) { + struct qib_ib_header *hdr; + + if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) + hdr = &tx->align_buf->hdr; + else { + struct qib_ibdev *dev = to_idev(qp->ibqp.device); + + hdr = &dev->pio_hdrs[tx->hdr_inx].hdr; + } + qib_rc_send_complete(qp, hdr); + } + if (atomic_dec_and_test(&qp->s_dma_busy)) { + if (qp->state == IB_QPS_RESET) + wake_up(&qp->wait_dma); + else if (qp->s_flags & QIB_S_WAIT_DMA) { + qp->s_flags &= ~QIB_S_WAIT_DMA; + qib_schedule_send(qp); + } + } + spin_unlock(&qp->s_lock); + + qib_put_txreq(tx); +} + +static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { + spin_lock(&dev->pending_lock); + if (list_empty(&qp->iowait)) { + if (list_empty(&dev->memwait)) + mod_timer(&dev->mem_timer, jiffies + 1); + qp->s_flags |= QIB_S_WAIT_KMEM; + list_add_tail(&qp->iowait, &dev->memwait); + } + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~QIB_S_BUSY; + ret = -EBUSY; + } + spin_unlock_irqrestore(&qp->s_lock, flags); + + return ret; +} + +static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr, + u32 hdrwords, struct qib_sge_state *ss, u32 len, + u32 plen, u32 dwords) +{ + struct qib_ibdev *dev = to_idev(qp->ibqp.device); + struct qib_devdata *dd = dd_from_dev(dev); + struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_verbs_txreq *tx; + struct qib_pio_header *phdr; + u32 control; + u32 ndesc; + int ret; + + tx = qp->s_tx; + if (tx) { + qp->s_tx = NULL; + /* 
resend previously constructed packet */ + ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx); + goto bail; + } + + tx = get_txreq(dev, qp); + if (IS_ERR(tx)) + goto bail_tx; + + control = dd->f_setpbc_control(ppd, plen, qp->s_srate, + be16_to_cpu(hdr->lrh[0]) >> 12); + tx->qp = qp; + atomic_inc(&qp->refcount); + tx->wqe = qp->s_wqe; + tx->mr = qp->s_rdma_mr; + if (qp->s_rdma_mr) + qp->s_rdma_mr = NULL; + tx->txreq.callback = sdma_complete; + if (dd->flags & QIB_HAS_SDMA_TIMEOUT) + tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST; + else + tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ; + if (plen + 1 > dd->piosize2kmax_dwords) + tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF; + + if (len) { + /* + * Don't try to DMA if it takes more descriptors than + * the queue holds. + */ + ndesc = qib_count_sge(ss, len); + if (ndesc >= ppd->sdma_descq_cnt) + ndesc = 0; + } else + ndesc = 1; + if (ndesc) { + phdr = &dev->pio_hdrs[tx->hdr_inx]; + phdr->pbc[0] = cpu_to_le32(plen); + phdr->pbc[1] = cpu_to_le32(control); + memcpy(&phdr->hdr, hdr, hdrwords << 2); + tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC; + tx->txreq.sg_count = ndesc; + tx->txreq.addr = dev->pio_hdrs_phys + + tx->hdr_inx * sizeof(struct qib_pio_header); + tx->hdr_dwords = hdrwords + 2; /* add PBC length */ + ret = qib_sdma_verbs_send(ppd, ss, dwords, tx); + goto bail; + } + + /* Allocate a buffer and copy the header and payload to it. */ + tx->hdr_dwords = plen + 1; + phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC); + if (!phdr) + goto err_tx; + phdr->pbc[0] = cpu_to_le32(plen); + phdr->pbc[1] = cpu_to_le32(control); + memcpy(&phdr->hdr, hdr, hdrwords << 2); + qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len); + + tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr, + tx->hdr_dwords << 2, DMA_TO_DEVICE); + if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr)) + goto map_err; + tx->align_buf = phdr; + tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF; + tx->txreq.sg_count = 1; + ret = qib_sdma_verbs_send(ppd, NULL, 0, tx); + goto unaligned; + +map_err: + kfree(phdr); +err_tx: + qib_put_txreq(tx); + ret = wait_kmem(dev, qp); +unaligned: + ibp->n_unaligned++; +bail: + return ret; +bail_tx: + ret = PTR_ERR(tx); + goto bail; +} + +/* + * If we are now in the error state, return zero to flush the + * send work request. + */ +static int no_bufs_available(struct qib_qp *qp) +{ + struct qib_ibdev *dev = to_idev(qp->ibqp.device); + struct qib_devdata *dd; + unsigned long flags; + int ret = 0; + + /* + * Note that as soon as want_buffer() is called and + * possibly before it returns, qib_ib_piobufavail() + * could be called. Therefore, put QP on the I/O wait list before + * enabling the PIO avail interrupt. 
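+ * Otherwise a buffer-available interrupt could fire between the
+ * availability check and the list add and be missed, leaving the QP
+ * stranded on the wait list with the interrupt already disabled.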
+ */ + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) { + spin_lock(&dev->pending_lock); + if (list_empty(&qp->iowait)) { + dev->n_piowait++; + qp->s_flags |= QIB_S_WAIT_PIO; + list_add_tail(&qp->iowait, &dev->piowait); + dd = dd_from_dev(dev); + dd->f_wantpiobuf_intr(dd, 1); + } + spin_unlock(&dev->pending_lock); + qp->s_flags &= ~QIB_S_BUSY; + ret = -EBUSY; + } + spin_unlock_irqrestore(&qp->s_lock, flags); + return ret; +} + +static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr, + u32 hdrwords, struct qib_sge_state *ss, u32 len, + u32 plen, u32 dwords) +{ + struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct qib_pportdata *ppd = dd->pport + qp->port_num - 1; + u32 *hdr = (u32 *) ibhdr; + u32 __iomem *piobuf_orig; + u32 __iomem *piobuf; + u64 pbc; + unsigned long flags; + unsigned flush_wc; + u32 control; + u32 pbufn; + + control = dd->f_setpbc_control(ppd, plen, qp->s_srate, + be16_to_cpu(ibhdr->lrh[0]) >> 12); + pbc = ((u64) control << 32) | plen; + piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn); + if (unlikely(piobuf == NULL)) + return no_bufs_available(qp); + + /* + * Write the pbc. + * We have to flush after the PBC for correctness on some cpus + * or WC buffer can be written out of order. + */ + writeq(pbc, piobuf); + piobuf_orig = piobuf; + piobuf += 2; + + flush_wc = dd->flags & QIB_PIO_FLUSH_WC; + if (len == 0) { + /* + * If there is just the header portion, must flush before + * writing last word of header for correctness, and after + * the last header word (trigger word). + */ + if (flush_wc) { + qib_flush_wc(); + qib_pio_copy(piobuf, hdr, hdrwords - 1); + qib_flush_wc(); + __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); + qib_flush_wc(); + } else + qib_pio_copy(piobuf, hdr, hdrwords); + goto done; + } + + if (flush_wc) + qib_flush_wc(); + qib_pio_copy(piobuf, hdr, hdrwords); + piobuf += hdrwords; + + /* The common case is aligned and contained in one segment. */ + if (likely(ss->num_sge == 1 && len <= ss->sge.length && + !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { + u32 *addr = (u32 *) ss->sge.vaddr; + + /* Update address before sending packet. */ + update_sge(ss, len); + if (flush_wc) { + qib_pio_copy(piobuf, addr, dwords - 1); + /* must flush early everything before trigger word */ + qib_flush_wc(); + __raw_writel(addr[dwords - 1], piobuf + dwords - 1); + /* be sure trigger word is written */ + qib_flush_wc(); + } else + qib_pio_copy(piobuf, addr, dwords); + goto done; + } + copy_io(piobuf, ss, len, flush_wc); +done: + if (dd->flags & QIB_USE_SPCL_TRIG) { + u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; + qib_flush_wc(); + __raw_writel(0xaebecede, piobuf_orig + spcl_off); + } + qib_sendbuf_done(dd, pbufn); + if (qp->s_rdma_mr) { + qib_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + if (qp->s_wqe) { + spin_lock_irqsave(&qp->s_lock, flags); + qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); + spin_unlock_irqrestore(&qp->s_lock, flags); + } else if (qp->ibqp.qp_type == IB_QPT_RC) { + spin_lock_irqsave(&qp->s_lock, flags); + qib_rc_send_complete(qp, ibhdr); + spin_unlock_irqrestore(&qp->s_lock, flags); + } + return 0; +} + +/** + * qib_verbs_send - send a packet + * @qp: the QP to send on + * @hdr: the packet header + * @hdrwords: the number of 32-bit words in the header + * @ss: the SGE to send + * @len: the length of the packet in bytes + * + * Return zero if packet is sent or queued OK. + * Return non-zero and clear qp->s_flags QIB_S_BUSY otherwise. 
+ */ +int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr, + u32 hdrwords, struct qib_sge_state *ss, u32 len) +{ + struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); + u32 plen; + int ret; + u32 dwords = (len + 3) >> 2; + + /* + * Calculate the send buffer trigger address. + * The +1 counts for the pbc control dword following the pbc length. + */ + plen = hdrwords + dwords + 1; + + /* + * VL15 packets (IB_QPT_SMI) will always use PIO, so we + * can defer SDMA restart until link goes ACTIVE without + * worrying about just how we got there. + */ + if (qp->ibqp.qp_type == IB_QPT_SMI || + !(dd->flags & QIB_HAS_SEND_DMA)) + ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len, + plen, dwords); + else + ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len, + plen, dwords); + + return ret; +} + +int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords, + u64 *rwords, u64 *spkts, u64 *rpkts, + u64 *xmit_wait) +{ + int ret; + struct qib_devdata *dd = ppd->dd; + + if (!(dd->flags & QIB_PRESENT)) { + /* no hardware, freeze, etc. */ + ret = -EINVAL; + goto bail; + } + *swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND); + *rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV); + *spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND); + *rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV); + *xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL); + + ret = 0; + +bail: + return ret; +} + +/** + * qib_get_counters - get various chip counters + * @ppd: the physical port data + * @cntrs: counters are placed here + * + * Return the counters needed by recv_pma_get_portcounters(). + */ +int qib_get_counters(struct qib_pportdata *ppd, + struct qib_verbs_counters *cntrs) +{ + int ret; + + if (!(ppd->dd->flags & QIB_PRESENT)) { + /* no hardware, freeze, etc. */ + ret = -EINVAL; + goto bail; + } + cntrs->symbol_error_counter = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR); + cntrs->link_error_recovery_counter = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV); + /* + * The link downed counter counts when the other side downs the + * connection. We add in the number of times we downed the link + * due to local link integrity errors to compensate.
+ */ + cntrs->link_downed_counter = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN); + cntrs->port_rcv_errors = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) + + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT); + cntrs->port_rcv_errors += + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR); + cntrs->port_rcv_errors += + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR); + cntrs->port_rcv_remphys_errors = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP); + cntrs->port_xmit_discards = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL); + cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd, + QIBPORTCNTR_WORDSEND); + cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd, + QIBPORTCNTR_WORDRCV); + cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd, + QIBPORTCNTR_PKTSEND); + cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd, + QIBPORTCNTR_PKTRCV); + cntrs->local_link_integrity_errors = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI); + cntrs->excessive_buffer_overrun_errors = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL); + cntrs->vl15_dropped = + ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP); + + ret = 0; + +bail: + return ret; +} + +/** + * qib_ib_piobufavail - callback when a PIO buffer is available + * @dd: the device pointer + * + * This is called from qib_intr() at interrupt level when a PIO buffer is + * available after qib_verbs_send() returned an error that no buffers were + * available. Disable the interrupt if there are no more QPs waiting. + */ +void qib_ib_piobufavail(struct qib_devdata *dd) +{ + struct qib_ibdev *dev = &dd->verbs_dev; + struct list_head *list; + struct qib_qp *qps[5]; + struct qib_qp *qp; + unsigned long flags; + unsigned i, n; + + list = &dev->piowait; + n = 0; + + /* + * Note: checking that the piowait list is empty and clearing + * the buffer available interrupt needs to be atomic or we + * could end up with QPs on the wait list with the interrupt + * disabled. + */ + spin_lock_irqsave(&dev->pending_lock, flags); + while (!list_empty(list)) { + if (n == ARRAY_SIZE(qps)) + goto full; + qp = list_entry(list->next, struct qib_qp, iowait); + list_del_init(&qp->iowait); + atomic_inc(&qp->refcount); + qps[n++] = qp; + } + dd->f_wantpiobuf_intr(dd, 0); +full: + spin_unlock_irqrestore(&dev->pending_lock, flags); + + for (i = 0; i < n; i++) { + qp = qps[i]; + + spin_lock_irqsave(&qp->s_lock, flags); + if (qp->s_flags & QIB_S_WAIT_PIO) { + qp->s_flags &= ~QIB_S_WAIT_PIO; + qib_schedule_send(qp); + } + spin_unlock_irqrestore(&qp->s_lock, flags); + + /* Notify qib_destroy_qp() if it is waiting. 
*/ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} + +static int qib_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + struct qib_ibdev *dev = to_idev(ibdev); + + memset(props, 0, sizeof(*props)); + + props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | + IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | + IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; + props->page_size_cap = PAGE_SIZE; + props->vendor_id = + QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3; + props->vendor_part_id = dd->deviceid; + props->hw_ver = dd->minrev; + props->sys_image_guid = ib_qib_sys_image_guid; + props->max_mr_size = ~0ULL; + props->max_qp = ib_qib_max_qps; + props->max_qp_wr = ib_qib_max_qp_wrs; + props->max_sge = ib_qib_max_sges; + props->max_cq = ib_qib_max_cqs; + props->max_ah = ib_qib_max_ahs; + props->max_cqe = ib_qib_max_cqes; + props->max_mr = dev->lk_table.max; + props->max_fmr = dev->lk_table.max; + props->max_map_per_fmr = 32767; + props->max_pd = ib_qib_max_pds; + props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC; + props->max_qp_init_rd_atom = 255; + /* props->max_res_rd_atom */ + props->max_srq = ib_qib_max_srqs; + props->max_srq_wr = ib_qib_max_srq_wrs; + props->max_srq_sge = ib_qib_max_srq_sges; + /* props->local_ca_ack_delay */ + props->atomic_cap = IB_ATOMIC_GLOB; + props->max_pkeys = qib_get_npkeys(dd); + props->max_mcast_grp = ib_qib_max_mcast_grps; + props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached; + props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * + props->max_mcast_grp; + + return 0; +} + +static int qib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + enum ib_mtu mtu; + u16 lid = ppd->lid; + + memset(props, 0, sizeof(*props)); + props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE); + props->lmc = ppd->lmc; + props->sm_lid = ibp->sm_lid; + props->sm_sl = ibp->sm_sl; + props->state = dd->f_iblink_state(ppd->lastibcstat); + props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat); + props->port_cap_flags = ibp->port_cap_flags; + props->gid_tbl_len = QIB_GUIDS_PER_PORT; + props->max_msg_sz = 0x80000000; + props->pkey_tbl_len = qib_get_npkeys(dd); + props->bad_pkey_cntr = ibp->pkey_violations; + props->qkey_viol_cntr = ibp->qkey_violations; + props->active_width = ppd->link_width_active; + /* See rate_show() */ + props->active_speed = ppd->link_speed_active; + props->max_vl_num = qib_num_vls(ppd->vls_supported); + props->init_type_reply = 0; + + props->max_mtu = qib_ibmtu ? 
qib_ibmtu : IB_MTU_4096; + switch (ppd->ibmtu) { + case 4096: + mtu = IB_MTU_4096; + break; + case 2048: + mtu = IB_MTU_2048; + break; + case 1024: + mtu = IB_MTU_1024; + break; + case 512: + mtu = IB_MTU_512; + break; + case 256: + mtu = IB_MTU_256; + break; + default: + mtu = IB_MTU_2048; + } + props->active_mtu = mtu; + props->subnet_timeout = ibp->subnet_timeout; + + return 0; +} + +static int qib_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify) +{ + struct qib_devdata *dd = dd_from_ibdev(device); + unsigned i; + int ret; + + if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | + IB_DEVICE_MODIFY_NODE_DESC)) { + ret = -EOPNOTSUPP; + goto bail; + } + + if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { + memcpy(device->node_desc, device_modify->node_desc, 64); + for (i = 0; i < dd->num_pports; i++) { + struct qib_ibport *ibp = &dd->pport[i].ibport_data; + + qib_node_desc_chg(ibp); + } + } + + if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) { + ib_qib_sys_image_guid = + cpu_to_be64(device_modify->sys_image_guid); + for (i = 0; i < dd->num_pports; i++) { + struct qib_ibport *ibp = &dd->pport[i].ibport_data; + + qib_sys_guid_chg(ibp); + } + } + + ret = 0; + +bail: + return ret; +} + +static int qib_modify_port(struct ib_device *ibdev, u8 port, + int port_modify_mask, struct ib_port_modify *props) +{ + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + ibp->port_cap_flags |= props->set_port_cap_mask; + ibp->port_cap_flags &= ~props->clr_port_cap_mask; + if (props->set_port_cap_mask || props->clr_port_cap_mask) + qib_cap_mask_chg(ibp); + if (port_modify_mask & IB_PORT_SHUTDOWN) + qib_set_linkstate(ppd, QIB_IB_LINKDOWN); + if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) + ibp->qkey_violations = 0; + return 0; +} + +static int qib_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + int ret = 0; + + if (!port || port > dd->num_pports) + ret = -EINVAL; + else { + struct qib_ibport *ibp = to_iport(ibdev, port); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + gid->global.subnet_prefix = ibp->gid_prefix; + if (index == 0) + gid->global.interface_id = ppd->guid; + else if (index < QIB_GUIDS_PER_PORT) + gid->global.interface_id = ibp->guids[index - 1]; + else + ret = -EINVAL; + } + + return ret; +} + +static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct qib_ibdev *dev = to_idev(ibdev); + struct qib_pd *pd; + struct ib_pd *ret; + + /* + * This is actually totally arbitrary. Some correctness tests + * assume there's a maximum number of PDs that can be allocated. + * We don't actually have this limit, but we fail the test if + * we allow allocations of more than we report for this value. + */ + + pd = kmalloc(sizeof *pd, GFP_KERNEL); + if (!pd) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + spin_lock(&dev->n_pds_lock); + if (dev->n_pds_allocated == ib_qib_max_pds) { + spin_unlock(&dev->n_pds_lock); + kfree(pd); + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + dev->n_pds_allocated++; + spin_unlock(&dev->n_pds_lock); + + /* ib_alloc_pd() will initialize pd->ibpd. 
*/ + pd->user = udata != NULL; + + ret = &pd->ibpd; + +bail: + return ret; +} + +static int qib_dealloc_pd(struct ib_pd *ibpd) +{ + struct qib_pd *pd = to_ipd(ibpd); + struct qib_ibdev *dev = to_idev(ibpd->device); + + spin_lock(&dev->n_pds_lock); + dev->n_pds_allocated--; + spin_unlock(&dev->n_pds_lock); + + kfree(pd); + + return 0; +} + +int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) +{ + /* A multicast address requires a GRH (see ch. 8.4.1). */ + if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE && + ah_attr->dlid != QIB_PERMISSIVE_LID && + !(ah_attr->ah_flags & IB_AH_GRH)) + goto bail; + if ((ah_attr->ah_flags & IB_AH_GRH) && + ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT) + goto bail; + if (ah_attr->dlid == 0) + goto bail; + if (ah_attr->port_num < 1 || + ah_attr->port_num > ibdev->phys_port_cnt) + goto bail; + if (ah_attr->static_rate != IB_RATE_PORT_CURRENT && + ib_rate_to_mult(ah_attr->static_rate) < 0) + goto bail; + if (ah_attr->sl > 15) + goto bail; + return 0; +bail: + return -EINVAL; +} + +/** + * qib_create_ah - create an address handle + * @pd: the protection domain + * @ah_attr: the attributes of the AH + * + * This may be called from interrupt context. + */ +static struct ib_ah *qib_create_ah(struct ib_pd *pd, + struct ib_ah_attr *ah_attr) +{ + struct qib_ah *ah; + struct ib_ah *ret; + struct qib_ibdev *dev = to_idev(pd->device); + unsigned long flags; + + if (qib_check_ah(pd->device, ah_attr)) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + + ah = kmalloc(sizeof *ah, GFP_ATOMIC); + if (!ah) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + spin_lock_irqsave(&dev->n_ahs_lock, flags); + if (dev->n_ahs_allocated == ib_qib_max_ahs) { + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + kfree(ah); + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + dev->n_ahs_allocated++; + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + + /* ib_create_ah() will initialize ah->ibah. */ + ah->attr = *ah_attr; + atomic_set(&ah->refcount, 0); + + ret = &ah->ibah; + +bail: + return ret; +} + +struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) +{ + struct ib_ah_attr attr; + struct ib_ah *ah = ERR_PTR(-EINVAL); + struct qib_qp *qp0; + + memset(&attr, 0, sizeof attr); + attr.dlid = dlid; + attr.port_num = ppd_from_ibp(ibp)->port; + rcu_read_lock(); + qp0 = rcu_dereference(ibp->qp0); + if (qp0) + ah = ib_create_ah(qp0->ibqp.pd, &attr); + rcu_read_unlock(); + return ah; +} + +/** + * qib_destroy_ah - destroy an address handle + * @ibah: the AH to destroy + * + * This may be called from interrupt context. 
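+ * Returns -EBUSY while any outstanding send still holds a reference
+ * to the AH.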
+ */ +static int qib_destroy_ah(struct ib_ah *ibah) +{ + struct qib_ibdev *dev = to_idev(ibah->device); + struct qib_ah *ah = to_iah(ibah); + unsigned long flags; + + if (atomic_read(&ah->refcount) != 0) + return -EBUSY; + + spin_lock_irqsave(&dev->n_ahs_lock, flags); + dev->n_ahs_allocated--; + spin_unlock_irqrestore(&dev->n_ahs_lock, flags); + + kfree(ah); + + return 0; +} + +static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + struct qib_ah *ah = to_iah(ibah); + + if (qib_check_ah(ibah->device, ah_attr)) + return -EINVAL; + + ah->attr = *ah_attr; + + return 0; +} + +static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + struct qib_ah *ah = to_iah(ibah); + + *ah_attr = ah->attr; + + return 0; +} + +/** + * qib_get_npkeys - return the size of the PKEY table for context 0 + * @dd: the qlogic_ib device + */ +unsigned qib_get_npkeys(struct qib_devdata *dd) +{ + return ARRAY_SIZE(dd->rcd[0]->pkeys); +} + +/* + * Return the indexed PKEY from the port PKEY table. + * No need to validate rcd[ctxt]; the port is set up if we are here. + */ +unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index) +{ + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + struct qib_devdata *dd = ppd->dd; + unsigned ctxt = ppd->hw_pidx; + unsigned ret; + + /* dd->rcd null if mini_init or some init failures */ + if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys)) + ret = 0; + else + ret = dd->rcd[ctxt]->pkeys[index]; + + return ret; +} + +static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + struct qib_devdata *dd = dd_from_ibdev(ibdev); + int ret; + + if (index >= qib_get_npkeys(dd)) { + ret = -EINVAL; + goto bail; + } + + *pkey = qib_get_pkey(to_iport(ibdev, port), index); + ret = 0; + +bail: + return ret; +} + +/** + * qib_alloc_ucontext - allocate a ucontext + * @ibdev: the infiniband device + * @udata: not used by the QLogic_IB driver + */ + +static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct qib_ucontext *context; + struct ib_ucontext *ret; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) { + ret = ERR_PTR(-ENOMEM); + goto bail; + } + + ret = &context->ibucontext; + +bail: + return ret; +} + +static int qib_dealloc_ucontext(struct ib_ucontext *context) +{ + kfree(to_iucontext(context)); + return 0; +} + +static void init_ibport(struct qib_pportdata *ppd) +{ + struct qib_verbs_counters cntrs; + struct qib_ibport *ibp = &ppd->ibport_data; + + spin_lock_init(&ibp->lock); + /* Set the prefix to the default value (see ch. 4.1.1) */ + ibp->gid_prefix = IB_DEFAULT_GID_PREFIX; + ibp->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE); + ibp->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP | + IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP | + IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP | + IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP | + IB_PORT_OTHER_LOCAL_CHANGES_SUP; + if (ppd->dd->flags & QIB_HAS_LINK_LATENCY) + ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP; + ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; + ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; + ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; + ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; + ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; + + /* Snapshot current HW counters to "clear" them.
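+ * The hardware counters cannot be reset, so the current values are
+ * recorded as a baseline in the z_* fields and later PMA reads report
+ * the difference from that baseline.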
*/ + qib_get_counters(ppd, &cntrs); + ibp->z_symbol_error_counter = cntrs.symbol_error_counter; + ibp->z_link_error_recovery_counter = + cntrs.link_error_recovery_counter; + ibp->z_link_downed_counter = cntrs.link_downed_counter; + ibp->z_port_rcv_errors = cntrs.port_rcv_errors; + ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors; + ibp->z_port_xmit_discards = cntrs.port_xmit_discards; + ibp->z_port_xmit_data = cntrs.port_xmit_data; + ibp->z_port_rcv_data = cntrs.port_rcv_data; + ibp->z_port_xmit_packets = cntrs.port_xmit_packets; + ibp->z_port_rcv_packets = cntrs.port_rcv_packets; + ibp->z_local_link_integrity_errors = + cntrs.local_link_integrity_errors; + ibp->z_excessive_buffer_overrun_errors = + cntrs.excessive_buffer_overrun_errors; + ibp->z_vl15_dropped = cntrs.vl15_dropped; + RCU_INIT_POINTER(ibp->qp0, NULL); + RCU_INIT_POINTER(ibp->qp1, NULL); +} + +/** + * qib_register_ib_device - register our device with the infiniband core + * @dd: the device data structure + * Return the allocated qib_ibdev pointer or NULL on error. + */ +int qib_register_ib_device(struct qib_devdata *dd) +{ + struct qib_ibdev *dev = &dd->verbs_dev; + struct ib_device *ibdev = &dev->ibdev; + struct qib_pportdata *ppd = dd->pport; + unsigned i, lk_tab_size; + int ret; + + dev->qp_table_size = ib_qib_qp_table_size; + get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd)); + dev->qp_table = kmalloc(dev->qp_table_size * sizeof *dev->qp_table, + GFP_KERNEL); + if (!dev->qp_table) { + ret = -ENOMEM; + goto err_qpt; + } + for (i = 0; i < dev->qp_table_size; i++) + RCU_INIT_POINTER(dev->qp_table[i], NULL); + + for (i = 0; i < dd->num_pports; i++) + init_ibport(ppd + i); + + /* Only need to initialize non-zero fields. */ + spin_lock_init(&dev->qpt_lock); + spin_lock_init(&dev->n_pds_lock); + spin_lock_init(&dev->n_ahs_lock); + spin_lock_init(&dev->n_cqs_lock); + spin_lock_init(&dev->n_qps_lock); + spin_lock_init(&dev->n_srqs_lock); + spin_lock_init(&dev->n_mcast_grps_lock); + init_timer(&dev->mem_timer); + dev->mem_timer.function = mem_timer; + dev->mem_timer.data = (unsigned long) dev; + + qib_init_qpn_table(dd, &dev->qpn_table); + + /* + * The top ib_qib_lkey_table_size bits are used to index the + * table. The lower 8 bits can be owned by the user (copied from + * the LKEY). The remaining bits act as a generation number or tag. 
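+ * For example, with ib_qib_lkey_table_size == 16, an LKEY is laid out
+ * as:
+ *
+ *   bits 31..16  index into lk_table.table[]
+ *   bits 15..8   generation tag
+ *   bits  7..0   owned by the user (copied from the requested LKEY)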
+ */ + spin_lock_init(&dev->lk_table.lock); + dev->lk_table.max = 1 << ib_qib_lkey_table_size; + lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); + dev->lk_table.table = (struct qib_mregion __rcu **) + __get_free_pages(GFP_KERNEL, get_order(lk_tab_size)); + if (dev->lk_table.table == NULL) { + ret = -ENOMEM; + goto err_lk; + } + RCU_INIT_POINTER(dev->dma_mr, NULL); + for (i = 0; i < dev->lk_table.max; i++) + RCU_INIT_POINTER(dev->lk_table.table[i], NULL); + INIT_LIST_HEAD(&dev->pending_mmaps); + spin_lock_init(&dev->pending_lock); + dev->mmap_offset = PAGE_SIZE; + spin_lock_init(&dev->mmap_offset_lock); + INIT_LIST_HEAD(&dev->piowait); + INIT_LIST_HEAD(&dev->dmawait); + INIT_LIST_HEAD(&dev->txwait); + INIT_LIST_HEAD(&dev->memwait); + INIT_LIST_HEAD(&dev->txreq_free); + + if (ppd->sdma_descq_cnt) { + dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev, + ppd->sdma_descq_cnt * + sizeof(struct qib_pio_header), + &dev->pio_hdrs_phys, + GFP_KERNEL); + if (!dev->pio_hdrs) { + ret = -ENOMEM; + goto err_hdrs; + } + } + + for (i = 0; i < ppd->sdma_descq_cnt; i++) { + struct qib_verbs_txreq *tx; + + tx = kzalloc(sizeof *tx, GFP_KERNEL); + if (!tx) { + ret = -ENOMEM; + goto err_tx; + } + tx->hdr_inx = i; + list_add(&tx->txreq.list, &dev->txreq_free); + } + + /* + * The system image GUID is supposed to be the same for all + * IB HCAs in a single system but since there can be other + * device types in the system, we can't be sure this is unique. + */ + if (!ib_qib_sys_image_guid) + ib_qib_sys_image_guid = ppd->guid; + + strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX); + ibdev->owner = THIS_MODULE; + ibdev->node_guid = ppd->guid; + ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION; + ibdev->uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_MODIFY_AH) | + (1ull << IB_USER_VERBS_CMD_QUERY_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); + ibdev->node_type = RDMA_NODE_IB_CA; + ibdev->phys_port_cnt = dd->num_pports; + ibdev->num_comp_vectors = 1; + ibdev->dma_device = &dd->pcidev->dev; + ibdev->query_device = qib_query_device; + ibdev->modify_device = qib_modify_device; + ibdev->query_port = qib_query_port; + ibdev->modify_port = qib_modify_port; + ibdev->query_pkey = qib_query_pkey; + ibdev->query_gid = qib_query_gid; + ibdev->alloc_ucontext = qib_alloc_ucontext; + ibdev->dealloc_ucontext = qib_dealloc_ucontext; + ibdev->alloc_pd = qib_alloc_pd; + 
ibdev->dealloc_pd = qib_dealloc_pd; + ibdev->create_ah = qib_create_ah; + ibdev->destroy_ah = qib_destroy_ah; + ibdev->modify_ah = qib_modify_ah; + ibdev->query_ah = qib_query_ah; + ibdev->create_srq = qib_create_srq; + ibdev->modify_srq = qib_modify_srq; + ibdev->query_srq = qib_query_srq; + ibdev->destroy_srq = qib_destroy_srq; + ibdev->create_qp = qib_create_qp; + ibdev->modify_qp = qib_modify_qp; + ibdev->query_qp = qib_query_qp; + ibdev->destroy_qp = qib_destroy_qp; + ibdev->post_send = qib_post_send; + ibdev->post_recv = qib_post_receive; + ibdev->post_srq_recv = qib_post_srq_receive; + ibdev->create_cq = qib_create_cq; + ibdev->destroy_cq = qib_destroy_cq; + ibdev->resize_cq = qib_resize_cq; + ibdev->poll_cq = qib_poll_cq; + ibdev->req_notify_cq = qib_req_notify_cq; + ibdev->get_dma_mr = qib_get_dma_mr; + ibdev->reg_phys_mr = qib_reg_phys_mr; + ibdev->reg_user_mr = qib_reg_user_mr; + ibdev->dereg_mr = qib_dereg_mr; + ibdev->alloc_fast_reg_mr = qib_alloc_fast_reg_mr; + ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list; + ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list; + ibdev->alloc_fmr = qib_alloc_fmr; + ibdev->map_phys_fmr = qib_map_phys_fmr; + ibdev->unmap_fmr = qib_unmap_fmr; + ibdev->dealloc_fmr = qib_dealloc_fmr; + ibdev->attach_mcast = qib_multicast_attach; + ibdev->detach_mcast = qib_multicast_detach; + ibdev->process_mad = qib_process_mad; + ibdev->mmap = qib_mmap; + ibdev->dma_ops = &qib_dma_mapping_ops; + + snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), + "Intel Infiniband HCA %s", init_utsname()->nodename); + + ret = ib_register_device(ibdev, qib_create_port_files); + if (ret) + goto err_reg; + + ret = qib_create_agents(dev); + if (ret) + goto err_agents; + + ret = qib_verbs_register_sysfs(dd); + if (ret) + goto err_class; + + goto bail; + +err_class: + qib_free_agents(dev); +err_agents: + ib_unregister_device(ibdev); +err_reg: +err_tx: + while (!list_empty(&dev->txreq_free)) { + struct list_head *l = dev->txreq_free.next; + struct qib_verbs_txreq *tx; + + list_del(l); + tx = list_entry(l, struct qib_verbs_txreq, txreq.list); + kfree(tx); + } + if (ppd->sdma_descq_cnt) + dma_free_coherent(&dd->pcidev->dev, + ppd->sdma_descq_cnt * + sizeof(struct qib_pio_header), + dev->pio_hdrs, dev->pio_hdrs_phys); +err_hdrs: + free_pages((unsigned long) dev->lk_table.table, get_order(lk_tab_size)); +err_lk: + kfree(dev->qp_table); +err_qpt: + qib_dev_err(dd, "cannot register verbs: %d!\n", -ret); +bail: + return ret; +} + +void qib_unregister_ib_device(struct qib_devdata *dd) +{ + struct qib_ibdev *dev = &dd->verbs_dev; + struct ib_device *ibdev = &dev->ibdev; + u32 qps_inuse; + unsigned lk_tab_size; + + qib_verbs_unregister_sysfs(dd); + + qib_free_agents(dev); + + ib_unregister_device(ibdev); + + if (!list_empty(&dev->piowait)) + qib_dev_err(dd, "piowait list not empty!\n"); + if (!list_empty(&dev->dmawait)) + qib_dev_err(dd, "dmawait list not empty!\n"); + if (!list_empty(&dev->txwait)) + qib_dev_err(dd, "txwait list not empty!\n"); + if (!list_empty(&dev->memwait)) + qib_dev_err(dd, "memwait list not empty!\n"); + if (dev->dma_mr) + qib_dev_err(dd, "DMA MR not NULL!\n"); + + qps_inuse = qib_free_all_qps(dd); + if (qps_inuse) + qib_dev_err(dd, "QP memory leak! 
%u still in use\n", + qps_inuse); + + del_timer_sync(&dev->mem_timer); + qib_free_qpn_table(&dev->qpn_table); + while (!list_empty(&dev->txreq_free)) { + struct list_head *l = dev->txreq_free.next; + struct qib_verbs_txreq *tx; + + list_del(l); + tx = list_entry(l, struct qib_verbs_txreq, txreq.list); + kfree(tx); + } + if (dd->pport->sdma_descq_cnt) + dma_free_coherent(&dd->pcidev->dev, + dd->pport->sdma_descq_cnt * + sizeof(struct qib_pio_header), + dev->pio_hdrs, dev->pio_hdrs_phys); + lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); + free_pages((unsigned long) dev->lk_table.table, + get_order(lk_tab_size)); + kfree(dev->qp_table); +} + +/* + * This must be called with s_lock held. + */ +void qib_schedule_send(struct qib_qp *qp) +{ + if (qib_send_ok(qp)) { + struct qib_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); + + queue_work(ppd->qib_wq, &qp->s_work); + } +} diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h new file mode 100644 index 00000000000..bfc8948fdd3 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -0,0 +1,1173 @@ +/* + * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef QIB_VERBS_H +#define QIB_VERBS_H + +#include <linux/types.h> +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/kref.h> +#include <linux/workqueue.h> +#include <linux/kthread.h> +#include <linux/completion.h> +#include <rdma/ib_pack.h> +#include <rdma/ib_user_verbs.h> + +struct qib_ctxtdata; +struct qib_pportdata; +struct qib_devdata; +struct qib_verbs_txreq; + +#define QIB_MAX_RDMA_ATOMIC 16 +#define QIB_GUIDS_PER_PORT 5 + +#define QPN_MAX (1 << 24) +#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define QIB_UVERBS_ABI_VERSION 2 + +/* + * Define an ib_cq_notify value that is not valid so we know when CQ + * notifications are armed. 
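+ * That is, cq->notify holds IB_CQ_SOLICITED or IB_CQ_NEXT_COMP while a
+ * notification is armed, and IB_CQ_NONE otherwise.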
+ */ +#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1) + +#define IB_SEQ_NAK (3 << 29) + +/* AETH NAK opcode values */ +#define IB_RNR_NAK 0x20 +#define IB_NAK_PSN_ERROR 0x60 +#define IB_NAK_INVALID_REQUEST 0x61 +#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 +#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 +#define IB_NAK_INVALID_RD_REQUEST 0x64 + +/* Flags for checking QP state (see ib_qib_state_ops[]) */ +#define QIB_POST_SEND_OK 0x01 +#define QIB_POST_RECV_OK 0x02 +#define QIB_PROCESS_RECV_OK 0x04 +#define QIB_PROCESS_SEND_OK 0x08 +#define QIB_PROCESS_NEXT_SEND_OK 0x10 +#define QIB_FLUSH_SEND 0x20 +#define QIB_FLUSH_RECV 0x40 +#define QIB_PROCESS_OR_FLUSH_SEND \ + (QIB_PROCESS_SEND_OK | QIB_FLUSH_SEND) + +/* IB Performance Manager status values */ +#define IB_PMA_SAMPLE_STATUS_DONE 0x00 +#define IB_PMA_SAMPLE_STATUS_STARTED 0x01 +#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02 + +/* Mandatory IB performance counter select values. */ +#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001) +#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002) +#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003) +#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004) +#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005) + +#define QIB_VENDOR_IPG cpu_to_be16(0xFFA0) + +#define IB_BTH_REQ_ACK (1 << 31) +#define IB_BTH_SOLICITED (1 << 23) +#define IB_BTH_MIG_REQ (1 << 22) + +/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */ +#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26) + +#define IB_GRH_VERSION 6 +#define IB_GRH_VERSION_MASK 0xF +#define IB_GRH_VERSION_SHIFT 28 +#define IB_GRH_TCLASS_MASK 0xFF +#define IB_GRH_TCLASS_SHIFT 20 +#define IB_GRH_FLOW_MASK 0xFFFFF +#define IB_GRH_FLOW_SHIFT 0 +#define IB_GRH_NEXT_HDR 0x1B + +#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) + +/* Values for set/get portinfo VLCap OperationalVLs */ +#define IB_VL_VL0 1 +#define IB_VL_VL0_1 2 +#define IB_VL_VL0_3 3 +#define IB_VL_VL0_7 4 +#define IB_VL_VL0_14 5 + +static inline int qib_num_vls(int vls) +{ + switch (vls) { + default: + case IB_VL_VL0: + return 1; + case IB_VL_VL0_1: + return 2; + case IB_VL_VL0_3: + return 4; + case IB_VL_VL0_7: + return 8; + case IB_VL_VL0_14: + return 15; + } +} + +struct ib_reth { + __be64 vaddr; + __be32 rkey; + __be32 length; +} __packed; + +struct ib_atomic_eth { + __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ + __be32 rkey; + __be64 swap_data; + __be64 compare_data; +} __packed; + +struct qib_other_headers { + __be32 bth[3]; + union { + struct { + __be32 deth[2]; + __be32 imm_data; + } ud; + struct { + struct ib_reth reth; + __be32 imm_data; + } rc; + struct { + __be32 aeth; + __be32 atomic_ack_eth[2]; + } at; + __be32 imm_data; + __be32 aeth; + struct ib_atomic_eth atomic_eth; + } u; +} __packed; + +/* + * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes + * long (72 w/ imm_data). Only the first 56 bytes of the IB header + * will be in the eager header buffer. The remaining 12 or 16 bytes + * are in the data buffer. + */ +struct qib_ib_header { + __be16 lrh[4]; + union { + struct { + struct ib_grh grh; + struct qib_other_headers oth; + } l; + struct qib_other_headers oth; + } u; +} __packed; + +struct qib_pio_header { + __le32 pbc[2]; + struct qib_ib_header hdr; +} __packed; + +/* + * There is one struct qib_mcast for each multicast GID. + * All attached QPs are then stored as a list of + * struct qib_mcast_qp. 
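+ * Each qib_mcast hangs off the per-port mcast_tree (an rb-tree keyed
+ * by MGID), and its qp_list links the attached QPs through their
+ * qib_mcast_qp entries.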
+ */ +struct qib_mcast_qp { + struct list_head list; + struct qib_qp *qp; +}; + +struct qib_mcast { + struct rb_node rb_node; + union ib_gid mgid; + struct list_head qp_list; + wait_queue_head_t wait; + atomic_t refcount; + int n_attached; +}; + +/* Protection domain */ +struct qib_pd { + struct ib_pd ibpd; + int user; /* non-zero if created from user space */ +}; + +/* Address Handle */ +struct qib_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; + atomic_t refcount; +}; + +/* + * This structure is used by qib_mmap() to validate an offset + * when an mmap() request is made. The vm_area_struct then uses + * this as its vm_private_data. + */ +struct qib_mmap_info { + struct list_head pending_mmaps; + struct ib_ucontext *context; + void *obj; + __u64 offset; + struct kref ref; + unsigned size; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and completion queue entries as a single memory allocation so + * it can be mmap'ed into user space. + */ +struct qib_cq_wc { + u32 head; /* index of next entry to fill */ + u32 tail; /* index of next ib_poll_cq() entry */ + union { + /* these are actually size ibcq.cqe + 1 */ + struct ib_uverbs_wc uqueue[0]; + struct ib_wc kqueue[0]; + }; +}; + +/* + * The completion queue structure. + */ +struct qib_cq { + struct ib_cq ibcq; + struct kthread_work comptask; + struct qib_devdata *dd; + spinlock_t lock; /* protect changes in this struct */ + u8 notify; + u8 triggered; + struct qib_cq_wc *queue; + struct qib_mmap_info *ip; +}; + +/* + * A segment is a linear region of low physical memory. + * XXX Maybe we should use phys addr here and kmap()/kunmap(). + * Used by the verbs layer. + */ +struct qib_seg { + void *vaddr; + size_t length; +}; + +/* The number of qib_segs that fit in a page. */ +#define QIB_SEGSZ (PAGE_SIZE / sizeof(struct qib_seg)) + +struct qib_segarray { + struct qib_seg segs[QIB_SEGSZ]; +}; + +struct qib_mregion { + struct ib_pd *pd; /* shares refcnt of ibmr.pd */ + u64 user_base; /* User's address for this region */ + u64 iova; /* IB start address of this region */ + size_t length; + u32 lkey; + u32 offset; /* offset (bytes) to start of region */ + int access_flags; + u32 max_segs; /* number of qib_segs in all the arrays */ + u32 mapsz; /* size of the map array */ + u8 page_shift; /* 0 - non-uniform/non power-of-2 sizes */ + u8 lkey_published; /* in global table */ + struct completion comp; /* complete when refcount goes to zero */ + struct rcu_head list; + atomic_t refcount; + struct qib_segarray *map[0]; /* the segments */ +}; + +/* + * These keep track of the copy progress within a memory region. + * Used by the verbs layer. + */ +struct qib_sge { + struct qib_mregion *mr; + void *vaddr; /* kernel virtual address of segment */ + u32 sge_length; /* length of the SGE */ + u32 length; /* remaining length of the segment */ + u16 m; /* current index: mr->map[m] */ + u16 n; /* current index: mr->map[m]->segs[n] */ +}; + +/* Memory region */ +struct qib_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct qib_mregion mr; /* must be last */ +}; + +/* + * Send work request queue entry. + * The size of the sg_list is determined when the QP is created and stored + * in qp->s_max_sge.
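+ * Entries are therefore indexed with get_swqe_ptr() below rather than
+ * by plain array arithmetic.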
+ */ +struct qib_swqe { + struct ib_send_wr wr; /* don't use wr.sg_list */ + u32 psn; /* first packet sequence number */ + u32 lpsn; /* last packet sequence number */ + u32 ssn; /* send sequence number */ + u32 length; /* total length of data in sg_list */ + struct qib_sge sg_list[0]; +}; + +/* + * Receive work request queue entry. + * The size of the sg_list is determined when the QP (or SRQ) is created + * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). + */ +struct qib_rwqe { + u64 wr_id; + u8 num_sge; + struct ib_sge sg_list[0]; +}; + +/* + * This structure is used to contain the head pointer, tail pointer, + * and receive work queue entries as a single memory allocation so + * it can be mmap'ed into user space. + * Note that the wq array elements are variable size so you can't + * just index into the array to get the N'th element; + * use get_rwqe_ptr() instead. + */ +struct qib_rwq { + u32 head; /* new work requests posted to the head */ + u32 tail; /* receives pull requests from here. */ + struct qib_rwqe wq[0]; +}; + +struct qib_rq { + struct qib_rwq *wq; + u32 size; /* size of RWQE array */ + u8 max_sge; + spinlock_t lock /* protect changes in this struct */ + ____cacheline_aligned_in_smp; +}; + +struct qib_srq { + struct ib_srq ibsrq; + struct qib_rq rq; + struct qib_mmap_info *ip; + /* send signal when number of RWQEs < limit */ + u32 limit; +}; + +struct qib_sge_state { + struct qib_sge *sg_list; /* next SGE to be used if any */ + struct qib_sge sge; /* progress state for the current SGE */ + u32 total_len; + u8 num_sge; +}; + +/* + * This structure holds the information that the send tasklet needs + * to send a RDMA read response or atomic operation. + */ +struct qib_ack_entry { + u8 opcode; + u8 sent; + u32 psn; + u32 lpsn; + union { + struct qib_sge rdma_sge; + u64 atomic_data; + }; +}; + +/* + * Variables prefixed with s_ are for the requester (sender). + * Variables prefixed with r_ are for the responder (receiver). + * Variables prefixed with ack_ are for responder replies. + * + * Common variables are protected by both r_rq.lock and s_lock in that order + * which only happens in modify_qp() or changing the QP 'state'. 
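+ * That is, when both locks are needed, r_rq.lock is taken first and
+ * s_lock is nested inside it; taking them in the opposite order
+ * elsewhere would risk deadlock.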
+ */ +struct qib_qp { + struct ib_qp ibqp; + /* read mostly fields above and below */ + struct ib_ah_attr remote_ah_attr; + struct ib_ah_attr alt_ah_attr; + struct qib_qp __rcu *next; /* linked list for QPN hash table */ + struct qib_swqe *s_wq; /* send work queue */ + struct qib_mmap_info *ip; + struct qib_ib_header *s_hdr; /* next packet header to send */ + unsigned long timeout_jiffies; /* computed from timeout */ + + enum ib_mtu path_mtu; + u32 remote_qpn; + u32 pmtu; /* decoded from path_mtu */ + u32 qkey; /* QKEY for this QP (for UD or RD) */ + u32 s_size; /* send work queue size */ + u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ + + u8 state; /* QP state */ + u8 qp_access_flags; + u8 alt_timeout; /* Alternate path timeout for this QP */ + u8 timeout; /* Timeout for this QP */ + u8 s_srate; + u8 s_mig_state; + u8 port_num; + u8 s_pkey_index; /* PKEY index to use */ + u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ + u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ + u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ + u8 s_retry_cnt; /* number of times to retry */ + u8 s_rnr_retry_cnt; + u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ + u8 s_max_sge; /* size of s_wq->sg_list */ + u8 s_draining; + + /* start of read/write fields */ + + atomic_t refcount ____cacheline_aligned_in_smp; + wait_queue_head_t wait; + + + struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1] + ____cacheline_aligned_in_smp; + struct qib_sge_state s_rdma_read_sge; + + spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ + unsigned long r_aflags; + u64 r_wr_id; /* ID for current receive WQE */ + u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ + u32 r_len; /* total length of r_sge */ + u32 r_rcv_len; /* receive data len processed */ + u32 r_psn; /* expected rcv packet sequence number */ + u32 r_msn; /* message sequence number */ + + u8 r_state; /* opcode of last packet received */ + u8 r_flags; + u8 r_head_ack_queue; /* index into s_ack_queue[] */ + + struct list_head rspwait; /* link for waiting to respond */ + + struct qib_sge_state r_sge; /* current receive data */ + struct qib_rq r_rq; /* receive work queue */ + + spinlock_t s_lock ____cacheline_aligned_in_smp; + struct qib_sge_state *s_cur_sge; + u32 s_flags; + struct qib_verbs_txreq *s_tx; + struct qib_swqe *s_wqe; + struct qib_sge_state s_sge; /* current send request data */ + struct qib_mregion *s_rdma_mr; + atomic_t s_dma_busy; + u32 s_cur_size; /* size of send packet in bytes */ + u32 s_len; /* total length of s_sge */ + u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ + u32 s_next_psn; /* PSN for next request */ + u32 s_last_psn; /* last response PSN processed */ + u32 s_sending_psn; /* lowest PSN that is being sent */ + u32 s_sending_hpsn; /* highest PSN that is being sent */ + u32 s_psn; /* current packet sequence number */ + u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ + u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ + u32 s_head; /* new entries added here */ + u32 s_tail; /* next entry to process */ + u32 s_cur; /* current work queue entry */ + u32 s_acked; /* last un-ACK'ed entry */ + u32 s_last; /* last completed entry */ + u32 s_ssn; /* SSN of tail entry */ + u32 s_lsn; /* limit sequence number (credit) */ + u16 s_hdrwords; /* size of s_hdr in 32 bit words */ + u16 s_rdma_ack_cnt; + u8 s_state; /* opcode of last packet sent */ + u8 s_ack_state; /* opcode of packet to ACK */ + u8 s_nak_state; /* non-zero if NAK is
pending */ + u8 r_nak_state; /* non-zero if NAK is pending */ + u8 s_retry; /* requester retry counter */ + u8 s_rnr_retry; /* requester RNR retry counter */ + u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ + u8 s_tail_ack_queue; /* index into s_ack_queue[] */ + + struct qib_sge_state s_ack_rdma_sge; + struct timer_list s_timer; + struct list_head iowait; /* link for wait PIO buf */ + + struct work_struct s_work; + + wait_queue_head_t wait_dma; + + struct qib_sge r_sg_list[0] /* verified SGEs */ + ____cacheline_aligned_in_smp; +}; + +/* + * Atomic bit definitions for r_aflags. + */ +#define QIB_R_WRID_VALID 0 +#define QIB_R_REWIND_SGE 1 + +/* + * Bit definitions for r_flags. + */ +#define QIB_R_REUSE_SGE 0x01 +#define QIB_R_RDMAR_SEQ 0x02 +#define QIB_R_RSP_NAK 0x04 +#define QIB_R_RSP_SEND 0x08 +#define QIB_R_COMM_EST 0x10 + +/* + * Bit definitions for s_flags. + * + * QIB_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled + * QIB_S_BUSY - send tasklet is processing the QP + * QIB_S_TIMER - the RC retry timer is active + * QIB_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics + * QIB_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs + * before processing the next SWQE + * QIB_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete + * before processing the next SWQE + * QIB_S_WAIT_RNR - waiting for RNR timeout + * QIB_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE + * QIB_S_WAIT_DMA - waiting for send DMA queue to drain before generating + * next send completion entry not via send DMA + * QIB_S_WAIT_PIO - waiting for a send buffer to be available + * QIB_S_WAIT_TX - waiting for a struct qib_verbs_txreq to be available + * QIB_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available + * QIB_S_WAIT_KMEM - waiting for kernel memory to be available + * QIB_S_WAIT_PSN - waiting for a packet to exit the send DMA queue + * QIB_S_WAIT_ACK - waiting for an ACK packet before sending more requests + * QIB_S_SEND_ONE - send one packet, request ACK, then wait for ACK + */ +#define QIB_S_SIGNAL_REQ_WR 0x0001 +#define QIB_S_BUSY 0x0002 +#define QIB_S_TIMER 0x0004 +#define QIB_S_RESP_PENDING 0x0008 +#define QIB_S_ACK_PENDING 0x0010 +#define QIB_S_WAIT_FENCE 0x0020 +#define QIB_S_WAIT_RDMAR 0x0040 +#define QIB_S_WAIT_RNR 0x0080 +#define QIB_S_WAIT_SSN_CREDIT 0x0100 +#define QIB_S_WAIT_DMA 0x0200 +#define QIB_S_WAIT_PIO 0x0400 +#define QIB_S_WAIT_TX 0x0800 +#define QIB_S_WAIT_DMA_DESC 0x1000 +#define QIB_S_WAIT_KMEM 0x2000 +#define QIB_S_WAIT_PSN 0x4000 +#define QIB_S_WAIT_ACK 0x8000 +#define QIB_S_SEND_ONE 0x10000 +#define QIB_S_UNLIMITED_CREDIT 0x20000 + +/* + * Wait flags that would prevent any packet type from being sent. + */ +#define QIB_S_ANY_WAIT_IO (QIB_S_WAIT_PIO | QIB_S_WAIT_TX | \ + QIB_S_WAIT_DMA_DESC | QIB_S_WAIT_KMEM) + +/* + * Wait flags that would prevent send work requests from making progress. + */ +#define QIB_S_ANY_WAIT_SEND (QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | \ + QIB_S_WAIT_RNR | QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_DMA | \ + QIB_S_WAIT_PSN | QIB_S_WAIT_ACK) + +#define QIB_S_ANY_WAIT (QIB_S_ANY_WAIT_IO | QIB_S_ANY_WAIT_SEND) + +#define QIB_PSN_CREDIT 16 + +/* + * Since struct qib_swqe is not a fixed size, we can't simply index into + * struct qib_qp.s_wq. This function does the array index computation. 
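+ * For example, on a QP created with s_max_sge == 4, each entry occupies
+ * sizeof(struct qib_swqe) + 4 * sizeof(struct qib_sge) bytes, and entry
+ * n starts at that stride times n past s_wq, e.g.:
+ *
+ *   struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_head);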
+ */ +static inline struct qib_swqe *get_swqe_ptr(struct qib_qp *qp, + unsigned n) +{ + return (struct qib_swqe *)((char *)qp->s_wq + + (sizeof(struct qib_swqe) + + qp->s_max_sge * + sizeof(struct qib_sge)) * n); +} + +/* + * Since struct qib_rwqe is not a fixed size, we can't simply index into + * struct qib_rwq.wq. This function does the array index computation. + */ +static inline struct qib_rwqe *get_rwqe_ptr(struct qib_rq *rq, unsigned n) +{ + return (struct qib_rwqe *) + ((char *) rq->wq->wq + + (sizeof(struct qib_rwqe) + + rq->max_sge * sizeof(struct ib_sge)) * n); +} + +/* + * QPN-map pages start out as NULL, they get allocated upon + * first use and are never deallocated. This way, + * large bitmaps are not allocated unless large numbers of QPs are used. + */ +struct qpn_map { + void *page; +}; + +struct qib_qpn_table { + spinlock_t lock; /* protect changes in this struct */ + unsigned flags; /* flags for QP0/1 allocated for each port */ + u32 last; /* last QP number allocated */ + u32 nmaps; /* size of the map table */ + u16 limit; + u16 mask; + /* bit map of free QP numbers other than 0/1 */ + struct qpn_map map[QPNMAP_ENTRIES]; +}; + +struct qib_lkey_table { + spinlock_t lock; /* protect changes in this struct */ + u32 next; /* next unused index (speeds search) */ + u32 gen; /* generation count */ + u32 max; /* size of the table */ + struct qib_mregion __rcu **table; +}; + +struct qib_opcode_stats { + u64 n_packets; /* number of packets */ + u64 n_bytes; /* total number of bytes */ +}; + +struct qib_opcode_stats_perctx { + struct qib_opcode_stats stats[128]; +}; + +struct qib_pma_counters { + u64 n_unicast_xmit; /* total unicast packets sent */ + u64 n_unicast_rcv; /* total unicast packets received */ + u64 n_multicast_xmit; /* total multicast packets sent */ + u64 n_multicast_rcv; /* total multicast packets received */ +}; + +struct qib_ibport { + struct qib_qp __rcu *qp0; + struct qib_qp __rcu *qp1; + struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ + struct qib_ah *sm_ah; + struct qib_ah *smi_ah; + struct rb_root mcast_tree; + spinlock_t lock; /* protect changes in this struct */ + + /* non-zero when timer is set */ + unsigned long mkey_lease_timeout; + unsigned long trap_timeout; + __be64 gid_prefix; /* in network order */ + __be64 mkey; + __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */ + u64 tid; /* TID for traps */ + struct qib_pma_counters __percpu *pmastats; + u64 z_unicast_xmit; /* starting count for PMA */ + u64 z_unicast_rcv; /* starting count for PMA */ + u64 z_multicast_xmit; /* starting count for PMA */ + u64 z_multicast_rcv; /* starting count for PMA */ + u64 z_symbol_error_counter; /* starting count for PMA */ + u64 z_link_error_recovery_counter; /* starting count for PMA */ + u64 z_link_downed_counter; /* starting count for PMA */ + u64 z_port_rcv_errors; /* starting count for PMA */ + u64 z_port_rcv_remphys_errors; /* starting count for PMA */ + u64 z_port_xmit_discards; /* starting count for PMA */ + u64 z_port_xmit_data; /* starting count for PMA */ + u64 z_port_rcv_data; /* starting count for PMA */ + u64 z_port_xmit_packets; /* starting count for PMA */ + u64 z_port_rcv_packets; /* starting count for PMA */ + u32 z_local_link_integrity_errors; /* starting count for PMA */ + u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */ + u32 z_vl15_dropped; /* starting count for PMA */ + u32 n_rc_resends; + u32 n_rc_acks; + u32 n_rc_qacks; + u32 n_rc_delayed_comp; + u32 n_seq_naks; + u32 n_rdma_seq; + u32 n_rnr_naks; + u32 
n_other_naks; + u32 n_loop_pkts; + u32 n_pkt_drops; + u32 n_vl15_dropped; + u32 n_rc_timeouts; + u32 n_dmawait; + u32 n_unaligned; + u32 n_rc_dupreq; + u32 n_rc_seqnak; + u32 port_cap_flags; + u32 pma_sample_start; + u32 pma_sample_interval; + __be16 pma_counter_select[5]; + u16 pma_tag; + u16 pkey_violations; + u16 qkey_violations; + u16 mkey_violations; + u16 mkey_lease_period; + u16 sm_lid; + u16 repress_traps; + u8 sm_sl; + u8 mkeyprot; + u8 subnet_timeout; + u8 vl_high_limit; + u8 sl_to_vl[16]; + +}; + + +struct qib_ibdev { + struct ib_device ibdev; + struct list_head pending_mmaps; + spinlock_t mmap_offset_lock; /* protect mmap_offset */ + u32 mmap_offset; + struct qib_mregion __rcu *dma_mr; + + /* QP numbers are shared by all IB ports */ + struct qib_qpn_table qpn_table; + struct qib_lkey_table lk_table; + struct list_head piowait; /* list for wait PIO buf */ + struct list_head dmawait; /* list for wait DMA */ + struct list_head txwait; /* list for wait qib_verbs_txreq */ + struct list_head memwait; /* list for wait kernel memory */ + struct list_head txreq_free; + struct timer_list mem_timer; + struct qib_qp __rcu **qp_table; + struct qib_pio_header *pio_hdrs; + dma_addr_t pio_hdrs_phys; + /* list of QPs waiting for RNR timer */ + spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */ + u32 qp_table_size; /* size of the hash table */ + u32 qp_rnd; /* random bytes for hash */ + spinlock_t qpt_lock; + + u32 n_piowait; + u32 n_txwait; + + u32 n_pds_allocated; /* number of PDs allocated for device */ + spinlock_t n_pds_lock; + u32 n_ahs_allocated; /* number of AHs allocated for device */ + spinlock_t n_ahs_lock; + u32 n_cqs_allocated; /* number of CQs allocated for device */ + spinlock_t n_cqs_lock; + u32 n_qps_allocated; /* number of QPs allocated for device */ + spinlock_t n_qps_lock; + u32 n_srqs_allocated; /* number of SRQs allocated for device */ + spinlock_t n_srqs_lock; + u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ + spinlock_t n_mcast_grps_lock; +#ifdef CONFIG_DEBUG_FS + /* per HCA debugfs */ + struct dentry *qib_ibdev_dbg; +#endif +}; + +struct qib_verbs_counters { + u64 symbol_error_counter; + u64 link_error_recovery_counter; + u64 link_downed_counter; + u64 port_rcv_errors; + u64 port_rcv_remphys_errors; + u64 port_xmit_discards; + u64 port_xmit_data; + u64 port_rcv_data; + u64 port_xmit_packets; + u64 port_rcv_packets; + u32 local_link_integrity_errors; + u32 excessive_buffer_overrun_errors; + u32 vl15_dropped; +}; + +static inline struct qib_mr *to_imr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct qib_mr, ibmr); +} + +static inline struct qib_pd *to_ipd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct qib_pd, ibpd); +} + +static inline struct qib_ah *to_iah(struct ib_ah *ibah) +{ + return container_of(ibah, struct qib_ah, ibah); +} + +static inline struct qib_cq *to_icq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct qib_cq, ibcq); +} + +static inline struct qib_srq *to_isrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct qib_srq, ibsrq); +} + +static inline struct qib_qp *to_iqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct qib_qp, ibqp); +} + +static inline struct qib_ibdev *to_idev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct qib_ibdev, ibdev); +} + +/* + * Send if not busy or waiting for I/O and either + * a RC response is pending or we can process send work requests. 
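+ * Equivalently: QIB_S_BUSY and all QIB_S_ANY_WAIT_IO bits must be
+ * clear, and at least one of these must hold: a header is already
+ * built (s_hdrwords != 0), QIB_S_RESP_PENDING is set, or no
+ * QIB_S_ANY_WAIT_SEND bit is set.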
+ */ +static inline int qib_send_ok(struct qib_qp *qp) +{ + return !(qp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT_IO)) && + (qp->s_hdrwords || (qp->s_flags & QIB_S_RESP_PENDING) || + !(qp->s_flags & QIB_S_ANY_WAIT_SEND)); +} + +/* + * This must be called with s_lock held. + */ +void qib_schedule_send(struct qib_qp *qp); + +static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) +{ + u16 p1 = pkey1 & 0x7FFF; + u16 p2 = pkey2 & 0x7FFF; + + /* + * Low 15 bits must be non-zero and match, and + * one of the two must be a full member. + */ + return p1 && p1 == p2 && ((__s16)pkey1 < 0 || (__s16)pkey2 < 0); +} + +void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl, + u32 qp1, u32 qp2, __be16 lid1, __be16 lid2); +void qib_cap_mask_chg(struct qib_ibport *ibp); +void qib_sys_guid_chg(struct qib_ibport *ibp); +void qib_node_desc_chg(struct qib_ibport *ibp); +int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + struct ib_wc *in_wc, struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad); +int qib_create_agents(struct qib_ibdev *dev); +void qib_free_agents(struct qib_ibdev *dev); + +/* + * Compare the lower 24 bits of the two values. + * Returns an integer <, ==, or > than zero. + */ +static inline int qib_cmp24(u32 a, u32 b) +{ + return (((int) a) - ((int) b)) << 8; +} + +struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid); + +int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords, + u64 *rwords, u64 *spkts, u64 *rpkts, + u64 *xmit_wait); + +int qib_get_counters(struct qib_pportdata *ppd, + struct qib_verbs_counters *cntrs); + +int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); + +int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); + +int qib_mcast_tree_empty(struct qib_ibport *ibp); + +__be32 qib_compute_aeth(struct qib_qp *qp); + +struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn); + +struct ib_qp *qib_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); + +int qib_destroy_qp(struct ib_qp *ibqp); + +int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err); + +int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + +int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_qp_init_attr *init_attr); + +unsigned qib_free_all_qps(struct qib_devdata *dd); + +void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt); + +void qib_free_qpn_table(struct qib_qpn_table *qpt); + +#ifdef CONFIG_DEBUG_FS + +struct qib_qp_iter; + +struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev); + +int qib_qp_iter_next(struct qib_qp_iter *iter); + +void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter); + +#endif + +void qib_get_credit(struct qib_qp *qp, u32 aeth); + +unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult); + +void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail); + +void qib_put_txreq(struct qib_verbs_txreq *tx); + +int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr, + u32 hdrwords, struct qib_sge_state *ss, u32 len); + +void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, + int release); + +void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release); + +void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp); + +void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header 
*hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp); + +int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); + +struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); + +void qib_rc_rnr_retry(unsigned long arg); + +void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr); + +void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err); + +int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr); + +void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, void *data, u32 tlen, struct qib_qp *qp); + +int qib_alloc_lkey(struct qib_mregion *mr, int dma_region); + +void qib_free_lkey(struct qib_mregion *mr); + +int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, + struct qib_sge *isge, struct ib_sge *sge, int acc); + +int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, + u32 len, u64 vaddr, u32 rkey, int acc); + +int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); + +struct ib_srq *qib_create_srq(struct ib_pd *ibpd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); + +int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, + struct ib_udata *udata); + +int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); + +int qib_destroy_srq(struct ib_srq *ibsrq); + +int qib_cq_init(struct qib_devdata *dd); + +void qib_cq_exit(struct qib_devdata *dd); + +void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig); + +int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); + +struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries, + int comp_vector, struct ib_ucontext *context, + struct ib_udata *udata); + +int qib_destroy_cq(struct ib_cq *ibcq); + +int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); + +int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); + +struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc); + +struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, int acc, u64 *iova_start); + +struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_udata *udata); + +int qib_dereg_mr(struct ib_mr *ibmr); + +struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); + +struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list( + struct ib_device *ibdev, int page_list_len); + +void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl); + +int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr); + +struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, + int list_len, u64 iova); + +int qib_unmap_fmr(struct list_head *fmr_list); + +int qib_dealloc_fmr(struct ib_fmr *ibfmr); + +static inline void qib_get_mr(struct qib_mregion *mr) +{ + atomic_inc(&mr->refcount); +} + +void mr_rcu_callback(struct rcu_head *list); + +static inline void qib_put_mr(struct qib_mregion *mr) +{ + if (unlikely(atomic_dec_and_test(&mr->refcount))) + call_rcu(&mr->list, mr_rcu_callback); +} + +static inline void qib_put_ss(struct qib_sge_state *ss) +{ + while (ss->num_sge) { + qib_put_mr(ss->sge.mr); + if (--ss->num_sge) + ss->sge = *ss->sg_list++; + } +} + + +void qib_release_mmap_info(struct kref *ref); + +struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 
size, + struct ib_ucontext *context, + void *obj); + +void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip, + u32 size, void *obj); + +int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +int qib_get_rwqe(struct qib_qp *qp, int wr_id_only); + +void qib_migrate_qp(struct qib_qp *qp); + +int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, + int has_grh, struct qib_qp *qp, u32 bth0); + +u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, + struct ib_global_route *grh, u32 hwords, u32 nwords); + +void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, + u32 bth0, u32 bth2); + +void qib_do_send(struct work_struct *work); + +void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe, + enum ib_wc_status status); + +void qib_send_rc_ack(struct qib_qp *qp); + +int qib_make_rc_req(struct qib_qp *qp); + +int qib_make_uc_req(struct qib_qp *qp); + +int qib_make_ud_req(struct qib_qp *qp); + +int qib_register_ib_device(struct qib_devdata *); + +void qib_unregister_ib_device(struct qib_devdata *); + +void qib_ib_rcv(struct qib_ctxtdata *, void *, void *, u32); + +void qib_ib_piobufavail(struct qib_devdata *); + +unsigned qib_get_npkeys(struct qib_devdata *); + +unsigned qib_get_pkey(struct qib_ibport *, unsigned); + +extern const enum ib_wc_opcode ib_qib_wc_opcode[]; + +/* + * Below HCA-independent IB PhysPortState values, returned + * by the f_ibphys_portstate() routine. + */ +#define IB_PHYSPORTSTATE_SLEEP 1 +#define IB_PHYSPORTSTATE_POLL 2 +#define IB_PHYSPORTSTATE_DISABLED 3 +#define IB_PHYSPORTSTATE_CFG_TRAIN 4 +#define IB_PHYSPORTSTATE_LINKUP 5 +#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6 +#define IB_PHYSPORTSTATE_CFG_DEBOUNCE 8 +#define IB_PHYSPORTSTATE_CFG_IDLE 0xB +#define IB_PHYSPORTSTATE_RECOVERY_RETRAIN 0xC +#define IB_PHYSPORTSTATE_RECOVERY_WAITRMT 0xE +#define IB_PHYSPORTSTATE_RECOVERY_IDLE 0xF +#define IB_PHYSPORTSTATE_CFG_ENH 0x10 +#define IB_PHYSPORTSTATE_CFG_WAIT_ENH 0x13 + +extern const int ib_qib_state_ops[]; + +extern __be64 ib_qib_sys_image_guid; /* in network order */ + +extern unsigned int ib_qib_lkey_table_size; + +extern unsigned int ib_qib_max_cqes; + +extern unsigned int ib_qib_max_cqs; + +extern unsigned int ib_qib_max_qp_wrs; + +extern unsigned int ib_qib_max_qps; + +extern unsigned int ib_qib_max_sges; + +extern unsigned int ib_qib_max_mcast_grps; + +extern unsigned int ib_qib_max_mcast_qp_attached; + +extern unsigned int ib_qib_max_srqs; + +extern unsigned int ib_qib_max_srq_sges; + +extern unsigned int ib_qib_max_srq_wrs; + +extern const u32 ib_qib_rnr_table[]; + +extern struct ib_dma_mapping_ops qib_dma_mapping_ops; + +#endif /* QIB_VERBS_H */ diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c new file mode 100644 index 00000000000..dabb697b1c2 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/rculist.h> + +#include "qib.h" + +/** + * qib_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct + * @qp: the QP to link + */ +static struct qib_mcast_qp *qib_mcast_qp_alloc(struct qib_qp *qp) +{ + struct qib_mcast_qp *mqp; + + mqp = kmalloc(sizeof *mqp, GFP_KERNEL); + if (!mqp) + goto bail; + + mqp->qp = qp; + atomic_inc(&qp->refcount); + +bail: + return mqp; +} + +static void qib_mcast_qp_free(struct qib_mcast_qp *mqp) +{ + struct qib_qp *qp = mqp->qp; + + /* Notify qib_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + + kfree(mqp); +} + +/** + * qib_mcast_alloc - allocate the multicast GID structure + * @mgid: the multicast GID + * + * A list of QPs will be attached to this structure. + */ +static struct qib_mcast *qib_mcast_alloc(union ib_gid *mgid) +{ + struct qib_mcast *mcast; + + mcast = kmalloc(sizeof *mcast, GFP_KERNEL); + if (!mcast) + goto bail; + + mcast->mgid = *mgid; + INIT_LIST_HEAD(&mcast->qp_list); + init_waitqueue_head(&mcast->wait); + atomic_set(&mcast->refcount, 0); + mcast->n_attached = 0; + +bail: + return mcast; +} + +static void qib_mcast_free(struct qib_mcast *mcast) +{ + struct qib_mcast_qp *p, *tmp; + + list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) + qib_mcast_qp_free(p); + + kfree(mcast); +} + +/** + * qib_mcast_find - search the global table for the given multicast GID + * @ibp: the IB port structure + * @mgid: the multicast GID to search for + * + * Returns NULL if not found. + * + * The caller is responsible for decrementing the reference count if found. 
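+ *
+ * A sketch of the matching release (illustration only; callers in this
+ * driver open-code the put against the refcount/wait protocol below):
+ *
+ *	struct qib_mcast *mcast = qib_mcast_find(ibp, &mgid);
+ *	if (mcast) {
+ *		... deliver to each QP on mcast->qp_list ...
+ *		if (atomic_dec_and_test(&mcast->refcount))
+ *			wake_up(&mcast->wait);
+ *	}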
+ */ +struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid) +{ + struct rb_node *n; + unsigned long flags; + struct qib_mcast *mcast; + + spin_lock_irqsave(&ibp->lock, flags); + n = ibp->mcast_tree.rb_node; + while (n) { + int ret; + + mcast = rb_entry(n, struct qib_mcast, rb_node); + + ret = memcmp(mgid->raw, mcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else { + atomic_inc(&mcast->refcount); + spin_unlock_irqrestore(&ibp->lock, flags); + goto bail; + } + } + spin_unlock_irqrestore(&ibp->lock, flags); + + mcast = NULL; + +bail: + return mcast; +} + +/** + * qib_mcast_add - insert mcast GID into table and attach QP struct + * @mcast: the mcast GID table + * @mqp: the QP to attach + * + * Return zero if both were added. Return EEXIST if the GID was already in + * the table but the QP was added. Return ESRCH if the QP was already + * attached and neither structure was added. + */ +static int qib_mcast_add(struct qib_ibdev *dev, struct qib_ibport *ibp, + struct qib_mcast *mcast, struct qib_mcast_qp *mqp) +{ + struct rb_node **n = &ibp->mcast_tree.rb_node; + struct rb_node *pn = NULL; + int ret; + + spin_lock_irq(&ibp->lock); + + while (*n) { + struct qib_mcast *tmcast; + struct qib_mcast_qp *p; + + pn = *n; + tmcast = rb_entry(pn, struct qib_mcast, rb_node); + + ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) { + n = &pn->rb_left; + continue; + } + if (ret > 0) { + n = &pn->rb_right; + continue; + } + + /* Search the QP list to see if this is already there. */ + list_for_each_entry_rcu(p, &tmcast->qp_list, list) { + if (p->qp == mqp->qp) { + ret = ESRCH; + goto bail; + } + } + if (tmcast->n_attached == ib_qib_max_mcast_qp_attached) { + ret = ENOMEM; + goto bail; + } + + tmcast->n_attached++; + + list_add_tail_rcu(&mqp->list, &tmcast->qp_list); + ret = EEXIST; + goto bail; + } + + spin_lock(&dev->n_mcast_grps_lock); + if (dev->n_mcast_grps_allocated == ib_qib_max_mcast_grps) { + spin_unlock(&dev->n_mcast_grps_lock); + ret = ENOMEM; + goto bail; + } + + dev->n_mcast_grps_allocated++; + spin_unlock(&dev->n_mcast_grps_lock); + + mcast->n_attached++; + + list_add_tail_rcu(&mqp->list, &mcast->qp_list); + + atomic_inc(&mcast->refcount); + rb_link_node(&mcast->rb_node, pn, n); + rb_insert_color(&mcast->rb_node, &ibp->mcast_tree); + + ret = 0; + +bail: + spin_unlock_irq(&ibp->lock); + + return ret; +} + +int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct qib_qp *qp = to_iqp(ibqp); + struct qib_ibdev *dev = to_idev(ibqp->device); + struct qib_ibport *ibp; + struct qib_mcast *mcast; + struct qib_mcast_qp *mqp; + int ret; + + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { + ret = -EINVAL; + goto bail; + } + + /* + * Allocate data structures since its better to do this outside of + * spin locks and it will most likely be needed. + */ + mcast = qib_mcast_alloc(gid); + if (mcast == NULL) { + ret = -ENOMEM; + goto bail; + } + mqp = qib_mcast_qp_alloc(qp); + if (mqp == NULL) { + qib_mcast_free(mcast); + ret = -ENOMEM; + goto bail; + } + ibp = to_iport(ibqp->device, qp->port_num); + switch (qib_mcast_add(dev, ibp, mcast, mqp)) { + case ESRCH: + /* Neither was used: OK to attach the same QP twice. */ + qib_mcast_qp_free(mqp); + qib_mcast_free(mcast); + break; + + case EEXIST: /* The mcast wasn't used */ + qib_mcast_free(mcast); + break; + + case ENOMEM: + /* Exceeded the maximum number of mcast groups. 
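+ * Note that qib_mcast_add() deliberately returns positive
+ * errno-style values: ESRCH means this QP was already attached
+ * and EEXIST means the GID already existed but the QP was
+ * attached to it; both are benign. Only this ENOMEM case, hit
+ * when the group or per-group attach limit is exceeded, is
+ * converted to a negative errno for the caller.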
*/ + qib_mcast_qp_free(mqp); + qib_mcast_free(mcast); + ret = -ENOMEM; + goto bail; + + default: + break; + } + + ret = 0; + +bail: + return ret; +} + +int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct qib_qp *qp = to_iqp(ibqp); + struct qib_ibdev *dev = to_idev(ibqp->device); + struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num); + struct qib_mcast *mcast = NULL; + struct qib_mcast_qp *p, *tmp; + struct rb_node *n; + int last = 0; + int ret; + + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { + ret = -EINVAL; + goto bail; + } + + spin_lock_irq(&ibp->lock); + + /* Find the GID in the mcast table. */ + n = ibp->mcast_tree.rb_node; + while (1) { + if (n == NULL) { + spin_unlock_irq(&ibp->lock); + ret = -EINVAL; + goto bail; + } + + mcast = rb_entry(n, struct qib_mcast, rb_node); + ret = memcmp(gid->raw, mcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else + break; + } + + /* Search the QP list. */ + list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) { + if (p->qp != qp) + continue; + /* + * We found it, so remove it, but don't poison the forward + * link until we are sure there are no list walkers. + */ + list_del_rcu(&p->list); + mcast->n_attached--; + + /* If this was the last attached QP, remove the GID too. */ + if (list_empty(&mcast->qp_list)) { + rb_erase(&mcast->rb_node, &ibp->mcast_tree); + last = 1; + } + break; + } + + spin_unlock_irq(&ibp->lock); + + if (p) { + /* + * Wait for any list walkers to finish before freeing the + * list element. + */ + wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); + qib_mcast_qp_free(p); + } + if (last) { + atomic_dec(&mcast->refcount); + wait_event(mcast->wait, !atomic_read(&mcast->refcount)); + qib_mcast_free(mcast); + spin_lock_irq(&dev->n_mcast_grps_lock); + dev->n_mcast_grps_allocated--; + spin_unlock_irq(&dev->n_mcast_grps_lock); + } + + ret = 0; + +bail: + return ret; +} + +int qib_mcast_tree_empty(struct qib_ibport *ibp) +{ + return ibp->mcast_tree.rb_node == NULL; +} diff --git a/drivers/infiniband/hw/ipath/ipath_7220.h b/drivers/infiniband/hw/qib/qib_wc_ppc64.c index 74fa5cc5131..673cf4c22eb 100644 --- a/drivers/infiniband/hw/ipath/ipath_7220.h +++ b/drivers/infiniband/hw/qib/qib_wc_ppc64.c @@ -1,7 +1,5 @@ -#ifndef _IPATH_7220_H -#define _IPATH_7220_H /* - * Copyright (c) 2007 QLogic Corporation. All rights reserved. + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,25 +31,32 @@ */ /* - * This header file provides the declarations and common definitions - * for (mostly) manipulation of the SerDes blocks within the IBA7220. - * the functions declared should only be called from within other - * 7220-related files such as ipath_iba7220.c or ipath_sd7220.c. + * This file is conditionally built on PowerPC only. Otherwise weak symbol + * versions of the functions exported from here are used. 
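+ *
+ * For illustration only (hypothetical declaration; the actual fallback
+ * definitions live elsewhere in the driver), the weak-symbol scheme
+ * works along these lines:
+ *
+ *	int __attribute__((__weak__)) qib_enable_wc(struct qib_devdata *dd)
+ *	{
+ *		return 0;
+ *	}
+ *
+ * A strong definition in an arch-specific file such as this one then
+ * overrides the weak default at link time.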
*/ -int ipath_sd7220_presets(struct ipath_devdata *dd); -int ipath_sd7220_init(struct ipath_devdata *dd, int was_reset); -int ipath_sd7220_prog_ld(struct ipath_devdata *dd, int sdnum, u8 *img, - int len, int offset); -int ipath_sd7220_prog_vfy(struct ipath_devdata *dd, int sdnum, const u8 *img, - int len, int offset); -/* - * Below used for sdnum parameter, selecting one of the two sections - * used for PCIe, or the single SerDes used for IB, which is the - * only one currently used - */ -#define IB_7220_SERDES 2 -int ipath_sd7220_ib_load(struct ipath_devdata *dd); -int ipath_sd7220_ib_vfy(struct ipath_devdata *dd); +#include "qib.h" -#endif /* _IPATH_7220_H */ +/** + * qib_enable_wc - enable write combining for MMIO writes to the device + * @dd: qlogic_ib device + * + * Nothing to do on PowerPC, so just return without error. + */ +int qib_enable_wc(struct qib_devdata *dd) +{ + return 0; +} + +/** + * qib_unordered_wc - indicate whether write combining is unordered + * + * Because our performance depends on our ability to do write + * combining mmio writes in the most efficient way, we need to + * know if we are on a processor that may reorder stores when + * write combining. + */ +int qib_unordered_wc(void) +{ + return 1; +} diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c new file mode 100644 index 00000000000..1d7281c5a02 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2012 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file is conditionally built on x86_64 only. Otherwise weak symbol + * versions of the functions exported from here are used. + */ + +#include <linux/pci.h> +#include <asm/mtrr.h> +#include <asm/processor.h> + +#include "qib.h" + +/** + * qib_enable_wc - enable write combining for MMIO writes to the device + * @dd: qlogic_ib device + * + * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable + * write combining. 
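+ *
+ * An MTRR can only describe a region whose size is a power of two and
+ * whose base is aligned to that size. As a sketch with made-up numbers
+ * (not taken from any real chip): a 96KB PIO range would be rounded up
+ * to 128KB, and a base that is not 128KB-aligned would be aligned down
+ * and the length doubled, after checking that the realigned range
+ * still lies inside BAR 0; otherwise this routine fails with -ENODEV
+ * rather than set up a misaligned MTRR.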
+ */ +int qib_enable_wc(struct qib_devdata *dd) +{ + int ret = 0; + u64 pioaddr, piolen; + unsigned bits; + const unsigned long addr = pci_resource_start(dd->pcidev, 0); + const size_t len = pci_resource_len(dd->pcidev, 0); + + /* + * Set the PIO buffers to be WCCOMB, so we get HT bursts to the + * chip. Linux (possibly the hardware) requires it to be on a power + * of 2 address matching the length (which has to be a power of 2). + * For rev1, that means the base address, for rev2, it will be just + * the PIO buffers themselves. + * For chips with two sets of buffers, the calculations are + * somewhat more complicated; we need to sum, and the piobufbase + * register has both offsets, 2K in low 32 bits, 4K in high 32 bits. + * The buffers are still packed, so a single range covers both. + */ + if (dd->piobcnt2k && dd->piobcnt4k) { + /* 2 sizes for chip */ + unsigned long pio2kbase, pio4kbase; + pio2kbase = dd->piobufbase & 0xffffffffUL; + pio4kbase = (dd->piobufbase >> 32) & 0xffffffffUL; + if (pio2kbase < pio4kbase) { + /* all current chips */ + pioaddr = addr + pio2kbase; + piolen = pio4kbase - pio2kbase + + dd->piobcnt4k * dd->align4k; + } else { + pioaddr = addr + pio4kbase; + piolen = pio2kbase - pio4kbase + + dd->piobcnt2k * dd->palign; + } + } else { /* single buffer size (2K, currently) */ + pioaddr = addr + dd->piobufbase; + piolen = dd->piobcnt2k * dd->palign + + dd->piobcnt4k * dd->align4k; + } + + for (bits = 0; !(piolen & (1ULL << bits)); bits++) + /* do nothing */ ; + + if (piolen != (1ULL << bits)) { + piolen >>= bits; + while (piolen >>= 1) + bits++; + piolen = 1ULL << (bits + 1); + } + if (pioaddr & (piolen - 1)) { + u64 atmp; + atmp = pioaddr & ~(piolen - 1); + if (atmp < addr || (atmp + piolen) > (addr + len)) { + qib_dev_err(dd, + "No way to align address/size (%llx/%llx), no WC mtrr\n", + (unsigned long long) atmp, + (unsigned long long) piolen << 1); + ret = -ENODEV; + } else { + pioaddr = atmp; + piolen <<= 1; + } + } + + if (!ret) { + int cookie; + + cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0); + if (cookie < 0) { + qib_devinfo(dd->pcidev, + "mtrr_add() WC for PIO bufs failed (%d)\n", + cookie); + ret = -EINVAL; + } else { + dd->wc_cookie = cookie; + dd->wc_base = (unsigned long) pioaddr; + dd->wc_len = (unsigned long) piolen; + } + } + + return ret; +} + +/** + * qib_disable_wc - disable write combining for MMIO writes to the device + * @dd: qlogic_ib device + */ +void qib_disable_wc(struct qib_devdata *dd) +{ + if (dd->wc_cookie) { + int r; + + r = mtrr_del(dd->wc_cookie, dd->wc_base, + dd->wc_len); + if (r < 0) + qib_devinfo(dd->pcidev, + "mtrr_del(%lx, %lx, %lx) failed: %d\n", + dd->wc_cookie, dd->wc_base, + dd->wc_len, r); + dd->wc_cookie = 0; /* even on failure */ + } +} + +/** + * qib_unordered_wc - indicate whether write combining is unordered + * + * Because our performance depends on our ability to do write combining mmio + * writes in the most efficient way, we need to know if we are on an Intel + * or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in + * the order completed, and so no special flushing is required to get + * correct ordering. Intel processors, however, will flush write buffers + * out in "random" orders, and so explicit ordering is needed at times. 
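+ *
+ * A hypothetical use (illustration only; buffer names are made up and
+ * this exact fragment is not part of this patch): PIO copy routines
+ * can skip the store fence on processors that preserve WC ordering,
+ *
+ *	memcpy_toio(piobuf, hdr, hdrwords << 2);
+ *	if (qib_unordered_wc())
+ *		wmb();
+ *
+ * paying for the barrier only where write-combined stores may be
+ * reordered.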
+ */ +int qib_unordered_wc(void) +{ + return boot_cpu_data.x86_vendor != X86_VENDOR_AMD; +} diff --git a/drivers/infiniband/hw/usnic/Kconfig b/drivers/infiniband/hw/usnic/Kconfig new file mode 100644 index 00000000000..29ab11c34f3 --- /dev/null +++ b/drivers/infiniband/hw/usnic/Kconfig @@ -0,0 +1,10 @@ +config INFINIBAND_USNIC + tristate "Verbs support for Cisco VIC" + depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU + select ENIC + select NET_VENDOR_CISCO + select PCI_IOV + select INFINIBAND_USER_ACCESS + ---help--- + This is a low-level driver for Cisco's Virtual Interface + Cards (VICs), including the VIC 1240 and 1280 cards. diff --git a/drivers/infiniband/hw/usnic/Makefile b/drivers/infiniband/hw/usnic/Makefile new file mode 100644 index 00000000000..99fb2db47cd --- /dev/null +++ b/drivers/infiniband/hw/usnic/Makefile @@ -0,0 +1,15 @@ +ccflags-y := -Idrivers/net/ethernet/cisco/enic + +obj-$(CONFIG_INFINIBAND_USNIC)+= usnic_verbs.o + +usnic_verbs-y=\ +usnic_fwd.o \ +usnic_transport.o \ +usnic_uiom.o \ +usnic_uiom_interval_tree.o \ +usnic_vnic.o \ +usnic_ib_main.o \ +usnic_ib_qp_grp.o \ +usnic_ib_sysfs.o \ +usnic_ib_verbs.o \ +usnic_debugfs.o \ diff --git a/drivers/infiniband/hw/usnic/usnic.h b/drivers/infiniband/hw/usnic/usnic.h new file mode 100644 index 00000000000..5be13d8991b --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_H_ +#define USNIC_H_ + +#define DRV_NAME "usnic_verbs" + +#define PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC 0x00cf /* User space NIC */ + +#define DRV_VERSION "1.0.3" +#define DRV_RELDATE "December 19, 2013" + +#endif /* USNIC_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h new file mode 100644 index 00000000000..04a66229584 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_abi.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + + +#ifndef USNIC_ABI_H +#define USNIC_ABI_H + +/* ABI between userspace and kernel */ +#define USNIC_UVERBS_ABI_VERSION 4 + +#define USNIC_QP_GRP_MAX_WQS 8 +#define USNIC_QP_GRP_MAX_RQS 8 +#define USNIC_QP_GRP_MAX_CQS 16 + +enum usnic_transport_type { + USNIC_TRANSPORT_UNKNOWN = 0, + USNIC_TRANSPORT_ROCE_CUSTOM = 1, + USNIC_TRANSPORT_IPV4_UDP = 2, + USNIC_TRANSPORT_MAX = 3, +}; + +struct usnic_transport_spec { + enum usnic_transport_type trans_type; + union { + struct { + uint16_t port_num; + } usnic_roce; + struct { + uint32_t sock_fd; + } udp; + }; +}; + +struct usnic_ib_create_qp_cmd { + struct usnic_transport_spec spec; +}; + +/*TODO: Future - usnic_modify_qp needs to pass in generic filters */ +struct usnic_ib_create_qp_resp { + u32 vfid; + u32 qp_grp_id; + u64 bar_bus_addr; + u32 bar_len; +/* + * WQ, RQ, CQ are explicitly specified because exposing a generic resources + * interface expands the scope of the ABI to many files. + */ + u32 wq_cnt; + u32 rq_cnt; + u32 cq_cnt; + u32 wq_idx[USNIC_QP_GRP_MAX_WQS]; + u32 rq_idx[USNIC_QP_GRP_MAX_RQS]; + u32 cq_idx[USNIC_QP_GRP_MAX_CQS]; + u32 transport; + u32 reserved[9]; +}; + +#endif /* USNIC_ABI_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h new file mode 100644 index 00000000000..39356726614 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_CMN_PKT_HDR_H +#define USNIC_CMN_PKT_HDR_H + +#define USNIC_ROCE_ETHERTYPE (0x8915) +#define USNIC_ROCE_GRH_VER (8) +#define USNIC_PROTO_VER (1) +#define USNIC_ROCE_GRH_VER_SHIFT (4) + +#endif /* USNIC_COMMON_PKT_HDR_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_common_util.h b/drivers/infiniband/hw/usnic/usnic_common_util.h new file mode 100644 index 00000000000..9d737ed5e55 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_common_util.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef USNIC_CMN_UTIL_H +#define USNIC_CMN_UTIL_H + +static inline void +usnic_mac_to_gid(const char *const mac, char *raw_gid) +{ + raw_gid[0] = 0xfe; + raw_gid[1] = 0x80; + memset(&raw_gid[2], 0, 6); + raw_gid[8] = mac[0]^2; + raw_gid[9] = mac[1]; + raw_gid[10] = mac[2]; + raw_gid[11] = 0xff; + raw_gid[12] = 0xfe; + raw_gid[13] = mac[3]; + raw_gid[14] = mac[4]; + raw_gid[15] = mac[5]; +} + +static inline void +usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid) +{ + raw_gid[0] = 0xfe; + raw_gid[1] = 0x80; + memset(&raw_gid[2], 0, 2); + memcpy(&raw_gid[4], &inaddr, 4); + raw_gid[8] = mac[0]^2; + raw_gid[9] = mac[1]; + raw_gid[10] = mac[2]; + raw_gid[11] = 0xff; + raw_gid[12] = 0xfe; + raw_gid[13] = mac[3]; + raw_gid[14] = mac[4]; + raw_gid[15] = mac[5]; +} + +static inline void +usnic_write_gid_if_id_from_mac(char *mac, char *raw_gid) +{ + raw_gid[8] = mac[0]^2; + raw_gid[9] = mac[1]; + raw_gid[10] = mac[2]; + raw_gid[11] = 0xff; + raw_gid[12] = 0xfe; + raw_gid[13] = mac[3]; + raw_gid[14] = mac[4]; + raw_gid[15] = mac[5]; +} + +#endif /* USNIC_COMMON_UTIL_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c new file mode 100644 index 00000000000..5d13860161a --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#include <linux/debugfs.h> +#include <linux/module.h> + +#include "usnic.h" +#include "usnic_log.h" +#include "usnic_debugfs.h" +#include "usnic_ib_qp_grp.h" +#include "usnic_transport.h" + +static struct dentry *debugfs_root; +static struct dentry *flows_dentry; + +static ssize_t usnic_debugfs_buildinfo_read(struct file *f, char __user *data, + size_t count, loff_t *ppos) +{ + char buf[500]; + int res; + + if (*ppos > 0) + return 0; + + res = scnprintf(buf, sizeof(buf), + "version: %s\n" + "build date: %s\n", + DRV_VERSION, DRV_RELDATE); + + return simple_read_from_buffer(data, count, ppos, buf, res); +} + +static const struct file_operations usnic_debugfs_buildinfo_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = usnic_debugfs_buildinfo_read +}; + +static ssize_t flowinfo_read(struct file *f, char __user *data, + size_t count, loff_t *ppos) +{ + struct usnic_ib_qp_grp_flow *qp_flow; + int n; + int left; + char *ptr; + char buf[512]; + + qp_flow = f->private_data; + ptr = buf; + left = count; + + if (*ppos > 0) + return 0; + + spin_lock(&qp_flow->qp_grp->lock); + n = scnprintf(ptr, left, + "QP Grp ID: %d Transport: %s ", + qp_flow->qp_grp->grp_id, + usnic_transport_to_str(qp_flow->trans_type)); + UPDATE_PTR_LEFT(n, ptr, left); + if (qp_flow->trans_type == USNIC_TRANSPORT_ROCE_CUSTOM) { + n = scnprintf(ptr, left, "Port_Num:%hu\n", + qp_flow->usnic_roce.port_num); + UPDATE_PTR_LEFT(n, ptr, left); + } else if (qp_flow->trans_type == USNIC_TRANSPORT_IPV4_UDP) { + n = usnic_transport_sock_to_str(ptr, left, + qp_flow->udp.sock); + UPDATE_PTR_LEFT(n, ptr, left); + n = scnprintf(ptr, left, "\n"); + UPDATE_PTR_LEFT(n, ptr, left); + } + spin_unlock(&qp_flow->qp_grp->lock); + + return simple_read_from_buffer(data, count, ppos, buf, ptr - buf); +} + +static const struct file_operations flowinfo_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = flowinfo_read, +}; + +void usnic_debugfs_init(void) +{ + debugfs_root = debugfs_create_dir(DRV_NAME, NULL); + if (IS_ERR(debugfs_root)) { + usnic_err("Failed to create debugfs root dir, check if debugfs is enabled in kernel configuration\n"); + goto out_clear_root; + } + + flows_dentry = debugfs_create_dir("flows", debugfs_root); + if (IS_ERR_OR_NULL(flows_dentry)) { + usnic_err("Failed to create debugfs flow dir with err %ld\n", + PTR_ERR(flows_dentry)); + goto out_free_root; + } + + debugfs_create_file("build-info", S_IRUGO, debugfs_root, + NULL, &usnic_debugfs_buildinfo_ops); + return; + +out_free_root: + debugfs_remove_recursive(debugfs_root); +out_clear_root: + debugfs_root = NULL; +} + +void usnic_debugfs_exit(void) +{ + if (!debugfs_root) + return; + + debugfs_remove_recursive(debugfs_root); + debugfs_root = NULL; +} + +void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow) +{ + if (IS_ERR_OR_NULL(flows_dentry)) + return; + + scnprintf(qp_flow->dentry_name, sizeof(qp_flow->dentry_name), + "%u", qp_flow->flow->flow_id); + qp_flow->dbgfs_dentry = debugfs_create_file(qp_flow->dentry_name, + S_IRUGO, + flows_dentry, + qp_flow, + &flowinfo_ops); + if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) { + usnic_err("Failed to create dbg fs entry for flow %u\n", + qp_flow->flow->flow_id); + } +} + +void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow) +{ + if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) + debugfs_remove(qp_flow->dbgfs_dentry); +} diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.h b/drivers/infiniband/hw/usnic/usnic_debugfs.h new file mode 100644 index 00000000000..4087d24a88f --- 
/dev/null +++ b/drivers/infiniband/hw/usnic/usnic_debugfs.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#ifndef USNIC_DEBUGFS_H_ +#define USNIC_DEBUGFS_H_ + +#include "usnic_ib_qp_grp.h" + +void usnic_debugfs_init(void); + +void usnic_debugfs_exit(void); +void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow); +void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow); + +#endif /*!USNIC_DEBUGFS_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c new file mode 100644 index 00000000000..e3c9bd9d3ba --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_fwd.c @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ +#include <linux/netdevice.h> +#include <linux/pci.h> + +#include "enic_api.h" +#include "usnic_common_pkt_hdr.h" +#include "usnic_fwd.h" +#include "usnic_log.h" + +static int usnic_fwd_devcmd_locked(struct usnic_fwd_dev *ufdev, int vnic_idx, + enum vnic_devcmd_cmd cmd, u64 *a0, + u64 *a1) +{ + int status; + struct net_device *netdev = ufdev->netdev; + + lockdep_assert_held(&ufdev->lock); + + status = enic_api_devcmd_proxy_by_index(netdev, + vnic_idx, + cmd, + a0, a1, + 1000); + if (status) { + if (status == ERR_EINVAL && cmd == CMD_DEL_FILTER) { + usnic_dbg("Dev %s vnic idx %u cmd %u already deleted", + ufdev->name, vnic_idx, cmd); + } else { + usnic_err("Dev %s vnic idx %u cmd %u failed with status %d\n", + ufdev->name, vnic_idx, cmd, + status); + } + } else { + usnic_dbg("Dev %s vnic idx %u cmd %u success", + ufdev->name, vnic_idx, cmd); + } + + return status; +} + +static int usnic_fwd_devcmd(struct usnic_fwd_dev *ufdev, int vnic_idx, + enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1) +{ + int status; + + spin_lock(&ufdev->lock); + status = usnic_fwd_devcmd_locked(ufdev, vnic_idx, cmd, a0, a1); + spin_unlock(&ufdev->lock); + + return status; +} + +struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev) +{ + struct usnic_fwd_dev *ufdev; + + ufdev = kzalloc(sizeof(*ufdev), GFP_KERNEL); + if (!ufdev) + return NULL; + + ufdev->pdev = pdev; + ufdev->netdev = pci_get_drvdata(pdev); + spin_lock_init(&ufdev->lock); + strncpy(ufdev->name, netdev_name(ufdev->netdev), + sizeof(ufdev->name) - 1); + + return ufdev; +} + +void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev) +{ + kfree(ufdev); +} + +void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]) +{ + spin_lock(&ufdev->lock); + memcpy(&ufdev->mac, mac, sizeof(ufdev->mac)); + spin_unlock(&ufdev->lock); +} + +int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr) +{ + int status; + + spin_lock(&ufdev->lock); + if (ufdev->inaddr == 0) { + ufdev->inaddr = inaddr; + status = 0; + } else { + status = -EFAULT; + } + spin_unlock(&ufdev->lock); + + return status; +} + +void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev) +{ + spin_lock(&ufdev->lock); + ufdev->inaddr = 0; + spin_unlock(&ufdev->lock); +} + +void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev) +{ + spin_lock(&ufdev->lock); + ufdev->link_up = 1; + spin_unlock(&ufdev->lock); +} + +void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev) +{ + spin_lock(&ufdev->lock); + ufdev->link_up = 0; + spin_unlock(&ufdev->lock); +} + +void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu) +{ + spin_lock(&ufdev->lock); + ufdev->mtu = mtu; + spin_unlock(&ufdev->lock); +} + +static int usnic_fwd_dev_ready_locked(struct usnic_fwd_dev *ufdev) +{ + lockdep_assert_held(&ufdev->lock); + + if (!ufdev->link_up) + return -EPERM; + + return 0; +} + +static int validate_filter_locked(struct usnic_fwd_dev *ufdev, + struct filter *filter) +{ + + lockdep_assert_held(&ufdev->lock); + + if (filter->type == FILTER_IPV4_5TUPLE) { + if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_AD)) + return -EACCES; + if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_PT)) + return -EBUSY; + else if (ufdev->inaddr == 0) + return -EINVAL; + else if (filter->u.ipv4.dst_port == 0) + return -ERANGE; + else if (ntohl(ufdev->inaddr) != filter->u.ipv4.dst_addr) + return -EFAULT; + else + return 0; + } + + return 0; +} + +static void fill_tlv(struct filter_tlv *tlv, struct filter *filter, + struct filter_action *action) +{ + tlv->type = CLSF_TLV_FILTER; + tlv->length = 
sizeof(struct filter); + *((struct filter *)&tlv->val) = *filter; + + tlv = (struct filter_tlv *)((char *)tlv + sizeof(struct filter_tlv) + + sizeof(struct filter)); + tlv->type = CLSF_TLV_ACTION; + tlv->length = sizeof(struct filter_action); + *((struct filter_action *)&tlv->val) = *action; +} + +struct usnic_fwd_flow* +usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, + struct usnic_filter_action *uaction) +{ + struct filter_tlv *tlv; + struct pci_dev *pdev; + struct usnic_fwd_flow *flow; + uint64_t a0, a1; + uint64_t tlv_size; + dma_addr_t tlv_pa; + int status; + + pdev = ufdev->pdev; + tlv_size = (2*sizeof(struct filter_tlv) + sizeof(struct filter) + + sizeof(struct filter_action)); + + flow = kzalloc(sizeof(*flow), GFP_ATOMIC); + if (!flow) + return ERR_PTR(-ENOMEM); + + tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa); + if (!tlv) { + usnic_err("Failed to allocate memory\n"); + status = -ENOMEM; + goto out_free_flow; + } + + fill_tlv(tlv, filter, &uaction->action); + + spin_lock(&ufdev->lock); + status = usnic_fwd_dev_ready_locked(ufdev); + if (status) { + usnic_err("Forwarding dev %s not ready with status %d\n", + ufdev->name, status); + goto out_free_tlv; + } + + status = validate_filter_locked(ufdev, filter); + if (status) { + usnic_err("Failed to validate filter with status %d\n", + status); + goto out_free_tlv; + } + + /* Issue Devcmd */ + a0 = tlv_pa; + a1 = tlv_size; + status = usnic_fwd_devcmd_locked(ufdev, uaction->vnic_idx, + CMD_ADD_FILTER, &a0, &a1); + if (status) { + usnic_err("VF %s Filter add failed with status:%d", + ufdev->name, status); + status = -EFAULT; + goto out_free_tlv; + } else { + usnic_dbg("VF %s FILTER ID:%llu", ufdev->name, a0); + } + + flow->flow_id = (uint32_t) a0; + flow->vnic_idx = uaction->vnic_idx; + flow->ufdev = ufdev; + +out_free_tlv: + spin_unlock(&ufdev->lock); + pci_free_consistent(pdev, tlv_size, tlv, tlv_pa); + if (!status) + return flow; +out_free_flow: + kfree(flow); + return ERR_PTR(status); +} + +int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow) +{ + int status; + u64 a0, a1; + + a0 = flow->flow_id; + + status = usnic_fwd_devcmd(flow->ufdev, flow->vnic_idx, + CMD_DEL_FILTER, &a0, &a1); + if (status) { + if (status == ERR_EINVAL) { + usnic_dbg("Filter %u already deleted for VF Idx %u pf: %s status: %d", + flow->flow_id, flow->vnic_idx, + flow->ufdev->name, status); + } else { + usnic_err("PF %s VF Idx %u Filter: %u FILTER DELETE failed with status %d", + flow->ufdev->name, flow->vnic_idx, + flow->flow_id, status); + } + status = 0; + /* + * Log the error and fake success to the caller because if + * a flow fails to be deleted in the firmware, it is an + * unrecoverable error. 
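+ * A consequence (illustrative sketch, not part of this patch) is
+ * that teardown paths may treat flow release as unconditional,
+ *
+ *	if (qp_flow->flow)
+ *		usnic_fwd_dealloc_flow(qp_flow->flow);
+ *
+ * with no error handling required at the call site.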
+ */ + } else { + usnic_dbg("PF %s VF Idx %u Filter: %u FILTER DELETED", + flow->ufdev->name, flow->vnic_idx, + flow->flow_id); + } + + kfree(flow); + return status; +} + +int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx) +{ + int status; + struct net_device *pf_netdev; + u64 a0, a1; + + pf_netdev = ufdev->netdev; + a0 = qp_idx; + a1 = CMD_QP_RQWQ; + + status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_ENABLE, + &a0, &a1); + if (status) { + usnic_err("PF %s VNIC Index %u RQ Index: %u ENABLE Failed with status %d", + netdev_name(pf_netdev), + vnic_idx, + qp_idx, + status); + } else { + usnic_dbg("PF %s VNIC Index %u RQ Index: %u ENABLED", + netdev_name(pf_netdev), + vnic_idx, qp_idx); + } + + return status; +} + +int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx) +{ + int status; + u64 a0, a1; + struct net_device *pf_netdev; + + pf_netdev = ufdev->netdev; + a0 = qp_idx; + a1 = CMD_QP_RQWQ; + + status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_DISABLE, + &a0, &a1); + if (status) { + usnic_err("PF %s VNIC Index %u RQ Index: %u DISABLE Failed with status %d", + netdev_name(pf_netdev), + vnic_idx, + qp_idx, + status); + } else { + usnic_dbg("PF %s VNIC Index %u RQ Index: %u DISABLED", + netdev_name(pf_netdev), + vnic_idx, + qp_idx); + } + + return status; +} diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h new file mode 100644 index 00000000000..93713a2230b --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_fwd.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_FWD_H_ +#define USNIC_FWD_H_ + +#include <linux/if.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/in.h> + +#include "usnic_abi.h" +#include "usnic_common_pkt_hdr.h" +#include "vnic_devcmd.h" + +struct usnic_fwd_dev { + struct pci_dev *pdev; + struct net_device *netdev; + spinlock_t lock; + /* + * The following fields can be read directly off the device. + * However, they should be set by an accessor function, except name, + * which cannot be changed. 
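+ *
+ * For example (sketch using the accessors declared later in this
+ * header; illustration only):
+ *
+ *	usnic_fwd_set_mtu(ufdev, netdev->mtu);
+ *	usnic_fwd_carrier_up(ufdev);
+ *
+ * rather than assigning the fields directly, so that every update is
+ * made under ->lock.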
+ */ + bool link_up; + char mac[ETH_ALEN]; + unsigned int mtu; + __be32 inaddr; + char name[IFNAMSIZ+1]; +}; + +struct usnic_fwd_flow { + uint32_t flow_id; + struct usnic_fwd_dev *ufdev; + unsigned int vnic_idx; +}; + +struct usnic_filter_action { + int vnic_idx; + struct filter_action action; +}; + +struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev); +void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev); + +void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]); +int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr); +void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev); +void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev); +void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev); +void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu); + +/* + * Allocate a flow on this forwarding device. Whoever calls this function, + * must monitor netdev events on ufdev's netdevice. If NETDEV_REBOOT or + * NETDEV_DOWN is seen, flow will no longer function and must be + * immediately freed by calling usnic_dealloc_flow. + */ +struct usnic_fwd_flow* +usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter, + struct usnic_filter_action *action); +int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow); +int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx); +int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx); + +static inline void usnic_fwd_init_usnic_filter(struct filter *filter, + uint32_t usnic_id) +{ + filter->type = FILTER_USNIC_ID; + filter->u.usnic.ethtype = USNIC_ROCE_ETHERTYPE; + filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE | + FILTER_FIELD_USNIC_ID | + FILTER_FIELD_USNIC_PROTO; + filter->u.usnic.proto_version = (USNIC_ROCE_GRH_VER << + USNIC_ROCE_GRH_VER_SHIFT) | + USNIC_PROTO_VER; + filter->u.usnic.usnic_id = usnic_id; +} + +static inline void usnic_fwd_init_udp_filter(struct filter *filter, + uint32_t daddr, uint16_t dport) +{ + filter->type = FILTER_IPV4_5TUPLE; + filter->u.ipv4.flags = FILTER_FIELD_5TUP_PROTO; + filter->u.ipv4.protocol = PROTO_UDP; + + if (daddr) { + filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_AD; + filter->u.ipv4.dst_addr = daddr; + } + + if (dport) { + filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_PT; + filter->u.ipv4.dst_port = dport; + } +} + +#endif /* !USNIC_FWD_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib.h b/drivers/infiniband/hw/usnic/usnic_ib.h new file mode 100644 index 00000000000..e5a9297dd1b --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef USNIC_IB_H_ +#define USNIC_IB_H_ + +#include <linux/iommu.h> +#include <linux/netdevice.h> + +#include <rdma/ib_verbs.h> + + +#include "usnic.h" +#include "usnic_abi.h" +#include "usnic_vnic.h" + +#define USNIC_IB_PORT_CNT 1 +#define USNIC_IB_NUM_COMP_VECTORS 1 + +extern unsigned int usnic_ib_share_vf; + +struct usnic_ib_ucontext { + struct ib_ucontext ibucontext; + /* Protected by usnic_ib_dev->usdev_lock */ + struct list_head qp_grp_list; + struct list_head link; +}; + +struct usnic_ib_pd { + struct ib_pd ibpd; + struct usnic_uiom_pd *umem_pd; +}; + +struct usnic_ib_mr { + struct ib_mr ibmr; + struct usnic_uiom_reg *umem; +}; + +struct usnic_ib_dev { + struct ib_device ib_dev; + struct pci_dev *pdev; + struct net_device *netdev; + struct usnic_fwd_dev *ufdev; + struct list_head ib_dev_link; + struct list_head vf_dev_list; + struct list_head ctx_list; + struct mutex usdev_lock; + + /* provisioning information */ + struct kref vf_cnt; + unsigned int vf_res_cnt[USNIC_VNIC_RES_TYPE_MAX]; + + /* sysfs vars for QPN reporting */ + struct kobject *qpn_kobj; +}; + +struct usnic_ib_vf { + struct usnic_ib_dev *pf; + spinlock_t lock; + struct usnic_vnic *vnic; + unsigned int qp_grp_ref_cnt; + struct usnic_ib_pd *pd; + struct list_head link; +}; + +static inline +struct usnic_ib_dev *to_usdev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct usnic_ib_dev, ib_dev); +} + +static inline +struct usnic_ib_ucontext *to_ucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext); +} + +static inline +struct usnic_ib_pd *to_upd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct usnic_ib_pd, ibpd); +} + +static inline +struct usnic_ib_ucontext *to_uucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext); +} + +static inline +struct usnic_ib_mr *to_umr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct usnic_ib_mr, ibmr); +} +void usnic_ib_log_vf(struct usnic_ib_vf *vf); + +#define UPDATE_PTR_LEFT(N, P, L) \ +do { \ + L -= (N); \ + P += (N); \ +} while (0) + +#endif /* USNIC_IB_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c new file mode 100644 index 00000000000..fb6d026f92c --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -0,0 +1,682 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Author: Upinder Malhi <umalhi@cisco.com> + * Author: Anant Deepak <anadeepa@cisco.com> + * Author: Cesare Cantu' <cantuc@cisco.com> + * Author: Jeff Squyres <jsquyres@cisco.com> + * Author: Kiran Thirumalai <kithirum@cisco.com> + * Author: Xuyang Wang <xuywang@cisco.com> + * Author: Reese Faucette <rfaucett@cisco.com> + * + */ + +#include <linux/module.h> +#include <linux/inetdevice.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/pci.h> +#include <linux/netdevice.h> + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_addr.h> + +#include "usnic_abi.h" +#include "usnic_common_util.h" +#include "usnic_ib.h" +#include "usnic_ib_qp_grp.h" +#include "usnic_log.h" +#include "usnic_fwd.h" +#include "usnic_debugfs.h" +#include "usnic_ib_verbs.h" +#include "usnic_transport.h" +#include "usnic_uiom.h" +#include "usnic_ib_sysfs.h" + +unsigned int usnic_log_lvl = USNIC_LOG_LVL_ERR; +unsigned int usnic_ib_share_vf = 1; + +static const char usnic_version[] = + DRV_NAME ": Cisco VIC (USNIC) Verbs Driver v" + DRV_VERSION " (" DRV_RELDATE ")\n"; + +static DEFINE_MUTEX(usnic_ib_ibdev_list_lock); +static LIST_HEAD(usnic_ib_ibdev_list); + +/* Callback dump funcs */ +static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz) +{ + struct usnic_ib_vf *vf = obj; + return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name); +} +/* End callback dump funcs */ + +static void usnic_ib_dump_vf(struct usnic_ib_vf *vf, char *buf, int buf_sz) +{ + usnic_vnic_dump(vf->vnic, buf, buf_sz, vf, + usnic_ib_dump_vf_hdr, + usnic_ib_qp_grp_dump_hdr, usnic_ib_qp_grp_dump_rows); +} + +void usnic_ib_log_vf(struct usnic_ib_vf *vf) +{ + char buf[1000]; + usnic_ib_dump_vf(vf, buf, sizeof(buf)); + usnic_dbg("%s\n", buf); +} + +/* Start of netdev section */ +static inline const char *usnic_ib_netdev_event_to_string(unsigned long event) +{ + const char *event2str[] = {"NETDEV_NONE", "NETDEV_UP", "NETDEV_DOWN", + "NETDEV_REBOOT", "NETDEV_CHANGE", + "NETDEV_REGISTER", "NETDEV_UNREGISTER", "NETDEV_CHANGEMTU", + "NETDEV_CHANGEADDR", "NETDEV_GOING_DOWN", "NETDEV_FEAT_CHANGE", + "NETDEV_BONDING_FAILOVER", "NETDEV_PRE_UP", + "NETDEV_PRE_TYPE_CHANGE", "NETDEV_POST_TYPE_CHANGE", + "NETDEV_POST_INT", "NETDEV_UNREGISTER_FINAL", "NETDEV_RELEASE", + "NETDEV_NOTIFY_PEERS", "NETDEV_JOIN" + }; + + if (event >= ARRAY_SIZE(event2str)) + return "UNKNOWN_NETDEV_EVENT"; + else + return event2str[event]; +} + +static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev) +{ + struct usnic_ib_ucontext *ctx; + struct usnic_ib_qp_grp *qp_grp; + enum ib_qp_state cur_state; + int status; + + BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock)); + + list_for_each_entry(ctx, &us_ibdev->ctx_list, link) { + list_for_each_entry(qp_grp, &ctx->qp_grp_list, link) { + cur_state = qp_grp->state; + if (cur_state == IB_QPS_INIT || + cur_state == IB_QPS_RTR || + cur_state == IB_QPS_RTS) { + status = usnic_ib_qp_grp_modify(qp_grp, + IB_QPS_ERR, + NULL); + if (status) { + usnic_err("Failed to transistion qp grp %u from %s to %s\n", + qp_grp->grp_id, + usnic_ib_qp_grp_state_to_string + (cur_state), + usnic_ib_qp_grp_state_to_string + (IB_QPS_ERR)); + } + } + } + } +} + +static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, + unsigned long event) +{ + struct net_device *netdev; + struct ib_event ib_event; + + memset(&ib_event, 0, sizeof(ib_event)); + + mutex_lock(&us_ibdev->usdev_lock); + netdev = us_ibdev->netdev; + switch (event) { + case NETDEV_REBOOT: + usnic_info("PF Reset on %s\n", 
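+	/*
+	 * Every link-affecting netdev event handled below is translated
+	 * into an ib_event on port 1 -- the only port, since
+	 * USNIC_IB_PORT_CNT is 1.  A verbs consumer observes these with
+	 * an ordinary event handler; a minimal consumer-side sketch
+	 * (illustrative only, not part of this driver):
+	 *
+	 *	static void my_handler(struct ib_event_handler *h,
+	 *			       struct ib_event *ev)
+	 *	{
+	 *		if (ev->event == IB_EVENT_PORT_ERR)
+	 *			pr_info("port %u went down\n",
+	 *				ev->element.port_num);
+	 *	}
+	 *
+	 *	INIT_IB_EVENT_HANDLER(&handler, &us_ibdev->ib_dev,
+	 *			      my_handler);
+	 *	ib_register_event_handler(&handler);
+	 */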
us_ibdev->ib_dev.name); + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); + ib_event.event = IB_EVENT_PORT_ERR; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + break; + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: + if (!us_ibdev->ufdev->link_up && + netif_carrier_ok(netdev)) { + usnic_fwd_carrier_up(us_ibdev->ufdev); + usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name); + ib_event.event = IB_EVENT_PORT_ACTIVE; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + } else if (us_ibdev->ufdev->link_up && + !netif_carrier_ok(netdev)) { + usnic_fwd_carrier_down(us_ibdev->ufdev); + usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name); + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); + ib_event.event = IB_EVENT_PORT_ERR; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + } else { + usnic_dbg("Ignoring %s on %s\n", + usnic_ib_netdev_event_to_string(event), + us_ibdev->ib_dev.name); + } + break; + case NETDEV_CHANGEADDR: + if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr, + sizeof(us_ibdev->ufdev->mac))) { + usnic_dbg("Ignoring addr change on %s\n", + us_ibdev->ib_dev.name); + } else { + usnic_info(" %s old mac: %pM new mac: %pM\n", + us_ibdev->ib_dev.name, + us_ibdev->ufdev->mac, + netdev->dev_addr); + usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr); + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); + ib_event.event = IB_EVENT_GID_CHANGE; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + } + + break; + case NETDEV_CHANGEMTU: + if (us_ibdev->ufdev->mtu != netdev->mtu) { + usnic_info("MTU Change on %s old: %u new: %u\n", + us_ibdev->ib_dev.name, + us_ibdev->ufdev->mtu, netdev->mtu); + usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu); + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); + } else { + usnic_dbg("Ignoring MTU change on %s\n", + us_ibdev->ib_dev.name); + } + break; + default: + usnic_dbg("Ignoring event %s on %s", + usnic_ib_netdev_event_to_string(event), + us_ibdev->ib_dev.name); + } + mutex_unlock(&us_ibdev->usdev_lock); +} + +static int usnic_ib_netdevice_event(struct notifier_block *notifier, + unsigned long event, void *ptr) +{ + struct usnic_ib_dev *us_ibdev; + + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + mutex_lock(&usnic_ib_ibdev_list_lock); + list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) { + if (us_ibdev->netdev == netdev) { + usnic_ib_handle_usdev_event(us_ibdev, event); + break; + } + } + mutex_unlock(&usnic_ib_ibdev_list_lock); + + return NOTIFY_DONE; +} + +static struct notifier_block usnic_ib_netdevice_notifier = { + .notifier_call = usnic_ib_netdevice_event +}; +/* End of netdev section */ + +/* Start of inet section */ +static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = ptr; + struct ib_event ib_event; + + mutex_lock(&us_ibdev->usdev_lock); + + switch (event) { + case NETDEV_DOWN: + usnic_info("%s via ip notifiers", + usnic_ib_netdev_event_to_string(event)); + usnic_fwd_del_ipaddr(us_ibdev->ufdev); + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); + ib_event.event = IB_EVENT_GID_CHANGE; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + break; + case NETDEV_UP: + usnic_fwd_add_ipaddr(us_ibdev->ufdev, ifa->ifa_address); + usnic_info("%s via ip notifiers: ip 
%pI4", + usnic_ib_netdev_event_to_string(event), + &us_ibdev->ufdev->inaddr); + ib_event.event = IB_EVENT_GID_CHANGE; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + break; + default: + usnic_info("Ignoring event %s on %s", + usnic_ib_netdev_event_to_string(event), + us_ibdev->ib_dev.name); + } + mutex_unlock(&us_ibdev->usdev_lock); + + return NOTIFY_DONE; +} + +static int usnic_ib_inetaddr_event(struct notifier_block *notifier, + unsigned long event, void *ptr) +{ + struct usnic_ib_dev *us_ibdev; + struct in_ifaddr *ifa = ptr; + struct net_device *netdev = ifa->ifa_dev->dev; + + mutex_lock(&usnic_ib_ibdev_list_lock); + list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) { + if (us_ibdev->netdev == netdev) { + usnic_ib_handle_inet_event(us_ibdev, event, ptr); + break; + } + } + mutex_unlock(&usnic_ib_ibdev_list_lock); + + return NOTIFY_DONE; +} +static struct notifier_block usnic_ib_inetaddr_notifier = { + .notifier_call = usnic_ib_inetaddr_event +}; +/* End of inet section*/ + +/* Start of PF discovery section */ +static void *usnic_ib_device_add(struct pci_dev *dev) +{ + struct usnic_ib_dev *us_ibdev; + union ib_gid gid; + struct in_ifaddr *in; + struct net_device *netdev; + + usnic_dbg("\n"); + netdev = pci_get_drvdata(dev); + + us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev)); + if (IS_ERR_OR_NULL(us_ibdev)) { + usnic_err("Device %s context alloc failed\n", + netdev_name(pci_get_drvdata(dev))); + return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT); + } + + us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); + if (IS_ERR_OR_NULL(us_ibdev->ufdev)) { + usnic_err("Failed to alloc ufdev for %s with err %ld\n", + pci_name(dev), PTR_ERR(us_ibdev->ufdev)); + goto err_dealloc; + } + + mutex_init(&us_ibdev->usdev_lock); + INIT_LIST_HEAD(&us_ibdev->vf_dev_list); + INIT_LIST_HEAD(&us_ibdev->ctx_list); + + us_ibdev->pdev = dev; + us_ibdev->netdev = pci_get_drvdata(dev); + us_ibdev->ib_dev.owner = THIS_MODULE; + us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP; + us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT; + us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS; + us_ibdev->ib_dev.dma_device = &dev->dev; + us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION; + strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX); + + us_ibdev->ib_dev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_OPEN_QP); + + us_ibdev->ib_dev.query_device = usnic_ib_query_device; + us_ibdev->ib_dev.query_port = usnic_ib_query_port; + us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey; + us_ibdev->ib_dev.query_gid = usnic_ib_query_gid; + us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer; + us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd; + us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd; + us_ibdev->ib_dev.create_qp = 
usnic_ib_create_qp; + us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp; + us_ibdev->ib_dev.query_qp = usnic_ib_query_qp; + us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp; + us_ibdev->ib_dev.create_cq = usnic_ib_create_cq; + us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq; + us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr; + us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr; + us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext; + us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext; + us_ibdev->ib_dev.mmap = usnic_ib_mmap; + us_ibdev->ib_dev.create_ah = usnic_ib_create_ah; + us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah; + us_ibdev->ib_dev.post_send = usnic_ib_post_send; + us_ibdev->ib_dev.post_recv = usnic_ib_post_recv; + us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq; + us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; + us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; + + + if (ib_register_device(&us_ibdev->ib_dev, NULL)) + goto err_fwd_dealloc; + + usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu); + usnic_fwd_set_mac(us_ibdev->ufdev, us_ibdev->netdev->dev_addr); + if (netif_carrier_ok(us_ibdev->netdev)) + usnic_fwd_carrier_up(us_ibdev->ufdev); + + in = ((struct in_device *)(netdev->ip_ptr))->ifa_list; + if (in != NULL) + usnic_fwd_add_ipaddr(us_ibdev->ufdev, in->ifa_address); + + usnic_mac_ip_to_gid(us_ibdev->netdev->perm_addr, + us_ibdev->ufdev->inaddr, &gid.raw[0]); + memcpy(&us_ibdev->ib_dev.node_guid, &gid.global.interface_id, + sizeof(gid.global.interface_id)); + kref_init(&us_ibdev->vf_cnt); + + usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n", + us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev), + us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up, + us_ibdev->ufdev->mtu); + return us_ibdev; + +err_fwd_dealloc: + usnic_fwd_dev_free(us_ibdev->ufdev); +err_dealloc: + usnic_err("failed -- deallocing device\n"); + ib_dealloc_device(&us_ibdev->ib_dev); + return NULL; +} + +static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev) +{ + usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name); + usnic_ib_sysfs_unregister_usdev(us_ibdev); + usnic_fwd_dev_free(us_ibdev->ufdev); + ib_unregister_device(&us_ibdev->ib_dev); + ib_dealloc_device(&us_ibdev->ib_dev); +} + +static void usnic_ib_undiscover_pf(struct kref *kref) +{ + struct usnic_ib_dev *us_ibdev, *tmp; + struct pci_dev *dev; + bool found = false; + + dev = container_of(kref, struct usnic_ib_dev, vf_cnt)->pdev; + mutex_lock(&usnic_ib_ibdev_list_lock); + list_for_each_entry_safe(us_ibdev, tmp, + &usnic_ib_ibdev_list, ib_dev_link) { + if (us_ibdev->pdev == dev) { + list_del(&us_ibdev->ib_dev_link); + usnic_ib_device_remove(us_ibdev); + found = true; + break; + } + } + + WARN(!found, "Failed to remove PF %s\n", pci_name(dev)); + + mutex_unlock(&usnic_ib_ibdev_list_lock); +} + +static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic) +{ + struct usnic_ib_dev *us_ibdev; + struct pci_dev *parent_pci, *vf_pci; + int err; + + vf_pci = usnic_vnic_get_pdev(vnic); + parent_pci = pci_physfn(vf_pci); + + BUG_ON(!parent_pci); + + mutex_lock(&usnic_ib_ibdev_list_lock); + list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) { + if (us_ibdev->pdev == parent_pci) { + kref_get(&us_ibdev->vf_cnt); + goto out; + } + } + + us_ibdev = usnic_ib_device_add(parent_pci); + if (IS_ERR_OR_NULL(us_ibdev)) { + us_ibdev = us_ibdev ? 
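+		/*
+		 * usnic_ib_device_add() may return either NULL or an
+		 * ERR_PTR; normalize both to an ERR_PTR for the caller --
+		 * the same NULL-or-error pattern that accompanies every
+		 * IS_ERR_OR_NULL() check in this driver.  PF lifetime is
+		 * reference counted: each VF probe takes a reference on
+		 * vf_cnt (the kref_get() above, or the kref_init() inside
+		 * usnic_ib_device_add()), and the final kref_put() in
+		 * usnic_ib_pci_remove() triggers usnic_ib_undiscover_pf().
+		 */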
us_ibdev : ERR_PTR(-EFAULT); + goto out; + } + + err = usnic_ib_sysfs_register_usdev(us_ibdev); + if (err) { + usnic_ib_device_remove(us_ibdev); + us_ibdev = ERR_PTR(err); + goto out; + } + + list_add(&us_ibdev->ib_dev_link, &usnic_ib_ibdev_list); +out: + mutex_unlock(&usnic_ib_ibdev_list_lock); + return us_ibdev; +} +/* End of PF discovery section */ + +/* Start of PCI section */ + +static DEFINE_PCI_DEVICE_TABLE(usnic_ib_pci_ids) = { + {PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC)}, + {0,} +}; + +static int usnic_ib_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + int err; + struct usnic_ib_dev *pf; + struct usnic_ib_vf *vf; + enum usnic_vnic_res_type res_type; + + vf = kzalloc(sizeof(*vf), GFP_KERNEL); + if (!vf) + return -ENOMEM; + + err = pci_enable_device(pdev); + if (err) { + usnic_err("Failed to enable %s with err %d\n", + pci_name(pdev), err); + goto out_clean_vf; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + usnic_err("Failed to request region for %s with err %d\n", + pci_name(pdev), err); + goto out_disable_device; + } + + pci_set_master(pdev); + pci_set_drvdata(pdev, vf); + + vf->vnic = usnic_vnic_alloc(pdev); + if (IS_ERR_OR_NULL(vf->vnic)) { + err = vf->vnic ? PTR_ERR(vf->vnic) : -ENOMEM; + usnic_err("Failed to alloc vnic for %s with err %d\n", + pci_name(pdev), err); + goto out_release_regions; + } + + pf = usnic_ib_discover_pf(vf->vnic); + if (IS_ERR_OR_NULL(pf)) { + usnic_err("Failed to discover pf of vnic %s with err%ld\n", + pci_name(pdev), PTR_ERR(pf)); + err = pf ? PTR_ERR(pf) : -EFAULT; + goto out_clean_vnic; + } + + vf->pf = pf; + spin_lock_init(&vf->lock); + mutex_lock(&pf->usdev_lock); + list_add_tail(&vf->link, &pf->vf_dev_list); + /* + * Save max settings (will be same for each VF, easier to re-write than + * to say "if (!set) { set_values(); set=1; } + */ + for (res_type = USNIC_VNIC_RES_TYPE_EOL+1; + res_type < USNIC_VNIC_RES_TYPE_MAX; + res_type++) { + pf->vf_res_cnt[res_type] = usnic_vnic_res_cnt(vf->vnic, + res_type); + } + + mutex_unlock(&pf->usdev_lock); + + usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev), + pf->ib_dev.name); + usnic_ib_log_vf(vf); + return 0; + +out_clean_vnic: + usnic_vnic_free(vf->vnic); +out_release_regions: + pci_set_drvdata(pdev, NULL); + pci_clear_master(pdev); + pci_release_regions(pdev); +out_disable_device: + pci_disable_device(pdev); +out_clean_vf: + kfree(vf); + return err; +} + +static void usnic_ib_pci_remove(struct pci_dev *pdev) +{ + struct usnic_ib_vf *vf = pci_get_drvdata(pdev); + struct usnic_ib_dev *pf = vf->pf; + + mutex_lock(&pf->usdev_lock); + list_del(&vf->link); + mutex_unlock(&pf->usdev_lock); + + kref_put(&pf->vf_cnt, usnic_ib_undiscover_pf); + usnic_vnic_free(vf->vnic); + pci_set_drvdata(pdev, NULL); + pci_clear_master(pdev); + pci_release_regions(pdev); + pci_disable_device(pdev); + kfree(vf); + + usnic_info("Removed VF %s\n", pci_name(pdev)); +} + +/* PCI driver entry points */ +static struct pci_driver usnic_ib_pci_driver = { + .name = DRV_NAME, + .id_table = usnic_ib_pci_ids, + .probe = usnic_ib_pci_probe, + .remove = usnic_ib_pci_remove, +}; +/* End of PCI section */ + +/* Start of module section */ +static int __init usnic_ib_init(void) +{ + int err; + + printk_once(KERN_INFO "%s", usnic_version); + + err = usnic_uiom_init(DRV_NAME); + if (err) { + usnic_err("Unable to initalize umem with err %d\n", err); + return err; + } + + if (pci_register_driver(&usnic_ib_pci_driver)) { + usnic_err("Unable to register with PCI\n"); + 
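+		/*
+		 * pci_register_driver()'s return value is not captured
+		 * above, so err still holds the 0 left behind by
+		 * usnic_uiom_init(); without the assignment below the
+		 * module would report success even though no PCI driver
+		 * was bound.  -ENODEV is an assumed errno here --
+		 * propagating the actual return value would work as well.
+		 */
+		err = -ENODEV;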
goto out_umem_fini; + } + + err = register_netdevice_notifier(&usnic_ib_netdevice_notifier); + if (err) { + usnic_err("Failed to register netdev notifier\n"); + goto out_pci_unreg; + } + + err = register_inetaddr_notifier(&usnic_ib_inetaddr_notifier); + if (err) { + usnic_err("Failed to register inet addr notifier\n"); + goto out_unreg_netdev_notifier; + } + + err = usnic_transport_init(); + if (err) { + usnic_err("Failed to initialize transport\n"); + goto out_unreg_inetaddr_notifier; + } + + usnic_debugfs_init(); + + return 0; + +out_unreg_inetaddr_notifier: + unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier); +out_unreg_netdev_notifier: + unregister_netdevice_notifier(&usnic_ib_netdevice_notifier); +out_pci_unreg: + pci_unregister_driver(&usnic_ib_pci_driver); +out_umem_fini: + usnic_uiom_fini(); + + return err; +} + +static void __exit usnic_ib_destroy(void) +{ + usnic_dbg("\n"); + usnic_debugfs_exit(); + usnic_transport_fini(); + unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier); + unregister_netdevice_notifier(&usnic_ib_netdevice_notifier); + pci_unregister_driver(&usnic_ib_pci_driver); + usnic_uiom_fini(); +} + +MODULE_DESCRIPTION("Cisco VIC (usNIC) Verbs Driver"); +MODULE_AUTHOR("Upinder Malhi <umalhi@cisco.com>"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); +module_param(usnic_log_lvl, uint, S_IRUGO | S_IWUSR); +module_param(usnic_ib_share_vf, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(usnic_log_lvl, " Off=0, Err=1, Info=2, Debug=3"); +MODULE_PARM_DESC(usnic_ib_share_vf, "Off=0, On=1 VF sharing amongst QPs"); +MODULE_DEVICE_TABLE(pci, usnic_ib_pci_ids); + +module_init(usnic_ib_init); +module_exit(usnic_ib_destroy); +/* End of module section */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c new file mode 100644 index 00000000000..f8dfd76be89 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -0,0 +1,761 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ +#include <linux/bug.h> +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/spinlock.h> + +#include "usnic_log.h" +#include "usnic_vnic.h" +#include "usnic_fwd.h" +#include "usnic_uiom.h" +#include "usnic_debugfs.h" +#include "usnic_ib_qp_grp.h" +#include "usnic_ib_sysfs.h" +#include "usnic_transport.h" + +#define DFLT_RQ_IDX 0 + +const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state) +{ + switch (state) { + case IB_QPS_RESET: + return "Rst"; + case IB_QPS_INIT: + return "Init"; + case IB_QPS_RTR: + return "RTR"; + case IB_QPS_RTS: + return "RTS"; + case IB_QPS_SQD: + return "SQD"; + case IB_QPS_SQE: + return "SQE"; + case IB_QPS_ERR: + return "ERR"; + default: + return "UNKOWN STATE"; + + } +} + +int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz) +{ + return scnprintf(buf, buf_sz, "|QPN\t|State\t|PID\t|VF Idx\t|Fil ID"); +} + +int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz) +{ + struct usnic_ib_qp_grp *qp_grp = obj; + struct usnic_ib_qp_grp_flow *default_flow; + if (obj) { + default_flow = list_first_entry(&qp_grp->flows_lst, + struct usnic_ib_qp_grp_flow, link); + return scnprintf(buf, buf_sz, "|%d\t|%s\t|%d\t|%hu\t|%d", + qp_grp->ibqp.qp_num, + usnic_ib_qp_grp_state_to_string( + qp_grp->state), + qp_grp->owner_pid, + usnic_vnic_get_index(qp_grp->vf->vnic), + default_flow->flow->flow_id); + } else { + return scnprintf(buf, buf_sz, "|N/A\t|N/A\t|N/A\t|N/A\t|N/A"); + } +} + +static struct usnic_vnic_res_chunk * +get_qp_res_chunk(struct usnic_ib_qp_grp *qp_grp) +{ + lockdep_assert_held(&qp_grp->lock); + /* + * The QP res chunk, used to derive qp indices, + * are just indices of the RQs + */ + return usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); +} + +static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp) +{ + + int status; + int i, vnic_idx; + struct usnic_vnic_res_chunk *res_chunk; + struct usnic_vnic_res *res; + + lockdep_assert_held(&qp_grp->lock); + + vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); + + res_chunk = get_qp_res_chunk(qp_grp); + if (IS_ERR_OR_NULL(res_chunk)) { + usnic_err("Unable to get qp res with err %ld\n", + PTR_ERR(res_chunk)); + return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; + } + + for (i = 0; i < res_chunk->cnt; i++) { + res = res_chunk->res[i]; + status = usnic_fwd_enable_qp(qp_grp->ufdev, vnic_idx, + res->vnic_idx); + if (status) { + usnic_err("Failed to enable qp %d of %s:%d\n with err %d\n", + res->vnic_idx, qp_grp->ufdev->name, + vnic_idx, status); + goto out_err; + } + } + + return 0; + +out_err: + for (i--; i >= 0; i--) { + res = res_chunk->res[i]; + usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx, + res->vnic_idx); + } + + return status; +} + +static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp) +{ + int i, vnic_idx; + struct usnic_vnic_res_chunk *res_chunk; + struct usnic_vnic_res *res; + int status = 0; + + lockdep_assert_held(&qp_grp->lock); + vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); + + res_chunk = get_qp_res_chunk(qp_grp); + if (IS_ERR_OR_NULL(res_chunk)) { + usnic_err("Unable to get qp res with err %ld\n", + PTR_ERR(res_chunk)); + return res_chunk ? 
PTR_ERR(res_chunk) : -ENOMEM; + } + + for (i = 0; i < res_chunk->cnt; i++) { + res = res_chunk->res[i]; + status = usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx, + res->vnic_idx); + if (status) { + usnic_err("Failed to disable rq %d of %s:%d\n with err %d\n", + res->vnic_idx, + qp_grp->ufdev->name, + vnic_idx, status); + } + } + + return status; + +} + +static int init_filter_action(struct usnic_ib_qp_grp *qp_grp, + struct usnic_filter_action *uaction) +{ + struct usnic_vnic_res_chunk *res_chunk; + + res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); + if (IS_ERR_OR_NULL(res_chunk)) { + usnic_err("Unable to get %s with err %ld\n", + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), + PTR_ERR(res_chunk)); + return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM; + } + + uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); + uaction->action.type = FILTER_ACTION_RQ_STEERING; + uaction->action.u.rq_idx = res_chunk->res[DFLT_RQ_IDX]->vnic_idx; + + return 0; +} + +static struct usnic_ib_qp_grp_flow* +create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp, + struct usnic_transport_spec *trans_spec) +{ + uint16_t port_num; + int err; + struct filter filter; + struct usnic_filter_action uaction; + struct usnic_ib_qp_grp_flow *qp_flow; + struct usnic_fwd_flow *flow; + enum usnic_transport_type trans_type; + + trans_type = trans_spec->trans_type; + port_num = trans_spec->usnic_roce.port_num; + + /* Reserve Port */ + port_num = usnic_transport_rsrv_port(trans_type, port_num); + if (port_num == 0) + return ERR_PTR(-EINVAL); + + /* Create Flow */ + usnic_fwd_init_usnic_filter(&filter, port_num); + err = init_filter_action(qp_grp, &uaction); + if (err) + goto out_unreserve_port; + + flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction); + if (IS_ERR_OR_NULL(flow)) { + usnic_err("Unable to alloc flow failed with err %ld\n", + PTR_ERR(flow)); + err = flow ? PTR_ERR(flow) : -EFAULT; + goto out_unreserve_port; + } + + /* Create Flow Handle */ + qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); + if (IS_ERR_OR_NULL(qp_flow)) { + err = qp_flow ? 
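+	/*
+	 * Note the unwind order below: the flow is deallocated before
+	 * the transport port is unreserved, the exact reverse of how
+	 * they were acquired.  The reserved port number doubles as the
+	 * usnic_id programmed into the filter above and later becomes
+	 * the QP number (see qp_grp_id_from_flow()).
+	 */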
PTR_ERR(qp_flow) : -ENOMEM; + goto out_dealloc_flow; + } + qp_flow->flow = flow; + qp_flow->trans_type = trans_type; + qp_flow->usnic_roce.port_num = port_num; + qp_flow->qp_grp = qp_grp; + return qp_flow; + +out_dealloc_flow: + usnic_fwd_dealloc_flow(flow); +out_unreserve_port: + usnic_transport_unrsrv_port(trans_type, port_num); + return ERR_PTR(err); +} + +static void release_roce_custom_flow(struct usnic_ib_qp_grp_flow *qp_flow) +{ + usnic_fwd_dealloc_flow(qp_flow->flow); + usnic_transport_unrsrv_port(qp_flow->trans_type, + qp_flow->usnic_roce.port_num); + kfree(qp_flow); +} + +static struct usnic_ib_qp_grp_flow* +create_udp_flow(struct usnic_ib_qp_grp *qp_grp, + struct usnic_transport_spec *trans_spec) +{ + struct socket *sock; + int sock_fd; + int err; + struct filter filter; + struct usnic_filter_action uaction; + struct usnic_ib_qp_grp_flow *qp_flow; + struct usnic_fwd_flow *flow; + enum usnic_transport_type trans_type; + uint32_t addr; + uint16_t port_num; + int proto; + + trans_type = trans_spec->trans_type; + sock_fd = trans_spec->udp.sock_fd; + + /* Get and check socket */ + sock = usnic_transport_get_socket(sock_fd); + if (IS_ERR_OR_NULL(sock)) + return ERR_CAST(sock); + + err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port_num); + if (err) + goto out_put_sock; + + if (proto != IPPROTO_UDP) { + usnic_err("Protocol for fd %d is not UDP", sock_fd); + err = -EPERM; + goto out_put_sock; + } + + /* Create flow */ + usnic_fwd_init_udp_filter(&filter, addr, port_num); + err = init_filter_action(qp_grp, &uaction); + if (err) + goto out_put_sock; + + flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction); + if (IS_ERR_OR_NULL(flow)) { + usnic_err("Unable to alloc flow failed with err %ld\n", + PTR_ERR(flow)); + err = flow ? PTR_ERR(flow) : -EFAULT; + goto out_put_sock; + } + + /* Create qp_flow */ + qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC); + if (IS_ERR_OR_NULL(qp_flow)) { + err = qp_flow ? 
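+	/*
+	 * For the UDP transport the QP shadows an existing kernel UDP
+	 * socket: userspace passes a socket fd, the driver resolves the
+	 * socket's bound address and port, and the 5-tuple filter built
+	 * above steers matching packets into the QP's receive queue.
+	 * A minimal sketch of that helper, with made-up address and
+	 * port values:
+	 *
+	 *	struct filter f;
+	 *
+	 *	usnic_fwd_init_udp_filter(&f, htonl(INADDR_LOOPBACK), 4791);
+	 *	// f.type is FILTER_IPV4_5TUPLE, protocol fixed to UDP;
+	 *	// dst addr/port fields are set only when non-zero.
+	 */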
PTR_ERR(qp_flow) : -ENOMEM; + goto out_dealloc_flow; + } + qp_flow->flow = flow; + qp_flow->trans_type = trans_type; + qp_flow->udp.sock = sock; + qp_flow->qp_grp = qp_grp; + return qp_flow; + +out_dealloc_flow: + usnic_fwd_dealloc_flow(flow); +out_put_sock: + usnic_transport_put_socket(sock); + return ERR_PTR(err); +} + +static void release_udp_flow(struct usnic_ib_qp_grp_flow *qp_flow) +{ + usnic_fwd_dealloc_flow(qp_flow->flow); + usnic_transport_put_socket(qp_flow->udp.sock); + kfree(qp_flow); +} + +static struct usnic_ib_qp_grp_flow* +create_and_add_flow(struct usnic_ib_qp_grp *qp_grp, + struct usnic_transport_spec *trans_spec) +{ + struct usnic_ib_qp_grp_flow *qp_flow; + enum usnic_transport_type trans_type; + + trans_type = trans_spec->trans_type; + switch (trans_type) { + case USNIC_TRANSPORT_ROCE_CUSTOM: + qp_flow = create_roce_custom_flow(qp_grp, trans_spec); + break; + case USNIC_TRANSPORT_IPV4_UDP: + qp_flow = create_udp_flow(qp_grp, trans_spec); + break; + default: + usnic_err("Unsupported transport %u\n", + trans_spec->trans_type); + return ERR_PTR(-EINVAL); + } + + if (!IS_ERR_OR_NULL(qp_flow)) { + list_add_tail(&qp_flow->link, &qp_grp->flows_lst); + usnic_debugfs_flow_add(qp_flow); + } + + + return qp_flow; +} + +static void release_and_remove_flow(struct usnic_ib_qp_grp_flow *qp_flow) +{ + usnic_debugfs_flow_remove(qp_flow); + list_del(&qp_flow->link); + + switch (qp_flow->trans_type) { + case USNIC_TRANSPORT_ROCE_CUSTOM: + release_roce_custom_flow(qp_flow); + break; + case USNIC_TRANSPORT_IPV4_UDP: + release_udp_flow(qp_flow); + break; + default: + WARN(1, "Unsupported transport %u\n", + qp_flow->trans_type); + break; + } +} + +static void release_and_remove_all_flows(struct usnic_ib_qp_grp *qp_grp) +{ + struct usnic_ib_qp_grp_flow *qp_flow, *tmp; + list_for_each_entry_safe(qp_flow, tmp, &qp_grp->flows_lst, link) + release_and_remove_flow(qp_flow); +} + +int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, + enum ib_qp_state new_state, + void *data) +{ + int status = 0; + int vnic_idx; + struct ib_event ib_event; + enum ib_qp_state old_state; + struct usnic_transport_spec *trans_spec; + struct usnic_ib_qp_grp_flow *qp_flow; + + old_state = qp_grp->state; + vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic); + trans_spec = (struct usnic_transport_spec *) data; + + spin_lock(&qp_grp->lock); + switch (new_state) { + case IB_QPS_RESET: + switch (old_state) { + case IB_QPS_RESET: + /* NO-OP */ + break; + case IB_QPS_INIT: + release_and_remove_all_flows(qp_grp); + status = 0; + break; + case IB_QPS_RTR: + case IB_QPS_RTS: + case IB_QPS_ERR: + status = disable_qp_grp(qp_grp); + release_and_remove_all_flows(qp_grp); + break; + default: + status = -EINVAL; + } + break; + case IB_QPS_INIT: + switch (old_state) { + case IB_QPS_RESET: + if (trans_spec) { + qp_flow = create_and_add_flow(qp_grp, + trans_spec); + if (IS_ERR_OR_NULL(qp_flow)) { + status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT; + break; + } + } else { + /* + * Optional to specify filters. + */ + status = 0; + } + break; + case IB_QPS_INIT: + if (trans_spec) { + qp_flow = create_and_add_flow(qp_grp, + trans_spec); + if (IS_ERR_OR_NULL(qp_flow)) { + status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT; + break; + } + } else { + /* + * Doesn't make sense to go into INIT state + * from INIT state w/o adding filters. 
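+				 * For reference, the machine implemented by
+				 * this switch: RESET->INIT attaches the first
+				 * flow(s), INIT->INIT attaches additional
+				 * flows, INIT->RTR enables the queues in
+				 * hardware, RTR->RTS is currently a no-op,
+				 * and dropping from an active state back to
+				 * RESET or into ERR disables the queues and
+				 * releases every flow.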
+ */ + status = -EINVAL; + } + break; + case IB_QPS_RTR: + status = disable_qp_grp(qp_grp); + break; + case IB_QPS_RTS: + status = disable_qp_grp(qp_grp); + break; + default: + status = -EINVAL; + } + break; + case IB_QPS_RTR: + switch (old_state) { + case IB_QPS_INIT: + status = enable_qp_grp(qp_grp); + break; + default: + status = -EINVAL; + } + break; + case IB_QPS_RTS: + switch (old_state) { + case IB_QPS_RTR: + /* NO-OP FOR NOW */ + break; + default: + status = -EINVAL; + } + break; + case IB_QPS_ERR: + ib_event.device = &qp_grp->vf->pf->ib_dev; + ib_event.element.qp = &qp_grp->ibqp; + ib_event.event = IB_EVENT_QP_FATAL; + + switch (old_state) { + case IB_QPS_RESET: + qp_grp->ibqp.event_handler(&ib_event, + qp_grp->ibqp.qp_context); + break; + case IB_QPS_INIT: + release_and_remove_all_flows(qp_grp); + qp_grp->ibqp.event_handler(&ib_event, + qp_grp->ibqp.qp_context); + break; + case IB_QPS_RTR: + case IB_QPS_RTS: + status = disable_qp_grp(qp_grp); + release_and_remove_all_flows(qp_grp); + qp_grp->ibqp.event_handler(&ib_event, + qp_grp->ibqp.qp_context); + break; + default: + status = -EINVAL; + } + break; + default: + status = -EINVAL; + } + spin_unlock(&qp_grp->lock); + + if (!status) { + qp_grp->state = new_state; + usnic_info("Transistioned %u from %s to %s", + qp_grp->grp_id, + usnic_ib_qp_grp_state_to_string(old_state), + usnic_ib_qp_grp_state_to_string(new_state)); + } else { + usnic_err("Failed to transistion %u from %s to %s", + qp_grp->grp_id, + usnic_ib_qp_grp_state_to_string(old_state), + usnic_ib_qp_grp_state_to_string(new_state)); + } + + return status; +} + +static struct usnic_vnic_res_chunk** +alloc_res_chunk_list(struct usnic_vnic *vnic, + struct usnic_vnic_res_spec *res_spec, void *owner_obj) +{ + enum usnic_vnic_res_type res_type; + struct usnic_vnic_res_chunk **res_chunk_list; + int err, i, res_cnt, res_lst_sz; + + for (res_lst_sz = 0; + res_spec->resources[res_lst_sz].type != USNIC_VNIC_RES_TYPE_EOL; + res_lst_sz++) { + /* Do Nothing */ + } + + res_chunk_list = kzalloc(sizeof(*res_chunk_list)*(res_lst_sz+1), + GFP_ATOMIC); + if (!res_chunk_list) + return ERR_PTR(-ENOMEM); + + for (i = 0; res_spec->resources[i].type != USNIC_VNIC_RES_TYPE_EOL; + i++) { + res_type = res_spec->resources[i].type; + res_cnt = res_spec->resources[i].cnt; + + res_chunk_list[i] = usnic_vnic_get_resources(vnic, res_type, + res_cnt, owner_obj); + if (IS_ERR_OR_NULL(res_chunk_list[i])) { + err = res_chunk_list[i] ? 
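+			/*
+			 * res_chunk_list was kzalloc'd one element larger
+			 * than res_lst_sz, so it stays NULL-terminated for
+			 * free_qp_grp_res() and usnic_ib_qp_grp_get_chunk().
+			 * Also note the out_free_res unwind below stops at
+			 * i > 0 and never puts chunk 0; i >= 0 would release
+			 * that one as well.
+			 */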
+ PTR_ERR(res_chunk_list[i]) : -ENOMEM; + usnic_err("Failed to get %s from %s with err %d\n", + usnic_vnic_res_type_to_str(res_type), + usnic_vnic_pci_name(vnic), + err); + goto out_free_res; + } + } + + return res_chunk_list; + +out_free_res: + for (i--; i > 0; i--) + usnic_vnic_put_resources(res_chunk_list[i]); + kfree(res_chunk_list); + return ERR_PTR(err); +} + +static void free_qp_grp_res(struct usnic_vnic_res_chunk **res_chunk_list) +{ + int i; + for (i = 0; res_chunk_list[i]; i++) + usnic_vnic_put_resources(res_chunk_list[i]); + kfree(res_chunk_list); +} + +static int qp_grp_and_vf_bind(struct usnic_ib_vf *vf, + struct usnic_ib_pd *pd, + struct usnic_ib_qp_grp *qp_grp) +{ + int err; + struct pci_dev *pdev; + + lockdep_assert_held(&vf->lock); + + pdev = usnic_vnic_get_pdev(vf->vnic); + if (vf->qp_grp_ref_cnt == 0) { + err = usnic_uiom_attach_dev_to_pd(pd->umem_pd, &pdev->dev); + if (err) { + usnic_err("Failed to attach %s to domain\n", + pci_name(pdev)); + return err; + } + vf->pd = pd; + } + vf->qp_grp_ref_cnt++; + + WARN_ON(vf->pd != pd); + qp_grp->vf = vf; + + return 0; +} + +static void qp_grp_and_vf_unbind(struct usnic_ib_qp_grp *qp_grp) +{ + struct pci_dev *pdev; + struct usnic_ib_pd *pd; + + lockdep_assert_held(&qp_grp->vf->lock); + + pd = qp_grp->vf->pd; + pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic); + if (--qp_grp->vf->qp_grp_ref_cnt == 0) { + qp_grp->vf->pd = NULL; + usnic_uiom_detach_dev_from_pd(pd->umem_pd, &pdev->dev); + } + qp_grp->vf = NULL; +} + +static void log_spec(struct usnic_vnic_res_spec *res_spec) +{ + char buf[512]; + usnic_vnic_spec_dump(buf, sizeof(buf), res_spec); + usnic_dbg("%s\n", buf); +} + +static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow, + uint32_t *id) +{ + enum usnic_transport_type trans_type = qp_flow->trans_type; + int err; + uint16_t port_num = 0; + + switch (trans_type) { + case USNIC_TRANSPORT_ROCE_CUSTOM: + *id = qp_flow->usnic_roce.port_num; + break; + case USNIC_TRANSPORT_IPV4_UDP: + err = usnic_transport_sock_get_addr(qp_flow->udp.sock, + NULL, NULL, + &port_num); + if (err) + return err; + /* + * Copy port_num to stack first and then to *id, + * so that the short to int cast works for little + * and big endian systems. + */ + *id = port_num; + break; + default: + usnic_err("Unsupported transport %u\n", trans_type); + return -EINVAL; + } + + return 0; +} + +struct usnic_ib_qp_grp * +usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, + struct usnic_ib_pd *pd, + struct usnic_vnic_res_spec *res_spec, + struct usnic_transport_spec *transport_spec) +{ + struct usnic_ib_qp_grp *qp_grp; + int err; + enum usnic_transport_type transport = transport_spec->trans_type; + struct usnic_ib_qp_grp_flow *qp_flow; + + lockdep_assert_held(&vf->lock); + + err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport], + res_spec); + if (err) { + usnic_err("Spec does not meet miniumum req for transport %d\n", + transport); + log_spec(res_spec); + return ERR_PTR(err); + } + + qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC); + if (!qp_grp) { + usnic_err("Unable to alloc qp_grp - Out of memory\n"); + return NULL; + } + + qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec, + qp_grp); + if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) { + err = qp_grp->res_chunk_list ? 
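+		/*
+		 * Creation sequence: resource chunks are carved out of the
+		 * VF's vnic, the VF is bound to the PD's IOMMU domain, the
+		 * default flow is attached, and that flow's id becomes the
+		 * QP number.  GFP_ATOMIC is used on this path because the
+		 * caller holds vf->lock (see the lockdep assertion above),
+		 * a spinlock under which sleeping allocations are not
+		 * allowed.
+		 */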
+ PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM; + usnic_err("Unable to alloc res for %d with err %d\n", + qp_grp->grp_id, err); + goto out_free_qp_grp; + } + + err = qp_grp_and_vf_bind(vf, pd, qp_grp); + if (err) + goto out_free_res; + + INIT_LIST_HEAD(&qp_grp->flows_lst); + spin_lock_init(&qp_grp->lock); + qp_grp->ufdev = ufdev; + qp_grp->state = IB_QPS_RESET; + qp_grp->owner_pid = current->pid; + + qp_flow = create_and_add_flow(qp_grp, transport_spec); + if (IS_ERR_OR_NULL(qp_flow)) { + usnic_err("Unable to create and add flow with err %ld\n", + PTR_ERR(qp_flow)); + err = qp_flow ? PTR_ERR(qp_flow) : -EFAULT; + goto out_qp_grp_vf_unbind; + } + + err = qp_grp_id_from_flow(qp_flow, &qp_grp->grp_id); + if (err) + goto out_release_flow; + qp_grp->ibqp.qp_num = qp_grp->grp_id; + + usnic_ib_sysfs_qpn_add(qp_grp); + + return qp_grp; + +out_release_flow: + release_and_remove_flow(qp_flow); +out_qp_grp_vf_unbind: + qp_grp_and_vf_unbind(qp_grp); +out_free_res: + free_qp_grp_res(qp_grp->res_chunk_list); +out_free_qp_grp: + kfree(qp_grp); + + return ERR_PTR(err); +} + +void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp) +{ + + WARN_ON(qp_grp->state != IB_QPS_RESET); + lockdep_assert_held(&qp_grp->vf->lock); + + release_and_remove_all_flows(qp_grp); + usnic_ib_sysfs_qpn_remove(qp_grp); + qp_grp_and_vf_unbind(qp_grp); + free_qp_grp_res(qp_grp->res_chunk_list); + kfree(qp_grp); +} + +struct usnic_vnic_res_chunk* +usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp, + enum usnic_vnic_res_type res_type) +{ + int i; + + for (i = 0; qp_grp->res_chunk_list[i]; i++) { + if (qp_grp->res_chunk_list[i]->type == res_type) + return qp_grp->res_chunk_list[i]; + } + + return ERR_PTR(-EINVAL); +} diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h new file mode 100644 index 00000000000..b0aafe8db0c --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef USNIC_IB_QP_GRP_H_ +#define USNIC_IB_QP_GRP_H_ + +#include <linux/debugfs.h> +#include <rdma/ib_verbs.h> + +#include "usnic_ib.h" +#include "usnic_abi.h" +#include "usnic_fwd.h" +#include "usnic_vnic.h" + +/* + * The qp group struct represents all the hw resources needed to present a ib_qp + */ +struct usnic_ib_qp_grp { + struct ib_qp ibqp; + enum ib_qp_state state; + int grp_id; + + struct usnic_fwd_dev *ufdev; + struct usnic_ib_ucontext *ctx; + struct list_head flows_lst; + + struct usnic_vnic_res_chunk **res_chunk_list; + + pid_t owner_pid; + struct usnic_ib_vf *vf; + struct list_head link; + + spinlock_t lock; + + struct kobject kobj; +}; + +struct usnic_ib_qp_grp_flow { + struct usnic_fwd_flow *flow; + enum usnic_transport_type trans_type; + union { + struct { + uint16_t port_num; + } usnic_roce; + struct { + struct socket *sock; + } udp; + }; + struct usnic_ib_qp_grp *qp_grp; + struct list_head link; + + /* Debug FS */ + struct dentry *dbgfs_dentry; + char dentry_name[32]; +}; + +static const struct +usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = { + { /*USNIC_TRANSPORT_UNKNOWN*/ + .resources = { + {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, + }, + }, + { /*USNIC_TRANSPORT_ROCE_CUSTOM*/ + .resources = { + {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, + }, + }, + { /*USNIC_TRANSPORT_IPV4_UDP*/ + .resources = { + {.type = USNIC_VNIC_RES_TYPE_WQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_RQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_CQ, .cnt = 1,}, + {.type = USNIC_VNIC_RES_TYPE_EOL, .cnt = 0,}, + }, + }, +}; + +const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state); +int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz); +int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz); +struct usnic_ib_qp_grp * +usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, + struct usnic_ib_pd *pd, + struct usnic_vnic_res_spec *res_spec, + struct usnic_transport_spec *trans_spec); +void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp); +int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp, + enum ib_qp_state new_state, + void *data); +struct usnic_vnic_res_chunk +*usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp, + enum usnic_vnic_res_type type); +static inline +struct usnic_ib_qp_grp *to_uqp_grp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct usnic_ib_qp_grp, ibqp); +} +#endif /* USNIC_IB_QP_GRP_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c new file mode 100644 index 00000000000..27dc67c1689 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/errno.h> + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_addr.h> + +#include "usnic_common_util.h" +#include "usnic_ib.h" +#include "usnic_ib_qp_grp.h" +#include "usnic_vnic.h" +#include "usnic_ib_verbs.h" +#include "usnic_log.h" + +static ssize_t usnic_ib_show_fw_ver(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev = + container_of(device, struct usnic_ib_dev, ib_dev.dev); + struct ethtool_drvinfo info; + + mutex_lock(&us_ibdev->usdev_lock); + us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); + mutex_unlock(&us_ibdev->usdev_lock); + + return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version); +} + +static ssize_t usnic_ib_show_board(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev = + container_of(device, struct usnic_ib_dev, ib_dev.dev); + unsigned short subsystem_device_id; + + mutex_lock(&us_ibdev->usdev_lock); + subsystem_device_id = us_ibdev->pdev->subsystem_device; + mutex_unlock(&us_ibdev->usdev_lock); + + return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id); +} + +/* + * Report the configuration for this PF + */ +static ssize_t +usnic_ib_show_config(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev; + char *ptr; + unsigned left; + unsigned n; + enum usnic_vnic_res_type res_type; + + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + + /* Buffer space limit is 1 page */ + ptr = buf; + left = PAGE_SIZE; + + mutex_lock(&us_ibdev->usdev_lock); + if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) { + char *busname; + + /* + * bus name seems to come with annoying prefix. + * Remove it if it is predictable + */ + busname = us_ibdev->pdev->bus->name; + if (strncmp(busname, "PCI Bus ", 8) == 0) + busname += 8; + + n = scnprintf(ptr, left, + "%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:", + us_ibdev->ib_dev.name, + busname, + PCI_SLOT(us_ibdev->pdev->devfn), + PCI_FUNC(us_ibdev->pdev->devfn), + netdev_name(us_ibdev->netdev), + us_ibdev->ufdev->mac, + atomic_read(&us_ibdev->vf_cnt.refcount)); + UPDATE_PTR_LEFT(n, ptr, left); + + for (res_type = USNIC_VNIC_RES_TYPE_EOL; + res_type < USNIC_VNIC_RES_TYPE_MAX; + res_type++) { + if (us_ibdev->vf_res_cnt[res_type] == 0) + continue; + n = scnprintf(ptr, left, " %d %s%s", + us_ibdev->vf_res_cnt[res_type], + usnic_vnic_res_type_to_str(res_type), + (res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ? 
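+			/*
+			 * All multi-line sysfs output in this file uses the
+			 * same bounded pattern: scnprintf() returns the
+			 * number of bytes actually written (never more than
+			 * left), and UPDATE_PTR_LEFT() advances the cursor
+			 * while shrinking the budget:
+			 *
+			 *	n = scnprintf(ptr, left, "...");
+			 *	UPDATE_PTR_LEFT(n, ptr, left);
+			 *
+			 * so the final ptr - buf can never exceed the
+			 * PAGE_SIZE budget handed to a sysfs show routine.
+			 */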
+ "," : ""); + UPDATE_PTR_LEFT(n, ptr, left); + } + n = scnprintf(ptr, left, "\n"); + UPDATE_PTR_LEFT(n, ptr, left); + } else { + n = scnprintf(ptr, left, "%s: no VFs\n", + us_ibdev->ib_dev.name); + UPDATE_PTR_LEFT(n, ptr, left); + } + mutex_unlock(&us_ibdev->usdev_lock); + + return ptr - buf; +} + +static ssize_t +usnic_ib_show_iface(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev; + + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + + return scnprintf(buf, PAGE_SIZE, "%s\n", + netdev_name(us_ibdev->netdev)); +} + +static ssize_t +usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev; + + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", + atomic_read(&us_ibdev->vf_cnt.refcount)); +} + +static ssize_t +usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev; + int qp_per_vf; + + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); + + return scnprintf(buf, PAGE_SIZE, + "%d\n", qp_per_vf); +} + +static ssize_t +usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct usnic_ib_dev *us_ibdev; + + us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev); + + return scnprintf(buf, PAGE_SIZE, "%d\n", + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]); +} + +static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL); +static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL); +static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL); +static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL); +static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL); +static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL); +static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL); + +static struct device_attribute *usnic_class_attributes[] = { + &dev_attr_fw_ver, + &dev_attr_board_id, + &dev_attr_config, + &dev_attr_iface, + &dev_attr_max_vf, + &dev_attr_qp_per_vf, + &dev_attr_cq_per_vf, +}; + +struct qpn_attribute { + struct attribute attr; + ssize_t (*show)(struct usnic_ib_qp_grp *, char *buf); +}; + +/* + * Definitions for supporting QPN entries in sysfs + */ +static ssize_t +usnic_ib_qpn_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct usnic_ib_qp_grp *qp_grp; + struct qpn_attribute *qpn_attr; + + qp_grp = container_of(kobj, struct usnic_ib_qp_grp, kobj); + qpn_attr = container_of(attr, struct qpn_attribute, attr); + + return qpn_attr->show(qp_grp, buf); +} + +static const struct sysfs_ops usnic_ib_qpn_sysfs_ops = { + .show = usnic_ib_qpn_attr_show +}; + +#define QPN_ATTR_RO(NAME) \ +struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME) + +static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx); +} + +static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) +{ + int i, j, n; + int left; + char *ptr; + struct usnic_vnic_res_chunk *res_chunk; + struct usnic_vnic_res *vnic_res; + + left = PAGE_SIZE; + ptr = buf; + + n = scnprintf(ptr, left, + "QPN: %d State: (%s) PID: %u VF Idx: %hu ", + qp_grp->ibqp.qp_num, + usnic_ib_qp_grp_state_to_string(qp_grp->state), + qp_grp->owner_pid, + 
usnic_vnic_get_index(qp_grp->vf->vnic)); + UPDATE_PTR_LEFT(n, ptr, left); + + for (i = 0; qp_grp->res_chunk_list[i]; i++) { + res_chunk = qp_grp->res_chunk_list[i]; + for (j = 0; j < res_chunk->cnt; j++) { + vnic_res = res_chunk->res[j]; + n = scnprintf(ptr, left, "%s[%d] ", + usnic_vnic_res_type_to_str(vnic_res->type), + vnic_res->vnic_idx); + UPDATE_PTR_LEFT(n, ptr, left); + } + } + + n = scnprintf(ptr, left, "\n"); + UPDATE_PTR_LEFT(n, ptr, left); + + return ptr - buf; +} + +static QPN_ATTR_RO(context); +static QPN_ATTR_RO(summary); + +static struct attribute *usnic_ib_qpn_default_attrs[] = { + &qpn_attr_context.attr, + &qpn_attr_summary.attr, + NULL +}; + +static struct kobj_type usnic_ib_qpn_type = { + .sysfs_ops = &usnic_ib_qpn_sysfs_ops, + .default_attrs = usnic_ib_qpn_default_attrs +}; + +int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev) +{ + int i; + int err; + for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) { + err = device_create_file(&us_ibdev->ib_dev.dev, + usnic_class_attributes[i]); + if (err) { + usnic_err("Failed to create device file %d for %s eith err %d", + i, us_ibdev->ib_dev.name, err); + return -EINVAL; + } + } + + /* create kernel object for looking at individual QPs */ + kobject_get(&us_ibdev->ib_dev.dev.kobj); + us_ibdev->qpn_kobj = kobject_create_and_add("qpn", + &us_ibdev->ib_dev.dev.kobj); + if (us_ibdev->qpn_kobj == NULL) { + kobject_put(&us_ibdev->ib_dev.dev.kobj); + return -ENOMEM; + } + + return 0; +} + +void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev) +{ + int i; + for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) { + device_remove_file(&us_ibdev->ib_dev.dev, + usnic_class_attributes[i]); + } + + kobject_put(us_ibdev->qpn_kobj); +} + +void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp) +{ + struct usnic_ib_dev *us_ibdev; + int err; + + us_ibdev = qp_grp->vf->pf; + + err = kobject_init_and_add(&qp_grp->kobj, &usnic_ib_qpn_type, + kobject_get(us_ibdev->qpn_kobj), + "%d", qp_grp->grp_id); + if (err) { + kobject_put(us_ibdev->qpn_kobj); + return; + } +} + +void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp) +{ + struct usnic_ib_dev *us_ibdev; + + us_ibdev = qp_grp->vf->pf; + + kobject_put(&qp_grp->kobj); + kobject_put(us_ibdev->qpn_kobj); +} diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h new file mode 100644 index 00000000000..0d09b493cd0 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef USNIC_IB_SYSFS_H_ +#define USNIC_IB_SYSFS_H_ + +#include "usnic_ib.h" + +int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev); +void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev); +void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp); +void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp); + +#endif /* !USNIC_IB_SYSFS_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c new file mode 100644 index 00000000000..53bd6a2d9cd --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -0,0 +1,768 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/errno.h> + +#include <rdma/ib_user_verbs.h> +#include <rdma/ib_addr.h> + +#include "usnic_abi.h" +#include "usnic_ib.h" +#include "usnic_common_util.h" +#include "usnic_ib_qp_grp.h" +#include "usnic_fwd.h" +#include "usnic_log.h" +#include "usnic_uiom.h" +#include "usnic_transport.h" + +#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM + +static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver) +{ + *fw_ver = (u64) *fw_ver_str; +} + +static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp, + struct ib_udata *udata) +{ + struct usnic_ib_dev *us_ibdev; + struct usnic_ib_create_qp_resp resp; + struct pci_dev *pdev; + struct vnic_dev_bar *bar; + struct usnic_vnic_res_chunk *chunk; + struct usnic_ib_qp_grp_flow *default_flow; + int i, err; + + memset(&resp, 0, sizeof(resp)); + + us_ibdev = qp_grp->vf->pf; + pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic); + if (!pdev) { + usnic_err("Failed to get pdev of qp_grp %d\n", + qp_grp->grp_id); + return -EFAULT; + } + + bar = usnic_vnic_get_bar(qp_grp->vf->vnic, 0); + if (!bar) { + usnic_err("Failed to get bar0 of qp_grp %d vf %s", + qp_grp->grp_id, pci_name(pdev)); + return -EFAULT; + } + + resp.vfid = usnic_vnic_get_index(qp_grp->vf->vnic); + resp.bar_bus_addr = bar->bus_addr; + resp.bar_len = bar->len; + + chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ); + if (IS_ERR_OR_NULL(chunk)) { + usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ), + qp_grp->grp_id, + PTR_ERR(chunk)); + return chunk ? PTR_ERR(chunk) : -ENOMEM; + } + + WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ); + resp.rq_cnt = chunk->cnt; + for (i = 0; i < chunk->cnt; i++) + resp.rq_idx[i] = chunk->res[i]->vnic_idx; + + chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ); + if (IS_ERR_OR_NULL(chunk)) { + usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ), + qp_grp->grp_id, + PTR_ERR(chunk)); + return chunk ? 
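+	/*
+	 * Everything gathered into resp -- the VF index, the BAR bus
+	 * address and length, and the RQ/WQ/CQ indices -- is returned
+	 * through ib_copy_to_udata() so that userspace can mmap the
+	 * device BAR and drive the queues directly, bypassing the
+	 * kernel on the data path.
+	 */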
PTR_ERR(chunk) : -ENOMEM; + } + + WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ); + resp.wq_cnt = chunk->cnt; + for (i = 0; i < chunk->cnt; i++) + resp.wq_idx[i] = chunk->res[i]->vnic_idx; + + chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ); + if (IS_ERR_OR_NULL(chunk)) { + usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n", + usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ), + qp_grp->grp_id, + PTR_ERR(chunk)); + return chunk ? PTR_ERR(chunk) : -ENOMEM; + } + + WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ); + resp.cq_cnt = chunk->cnt; + for (i = 0; i < chunk->cnt; i++) + resp.cq_idx[i] = chunk->res[i]->vnic_idx; + + default_flow = list_first_entry(&qp_grp->flows_lst, + struct usnic_ib_qp_grp_flow, link); + resp.transport = default_flow->trans_type; + + err = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (err) { + usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name); + return err; + } + + return 0; +} + +static struct usnic_ib_qp_grp* +find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, + struct usnic_ib_pd *pd, + struct usnic_transport_spec *trans_spec, + struct usnic_vnic_res_spec *res_spec) +{ + struct usnic_ib_vf *vf; + struct usnic_vnic *vnic; + struct usnic_ib_qp_grp *qp_grp; + struct device *dev, **dev_list; + int i, found = 0; + + BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock)); + + if (list_empty(&us_ibdev->vf_dev_list)) { + usnic_info("No vfs to allocate\n"); + return NULL; + } + + if (usnic_ib_share_vf) { + /* Try to find resouces on a used vf which is in pd */ + dev_list = usnic_uiom_get_dev_list(pd->umem_pd); + for (i = 0; dev_list[i]; i++) { + dev = dev_list[i]; + vf = pci_get_drvdata(to_pci_dev(dev)); + spin_lock(&vf->lock); + vnic = vf->vnic; + if (!usnic_vnic_check_room(vnic, res_spec)) { + usnic_dbg("Found used vnic %s from %s\n", + us_ibdev->ib_dev.name, + pci_name(usnic_vnic_get_pdev( + vnic))); + found = 1; + break; + } + spin_unlock(&vf->lock); + + } + usnic_uiom_free_dev_list(dev_list); + } + + if (!found) { + /* Try to find resources on an unused vf */ + list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) { + spin_lock(&vf->lock); + vnic = vf->vnic; + if (vf->qp_grp_ref_cnt == 0 && + usnic_vnic_check_room(vnic, res_spec) == 0) { + found = 1; + break; + } + spin_unlock(&vf->lock); + } + } + + if (!found) { + usnic_info("No free qp grp found on %s\n", + us_ibdev->ib_dev.name); + return ERR_PTR(-ENOMEM); + } + + qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf, pd, res_spec, + trans_spec); + spin_unlock(&vf->lock); + if (IS_ERR_OR_NULL(qp_grp)) { + usnic_err("Failed to allocate qp_grp\n"); + return ERR_PTR(qp_grp ? 
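+	/*
+	 * VF selection policy, as implemented above: when
+	 * usnic_ib_share_vf is set, prefer a VF already attached to this
+	 * PD's IOMMU domain that still has room for res_spec; otherwise
+	 * fall back to a completely idle VF (qp_grp_ref_cnt == 0).  The
+	 * chosen VF's lock is taken during the search and stays held
+	 * across usnic_ib_qp_grp_create().
+	 */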
PTR_ERR(qp_grp) : -ENOMEM); + } + + return qp_grp; +} + +static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp) +{ + struct usnic_ib_vf *vf = qp_grp->vf; + + WARN_ON(qp_grp->state != IB_QPS_RESET); + + spin_lock(&vf->lock); + usnic_ib_qp_grp_destroy(qp_grp); + spin_unlock(&vf->lock); +} + +static void eth_speed_to_ib_speed(int speed, u8 *active_speed, + u8 *active_width) +{ + if (speed <= 10000) { + *active_width = IB_WIDTH_1X; + *active_speed = IB_SPEED_FDR10; + } else if (speed <= 20000) { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_DDR; + } else if (speed <= 30000) { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_QDR; + } else if (speed <= 40000) { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_FDR10; + } else { + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_EDR; + } +} + +static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd) +{ + if (cmd.spec.trans_type <= USNIC_TRANSPORT_UNKNOWN || + cmd.spec.trans_type >= USNIC_TRANSPORT_MAX) + return -EINVAL; + + return 0; +} + +/* Start of ib callback functions */ + +enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, + u8 port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +int usnic_ib_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); + union ib_gid gid; + struct ethtool_drvinfo info; + struct ethtool_cmd cmd; + int qp_per_vf; + + usnic_dbg("\n"); + mutex_lock(&us_ibdev->usdev_lock); + us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info); + us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd); + memset(props, 0, sizeof(*props)); + usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr, + &gid.raw[0]); + memcpy(&props->sys_image_guid, &gid.global.interface_id, + sizeof(gid.global.interface_id)); + usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver); + props->max_mr_size = USNIC_UIOM_MAX_MR_SIZE; + props->page_size_cap = USNIC_UIOM_PAGE_SIZE; + props->vendor_id = PCI_VENDOR_ID_CISCO; + props->vendor_part_id = PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC; + props->hw_ver = us_ibdev->pdev->subsystem_device; + qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ], + us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]); + props->max_qp = qp_per_vf * + atomic_read(&us_ibdev->vf_cnt.refcount); + props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; + props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] * + atomic_read(&us_ibdev->vf_cnt.refcount); + props->max_pd = USNIC_UIOM_MAX_PD_CNT; + props->max_mr = USNIC_UIOM_MAX_MR_CNT; + props->local_ca_ack_delay = 0; + props->max_pkeys = 0; + props->atomic_cap = IB_ATOMIC_NONE; + props->masked_atomic_cap = props->atomic_cap; + props->max_qp_rd_atom = 0; + props->max_qp_init_rd_atom = 0; + props->max_res_rd_atom = 0; + props->max_srq = 0; + props->max_srq_wr = 0; + props->max_srq_sge = 0; + props->max_fast_reg_page_list_len = 0; + props->max_mcast_grp = 0; + props->max_mcast_qp_attach = 0; + props->max_total_mcast_qp_attach = 0; + props->max_map_per_fmr = 0; + /* Owned by Userspace + * max_qp_wr, max_sge, max_sge_rd, max_cqe */ + mutex_unlock(&us_ibdev->usdev_lock); + + return 0; +} + +int usnic_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); + struct ethtool_cmd cmd; + + usnic_dbg("\n"); + + mutex_lock(&us_ibdev->usdev_lock); + 
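/*
 * Illustration of the mapping computed by eth_speed_to_ib_speed() above
 * (speeds are in Mbps, as reported by ethtool); each pair follows
 * directly from the branches of that function:
 *
 *	u8 speed, width;
 *	eth_speed_to_ib_speed(10000, &speed, &width);	-> 1X, FDR10 (~10G)
 *	eth_speed_to_ib_speed(20000, &speed, &width);	-> 4X, DDR (4 x 5G)
 *	eth_speed_to_ib_speed(40000, &speed, &width);	-> 4X, FDR10 (4 x 10G)
 *	eth_speed_to_ib_speed(100000, &speed, &width);	-> 4X, EDR
 */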
us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd); + memset(props, 0, sizeof(*props)); + + props->lid = 0; + props->lmc = 1; + props->sm_lid = 0; + props->sm_sl = 0; + + if (!us_ibdev->ufdev->link_up) { + props->state = IB_PORT_DOWN; + props->phys_state = 3; + } else if (!us_ibdev->ufdev->inaddr) { + props->state = IB_PORT_INIT; + props->phys_state = 4; + } else { + props->state = IB_PORT_ACTIVE; + props->phys_state = 5; + } + + props->port_cap_flags = 0; + props->gid_tbl_len = 1; + props->pkey_tbl_len = 1; + props->bad_pkey_cntr = 0; + props->qkey_viol_cntr = 0; + eth_speed_to_ib_speed(cmd.speed, &props->active_speed, + &props->active_width); + props->max_mtu = IB_MTU_4096; + props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu); + /* Userspace will adjust for hdrs */ + props->max_msg_sz = us_ibdev->ufdev->mtu; + props->max_vl_num = 1; + mutex_unlock(&us_ibdev->usdev_lock); + + return 0; +} + +int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr) +{ + struct usnic_ib_qp_grp *qp_grp; + struct usnic_ib_vf *vf; + int err; + + usnic_dbg("\n"); + + memset(qp_attr, 0, sizeof(*qp_attr)); + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); + + qp_grp = to_uqp_grp(qp); + vf = qp_grp->vf; + mutex_lock(&vf->pf->usdev_lock); + usnic_dbg("\n"); + qp_attr->qp_state = qp_grp->state; + qp_attr->cur_qp_state = qp_grp->state; + + switch (qp_grp->ibqp.qp_type) { + case IB_QPT_UD: + qp_attr->qkey = 0; + break; + default: + usnic_err("Unexpected qp_type %d\n", qp_grp->ibqp.qp_type); + err = -EINVAL; + goto err_out; + } + + mutex_unlock(&vf->pf->usdev_lock); + return 0; + +err_out: + mutex_unlock(&vf->pf->usdev_lock); + return err; +} + +int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid) +{ + + struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); + usnic_dbg("\n"); + + if (index > 1) + return -EINVAL; + + mutex_lock(&us_ibdev->usdev_lock); + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); + usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr, + &gid->raw[0]); + mutex_unlock(&us_ibdev->usdev_lock); + + return 0; +} + +int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + if (index > 1) + return -EINVAL; + + *pkey = 0xffff; + return 0; +} + +struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct usnic_ib_pd *pd; + void *umem_pd; + + usnic_dbg("\n"); + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + umem_pd = pd->umem_pd = usnic_uiom_alloc_pd(); + if (IS_ERR_OR_NULL(umem_pd)) { + kfree(pd); + return ERR_PTR(umem_pd ? 
PTR_ERR(umem_pd) : -ENOMEM); + } + + usnic_info("domain 0x%p allocated for context 0x%p and device %s\n", + pd, context, ibdev->name); + return &pd->ibpd; +} + +int usnic_ib_dealloc_pd(struct ib_pd *pd) +{ + usnic_info("freeing domain 0x%p\n", pd); + + usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd); + kfree(pd); + return 0; +} + +struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + int err; + struct usnic_ib_dev *us_ibdev; + struct usnic_ib_qp_grp *qp_grp; + struct usnic_ib_ucontext *ucontext; + int cq_cnt; + struct usnic_vnic_res_spec res_spec; + struct usnic_ib_create_qp_cmd cmd; + struct usnic_transport_spec trans_spec; + + usnic_dbg("\n"); + + ucontext = to_uucontext(pd->uobject->context); + us_ibdev = to_usdev(pd->device); + + if (init_attr->create_flags) + return ERR_PTR(-EINVAL); + + err = ib_copy_from_udata(&cmd, udata, sizeof(cmd)); + if (err) { + usnic_err("%s: cannot copy udata for create_qp\n", + us_ibdev->ib_dev.name); + return ERR_PTR(-EINVAL); + } + + err = create_qp_validate_user_data(cmd); + if (err) { + usnic_err("%s: Failed to validate user data\n", + us_ibdev->ib_dev.name); + return ERR_PTR(-EINVAL); + } + + if (init_attr->qp_type != IB_QPT_UD) { + usnic_err("%s asked to make a non-UD QP: %d\n", + us_ibdev->ib_dev.name, init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + + trans_spec = cmd.spec; + mutex_lock(&us_ibdev->usdev_lock); + cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2; + res_spec = min_transport_spec[trans_spec.trans_type]; + usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt); + qp_grp = find_free_vf_and_create_qp_grp(us_ibdev, to_upd(pd), + &trans_spec, + &res_spec); + if (IS_ERR_OR_NULL(qp_grp)) { + err = qp_grp ? PTR_ERR(qp_grp) : -ENOMEM; + goto out_release_mutex; + } + + err = usnic_ib_fill_create_qp_resp(qp_grp, udata); + if (err) { + err = -EBUSY; + goto out_release_qp_grp; + } + + qp_grp->ctx = ucontext; + list_add_tail(&qp_grp->link, &ucontext->qp_grp_list); + usnic_ib_log_vf(qp_grp->vf); + mutex_unlock(&us_ibdev->usdev_lock); + return &qp_grp->ibqp; + +out_release_qp_grp: + qp_grp_destroy(qp_grp); +out_release_mutex: + mutex_unlock(&us_ibdev->usdev_lock); + return ERR_PTR(err); +} + +int usnic_ib_destroy_qp(struct ib_qp *qp) +{ + struct usnic_ib_qp_grp *qp_grp; + struct usnic_ib_vf *vf; + + usnic_dbg("\n"); + + qp_grp = to_uqp_grp(qp); + vf = qp_grp->vf; + mutex_lock(&vf->pf->usdev_lock); + if (usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RESET, NULL)) { + usnic_err("Failed to move qp grp %u to reset\n", + qp_grp->grp_id); + } + + list_del(&qp_grp->link); + qp_grp_destroy(qp_grp); + mutex_unlock(&vf->pf->usdev_lock); + + return 0; +} + +int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct usnic_ib_qp_grp *qp_grp; + int status; + usnic_dbg("\n"); + + qp_grp = to_uqp_grp(ibqp); + + /* TODO: Future Support All States */ + mutex_lock(&qp_grp->vf->pf->usdev_lock); + if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) { + status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL); + } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) { + status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL); + } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) { + status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL); + } else { + usnic_err("Unexpected combination mask: %u state: %u\n", + attr_mask & IB_QP_STATE, attr->qp_state); + status = -EINVAL; + } + + 
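/*
 * Sketch of the userspace side of the transitions accepted above, using
 * the stock libibverbs API (generic verbs usage, not code from this
 * patch); error handling elided:
 *
 *	struct ibv_qp_attr attr = { .qp_state = IBV_QPS_INIT };
 *	ibv_modify_qp(qp, &attr, IBV_QP_STATE);
 *	attr.qp_state = IBV_QPS_RTR;
 *	ibv_modify_qp(qp, &attr, IBV_QP_STATE);
 *	attr.qp_state = IBV_QPS_RTS;
 *	ibv_modify_qp(qp, &attr, IBV_QP_STATE);
 */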
mutex_unlock(&qp_grp->vf->pf->usdev_lock); + return status; +} + +struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct ib_cq *cq; + + usnic_dbg("\n"); + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return ERR_PTR(-EBUSY); + + return cq; +} + +int usnic_ib_destroy_cq(struct ib_cq *cq) +{ + usnic_dbg("\n"); + kfree(cq); + return 0; +} + +struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct usnic_ib_mr *mr; + int err; + + usnic_dbg("start 0x%llx va 0x%llx length 0x%llx\n", start, + virt_addr, length); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (IS_ERR_OR_NULL(mr)) + return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM); + + mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length, + access_flags, 0); + if (IS_ERR_OR_NULL(mr->umem)) { + err = mr->umem ? PTR_ERR(mr->umem) : -EFAULT; + goto err_free; + } + + mr->ibmr.lkey = mr->ibmr.rkey = 0; + return &mr->ibmr; + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +int usnic_ib_dereg_mr(struct ib_mr *ibmr) +{ + struct usnic_ib_mr *mr = to_umr(ibmr); + + usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length); + + usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing); + kfree(mr); + return 0; +} + +struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct usnic_ib_ucontext *context; + struct usnic_ib_dev *us_ibdev = to_usdev(ibdev); + usnic_dbg("\n"); + + context = kmalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&context->qp_grp_list); + mutex_lock(&us_ibdev->usdev_lock); + list_add_tail(&context->link, &us_ibdev->ctx_list); + mutex_unlock(&us_ibdev->usdev_lock); + + return &context->ibucontext; +} + +int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +{ + struct usnic_ib_ucontext *context = to_uucontext(ibcontext); + struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device); + usnic_dbg("\n"); + + mutex_lock(&us_ibdev->usdev_lock); + BUG_ON(!list_empty(&context->qp_grp_list)); + list_del(&context->link); + mutex_unlock(&us_ibdev->usdev_lock); + kfree(context); + return 0; +} + +int usnic_ib_mmap(struct ib_ucontext *context, + struct vm_area_struct *vma) +{ + struct usnic_ib_ucontext *uctx = to_ucontext(context); + struct usnic_ib_dev *us_ibdev; + struct usnic_ib_qp_grp *qp_grp; + struct usnic_ib_vf *vf; + struct vnic_dev_bar *bar; + dma_addr_t bus_addr; + unsigned int len; + unsigned int vfid; + + usnic_dbg("\n"); + + us_ibdev = to_usdev(context->device); + vma->vm_flags |= VM_IO; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vfid = vma->vm_pgoff; + usnic_dbg("Page Offset %lu PAGE_SHIFT %u VFID %u\n", + vma->vm_pgoff, PAGE_SHIFT, vfid); + + mutex_lock(&us_ibdev->usdev_lock); + list_for_each_entry(qp_grp, &uctx->qp_grp_list, link) { + vf = qp_grp->vf; + if (usnic_vnic_get_index(vf->vnic) == vfid) { + bar = usnic_vnic_get_bar(vf->vnic, 0); + if ((vma->vm_end - vma->vm_start) != bar->len) { + usnic_err("Bar0 Len %lu - Request map %lu\n", + bar->len, + vma->vm_end - vma->vm_start); + mutex_unlock(&us_ibdev->usdev_lock); + return -EINVAL; + } + bus_addr = bar->bus_addr; + len = bar->len; + usnic_dbg("bus: %pa vaddr: %p size: %ld\n", + &bus_addr, bar->vaddr, bar->len); + mutex_unlock(&us_ibdev->usdev_lock); + + return remap_pfn_range(vma, + vma->vm_start, + bus_addr >> PAGE_SHIFT, + len, 
vma->vm_page_prot); + } + } + + mutex_unlock(&us_ibdev->usdev_lock); + usnic_err("No VF %u found\n", vfid); + return -EINVAL; +} + +/* In ib callbacks section - Start of stub funcs */ +struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, + struct ib_ah_attr *ah_attr) +{ + usnic_dbg("\n"); + return ERR_PTR(-EPERM); +} + +int usnic_ib_destroy_ah(struct ib_ah *ah) +{ + usnic_dbg("\n"); + return -EINVAL; +} + +int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + usnic_dbg("\n"); + return -EINVAL; +} + +int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + usnic_dbg("\n"); + return -EINVAL; +} + +int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, + struct ib_wc *wc) +{ + usnic_dbg("\n"); + return -EINVAL; +} + +int usnic_ib_req_notify_cq(struct ib_cq *cq, + enum ib_cq_notify_flags flags) +{ + usnic_dbg("\n"); + return -EINVAL; +} + +struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc) +{ + usnic_dbg("\n"); + return ERR_PTR(-ENOMEM); +} + + +/* In ib callbacks section - End of stub funcs */ +/* End of ib callbacks section */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h new file mode 100644 index 00000000000..bb864f5aed7 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
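/*
 * Userspace side of the usnic_ib_mmap() contract above: the VF index is
 * passed as the page offset and the length must equal BAR0's length. A
 * plausible call, with vfid and bar_len taken from struct
 * usnic_ib_create_qp_resp and uverbs_fd a hypothetical name for the
 * opened uverbs device fd:
 *
 *	void *bar = mmap(NULL, bar_len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			 uverbs_fd, (off_t)vfid * sysconf(_SC_PAGESIZE));
 *	if (bar == MAP_FAILED)
 *		return -1;
 */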
+ * + */ + +#ifndef USNIC_IB_VERBS_H_ +#define USNIC_IB_VERBS_H_ + +#include "usnic_ib.h" + +enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device, + u8 port_num); +int usnic_ib_query_device(struct ib_device *ibdev, + struct ib_device_attr *props); +int usnic_ib_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); +int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); +int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); +int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey); +struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata); +int usnic_ib_dealloc_pd(struct ib_pd *pd); +struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int usnic_ib_destroy_qp(struct ib_qp *qp); +int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries, + int vector, struct ib_ucontext *context, + struct ib_udata *udata); +int usnic_ib_destroy_cq(struct ib_cq *cq); +struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int usnic_ib_dereg_mr(struct ib_mr *ibmr); +struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata); +int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); +int usnic_ib_mmap(struct ib_ucontext *context, + struct vm_area_struct *vma); +struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, + struct ib_ah_attr *ah_attr); +int usnic_ib_destroy_ah(struct ib_ah *ah); +int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries, + struct ib_wc *wc); +int usnic_ib_req_notify_cq(struct ib_cq *cq, + enum ib_cq_notify_flags flags); +struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc); +#endif /* !USNIC_IB_VERBS_H */ diff --git a/drivers/infiniband/hw/usnic/usnic_log.h b/drivers/infiniband/hw/usnic/usnic_log.h new file mode 100644 index 00000000000..75777a66c68 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_log.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_LOG_H_ +#define USNIC_LOG_H_ + +#include "usnic.h" + +extern unsigned int usnic_log_lvl; + +#define USNIC_LOG_LVL_NONE (0) +#define USNIC_LOG_LVL_ERR (1) +#define USNIC_LOG_LVL_INFO (2) +#define USNIC_LOG_LVL_DBG (3) + +#define usnic_printk(lvl, args...) 
\ + do { \ + printk(lvl "%s:%s:%d: ", DRV_NAME, __func__, \ + __LINE__); \ + printk(args); \ + } while (0) + +#define usnic_dbg(args...) \ + do { \ + if (unlikely(usnic_log_lvl >= USNIC_LOG_LVL_DBG)) { \ + usnic_printk(KERN_INFO, args); \ + } \ +} while (0) + +#define usnic_info(args...) \ +do { \ + if (usnic_log_lvl >= USNIC_LOG_LVL_INFO) { \ + usnic_printk(KERN_INFO, args); \ + } \ +} while (0) + +#define usnic_err(args...) \ + do { \ + if (usnic_log_lvl >= USNIC_LOG_LVL_ERR) { \ + usnic_printk(KERN_ERR, args); \ + } \ + } while (0) +#endif /* !USNIC_LOG_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c new file mode 100644 index 00000000000..ddef6f77a78 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_transport.c @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include <linux/bitmap.h> +#include <linux/file.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <net/inet_sock.h> + +#include "usnic_transport.h" +#include "usnic_log.h" + +/* ROCE */ +static unsigned long *roce_bitmap; +static u16 roce_next_port = 1; +#define ROCE_BITMAP_SZ ((1 << (8 /*CHAR_BIT*/ * sizeof(u16)))/8 /*CHAR BIT*/) +static DEFINE_SPINLOCK(roce_bitmap_lock); + +const char *usnic_transport_to_str(enum usnic_transport_type type) +{ + switch (type) { + case USNIC_TRANSPORT_UNKNOWN: + return "Unknown"; + case USNIC_TRANSPORT_ROCE_CUSTOM: + return "roce custom"; + case USNIC_TRANSPORT_IPV4_UDP: + return "IPv4 UDP"; + case USNIC_TRANSPORT_MAX: + return "Max?"; + default: + return "Not known"; + } +} + +int usnic_transport_sock_to_str(char *buf, int buf_sz, + struct socket *sock) +{ + int err; + uint32_t addr; + uint16_t port; + int proto; + + memset(buf, 0, buf_sz); + err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port); + if (err) + return 0; + + return scnprintf(buf, buf_sz, "Proto:%u Addr:%pI4h Port:%hu", + proto, &addr, port); +} + +/* + * reserve a port number. if "0" specified, we will try to pick one + * starting at roce_next_port. 
roce_next_port will take on the values + * 1..4096 + */ +u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num) +{ + if (type == USNIC_TRANSPORT_ROCE_CUSTOM) { + spin_lock(&roce_bitmap_lock); + if (!port_num) { + port_num = bitmap_find_next_zero_area(roce_bitmap, + ROCE_BITMAP_SZ, + roce_next_port /* start */, + 1 /* nr */, + 0 /* align */); + roce_next_port = (port_num & 4095) + 1; + } else if (test_bit(port_num, roce_bitmap)) { + usnic_err("Failed to allocate port for %s\n", + usnic_transport_to_str(type)); + spin_unlock(&roce_bitmap_lock); + goto out_fail; + } + bitmap_set(roce_bitmap, port_num, 1); + spin_unlock(&roce_bitmap_lock); + } else { + usnic_err("Failed to allocate port - transport %s unsupported\n", + usnic_transport_to_str(type)); + goto out_fail; + } + + usnic_dbg("Allocating port %hu for %s\n", port_num, + usnic_transport_to_str(type)); + return port_num; + +out_fail: + return 0; +} + +void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num) +{ + if (type == USNIC_TRANSPORT_ROCE_CUSTOM) { + spin_lock(&roce_bitmap_lock); + if (!port_num) { + usnic_err("Unreserving invalid port num 0 for %s\n", + usnic_transport_to_str(type)); + goto out_roce_custom; + } + + if (!test_bit(port_num, roce_bitmap)) { + usnic_err("Unreserving invalid %hu for %s\n", + port_num, + usnic_transport_to_str(type)); + goto out_roce_custom; + } + bitmap_clear(roce_bitmap, port_num, 1); + usnic_dbg("Freeing port %hu for %s\n", port_num, + usnic_transport_to_str(type)); +out_roce_custom: + spin_unlock(&roce_bitmap_lock); + } else { + usnic_err("Freeing invalid port %hu for %d\n", port_num, type); + } +} + +struct socket *usnic_transport_get_socket(int sock_fd) +{ + struct socket *sock; + int err; + char buf[25]; + + /* sockfd_lookup will internally do a fget */ + sock = sockfd_lookup(sock_fd, &err); + if (!sock) { + usnic_err("Unable to lookup socket for fd %d with err %d\n", + sock_fd, err); + return ERR_PTR(-ENOENT); + } + + usnic_transport_sock_to_str(buf, sizeof(buf), sock); + usnic_dbg("Get sock %s\n", buf); + + return sock; +} + +void usnic_transport_put_socket(struct socket *sock) +{ + char buf[100]; + + usnic_transport_sock_to_str(buf, sizeof(buf), sock); + usnic_dbg("Put sock %s\n", buf); + sockfd_put(sock); +} + +int usnic_transport_sock_get_addr(struct socket *sock, int *proto, + uint32_t *addr, uint16_t *port) +{ + int len; + int err; + struct sockaddr_in sock_addr; + + err = sock->ops->getname(sock, + (struct sockaddr *)&sock_addr, + &len, 0); + if (err) + return err; + + if (sock_addr.sin_family != AF_INET) + return -EINVAL; + + if (proto) + *proto = sock->sk->sk_protocol; + if (port) + *port = ntohs(((struct sockaddr_in *)&sock_addr)->sin_port); + if (addr) + *addr = ntohl(((struct sockaddr_in *) + &sock_addr)->sin_addr.s_addr); + + return 0; +} + +int usnic_transport_init(void) +{ + roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL); + if (!roce_bitmap) { + usnic_err("Failed to allocate bitmap\n"); + return -ENOMEM; + } + + /* Do not ever allocate bit 0, hence set it here */ + bitmap_set(roce_bitmap, 0, 1); + return 0; +} + +void usnic_transport_fini(void) +{ + kfree(roce_bitmap); +} diff --git a/drivers/infiniband/hw/usnic/usnic_transport.h b/drivers/infiniband/hw/usnic/usnic_transport.h new file mode 100644 index 00000000000..7e5dc6d9f46 --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_transport.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
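/*
 * Typical round-trip against the reservation helpers above; passing 0
 * asks for any free port, and 0 is also the failure return:
 *
 *	u16 port = usnic_transport_rsrv_port(USNIC_TRANSPORT_ROCE_CUSTOM, 0);
 *	if (!port)
 *		return -ENOMEM;
 *	...
 *	usnic_transport_unrsrv_port(USNIC_TRANSPORT_ROCE_CUSTOM, port);
 *
 * Note that ROCE_BITMAP_SZ works out to (1 << 16) / 8 = 8192; used as a
 * byte count in usnic_transport_init(), that gives one bit for each of
 * the 65536 possible u16 port numbers.
 */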
+ * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_TRANSPORT_H_ +#define USNIC_TRANSPORT_H_ + +#include "usnic_abi.h" + +const char *usnic_transport_to_str(enum usnic_transport_type trans_type); +/* + * Returns number of bytes written, excluding null terminator. If + * nothing was written, the function returns 0. + */ +int usnic_transport_sock_to_str(char *buf, int buf_sz, + struct socket *sock); +/* + * Reserve a port. If "port_num" is set, then the function will try + * to reserve that particular port. + */ +u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num); +void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num); +/* + * Does an fget on the socket referred to by sock_fd and returns the socket. + * The socket will not be destroyed before usnic_transport_put_socket has + * been called. + */ +struct socket *usnic_transport_get_socket(int sock_fd); +void usnic_transport_put_socket(struct socket *sock); +/* + * Call usnic_transport_get_socket before calling *_sock_get_addr + */ +int usnic_transport_sock_get_addr(struct socket *sock, int *proto, + uint32_t *addr, uint16_t *port); +int usnic_transport_init(void); +void usnic_transport_fini(void); +#endif /* !USNIC_TRANSPORT_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c new file mode 100644 index 00000000000..801a1d6937e --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -0,0 +1,604 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2013 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT.
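/*
 * Pairing expected of callers of the transport-socket helpers declared
 * above (sock_fd being a file descriptor passed down from userspace):
 *
 *	struct socket *sock = usnic_transport_get_socket(sock_fd);
 *	if (IS_ERR(sock))
 *		return PTR_ERR(sock);
 *	err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port);
 *	...
 *	usnic_transport_put_socket(sock);
 */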
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mm.h> +#include <linux/dma-mapping.h> +#include <linux/sched.h> +#include <linux/hugetlb.h> +#include <linux/dma-attrs.h> +#include <linux/iommu.h> +#include <linux/workqueue.h> +#include <linux/list.h> +#include <linux/pci.h> + +#include "usnic_log.h" +#include "usnic_uiom.h" +#include "usnic_uiom_interval_tree.h" + +static struct workqueue_struct *usnic_uiom_wq; + +#define USNIC_UIOM_PAGE_CHUNK \ + ((PAGE_SIZE - offsetof(struct usnic_uiom_chunk, page_list)) /\ + ((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] - \ + (void *) &((struct usnic_uiom_chunk *) 0)->page_list[0])) + +static void usnic_uiom_reg_account(struct work_struct *work) +{ + struct usnic_uiom_reg *umem = container_of(work, + struct usnic_uiom_reg, work); + + down_write(&umem->mm->mmap_sem); + umem->mm->locked_vm -= umem->diff; + up_write(&umem->mm->mmap_sem); + mmput(umem->mm); + kfree(umem); +} + +static int usnic_uiom_dma_fault(struct iommu_domain *domain, + struct device *dev, + unsigned long iova, int flags, + void *token) +{ + usnic_err("Device %s iommu fault domain 0x%pK va 0x%lx flags 0x%x\n", + dev_name(dev), + domain, iova, flags); + return -ENOSYS; +} + +static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) +{ + struct usnic_uiom_chunk *chunk, *tmp; + struct page *page; + struct scatterlist *sg; + int i; + dma_addr_t pa; + + list_for_each_entry_safe(chunk, tmp, chunk_list, list) { + for_each_sg(chunk->page_list, sg, chunk->nents, i) { + page = sg_page(sg); + pa = sg_phys(sg); + if (dirty) + set_page_dirty_lock(page); + put_page(page); + usnic_dbg("pa: %pa\n", &pa); + } + kfree(chunk); + } +} + +static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, + int dmasync, struct list_head *chunk_list) +{ + struct page **page_list; + struct scatterlist *sg; + struct usnic_uiom_chunk *chunk; + unsigned long locked; + unsigned long lock_limit; + unsigned long cur_base; + unsigned long npages; + int ret; + int off; + int i; + int flags; + dma_addr_t pa; + DEFINE_DMA_ATTRS(attrs); + + if (dmasync) + dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); + + if (!can_do_mlock()) + return -EPERM; + + INIT_LIST_HEAD(chunk_list); + + page_list = (struct page **) __get_free_page(GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT; + + down_write(¤t->mm->mmap_sem); + + locked = npages + current->mm->locked_vm; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + ret = -ENOMEM; + goto out; + } + + flags = IOMMU_READ | IOMMU_CACHE; + flags |= (writable) ? 
IOMMU_WRITE : 0; + cur_base = addr & PAGE_MASK; + ret = 0; + + while (npages) { + ret = get_user_pages(current, current->mm, cur_base, + min_t(unsigned long, npages, + PAGE_SIZE / sizeof(struct page *)), + 1, !writable, page_list, NULL); + + if (ret < 0) + goto out; + + npages -= ret; + off = 0; + + while (ret) { + chunk = kmalloc(sizeof(*chunk) + + sizeof(struct scatterlist) * + min_t(int, ret, USNIC_UIOM_PAGE_CHUNK), + GFP_KERNEL); + if (!chunk) { + ret = -ENOMEM; + goto out; + } + + chunk->nents = min_t(int, ret, USNIC_UIOM_PAGE_CHUNK); + sg_init_table(chunk->page_list, chunk->nents); + for_each_sg(chunk->page_list, sg, chunk->nents, i) { + sg_set_page(sg, page_list[i + off], + PAGE_SIZE, 0); + pa = sg_phys(sg); + usnic_dbg("va: 0x%lx pa: %pa\n", + cur_base + i*PAGE_SIZE, &pa); + } + cur_base += chunk->nents * PAGE_SIZE; + ret -= chunk->nents; + off += chunk->nents; + list_add_tail(&chunk->list, chunk_list); + } + + ret = 0; + } + +out: + if (ret < 0) + usnic_uiom_put_pages(chunk_list, 0); + else + current->mm->locked_vm = locked; + + up_write(¤t->mm->mmap_sem); + free_page((unsigned long) page_list); + return ret; +} + +static void usnic_uiom_unmap_sorted_intervals(struct list_head *intervals, + struct usnic_uiom_pd *pd) +{ + struct usnic_uiom_interval_node *interval, *tmp; + long unsigned va, size; + + list_for_each_entry_safe(interval, tmp, intervals, link) { + va = interval->start << PAGE_SHIFT; + size = ((interval->last - interval->start) + 1) << PAGE_SHIFT; + while (size > 0) { + /* Workaround for RH 970401 */ + usnic_dbg("va 0x%lx size 0x%lx", va, PAGE_SIZE); + iommu_unmap(pd->domain, va, PAGE_SIZE); + va += PAGE_SIZE; + size -= PAGE_SIZE; + } + } +} + +static void __usnic_uiom_reg_release(struct usnic_uiom_pd *pd, + struct usnic_uiom_reg *uiomr, + int dirty) +{ + int npages; + unsigned long vpn_start, vpn_last; + struct usnic_uiom_interval_node *interval, *tmp; + int writable = 0; + LIST_HEAD(rm_intervals); + + npages = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; + vpn_start = (uiomr->va & PAGE_MASK) >> PAGE_SHIFT; + vpn_last = vpn_start + npages - 1; + + spin_lock(&pd->lock); + usnic_uiom_remove_interval(&pd->rb_root, vpn_start, + vpn_last, &rm_intervals); + usnic_uiom_unmap_sorted_intervals(&rm_intervals, pd); + + list_for_each_entry_safe(interval, tmp, &rm_intervals, link) { + if (interval->flags & IOMMU_WRITE) + writable = 1; + list_del(&interval->link); + kfree(interval); + } + + usnic_uiom_put_pages(&uiomr->chunk_list, dirty & writable); + spin_unlock(&pd->lock); +} + +static int usnic_uiom_map_sorted_intervals(struct list_head *intervals, + struct usnic_uiom_reg *uiomr) +{ + int i, err; + size_t size; + struct usnic_uiom_chunk *chunk; + struct usnic_uiom_interval_node *interval_node; + dma_addr_t pa; + dma_addr_t pa_start = 0; + dma_addr_t pa_end = 0; + long int va_start = -EINVAL; + struct usnic_uiom_pd *pd = uiomr->pd; + long int va = uiomr->va & PAGE_MASK; + int flags = IOMMU_READ | IOMMU_CACHE; + + flags |= (uiomr->writable) ? 
IOMMU_WRITE : 0; + chunk = list_first_entry(&uiomr->chunk_list, struct usnic_uiom_chunk, + list); + list_for_each_entry(interval_node, intervals, link) { +iter_chunk: + for (i = 0; i < chunk->nents; i++, va += PAGE_SIZE) { + pa = sg_phys(&chunk->page_list[i]); + if ((va >> PAGE_SHIFT) < interval_node->start) + continue; + + if ((va >> PAGE_SHIFT) == interval_node->start) { + /* First page of the interval */ + va_start = va; + pa_start = pa; + pa_end = pa; + } + + WARN_ON(va_start == -EINVAL); + + if ((pa_end + PAGE_SIZE != pa) && + (pa != pa_start)) { + /* PAs are not contiguous */ + size = pa_end - pa_start + PAGE_SIZE; + usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x", + va_start, &pa_start, size, flags); + err = iommu_map(pd->domain, va_start, pa_start, + size, flags); + if (err) { + usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", + va_start, &pa_start, size, err); + goto err_out; + } + va_start = va; + pa_start = pa; + pa_end = pa; + } + + if ((va >> PAGE_SHIFT) == interval_node->last) { + /* Last page of the interval */ + size = pa - pa_start + PAGE_SIZE; + usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n", + va_start, &pa_start, size, flags); + err = iommu_map(pd->domain, va_start, pa_start, + size, flags); + if (err) { + usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", + va_start, &pa_start, size, err); + goto err_out; + } + break; + } + + if (pa != pa_start) + pa_end += PAGE_SIZE; + } + + if (i == chunk->nents) { + /* + * Hit last entry of the chunk, + * hence advance to next chunk + */ + chunk = list_first_entry(&chunk->list, + struct usnic_uiom_chunk, + list); + goto iter_chunk; + } + } + + return 0; + +err_out: + usnic_uiom_unmap_sorted_intervals(intervals, pd); + return err; +} + +struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, + unsigned long addr, size_t size, + int writable, int dmasync) +{ + struct usnic_uiom_reg *uiomr; + unsigned long va_base, vpn_start, vpn_last; + unsigned long npages; + int offset, err; + LIST_HEAD(sorted_diff_intervals); + + /* + * Intel IOMMU map throws an error if a translation entry is + * changed from read to write. This module may not unmap + * and then remap the entry after fixing the permission + * because this opens up a small window where hw DMA may page fault. + * Hence, make all entries writable. + */ + writable = 1; + + va_base = addr & PAGE_MASK; + offset = addr & ~PAGE_MASK; + npages = PAGE_ALIGN(size + offset) >> PAGE_SHIFT; + vpn_start = (addr & PAGE_MASK) >> PAGE_SHIFT; + vpn_last = vpn_start + npages - 1; + + uiomr = kmalloc(sizeof(*uiomr), GFP_KERNEL); + if (!uiomr) + return ERR_PTR(-ENOMEM); + + uiomr->va = va_base; + uiomr->offset = offset; + uiomr->length = size; + uiomr->writable = writable; + uiomr->pd = pd; + + err = usnic_uiom_get_pages(addr, size, writable, dmasync, + &uiomr->chunk_list); + if (err) { + usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n", + vpn_start, vpn_last, err); + goto out_free_uiomr; + } + + spin_lock(&pd->lock); + err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last, + (writable) ?
IOMMU_WRITE : 0, + IOMMU_WRITE, + &pd->rb_root, + &sorted_diff_intervals); + if (err) { + usnic_err("Failed disjoint interval vpn [0x%lx,0x%lx] err %d\n", + vpn_start, vpn_last, err); + goto out_put_pages; + } + + err = usnic_uiom_map_sorted_intervals(&sorted_diff_intervals, uiomr); + if (err) { + usnic_err("Failed map interval vpn [0x%lx,0x%lx] err %d\n", + vpn_start, vpn_last, err); + goto out_put_intervals; + + } + + err = usnic_uiom_insert_interval(&pd->rb_root, vpn_start, vpn_last, + (writable) ? IOMMU_WRITE : 0); + if (err) { + usnic_err("Failed insert interval vpn [0x%lx,0x%lx] err %d\n", + vpn_start, vpn_last, err); + goto out_unmap_intervals; + } + + usnic_uiom_put_interval_set(&sorted_diff_intervals); + spin_unlock(&pd->lock); + + return uiomr; + +out_unmap_intervals: + usnic_uiom_unmap_sorted_intervals(&sorted_diff_intervals, pd); +out_put_intervals: + usnic_uiom_put_interval_set(&sorted_diff_intervals); +out_put_pages: + usnic_uiom_put_pages(&uiomr->chunk_list, 0); + spin_unlock(&pd->lock); +out_free_uiomr: + kfree(uiomr); + return ERR_PTR(err); +} + +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing) +{ + struct mm_struct *mm; + unsigned long diff; + + __usnic_uiom_reg_release(uiomr->pd, uiomr, 1); + + mm = get_task_mm(current); + if (!mm) { + kfree(uiomr); + return; + } + + diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT; + + /* + * We may be called with the mm's mmap_sem already held. This + * can happen when a userspace munmap() is the call that drops + * the last reference to our file and calls our release + * method. If there are memory regions to destroy, we'll end + * up here and not be able to take the mmap_sem. In that case + * we defer the vm_locked accounting to the system workqueue. + */ + if (closing) { + if (!down_write_trylock(&mm->mmap_sem)) { + INIT_WORK(&uiomr->work, usnic_uiom_reg_account); + uiomr->mm = mm; + uiomr->diff = diff; + + queue_work(usnic_uiom_wq, &uiomr->work); + return; + } + } else + down_write(&mm->mmap_sem); + + current->mm->locked_vm -= diff; + up_write(&mm->mmap_sem); + mmput(mm); + kfree(uiomr); +} + +struct usnic_uiom_pd *usnic_uiom_alloc_pd(void) +{ + struct usnic_uiom_pd *pd; + void *domain; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + pd->domain = domain = iommu_domain_alloc(&pci_bus_type); + if (IS_ERR_OR_NULL(domain)) { + usnic_err("Failed to allocate IOMMU domain with err %ld\n", + PTR_ERR(pd->domain)); + kfree(pd); + return ERR_PTR(domain ? 
PTR_ERR(domain) : -ENOMEM); + } + + iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL); + + spin_lock_init(&pd->lock); + INIT_LIST_HEAD(&pd->devs); + + return pd; +} + +void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd) +{ + iommu_domain_free(pd->domain); + kfree(pd); +} + +int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev) +{ + struct usnic_uiom_dev *uiom_dev; + int err; + + uiom_dev = kzalloc(sizeof(*uiom_dev), GFP_ATOMIC); + if (!uiom_dev) + return -ENOMEM; + uiom_dev->dev = dev; + + err = iommu_attach_device(pd->domain, dev); + if (err) + goto out_free_dev; + + if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) { + usnic_err("IOMMU of %s does not support cache coherency\n", + dev_name(dev)); + err = -EINVAL; + goto out_detach_device; + } + + spin_lock(&pd->lock); + list_add_tail(&uiom_dev->link, &pd->devs); + pd->dev_cnt++; + spin_unlock(&pd->lock); + + return 0; + +out_detach_device: + iommu_detach_device(pd->domain, dev); +out_free_dev: + kfree(uiom_dev); + return err; +} + +void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, struct device *dev) +{ + struct usnic_uiom_dev *uiom_dev; + int found = 0; + + spin_lock(&pd->lock); + list_for_each_entry(uiom_dev, &pd->devs, link) { + if (uiom_dev->dev == dev) { + found = 1; + break; + } + } + + if (!found) { + usnic_err("Unable to free dev %s - not found\n", + dev_name(dev)); + spin_unlock(&pd->lock); + return; + } + + list_del(&uiom_dev->link); + pd->dev_cnt--; + spin_unlock(&pd->lock); + + return iommu_detach_device(pd->domain, dev); +} + +struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd) +{ + struct usnic_uiom_dev *uiom_dev; + struct device **devs; + int i = 0; + + spin_lock(&pd->lock); + devs = kcalloc(pd->dev_cnt + 1, sizeof(*devs), GFP_ATOMIC); + if (!devs) { + devs = ERR_PTR(-ENOMEM); + goto out; + } + + list_for_each_entry(uiom_dev, &pd->devs, link) { + devs[i++] = uiom_dev->dev; + } +out: + spin_unlock(&pd->lock); + return devs; +} + +void usnic_uiom_free_dev_list(struct device **devs) +{ + kfree(devs); +} + +int usnic_uiom_init(char *drv_name) +{ + if (!iommu_present(&pci_bus_type)) { + usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n"); + return -EPERM; + } + + usnic_uiom_wq = create_workqueue(drv_name); + if (!usnic_uiom_wq) { + usnic_err("Unable to alloc wq for drv %s\n", drv_name); + return -ENOMEM; + } + + return 0; +} + +void usnic_uiom_fini(void) +{ + flush_workqueue(usnic_uiom_wq); + destroy_workqueue(usnic_uiom_wq); +} diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h new file mode 100644 index 00000000000..70440996e8f --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
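/*
 * Expected lifecycle of the PD helpers defined above: a device must be
 * attached to the PD's IOMMU domain before memory is registered against
 * it, and detached before the PD is freed (pdev is a hypothetical
 * struct pci_dev pointer):
 *
 *	struct usnic_uiom_pd *pd = usnic_uiom_alloc_pd();
 *	if (IS_ERR(pd))
 *		return PTR_ERR(pd);
 *	err = usnic_uiom_attach_dev_to_pd(pd, &pdev->dev);
 *	...
 *	usnic_uiom_detach_dev_from_pd(pd, &pdev->dev);
 *	usnic_uiom_dealloc_pd(pd);
 */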
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_UIOM_H_ +#define USNIC_UIOM_H_ + +#include <linux/list.h> +#include <linux/scatterlist.h> + +#include "usnic_uiom_interval_tree.h" + +#define USNIC_UIOM_READ (1) +#define USNIC_UIOM_WRITE (2) + +#define USNIC_UIOM_MAX_PD_CNT (1000) +#define USNIC_UIOM_MAX_MR_CNT (1000000) +#define USNIC_UIOM_MAX_MR_SIZE (~0UL) +#define USNIC_UIOM_PAGE_SIZE (PAGE_SIZE) + +struct usnic_uiom_dev { + struct device *dev; + struct list_head link; +}; + +struct usnic_uiom_pd { + struct iommu_domain *domain; + spinlock_t lock; + struct rb_root rb_root; + struct list_head devs; + int dev_cnt; +}; + +struct usnic_uiom_reg { + struct usnic_uiom_pd *pd; + unsigned long va; + size_t length; + int offset; + int page_size; + int writable; + struct list_head chunk_list; + struct work_struct work; + struct mm_struct *mm; + unsigned long diff; +}; + +struct usnic_uiom_chunk { + struct list_head list; + int nents; + struct scatterlist page_list[0]; +}; + +struct usnic_uiom_pd *usnic_uiom_alloc_pd(void); +void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd); +int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev); +void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, + struct device *dev); +struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd); +void usnic_uiom_free_dev_list(struct device **devs); +struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, + unsigned long addr, size_t size, + int access, int dmasync); +void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing); +int usnic_uiom_init(char *drv_name); +void usnic_uiom_fini(void); +#endif /* USNIC_UIOM_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c new file mode 100644 index 00000000000..3a4288e0fba --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2014, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
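/*
 * struct usnic_uiom_chunk above ends in a zero-length array, the
 * pre-C99 flexible-array idiom, so a chunk carrying n scatterlist
 * entries is allocated in a single shot, as usnic_uiom_get_pages()
 * does:
 *
 *	chunk = kmalloc(sizeof(*chunk) +
 *			sizeof(struct scatterlist) * n, GFP_KERNEL);
 *
 * USNIC_UIOM_PAGE_CHUNK (in usnic_uiom.c) caps n so the allocation
 * never exceeds one page.
 */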
+ * + */ + +#include <linux/init.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/list_sort.h> + +#include <linux/interval_tree_generic.h> +#include "usnic_uiom_interval_tree.h" + +#define START(node) ((node)->start) +#define LAST(node) ((node)->last) + +#define MAKE_NODE(node, start, end, ref_cnt, flags, err, err_out) \ + do { \ + node = usnic_uiom_interval_node_alloc(start, \ + end, ref_cnt, flags); \ + if (!node) { \ + err = -ENOMEM; \ + goto err_out; \ + } \ + } while (0) + +#define MARK_FOR_ADD(node, list) (list_add_tail(&node->link, list)) + +#define MAKE_NODE_AND_APPEND(node, start, end, ref_cnt, flags, err, \ + err_out, list) \ + do { \ + MAKE_NODE(node, start, end, \ + ref_cnt, flags, err, \ + err_out); \ + MARK_FOR_ADD(node, list); \ + } while (0) + +#define FLAGS_EQUAL(flags1, flags2, mask) \ + (((flags1) & (mask)) == ((flags2) & (mask))) + +static struct usnic_uiom_interval_node* +usnic_uiom_interval_node_alloc(long int start, long int last, int ref_cnt, + int flags) +{ + struct usnic_uiom_interval_node *interval = kzalloc(sizeof(*interval), + GFP_ATOMIC); + if (!interval) + return NULL; + + interval->start = start; + interval->last = last; + interval->flags = flags; + interval->ref_cnt = ref_cnt; + + return interval; +} + +static int interval_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct usnic_uiom_interval_node *node_a, *node_b; + + node_a = list_entry(a, struct usnic_uiom_interval_node, link); + node_b = list_entry(b, struct usnic_uiom_interval_node, link); + + /* long to int */ + if (node_a->start < node_b->start) + return -1; + else if (node_a->start > node_b->start) + return 1; + + return 0; +} + +static void +find_intervals_intersection_sorted(struct rb_root *root, unsigned long start, + unsigned long last, + struct list_head *list) +{ + struct usnic_uiom_interval_node *node; + + INIT_LIST_HEAD(list); + + for (node = usnic_uiom_interval_tree_iter_first(root, start, last); + node; + node = usnic_uiom_interval_tree_iter_next(node, start, last)) + list_add_tail(&node->link, list); + + list_sort(NULL, list, interval_cmp); +} + +int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last, + int flags, int flag_mask, + struct rb_root *root, + struct list_head *diff_set) +{ + struct usnic_uiom_interval_node *interval, *tmp; + int err = 0; + long int pivot = start; + LIST_HEAD(intersection_set); + + INIT_LIST_HEAD(diff_set); + + find_intervals_intersection_sorted(root, start, last, + &intersection_set); + + list_for_each_entry(interval, &intersection_set, link) { + if (pivot < interval->start) { + MAKE_NODE_AND_APPEND(tmp, pivot, interval->start - 1, + 1, flags, err, err_out, + diff_set); + pivot = interval->start; + } + + /* + * Invariant: Set [start, pivot] is either in diff_set or root, + * but not in both. 
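/*
 * Worked example for the loop above: with root = {[5, 10], flags F} and
 * a request for [0, 15] with the same flags F, pivot starts at 0, node
 * [0, 4] is emitted ahead of the intersecting interval, [5, 10] is
 * skipped because its flags match, and the tail emits [11, 15]; the
 * resulting diff_set is {[0, 4], [11, 15]}.
 */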
+ */ + + if (pivot > interval->last) { + continue; + } else if (pivot <= interval->last && + FLAGS_EQUAL(interval->flags, flags, + flag_mask)) { + pivot = interval->last + 1; + } + } + + if (pivot <= last) + MAKE_NODE_AND_APPEND(tmp, pivot, last, 1, flags, err, err_out, + diff_set); + + return 0; + +err_out: + list_for_each_entry_safe(interval, tmp, diff_set, link) { + list_del(&interval->link); + kfree(interval); + } + + return err; +} + +void usnic_uiom_put_interval_set(struct list_head *intervals) +{ + struct usnic_uiom_interval_node *interval, *tmp; + list_for_each_entry_safe(interval, tmp, intervals, link) + kfree(interval); +} + +int usnic_uiom_insert_interval(struct rb_root *root, unsigned long start, + unsigned long last, int flags) +{ + struct usnic_uiom_interval_node *interval, *tmp; + unsigned long istart, ilast; + int iref_cnt, iflags; + unsigned long lpivot = start; + int err = 0; + LIST_HEAD(to_add); + LIST_HEAD(intersection_set); + + find_intervals_intersection_sorted(root, start, last, + &intersection_set); + + list_for_each_entry(interval, &intersection_set, link) { + /* + * Invariant - lpivot is the left edge of next interval to be + * inserted + */ + istart = interval->start; + ilast = interval->last; + iref_cnt = interval->ref_cnt; + iflags = interval->flags; + + if (istart < lpivot) { + MAKE_NODE_AND_APPEND(tmp, istart, lpivot - 1, iref_cnt, + iflags, err, err_out, &to_add); + } else if (istart > lpivot) { + MAKE_NODE_AND_APPEND(tmp, lpivot, istart - 1, 1, flags, + err, err_out, &to_add); + lpivot = istart; + } else { + lpivot = istart; + } + + if (ilast > last) { + MAKE_NODE_AND_APPEND(tmp, lpivot, last, iref_cnt + 1, + iflags | flags, err, err_out, + &to_add); + MAKE_NODE_AND_APPEND(tmp, last + 1, ilast, iref_cnt, + iflags, err, err_out, &to_add); + } else { + MAKE_NODE_AND_APPEND(tmp, lpivot, ilast, iref_cnt + 1, + iflags | flags, err, err_out, + &to_add); + } + + lpivot = ilast + 1; + } + + if (lpivot <= last) + MAKE_NODE_AND_APPEND(tmp, lpivot, last, 1, flags, err, err_out, + &to_add); + + list_for_each_entry_safe(interval, tmp, &intersection_set, link) { + usnic_uiom_interval_tree_remove(interval, root); + kfree(interval); + } + + list_for_each_entry(interval, &to_add, link) + usnic_uiom_interval_tree_insert(interval, root); + + return 0; + +err_out: + list_for_each_entry_safe(interval, tmp, &to_add, link) + kfree(interval); + + return err; +} + +void usnic_uiom_remove_interval(struct rb_root *root, unsigned long start, + unsigned long last, struct list_head *removed) +{ + struct usnic_uiom_interval_node *interval; + + for (interval = usnic_uiom_interval_tree_iter_first(root, start, last); + interval; + interval = usnic_uiom_interval_tree_iter_next(interval, + start, + last)) { + if (--interval->ref_cnt == 0) + list_add_tail(&interval->link, removed); + } + + list_for_each_entry(interval, removed, link) + usnic_uiom_interval_tree_remove(interval, root); +} + +INTERVAL_TREE_DEFINE(struct usnic_uiom_interval_node, rb, + unsigned long, __subtree_last, + START, LAST, , usnic_uiom_interval_tree) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h new file mode 100644 index 00000000000..d4f752e258f --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. 
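/*
 * Worked example for usnic_uiom_insert_interval() above: inserting
 * [3, 8] with flags F into root = {[5, 10], ref_cnt 1, flags F} splits
 * the overlap into {[3, 4], ref 1}, {[5, 8], ref 2} and {[9, 10],
 * ref 1}; the original [5, 10] node is removed from the tree and the
 * three pieces are inserted in its place.
 */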
+ * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef USNIC_UIOM_INTERVAL_TREE_H_ +#define USNIC_UIOM_INTERVAL_TREE_H_ + +#include <linux/rbtree.h> + +struct usnic_uiom_interval_node { + struct rb_node rb; + struct list_head link; + unsigned long start; + unsigned long last; + unsigned long __subtree_last; + unsigned int ref_cnt; + int flags; +}; + +extern void +usnic_uiom_interval_tree_insert(struct usnic_uiom_interval_node *node, + struct rb_root *root); +extern void +usnic_uiom_interval_tree_remove(struct usnic_uiom_interval_node *node, + struct rb_root *root); +extern struct usnic_uiom_interval_node * +usnic_uiom_interval_tree_iter_first(struct rb_root *root, + unsigned long start, + unsigned long last); +extern struct usnic_uiom_interval_node * +usnic_uiom_interval_tree_iter_next(struct usnic_uiom_interval_node *node, + unsigned long start, unsigned long last); +/* + * Inserts {start...last} into {root}. If there are overlaps, + * nodes will be broken up and merged + */ +int usnic_uiom_insert_interval(struct rb_root *root, + unsigned long start, unsigned long last, + int flags); +/* + * Removes {start...last} from {root}. The nodes removed are returned in + * 'removed.' The caller is responsible for freeing memory of nodes in + * 'removed.' + */ +void usnic_uiom_remove_interval(struct rb_root *root, + unsigned long start, unsigned long last, + struct list_head *removed); +/* + * Returns {start...last} - {root} (relative complement of {start...last} in + * {root}) in diff_set, sorted in ascending order + */ +int usnic_uiom_get_intervals_diff(unsigned long start, + unsigned long last, int flags, + int flag_mask, + struct rb_root *root, + struct list_head *diff_set); +/* Call this to free diff_set returned by usnic_uiom_get_intervals_diff */ +void usnic_uiom_put_interval_set(struct list_head *intervals); +#endif /* USNIC_UIOM_INTERVAL_TREE_H_ */ diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c new file mode 100644 index 00000000000..656b88c39ed --- /dev/null +++ b/drivers/infiniband/hw/usnic/usnic_vnic.c @@ -0,0 +1,467 @@ +/* + * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
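/*
 * The translation tables just below rely on an X-macro: the
 * USNIC_VNIC_RES_TYPES list (defined in usnic_vnic.h, not shown in this
 * hunk) expands one DEFINE_USNIC_VNIC_RES*() invocation per resource
 * type, so locally redefining those macros builds a parallel array at
 * compile time, e.g.:
 *
 *	#define DEFINE_USNIC_VNIC_RES(t, vnic_t, desc) vnic_t,
 *	static enum vnic_res_type tbl[] = { USNIC_VNIC_RES_TYPES };
 *	#undef DEFINE_USNIC_VNIC_RES
 */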
+ * + */ +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "usnic_ib.h" +#include "vnic_resource.h" +#include "usnic_log.h" +#include "usnic_vnic.h" + +struct usnic_vnic { + struct vnic_dev *vdev; + struct vnic_dev_bar bar[PCI_NUM_RESOURCES]; + struct usnic_vnic_res_chunk chunks[USNIC_VNIC_RES_TYPE_MAX]; + spinlock_t res_lock; +}; + +static enum vnic_res_type _to_vnic_res_type(enum usnic_vnic_res_type res_type) +{ +#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \ + vnic_res_type, +#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \ + vnic_res_type, + static enum vnic_res_type usnic_vnic_type_2_vnic_type[] = { + USNIC_VNIC_RES_TYPES}; +#undef DEFINE_USNIC_VNIC_RES +#undef DEFINE_USNIC_VNIC_RES_AT + + if (res_type >= USNIC_VNIC_RES_TYPE_MAX) + return RES_TYPE_MAX; + + return usnic_vnic_type_2_vnic_type[res_type]; +} + +const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type) +{ +#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \ + desc, +#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \ + desc, + static const char * const usnic_vnic_res_type_desc[] = { + USNIC_VNIC_RES_TYPES}; +#undef DEFINE_USNIC_VNIC_RES +#undef DEFINE_USNIC_VNIC_RES_AT + + if (res_type >= USNIC_VNIC_RES_TYPE_MAX) + return "unknown"; + + return usnic_vnic_res_type_desc[res_type]; + +} + +const char *usnic_vnic_pci_name(struct usnic_vnic *vnic) +{ + return pci_name(usnic_vnic_get_pdev(vnic)); +} + +int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf, + int buf_sz, + void *hdr_obj, + int (*printtitle)(void *, char*, int), + int (*printcols)(char *, int), + int (*printrow)(void *, char *, int)) +{ + struct usnic_vnic_res_chunk *chunk; + struct usnic_vnic_res *res; + struct vnic_dev_bar *bar0; + int i, j, offset; + + offset = 0; + bar0 = usnic_vnic_get_bar(vnic, 0); + offset += scnprintf(buf + offset, buf_sz - offset, + "VF:%hu BAR0 bus_addr=%pa vaddr=0x%p size=%ld ", + usnic_vnic_get_index(vnic), + &bar0->bus_addr, + bar0->vaddr, bar0->len); + if (printtitle) + offset += printtitle(hdr_obj, buf + offset, buf_sz - offset); + offset += scnprintf(buf + offset, buf_sz - offset, "\n"); + offset += scnprintf(buf + offset, buf_sz - offset, + "|RES\t|CTRL_PIN\t\t|IN_USE\t"); + if (printcols) + offset += printcols(buf + offset, buf_sz - offset); + offset += scnprintf(buf + offset, buf_sz - offset, "\n"); + + spin_lock(&vnic->res_lock); + for (i = 0; i < ARRAY_SIZE(vnic->chunks); i++) { + chunk = &vnic->chunks[i]; + for (j = 0; j < chunk->cnt; j++) { + res = chunk->res[j]; + offset += scnprintf(buf + offset, buf_sz - offset, + "|%s[%u]\t|0x%p\t|%u\t", + usnic_vnic_res_type_to_str(res->type), + res->vnic_idx, res->ctrl, !!res->owner); + if (printrow) { + offset += printrow(res->owner, buf + offset, + buf_sz - offset); + } + offset += scnprintf(buf + offset, buf_sz - offset, + "\n"); + } + } + spin_unlock(&vnic->res_lock); + return offset; +} + +void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec, + enum usnic_vnic_res_type trgt_type, + u16 cnt) +{ + int i; + + for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) { + if (spec->resources[i].type == trgt_type) { + spec->resources[i].cnt = cnt; + return; + } + } + + WARN_ON(1); +} + +int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec, + struct usnic_vnic_res_spec *res_spec) +{ + int found, i, j; + + for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) { + found = 0; + + for (j = 0; j < USNIC_VNIC_RES_TYPE_MAX; 
+			if (res_spec->resources[j].type !=
+					min_spec->resources[i].type)
+				continue;
+			found = 1;
+			if (min_spec->resources[i].cnt >
+					res_spec->resources[j].cnt)
+				return -EINVAL;
+			break;
+		}
+
+		if (!found)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+int usnic_vnic_spec_dump(char *buf, int buf_sz,
+				struct usnic_vnic_res_spec *res_spec)
+{
+	enum usnic_vnic_res_type res_type;
+	int res_cnt;
+	int i;
+	int offset = 0;
+
+	for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+		res_type = res_spec->resources[i].type;
+		res_cnt = res_spec->resources[i].cnt;
+		offset += scnprintf(buf + offset, buf_sz - offset,
+				"Res: %s Cnt: %d ",
+				usnic_vnic_res_type_to_str(res_type),
+				res_cnt);
+	}
+
+	return offset;
+}
+
+int usnic_vnic_check_room(struct usnic_vnic *vnic,
+				struct usnic_vnic_res_spec *res_spec)
+{
+	int i;
+	enum usnic_vnic_res_type res_type;
+	int res_cnt;
+
+	for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+		res_type = res_spec->resources[i].type;
+		res_cnt = res_spec->resources[i].cnt;
+
+		if (res_type == USNIC_VNIC_RES_TYPE_EOL)
+			break;
+
+		if (res_cnt > usnic_vnic_res_free_cnt(vnic, res_type))
+			return -EBUSY;
+	}
+
+	return 0;
+}
+
+int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
+				enum usnic_vnic_res_type type)
+{
+	return vnic->chunks[type].cnt;
+}
+
+int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
+				enum usnic_vnic_res_type type)
+{
+	return vnic->chunks[type].free_cnt;
+}
+
+struct usnic_vnic_res_chunk *
+usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
+				int cnt, void *owner)
+{
+	struct usnic_vnic_res_chunk *src, *ret;
+	struct usnic_vnic_res *res;
+	int i;
+
+	if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
+		return ERR_PTR(-EINVAL);
+
+	ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
+	if (!ret) {
+		usnic_err("Failed to allocate chunk for %s - Out of memory\n",
+				usnic_vnic_pci_name(vnic));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC);
+	if (!ret->res) {
+		usnic_err("Failed to allocate resources for %s. Out of memory\n",
+				usnic_vnic_pci_name(vnic));
+		kfree(ret);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	spin_lock(&vnic->res_lock);
+	src = &vnic->chunks[type];
+	for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+		res = src->res[i];
+		if (!res->owner) {
+			src->free_cnt--;
+			res->owner = owner;
+			ret->res[ret->cnt++] = res;
+		}
+	}
+
+	spin_unlock(&vnic->res_lock);
+	ret->type = type;
+	ret->vnic = vnic;
+	WARN_ON(ret->cnt != cnt);
+
+	return ret;
+}
+
+void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
+{
+
+	struct usnic_vnic_res *res;
+	int i;
+	struct usnic_vnic *vnic = chunk->vnic;
+
+	spin_lock(&vnic->res_lock);
+	while ((i = --chunk->cnt) >= 0) {
+		res = chunk->res[i];
+		chunk->res[i] = NULL;
+		res->owner = NULL;
+		vnic->chunks[res->type].free_cnt++;
+	}
+	spin_unlock(&vnic->res_lock);
+
+	kfree(chunk->res);
+	kfree(chunk);
+}
+
+u16 usnic_vnic_get_index(struct usnic_vnic *vnic)
+{
+	return usnic_vnic_get_pdev(vnic)->devfn - 1;
+}
+
+static int usnic_vnic_alloc_res_chunk(struct usnic_vnic *vnic,
+					enum usnic_vnic_res_type type,
+					struct usnic_vnic_res_chunk *chunk)
+{
+	int cnt, err, i;
+	struct usnic_vnic_res *res;
+
+	cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type));
+	if (cnt < 1)
+		return -EINVAL;
+
+	chunk->cnt = chunk->free_cnt = cnt;
+	chunk->res = kzalloc(sizeof(*(chunk->res))*cnt, GFP_KERNEL);
+	if (!chunk->res)
+		return -ENOMEM;
+
+	for (i = 0; i < cnt; i++) {
+		res = kzalloc(sizeof(*res), GFP_KERNEL);
+		if (!res) {
+			err = -ENOMEM;
+			goto fail;
+		}
+		res->type = type;
+		res->vnic_idx = i;
+		res->vnic = vnic;
+		res->ctrl = vnic_dev_get_res(vnic->vdev,
+						_to_vnic_res_type(type), i);
+		chunk->res[i] = res;
+	}
+
+	chunk->vnic = vnic;
+	return 0;
+fail:
+	for (i--; i >= 0; i--)
+		kfree(chunk->res[i]);
+	kfree(chunk->res);
+	return err;
+}
+
+static void usnic_vnic_free_res_chunk(struct usnic_vnic_res_chunk *chunk)
+{
+	int i;
+	for (i = 0; i < chunk->cnt; i++)
+		kfree(chunk->res[i]);
+	kfree(chunk->res);
+}
+
+static int usnic_vnic_discover_resources(struct pci_dev *pdev,
+						struct usnic_vnic *vnic)
+{
+	enum usnic_vnic_res_type res_type;
+	int i;
+	int err = 0;
+
+	for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+			continue;
+		vnic->bar[i].len = pci_resource_len(pdev, i);
+		vnic->bar[i].vaddr = pci_iomap(pdev, i, vnic->bar[i].len);
+		if (!vnic->bar[i].vaddr) {
+			usnic_err("Cannot memory-map BAR %d, aborting\n",
+					i);
+			err = -ENODEV;
+			goto out_clean_bar;
+		}
+		vnic->bar[i].bus_addr = pci_resource_start(pdev, i);
+	}
+
+	vnic->vdev = vnic_dev_register(NULL, pdev, pdev, vnic->bar,
+					ARRAY_SIZE(vnic->bar));
+	if (!vnic->vdev) {
+		usnic_err("Failed to register device %s\n",
+				pci_name(pdev));
+		err = -EINVAL;
+		goto out_clean_bar;
+	}
+
+	for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+			res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) {
+		err = usnic_vnic_alloc_res_chunk(vnic, res_type,
+						&vnic->chunks[res_type]);
+		if (err) {
+			usnic_err("Failed to alloc res %s with err %d\n",
+					usnic_vnic_res_type_to_str(res_type),
+					err);
+			goto out_clean_chunks;
+		}
+	}
+
+	return 0;
+
+out_clean_chunks:
+	for (res_type--; res_type > USNIC_VNIC_RES_TYPE_EOL; res_type--)
+		usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
+	vnic_dev_unregister(vnic->vdev);
+out_clean_bar:
+	for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+			continue;
+		if (!vnic->bar[i].vaddr)
+			break;
+
+		iounmap(vnic->bar[i].vaddr);
+	}
+
+	return err;
+}
+
+struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic)
+{
+	return vnic_dev_get_pdev(vnic->vdev);
+}
+
+struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
+				int bar_num)
+{
+	return (bar_num < ARRAY_SIZE(vnic->bar)) ? &vnic->bar[bar_num] : NULL;
+}
+
+static void usnic_vnic_release_resources(struct usnic_vnic *vnic)
+{
+	int i;
+	struct pci_dev *pdev;
+	enum usnic_vnic_res_type res_type;
+
+	pdev = usnic_vnic_get_pdev(vnic);
+
+	for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+			res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++)
+		usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
+
+	vnic_dev_unregister(vnic->vdev);
+
+	for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+			continue;
+		iounmap(vnic->bar[i].vaddr);
+	}
+}
+
+struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev)
+{
+	struct usnic_vnic *vnic;
+	int err = 0;
+
+	if (!pci_is_enabled(pdev)) {
+		usnic_err("PCI dev %s is disabled\n", pci_name(pdev));
+		return ERR_PTR(-EINVAL);
+	}
+
+	vnic = kzalloc(sizeof(*vnic), GFP_KERNEL);
+	if (!vnic) {
+		usnic_err("Failed to alloc vnic for %s - out of memory\n",
+				pci_name(pdev));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	spin_lock_init(&vnic->res_lock);
+
+	err = usnic_vnic_discover_resources(pdev, vnic);
+	if (err) {
+		usnic_err("Failed to discover %s resources with err %d\n",
+				pci_name(pdev), err);
+		goto out_free_vnic;
+	}
+
+	usnic_dbg("Allocated vnic for %s\n", usnic_vnic_pci_name(vnic));
+
+	return vnic;
+
+out_free_vnic:
+	kfree(vnic);
+
+	return ERR_PTR(err);
+}
+
+void usnic_vnic_free(struct usnic_vnic *vnic)
+{
+	usnic_vnic_release_resources(vnic);
+	kfree(vnic);
+}
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.h b/drivers/infiniband/hw/usnic/usnic_vnic.h
new file mode 100644
index 00000000000..14d931a8829
--- /dev/null
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_VNIC_H_
+#define USNIC_VNIC_H_
+
+#include <linux/pci.h>
+
+#include "vnic_dev.h"
+
+/*			=USNIC_VNIC_RES_TYPE=	=VNIC_RES=	=DESC= */
+#define USNIC_VNIC_RES_TYPES \
+	DEFINE_USNIC_VNIC_RES_AT(EOL, RES_TYPE_EOL, "EOL", 0) \
+	DEFINE_USNIC_VNIC_RES(WQ, RES_TYPE_WQ, "WQ") \
+	DEFINE_USNIC_VNIC_RES(RQ, RES_TYPE_RQ, "RQ") \
+	DEFINE_USNIC_VNIC_RES(CQ, RES_TYPE_CQ, "CQ") \
+	DEFINE_USNIC_VNIC_RES(INTR, RES_TYPE_INTR_CTRL, "INT") \
+	DEFINE_USNIC_VNIC_RES(MAX, RES_TYPE_MAX, "MAX")\
+
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+	USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t = val,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+	USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t,
+enum usnic_vnic_res_type {
+	USNIC_VNIC_RES_TYPES
+};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+struct usnic_vnic_res {
+	enum usnic_vnic_res_type	type;
+	unsigned int			vnic_idx;
+	struct usnic_vnic		*vnic;
+	void __iomem			*ctrl;
+	void				*owner;
+};
+
+struct usnic_vnic_res_chunk {
+	enum usnic_vnic_res_type	type;
+	int				cnt;
+	int				free_cnt;
+	struct usnic_vnic_res		**res;
+	struct usnic_vnic		*vnic;
+};
+
+struct usnic_vnic_res_desc {
+	enum usnic_vnic_res_type	type;
+	uint16_t			cnt;
+};
+
+struct usnic_vnic_res_spec {
+	struct usnic_vnic_res_desc resources[USNIC_VNIC_RES_TYPE_MAX];
+};
+
+const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type);
+const char *usnic_vnic_pci_name(struct usnic_vnic *vnic);
+int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf, int buf_sz,
+			void *hdr_obj,
+			int (*printtitle)(void *, char*, int),
+			int (*printcols)(char *, int),
+			int (*printrow)(void *, char *, int));
+void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
+				enum usnic_vnic_res_type trgt_type,
+				u16 cnt);
+int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
+					struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_spec_dump(char *buf, int buf_sz,
+				struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_check_room(struct usnic_vnic *vnic,
+				struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
+				enum usnic_vnic_res_type type);
+int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
+				enum usnic_vnic_res_type type);
+struct usnic_vnic_res_chunk *
+usnic_vnic_get_resources(struct usnic_vnic *vnic,
+				enum usnic_vnic_res_type type,
+				int cnt,
+				void *owner);
+void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk);
+struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic);
+struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
+					int bar_num);
+struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev);
+void usnic_vnic_free(struct usnic_vnic *vnic);
+u16 usnic_vnic_get_index(struct usnic_vnic *vnic);
+
+#endif /*!USNIC_VNIC_H_*/
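A note on the header just above: USNIC_VNIC_RES_TYPES is an X-macro table. Each consumer redefines DEFINE_USNIC_VNIC_RES()/DEFINE_USNIC_VNIC_RES_AT() before expanding the list, so the enum here and the two lookup tables in usnic_vnic.c are all generated from the same six entries and cannot drift apart. Hand-expanding the enum (for illustration only, not part of the patch) gives:

enum usnic_vnic_res_type {
	USNIC_VNIC_RES_TYPE_EOL = 0,	/* value pinned by DEFINE_USNIC_VNIC_RES_AT */
	USNIC_VNIC_RES_TYPE_WQ,		/* 1 */
	USNIC_VNIC_RES_TYPE_RQ,		/* 2 */
	USNIC_VNIC_RES_TYPE_CQ,		/* 3 */
	USNIC_VNIC_RES_TYPE_INTR,	/* 4 */
	USNIC_VNIC_RES_TYPE_MAX,	/* 5 */
};

and the parallel tables built in _to_vnic_res_type() and usnic_vnic_res_type_to_str() expand to:

static enum vnic_res_type usnic_vnic_type_2_vnic_type[] = {
	RES_TYPE_EOL, RES_TYPE_WQ, RES_TYPE_RQ,
	RES_TYPE_CQ, RES_TYPE_INTR_CTRL, RES_TYPE_MAX,
};

static const char * const usnic_vnic_res_type_desc[] = {
	"EOL", "WQ", "RQ", "CQ", "INT", "MAX",
};

Adding a resource type is therefore a one-line change to the table; every consumer picks it up automatically.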
