diff options
Diffstat (limited to 'drivers/infiniband/ulp/ipoib/ipoib_cm.c')
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_cm.c | 241 |
1 files changed, 131 insertions, 110 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 0f2d3045061..933efcea0d0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -31,12 +31,13 @@ */ #include <rdma/ib_cm.h> -#include <rdma/ib_cache.h> #include <net/dst.h> #include <net/icmp.h> #include <linux/icmpv6.h> #include <linux/delay.h> +#include <linux/slab.h> #include <linux/vmalloc.h> +#include <linux/moduleparam.h> #include "ipoib.h" @@ -84,7 +85,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); for (i = 0; i < frags; ++i) - ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); } static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) @@ -139,7 +140,8 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, struct ipoib_cm_rx_buf *rx_ring, int id, int frags, - u64 mapping[IPOIB_CM_RX_SG]) + u64 mapping[IPOIB_CM_RX_SG], + gfp_t gfp) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct sk_buff *skb; @@ -163,13 +165,13 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, } for (i = 0; i < frags; i++) { - struct page *page = alloc_page(GFP_ATOMIC); + struct page *page = alloc_page(gfp); if (!page) goto partial_error; skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE); - mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page, + mapping[i + 1] = ib_dma_map_page(priv->ca, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1]))) goto partial_error; @@ -183,7 +185,7 @@ partial_error: ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); for (; i > 0; --i) - ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); return NULL; @@ -202,7 +204,7 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev, dev_kfree_skb_any(rx_ring[i].skb); } - kfree(rx_ring); + vfree(rx_ring); } static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) @@ -337,7 +339,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev, sge[i].length = PAGE_SIZE; wr->next = NULL; - wr->sg_list = priv->cm.rx_sge; + wr->sg_list = sge; wr->num_sge = priv->cm.num_frags; } @@ -352,9 +354,12 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i int ret; int i; - rx->rx_ring = kcalloc(ipoib_recvq_size, sizeof *rx->rx_ring, GFP_KERNEL); - if (!rx->rx_ring) + rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring); + if (!rx->rx_ring) { + printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n", + priv->ca->name, ipoib_recvq_size); return -ENOMEM; + } t = kmalloc(sizeof *t, GFP_KERNEL); if (!t) { @@ -378,7 +383,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i for (i = 0; i < ipoib_recvq_size; ++i) { if (!ipoib_cm_alloc_rx_skb(dev, rx->rx_ring, i, IPOIB_CM_RX_SG - 1, - rx->rx_ring[i].mapping)) { + rx->rx_ring[i].mapping, + GFP_KERNEL)) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); ret = -ENOMEM; goto err_count; @@ -456,7 +462,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even goto err_qp; } - psn = random32() & 0xffffff; + psn = prandom_u32() & 0xffffff; ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn); if (ret) goto err_modify; @@ -534,12 +540,13 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, if (length == 0) { /* don't need this page */ - skb_fill_page_desc(toskb, i, frag->page, 0, PAGE_SIZE); + skb_fill_page_desc(toskb, i, skb_frag_page(frag), + 0, PAGE_SIZE); --skb_shinfo(skb)->nr_frags; } else { size = min(length, (unsigned) PAGE_SIZE); - frag->size = size; + skb_frag_size_set(frag, size); skb->data_len += size; skb->truesize += size; skb->len += size; @@ -634,7 +641,8 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; - newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping); + newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, + mapping, GFP_ATOMIC); if (unlikely(!newskb)) { /* * If we can't allocate a new RX buffer, dump @@ -658,7 +666,6 @@ copied: skb_reset_mac_header(skb); skb_pull(skb, IPOIB_ENCAP_LEN); - dev->last_rx = jiffies; ++dev->stats.rx_packets; dev->stats.rx_bytes += skb->len; @@ -705,6 +712,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_tx_buf *tx_req; u64 addr; + int rc; if (unlikely(skb->len > tx->mtu)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", @@ -736,9 +744,13 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ tx_req->mapping = addr; - if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), - addr, skb->len))) { - ipoib_warn(priv, "post_send failed\n"); + skb_orphan(skb); + skb_dst_drop(skb); + + rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), + addr, skb->len); + if (unlikely(rc)) { + ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(skb); @@ -750,6 +762,12 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); netif_stop_queue(dev); + rc = ib_req_notify_cq(priv->send_cq, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + if (rc < 0) + ipoib_warn(priv, "request notify on send CQ failed\n"); + else if (rc) + ipoib_send_comp_handler(priv->send_cq, dev); } } } @@ -781,7 +799,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) dev_kfree_skb_any(tx_req->skb); - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock(dev); + ++tx->tx_tail; if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(dev) && @@ -796,15 +815,12 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); neigh = tx->neigh; if (neigh) { neigh->cm = NULL; - list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); tx->neigh = NULL; } @@ -816,10 +832,10 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock(dev); } int ipoib_cm_dev_open(struct net_device *dev) @@ -1014,10 +1030,20 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ .cap.max_send_sge = 1, .sq_sig_type = IB_SIGNAL_ALL_WR, .qp_type = IB_QPT_RC, - .qp_context = tx + .qp_context = tx, + .create_flags = IB_QP_CREATE_USE_GFP_NOIO }; - return ib_create_qp(priv->pd, &attr); + struct ib_qp *tx_qp; + + tx_qp = ib_create_qp(priv->pd, &attr); + if (PTR_ERR(tx_qp) == -EINVAL) { + ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", + priv->ca->name); + attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; + tx_qp = ib_create_qp(priv->pd, &attr); + } + return tx_qp; } static int ipoib_cm_send_req(struct net_device *dev, @@ -1088,7 +1114,8 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, struct ipoib_dev_priv *priv = netdev_priv(p->dev); int ret; - p->tx_ring = vmalloc(ipoib_sendq_size * sizeof *p->tx_ring); + p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring, + GFP_NOIO, PAGE_KERNEL); if (!p->tx_ring) { ipoib_warn(priv, "failed to allocate tx ring\n"); ret = -ENOMEM; @@ -1122,8 +1149,8 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, goto err_send_cm; } - ipoib_dbg(priv, "Request connection 0x%x for gid " IPOIB_GID_FMT " qpn 0x%x\n", - p->qp->qp_num, IPOIB_GID_ARG(pathrec->dgid), qpn); + ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n", + p->qp->qp_num, pathrec->dgid.raw, qpn); return 0; @@ -1144,7 +1171,6 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); struct ipoib_cm_tx_buf *tx_req; - unsigned long flags; unsigned long begin; ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", @@ -1175,12 +1201,12 @@ timeout: DMA_TO_DEVICE); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock_bh(p->dev); if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(p->dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(p->dev); - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock_bh(p->dev); } if (p->qp) @@ -1197,6 +1223,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, struct ipoib_dev_priv *priv = netdev_priv(tx->dev); struct net_device *dev = priv->dev; struct ipoib_neigh *neigh; + unsigned long flags; int ret; switch (event->event) { @@ -1215,16 +1242,13 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, case IB_CM_REJ_RECEIVED: case IB_CM_TIMEWAIT_EXIT: ipoib_dbg(priv, "CM error %d.\n", event->event); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); neigh = tx->neigh; if (neigh) { neigh->cm = NULL; - list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); tx->neigh = NULL; } @@ -1234,8 +1258,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, queue_work(ipoib_workqueue, &priv->cm.reap_task); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); break; default: break; @@ -1267,12 +1291,15 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) { struct ipoib_dev_priv *priv = netdev_priv(tx->dev); + unsigned long flags; if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { + spin_lock_irqsave(&priv->lock, flags); list_move(&tx->list, &priv->cm.reap_list); queue_work(ipoib_workqueue, &priv->cm.reap_task); - ipoib_dbg(priv, "Reap connection for gid " IPOIB_GID_FMT "\n", - IPOIB_GID_ARG(tx->neigh->dgid)); + ipoib_dbg(priv, "Reap connection for gid %pI6\n", + tx->neigh->daddr + 4); tx->neigh = NULL; + spin_unlock_irqrestore(&priv->lock, flags); } } @@ -1289,82 +1316,94 @@ static void ipoib_cm_tx_start(struct work_struct *work) struct ib_sa_path_rec pathrec; u32 qpn; - spin_lock_irqsave(&priv->tx_lock, flags); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); + while (!list_empty(&priv->cm.start_list)) { p = list_entry(priv->cm.start_list.next, typeof(*p), list); list_del_init(&p->list); neigh = p->neigh; - qpn = IPOIB_QPN(neigh->neighbour->ha); + qpn = IPOIB_QPN(neigh->daddr); memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); - spin_unlock(&priv->lock); - spin_unlock_irqrestore(&priv->tx_lock, flags); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); + ret = ipoib_cm_tx_init(p, qpn, &pathrec); - spin_lock_irqsave(&priv->tx_lock, flags); - spin_lock(&priv->lock); + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); + if (ret) { neigh = p->neigh; if (neigh) { neigh->cm = NULL; - list_del(&neigh->list); - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); + ipoib_neigh_free(neigh); } list_del(&p->list); kfree(p); } } - spin_unlock(&priv->lock); - spin_unlock_irqrestore(&priv->tx_lock, flags); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } static void ipoib_cm_tx_reap(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, cm.reap_task); + struct net_device *dev = priv->dev; struct ipoib_cm_tx *p; + unsigned long flags; + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); while (!list_empty(&priv->cm.reap_list)) { p = list_entry(priv->cm.reap_list.next, typeof(*p), list); list_del(&p->list); - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); ipoib_cm_tx_destroy(p); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } static void ipoib_cm_skb_reap(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, cm.skb_task); + struct net_device *dev = priv->dev; struct sk_buff *skb; - + unsigned long flags; unsigned mtu = priv->mcast_mtu; - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); + while ((skb = skb_dequeue(&priv->cm.skb_queue))) { - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); + if (skb->protocol == htons(ETH_P_IP)) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); #endif dev_kfree_skb_any(skb); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, @@ -1373,8 +1412,8 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, struct ipoib_dev_priv *priv = netdev_priv(dev); int e = skb_queue_empty(&priv->cm.skb_queue); - if (skb->dst) - skb->dst->ops->update_pmtu(skb->dst, mtu); + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); skb_queue_tail(&priv->cm.skb_queue, skb); if (e) @@ -1432,40 +1471,19 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { struct net_device *dev = to_net_dev(d); - struct ipoib_dev_priv *priv = netdev_priv(dev); - - /* flush paths if we switch modes so that connections are restarted */ - if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) { - set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); - ipoib_warn(priv, "enabling connected mode " - "will cause multicast packet drops\n"); + int ret; - rtnl_lock(); - dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO); - rtnl_unlock(); - priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; + if (!rtnl_trylock()) + return restart_syscall(); - ipoib_flush_paths(dev); - return count; - } + ret = ipoib_set_mode(dev, buf); - if (!strcmp(buf, "datagram\n")) { - clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); - - rtnl_lock(); - if (test_bit(IPOIB_FLAG_CSUM, &priv->flags)) { - dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; - if (priv->hca_caps & IB_DEVICE_UD_TSO) - dev->features |= NETIF_F_TSO; - } - dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); - rtnl_unlock(); - ipoib_flush_paths(dev); + rtnl_unlock(); + if (!ret) return count; - } - return -EINVAL; + return ret; } static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode); @@ -1479,6 +1497,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ib_srq_init_attr srq_init_attr = { + .srq_type = IB_SRQT_BASIC, .attr = { .max_wr = ipoib_recvq_size, .max_sge = max_sge @@ -1494,14 +1513,15 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) return; } - priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, - GFP_KERNEL); + priv->cm.srq_ring = vzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring); if (!priv->cm.srq_ring) { printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n", priv->ca->name, ipoib_recvq_size); ib_destroy_srq(priv->cm.srq); priv->cm.srq = NULL; + return; } + } int ipoib_cm_dev_init(struct net_device *dev) @@ -1551,7 +1571,8 @@ int ipoib_cm_dev_init(struct net_device *dev) for (i = 0; i < ipoib_recvq_size; ++i) { if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i, priv->cm.num_frags - 1, - priv->cm.srq_ring[i].mapping)) { + priv->cm.srq_ring[i].mapping, + GFP_KERNEL)) { ipoib_warn(priv, "failed to allocate " "receive buffer %d\n", i); ipoib_cm_dev_cleanup(dev); |
