diff options
Diffstat (limited to 'drivers/infiniband/ulp/ipoib/ipoib_multicast.c')
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 253 |
1 files changed, 146 insertions, 107 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 8950e9546f4..d4e005720d0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -34,12 +34,14 @@ #include <linux/skbuff.h> #include <linux/rtnetlink.h> +#include <linux/moduleparam.h> #include <linux/ip.h> #include <linux/in.h> #include <linux/igmp.h> #include <linux/inetdevice.h> #include <linux/delay.h> #include <linux/completion.h> +#include <linux/slab.h> #include <net/dst.h> @@ -67,30 +69,13 @@ struct ipoib_mcast_iter { static void ipoib_mcast_free(struct ipoib_mcast *mcast) { struct net_device *dev = mcast->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); - struct ipoib_neigh *neigh, *tmp; - unsigned long flags; int tx_dropped = 0; - ipoib_dbg_mcast(netdev_priv(dev), - "deleting multicast group " IPOIB_GID_FMT "\n", - IPOIB_GID_ARG(mcast->mcmember.mgid)); - - spin_lock_irqsave(&priv->lock, flags); + ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", + mcast->mcmember.mgid.raw); - list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { - /* - * It's safe to call ipoib_put_ah() inside priv->lock - * here, because we know that mcast->ah will always - * hold one more reference, so ipoib_put_ah() will - * never do more than decrement the ref count. - */ - if (neigh->ah) - ipoib_put_ah(neigh->ah); - ipoib_neigh_free(dev, neigh); - } - - spin_unlock_irqrestore(&priv->lock, flags); + /* remove all neigh connected to this mcast */ + ipoib_del_neighs_by_gid(dev, mcast->mcmember.mgid.raw); if (mcast->ah) ipoib_put_ah(mcast->ah); @@ -100,9 +85,9 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); } - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock_bh(dev); dev->stats.tx_dropped += tx_dropped; - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock_bh(dev); kfree(mcast); } @@ -190,7 +175,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, mcast->mcmember = *mcmember; - /* Set the cached Q_Key before we attach if it's the broadcast group */ + /* Set the multicast MTU and cached Q_Key before we attach if it's + * the broadcast group. + */ if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, sizeof (union ib_gid))) { spin_lock_irq(&priv->lock); @@ -198,17 +185,23 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, spin_unlock_irq(&priv->lock); return -EAGAIN; } + priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); spin_unlock_irq(&priv->lock); priv->tx_wr.wr.ud.remote_qkey = priv->qkey; set_qkey = 1; + + if (!ipoib_cm_admin_enabled(dev)) { + rtnl_lock(); + dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu)); + rtnl_unlock(); + } } if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { - ipoib_warn(priv, "multicast group " IPOIB_GID_FMT - " already attached\n", - IPOIB_GID_ARG(mcast->mcmember.mgid)); + ipoib_warn(priv, "multicast group %pI6 already attached\n", + mcast->mcmember.mgid.raw); return 0; } @@ -216,9 +209,8 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), &mcast->mcmember.mgid, set_qkey); if (ret < 0) { - ipoib_warn(priv, "couldn't attach QP to multicast group " - IPOIB_GID_FMT "\n", - IPOIB_GID_ARG(mcast->mcmember.mgid)); + ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n", + mcast->mcmember.mgid.raw); clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); return ret; @@ -242,16 +234,18 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, av.grh.dgid = mcast->mcmember.mgid; ah = ipoib_create_ah(dev, priv->pd, &av); - if (!ah) { - ipoib_warn(priv, "ib_address_create failed\n"); + if (IS_ERR(ah)) { + ipoib_warn(priv, "ib_address_create failed %ld\n", + -PTR_ERR(ah)); + /* use original error */ + return PTR_ERR(ah); } else { spin_lock_irq(&priv->lock); mcast->ah = ah; spin_unlock_irq(&priv->lock); - ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT - " AV %p, LID 0x%04x, SL %d\n", - IPOIB_GID_ARG(mcast->mcmember.mgid), + ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n", + mcast->mcmember.mgid.raw, mcast->ah->ah, be16_to_cpu(mcast->mcmember.mlid), mcast->mcmember.sl); @@ -259,23 +253,19 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, } /* actually send any queued packets */ - spin_lock_irq(&priv->tx_lock); + netif_tx_lock_bh(dev); while (!skb_queue_empty(&mcast->pkt_queue)) { struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); - spin_unlock_irq(&priv->tx_lock); - skb->dev = dev; - - if (!skb->dst || !skb->dst->neighbour) { - /* put pseudoheader back on for next time */ - skb_push(skb, sizeof (struct ipoib_pseudoheader)); - } + netif_tx_unlock_bh(dev); + skb->dev = dev; if (dev_queue_xmit(skb)) ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); - spin_lock_irq(&priv->tx_lock); + + netif_tx_lock_bh(dev); } - spin_unlock_irq(&priv->tx_lock); + netif_tx_unlock_bh(dev); return 0; } @@ -286,7 +276,6 @@ ipoib_mcast_sendonly_join_complete(int status, { struct ipoib_mcast *mcast = multicast->context; struct net_device *dev = mcast->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); /* We trap for port events ourselves. */ if (status == -ENETRESET) @@ -297,17 +286,16 @@ ipoib_mcast_sendonly_join_complete(int status, if (status) { if (mcast->logcount++ < 20) - ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " - IPOIB_GID_FMT ", status %d\n", - IPOIB_GID_ARG(mcast->mcmember.mgid), status); + ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n", + mcast->mcmember.mgid.raw, status); /* Flush out any queued packets */ - spin_lock_irq(&priv->tx_lock); + netif_tx_lock_bh(dev); while (!skb_queue_empty(&mcast->pkt_queue)) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); } - spin_unlock_irq(&priv->tx_lock); + netif_tx_unlock_bh(dev); /* Clear the busy flag so we try again */ status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, @@ -358,14 +346,35 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n", ret); } else { - ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT - ", starting join\n", - IPOIB_GID_ARG(mcast->mcmember.mgid)); + ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n", + mcast->mcmember.mgid.raw); } return ret; } +void ipoib_mcast_carrier_on_task(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + carrier_on_task); + struct ib_port_attr attr; + + /* + * Take rtnl_lock to avoid racing with ipoib_stop() and + * turning the carrier back on while a device is being + * removed. + */ + if (ib_query_port(priv->ca, priv->port, &attr) || + attr.state != IB_PORT_ACTIVE) { + ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); + return; + } + + rtnl_lock(); + netif_carrier_on(priv->dev); + rtnl_unlock(); +} + static int ipoib_mcast_join_complete(int status, struct ib_sa_multicast *multicast) { @@ -373,13 +382,14 @@ static int ipoib_mcast_join_complete(int status, struct net_device *dev = mcast->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_dbg_mcast(priv, "join completion for " IPOIB_GID_FMT - " (status %d)\n", - IPOIB_GID_ARG(mcast->mcmember.mgid), status); + ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", + mcast->mcmember.mgid.raw, status); /* We trap for port events ourselves. */ - if (status == -ENETRESET) - return 0; + if (status == -ENETRESET) { + status = 0; + goto out; + } if (!status) status = ipoib_mcast_join_finish(mcast, &multicast->rec); @@ -392,23 +402,24 @@ static int ipoib_mcast_join_complete(int status, &priv->mcast_task, 0); mutex_unlock(&mcast_mutex); + /* + * Defer carrier on work to ipoib_workqueue to avoid a + * deadlock on rtnl_lock here. + */ if (mcast == priv->broadcast) - netif_carrier_on(dev); + queue_work(ipoib_workqueue, &priv->carrier_on_task); - return 0; + status = 0; + goto out; } if (mcast->logcount++ < 20) { - if (status == -ETIMEDOUT) { - ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT - ", status %d\n", - IPOIB_GID_ARG(mcast->mcmember.mgid), - status); + if (status == -ETIMEDOUT || status == -EAGAIN) { + ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n", + mcast->mcmember.mgid.raw, status); } else { - ipoib_warn(priv, "multicast join failed for " - IPOIB_GID_FMT ", status %d\n", - IPOIB_GID_ARG(mcast->mcmember.mgid), - status); + ipoib_warn(priv, "multicast join failed for %pI6, status %d\n", + mcast->mcmember.mgid.raw, status); } } @@ -426,7 +437,8 @@ static int ipoib_mcast_join_complete(int status, mcast->backoff * HZ); spin_unlock_irq(&priv->lock); mutex_unlock(&mcast_mutex); - +out: + complete(&mcast->done); return status; } @@ -440,8 +452,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, ib_sa_comp_mask comp_mask; int ret = 0; - ipoib_dbg_mcast(priv, "joining MGID " IPOIB_GID_FMT "\n", - IPOIB_GID_ARG(mcast->mcmember.mgid)); + ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw); rec.mgid = mcast->mcmember.mgid; rec.port_gid = priv->local_gid; @@ -477,11 +488,15 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, } set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + init_completion(&mcast->done); + set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags); + mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, &rec, comp_mask, GFP_KERNEL, ipoib_mcast_join_complete, mcast); if (IS_ERR(mcast->mc)) { clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); + complete(&mcast->done); ret = PTR_ERR(mcast->mc); ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); @@ -503,10 +518,18 @@ void ipoib_mcast_join_task(struct work_struct *work) struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, mcast_task.work); struct net_device *dev = priv->dev; + struct ib_port_attr port_attr; if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) return; + if (ib_query_port(priv->ca, priv->port, &port_attr) || + port_attr.state != IB_PORT_ACTIVE) { + ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n", + port_attr.state); + return; + } + if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) ipoib_warn(priv, "ib_query_gid() failed\n"); else @@ -524,6 +547,9 @@ void ipoib_mcast_join_task(struct work_struct *work) if (!priv->broadcast) { struct ipoib_mcast *broadcast; + if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) + return; + broadcast = ipoib_mcast_alloc(dev, 1); if (!broadcast) { ipoib_warn(priv, "failed to allocate broadcast group\n"); @@ -573,14 +599,6 @@ void ipoib_mcast_join_task(struct work_struct *work) return; } - priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); - - if (!ipoib_cm_admin_enabled(dev)) { - rtnl_lock(); - dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu)); - rtnl_unlock(); - } - ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); clear_bit(IPOIB_MCAST_RUN, &priv->flags); @@ -626,8 +644,8 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) ib_sa_free_multicast(mcast->mc); if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { - ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n", - IPOIB_GID_ARG(mcast->mcmember.mgid)); + ipoib_dbg_mcast(priv, "leaving MGID %pI6\n", + mcast->mcmember.mgid.raw); /* Remove ourselves from the multicast group */ ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid, @@ -639,16 +657,14 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) return 0; } -void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) +void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_mcast *mcast; + unsigned long flags; + void *mgid = daddr + 4; - /* - * We can only be called from ipoib_start_xmit, so we're - * inside tx_lock -- no need to save/restore flags. - */ - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) || !priv->broadcast || @@ -661,8 +677,8 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) mcast = __ipoib_mcast_find(dev, mgid); if (!mcast) { /* Let's create a new send only group now */ - ipoib_dbg_mcast(priv, "setting up send only multicast group for " - IPOIB_GID_FMT "\n", IPOIB_GID_RAW_ARG(mgid)); + ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n", + mgid); mcast = ipoib_mcast_alloc(dev, 0); if (!mcast) { @@ -702,24 +718,28 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) out: if (mcast && mcast->ah) { - if (skb->dst && - skb->dst->neighbour && - !*to_ipoib_neigh(skb->dst->neighbour)) { - struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour, - skb->dev); + struct ipoib_neigh *neigh; + spin_unlock_irqrestore(&priv->lock, flags); + neigh = ipoib_neigh_get(dev, daddr); + spin_lock_irqsave(&priv->lock, flags); + if (!neigh) { + neigh = ipoib_neigh_alloc(daddr, dev); if (neigh) { kref_get(&mcast->ah->ref); neigh->ah = mcast->ah; list_add_tail(&neigh->list, &mcast->neigh_list); } } - + spin_unlock_irqrestore(&priv->lock, flags); ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); + if (neigh) + ipoib_neigh_put(neigh); + return; } unlock: - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } void ipoib_mcast_dev_flush(struct net_device *dev) @@ -747,18 +767,34 @@ void ipoib_mcast_dev_flush(struct net_device *dev) spin_unlock_irqrestore(&priv->lock, flags); + /* seperate between the wait to the leave*/ + list_for_each_entry_safe(mcast, tmcast, &remove_list, list) + if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags)) + wait_for_completion(&mcast->done); + list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { ipoib_mcast_leave(dev, mcast); ipoib_mcast_free(mcast); } } +static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast) +{ + /* reserved QPN, prefix, scope */ + if (memcmp(addr, broadcast, 6)) + return 0; + /* signature lower, pkey */ + if (memcmp(addr + 7, broadcast + 7, 3)) + return 0; + return 1; +} + void ipoib_mcast_restart_task(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, restart_task); struct net_device *dev = priv->dev; - struct dev_mc_list *mclist; + struct netdev_hw_addr *ha; struct ipoib_mcast *mcast, *tmcast; LIST_HEAD(remove_list); unsigned long flags; @@ -783,10 +819,13 @@ void ipoib_mcast_restart_task(struct work_struct *work) clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); /* Mark all of the entries that are found or don't exist */ - for (mclist = dev->mc_list; mclist; mclist = mclist->next) { + netdev_for_each_mc_addr(ha, dev) { union ib_gid mgid; - memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); + if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast)) + continue; + + memcpy(mgid.raw, ha->addr + 4, sizeof mgid); mcast = __ipoib_mcast_find(dev, &mgid); if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { @@ -795,14 +834,14 @@ void ipoib_mcast_restart_task(struct work_struct *work) /* ignore group which is directly joined by userspace */ if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) && !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) { - ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid " - IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid)); + ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %pI6\n", + mgid.raw); continue; } /* Not found or send-only group, let's add a new entry */ - ipoib_dbg_mcast(priv, "adding multicast entry for mgid " - IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid)); + ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n", + mgid.raw); nmcast = ipoib_mcast_alloc(dev, 0); if (!nmcast) { @@ -835,8 +874,8 @@ void ipoib_mcast_restart_task(struct work_struct *work) list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { - ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n", - IPOIB_GID_ARG(mcast->mcmember.mgid)); + ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n", + mcast->mcmember.mgid.raw); rb_erase(&mcast->rb_node, &priv->multicast_tree); |
