aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband/ulp/ipoib/ipoib_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/ulp/ipoib/ipoib_main.c')
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c861
1 files changed, 592 insertions, 269 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9ff7bc73ed9..5786a78ff8b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -46,11 +46,17 @@
#include <linux/ip.h>
#include <linux/in.h>
-#include <net/dst.h>
+#include <linux/jhash.h>
+#include <net/arp.h>
+
+#define DRV_VERSION "1.0.0"
+
+const char ipoib_driver_version[] = DRV_VERSION;
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE;
int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE;
@@ -60,15 +66,6 @@ MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
-static int lro;
-module_param(lro, bool, 0444);
-MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)");
-
-static int lro_max_aggr = IPOIB_LRO_MAX_AGGR;
-module_param(lro_max_aggr, int, 0644);
-MODULE_PARM_DESC(lro_max_aggr, "LRO: Max packets to be aggregated "
- "(default = 64)");
-
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
int ipoib_debug_level;
@@ -93,6 +90,7 @@ struct ib_sa_client ipoib_sa_client;
static void ipoib_add_one(struct ib_device *device);
static void ipoib_remove_one(struct ib_device *device);
+static void ipoib_neigh_reclaim(struct rcu_head *rp);
static struct ib_client ipoib_client = {
.name = "ipoib",
@@ -106,6 +104,8 @@ int ipoib_open(struct net_device *dev)
ipoib_dbg(priv, "bringing up interface\n");
+ netif_carrier_off(dev);
+
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
if (ipoib_pkey_dev_delay_open(dev))
@@ -121,7 +121,7 @@ int ipoib_open(struct net_device *dev)
struct ipoib_dev_priv *cpriv;
/* Bring up any child interfaces too */
- mutex_lock(&priv->vlan_mutex);
+ down_read(&priv->vlan_rwsem);
list_for_each_entry(cpriv, &priv->child_intfs, list) {
int flags;
@@ -131,7 +131,7 @@ int ipoib_open(struct net_device *dev)
dev_change_flags(cpriv->dev, flags | IFF_UP);
}
- mutex_unlock(&priv->vlan_mutex);
+ up_read(&priv->vlan_rwsem);
}
netif_start_queue(dev);
@@ -157,14 +157,14 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
- ipoib_ib_dev_down(dev, 0);
+ ipoib_ib_dev_down(dev, 1);
ipoib_ib_dev_stop(dev, 0);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
/* Bring down any child interfaces too */
- mutex_lock(&priv->vlan_mutex);
+ down_read(&priv->vlan_rwsem);
list_for_each_entry(cpriv, &priv->child_intfs, list) {
int flags;
@@ -174,12 +174,27 @@ static int ipoib_stop(struct net_device *dev)
dev_change_flags(cpriv->dev, flags & ~IFF_UP);
}
- mutex_unlock(&priv->vlan_mutex);
+ up_read(&priv->vlan_rwsem);
}
return 0;
}
+static void ipoib_uninit(struct net_device *dev)
+{
+ ipoib_dev_cleanup(dev);
+}
+
+static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+ if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
+ features &= ~(NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO);
+
+ return features;
+}
+
static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -207,6 +222,37 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
+int ipoib_set_mode(struct net_device *dev, const char *buf)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+ /* flush paths if we switch modes so that connections are restarted */
+ if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
+ set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+ ipoib_warn(priv, "enabling connected mode "
+ "will cause multicast packet drops\n");
+ netdev_update_features(dev);
+ rtnl_unlock();
+ priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+
+ ipoib_flush_paths(dev);
+ rtnl_lock();
+ return 0;
+ }
+
+ if (!strcmp(buf, "datagram\n")) {
+ clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+ netdev_update_features(dev);
+ dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
+ rtnl_unlock();
+ ipoib_flush_paths(dev);
+ rtnl_lock();
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
static struct ipoib_path *__path_find(struct net_device *dev, void *gid)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -263,30 +309,15 @@ static int __path_add(struct net_device *dev, struct ipoib_path *path)
static void path_free(struct net_device *dev, struct ipoib_path *path)
{
- struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ipoib_neigh *neigh, *tn;
struct sk_buff *skb;
- unsigned long flags;
while ((skb = __skb_dequeue(&path->queue)))
dev_kfree_skb_irq(skb);
- spin_lock_irqsave(&priv->lock, flags);
-
- list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
- /*
- * It's safe to call ipoib_put_ah() inside priv->lock
- * here, because we know that path->ah will always
- * hold one more reference, so ipoib_put_ah() will
- * never do more than decrement the ref count.
- */
- if (neigh->ah)
- ipoib_put_ah(neigh->ah);
-
- ipoib_neigh_free(dev, neigh);
- }
+ ipoib_dbg(netdev_priv(dev), "path_free\n");
- spin_unlock_irqrestore(&priv->lock, flags);
+ /* remove all neigh connected to this path */
+ ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
if (path->ah)
ipoib_put_ah(path->ah);
@@ -431,7 +462,7 @@ static void path_rec_completion(int status,
spin_lock_irqsave(&priv->lock, flags);
- if (ah) {
+ if (!IS_ERR_OR_NULL(ah)) {
path->pathrec = *pathrec;
old_ah = path->ah;
@@ -457,19 +488,14 @@ static void path_rec_completion(int status,
}
kref_get(&path->ah->ref);
neigh->ah = path->ah;
- memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
- sizeof(union ib_gid));
- if (ipoib_cm_enabled(dev, neigh->neighbour)) {
+ if (ipoib_cm_enabled(dev, neigh->daddr)) {
if (!ipoib_cm_get(neigh))
ipoib_cm_set(neigh, ipoib_cm_create_tx(dev,
path,
neigh));
if (!ipoib_cm_get(neigh)) {
- list_del(&neigh->list);
- if (neigh->ah)
- ipoib_put_ah(neigh->ah);
- ipoib_neigh_free(dev, neigh);
+ ipoib_neigh_free(neigh);
continue;
}
}
@@ -485,6 +511,9 @@ static void path_rec_completion(int status,
spin_unlock_irqrestore(&priv->lock, flags);
+ if (IS_ERR_OR_NULL(ah))
+ ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
+
if (old_ah)
ipoib_put_ah(old_ah);
@@ -554,25 +583,26 @@ static int path_rec_start(struct net_device *dev,
return 0;
}
-static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
+static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
+ struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path;
struct ipoib_neigh *neigh;
unsigned long flags;
- neigh = ipoib_neigh_alloc(skb_dst(skb)->neighbour, skb->dev);
+ spin_lock_irqsave(&priv->lock, flags);
+ neigh = ipoib_neigh_alloc(daddr, dev);
if (!neigh) {
+ spin_unlock_irqrestore(&priv->lock, flags);
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
return;
}
- spin_lock_irqsave(&priv->lock, flags);
-
- path = __path_find(dev, skb_dst(skb)->neighbour->ha + 4);
+ path = __path_find(dev, daddr + 4);
if (!path) {
- path = path_rec_create(dev, skb_dst(skb)->neighbour->ha + 4);
+ path = path_rec_create(dev, daddr + 4);
if (!path)
goto err_path;
@@ -584,17 +614,12 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
if (path->ah) {
kref_get(&path->ah->ref);
neigh->ah = path->ah;
- memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
- sizeof(union ib_gid));
- if (ipoib_cm_enabled(dev, neigh->neighbour)) {
+ if (ipoib_cm_enabled(dev, neigh->daddr)) {
if (!ipoib_cm_get(neigh))
ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
if (!ipoib_cm_get(neigh)) {
- list_del(&neigh->list);
- if (neigh->ah)
- ipoib_put_ah(neigh->ah);
- ipoib_neigh_free(dev, neigh);
+ ipoib_neigh_free(neigh);
goto err_drop;
}
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
@@ -606,51 +631,35 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
}
} else {
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha));
+ ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr));
+ ipoib_neigh_put(neigh);
return;
}
} else {
neigh->ah = NULL;
if (!path->query && path_rec_start(dev, path))
- goto err_list;
+ goto err_path;
__skb_queue_tail(&neigh->queue, skb);
}
spin_unlock_irqrestore(&priv->lock, flags);
+ ipoib_neigh_put(neigh);
return;
-err_list:
- list_del(&neigh->list);
-
err_path:
- ipoib_neigh_free(dev, neigh);
+ ipoib_neigh_free(neigh);
err_drop:
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
spin_unlock_irqrestore(&priv->lock, flags);
-}
-
-static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
-{
- struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
-
- /* Look up path record for unicasts */
- if (skb_dst(skb)->neighbour->ha[4] != 0xff) {
- neigh_add_path(skb, dev);
- return;
- }
-
- /* Add in the P_Key for multicasts */
- skb_dst(skb)->neighbour->ha[8] = (priv->pkey >> 8) & 0xff;
- skb_dst(skb)->neighbour->ha[9] = priv->pkey & 0xff;
- ipoib_mcast_send(dev, skb_dst(skb)->neighbour->ha + 4, skb);
+ ipoib_neigh_put(neigh);
}
static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
- struct ipoib_pseudoheader *phdr)
+ struct ipoib_cb *cb)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path;
@@ -658,17 +667,15 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
spin_lock_irqsave(&priv->lock, flags);
- path = __path_find(dev, phdr->hwaddr + 4);
+ path = __path_find(dev, cb->hwaddr + 4);
if (!path || !path->valid) {
int new_path = 0;
if (!path) {
- path = path_rec_create(dev, phdr->hwaddr + 4);
+ path = path_rec_create(dev, cb->hwaddr + 4);
new_path = 1;
}
if (path) {
- /* put pseudoheader back on for next time */
- skb_push(skb, sizeof *phdr);
__skb_queue_tail(&path->queue, skb);
if (!path->query && path_rec_start(dev, path)) {
@@ -692,12 +699,10 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
be16_to_cpu(path->pathrec.dlid));
spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+ ipoib_send(dev, skb, path->ah, IPOIB_QPN(cb->hwaddr));
return;
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, sizeof *phdr);
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
@@ -711,85 +716,82 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_neigh *neigh;
+ struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
+ struct ipoib_header *header;
unsigned long flags;
- if (likely(skb_dst(skb) && skb_dst(skb)->neighbour)) {
- if (unlikely(!*to_ipoib_neigh(skb_dst(skb)->neighbour))) {
- ipoib_path_lookup(skb, dev);
- return NETDEV_TX_OK;
- }
+ header = (struct ipoib_header *) skb->data;
- neigh = *to_ipoib_neigh(skb_dst(skb)->neighbour);
-
- if (unlikely((memcmp(&neigh->dgid.raw,
- skb_dst(skb)->neighbour->ha + 4,
- sizeof(union ib_gid))) ||
- (neigh->dev != dev))) {
- spin_lock_irqsave(&priv->lock, flags);
- /*
- * It's safe to call ipoib_put_ah() inside
- * priv->lock here, because we know that
- * path->ah will always hold one more reference,
- * so ipoib_put_ah() will never do more than
- * decrement the ref count.
- */
- if (neigh->ah)
- ipoib_put_ah(neigh->ah);
- list_del(&neigh->list);
- ipoib_neigh_free(dev, neigh);
- spin_unlock_irqrestore(&priv->lock, flags);
- ipoib_path_lookup(skb, dev);
+ if (unlikely(cb->hwaddr[4] == 0xff)) {
+ /* multicast, arrange "if" according to probability */
+ if ((header->proto != htons(ETH_P_IP)) &&
+ (header->proto != htons(ETH_P_IPV6)) &&
+ (header->proto != htons(ETH_P_ARP)) &&
+ (header->proto != htons(ETH_P_RARP)) &&
+ (header->proto != htons(ETH_P_TIPC))) {
+ /* ethertype not supported by IPoIB */
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
+ /* Add in the P_Key for multicast*/
+ cb->hwaddr[8] = (priv->pkey >> 8) & 0xff;
+ cb->hwaddr[9] = priv->pkey & 0xff;
+
+ neigh = ipoib_neigh_get(dev, cb->hwaddr);
+ if (likely(neigh))
+ goto send_using_neigh;
+ ipoib_mcast_send(dev, cb->hwaddr, skb);
+ return NETDEV_TX_OK;
+ }
- if (ipoib_cm_get(neigh)) {
- if (ipoib_cm_up(neigh)) {
- ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
- return NETDEV_TX_OK;
- }
- } else if (neigh->ah) {
- ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb_dst(skb)->neighbour->ha));
+ /* unicast, arrange "switch" according to probability */
+ switch (header->proto) {
+ case htons(ETH_P_IP):
+ case htons(ETH_P_IPV6):
+ case htons(ETH_P_TIPC):
+ neigh = ipoib_neigh_get(dev, cb->hwaddr);
+ if (unlikely(!neigh)) {
+ neigh_add_path(skb, cb->hwaddr, dev);
return NETDEV_TX_OK;
}
+ break;
+ case htons(ETH_P_ARP):
+ case htons(ETH_P_RARP):
+ /* for unicast ARP and RARP should always perform path find */
+ unicast_arp_send(skb, dev, cb);
+ return NETDEV_TX_OK;
+ default:
+ /* ethertype not supported by IPoIB */
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
- if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- spin_lock_irqsave(&priv->lock, flags);
- __skb_queue_tail(&neigh->queue, skb);
- spin_unlock_irqrestore(&priv->lock, flags);
- } else {
- ++dev->stats.tx_dropped;
- dev_kfree_skb_any(skb);
+send_using_neigh:
+ /* note we now hold a ref to neigh */
+ if (ipoib_cm_get(neigh)) {
+ if (ipoib_cm_up(neigh)) {
+ ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
+ goto unref;
}
- } else {
- struct ipoib_pseudoheader *phdr =
- (struct ipoib_pseudoheader *) skb->data;
- skb_pull(skb, sizeof *phdr);
-
- if (phdr->hwaddr[4] == 0xff) {
- /* Add in the P_Key for multicast*/
- phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
- phdr->hwaddr[9] = priv->pkey & 0xff;
-
- ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
- } else {
- /* unicast GID -- should be ARP or RARP reply */
-
- if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
- (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
- ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n",
- skb_dst(skb) ? "neigh" : "dst",
- be16_to_cpup((__be16 *) skb->data),
- IPOIB_QPN(phdr->hwaddr),
- phdr->hwaddr + 4);
- dev_kfree_skb_any(skb);
- ++dev->stats.tx_dropped;
- return NETDEV_TX_OK;
- }
+ } else if (neigh->ah) {
+ ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr));
+ goto unref;
+ }
- unicast_arp_send(skb, dev, phdr);
- }
+ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ spin_lock_irqsave(&priv->lock, flags);
+ __skb_queue_tail(&neigh->queue, skb);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ } else {
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
}
+unref:
+ ipoib_neigh_put(neigh);
+
return NETDEV_TX_OK;
}
@@ -811,6 +813,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
const void *daddr, const void *saddr, unsigned len)
{
struct ipoib_header *header;
+ struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb;
header = (struct ipoib_header *) skb_push(skb, sizeof *header);
@@ -818,17 +821,13 @@ static int ipoib_hard_header(struct sk_buff *skb,
header->reserved = 0;
/*
- * If we don't have a neighbour structure, stuff the
- * destination address onto the front of the skb so we can
- * figure out where to send the packet later.
+ * we don't rely on dst_entry structure, always stuff the
+ * destination address into skb->cb so we can figure out where
+ * to send the packet later.
*/
- if ((!skb_dst(skb) || !skb_dst(skb)->neighbour) && daddr) {
- struct ipoib_pseudoheader *phdr =
- (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr);
- memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
- }
+ memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN);
- return 0;
+ return sizeof *header;
}
static void ipoib_set_mcast_list(struct net_device *dev)
@@ -843,95 +842,443 @@ static void ipoib_set_mcast_list(struct net_device *dev)
queue_work(ipoib_workqueue, &priv->restart_task);
}
-static void ipoib_neigh_cleanup(struct neighbour *n)
+static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
{
- struct ipoib_neigh *neigh;
- struct ipoib_dev_priv *priv = netdev_priv(n->dev);
+ /*
+ * Use only the address parts that contributes to spreading
+ * The subnet prefix is not used as one can not connect to
+ * same remote port (GUID) using the same remote QPN via two
+ * different subnets.
+ */
+ /* qpn octets[1:4) & port GUID octets[12:20) */
+ u32 *d32 = (u32 *) daddr;
+ u32 hv;
+
+ hv = jhash_3words(d32[3], d32[4], IPOIB_QPN_MASK & d32[0], 0);
+ return hv & htbl->mask;
+}
+
+struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ struct ipoib_neigh *neigh = NULL;
+ u32 hash_val;
+
+ rcu_read_lock_bh();
+
+ htbl = rcu_dereference_bh(ntbl->htbl);
+
+ if (!htbl)
+ goto out_unlock;
+
+ hash_val = ipoib_addr_hash(htbl, daddr);
+ for (neigh = rcu_dereference_bh(htbl->buckets[hash_val]);
+ neigh != NULL;
+ neigh = rcu_dereference_bh(neigh->hnext)) {
+ if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
+ /* found, take one ref on behalf of the caller */
+ if (!atomic_inc_not_zero(&neigh->refcnt)) {
+ /* deleted */
+ neigh = NULL;
+ goto out_unlock;
+ }
+ neigh->alive = jiffies;
+ goto out_unlock;
+ }
+ }
+
+out_unlock:
+ rcu_read_unlock_bh();
+ return neigh;
+}
+
+static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
+{
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ unsigned long neigh_obsolete;
+ unsigned long dt;
unsigned long flags;
- struct ipoib_ah *ah = NULL;
+ int i;
- neigh = *to_ipoib_neigh(n);
- if (neigh)
- priv = netdev_priv(neigh->dev);
- else
+ if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
return;
- ipoib_dbg(priv,
- "neigh_cleanup for %06x %pI6\n",
- IPOIB_QPN(n->ha),
- n->ha + 4);
spin_lock_irqsave(&priv->lock, flags);
- if (neigh->ah)
- ah = neigh->ah;
- list_del(&neigh->list);
- ipoib_neigh_free(n->dev, neigh);
+ htbl = rcu_dereference_protected(ntbl->htbl,
+ lockdep_is_held(&priv->lock));
+
+ if (!htbl)
+ goto out_unlock;
+
+ /* neigh is obsolete if it was idle for two GC periods */
+ dt = 2 * arp_tbl.gc_interval;
+ neigh_obsolete = jiffies - dt;
+ /* handle possible race condition */
+ if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
+ goto out_unlock;
+
+ for (i = 0; i < htbl->size; i++) {
+ struct ipoib_neigh *neigh;
+ struct ipoib_neigh __rcu **np = &htbl->buckets[i];
+
+ while ((neigh = rcu_dereference_protected(*np,
+ lockdep_is_held(&priv->lock))) != NULL) {
+ /* was the neigh idle for two GC periods */
+ if (time_after(neigh_obsolete, neigh->alive)) {
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(neigh->hnext,
+ lockdep_is_held(&priv->lock)));
+ /* remove from path/mc list */
+ list_del(&neigh->list);
+ call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
+ } else {
+ np = &neigh->hnext;
+ }
+ }
+ }
+
+out_unlock:
spin_unlock_irqrestore(&priv->lock, flags);
+}
- if (ah)
- ipoib_put_ah(ah);
+static void ipoib_reap_neigh(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv =
+ container_of(work, struct ipoib_dev_priv, neigh_reap_task.work);
+
+ __ipoib_reap_neigh(priv);
+
+ if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
+ queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+ arp_tbl.gc_interval);
}
-struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
+
+static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
struct net_device *dev)
{
struct ipoib_neigh *neigh;
- neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
+ neigh = kzalloc(sizeof *neigh, GFP_ATOMIC);
if (!neigh)
return NULL;
- neigh->neighbour = neighbour;
neigh->dev = dev;
- memset(&neigh->dgid.raw, 0, sizeof (union ib_gid));
- *to_ipoib_neigh(neighbour) = neigh;
+ memcpy(&neigh->daddr, daddr, sizeof(neigh->daddr));
skb_queue_head_init(&neigh->queue);
+ INIT_LIST_HEAD(&neigh->list);
ipoib_cm_set(neigh, NULL);
+ /* one ref on behalf of the caller */
+ atomic_set(&neigh->refcnt, 1);
return neigh;
}
-void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
+struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr,
+ struct net_device *dev)
{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ struct ipoib_neigh *neigh;
+ u32 hash_val;
+
+ htbl = rcu_dereference_protected(ntbl->htbl,
+ lockdep_is_held(&priv->lock));
+ if (!htbl) {
+ neigh = NULL;
+ goto out_unlock;
+ }
+
+ /* need to add a new neigh, but maybe some other thread succeeded?
+ * recalc hash, maybe hash resize took place so we do a search
+ */
+ hash_val = ipoib_addr_hash(htbl, daddr);
+ for (neigh = rcu_dereference_protected(htbl->buckets[hash_val],
+ lockdep_is_held(&priv->lock));
+ neigh != NULL;
+ neigh = rcu_dereference_protected(neigh->hnext,
+ lockdep_is_held(&priv->lock))) {
+ if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
+ /* found, take one ref on behalf of the caller */
+ if (!atomic_inc_not_zero(&neigh->refcnt)) {
+ /* deleted */
+ neigh = NULL;
+ break;
+ }
+ neigh->alive = jiffies;
+ goto out_unlock;
+ }
+ }
+
+ neigh = ipoib_neigh_ctor(daddr, dev);
+ if (!neigh)
+ goto out_unlock;
+
+ /* one ref on behalf of the hash table */
+ atomic_inc(&neigh->refcnt);
+ neigh->alive = jiffies;
+ /* put in hash */
+ rcu_assign_pointer(neigh->hnext,
+ rcu_dereference_protected(htbl->buckets[hash_val],
+ lockdep_is_held(&priv->lock)));
+ rcu_assign_pointer(htbl->buckets[hash_val], neigh);
+ atomic_inc(&ntbl->entries);
+
+out_unlock:
+
+ return neigh;
+}
+
+void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
+{
+ /* neigh reference count was dropprd to zero */
+ struct net_device *dev = neigh->dev;
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
- *to_ipoib_neigh(neigh->neighbour) = NULL;
+ if (neigh->ah)
+ ipoib_put_ah(neigh->ah);
while ((skb = __skb_dequeue(&neigh->queue))) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
if (ipoib_cm_get(neigh))
ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
+ ipoib_dbg(netdev_priv(dev),
+ "neigh free for %06x %pI6\n",
+ IPOIB_QPN(neigh->daddr),
+ neigh->daddr + 4);
kfree(neigh);
+ if (atomic_dec_and_test(&priv->ntbl.entries)) {
+ if (test_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags))
+ complete(&priv->ntbl.flushed);
+ }
}
-static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
+static void ipoib_neigh_reclaim(struct rcu_head *rp)
{
- parms->neigh_cleanup = ipoib_neigh_cleanup;
+ /* Called as a result of removal from hash table */
+ struct ipoib_neigh *neigh = container_of(rp, struct ipoib_neigh, rcu);
+ /* note TX context may hold another ref */
+ ipoib_neigh_put(neigh);
+}
+
+void ipoib_neigh_free(struct ipoib_neigh *neigh)
+{
+ struct net_device *dev = neigh->dev;
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ struct ipoib_neigh __rcu **np;
+ struct ipoib_neigh *n;
+ u32 hash_val;
+
+ htbl = rcu_dereference_protected(ntbl->htbl,
+ lockdep_is_held(&priv->lock));
+ if (!htbl)
+ return;
+
+ hash_val = ipoib_addr_hash(htbl, neigh->daddr);
+ np = &htbl->buckets[hash_val];
+ for (n = rcu_dereference_protected(*np,
+ lockdep_is_held(&priv->lock));
+ n != NULL;
+ n = rcu_dereference_protected(*np,
+ lockdep_is_held(&priv->lock))) {
+ if (n == neigh) {
+ /* found */
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(neigh->hnext,
+ lockdep_is_held(&priv->lock)));
+ /* remove from parent list */
+ list_del(&neigh->list);
+ call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
+ return;
+ } else {
+ np = &n->hnext;
+ }
+ }
+}
+
+static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
+{
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ struct ipoib_neigh **buckets;
+ u32 size;
+
+ clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
+ ntbl->htbl = NULL;
+ htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
+ if (!htbl)
+ return -ENOMEM;
+ set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+ size = roundup_pow_of_two(arp_tbl.gc_thresh3);
+ buckets = kzalloc(size * sizeof(*buckets), GFP_KERNEL);
+ if (!buckets) {
+ kfree(htbl);
+ return -ENOMEM;
+ }
+ htbl->size = size;
+ htbl->mask = (size - 1);
+ htbl->buckets = buckets;
+ ntbl->htbl = htbl;
+ htbl->ntbl = ntbl;
+ atomic_set(&ntbl->entries, 0);
+
+ /* start garbage collection */
+ clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+ queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+ arp_tbl.gc_interval);
return 0;
}
+static void neigh_hash_free_rcu(struct rcu_head *head)
+{
+ struct ipoib_neigh_hash *htbl = container_of(head,
+ struct ipoib_neigh_hash,
+ rcu);
+ struct ipoib_neigh __rcu **buckets = htbl->buckets;
+ struct ipoib_neigh_table *ntbl = htbl->ntbl;
+
+ kfree(buckets);
+ kfree(htbl);
+ complete(&ntbl->deleted);
+}
+
+void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ unsigned long flags;
+ int i;
+
+ /* remove all neigh connected to a given path or mcast */
+ spin_lock_irqsave(&priv->lock, flags);
+
+ htbl = rcu_dereference_protected(ntbl->htbl,
+ lockdep_is_held(&priv->lock));
+
+ if (!htbl)
+ goto out_unlock;
+
+ for (i = 0; i < htbl->size; i++) {
+ struct ipoib_neigh *neigh;
+ struct ipoib_neigh __rcu **np = &htbl->buckets[i];
+
+ while ((neigh = rcu_dereference_protected(*np,
+ lockdep_is_held(&priv->lock))) != NULL) {
+ /* delete neighs belong to this parent */
+ if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) {
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(neigh->hnext,
+ lockdep_is_held(&priv->lock)));
+ /* remove from parent list */
+ list_del(&neigh->list);
+ call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
+ } else {
+ np = &neigh->hnext;
+ }
+
+ }
+ }
+out_unlock:
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
+{
+ struct ipoib_neigh_table *ntbl = &priv->ntbl;
+ struct ipoib_neigh_hash *htbl;
+ unsigned long flags;
+ int i, wait_flushed = 0;
+
+ init_completion(&priv->ntbl.flushed);
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ htbl = rcu_dereference_protected(ntbl->htbl,
+ lockdep_is_held(&priv->lock));
+ if (!htbl)
+ goto out_unlock;
+
+ wait_flushed = atomic_read(&priv->ntbl.entries);
+ if (!wait_flushed)
+ goto free_htbl;
+
+ for (i = 0; i < htbl->size; i++) {
+ struct ipoib_neigh *neigh;
+ struct ipoib_neigh __rcu **np = &htbl->buckets[i];
+
+ while ((neigh = rcu_dereference_protected(*np,
+ lockdep_is_held(&priv->lock))) != NULL) {
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(neigh->hnext,
+ lockdep_is_held(&priv->lock)));
+ /* remove from path/mc list */
+ list_del(&neigh->list);
+ call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
+ }
+ }
+
+free_htbl:
+ rcu_assign_pointer(ntbl->htbl, NULL);
+ call_rcu(&htbl->rcu, neigh_hash_free_rcu);
+
+out_unlock:
+ spin_unlock_irqrestore(&priv->lock, flags);
+ if (wait_flushed)
+ wait_for_completion(&priv->ntbl.flushed);
+}
+
+static void ipoib_neigh_hash_uninit(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int stopped;
+
+ ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
+ init_completion(&priv->ntbl.deleted);
+ set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
+
+ /* Stop GC if called at init fail need to cancel work */
+ stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+ if (!stopped)
+ cancel_delayed_work(&priv->neigh_reap_task);
+
+ ipoib_flush_neighs(priv);
+
+ wait_for_completion(&priv->ntbl.deleted);
+}
+
+
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ if (ipoib_neigh_hash_init(priv) < 0)
+ goto out;
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
GFP_KERNEL);
if (!priv->rx_ring) {
printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
ca->name, ipoib_recvq_size);
- goto out;
+ goto out_neigh_hash_cleanup;
}
- priv->tx_ring = vmalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
+ priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
if (!priv->tx_ring) {
printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
ca->name, ipoib_sendq_size);
goto out_rx_ring_cleanup;
}
- memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring);
/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
@@ -946,6 +1293,8 @@ out_tx_ring_cleanup:
out_rx_ring_cleanup:
kfree(priv->rx_ring);
+out_neigh_hash_cleanup:
+ ipoib_neigh_hash_uninit(dev);
out:
return -ENOMEM;
}
@@ -953,15 +1302,20 @@ out:
void ipoib_dev_cleanup(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
+ LIST_HEAD(head);
+
+ ASSERT_RTNL();
ipoib_delete_debug_files(dev);
/* Delete any child interfaces first */
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
- unregister_netdev(cpriv->dev);
- ipoib_dev_cleanup(cpriv->dev);
- free_netdev(cpriv->dev);
+ /* Stop GC on child */
+ set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags);
+ cancel_delayed_work(&cpriv->neigh_reap_task);
+ unregister_netdevice_queue(cpriv->dev, &head);
}
+ unregister_netdevice_many(&head);
ipoib_ib_dev_cleanup(dev);
@@ -970,71 +1324,26 @@ void ipoib_dev_cleanup(struct net_device *dev)
priv->rx_ring = NULL;
priv->tx_ring = NULL;
+
+ ipoib_neigh_hash_uninit(dev);
}
static const struct header_ops ipoib_header_ops = {
.create = ipoib_hard_header,
};
-static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
- void **tcph, u64 *hdr_flags, void *priv)
-{
- unsigned int ip_len;
- struct iphdr *iph;
-
- if (unlikely(skb->protocol != htons(ETH_P_IP)))
- return -1;
-
- /*
- * In the future we may add an else clause that verifies the
- * checksum and allows devices which do not calculate checksum
- * to use LRO.
- */
- if (unlikely(skb->ip_summed != CHECKSUM_UNNECESSARY))
- return -1;
-
- /* Check for non-TCP packet */
- skb_reset_network_header(skb);
- iph = ip_hdr(skb);
- if (iph->protocol != IPPROTO_TCP)
- return -1;
-
- ip_len = ip_hdrlen(skb);
- skb_set_transport_header(skb, ip_len);
- *tcph = tcp_hdr(skb);
-
- /* check if IP header and TCP header are complete */
- if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
- return -1;
-
- *hdr_flags = LRO_IPV4 | LRO_TCP;
- *iphdr = iph;
-
- return 0;
-}
-
-static void ipoib_lro_setup(struct ipoib_dev_priv *priv)
-{
- priv->lro.lro_mgr.max_aggr = lro_max_aggr;
- priv->lro.lro_mgr.max_desc = IPOIB_MAX_LRO_DESCRIPTORS;
- priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc;
- priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
- priv->lro.lro_mgr.features = LRO_F_NAPI;
- priv->lro.lro_mgr.dev = priv->dev;
- priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
-}
-
static const struct net_device_ops ipoib_netdev_ops = {
+ .ndo_uninit = ipoib_uninit,
.ndo_open = ipoib_open,
.ndo_stop = ipoib_stop,
.ndo_change_mtu = ipoib_change_mtu,
+ .ndo_fix_features = ipoib_fix_features,
.ndo_start_xmit = ipoib_start_xmit,
.ndo_tx_timeout = ipoib_timeout,
- .ndo_set_multicast_list = ipoib_set_mcast_list,
- .ndo_neigh_setup = ipoib_neigh_setup_dev,
+ .ndo_set_rx_mode = ipoib_set_mcast_list,
};
-static void ipoib_setup(struct net_device *dev)
+void ipoib_setup(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -1043,17 +1352,13 @@ static void ipoib_setup(struct net_device *dev)
ipoib_set_ethtool_ops(dev);
- netif_napi_add(dev, &priv->napi, ipoib_poll, 100);
+ netif_napi_add(dev, &priv->napi, ipoib_poll, NAPI_POLL_WEIGHT);
dev->watchdog_timeo = HZ;
dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
- /*
- * We add in INFINIBAND_ALEN to allow for the destination
- * address "pseudoheader" for skbs without neighbour struct.
- */
- dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
+ dev->hard_header_len = IPOIB_ENCAP_LEN;
dev->addr_len = INFINIBAND_ALEN;
dev->type = ARPHRD_INFINIBAND;
dev->tx_queue_len = ipoib_sendq_size * 2;
@@ -1063,15 +1368,11 @@ static void ipoib_setup(struct net_device *dev)
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
- netif_carrier_off(dev);
-
priv->dev = dev;
- ipoib_lro_setup(priv);
-
spin_lock_init(&priv->lock);
- mutex_init(&priv->vlan_mutex);
+ init_rwsem(&priv->vlan_rwsem);
INIT_LIST_HEAD(&priv->path_list);
INIT_LIST_HEAD(&priv->child_intfs);
@@ -1086,6 +1387,7 @@ static void ipoib_setup(struct net_device *dev)
INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
+ INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
}
struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
@@ -1117,12 +1419,9 @@ static ssize_t show_umcast(struct device *dev,
return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
}
-static ssize_t set_umcast(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
{
- struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
- unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
+ struct ipoib_dev_priv *priv = netdev_priv(ndev);
if (umcast_val > 0) {
set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
@@ -1130,6 +1429,15 @@ static ssize_t set_umcast(struct device *dev,
"by userspace\n");
} else
clear_bit(IPOIB_FLAG_UMCAST, &priv->flags);
+}
+
+static ssize_t set_umcast(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
+
+ ipoib_set_umcast(to_net_dev(dev), umcast_val);
return count;
}
@@ -1150,7 +1458,7 @@ static ssize_t create_child(struct device *dev,
if (sscanf(buf, "%i", &pkey) != 1)
return -EINVAL;
- if (pkey < 0 || pkey > 0xffff)
+ if (pkey <= 0 || pkey > 0xffff || pkey == 0x8000)
return -EINVAL;
/*
@@ -1214,20 +1522,18 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
kfree(device_attr);
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
- set_bit(IPOIB_FLAG_CSUM, &priv->flags);
- priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
- }
+ priv->dev->hw_features = NETIF_F_SG |
+ NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
- if (lro)
- priv->dev->features |= NETIF_F_LRO;
+ if (priv->hca_caps & IB_DEVICE_UD_TSO)
+ priv->dev->hw_features |= NETIF_F_TSO;
- if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO)
- priv->dev->features |= NETIF_F_TSO;
+ priv->dev->features |= priv->dev->hw_features;
+ }
return 0;
}
-
static struct net_device *ipoib_add_port(const char *format,
struct ib_device *hca, u8 port)
{
@@ -1254,6 +1560,8 @@ static struct net_device *ipoib_add_port(const char *format,
priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu;
+ priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh);
+
result = ib_query_pkey(hca, port, 0, &priv->pkey);
if (result) {
printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
@@ -1326,6 +1634,9 @@ sysfs_failed:
register_failed:
ib_unregister_event_handler(&priv->event_handler);
+ /* Stop GC if started before flush */
+ set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+ cancel_delayed_work(&priv->neigh_reap_task);
flush_workqueue(ipoib_workqueue);
event_failed:
@@ -1384,6 +1695,8 @@ static void ipoib_remove_one(struct ib_device *device)
return;
dev_list = ib_get_client_data(device, &ipoib_client);
+ if (!dev_list)
+ return;
list_for_each_entry_safe(priv, tmp, dev_list, list) {
ib_unregister_event_handler(&priv->event_handler);
@@ -1392,10 +1705,12 @@ static void ipoib_remove_one(struct ib_device *device)
dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
rtnl_unlock();
+ /* Stop GC */
+ set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
+ cancel_delayed_work(&priv->neigh_reap_task);
flush_workqueue(ipoib_workqueue);
unregister_netdev(priv->dev);
- ipoib_dev_cleanup(priv->dev);
free_netdev(priv->dev);
}
@@ -1447,8 +1762,15 @@ static int __init ipoib_init_module(void)
if (ret)
goto err_sa;
+ ret = ipoib_netlink_init();
+ if (ret)
+ goto err_client;
+
return 0;
+err_client:
+ ib_unregister_client(&ipoib_client);
+
err_sa:
ib_sa_unregister_client(&ipoib_sa_client);
destroy_workqueue(ipoib_workqueue);
@@ -1461,6 +1783,7 @@ err_fs:
static void __exit ipoib_cleanup_module(void)
{
+ ipoib_netlink_fini();
ib_unregister_client(&ipoib_client);
ib_sa_unregister_client(&ipoib_sa_client);
ipoib_unregister_debugfs();