From 66172c09938bfc4efdcf9b5e0246a85b9b76dd54 Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Wed, 29 Aug 2012 15:14:33 +0000 Subject: IPoIB: Fix memory leak in the neigh table deletion flow If the neighbours hash table is empty when unloading the module, then ipoib_flush_neighs(), the cleanup routine, isn't called and the memory used for the hash table itself leaked. To fix this, ipoib_flush_neighs() is allways called, and another completion object is added to signal when the table is freed. Once invoked, ipoib_flush_neighs() flushes all the neighbours (if there are any), calls the the hash table RCU free routine, which now signals completion of the deletion process, and waits for the last neighbour to be freed. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 4 ++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 23 +++++++++++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ca43901ed86..e6bbeae1c30 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -262,7 +262,10 @@ struct ipoib_ethtool_st { u16 max_coalesced_frames; }; +struct ipoib_neigh_table; + struct ipoib_neigh_hash { + struct ipoib_neigh_table *ntbl; struct ipoib_neigh __rcu **buckets; struct rcu_head rcu; u32 mask; @@ -274,6 +277,7 @@ struct ipoib_neigh_table { rwlock_t rwlock; atomic_t entries; struct completion flushed; + struct completion deleted; }; /* diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3e2085a3ee4..72c1fc28f07 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1095,6 +1095,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) htbl->mask = (size - 1); htbl->buckets = buckets; ntbl->htbl = htbl; + htbl->ntbl = ntbl; atomic_set(&ntbl->entries, 0); /* start garbage collection */ @@ -1111,9 +1112,11 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct ipoib_neigh_hash, rcu); struct ipoib_neigh __rcu **buckets = htbl->buckets; + struct ipoib_neigh_table *ntbl = htbl->ntbl; kfree(buckets); kfree(htbl); + complete(&ntbl->deleted); } void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) @@ -1164,7 +1167,9 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) struct ipoib_neigh_table *ntbl = &priv->ntbl; struct ipoib_neigh_hash *htbl; unsigned long flags; - int i; + int i, wait_flushed = 0; + + init_completion(&priv->ntbl.flushed); write_lock_bh(&ntbl->rwlock); @@ -1173,6 +1178,10 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) if (!htbl) goto out_unlock; + wait_flushed = atomic_read(&priv->ntbl.entries); + if (!wait_flushed) + goto free_htbl; + for (i = 0; i < htbl->size; i++) { struct ipoib_neigh *neigh; struct ipoib_neigh __rcu **np = &htbl->buckets[i]; @@ -1190,11 +1199,14 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) } } +free_htbl: rcu_assign_pointer(ntbl->htbl, NULL); call_rcu(&htbl->rcu, neigh_hash_free_rcu); out_unlock: write_unlock_bh(&ntbl->rwlock); + if (wait_flushed) + wait_for_completion(&priv->ntbl.flushed); } static void ipoib_neigh_hash_uninit(struct net_device *dev) @@ -1203,7 +1215,7 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev) int stopped; ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); - init_completion(&priv->ntbl.flushed); + init_completion(&priv->ntbl.deleted); set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); /* Stop GC if called at init fail need to cancel work */ @@ -1211,10 +1223,9 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev) if (!stopped) cancel_delayed_work(&priv->neigh_reap_task); - if (atomic_read(&priv->ntbl.entries)) { - ipoib_flush_neighs(priv); - wait_for_completion(&priv->ntbl.flushed); - } + ipoib_flush_neighs(priv); + + wait_for_completion(&priv->ntbl.deleted); } -- cgit v1.2.3-18-g5258 From b5120a6e11e90d98d8a752545ac60bfa1ea95f1a Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Wed, 29 Aug 2012 15:14:34 +0000 Subject: IPoIB: Fix AB-BA deadlock when deleting neighbours Lockdep points out a circular locking dependency betwwen the ipoib device priv spinlock (priv->lock) and the neighbour table rwlock (ntbl->rwlock). In the normal path, ie neigbour garbage collection task, the neigh table rwlock is taken first and then if the neighbour needs to be deleted, priv->lock is taken. However in some error paths, such as in ipoib_cm_handle_tx_wc(), priv->lock is taken first and then ipoib_neigh_free routine is called which in turn takes the neighbour table ntbl->rwlock. The solution is to get rid the neigh table rwlock completely and use only priv->lock. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 - drivers/infiniband/ulp/ipoib/ipoib_main.c | 70 ++++++++++---------------- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 - 3 files changed, 27 insertions(+), 46 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index e6bbeae1c30..0af216d21f8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -274,7 +274,6 @@ struct ipoib_neigh_hash { struct ipoib_neigh_table { struct ipoib_neigh_hash __rcu *htbl; - rwlock_t rwlock; atomic_t entries; struct completion flushed; struct completion deleted; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 72c1fc28f07..1e19b5ae7c4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -546,15 +546,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, struct ipoib_neigh *neigh; unsigned long flags; + spin_lock_irqsave(&priv->lock, flags); neigh = ipoib_neigh_alloc(daddr, dev); if (!neigh) { + spin_unlock_irqrestore(&priv->lock, flags); ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); return; } - spin_lock_irqsave(&priv->lock, flags); - path = __path_find(dev, daddr + 4); if (!path) { path = path_rec_create(dev, daddr + 4); @@ -863,10 +863,10 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) return; - write_lock_bh(&ntbl->rwlock); + spin_lock_irqsave(&priv->lock, flags); htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) goto out_unlock; @@ -883,16 +883,14 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) struct ipoib_neigh __rcu **np = &htbl->buckets[i]; while ((neigh = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock))) != NULL) { + lockdep_is_held(&priv->lock))) != NULL) { /* was the neigh idle for two GC periods */ if (time_after(neigh_obsolete, neigh->alive)) { rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); /* remove from path/mc list */ - spin_lock_irqsave(&priv->lock, flags); list_del(&neigh->list); - spin_unlock_irqrestore(&priv->lock, flags); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); } else { np = &neigh->hnext; @@ -902,7 +900,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) } out_unlock: - write_unlock_bh(&ntbl->rwlock); + spin_unlock_irqrestore(&priv->lock, flags); } static void ipoib_reap_neigh(struct work_struct *work) @@ -947,10 +945,8 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, struct ipoib_neigh *neigh; u32 hash_val; - write_lock_bh(&ntbl->rwlock); - htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) { neigh = NULL; goto out_unlock; @@ -961,10 +957,10 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, */ hash_val = ipoib_addr_hash(htbl, daddr); for (neigh = rcu_dereference_protected(htbl->buckets[hash_val], - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); neigh != NULL; neigh = rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))) { + lockdep_is_held(&priv->lock))) { if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) { /* found, take one ref on behalf of the caller */ if (!atomic_inc_not_zero(&neigh->refcnt)) { @@ -987,12 +983,11 @@ struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, /* put in hash */ rcu_assign_pointer(neigh->hnext, rcu_dereference_protected(htbl->buckets[hash_val], - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); rcu_assign_pointer(htbl->buckets[hash_val], neigh); atomic_inc(&ntbl->entries); out_unlock: - write_unlock_bh(&ntbl->rwlock); return neigh; } @@ -1040,35 +1035,29 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh) struct ipoib_neigh *n; u32 hash_val; - write_lock_bh(&ntbl->rwlock); - htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) - goto out_unlock; + return; hash_val = ipoib_addr_hash(htbl, neigh->daddr); np = &htbl->buckets[hash_val]; for (n = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); n != NULL; n = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock))) { + lockdep_is_held(&priv->lock))) { if (n == neigh) { /* found */ rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); - goto out_unlock; + return; } else { np = &n->hnext; } } - -out_unlock: - write_unlock_bh(&ntbl->rwlock); - } static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) @@ -1080,7 +1069,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); ntbl->htbl = NULL; - rwlock_init(&ntbl->rwlock); htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); if (!htbl) return -ENOMEM; @@ -1128,10 +1116,10 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) int i; /* remove all neigh connected to a given path or mcast */ - write_lock_bh(&ntbl->rwlock); + spin_lock_irqsave(&priv->lock, flags); htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) goto out_unlock; @@ -1141,16 +1129,14 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) struct ipoib_neigh __rcu **np = &htbl->buckets[i]; while ((neigh = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock))) != NULL) { + lockdep_is_held(&priv->lock))) != NULL) { /* delete neighs belong to this parent */ if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) { rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); /* remove from parent list */ - spin_lock_irqsave(&priv->lock, flags); list_del(&neigh->list); - spin_unlock_irqrestore(&priv->lock, flags); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); } else { np = &neigh->hnext; @@ -1159,7 +1145,7 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid) } } out_unlock: - write_unlock_bh(&ntbl->rwlock); + spin_unlock_irqrestore(&priv->lock, flags); } static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) @@ -1171,10 +1157,10 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) init_completion(&priv->ntbl.flushed); - write_lock_bh(&ntbl->rwlock); + spin_lock_irqsave(&priv->lock, flags); htbl = rcu_dereference_protected(ntbl->htbl, - lockdep_is_held(&ntbl->rwlock)); + lockdep_is_held(&priv->lock)); if (!htbl) goto out_unlock; @@ -1187,14 +1173,12 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) struct ipoib_neigh __rcu **np = &htbl->buckets[i]; while ((neigh = rcu_dereference_protected(*np, - lockdep_is_held(&ntbl->rwlock))) != NULL) { + lockdep_is_held(&priv->lock))) != NULL) { rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, - lockdep_is_held(&ntbl->rwlock))); + lockdep_is_held(&priv->lock))); /* remove from path/mc list */ - spin_lock_irqsave(&priv->lock, flags); list_del(&neigh->list); - spin_unlock_irqrestore(&priv->lock, flags); call_rcu(&neigh->rcu, ipoib_neigh_reclaim); } } @@ -1204,7 +1188,7 @@ free_htbl: call_rcu(&htbl->rcu, neigh_hash_free_rcu); out_unlock: - write_unlock_bh(&ntbl->rwlock); + spin_unlock_irqrestore(&priv->lock, flags); if (wait_flushed) wait_for_completion(&priv->ntbl.flushed); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 13f4aa7593c..75367249f44 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -707,9 +707,7 @@ out: neigh = ipoib_neigh_get(dev, daddr); spin_lock_irqsave(&priv->lock, flags); if (!neigh) { - spin_unlock_irqrestore(&priv->lock, flags); neigh = ipoib_neigh_alloc(daddr, dev); - spin_lock_irqsave(&priv->lock, flags); if (neigh) { kref_get(&mcast->ah->ref); neigh->ah = mcast->ah; -- cgit v1.2.3-18-g5258 From ae3bca90e94dbc9c906321b33f32de4a42a6cdf4 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 17 Aug 2012 14:45:33 +0000 Subject: RDMA/ocrdma: Fix CQE expansion of unsignaled WQE Fix CQE expansion of unsignaled WQE -- don't expand the CQE when the WQE index of the completed CQE matches with last pending WQE (tail) in the queue. Signed-off-by: Parav Pandit Signed-off-by: Roland Dreier --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index cb5b7f7d4d3..b29a4246ef4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -2219,7 +2219,6 @@ static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp, u32 wqe_idx; if (!qp->wqe_wr_id_tbl[tail].signaled) { - expand = true; /* CQE cannot be consumed yet */ *polled = false; /* WC cannot be consumed yet */ } else { ibwc->status = IB_WC_SUCCESS; @@ -2227,10 +2226,11 @@ static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp, ibwc->qp = &qp->ibqp; ocrdma_update_wc(qp, ibwc, tail); *polled = true; - wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK; - if (tail != wqe_idx) - expand = true; /* Coalesced CQE can't be consumed yet */ } + wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK; + if (tail != wqe_idx) + expand = true; /* Coalesced CQE can't be consumed yet */ + ocrdma_hwq_inc_tail(&qp->sq); return expand; } -- cgit v1.2.3-18-g5258 From 4c3550057b83bde3250b2f105c7f56326fb43044 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 12 Sep 2012 13:01:29 +0000 Subject: IB/qib: Fix failure of compliance test C14-024#06_LocalPortNum Commit 3236b2d469db ("IB/qib: MADs with misset M_Keys should return failure") introduced a return code assignment that unfortunately introduced an unconditional exit for the routine due to missing braces. This patch adds the braces to correct the original patch. Reviewed-by: Dean Luick Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_mad.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 19f1e6c45fb..ccb119143d2 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -471,9 +471,10 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, if (port_num != port) { ibp = to_iport(ibdev, port_num); ret = check_mkey(ibp, smp, 0); - if (ret) + if (ret) { ret = IB_MAD_RESULT_FAILURE; goto bail; + } } } -- cgit v1.2.3-18-g5258