diff options
Diffstat (limited to 'net/core/neighbour.c')
| -rw-r--r-- | net/core/neighbour.c | 1882 |
1 files changed, 1093 insertions, 789 deletions
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 19b8e003f15..ef31fef25e5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -15,6 +15,9 @@ * Harald Welte Add neighbour cache statistics like rtstat */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/module.h> @@ -35,23 +38,16 @@ #include <linux/random.h> #include <linux/string.h> #include <linux/log2.h> +#include <linux/inetdevice.h> +#include <net/addrconf.h> +#define DEBUG #define NEIGH_DEBUG 1 - -#define NEIGH_PRINTK(x...) printk(x) -#define NEIGH_NOPRINTK(x...) do { ; } while(0) -#define NEIGH_PRINTK0 NEIGH_PRINTK -#define NEIGH_PRINTK1 NEIGH_NOPRINTK -#define NEIGH_PRINTK2 NEIGH_NOPRINTK - -#if NEIGH_DEBUG >= 1 -#undef NEIGH_PRINTK1 -#define NEIGH_PRINTK1 NEIGH_PRINTK -#endif -#if NEIGH_DEBUG >= 2 -#undef NEIGH_PRINTK2 -#define NEIGH_PRINTK2 NEIGH_PRINTK -#endif +#define neigh_dbg(level, fmt, ...) \ +do { \ + if (level <= NEIGH_DEBUG) \ + pr_debug(fmt, ##__VA_ARGS__); \ +} while (0) #define PNEIGH_HASHMASK 0xF @@ -98,7 +94,7 @@ static const struct file_operations neigh_stat_seq_fops; static DEFINE_RWLOCK(neigh_tbl_lock); -static int neigh_blackhole(struct sk_buff *skb) +static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb) { kfree_skb(skb); return -ENETDOWN; @@ -121,23 +117,29 @@ static void neigh_cleanup_and_release(struct neighbour *neigh) unsigned long neigh_rand_reach_time(unsigned long base) { - return (base ? (net_random() % base) + (base >> 1) : 0); + return base ? (prandom_u32() % base) + (base >> 1) : 0; } +EXPORT_SYMBOL(neigh_rand_reach_time); static int neigh_forced_gc(struct neigh_table *tbl) { int shrunk = 0; int i; + struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); write_lock_bh(&tbl->lock); - for (i = 0; i <= tbl->hash_mask; i++) { - struct neighbour *n, **np; + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); + for (i = 0; i < (1 << nht->hash_shift); i++) { + struct neighbour *n; + struct neighbour __rcu **np; - np = &tbl->hash_buckets[i]; - while ((n = *np) != NULL) { + np = &nht->hash_buckets[i]; + while ((n = rcu_dereference_protected(*np, + lockdep_is_held(&tbl->lock))) != NULL) { /* Neighbour record may be discarded if: * - nobody refers to it. * - it is not permanent @@ -145,7 +147,9 @@ static int neigh_forced_gc(struct neigh_table *tbl) write_lock(&n->lock); if (atomic_read(&n->refcnt) == 1 && !(n->nud_state & NUD_PERMANENT)) { - *np = n->next; + rcu_assign_pointer(*np, + rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock))); n->dead = 1; shrunk = 1; write_unlock(&n->lock); @@ -197,16 +201,24 @@ static void pneigh_queue_purge(struct sk_buff_head *list) static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) { int i; + struct neigh_hash_table *nht; - for (i = 0; i <= tbl->hash_mask; i++) { - struct neighbour *n, **np = &tbl->hash_buckets[i]; + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); - while ((n = *np) != NULL) { + for (i = 0; i < (1 << nht->hash_shift); i++) { + struct neighbour *n; + struct neighbour __rcu **np = &nht->hash_buckets[i]; + + while ((n = rcu_dereference_protected(*np, + lockdep_is_held(&tbl->lock))) != NULL) { if (dev && n->dev != dev) { np = &n->next; continue; } - *np = n->next; + rcu_assign_pointer(*np, + rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock))); write_lock(&n->lock); neigh_del_timer(n); n->dead = 1; @@ -221,13 +233,14 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) we must kill timers etc. and move it to safe state. */ - skb_queue_purge(&n->arp_queue); + __skb_queue_purge(&n->arp_queue); + n->arp_queue_len_bytes = 0; n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) n->nud_state = NUD_NOARP; else n->nud_state = NUD_NONE; - NEIGH_PRINTK2("neigh %p is stray.\n", n); + neigh_dbg(2, "neigh %p is stray\n", n); } write_unlock(&n->lock); neigh_cleanup_and_release(n); @@ -241,6 +254,7 @@ void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) neigh_flush_dev(tbl, dev); write_unlock_bh(&tbl->lock); } +EXPORT_SYMBOL(neigh_changeaddr); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) { @@ -253,8 +267,9 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) pneigh_queue_purge(&tbl->proxy_queue); return 0; } +EXPORT_SYMBOL(neigh_ifdown); -static struct neighbour *neigh_alloc(struct neigh_table *tbl) +static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev) { struct neighbour *n = NULL; unsigned long now = jiffies; @@ -269,15 +284,17 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) goto out_entries; } - n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC); + n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC); if (!n) goto out_entries; - skb_queue_head_init(&n->arp_queue); + __skb_queue_head_init(&n->arp_queue); rwlock_init(&n->lock); + seqlock_init(&n->ha_lock); n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; + seqlock_init(&n->hh.hh_lock); n->parms = neigh_parms_clone(&tbl->parms); setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n); @@ -293,64 +310,93 @@ out_entries: goto out; } -static struct neighbour **neigh_hash_alloc(unsigned int entries) +static void neigh_get_hash_rnd(u32 *x) { - unsigned long size = entries * sizeof(struct neighbour *); - struct neighbour **ret; + get_random_bytes(x, sizeof(*x)); + *x |= 1; +} - if (size <= PAGE_SIZE) { - ret = kzalloc(size, GFP_ATOMIC); - } else { - ret = (struct neighbour **) - __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size)); +static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) +{ + size_t size = (1 << shift) * sizeof(struct neighbour *); + struct neigh_hash_table *ret; + struct neighbour __rcu **buckets; + int i; + + ret = kmalloc(sizeof(*ret), GFP_ATOMIC); + if (!ret) + return NULL; + if (size <= PAGE_SIZE) + buckets = kzalloc(size, GFP_ATOMIC); + else + buckets = (struct neighbour __rcu **) + __get_free_pages(GFP_ATOMIC | __GFP_ZERO, + get_order(size)); + if (!buckets) { + kfree(ret); + return NULL; } + ret->hash_buckets = buckets; + ret->hash_shift = shift; + for (i = 0; i < NEIGH_NUM_HASH_RND; i++) + neigh_get_hash_rnd(&ret->hash_rnd[i]); return ret; } -static void neigh_hash_free(struct neighbour **hash, unsigned int entries) +static void neigh_hash_free_rcu(struct rcu_head *head) { - unsigned long size = entries * sizeof(struct neighbour *); + struct neigh_hash_table *nht = container_of(head, + struct neigh_hash_table, + rcu); + size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); + struct neighbour __rcu **buckets = nht->hash_buckets; if (size <= PAGE_SIZE) - kfree(hash); + kfree(buckets); else - free_pages((unsigned long)hash, get_order(size)); + free_pages((unsigned long)buckets, get_order(size)); + kfree(nht); } -static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) +static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, + unsigned long new_shift) { - struct neighbour **new_hash, **old_hash; - unsigned int i, new_hash_mask, old_entries; + unsigned int i, hash; + struct neigh_hash_table *new_nht, *old_nht; NEIGH_CACHE_STAT_INC(tbl, hash_grows); - BUG_ON(!is_power_of_2(new_entries)); - new_hash = neigh_hash_alloc(new_entries); - if (!new_hash) - return; + old_nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); + new_nht = neigh_hash_alloc(new_shift); + if (!new_nht) + return old_nht; - old_entries = tbl->hash_mask + 1; - new_hash_mask = new_entries - 1; - old_hash = tbl->hash_buckets; - - get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); - for (i = 0; i < old_entries; i++) { + for (i = 0; i < (1 << old_nht->hash_shift); i++) { struct neighbour *n, *next; - for (n = old_hash[i]; n; n = next) { - unsigned int hash_val = tbl->hash(n->primary_key, n->dev); - - hash_val &= new_hash_mask; - next = n->next; - - n->next = new_hash[hash_val]; - new_hash[hash_val] = n; + for (n = rcu_dereference_protected(old_nht->hash_buckets[i], + lockdep_is_held(&tbl->lock)); + n != NULL; + n = next) { + hash = tbl->hash(n->primary_key, n->dev, + new_nht->hash_rnd); + + hash >>= (32 - new_nht->hash_shift); + next = rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock)); + + rcu_assign_pointer(n->next, + rcu_dereference_protected( + new_nht->hash_buckets[hash], + lockdep_is_held(&tbl->lock))); + rcu_assign_pointer(new_nht->hash_buckets[hash], n); } } - tbl->hash_buckets = new_hash; - tbl->hash_mask = new_hash_mask; - neigh_hash_free(old_hash, old_entries); + rcu_assign_pointer(tbl->nht, new_nht); + call_rcu(&old_nht->rcu, neigh_hash_free_rcu); + return new_nht; } struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, @@ -359,21 +405,29 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct neighbour *n; int key_len = tbl->key_len; u32 hash_val; + struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, lookups); - read_lock_bh(&tbl->lock); - hash_val = tbl->hash(pkey, dev); - for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { + rcu_read_lock_bh(); + nht = rcu_dereference_bh(tbl->nht); + hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); + + for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); + n != NULL; + n = rcu_dereference_bh(n->next)) { if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { - neigh_hold(n); + if (!atomic_inc_not_zero(&n->refcnt)) + n = NULL; NEIGH_CACHE_STAT_INC(tbl, hits); break; } } - read_unlock_bh(&tbl->lock); + + rcu_read_unlock_bh(); return n; } +EXPORT_SYMBOL(neigh_lookup); struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey) @@ -381,30 +435,39 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, struct neighbour *n; int key_len = tbl->key_len; u32 hash_val; + struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, lookups); - read_lock_bh(&tbl->lock); - hash_val = tbl->hash(pkey, NULL); - for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { + rcu_read_lock_bh(); + nht = rcu_dereference_bh(tbl->nht); + hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift); + + for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); + n != NULL; + n = rcu_dereference_bh(n->next)) { if (!memcmp(n->primary_key, pkey, key_len) && - (net == n->dev->nd_net)) { - neigh_hold(n); + net_eq(dev_net(n->dev), net)) { + if (!atomic_inc_not_zero(&n->refcnt)) + n = NULL; NEIGH_CACHE_STAT_INC(tbl, hits); break; } } - read_unlock_bh(&tbl->lock); + + rcu_read_unlock_bh(); return n; } +EXPORT_SYMBOL(neigh_lookup_nodev); -struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, - struct net_device *dev) +struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, + struct net_device *dev, bool want_ref) { u32 hash_val; int key_len = tbl->key_len; int error; - struct neighbour *n1, *rc, *n = neigh_alloc(tbl); + struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev); + struct neigh_hash_table *nht; if (!n) { rc = ERR_PTR(-ENOBUFS); @@ -421,6 +484,14 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, goto out_neigh_release; } + if (dev->netdev_ops->ndo_neigh_construct) { + error = dev->netdev_ops->ndo_neigh_construct(n); + if (error < 0) { + rc = ERR_PTR(error); + goto out_neigh_release; + } + } + /* Device specific setup. */ if (n->parms->neigh_setup && (error = n->parms->neigh_setup(n)) < 0) { @@ -428,34 +499,44 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, goto out_neigh_release; } - n->confirmed = jiffies - (n->parms->base_reachable_time << 1); + n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1); write_lock_bh(&tbl->lock); + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); - if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1)) - neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); + if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) + nht = neigh_hash_grow(tbl, nht->hash_shift + 1); - hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; + hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); if (n->parms->dead) { rc = ERR_PTR(-EINVAL); goto out_tbl_unlock; } - for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { + for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], + lockdep_is_held(&tbl->lock)); + n1 != NULL; + n1 = rcu_dereference_protected(n1->next, + lockdep_is_held(&tbl->lock))) { if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { - neigh_hold(n1); + if (want_ref) + neigh_hold(n1); rc = n1; goto out_tbl_unlock; } } - n->next = tbl->hash_buckets[hash_val]; - tbl->hash_buckets[hash_val] = n; n->dead = 0; - neigh_hold(n); + if (want_ref) + neigh_hold(n); + rcu_assign_pointer(n->next, + rcu_dereference_protected(nht->hash_buckets[hash_val], + lockdep_is_held(&tbl->lock))); + rcu_assign_pointer(nht->hash_buckets[hash_val], n); write_unlock_bh(&tbl->lock); - NEIGH_PRINTK2("neigh %p is created.\n", n); + neigh_dbg(2, "neigh %p is created\n", n); rc = n; out: return rc; @@ -465,28 +546,44 @@ out_neigh_release: neigh_release(n); goto out; } +EXPORT_SYMBOL(__neigh_create); -struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, - struct net *net, const void *pkey, struct net_device *dev) +static u32 pneigh_hash(const void *pkey, int key_len) { - struct pneigh_entry *n; - int key_len = tbl->key_len; u32 hash_val = *(u32 *)(pkey + key_len - 4); - hash_val ^= (hash_val >> 16); hash_val ^= hash_val >> 8; hash_val ^= hash_val >> 4; hash_val &= PNEIGH_HASHMASK; + return hash_val; +} - for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { +static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, + struct net *net, + const void *pkey, + int key_len, + struct net_device *dev) +{ + while (n) { if (!memcmp(n->key, pkey, key_len) && - (n->net == net) && + net_eq(pneigh_net(n), net) && (n->dev == dev || !n->dev)) - break; + return n; + n = n->next; } + return NULL; +} - return n; +struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, + struct net *net, const void *pkey, struct net_device *dev) +{ + int key_len = tbl->key_len; + u32 hash_val = pneigh_hash(pkey, key_len); + + return __pneigh_lookup_1(tbl->phash_buckets[hash_val], + net, pkey, key_len, dev); } +EXPORT_SYMBOL_GPL(__pneigh_lookup); struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, @@ -494,26 +591,14 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, { struct pneigh_entry *n; int key_len = tbl->key_len; - u32 hash_val = *(u32 *)(pkey + key_len - 4); - - hash_val ^= (hash_val >> 16); - hash_val ^= hash_val >> 8; - hash_val ^= hash_val >> 4; - hash_val &= PNEIGH_HASHMASK; + u32 hash_val = pneigh_hash(pkey, key_len); read_lock_bh(&tbl->lock); - - for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { - if (!memcmp(n->key, pkey, key_len) && - (n->net == net) && - (n->dev == dev || !n->dev)) { - read_unlock_bh(&tbl->lock); - goto out; - } - } + n = __pneigh_lookup_1(tbl->phash_buckets[hash_val], + net, pkey, key_len, dev); read_unlock_bh(&tbl->lock); - n = NULL; - if (!creat) + + if (n || !creat) goto out; ASSERT_RTNL(); @@ -522,7 +607,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (!n) goto out; - n->net = hold_net(net); + write_pnet(&n->net, hold_net(net)); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -544,6 +629,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, out: return n; } +EXPORT_SYMBOL(pneigh_lookup); int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, @@ -551,25 +637,20 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, { struct pneigh_entry *n, **np; int key_len = tbl->key_len; - u32 hash_val = *(u32 *)(pkey + key_len - 4); - - hash_val ^= (hash_val >> 16); - hash_val ^= hash_val >> 8; - hash_val ^= hash_val >> 4; - hash_val &= PNEIGH_HASHMASK; + u32 hash_val = pneigh_hash(pkey, key_len); write_lock_bh(&tbl->lock); for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; np = &n->next) { if (!memcmp(n->key, pkey, key_len) && n->dev == dev && - (n->net == net)) { + net_eq(pneigh_net(n), net)) { *np = n->next; write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); - release_net(n->net); + release_net(pneigh_net(n)); kfree(n); return 0; } @@ -592,7 +673,7 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); - release_net(n->net); + release_net(pneigh_net(n)); kfree(n); continue; } @@ -616,41 +697,36 @@ static inline void neigh_parms_put(struct neigh_parms *parms) */ void neigh_destroy(struct neighbour *neigh) { - struct hh_cache *hh; + struct net_device *dev = neigh->dev; NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); if (!neigh->dead) { - printk(KERN_WARNING - "Destroying alive neighbour %p\n", neigh); + pr_warn("Destroying alive neighbour %p\n", neigh); dump_stack(); return; } if (neigh_del_timer(neigh)) - printk(KERN_WARNING "Impossible event.\n"); - - while ((hh = neigh->hh) != NULL) { - neigh->hh = hh->hh_next; - hh->hh_next = NULL; + pr_warn("Impossible event\n"); - write_seqlock_bh(&hh->hh_lock); - hh->hh_output = neigh_blackhole; - write_sequnlock_bh(&hh->hh_lock); - if (atomic_dec_and_test(&hh->hh_refcnt)) - kfree(hh); - } + write_lock_bh(&neigh->lock); + __skb_queue_purge(&neigh->arp_queue); + write_unlock_bh(&neigh->lock); + neigh->arp_queue_len_bytes = 0; - skb_queue_purge(&neigh->arp_queue); + if (dev->netdev_ops->ndo_neigh_destroy) + dev->netdev_ops->ndo_neigh_destroy(neigh); - dev_put(neigh->dev); + dev_put(dev); neigh_parms_put(neigh->parms); - NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); + neigh_dbg(2, "neigh %p is destroyed\n", neigh); atomic_dec(&neigh->tbl->entries); - kmem_cache_free(neigh->tbl->kmem_cachep, neigh); + kfree_rcu(neigh, rcu); } +EXPORT_SYMBOL(neigh_destroy); /* Neighbour state is suspicious; disable fast path. @@ -659,14 +735,9 @@ void neigh_destroy(struct neighbour *neigh) */ static void neigh_suspect(struct neighbour *neigh) { - struct hh_cache *hh; - - NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); + neigh_dbg(2, "neigh %p is suspected\n", neigh); neigh->output = neigh->ops->output; - - for (hh = neigh->hh; hh; hh = hh->hh_next) - hh->hh_output = neigh->ops->output; } /* Neighbour state is OK; @@ -676,93 +747,137 @@ static void neigh_suspect(struct neighbour *neigh) */ static void neigh_connect(struct neighbour *neigh) { - struct hh_cache *hh; - - NEIGH_PRINTK2("neigh %p is connected.\n", neigh); + neigh_dbg(2, "neigh %p is connected\n", neigh); neigh->output = neigh->ops->connected_output; - - for (hh = neigh->hh; hh; hh = hh->hh_next) - hh->hh_output = neigh->ops->hh_output; } -static void neigh_periodic_timer(unsigned long arg) +static void neigh_periodic_work(struct work_struct *work) { - struct neigh_table *tbl = (struct neigh_table *)arg; - struct neighbour *n, **np; - unsigned long expire, now = jiffies; + struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); + struct neighbour *n; + struct neighbour __rcu **np; + unsigned int i; + struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); - write_lock(&tbl->lock); + write_lock_bh(&tbl->lock); + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); /* * periodically recompute ReachableTime from random function */ - if (time_after(now, tbl->last_rand + 300 * HZ)) { + if (time_after(jiffies, tbl->last_rand + 300 * HZ)) { struct neigh_parms *p; - tbl->last_rand = now; + tbl->last_rand = jiffies; for (p = &tbl->parms; p; p = p->next) p->reachable_time = - neigh_rand_reach_time(p->base_reachable_time); + neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); } - np = &tbl->hash_buckets[tbl->hash_chain_gc]; - tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask); + if (atomic_read(&tbl->entries) < tbl->gc_thresh1) + goto out; - while ((n = *np) != NULL) { - unsigned int state; + for (i = 0 ; i < (1 << nht->hash_shift); i++) { + np = &nht->hash_buckets[i]; - write_lock(&n->lock); + while ((n = rcu_dereference_protected(*np, + lockdep_is_held(&tbl->lock))) != NULL) { + unsigned int state; - state = n->nud_state; - if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { - write_unlock(&n->lock); - goto next_elt; - } + write_lock(&n->lock); + + state = n->nud_state; + if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { + write_unlock(&n->lock); + goto next_elt; + } - if (time_before(n->used, n->confirmed)) - n->used = n->confirmed; + if (time_before(n->used, n->confirmed)) + n->used = n->confirmed; - if (atomic_read(&n->refcnt) == 1 && - (state == NUD_FAILED || - time_after(now, n->used + n->parms->gc_staletime))) { - *np = n->next; - n->dead = 1; + if (atomic_read(&n->refcnt) == 1 && + (state == NUD_FAILED || + time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) { + *np = n->next; + n->dead = 1; + write_unlock(&n->lock); + neigh_cleanup_and_release(n); + continue; + } write_unlock(&n->lock); - neigh_cleanup_and_release(n); - continue; - } - write_unlock(&n->lock); next_elt: - np = &n->next; + np = &n->next; + } + /* + * It's fine to release lock here, even if hash table + * grows while we are preempted. + */ + write_unlock_bh(&tbl->lock); + cond_resched(); + write_lock_bh(&tbl->lock); + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); } - - /* Cycle through all hash buckets every base_reachable_time/2 ticks. - * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 - * base_reachable_time. +out: + /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks. + * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2 + * BASE_REACHABLE_TIME. */ - expire = tbl->parms.base_reachable_time >> 1; - expire /= (tbl->hash_mask + 1); - if (!expire) - expire = 1; - - if (expire>HZ) - mod_timer(&tbl->gc_timer, round_jiffies(now + expire)); - else - mod_timer(&tbl->gc_timer, now + expire); - - write_unlock(&tbl->lock); + queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, + NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1); + write_unlock_bh(&tbl->lock); } static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; - return (n->nud_state & NUD_PROBE ? - p->ucast_probes : - p->ucast_probes + p->app_probes + p->mcast_probes); + int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES); + if (!(n->nud_state & NUD_PROBE)) + max_probes += NEIGH_VAR(p, MCAST_PROBES); + return max_probes; +} + +static void neigh_invalidate(struct neighbour *neigh) + __releases(neigh->lock) + __acquires(neigh->lock) +{ + struct sk_buff *skb; + + NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); + neigh_dbg(2, "neigh %p is failed\n", neigh); + neigh->updated = jiffies; + + /* It is very thin place. report_unreachable is very complicated + routine. Particularly, it can hit the same neighbour entry! + + So that, we try to be accurate and avoid dead loop. --ANK + */ + while (neigh->nud_state == NUD_FAILED && + (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { + write_unlock(&neigh->lock); + neigh->ops->error_report(neigh, skb); + write_lock(&neigh->lock); + } + __skb_queue_purge(&neigh->arp_queue); + neigh->arp_queue_len_bytes = 0; +} + +static void neigh_probe(struct neighbour *neigh) + __releases(neigh->lock) +{ + struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue); + /* keep skb alive even if arp_queue overflows */ + if (skb) + skb = skb_copy(skb, GFP_ATOMIC); + write_unlock(&neigh->lock); + neigh->ops->solicit(neigh, skb); + atomic_inc(&neigh->probes); + kfree_skb(skb); } /* Called when a timer expires for a neighbour entry. */ @@ -771,7 +886,7 @@ static void neigh_timer_handler(unsigned long arg) { unsigned long now, next; struct neighbour *neigh = (struct neighbour *)arg; - unsigned state; + unsigned int state; int notify = 0; write_lock(&neigh->lock); @@ -780,27 +895,24 @@ static void neigh_timer_handler(unsigned long arg) now = jiffies; next = now + HZ; - if (!(state & NUD_IN_TIMER)) { -#ifndef CONFIG_SMP - printk(KERN_WARNING "neigh: timer & !nud_in_timer\n"); -#endif + if (!(state & NUD_IN_TIMER)) goto out; - } if (state & NUD_REACHABLE) { if (time_before_eq(now, neigh->confirmed + neigh->parms->reachable_time)) { - NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); + neigh_dbg(2, "neigh %p is still alive\n", neigh); next = neigh->confirmed + neigh->parms->reachable_time; } else if (time_before_eq(now, - neigh->used + neigh->parms->delay_probe_time)) { - NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh->used + + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { + neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_suspect(neigh); - next = now + neigh->parms->delay_probe_time; + next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME); } else { - NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); + neigh_dbg(2, "neigh %p is suspected\n", neigh); neigh->nud_state = NUD_STALE; neigh->updated = jiffies; neigh_suspect(neigh); @@ -808,47 +920,32 @@ static void neigh_timer_handler(unsigned long arg) } } else if (state & NUD_DELAY) { if (time_before_eq(now, - neigh->confirmed + neigh->parms->delay_probe_time)) { - NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh); + neigh->confirmed + + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) { + neigh_dbg(2, "neigh %p is now reachable\n", neigh); neigh->nud_state = NUD_REACHABLE; neigh->updated = jiffies; neigh_connect(neigh); notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { - NEIGH_PRINTK2("neigh %p is probed.\n", neigh); + neigh_dbg(2, "neigh %p is probed\n", neigh); neigh->nud_state = NUD_PROBE; neigh->updated = jiffies; atomic_set(&neigh->probes, 0); - next = now + neigh->parms->retrans_time; + next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); } } else { /* NUD_PROBE|NUD_INCOMPLETE */ - next = now + neigh->parms->retrans_time; + next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME); } if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { - struct sk_buff *skb; - neigh->nud_state = NUD_FAILED; - neigh->updated = jiffies; notify = 1; - NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); - NEIGH_PRINTK2("neigh %p is failed.\n", neigh); - - /* It is very thin place. report_unreachable is very complicated - routine. Particularly, it can hit the same neighbour entry! - - So that, we try to be accurate and avoid dead loop. --ANK - */ - while (neigh->nud_state == NUD_FAILED && - (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { - write_unlock(&neigh->lock); - neigh->ops->error_report(neigh, skb); - write_lock(&neigh->lock); - } - skb_queue_purge(&neigh->arp_queue); + neigh_invalidate(neigh); + goto out; } if (neigh->nud_state & NUD_IN_TIMER) { @@ -858,15 +955,7 @@ static void neigh_timer_handler(unsigned long arg) neigh_hold(neigh); } if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { - struct sk_buff *skb = skb_peek(&neigh->arp_queue); - /* keep skb alive even if arp_queue overflows */ - if (skb) - skb = skb_copy(skb, GFP_ATOMIC); - write_unlock(&neigh->lock); - neigh->ops->solicit(neigh, skb); - atomic_inc(&neigh->probes); - if (skb) - kfree_skb(skb); + neigh_probe(neigh); } else { out: write_unlock(&neigh->lock); @@ -881,7 +970,7 @@ out: int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { int rc; - unsigned long now; + bool immediate_probe = false; write_lock_bh(&neigh->lock); @@ -889,57 +978,76 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; - now = jiffies; - if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { - if (neigh->parms->mcast_probes + neigh->parms->app_probes) { - atomic_set(&neigh->probes, neigh->parms->ucast_probes); + if (NEIGH_VAR(neigh->parms, MCAST_PROBES) + + NEIGH_VAR(neigh->parms, APP_PROBES)) { + unsigned long next, now = jiffies; + + atomic_set(&neigh->probes, + NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh->nud_state = NUD_INCOMPLETE; - neigh->updated = jiffies; - neigh_add_timer(neigh, now + 1); + neigh->updated = now; + next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), + HZ/2); + neigh_add_timer(neigh, next); + immediate_probe = true; } else { neigh->nud_state = NUD_FAILED; neigh->updated = jiffies; write_unlock_bh(&neigh->lock); - if (skb) - kfree_skb(skb); + kfree_skb(skb); return 1; } } else if (neigh->nud_state & NUD_STALE) { - NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh_dbg(2, "neigh %p is delayed\n", neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; - neigh_add_timer(neigh, - jiffies + neigh->parms->delay_probe_time); + neigh_add_timer(neigh, jiffies + + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME)); } if (neigh->nud_state == NUD_INCOMPLETE) { if (skb) { - if (skb_queue_len(&neigh->arp_queue) >= - neigh->parms->queue_len) { + while (neigh->arp_queue_len_bytes + skb->truesize > + NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) { struct sk_buff *buff; - buff = neigh->arp_queue.next; - __skb_unlink(buff, &neigh->arp_queue); + + buff = __skb_dequeue(&neigh->arp_queue); + if (!buff) + break; + neigh->arp_queue_len_bytes -= buff->truesize; kfree_skb(buff); + NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } + skb_dst_force(skb); __skb_queue_tail(&neigh->arp_queue, skb); + neigh->arp_queue_len_bytes += skb->truesize; } rc = 1; } out_unlock_bh: - write_unlock_bh(&neigh->lock); + if (immediate_probe) + neigh_probe(neigh); + else + write_unlock(&neigh->lock); + local_bh_enable(); return rc; } +EXPORT_SYMBOL(__neigh_event_send); static void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) - = neigh->dev->header_ops->cache_update; + = NULL; + + if (neigh->dev->header_ops) + update = neigh->dev->header_ops->cache_update; if (update) { - for (hh = neigh->hh; hh; hh = hh->hh_next) { + hh = &neigh->hh; + if (hh->hh_len) { write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); write_sequnlock_bh(&hh->hh_lock); @@ -996,6 +1104,11 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh->nud_state = new; err = 0; notify = old & NUD_VALID; + if ((old & (NUD_INCOMPLETE | NUD_PROBE)) && + (new & NUD_FAILED)) { + neigh_invalidate(neigh); + notify = 1; + } goto out; } @@ -1057,14 +1170,17 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh->parms->reachable_time : 0))); neigh->nud_state = new; + notify = 1; } if (lladdr != neigh->ha) { + write_seqlock(&neigh->ha_lock); memcpy(&neigh->ha, lladdr, dev->addr_len); + write_sequnlock(&neigh->ha_lock); neigh_update_hhs(neigh); if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - - (neigh->parms->base_reachable_time << 1); + (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1); notify = 1; } if (new == old) @@ -1080,15 +1196,34 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, while (neigh->nud_state & NUD_VALID && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { - struct neighbour *n1 = neigh; + struct dst_entry *dst = skb_dst(skb); + struct neighbour *n2, *n1 = neigh; write_unlock_bh(&neigh->lock); - /* On shaper/eql skb->dst->neighbour != neigh :( */ - if (skb->dst && skb->dst->neighbour) - n1 = skb->dst->neighbour; - n1->output(skb); + + rcu_read_lock(); + + /* Why not just use 'neigh' as-is? The problem is that + * things such as shaper, eql, and sch_teql can end up + * using alternative, different, neigh objects to output + * the packet in the output path. So what we need to do + * here is re-lookup the top-level neigh in the path so + * we can reinject the packet there. + */ + n2 = NULL; + if (dst) { + n2 = dst_neigh_lookup_skb(dst, skb); + if (n2) + n1 = n2; + } + n1->output(n1, skb); + if (n2) + neigh_release(n2); + rcu_read_unlock(); + write_lock_bh(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); + neigh->arp_queue_len_bytes = 0; } out: if (update_isrouter) { @@ -1103,6 +1238,22 @@ out: return err; } +EXPORT_SYMBOL(neigh_update); + +/* Update the neigh to listen temporarily for probe responses, even if it is + * in a NUD_FAILED state. The caller has to hold neigh->lock for writing. + */ +void __neigh_set_probe_once(struct neighbour *neigh) +{ + neigh->updated = jiffies; + if (!(neigh->nud_state & NUD_FAILED)) + return; + neigh->nud_state = NUD_INCOMPLETE; + atomic_set(&neigh->probes, neigh_max_probes(neigh)); + neigh_add_timer(neigh, + jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME)); +} +EXPORT_SYMBOL(__neigh_set_probe_once); struct neighbour *neigh_event_ns(struct neigh_table *tbl, u8 *lladdr, void *saddr, @@ -1115,48 +1266,32 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, NEIGH_UPDATE_F_OVERRIDE); return neigh; } +EXPORT_SYMBOL(neigh_event_ns); -static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, - __be16 protocol) +/* called with read_lock_bh(&n->lock); */ +static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst) { - struct hh_cache *hh; struct net_device *dev = dst->dev; + __be16 prot = dst->ops->protocol; + struct hh_cache *hh = &n->hh; - for (hh = n->hh; hh; hh = hh->hh_next) - if (hh->hh_type == protocol) - break; + write_lock_bh(&n->lock); - if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { - seqlock_init(&hh->hh_lock); - hh->hh_type = protocol; - atomic_set(&hh->hh_refcnt, 0); - hh->hh_next = NULL; + /* Only one thread can come in here and initialize the + * hh_cache entry. + */ + if (!hh->hh_len) + dev->header_ops->cache(n, hh, prot); - if (dev->header_ops->cache(n, hh)) { - kfree(hh); - hh = NULL; - } else { - atomic_inc(&hh->hh_refcnt); - hh->hh_next = n->hh; - n->hh = hh; - if (n->nud_state & NUD_CONNECTED) - hh->hh_output = n->ops->hh_output; - else - hh->hh_output = n->ops->output; - } - } - if (hh) { - atomic_inc(&hh->hh_refcnt); - dst->hh = hh; - } + write_unlock_bh(&n->lock); } /* This function can be used in contexts, where only old dev_queue_xmit - worked, f.e. if you want to override normal output path (eql, shaper), - but resolution is not made yet. + * worked, f.e. if you want to override normal output path (eql, shaper), + * but resolution is not made yet. */ -int neigh_compat_output(struct sk_buff *skb) +int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb) { struct net_device *dev = skb->dev; @@ -1164,104 +1299,108 @@ int neigh_compat_output(struct sk_buff *skb) if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 && - dev->header_ops->rebuild(skb)) + dev_rebuild_header(skb)) return 0; return dev_queue_xmit(skb); } +EXPORT_SYMBOL(neigh_compat_output); /* Slow and careful. */ -int neigh_resolve_output(struct sk_buff *skb) +int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) { - struct dst_entry *dst = skb->dst; - struct neighbour *neigh; + struct dst_entry *dst = skb_dst(skb); int rc = 0; - if (!dst || !(neigh = dst->neighbour)) + if (!dst) goto discard; - __skb_pull(skb, skb_network_offset(skb)); - if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; - if (dev->header_ops->cache && !dst->hh) { - write_lock_bh(&neigh->lock); - if (!dst->hh) - neigh_hh_init(neigh, dst, dst->ops->protocol); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); - write_unlock_bh(&neigh->lock); - } else { - read_lock_bh(&neigh->lock); + unsigned int seq; + + if (dev->header_ops->cache && !neigh->hh.hh_len) + neigh_hh_init(neigh, dst); + + do { + __skb_pull(skb, skb_network_offset(skb)); + seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); - read_unlock_bh(&neigh->lock); - } + } while (read_seqretry(&neigh->ha_lock, seq)); + if (err >= 0) - rc = neigh->ops->queue_xmit(skb); + rc = dev_queue_xmit(skb); else goto out_kfree_skb; } out: return rc; discard: - NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", - dst, dst ? dst->neighbour : NULL); + neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh); out_kfree_skb: rc = -EINVAL; kfree_skb(skb); goto out; } +EXPORT_SYMBOL(neigh_resolve_output); /* As fast as possible without hh cache */ -int neigh_connected_output(struct sk_buff *skb) +int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb) { - int err; - struct dst_entry *dst = skb->dst; - struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; + unsigned int seq; + int err; - __skb_pull(skb, skb_network_offset(skb)); + do { + __skb_pull(skb, skb_network_offset(skb)); + seq = read_seqbegin(&neigh->ha_lock); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); + } while (read_seqretry(&neigh->ha_lock, seq)); - read_lock_bh(&neigh->lock); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); - read_unlock_bh(&neigh->lock); if (err >= 0) - err = neigh->ops->queue_xmit(skb); + err = dev_queue_xmit(skb); else { err = -EINVAL; kfree_skb(skb); } return err; } +EXPORT_SYMBOL(neigh_connected_output); + +int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb) +{ + return dev_queue_xmit(skb); +} +EXPORT_SYMBOL(neigh_direct_output); static void neigh_proxy_process(unsigned long arg) { struct neigh_table *tbl = (struct neigh_table *)arg; long sched_next = 0; unsigned long now = jiffies; - struct sk_buff *skb; + struct sk_buff *skb, *n; spin_lock(&tbl->proxy_queue.lock); - skb = tbl->proxy_queue.next; + skb_queue_walk_safe(&tbl->proxy_queue, skb, n) { + long tdif = NEIGH_CB(skb)->sched_next - now; - while (skb != (struct sk_buff *)&tbl->proxy_queue) { - struct sk_buff *back = skb; - long tdif = NEIGH_CB(back)->sched_next - now; - - skb = skb->next; if (tdif <= 0) { - struct net_device *dev = back->dev; - __skb_unlink(back, &tbl->proxy_queue); - if (tbl->proxy_redo && netif_running(dev)) - tbl->proxy_redo(back); - else - kfree_skb(back); + struct net_device *dev = skb->dev; + + __skb_unlink(skb, &tbl->proxy_queue); + if (tbl->proxy_redo && netif_running(dev)) { + rcu_read_lock(); + tbl->proxy_redo(skb); + rcu_read_unlock(); + } else { + kfree_skb(skb); + } dev_put(dev); } else if (!sched_next || tdif < sched_next) @@ -1277,9 +1416,11 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb) { unsigned long now = jiffies; - unsigned long sched_next = now + (net_random() % p->proxy_delay); - if (tbl->proxy_queue.qlen > p->proxy_qlen) { + unsigned long sched_next = now + (prandom_u32() % + NEIGH_VAR(p, PROXY_DELAY)); + + if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) { kfree_skb(skb); return; } @@ -1292,24 +1433,22 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, if (time_before(tbl->proxy_timer.expires, sched_next)) sched_next = tbl->proxy_timer.expires; } - dst_release(skb->dst); - skb->dst = NULL; + skb_dst_drop(skb); dev_hold(skb->dev); __skb_queue_tail(&tbl->proxy_queue, skb); mod_timer(&tbl->proxy_timer, sched_next); spin_unlock(&tbl->proxy_queue.lock); } +EXPORT_SYMBOL(pneigh_enqueue); -static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, +static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, struct net *net, int ifindex) { struct neigh_parms *p; for (p = &tbl->parms; p; p = p->next) { - if (p->net != net) - continue; - if ((p->dev && p->dev->ifindex == ifindex) || - (!p->dev && !ifindex)) + if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) || + (!p->dev && !ifindex && net_eq(net, &init_net))) return p; } @@ -1319,38 +1458,38 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { - struct neigh_parms *p, *ref; - struct net *net; - - net = dev->nd_net; - ref = lookup_neigh_params(tbl, net, 0); - if (!ref) - return NULL; + struct neigh_parms *p; + struct net *net = dev_net(dev); + const struct net_device_ops *ops = dev->netdev_ops; - p = kmemdup(ref, sizeof(*p), GFP_KERNEL); + p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); if (p) { p->tbl = tbl; atomic_set(&p->refcnt, 1); - INIT_RCU_HEAD(&p->rcu_head); p->reachable_time = - neigh_rand_reach_time(p->base_reachable_time); + neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); + dev_hold(dev); + p->dev = dev; + write_pnet(&p->net, hold_net(net)); + p->sysctl_table = NULL; - if (dev->neigh_setup && dev->neigh_setup(dev, p)) { + if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { + release_net(net); + dev_put(dev); kfree(p); return NULL; } - dev_hold(dev); - p->dev = dev; - p->net = hold_net(net); - p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; tbl->parms.next = p; write_unlock_bh(&tbl->lock); + + neigh_parms_data_state_cleanall(p); } return p; } +EXPORT_SYMBOL(neigh_parms_alloc); static void neigh_rcu_free_parms(struct rcu_head *head) { @@ -1379,61 +1518,56 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) } } write_unlock_bh(&tbl->lock); - NEIGH_PRINTK1("neigh_parms_release: not found\n"); + neigh_dbg(1, "%s: not found\n", __func__); } +EXPORT_SYMBOL(neigh_parms_release); static void neigh_parms_destroy(struct neigh_parms *parms) { - release_net(parms->net); + release_net(neigh_parms_net(parms)); kfree(parms); } static struct lock_class_key neigh_table_proxy_queue_class; -void neigh_table_init_no_netlink(struct neigh_table *tbl) +static void neigh_table_init_no_netlink(struct neigh_table *tbl) { unsigned long now = jiffies; unsigned long phsize; - tbl->parms.net = &init_net; + write_pnet(&tbl->parms.net, &init_net); atomic_set(&tbl->parms.refcnt, 1); - INIT_RCU_HEAD(&tbl->parms.rcu_head); tbl->parms.reachable_time = - neigh_rand_reach_time(tbl->parms.base_reachable_time); + neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME)); - if (!tbl->kmem_cachep) - tbl->kmem_cachep = - kmem_cache_create(tbl->id, tbl->entry_size, 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL); tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - tbl->pde = proc_create(tbl->id, 0, init_net.proc_net_stat, - &neigh_stat_seq_fops); - if (!tbl->pde) + if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat, + &neigh_stat_seq_fops, tbl)) panic("cannot create neighbour proc dir entry"); - tbl->pde->data = tbl; #endif - tbl->hash_mask = 1; - tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1); + RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3)); phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); - if (!tbl->hash_buckets || !tbl->phash_buckets) + if (!tbl->nht || !tbl->phash_buckets) panic("cannot allocate neighbour cache hashes"); - get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + if (!tbl->entry_size) + tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) + + tbl->key_len, NEIGH_PRIV_ALIGN); + else + WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); rwlock_init(&tbl->lock); - setup_timer(&tbl->gc_timer, neigh_periodic_timer, (unsigned long)tbl); - tbl->gc_timer.expires = now + 1; - add_timer(&tbl->gc_timer); - + INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work); + queue_delayed_work(system_power_efficient_wq, &tbl->gc_work, + tbl->parms.reachable_time); setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class); @@ -1457,23 +1591,24 @@ void neigh_table_init(struct neigh_table *tbl) write_unlock(&neigh_tbl_lock); if (unlikely(tmp)) { - printk(KERN_ERR "NEIGH: Registering multiple tables for " - "family %d\n", tbl->family); + pr_err("Registering multiple tables for family %d\n", + tbl->family); dump_stack(); } } +EXPORT_SYMBOL(neigh_table_init); int neigh_table_clear(struct neigh_table *tbl) { struct neigh_table **tp; /* It is not clean... Fix it to unload IPv6 module safely */ - del_timer_sync(&tbl->gc_timer); + cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); pneigh_queue_purge(&tbl->proxy_queue); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) - printk(KERN_CRIT "neighbour leakage\n"); + pr_crit("neighbour leakage\n"); write_lock(&neigh_tbl_lock); for (tp = &neigh_tables; *tp; tp = &(*tp)->next) { if (*tp == tbl) { @@ -1483,8 +1618,9 @@ int neigh_table_clear(struct neigh_table *tbl) } write_unlock(&neigh_tbl_lock); - neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1); - tbl->hash_buckets = NULL; + call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu, + neigh_hash_free_rcu); + tbl->nht = NULL; kfree(tbl->phash_buckets); tbl->phash_buckets = NULL; @@ -1494,21 +1630,20 @@ int neigh_table_clear(struct neigh_table *tbl) free_percpu(tbl->stats); tbl->stats = NULL; - kmem_cache_destroy(tbl->kmem_cachep); - tbl->kmem_cachep = NULL; - return 0; } +EXPORT_SYMBOL(neigh_table_clear); -static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *dst_attr; struct neigh_table *tbl; struct net_device *dev = NULL; int err = -EINVAL; + ASSERT_RTNL(); if (nlmsg_len(nlh) < sizeof(*ndm)) goto out; @@ -1518,7 +1653,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(net, ndm->ndm_ifindex); + dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; @@ -1534,47 +1669,45 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) read_unlock(&neigh_tbl_lock); if (nla_len(dst_attr) < tbl->key_len) - goto out_dev_put; + goto out; if (ndm->ndm_flags & NTF_PROXY) { err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); - goto out_dev_put; + goto out; } if (dev == NULL) - goto out_dev_put; + goto out; neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); if (neigh == NULL) { err = -ENOENT; - goto out_dev_put; + goto out; } err = neigh_update(neigh, NULL, NUD_FAILED, NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN); neigh_release(neigh); - goto out_dev_put; + goto out; } read_unlock(&neigh_tbl_lock); err = -EAFNOSUPPORT; -out_dev_put: - if (dev) - dev_put(dev); out: return err; } -static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct neigh_table *tbl; struct net_device *dev = NULL; int err; + ASSERT_RTNL(); err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); if (err < 0) goto out; @@ -1585,14 +1718,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex) { - dev = dev_get_by_index(net, ndm->ndm_ifindex); + dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { err = -ENODEV; goto out; } if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) - goto out_dev_put; + goto out; } read_lock(&neigh_tbl_lock); @@ -1606,7 +1739,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) read_unlock(&neigh_tbl_lock); if (nla_len(tb[NDA_DST]) < tbl->key_len) - goto out_dev_put; + goto out; dst = nla_data(tb[NDA_DST]); lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; @@ -1619,46 +1752,46 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) pn->flags = ndm->ndm_flags; err = 0; } - goto out_dev_put; + goto out; } if (dev == NULL) - goto out_dev_put; + goto out; neigh = neigh_lookup(tbl, dst, dev); if (neigh == NULL) { if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { err = -ENOENT; - goto out_dev_put; + goto out; } neigh = __neigh_lookup_errno(tbl, dst, dev); if (IS_ERR(neigh)) { err = PTR_ERR(neigh); - goto out_dev_put; + goto out; } } else { if (nlh->nlmsg_flags & NLM_F_EXCL) { err = -EEXIST; neigh_release(neigh); - goto out_dev_put; + goto out; } if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) flags &= ~NEIGH_UPDATE_F_OVERRIDE; } - err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); + if (ndm->ndm_flags & NTF_USE) { + neigh_event_send(neigh, NULL); + err = 0; + } else + err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); neigh_release(neigh); - goto out_dev_put; + goto out; } read_unlock(&neigh_tbl_lock); err = -EAFNOSUPPORT; - -out_dev_put: - if (dev) - dev_put(dev); out: return err; } @@ -1671,29 +1804,41 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) if (nest == NULL) return -ENOBUFS; - if (parms->dev) - NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); - - NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); - NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); - NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); - NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); - NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); - NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes); - NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time); - NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, - parms->base_reachable_time); - NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime); - NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time); - NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time); - NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay); - NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay); - NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime); - + if ((parms->dev && + nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) || + nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) || + nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, + NEIGH_VAR(parms, QUEUE_LEN_BYTES)) || + /* approximative value for deprecated QUEUE_LEN (in packets) */ + nla_put_u32(skb, NDTPA_QUEUE_LEN, + NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) || + nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) || + nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) || + nla_put_u32(skb, NDTPA_UCAST_PROBES, + NEIGH_VAR(parms, UCAST_PROBES)) || + nla_put_u32(skb, NDTPA_MCAST_PROBES, + NEIGH_VAR(parms, MCAST_PROBES)) || + nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) || + nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME, + NEIGH_VAR(parms, BASE_REACHABLE_TIME)) || + nla_put_msecs(skb, NDTPA_GC_STALETIME, + NEIGH_VAR(parms, GC_STALETIME)) || + nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME, + NEIGH_VAR(parms, DELAY_PROBE_TIME)) || + nla_put_msecs(skb, NDTPA_RETRANS_TIME, + NEIGH_VAR(parms, RETRANS_TIME)) || + nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, + NEIGH_VAR(parms, ANYCAST_DELAY)) || + nla_put_msecs(skb, NDTPA_PROXY_DELAY, + NEIGH_VAR(parms, PROXY_DELAY)) || + nla_put_msecs(skb, NDTPA_LOCKTIME, + NEIGH_VAR(parms, LOCKTIME))) + goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: - return nla_nest_cancel(skb, nest); + nla_nest_cancel(skb, nest); + return -EMSGSIZE; } static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, @@ -1713,30 +1858,34 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; - NLA_PUT_STRING(skb, NDTA_NAME, tbl->id); - NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); - NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1); - NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2); - NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3); - + if (nla_put_string(skb, NDTA_NAME, tbl->id) || + nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) || + nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) || + nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) || + nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3)) + goto nla_put_failure; { unsigned long now = jiffies; unsigned int flush_delta = now - tbl->last_flush; unsigned int rand_delta = now - tbl->last_rand; - + struct neigh_hash_table *nht; struct ndt_config ndc = { .ndtc_key_len = tbl->key_len, .ndtc_entry_size = tbl->entry_size, .ndtc_entries = atomic_read(&tbl->entries), .ndtc_last_flush = jiffies_to_msecs(flush_delta), .ndtc_last_rand = jiffies_to_msecs(rand_delta), - .ndtc_hash_rnd = tbl->hash_rnd, - .ndtc_hash_mask = tbl->hash_mask, - .ndtc_hash_chain_gc = tbl->hash_chain_gc, .ndtc_proxy_qlen = tbl->proxy_queue.qlen, }; - NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); + rcu_read_lock_bh(); + nht = rcu_dereference_bh(tbl->nht); + ndc.ndtc_hash_rnd = nht->hash_rnd[0]; + ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1); + rcu_read_unlock_bh(); + + if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc)) + goto nla_put_failure; } { @@ -1761,7 +1910,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndst.ndts_forced_gc_runs += st->forced_gc_runs; } - NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst); + if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst)) + goto nla_put_failure; } BUG_ON(tbl->parms.dev); @@ -1834,9 +1984,9 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_LOCKTIME] = { .type = NLA_U64 }, }; -static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct neigh_table *tbl; struct ndtmsg *ndtmsg; struct nlattr *tb[NDTA_MAX+1]; @@ -1886,7 +2036,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (tbp[NDTPA_IFINDEX]) ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); - p = lookup_neigh_params(tbl, net, ifindex); + p = lookup_neigh_parms(tbl, net, ifindex); if (p == NULL) { err = -ENOENT; goto errout_tbl_lock; @@ -1898,45 +2048,68 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) switch (i) { case NDTPA_QUEUE_LEN: - p->queue_len = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, + nla_get_u32(tbp[i]) * + SKB_TRUESIZE(ETH_FRAME_LEN)); + break; + case NDTPA_QUEUE_LENBYTES: + NEIGH_VAR_SET(p, QUEUE_LEN_BYTES, + nla_get_u32(tbp[i])); break; case NDTPA_PROXY_QLEN: - p->proxy_qlen = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, PROXY_QLEN, + nla_get_u32(tbp[i])); break; case NDTPA_APP_PROBES: - p->app_probes = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, APP_PROBES, + nla_get_u32(tbp[i])); break; case NDTPA_UCAST_PROBES: - p->ucast_probes = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, UCAST_PROBES, + nla_get_u32(tbp[i])); break; case NDTPA_MCAST_PROBES: - p->mcast_probes = nla_get_u32(tbp[i]); + NEIGH_VAR_SET(p, MCAST_PROBES, + nla_get_u32(tbp[i])); break; case NDTPA_BASE_REACHABLE_TIME: - p->base_reachable_time = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, BASE_REACHABLE_TIME, + nla_get_msecs(tbp[i])); break; case NDTPA_GC_STALETIME: - p->gc_staletime = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, GC_STALETIME, + nla_get_msecs(tbp[i])); break; case NDTPA_DELAY_PROBE_TIME: - p->delay_probe_time = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, DELAY_PROBE_TIME, + nla_get_msecs(tbp[i])); break; case NDTPA_RETRANS_TIME: - p->retrans_time = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, RETRANS_TIME, + nla_get_msecs(tbp[i])); break; case NDTPA_ANYCAST_DELAY: - p->anycast_delay = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, ANYCAST_DELAY, + nla_get_msecs(tbp[i])); break; case NDTPA_PROXY_DELAY: - p->proxy_delay = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, PROXY_DELAY, + nla_get_msecs(tbp[i])); break; case NDTPA_LOCKTIME: - p->locktime = nla_get_msecs(tbp[i]); + NEIGH_VAR_SET(p, LOCKTIME, + nla_get_msecs(tbp[i])); break; } } } + err = -ENOENT; + if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] || + tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) && + !net_eq(net, &init_net)) + goto errout_tbl_lock; + if (tb[NDTA_THRESH1]) tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); @@ -1961,7 +2134,7 @@ errout: static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); int family, tidx, nidx = 0; int tbl_skip = cb->args[0]; int neigh_skip = cb->args[1]; @@ -1976,24 +2149,26 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (tidx < tbl_skip || (family && tbl->family != family)) continue; - if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid, + if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) break; for (nidx = 0, p = tbl->parms.next; p; p = p->next) { - if (net != p->net) + if (!net_eq(neigh_parms_net(p), net)) continue; - if (nidx++ < neigh_skip) - continue; + if (nidx < neigh_skip) + goto next; if (neightbl_fill_param_info(skb, tbl, p, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) goto out; + next: + nidx++; } neigh_skip = 0; @@ -2026,24 +2201,60 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, ndm->ndm_type = neigh->type; ndm->ndm_ifindex = neigh->dev->ifindex; - NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key); + if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key)) + goto nla_put_failure; read_lock_bh(&neigh->lock); ndm->ndm_state = neigh->nud_state; - if ((neigh->nud_state & NUD_VALID) && - nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { - read_unlock_bh(&neigh->lock); - goto nla_put_failure; + if (neigh->nud_state & NUD_VALID) { + char haddr[MAX_ADDR_LEN]; + + neigh_ha_snapshot(haddr, neigh, neigh->dev); + if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { + read_unlock_bh(&neigh->lock); + goto nla_put_failure; + } } - ci.ndm_used = now - neigh->used; - ci.ndm_confirmed = now - neigh->confirmed; - ci.ndm_updated = now - neigh->updated; + ci.ndm_used = jiffies_to_clock_t(now - neigh->used); + ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); + ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; read_unlock_bh(&neigh->lock); - NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes)); - NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); + if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) || + nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, + u32 pid, u32 seq, int type, unsigned int flags, + struct neigh_table *tbl) +{ + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); + if (nlh == NULL) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = tbl->family; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = pn->flags | NTF_PROXY; + ndm->ndm_type = RTN_UNICAST; + ndm->ndm_ifindex = pn->dev->ifindex; + ndm->ndm_state = NUD_NONE; + + if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -2061,58 +2272,115 @@ static void neigh_update_notify(struct neighbour *neigh) static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) { - struct net * net = skb->sk->sk_net; + struct net *net = sock_net(skb->sk); struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; + struct neigh_hash_table *nht; - read_lock_bh(&tbl->lock); - for (h = 0; h <= tbl->hash_mask; h++) { - if (h < s_h) - continue; + rcu_read_lock_bh(); + nht = rcu_dereference_bh(tbl->nht); + + for (h = s_h; h < (1 << nht->hash_shift); h++) { if (h > s_h) s_idx = 0; - for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { - int lidx; - if (n->dev->nd_net != net) + for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; + n != NULL; + n = rcu_dereference_bh(n->next)) { + if (!net_eq(dev_net(n->dev), net)) continue; - lidx = idx++; - if (lidx < s_idx) - continue; - if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + if (idx < s_idx) + goto next; + if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI) <= 0) { - read_unlock_bh(&tbl->lock); rc = -1; goto out; } +next: + idx++; } } - read_unlock_bh(&tbl->lock); rc = skb->len; out: + rcu_read_unlock_bh(); cb->args[1] = h; cb->args[2] = idx; return rc; } +static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct pneigh_entry *n; + struct net *net = sock_net(skb->sk); + int rc, h, s_h = cb->args[3]; + int idx, s_idx = idx = cb->args[4]; + + read_lock_bh(&tbl->lock); + + for (h = s_h; h <= PNEIGH_HASHMASK; h++) { + if (h > s_h) + s_idx = 0; + for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { + if (dev_net(n->dev) != net) + continue; + if (idx < s_idx) + goto next; + if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, + NLM_F_MULTI, tbl) <= 0) { + read_unlock_bh(&tbl->lock); + rc = -1; + goto out; + } + next: + idx++; + } + } + + read_unlock_bh(&tbl->lock); + rc = skb->len; +out: + cb->args[3] = h; + cb->args[4] = idx; + return rc; + +} + static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { struct neigh_table *tbl; int t, family, s_t; + int proxy = 0; + int err; read_lock(&neigh_tbl_lock); family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; + + /* check for full ndmsg structure presence, family member is + * the same for both structures + */ + if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) && + ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY) + proxy = 1; + s_t = cb->args[0]; - for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { + for (tbl = neigh_tables, t = 0; tbl; + tbl = tbl->next, t++) { if (t < s_t || (family && tbl->family != family)) continue; if (t > s_t) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); - if (neigh_dump_table(tbl, skb, cb) < 0) + if (proxy) + err = pneigh_dump_table(tbl, skb, cb); + else + err = neigh_dump_table(tbl, skb, cb); + if (err < 0) break; } read_unlock(&neigh_tbl_lock); @@ -2124,15 +2392,22 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) { int chain; + struct neigh_hash_table *nht; - read_lock_bh(&tbl->lock); - for (chain = 0; chain <= tbl->hash_mask; chain++) { + rcu_read_lock_bh(); + nht = rcu_dereference_bh(tbl->nht); + + read_lock(&tbl->lock); /* avoid resizes */ + for (chain = 0; chain < (1 << nht->hash_shift); chain++) { struct neighbour *n; - for (n = tbl->hash_buckets[chain]; n; n = n->next) + for (n = rcu_dereference_bh(nht->hash_buckets[chain]); + n != NULL; + n = rcu_dereference_bh(n->next)) cb(n, cookie); } - read_unlock_bh(&tbl->lock); + read_unlock(&tbl->lock); + rcu_read_unlock_bh(); } EXPORT_SYMBOL(neigh_for_each); @@ -2141,18 +2416,25 @@ void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)) { int chain; + struct neigh_hash_table *nht; - for (chain = 0; chain <= tbl->hash_mask; chain++) { - struct neighbour *n, **np; + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); + for (chain = 0; chain < (1 << nht->hash_shift); chain++) { + struct neighbour *n; + struct neighbour __rcu **np; - np = &tbl->hash_buckets[chain]; - while ((n = *np) != NULL) { + np = &nht->hash_buckets[chain]; + while ((n = rcu_dereference_protected(*np, + lockdep_is_held(&tbl->lock))) != NULL) { int release; write_lock(&n->lock); release = cb(n); if (release) { - *np = n->next; + rcu_assign_pointer(*np, + rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock))); n->dead = 1; } else np = &n->next; @@ -2169,17 +2451,17 @@ EXPORT_SYMBOL(__neigh_for_each_release); static struct neighbour *neigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; - struct net *net = state->p.net; - struct neigh_table *tbl = state->tbl; + struct net *net = seq_file_net(seq); + struct neigh_hash_table *nht = state->nht; struct neighbour *n = NULL; int bucket = state->bucket; state->flags &= ~NEIGH_SEQ_IS_PNEIGH; - for (bucket = 0; bucket <= tbl->hash_mask; bucket++) { - n = tbl->hash_buckets[bucket]; + for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { + n = rcu_dereference_bh(nht->hash_buckets[bucket]); while (n) { - if (n->dev->nd_net != net) + if (!net_eq(dev_net(n->dev), net)) goto next; if (state->neigh_sub_iter) { loff_t fakep = 0; @@ -2193,8 +2475,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) break; if (n->nud_state & ~NUD_NOARP) break; - next: - n = n->next; +next: + n = rcu_dereference_bh(n->next); } if (n) @@ -2210,19 +2492,19 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; - struct net *net = state->p.net; - struct neigh_table *tbl = state->tbl; + struct net *net = seq_file_net(seq); + struct neigh_hash_table *nht = state->nht; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); if (v) return n; } - n = n->next; + n = rcu_dereference_bh(n->next); while (1) { while (n) { - if (n->dev->nd_net != net) + if (!net_eq(dev_net(n->dev), net)) goto next; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); @@ -2235,17 +2517,17 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (n->nud_state & ~NUD_NOARP) break; - next: - n = n->next; +next: + n = rcu_dereference_bh(n->next); } if (n) break; - if (++state->bucket > tbl->hash_mask) + if (++state->bucket >= (1 << nht->hash_shift)) break; - n = tbl->hash_buckets[state->bucket]; + n = rcu_dereference_bh(nht->hash_buckets[state->bucket]); } if (n && pos) @@ -2258,6 +2540,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) struct neighbour *n = neigh_get_first(seq); if (n) { + --(*pos); while (*pos) { n = neigh_get_next(seq, n, pos); if (!n) @@ -2270,7 +2553,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; - struct net * net = state->p.net; + struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; struct pneigh_entry *pn = NULL; int bucket = state->bucket; @@ -2278,7 +2561,7 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) state->flags |= NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { pn = tbl->phash_buckets[bucket]; - while (pn && (pn->net != net)) + while (pn && !net_eq(pneigh_net(pn), net)) pn = pn->next; if (pn) break; @@ -2293,15 +2576,18 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; - struct net * net = state->p.net; + struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; - pn = pn->next; + do { + pn = pn->next; + } while (pn && !net_eq(pneigh_net(pn), net)); + while (!pn) { if (++state->bucket > PNEIGH_HASHMASK) break; pn = tbl->phash_buckets[state->bucket]; - while (pn && (pn->net != net)) + while (pn && !net_eq(pneigh_net(pn), net)) pn = pn->next; if (pn) break; @@ -2318,6 +2604,7 @@ static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) struct pneigh_entry *pn = pneigh_get_first(seq); if (pn) { + --(*pos); while (*pos) { pn = pneigh_get_next(seq, pn, pos); if (!pn) @@ -2331,28 +2618,28 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; void *rc; + loff_t idxpos = *pos; - rc = neigh_get_idx(seq, pos); + rc = neigh_get_idx(seq, &idxpos); if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY)) - rc = pneigh_get_idx(seq, pos); + rc = pneigh_get_idx(seq, &idxpos); return rc; } void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) - __acquires(tbl->lock) + __acquires(rcu_bh) { struct neigh_seq_state *state = seq->private; - loff_t pos_minus_one; state->tbl = tbl; state->bucket = 0; state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); - read_lock_bh(&tbl->lock); + rcu_read_lock_bh(); + state->nht = rcu_dereference_bh(tbl->nht); - pos_minus_one = *pos - 1; - return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN; + return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; } EXPORT_SYMBOL(neigh_seq_start); @@ -2362,7 +2649,7 @@ void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) void *rc; if (v == SEQ_START_TOKEN) { - rc = neigh_get_idx(seq, pos); + rc = neigh_get_first(seq); goto out; } @@ -2384,12 +2671,9 @@ out: EXPORT_SYMBOL(neigh_seq_next); void neigh_seq_stop(struct seq_file *seq, void *v) - __releases(tbl->lock) + __releases(rcu_bh) { - struct neigh_seq_state *state = seq->private; - struct neigh_table *tbl = state->tbl; - - read_unlock_bh(&tbl->lock); + rcu_read_unlock_bh(); } EXPORT_SYMBOL(neigh_seq_stop); @@ -2397,14 +2681,13 @@ EXPORT_SYMBOL(neigh_seq_stop); static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) { - struct proc_dir_entry *pde = seq->private; - struct neigh_table *tbl = pde->data; + struct neigh_table *tbl = seq->private; int cpu; if (*pos == 0) return SEQ_START_TOKEN; - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; @@ -2415,11 +2698,10 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct proc_dir_entry *pde = seq->private; - struct neigh_table *tbl = pde->data; + struct neigh_table *tbl = seq->private; int cpu; - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; @@ -2435,17 +2717,16 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v) static int neigh_stat_seq_show(struct seq_file *seq, void *v) { - struct proc_dir_entry *pde = seq->private; - struct neigh_table *tbl = pde->data; + struct neigh_table *tbl = seq->private; struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n"); + seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n"); return 0; } seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " - "%08lx %08lx %08lx %08lx\n", + "%08lx %08lx %08lx %08lx %08lx\n", atomic_read(&tbl->entries), st->allocs, @@ -2461,7 +2742,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) st->rcv_probes_ucast, st->periodic_gc_runs, - st->forced_gc_runs + st->forced_gc_runs, + st->unres_discards ); return 0; @@ -2480,7 +2762,7 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - sf->private = PDE(inode); + sf->private = PDE_DATA(inode); } return ret; }; @@ -2506,7 +2788,7 @@ static inline size_t neigh_nlmsg_size(void) static void __neigh_notify(struct neighbour *n, int type, int flags) { - struct net *net = n->dev->nd_net; + struct net *net = dev_net(n->dev); struct sk_buff *skb; int err = -ENOBUFS; @@ -2521,316 +2803,338 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } -#ifdef CONFIG_ARPD void neigh_app_ns(struct neighbour *n) { __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); } -#endif /* CONFIG_ARPD */ +EXPORT_SYMBOL(neigh_app_ns); #ifdef CONFIG_SYSCTL +static int zero; +static int int_max = INT_MAX; +static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); + +static int proc_unres_qlen(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int size, ret; + struct ctl_table tmp = *ctl; + + tmp.extra1 = &zero; + tmp.extra2 = &unres_qlen_max; + tmp.data = &size; + + size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN); + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + + if (write && !ret) + *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); + return ret; +} + +static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, + int family) +{ + switch (family) { + case AF_INET: + return __in_dev_arp_parms_get_rcu(dev); + case AF_INET6: + return __in6_dev_nd_parms_get_rcu(dev); + } + return NULL; +} + +static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, + int index) +{ + struct net_device *dev; + int family = neigh_parms_family(p); + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + struct neigh_parms *dst_p = + neigh_get_dev_parms_rcu(dev, family); + + if (dst_p && !test_bit(index, dst_p->data_state)) + dst_p->data[index] = p->data[index]; + } + rcu_read_unlock(); +} + +static void neigh_proc_update(struct ctl_table *ctl, int write) +{ + struct net_device *dev = ctl->extra1; + struct neigh_parms *p = ctl->extra2; + struct net *net = neigh_parms_net(p); + int index = (int *) ctl->data - p->data; + + if (!write) + return; + + set_bit(index, p->data_state); + if (!dev) /* NULL dev means this is default value */ + neigh_copy_dflt_parms(net, p, index); +} + +static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + struct ctl_table tmp = *ctl; + int ret; + + tmp.extra1 = &zero; + tmp.extra2 = &int_max; + + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + neigh_proc_update(ctl, write); + return ret; +} + +int neigh_proc_dointvec(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} +EXPORT_SYMBOL(neigh_proc_dointvec); + +int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} +EXPORT_SYMBOL(neigh_proc_dointvec_jiffies); + +static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} + +int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} +EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies); + +static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos); + + neigh_proc_update(ctl, write); + return ret; +} + +#define NEIGH_PARMS_DATA_OFFSET(index) \ + (&((struct neigh_parms *) 0)->data[index]) + +#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \ + [NEIGH_VAR_ ## attr] = { \ + .procname = name, \ + .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \ + .maxlen = sizeof(int), \ + .mode = mval, \ + .proc_handler = proc, \ + } + +#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax) + +#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies) + +#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies) + +#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies) + +#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies) + +#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \ + NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen) static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; - struct ctl_table neigh_vars[__NET_NEIGH_MAX]; - char *dev_name; + struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1]; } neigh_sysctl_template __read_mostly = { .neigh_vars = { - { - .ctl_name = NET_NEIGH_MCAST_SOLICIT, - .procname = "mcast_solicit", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_NEIGH_UCAST_SOLICIT, - .procname = "ucast_solicit", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_NEIGH_APP_SOLICIT, - .procname = "app_solicit", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .procname = "retrans_time", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, - }, - { - .ctl_name = NET_NEIGH_REACHABLE_TIME, - .procname = "base_reachable_time", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, - { - .ctl_name = NET_NEIGH_DELAY_PROBE_TIME, - .procname = "delay_first_probe_time", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, - { - .ctl_name = NET_NEIGH_GC_STALE_TIME, - .procname = "gc_stale_time", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, - { - .ctl_name = NET_NEIGH_UNRES_QLEN, - .procname = "unres_qlen", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_NEIGH_PROXY_QLEN, - .procname = "proxy_qlen", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .procname = "anycast_delay", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, - }, - { - .procname = "proxy_delay", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, - }, - { - .procname = "locktime", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_userhz_jiffies, - }, - { - .ctl_name = NET_NEIGH_RETRANS_TIME_MS, - .procname = "retrans_time_ms", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, - }, - { - .ctl_name = NET_NEIGH_REACHABLE_TIME_MS, - .procname = "base_reachable_time_ms", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_ms_jiffies, - .strategy = &sysctl_ms_jiffies, - }, - { - .ctl_name = NET_NEIGH_GC_INTERVAL, + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"), + NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"), + NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"), + NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"), + NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"), + NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"), + NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"), + NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"), + NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"), + NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"), + NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"), + NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"), + [NEIGH_VAR_GC_INTERVAL] = { .procname = "gc_interval", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, + .proc_handler = proc_dointvec_jiffies, }, - { - .ctl_name = NET_NEIGH_GC_THRESH1, + [NEIGH_VAR_GC_THRESH1] = { .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, - { - .ctl_name = NET_NEIGH_GC_THRESH2, + [NEIGH_VAR_GC_THRESH2] = { .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, - { - .ctl_name = NET_NEIGH_GC_THRESH3, + [NEIGH_VAR_GC_THRESH3] = { .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, {}, }, }; int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, - int p_id, int pdev_id, char *p_name, - proc_handler *handler, ctl_handler *strategy) + proc_handler *handler) { + int i; struct neigh_sysctl_table *t; - const char *dev_name_source = NULL; - -#define NEIGH_CTL_PATH_ROOT 0 -#define NEIGH_CTL_PATH_PROTO 1 -#define NEIGH_CTL_PATH_NEIGH 2 -#define NEIGH_CTL_PATH_DEV 3 - - struct ctl_path neigh_path[] = { - { .procname = "net", .ctl_name = CTL_NET, }, - { .procname = "proto", .ctl_name = 0, }, - { .procname = "neigh", .ctl_name = 0, }, - { .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, }, - { }, - }; + const char *dev_name_source; + char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ]; + char *p_name; t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); if (!t) goto err; - t->neigh_vars[0].data = &p->mcast_probes; - t->neigh_vars[1].data = &p->ucast_probes; - t->neigh_vars[2].data = &p->app_probes; - t->neigh_vars[3].data = &p->retrans_time; - t->neigh_vars[4].data = &p->base_reachable_time; - t->neigh_vars[5].data = &p->delay_probe_time; - t->neigh_vars[6].data = &p->gc_staletime; - t->neigh_vars[7].data = &p->queue_len; - t->neigh_vars[8].data = &p->proxy_qlen; - t->neigh_vars[9].data = &p->anycast_delay; - t->neigh_vars[10].data = &p->proxy_delay; - t->neigh_vars[11].data = &p->locktime; - t->neigh_vars[12].data = &p->retrans_time; - t->neigh_vars[13].data = &p->base_reachable_time; + for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) { + t->neigh_vars[i].data += (long) p; + t->neigh_vars[i].extra1 = dev; + t->neigh_vars[i].extra2 = p; + } if (dev) { dev_name_source = dev->name; - neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex; /* Terminate the table early */ - memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); + memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, + sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); } else { - dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname; - t->neigh_vars[14].data = (int *)(p + 1); - t->neigh_vars[15].data = (int *)(p + 1) + 1; - t->neigh_vars[16].data = (int *)(p + 1) + 2; - t->neigh_vars[17].data = (int *)(p + 1) + 3; + struct neigh_table *tbl = p->tbl; + dev_name_source = "default"; + t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval; + t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1; + t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2; + t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3; } - - if (handler || strategy) { + if (handler) { /* RetransTime */ - t->neigh_vars[3].proc_handler = handler; - t->neigh_vars[3].strategy = strategy; - t->neigh_vars[3].extra1 = dev; - if (!strategy) - t->neigh_vars[3].ctl_name = CTL_UNNUMBERED; + t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler; /* ReachableTime */ - t->neigh_vars[4].proc_handler = handler; - t->neigh_vars[4].strategy = strategy; - t->neigh_vars[4].extra1 = dev; - if (!strategy) - t->neigh_vars[4].ctl_name = CTL_UNNUMBERED; + t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler; /* RetransTime (in milliseconds)*/ - t->neigh_vars[12].proc_handler = handler; - t->neigh_vars[12].strategy = strategy; - t->neigh_vars[12].extra1 = dev; - if (!strategy) - t->neigh_vars[12].ctl_name = CTL_UNNUMBERED; + t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler; /* ReachableTime (in milliseconds) */ - t->neigh_vars[13].proc_handler = handler; - t->neigh_vars[13].strategy = strategy; - t->neigh_vars[13].extra1 = dev; - if (!strategy) - t->neigh_vars[13].ctl_name = CTL_UNNUMBERED; + t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler; } - t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); - if (!t->dev_name) - goto free; + /* Don't export sysctls to unprivileged users */ + if (neigh_parms_net(p)->user_ns != &init_user_ns) + t->neigh_vars[0].procname = NULL; - neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name; - neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id; - neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name; - neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id; + switch (neigh_parms_family(p)) { + case AF_INET: + p_name = "ipv4"; + break; + case AF_INET6: + p_name = "ipv6"; + break; + default: + BUG(); + } - t->sysctl_header = register_sysctl_paths(neigh_path, t->neigh_vars); + snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", + p_name, dev_name_source); + t->sysctl_header = + register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars); if (!t->sysctl_header) - goto free_procname; + goto free; p->sysctl_table = t; return 0; -free_procname: - kfree(t->dev_name); free: kfree(t); err: return -ENOBUFS; } +EXPORT_SYMBOL(neigh_sysctl_register); void neigh_sysctl_unregister(struct neigh_parms *p) { if (p->sysctl_table) { struct neigh_sysctl_table *t = p->sysctl_table; p->sysctl_table = NULL; - unregister_sysctl_table(t->sysctl_header); - kfree(t->dev_name); + unregister_net_sysctl_table(t->sysctl_header); kfree(t); } } +EXPORT_SYMBOL(neigh_sysctl_unregister); #endif /* CONFIG_SYSCTL */ static int __init neigh_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL); - rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL); - rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info); + rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL); - rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info); - rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, + NULL); + rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL); return 0; } subsys_initcall(neigh_init); -EXPORT_SYMBOL(__neigh_event_send); -EXPORT_SYMBOL(neigh_changeaddr); -EXPORT_SYMBOL(neigh_compat_output); -EXPORT_SYMBOL(neigh_connected_output); -EXPORT_SYMBOL(neigh_create); -EXPORT_SYMBOL(neigh_destroy); -EXPORT_SYMBOL(neigh_event_ns); -EXPORT_SYMBOL(neigh_ifdown); -EXPORT_SYMBOL(neigh_lookup); -EXPORT_SYMBOL(neigh_lookup_nodev); -EXPORT_SYMBOL(neigh_parms_alloc); -EXPORT_SYMBOL(neigh_parms_release); -EXPORT_SYMBOL(neigh_rand_reach_time); -EXPORT_SYMBOL(neigh_resolve_output); -EXPORT_SYMBOL(neigh_table_clear); -EXPORT_SYMBOL(neigh_table_init); -EXPORT_SYMBOL(neigh_table_init_no_netlink); -EXPORT_SYMBOL(neigh_update); -EXPORT_SYMBOL(pneigh_enqueue); -EXPORT_SYMBOL(pneigh_lookup); -EXPORT_SYMBOL_GPL(__pneigh_lookup); - -#ifdef CONFIG_ARPD -EXPORT_SYMBOL(neigh_app_ns); -#endif -#ifdef CONFIG_SYSCTL -EXPORT_SYMBOL(neigh_sysctl_register); -EXPORT_SYMBOL(neigh_sysctl_unregister); -#endif |
