diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/core/neighbour.c |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'net/core/neighbour.c')
-rw-r--r-- | net/core/neighbour.c | 2362 |
1 files changed, 2362 insertions, 0 deletions
diff --git a/net/core/neighbour.c b/net/core/neighbour.c new file mode 100644 index 00000000000..0a2f67bbef2 --- /dev/null +++ b/net/core/neighbour.c @@ -0,0 +1,2362 @@ +/* + * Generic address resolution entity + * + * Authors: + * Pedro Roque <roque@di.fc.ul.pt> + * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + * Vitaly E. Lavrov releasing NULL neighbor in neigh_add. + * Harald Welte Add neighbour cache statistics like rtstat + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/socket.h> +#include <linux/sched.h> +#include <linux/netdevice.h> +#include <linux/proc_fs.h> +#ifdef CONFIG_SYSCTL +#include <linux/sysctl.h> +#endif +#include <linux/times.h> +#include <net/neighbour.h> +#include <net/dst.h> +#include <net/sock.h> +#include <linux/rtnetlink.h> +#include <linux/random.h> + +#define NEIGH_DEBUG 1 + +#define NEIGH_PRINTK(x...) printk(x) +#define NEIGH_NOPRINTK(x...) do { ; } while(0) +#define NEIGH_PRINTK0 NEIGH_PRINTK +#define NEIGH_PRINTK1 NEIGH_NOPRINTK +#define NEIGH_PRINTK2 NEIGH_NOPRINTK + +#if NEIGH_DEBUG >= 1 +#undef NEIGH_PRINTK1 +#define NEIGH_PRINTK1 NEIGH_PRINTK +#endif +#if NEIGH_DEBUG >= 2 +#undef NEIGH_PRINTK2 +#define NEIGH_PRINTK2 NEIGH_PRINTK +#endif + +#define PNEIGH_HASHMASK 0xF + +static void neigh_timer_handler(unsigned long arg); +#ifdef CONFIG_ARPD +static void neigh_app_notify(struct neighbour *n); +#endif +static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); +void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); + +static struct neigh_table *neigh_tables; +static struct file_operations neigh_stat_seq_fops; + +/* + Neighbour hash table buckets are protected with rwlock tbl->lock. + + - All the scans/updates to hash buckets MUST be made under this lock. + - NOTHING clever should be made under this lock: no callbacks + to protocol backends, no attempts to send something to network. + It will result in deadlocks, if backend/driver wants to use neighbour + cache. + - If the entry requires some non-trivial actions, increase + its reference count and release table lock. + + Neighbour entries are protected: + - with reference count. + - with rwlock neigh->lock + + Reference count prevents destruction. + + neigh->lock mainly serializes ll address data and its validity state. + However, the same lock is used to protect another entry fields: + - timer + - resolution queue + + Again, nothing clever shall be made under neigh->lock, + the most complicated procedure, which we allow is dev->hard_header. + It is supposed, that dev->hard_header is simplistic and does + not make callbacks to neighbour tables. + + The last lock is neigh_tbl_lock. It is pure SMP lock, protecting + list of neighbour tables. This list is used only in process context, + */ + +static DEFINE_RWLOCK(neigh_tbl_lock); + +static int neigh_blackhole(struct sk_buff *skb) +{ + kfree_skb(skb); + return -ENETDOWN; +} + +/* + * It is random distribution in the interval (1/2)*base...(3/2)*base. + * It corresponds to default IPv6 settings and is not overridable, + * because it is really reasonable choice. + */ + +unsigned long neigh_rand_reach_time(unsigned long base) +{ + return (base ? (net_random() % base) + (base >> 1) : 0); +} + + +static int neigh_forced_gc(struct neigh_table *tbl) +{ + int shrunk = 0; + int i; + + NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); + + write_lock_bh(&tbl->lock); + for (i = 0; i <= tbl->hash_mask; i++) { + struct neighbour *n, **np; + + np = &tbl->hash_buckets[i]; + while ((n = *np) != NULL) { + /* Neighbour record may be discarded if: + * - nobody refers to it. + * - it is not permanent + */ + write_lock(&n->lock); + if (atomic_read(&n->refcnt) == 1 && + !(n->nud_state & NUD_PERMANENT)) { + *np = n->next; + n->dead = 1; + shrunk = 1; + write_unlock(&n->lock); + neigh_release(n); + continue; + } + write_unlock(&n->lock); + np = &n->next; + } + } + + tbl->last_flush = jiffies; + + write_unlock_bh(&tbl->lock); + + return shrunk; +} + +static int neigh_del_timer(struct neighbour *n) +{ + if ((n->nud_state & NUD_IN_TIMER) && + del_timer(&n->timer)) { + neigh_release(n); + return 1; + } + return 0; +} + +static void pneigh_queue_purge(struct sk_buff_head *list) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(list)) != NULL) { + dev_put(skb->dev); + kfree_skb(skb); + } +} + +void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) +{ + int i; + + write_lock_bh(&tbl->lock); + + for (i=0; i <= tbl->hash_mask; i++) { + struct neighbour *n, **np; + + np = &tbl->hash_buckets[i]; + while ((n = *np) != NULL) { + if (dev && n->dev != dev) { + np = &n->next; + continue; + } + *np = n->next; + write_lock_bh(&n->lock); + n->dead = 1; + neigh_del_timer(n); + write_unlock_bh(&n->lock); + neigh_release(n); + } + } + + write_unlock_bh(&tbl->lock); +} + +int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) +{ + int i; + + write_lock_bh(&tbl->lock); + + for (i = 0; i <= tbl->hash_mask; i++) { + struct neighbour *n, **np = &tbl->hash_buckets[i]; + + while ((n = *np) != NULL) { + if (dev && n->dev != dev) { + np = &n->next; + continue; + } + *np = n->next; + write_lock(&n->lock); + neigh_del_timer(n); + n->dead = 1; + + if (atomic_read(&n->refcnt) != 1) { + /* The most unpleasant situation. + We must destroy neighbour entry, + but someone still uses it. + + The destroy will be delayed until + the last user releases us, but + we must kill timers etc. and move + it to safe state. + */ + skb_queue_purge(&n->arp_queue); + n->output = neigh_blackhole; + if (n->nud_state & NUD_VALID) + n->nud_state = NUD_NOARP; + else + n->nud_state = NUD_NONE; + NEIGH_PRINTK2("neigh %p is stray.\n", n); + } + write_unlock(&n->lock); + neigh_release(n); + } + } + + pneigh_ifdown(tbl, dev); + write_unlock_bh(&tbl->lock); + + del_timer_sync(&tbl->proxy_timer); + pneigh_queue_purge(&tbl->proxy_queue); + return 0; +} + +static struct neighbour *neigh_alloc(struct neigh_table *tbl) +{ + struct neighbour *n = NULL; + unsigned long now = jiffies; + int entries; + + entries = atomic_inc_return(&tbl->entries) - 1; + if (entries >= tbl->gc_thresh3 || + (entries >= tbl->gc_thresh2 && + time_after(now, tbl->last_flush + 5 * HZ))) { + if (!neigh_forced_gc(tbl) && + entries >= tbl->gc_thresh3) + goto out_entries; + } + + n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC); + if (!n) + goto out_entries; + + memset(n, 0, tbl->entry_size); + + skb_queue_head_init(&n->arp_queue); + rwlock_init(&n->lock); + n->updated = n->used = now; + n->nud_state = NUD_NONE; + n->output = neigh_blackhole; + n->parms = neigh_parms_clone(&tbl->parms); + init_timer(&n->timer); + n->timer.function = neigh_timer_handler; + n->timer.data = (unsigned long)n; + + NEIGH_CACHE_STAT_INC(tbl, allocs); + n->tbl = tbl; + atomic_set(&n->refcnt, 1); + n->dead = 1; +out: + return n; + +out_entries: + atomic_dec(&tbl->entries); + goto out; +} + +static struct neighbour **neigh_hash_alloc(unsigned int entries) +{ + unsigned long size = entries * sizeof(struct neighbour *); + struct neighbour **ret; + + if (size <= PAGE_SIZE) { + ret = kmalloc(size, GFP_ATOMIC); + } else { + ret = (struct neighbour **) + __get_free_pages(GFP_ATOMIC, get_order(size)); + } + if (ret) + memset(ret, 0, size); + + return ret; +} + +static void neigh_hash_free(struct neighbour **hash, unsigned int entries) +{ + unsigned long size = entries * sizeof(struct neighbour *); + + if (size <= PAGE_SIZE) + kfree(hash); + else + free_pages((unsigned long)hash, get_order(size)); +} + +static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) +{ + struct neighbour **new_hash, **old_hash; + unsigned int i, new_hash_mask, old_entries; + + NEIGH_CACHE_STAT_INC(tbl, hash_grows); + + BUG_ON(new_entries & (new_entries - 1)); + new_hash = neigh_hash_alloc(new_entries); + if (!new_hash) + return; + + old_entries = tbl->hash_mask + 1; + new_hash_mask = new_entries - 1; + old_hash = tbl->hash_buckets; + + get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + for (i = 0; i < old_entries; i++) { + struct neighbour *n, *next; + + for (n = old_hash[i]; n; n = next) { + unsigned int hash_val = tbl->hash(n->primary_key, n->dev); + + hash_val &= new_hash_mask; + next = n->next; + + n->next = new_hash[hash_val]; + new_hash[hash_val] = n; + } + } + tbl->hash_buckets = new_hash; + tbl->hash_mask = new_hash_mask; + + neigh_hash_free(old_hash, old_entries); +} + +struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, + struct net_device *dev) +{ + struct neighbour *n; + int key_len = tbl->key_len; + u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; + + NEIGH_CACHE_STAT_INC(tbl, lookups); + + read_lock_bh(&tbl->lock); + for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { + if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { + neigh_hold(n); + NEIGH_CACHE_STAT_INC(tbl, hits); + break; + } + } + read_unlock_bh(&tbl->lock); + return n; +} + +struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) +{ + struct neighbour *n; + int key_len = tbl->key_len; + u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask; + + NEIGH_CACHE_STAT_INC(tbl, lookups); + + read_lock_bh(&tbl->lock); + for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { + if (!memcmp(n->primary_key, pkey, key_len)) { + neigh_hold(n); + NEIGH_CACHE_STAT_INC(tbl, hits); + break; + } + } + read_unlock_bh(&tbl->lock); + return n; +} + +struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, + struct net_device *dev) +{ + u32 hash_val; + int key_len = tbl->key_len; + int error; + struct neighbour *n1, *rc, *n = neigh_alloc(tbl); + + if (!n) { + rc = ERR_PTR(-ENOBUFS); + goto out; + } + + memcpy(n->primary_key, pkey, key_len); + n->dev = dev; + dev_hold(dev); + + /* Protocol specific setup. */ + if (tbl->constructor && (error = tbl->constructor(n)) < 0) { + rc = ERR_PTR(error); + goto out_neigh_release; + } + + /* Device specific setup. */ + if (n->parms->neigh_setup && + (error = n->parms->neigh_setup(n)) < 0) { + rc = ERR_PTR(error); + goto out_neigh_release; + } + + n->confirmed = jiffies - (n->parms->base_reachable_time << 1); + + write_lock_bh(&tbl->lock); + + if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1)) + neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); + + hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; + + if (n->parms->dead) { + rc = ERR_PTR(-EINVAL); + goto out_tbl_unlock; + } + + for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { + if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { + neigh_hold(n1); + rc = n1; + goto out_tbl_unlock; + } + } + + n->next = tbl->hash_buckets[hash_val]; + tbl->hash_buckets[hash_val] = n; + n->dead = 0; + neigh_hold(n); + write_unlock_bh(&tbl->lock); + NEIGH_PRINTK2("neigh %p is created.\n", n); + rc = n; +out: + return rc; +out_tbl_unlock: + write_unlock_bh(&tbl->lock); +out_neigh_release: + neigh_release(n); + goto out; +} + +struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, + struct net_device *dev, int creat) +{ + struct pneigh_entry *n; + int key_len = tbl->key_len; + u32 hash_val = *(u32 *)(pkey + key_len - 4); + + hash_val ^= (hash_val >> 16); + hash_val ^= hash_val >> 8; + hash_val ^= hash_val >> 4; + hash_val &= PNEIGH_HASHMASK; + + read_lock_bh(&tbl->lock); + + for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { + if (!memcmp(n->key, pkey, key_len) && + (n->dev == dev || !n->dev)) { + read_unlock_bh(&tbl->lock); + goto out; + } + } + read_unlock_bh(&tbl->lock); + n = NULL; + if (!creat) + goto out; + + n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); + if (!n) + goto out; + + memcpy(n->key, pkey, key_len); + n->dev = dev; + if (dev) + dev_hold(dev); + + if (tbl->pconstructor && tbl->pconstructor(n)) { + if (dev) + dev_put(dev); + kfree(n); + n = NULL; + goto out; + } + + write_lock_bh(&tbl->lock); + n->next = tbl->phash_buckets[hash_val]; + tbl->phash_buckets[hash_val] = n; + write_unlock_bh(&tbl->lock); +out: + return n; +} + + +int pneigh_delete(struct neigh_table *tbl, const void *pkey, + struct net_device *dev) +{ + struct pneigh_entry *n, **np; + int key_len = tbl->key_len; + u32 hash_val = *(u32 *)(pkey + key_len - 4); + + hash_val ^= (hash_val >> 16); + hash_val ^= hash_val >> 8; + hash_val ^= hash_val >> 4; + hash_val &= PNEIGH_HASHMASK; + + write_lock_bh(&tbl->lock); + for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; + np = &n->next) { + if (!memcmp(n->key, pkey, key_len) && n->dev == dev) { + *np = n->next; + write_unlock_bh(&tbl->lock); + if (tbl->pdestructor) + tbl->pdestructor(n); + if (n->dev) + dev_put(n->dev); + kfree(n); + return 0; + } + } + write_unlock_bh(&tbl->lock); + return -ENOENT; +} + +static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) +{ + struct pneigh_entry *n, **np; + u32 h; + + for (h = 0; h <= PNEIGH_HASHMASK; h++) { + np = &tbl->phash_buckets[h]; + while ((n = *np) != NULL) { + if (!dev || n->dev == dev) { + *np = n->next; + if (tbl->pdestructor) + tbl->pdestructor(n); + if (n->dev) + dev_put(n->dev); + kfree(n); + continue; + } + np = &n->next; + } + } + return -ENOENT; +} + + +/* + * neighbour must already be out of the table; + * + */ +void neigh_destroy(struct neighbour *neigh) +{ + struct hh_cache *hh; + + NEIGH_CACHE_STAT_INC(neigh->tbl, destroys); + + if (!neigh->dead) { + printk(KERN_WARNING + "Destroying alive neighbour %p\n", neigh); + dump_stack(); + return; + } + + if (neigh_del_timer(neigh)) + printk(KERN_WARNING "Impossible event.\n"); + + while ((hh = neigh->hh) != NULL) { + neigh->hh = hh->hh_next; + hh->hh_next = NULL; + write_lock_bh(&hh->hh_lock); + hh->hh_output = neigh_blackhole; + write_unlock_bh(&hh->hh_lock); + if (atomic_dec_and_test(&hh->hh_refcnt)) + kfree(hh); + } + + if (neigh->ops && neigh->ops->destructor) + (neigh->ops->destructor)(neigh); + + skb_queue_purge(&neigh->arp_queue); + + dev_put(neigh->dev); + neigh_parms_put(neigh->parms); + + NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); + + atomic_dec(&neigh->tbl->entries); + kmem_cache_free(neigh->tbl->kmem_cachep, neigh); +} + +/* Neighbour state is suspicious; + disable fast path. + + Called with write_locked neigh. + */ +static void neigh_suspect(struct neighbour *neigh) +{ + struct hh_cache *hh; + + NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); + + neigh->output = neigh->ops->output; + + for (hh = neigh->hh; hh; hh = hh->hh_next) + hh->hh_output = neigh->ops->output; +} + +/* Neighbour state is OK; + enable fast path. + + Called with write_locked neigh. + */ +static void neigh_connect(struct neighbour *neigh) +{ + struct hh_cache *hh; + + NEIGH_PRINTK2("neigh %p is connected.\n", neigh); + + neigh->output = neigh->ops->connected_output; + + for (hh = neigh->hh; hh; hh = hh->hh_next) + hh->hh_output = neigh->ops->hh_output; +} + +static void neigh_periodic_timer(unsigned long arg) +{ + struct neigh_table *tbl = (struct neigh_table *)arg; + struct neighbour *n, **np; + unsigned long expire, now = jiffies; + + NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); + + write_lock(&tbl->lock); + + /* + * periodically recompute ReachableTime from random function + */ + + if (time_after(now, tbl->last_rand + 300 * HZ)) { + struct neigh_parms *p; + tbl->last_rand = now; + for (p = &tbl->parms; p; p = p->next) + p->reachable_time = + neigh_rand_reach_time(p->base_reachable_time); + } + + np = &tbl->hash_buckets[tbl->hash_chain_gc]; + tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask); + + while ((n = *np) != NULL) { + unsigned int state; + + write_lock(&n->lock); + + state = n->nud_state; + if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { + write_unlock(&n->lock); + goto next_elt; + } + + if (time_before(n->used, n->confirmed)) + n->used = n->confirmed; + + if (atomic_read(&n->refcnt) == 1 && + (state == NUD_FAILED || + time_after(now, n->used + n->parms->gc_staletime))) { + *np = n->next; + n->dead = 1; + write_unlock(&n->lock); + neigh_release(n); + continue; + } + write_unlock(&n->lock); + +next_elt: + np = &n->next; + } + + /* Cycle through all hash buckets every base_reachable_time/2 ticks. + * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 + * base_reachable_time. + */ + expire = tbl->parms.base_reachable_time >> 1; + expire /= (tbl->hash_mask + 1); + if (!expire) + expire = 1; + + mod_timer(&tbl->gc_timer, now + expire); + + write_unlock(&tbl->lock); +} + +static __inline__ int neigh_max_probes(struct neighbour *n) +{ + struct neigh_parms *p = n->parms; + return (n->nud_state & NUD_PROBE ? + p->ucast_probes : + p->ucast_probes + p->app_probes + p->mcast_probes); +} + + +/* Called when a timer expires for a neighbour entry. */ + +static void neigh_timer_handler(unsigned long arg) +{ + unsigned long now, next; + struct neighbour *neigh = (struct neighbour *)arg; + unsigned state; + int notify = 0; + + write_lock(&neigh->lock); + + state = neigh->nud_state; + now = jiffies; + next = now + HZ; + + if (!(state & NUD_IN_TIMER)) { +#ifndef CONFIG_SMP + printk(KERN_WARNING "neigh: timer & !nud_in_timer\n"); +#endif + goto out; + } + + if (state & NUD_REACHABLE) { + if (time_before_eq(now, + neigh->confirmed + neigh->parms->reachable_time)) { + NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); + next = neigh->confirmed + neigh->parms->reachable_time; + } else if (time_before_eq(now, + neigh->used + neigh->parms->delay_probe_time)) { + NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh->nud_state = NUD_DELAY; + neigh_suspect(neigh); + next = now + neigh->parms->delay_probe_time; + } else { + NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); + neigh->nud_state = NUD_STALE; + neigh_suspect(neigh); + } + } else if (state & NUD_DELAY) { + if (time_before_eq(now, + neigh->confirmed + neigh->parms->delay_probe_time)) { + NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh); + neigh->nud_state = NUD_REACHABLE; + neigh_connect(neigh); + next = neigh->confirmed + neigh->parms->reachable_time; + } else { + NEIGH_PRINTK2("neigh %p is probed.\n", neigh); + neigh->nud_state = NUD_PROBE; + atomic_set(&neigh->probes, 0); + next = now + neigh->parms->retrans_time; + } + } else { + /* NUD_PROBE|NUD_INCOMPLETE */ + next = now + neigh->parms->retrans_time; + } + + if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && + atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { + struct sk_buff *skb; + + neigh->nud_state = NUD_FAILED; + notify = 1; + NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); + NEIGH_PRINTK2("neigh %p is failed.\n", neigh); + + /* It is very thin place. report_unreachable is very complicated + routine. Particularly, it can hit the same neighbour entry! + + So that, we try to be accurate and avoid dead loop. --ANK + */ + while (neigh->nud_state == NUD_FAILED && + (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { + write_unlock(&neigh->lock); + neigh->ops->error_report(neigh, skb); + write_lock(&neigh->lock); + } + skb_queue_purge(&neigh->arp_queue); + } + + if (neigh->nud_state & NUD_IN_TIMER) { + neigh_hold(neigh); + if (time_before(next, jiffies + HZ/2)) + next = jiffies + HZ/2; + neigh->timer.expires = next; + add_timer(&neigh->timer); + } + if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { + struct sk_buff *skb = skb_peek(&neigh->arp_queue); + /* keep skb alive even if arp_queue overflows */ + if (skb) + skb_get(skb); + write_unlock(&neigh->lock); + neigh->ops->solicit(neigh, skb); + atomic_inc(&neigh->probes); + if (skb) + kfree_skb(skb); + } else { +out: + write_unlock(&neigh->lock); + } + +#ifdef CONFIG_ARPD + if (notify && neigh->parms->app_probes) + neigh_app_notify(neigh); +#endif + neigh_release(neigh); +} + +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +{ + int rc; + unsigned long now; + + write_lock_bh(&neigh->lock); + + rc = 0; + if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) + goto out_unlock_bh; + + now = jiffies; + + if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { + if (neigh->parms->mcast_probes + neigh->parms->app_probes) { + atomic_set(&neigh->probes, neigh->parms->ucast_probes); + neigh->nud_state = NUD_INCOMPLETE; + neigh_hold(neigh); + neigh->timer.expires = now + 1; + add_timer(&neigh->timer); + } else { + neigh->nud_state = NUD_FAILED; + write_unlock_bh(&neigh->lock); + + if (skb) + kfree_skb(skb); + return 1; + } + } else if (neigh->nud_state & NUD_STALE) { + NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); + neigh_hold(neigh); + neigh->nud_state = NUD_DELAY; + neigh->timer.expires = jiffies + neigh->parms->delay_probe_time; + add_timer(&neigh->timer); + } + + if (neigh->nud_state == NUD_INCOMPLETE) { + if (skb) { + if (skb_queue_len(&neigh->arp_queue) >= + neigh->parms->queue_len) { + struct sk_buff *buff; + buff = neigh->arp_queue.next; + __skb_unlink(buff, &neigh->arp_queue); + kfree_skb(buff); + } + __skb_queue_tail(&neigh->arp_queue, skb); + } + rc = 1; + } +out_unlock_bh: + write_unlock_bh(&neigh->lock); + return rc; +} + +static __inline__ void neigh_update_hhs(struct neighbour *neigh) +{ + struct hh_cache *hh; + void (*update)(struct hh_cache*, struct net_device*, unsigned char *) = + neigh->dev->header_cache_update; + + if (update) { + for (hh = neigh->hh; hh; hh = hh->hh_next) { + write_lock_bh(&hh->hh_lock); + update(hh, neigh->dev, neigh->ha); + write_unlock_bh(&hh->hh_lock); + } + } +} + + + +/* Generic update routine. + -- lladdr is new lladdr or NULL, if it is not supplied. + -- new is new state. + -- flags + NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr, + if it is different. + NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected" + lladdr instead of overriding it + if it is different. + It also allows to retain current state + if lladdr is unchanged. + NEIGH_UPDATE_F_ADMIN means that the change is administrative. + + NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing + NTF_ROUTER flag. + NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as + a router. + + Caller MUST hold reference count on the entry. + */ + +int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, + u32 flags) +{ + u8 old; + int err; +#ifdef CONFIG_ARPD + int notify = 0; +#endif + struct net_device *dev; + int update_isrouter = 0; + + write_lock_bh(&neigh->lock); + + dev = neigh->dev; + old = neigh->nud_state; + err = -EPERM; + + if (!(flags & NEIGH_UPDATE_F_ADMIN) && + (old & (NUD_NOARP | NUD_PERMANENT))) + goto out; + + if (!(new & NUD_VALID)) { + neigh_del_timer(neigh); + if (old & NUD_CONNECTED) + neigh_suspect(neigh); + neigh->nud_state = new; + err = 0; +#ifdef CONFIG_ARPD + notify = old & NUD_VALID; +#endif + goto out; + } + + /* Compare new lladdr with cached one */ + if (!dev->addr_len) { + /* First case: device needs no address. */ + lladdr = neigh->ha; + } else if (lladdr) { + /* The second case: if something is already cached + and a new address is proposed: + - compare new & old + - if they are different, check override flag + */ + if ((old & NUD_VALID) && + !memcmp(lladdr, neigh->ha, dev->addr_len)) + lladdr = neigh->ha; + } else { + /* No address is supplied; if we know something, + use it, otherwise discard the request. + */ + err = -EINVAL; + if (!(old & NUD_VALID)) + goto out; + lladdr = neigh->ha; + } + + if (new & NUD_CONNECTED) + neigh->confirmed = jiffies; + neigh->updated = jiffies; + + /* If entry was valid and address is not changed, + do not change entry state, if new one is STALE. + */ + err = 0; + update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER; + if (old & NUD_VALID) { + if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) { + update_isrouter = 0; + if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) && + (old & NUD_CONNECTED)) { + lladdr = neigh->ha; + new = NUD_STALE; + } else + goto out; + } else { + if (lladdr == neigh->ha && new == NUD_STALE && + ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) || + (old & NUD_CONNECTED)) + ) + new = old; + } + } + + if (new != old) { + neigh_del_timer(neigh); + if (new & NUD_IN_TIMER) { + neigh_hold(neigh); + neigh->timer.expires = jiffies + + ((new & NUD_REACHABLE) ? + neigh->parms->reachable_time : 0); + add_timer(&neigh->timer); + } + neigh->nud_state = new; + } + + if (lladdr != neigh->ha) { + memcpy(&neigh->ha, lladdr, dev->addr_len); + neigh_update_hhs(neigh); + if (!(new & NUD_CONNECTED)) + neigh->confirmed = jiffies - + (neigh->parms->base_reachable_time << 1); +#ifdef CONFIG_ARPD + notify = 1; +#endif + } + if (new == old) + goto out; + if (new & NUD_CONNECTED) + neigh_connect(neigh); + else + neigh_suspect(neigh); + if (!(old & NUD_VALID)) { + struct sk_buff *skb; + + /* Again: avoid dead loop if something went wrong */ + + while (neigh->nud_state & NUD_VALID && + (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { + struct neighbour *n1 = neigh; + write_unlock_bh(&neigh->lock); + /* On shaper/eql skb->dst->neighbour != neigh :( */ + if (skb->dst && skb->dst->neighbour) + n1 = skb->dst->neighbour; + n1->output(skb); + write_lock_bh(&neigh->lock); + } + skb_queue_purge(&neigh->arp_queue); + } +out: + if (update_isrouter) { + neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ? + (neigh->flags | NTF_ROUTER) : + (neigh->flags & ~NTF_ROUTER); + } + write_unlock_bh(&neigh->lock); +#ifdef CONFIG_ARPD + if (notify && neigh->parms->app_probes) + neigh_app_notify(neigh); +#endif + return err; +} + +struct neighbour *neigh_event_ns(struct neigh_table *tbl, + u8 *lladdr, void *saddr, + struct net_device *dev) +{ + struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev, + lladdr || !dev->addr_len); + if (neigh) + neigh_update(neigh, lladdr, NUD_STALE, + NEIGH_UPDATE_F_OVERRIDE); + return neigh; +} + +static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, + u16 protocol) +{ + struct hh_cache *hh; + struct net_device *dev = dst->dev; + + for (hh = n->hh; hh; hh = hh->hh_next) + if (hh->hh_type == protocol) + break; + + if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { + memset(hh, 0, sizeof(struct hh_cache)); + rwlock_init(&hh->hh_lock); + hh->hh_type = protocol; + atomic_set(&hh->hh_refcnt, 0); + hh->hh_next = NULL; + if (dev->hard_header_cache(n, hh)) { + kfree(hh); + hh = NULL; + } else { + atomic_inc(&hh->hh_refcnt); + hh->hh_next = n->hh; + n->hh = hh; + if (n->nud_state & NUD_CONNECTED) + hh->hh_output = n->ops->hh_output; + else + hh->hh_output = n->ops->output; + } + } + if (hh) { + atomic_inc(&hh->hh_refcnt); + dst->hh = hh; + } +} + +/* This function can be used in contexts, where only old dev_queue_xmit + worked, f.e. if you want to override normal output path (eql, shaper), + but resolution is not made yet. + */ + +int neigh_compat_output(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + + __skb_pull(skb, skb->nh.raw - skb->data); + + if (dev->hard_header && + dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, + skb->len) < 0 && + dev->rebuild_header(skb)) + return 0; + + return dev_queue_xmit(skb); +} + +/* Slow and careful. */ + +int neigh_resolve_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct neighbour *neigh; + int rc = 0; + + if (!dst || !(neigh = dst->neighbour)) + goto discard; + + __skb_pull(skb, skb->nh.raw - skb->data); + + if (!neigh_event_send(neigh, skb)) { + int err; + struct net_device *dev = neigh->dev; + if (dev->hard_header_cache && !dst->hh) { + write_lock_bh(&neigh->lock); + if (!dst->hh) + neigh_hh_init(neigh, dst, dst->ops->protocol); + err = dev->hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); + write_unlock_bh(&neigh->lock); + } else { + read_lock_bh(&neigh->lock); + err = dev->hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); + read_unlock_bh(&neigh->lock); + } + if (err >= 0) + rc = neigh->ops->queue_xmit(skb); + else + goto out_kfree_skb; + } +out: + return rc; +discard: + NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", + dst, dst ? dst->neighbour : NULL); +out_kfree_skb: + rc = -EINVAL; + kfree_skb(skb); + goto out; +} + +/* As fast as possible without hh cache */ + +int neigh_connected_output(struct sk_buff *skb) +{ + int err; + struct dst_entry *dst = skb->dst; + struct neighbour *neigh = dst->neighbour; + struct net_device *dev = neigh->dev; + + __skb_pull(skb, skb->nh.raw - skb->data); + + read_lock_bh(&neigh->lock); + err = dev->hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); + read_unlock_bh(&neigh->lock); + if (err >= 0) + err = neigh->ops->queue_xmit(skb); |