Linux-2.6.12-rc2v2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/core/neighbour.c
1 files changed, 2362 insertions, 0 deletions
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
new file mode 100644
index 00000000000..0a2f67bbef2
--- /dev/null
+++ b/net/core/neighbour.c
@@ -0,0 +1,2362 @@
+/*
+ *	Generic address resolution entity
+ *
+ *	Authors:
+ *	Pedro Roque		<roque@di.fc.ul.pt>
+ *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *	Fixes:
+ *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
+ *	Harald Welte		Add neighbour cache statistics like rtstat
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+#include <linux/sched.h>
+#include <linux/netdevice.h>
+#include <linux/proc_fs.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+#include <linux/times.h>
+#include <net/neighbour.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <linux/rtnetlink.h>
+#include <linux/random.h>
+
+#define NEIGH_DEBUG 1
+
+#define NEIGH_PRINTK(x...) printk(x)
+#define NEIGH_NOPRINTK(x...) do { ; } while(0)
+#define NEIGH_PRINTK0 NEIGH_PRINTK
+#define NEIGH_PRINTK1 NEIGH_NOPRINTK
+#define NEIGH_PRINTK2 NEIGH_NOPRINTK
+
+#if NEIGH_DEBUG >= 1
+#undef NEIGH_PRINTK1
+#define NEIGH_PRINTK1 NEIGH_PRINTK
+#endif
+#if NEIGH_DEBUG >= 2
+#undef NEIGH_PRINTK2
+#define NEIGH_PRINTK2 NEIGH_PRINTK
+#endif
+
+#define PNEIGH_HASHMASK		0xF
+
+static void neigh_timer_handler(unsigned long arg);
+#ifdef CONFIG_ARPD
+static void neigh_app_notify(struct neighbour *n);
+#endif
+static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
+void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
+
+static struct neigh_table *neigh_tables;
+static struct file_operations neigh_stat_seq_fops;
+
+/*
+   Neighbour hash table buckets are protected with rwlock tbl->lock.
+
+   - All the scans/updates to hash buckets MUST be made under this lock.
+   - NOTHING clever should be made under this lock: no callbacks
+     to protocol backends, no attempts to send something to network.
+     It will result in deadlocks, if backend/driver wants to use neighbour
+     cache.
+   - If the entry requires some non-trivial actions, increase
+     its reference count and release table lock.
+
+   Neighbour entries are protected:
+   - with reference count.
+   - with rwlock neigh->lock
+
+   Reference count prevents destruction.
+
+   neigh->lock mainly serializes ll address data and its validity state.
+   However, the same lock is used to protect another entry fields:
+    - timer
+    - resolution queue
+
+   Again, nothing clever shall be made under neigh->lock,
+   the most complicated procedure, which we allow is dev->hard_header.
+   It is supposed, that dev->hard_header is simplistic and does
+   not make callbacks to neighbour tables.
+
+   The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
+   list of neighbour tables. This list is used only in process context,
+ */
+
+static DEFINE_RWLOCK(neigh_tbl_lock);
+
+static int neigh_blackhole(struct sk_buff *skb)
+{
+	kfree_skb(skb);
+	return -ENETDOWN;
+}
+
+/*
+ * It is random distribution in the interval (1/2)*base...(3/2)*base.
+ * It corresponds to default IPv6 settings and is not overridable,
+ * because it is really reasonable choice.
+ */
+
+unsigned long neigh_rand_reach_time(unsigned long base)
+{
+	return (base ? (net_random() % base) + (base >> 1) : 0);
+}
+
+
+static int neigh_forced_gc(struct neigh_table *tbl)
+{
+	int shrunk = 0;
+	int i;
+
+	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
+
+	write_lock_bh(&tbl->lock);
+	for (i = 0; i <= tbl->hash_mask; i++) {
+		struct neighbour *n, **np;
+
+		np = &tbl->hash_buckets[i];
+		while ((n = *np) != NULL) {
+			/* Neighbour record may be discarded if:
+			 * - nobody refers to it.
+			 * - it is not permanent
+			 */
+			write_lock(&n->lock);
+			if (atomic_read(&n->refcnt) == 1 &&
+			    !(n->nud_state & NUD_PERMANENT)) {
+				*np	= n->next;
+				n->dead = 1;
+				shrunk	= 1;
+				write_unlock(&n->lock);
+				neigh_release(n);
+				continue;
+			}
+			write_unlock(&n->lock);
+			np = &n->next;
+		}
+	}
+
+	tbl->last_flush = jiffies;
+
+	write_unlock_bh(&tbl->lock);
+
+	return shrunk;
+}
+
+static int neigh_del_timer(struct neighbour *n)
+{
+	if ((n->nud_state & NUD_IN_TIMER) &&
+	    del_timer(&n->timer)) {
+		neigh_release(n);
+		return 1;
+	}
+	return 0;
+}
+
+static void pneigh_queue_purge(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(list)) != NULL) {
+		dev_put(skb->dev);
+		kfree_skb(skb);
+	}
+}
+
+void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
+{
+	int i;
+
+	write_lock_bh(&tbl->lock);
+
+	for (i=0; i <= tbl->hash_mask; i++) {
+		struct neighbour *n, **np;
+
+		np = &tbl->hash_buckets[i];
+		while ((n = *np) != NULL) {
+			if (dev && n->dev != dev) {
+				np = &n->next;
+				continue;
+			}
+			*np = n->next;
+			write_lock_bh(&n->lock);
+			n->dead = 1;
+			neigh_del_timer(n);
+			write_unlock_bh(&n->lock);
+			neigh_release(n);
+		}
+	}
+
+        write_unlock_bh(&tbl->lock);
+}
+
+int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+	int i;
+
+	write_lock_bh(&tbl->lock);
+
+	for (i = 0; i <= tbl->hash_mask; i++) {
+		struct neighbour *n, **np = &tbl->hash_buckets[i];
+
+		while ((n = *np) != NULL) {
+			if (dev && n->dev != dev) {
+				np = &n->next;
+				continue;
+			}
+			*np = n->next;
+			write_lock(&n->lock);
+			neigh_del_timer(n);
+			n->dead = 1;
+
+			if (atomic_read(&n->refcnt) != 1) {
+				/* The most unpleasant situation.
+				   We must destroy neighbour entry,
+				   but someone still uses it.
+
+				   The destroy will be delayed until
+				   the last user releases us, but
+				   we must kill timers etc. and move
+				   it to safe state.
+				 */
+				skb_queue_purge(&n->arp_queue);
+				n->output = neigh_blackhole;
+				if (n->nud_state & NUD_VALID)
+					n->nud_state = NUD_NOARP;
+				else
+					n->nud_state = NUD_NONE;
+				NEIGH_PRINTK2("neigh %p is stray.\n", n);
+			}
+			write_unlock(&n->lock);
+			neigh_release(n);
+		}
+	}
+
+	pneigh_ifdown(tbl, dev);
+	write_unlock_bh(&tbl->lock);
+
+	del_timer_sync(&tbl->proxy_timer);
+	pneigh_queue_purge(&tbl->proxy_queue);
+	return 0;
+}
+
+static struct neighbour *neigh_alloc(struct neigh_table *tbl)
+{
+	struct neighbour *n = NULL;
+	unsigned long now = jiffies;
+	int entries;
+
+	entries = atomic_inc_return(&tbl->entries) - 1;
+	if (entries >= tbl->gc_thresh3 ||
+	    (entries >= tbl->gc_thresh2 &&
+	     time_after(now, tbl->last_flush + 5 * HZ))) {
+		if (!neigh_forced_gc(tbl) &&
+		    entries >= tbl->gc_thresh3)
+			goto out_entries;
+	}
+
+	n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC);
+	if (!n)
+		goto out_entries;
+
+	memset(n, 0, tbl->entry_size);
+
+	skb_queue_head_init(&n->arp_queue);
+	rwlock_init(&n->lock);
+	n->updated	  = n->used = now;
+	n->nud_state	  = NUD_NONE;
+	n->output	  = neigh_blackhole;
+	n->parms	  = neigh_parms_clone(&tbl->parms);
+	init_timer(&n->timer);
+	n->timer.function = neigh_timer_handler;
+	n->timer.data	  = (unsigned long)n;
+
+	NEIGH_CACHE_STAT_INC(tbl, allocs);
+	n->tbl		  = tbl;
+	atomic_set(&n->refcnt, 1);
+	n->dead		  = 1;
+out:
+	return n;
+
+out_entries:
+	atomic_dec(&tbl->entries);
+	goto out;
+}
+
+static struct neighbour **neigh_hash_alloc(unsigned int entries)
+{
+	unsigned long size = entries * sizeof(struct neighbour *);
+	struct neighbour **ret;
+
+	if (size <= PAGE_SIZE) {
+		ret = kmalloc(size, GFP_ATOMIC);
+	} else {
+		ret = (struct neighbour **)
+			__get_free_pages(GFP_ATOMIC, get_order(size));
+	}
+	if (ret)
+		memset(ret, 0, size);
+
+	return ret;
+}
+
+static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
+{
+	unsigned long size = entries * sizeof(struct neighbour *);
+
+	if (size <= PAGE_SIZE)
+		kfree(hash);
+	else
+		free_pages((unsigned long)hash, get_order(size));
+}
+
+static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
+{
+	struct neighbour **new_hash, **old_hash;
+	unsigned int i, new_hash_mask, old_entries;
+
+	NEIGH_CACHE_STAT_INC(tbl, hash_grows);
+
+	BUG_ON(new_entries & (new_entries - 1));
+	new_hash = neigh_hash_alloc(new_entries);
+	if (!new_hash)
+		return;
+
+	old_entries = tbl->hash_mask + 1;
+	new_hash_mask = new_entries - 1;
+	old_hash = tbl->hash_buckets;
+
+	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
+	for (i = 0; i < old_entries; i++) {
+		struct neighbour *n, *next;
+
+		for (n = old_hash[i]; n; n = next) {
+			unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
+
+			hash_val &= new_hash_mask;
+			next = n->next;
+
+			n->next = new_hash[hash_val];
+			new_hash[hash_val] = n;
+		}
+	}
+	tbl->hash_buckets = new_hash;
+	tbl->hash_mask = new_hash_mask;
+
+	neigh_hash_free(old_hash, old_entries);
+}
+
+struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
+			       struct net_device *dev)
+{
+	struct neighbour *n;
+	int key_len = tbl->key_len;
+	u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+	
+	NEIGH_CACHE_STAT_INC(tbl, lookups);
+
+	read_lock_bh(&tbl->lock);
+	for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
+			neigh_hold(n);
+			NEIGH_CACHE_STAT_INC(tbl, hits);
+			break;
+		}
+	}
+	read_unlock_bh(&tbl->lock);
+	return n;
+}
+
+struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
+{
+	struct neighbour *n;
+	int key_len = tbl->key_len;
+	u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask;
+
+	NEIGH_CACHE_STAT_INC(tbl, lookups);
+
+	read_lock_bh(&tbl->lock);
+	for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+		if (!memcmp(n->primary_key, pkey, key_len)) {
+			neigh_hold(n);
+			NEIGH_CACHE_STAT_INC(tbl, hits);
+			break;
+		}
+	}
+	read_unlock_bh(&tbl->lock);
+	return n;
+}
+
+struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
+			       struct net_device *dev)
+{
+	u32 hash_val;
+	int key_len = tbl->key_len;
+	int error;
+	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+
+	if (!n) {
+		rc = ERR_PTR(-ENOBUFS);
+		goto out;
+	}
+
+	memcpy(n->primary_key, pkey, key_len);
+	n->dev = dev;
+	dev_hold(dev);
+
+	/* Protocol specific setup. */
+	if (tbl->constructor &&	(error = tbl->constructor(n)) < 0) {
+		rc = ERR_PTR(error);
+		goto out_neigh_release;
+	}
+
+	/* Device specific setup. */
+	if (n->parms->neigh_setup &&
+	    (error = n->parms->neigh_setup(n)) < 0) {
+		rc = ERR_PTR(error);
+		goto out_neigh_release;
+	}
+
+	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
+
+	write_lock_bh(&tbl->lock);
+
+	if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
+		neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
+
+	hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+
+	if (n->parms->dead) {
+		rc = ERR_PTR(-EINVAL);
+		goto out_tbl_unlock;
+	}
+
+	for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
+		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
+			neigh_hold(n1);
+			rc = n1;
+			goto out_tbl_unlock;
+		}
+	}
+
+	n->next = tbl->hash_buckets[hash_val];
+	tbl->hash_buckets[hash_val] = n;
+	n->dead = 0;
+	neigh_hold(n);
+	write_unlock_bh(&tbl->lock);
+	NEIGH_PRINTK2("neigh %p is created.\n", n);
+	rc = n;
+out:
+	return rc;
+out_tbl_unlock:
+	write_unlock_bh(&tbl->lock);
+out_neigh_release:
+	neigh_release(n);
+	goto out;
+}
+
+struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
+				    struct net_device *dev, int creat)
+{
+	struct pneigh_entry *n;
+	int key_len = tbl->key_len;
+	u32 hash_val = *(u32 *)(pkey + key_len - 4);
+
+	hash_val ^= (hash_val >> 16);
+	hash_val ^= hash_val >> 8;
+	hash_val ^= hash_val >> 4;
+	hash_val &= PNEIGH_HASHMASK;
+
+	read_lock_bh(&tbl->lock);
+
+	for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
+		if (!memcmp(n->key, pkey, key_len) &&
+		    (n->dev == dev || !n->dev)) {
+			read_unlock_bh(&tbl->lock);
+			goto out;
+		}
+	}
+	read_unlock_bh(&tbl->lock);
+	n = NULL;
+	if (!creat)
+		goto out;
+
+	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
+	if (!n)
+		goto out;
+
+	memcpy(n->key, pkey, key_len);
+	n->dev = dev;
+	if (dev)
+		dev_hold(dev);
+
+	if (tbl->pconstructor && tbl->pconstructor(n)) {
+		if (dev)
+			dev_put(dev);
+		kfree(n);
+		n = NULL;
+		goto out;
+	}
+
+	write_lock_bh(&tbl->lock);
+	n->next = tbl->phash_buckets[hash_val];
+	tbl->phash_buckets[hash_val] = n;
+	write_unlock_bh(&tbl->lock);
+out:
+	return n;
+}
+
+
+int pneigh_delete(struct neigh_table *tbl, const void *pkey,
+		  struct net_device *dev)
+{
+	struct pneigh_entry *n, **np;
+	int key_len = tbl->key_len;
+	u32 hash_val = *(u32 *)(pkey + key_len - 4);
+
+	hash_val ^= (hash_val >> 16);
+	hash_val ^= hash_val >> 8;
+	hash_val ^= hash_val >> 4;
+	hash_val &= PNEIGH_HASHMASK;
+
+	write_lock_bh(&tbl->lock);
+	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
+	     np = &n->next) {
+		if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
+			*np = n->next;
+			write_unlock_bh(&tbl->lock);
+			if (tbl->pdestructor)
+				tbl->pdestructor(n);
+			if (n->dev)
+				dev_put(n->dev);
+			kfree(n);
+			return 0;
+		}
+	}
+	write_unlock_bh(&tbl->lock);
+	return -ENOENT;
+}
+
+static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
+{
+	struct pneigh_entry *n, **np;
+	u32 h;
+
+	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
+		np = &tbl->phash_buckets[h];
+		while ((n = *np) != NULL) {
+			if (!dev || n->dev == dev) {
+				*np = n->next;
+				if (tbl->pdestructor)
+					tbl->pdestructor(n);
+				if (n->dev)
+					dev_put(n->dev);
+				kfree(n);
+				continue;
+			}
+			np = &n->next;
+		}
+	}
+	return -ENOENT;
+}
+
+
+/*
+ *	neighbour must already be out of the table;
+ *
+ */
+void neigh_destroy(struct neighbour *neigh)
+{
+	struct hh_cache *hh;
+
+	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
+
+	if (!neigh->dead) {
+		printk(KERN_WARNING
+		       "Destroying alive neighbour %p\n", neigh);
+		dump_stack();
+		return;
+	}
+
+	if (neigh_del_timer(neigh))
+		printk(KERN_WARNING "Impossible event.\n");
+
+	while ((hh = neigh->hh) != NULL) {
+		neigh->hh = hh->hh_next;
+		hh->hh_next = NULL;
+		write_lock_bh(&hh->hh_lock);
+		hh->hh_output = neigh_blackhole;
+		write_unlock_bh(&hh->hh_lock);
+		if (atomic_dec_and_test(&hh->hh_refcnt))
+			kfree(hh);
+	}
+
+	if (neigh->ops && neigh->ops->destructor)
+		(neigh->ops->destructor)(neigh);
+
+	skb_queue_purge(&neigh->arp_queue);
+
+	dev_put(neigh->dev);
+	neigh_parms_put(neigh->parms);
+
+	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
+
+	atomic_dec(&neigh->tbl->entries);
+	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
+}
+
+/* Neighbour state is suspicious;
+   disable fast path.
+
+   Called with write_locked neigh.
+ */
+static void neigh_suspect(struct neighbour *neigh)
+{
+	struct hh_cache *hh;
+
+	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+
+	neigh->output = neigh->ops->output;
+
+	for (hh = neigh->hh; hh; hh = hh->hh_next)
+		hh->hh_output = neigh->ops->output;
+}
+
+/* Neighbour state is OK;
+   enable fast path.
+
+   Called with write_locked neigh.
+ */
+static void neigh_connect(struct neighbour *neigh)
+{
+	struct hh_cache *hh;
+
+	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
+
+	neigh->output = neigh->ops->connected_output;
+
+	for (hh = neigh->hh; hh; hh = hh->hh_next)
+		hh->hh_output = neigh->ops->hh_output;
+}
+
+static void neigh_periodic_timer(unsigned long arg)
+{
+	struct neigh_table *tbl = (struct neigh_table *)arg;
+	struct neighbour *n, **np;
+	unsigned long expire, now = jiffies;
+
+	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
+
+	write_lock(&tbl->lock);
+
+	/*
+	 *	periodically recompute ReachableTime from random function
+	 */
+
+	if (time_after(now, tbl->last_rand + 300 * HZ)) {
+		struct neigh_parms *p;
+		tbl->last_rand = now;
+		for (p = &tbl->parms; p; p = p->next)
+			p->reachable_time =
+				neigh_rand_reach_time(p->base_reachable_time);
+	}
+
+	np = &tbl->hash_buckets[tbl->hash_chain_gc];
+	tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);
+
+	while ((n = *np) != NULL) {
+		unsigned int state;
+
+		write_lock(&n->lock);
+
+		state = n->nud_state;
+		if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
+			write_unlock(&n->lock);
+			goto next_elt;
+		}
+
+		if (time_before(n->used, n->confirmed))
+			n->used = n->confirmed;
+
+		if (atomic_read(&n->refcnt) == 1 &&
+		    (state == NUD_FAILED ||
+		     time_after(now, n->used + n->parms->gc_staletime))) {
+			*np = n->next;
+			n->dead = 1;
+			write_unlock(&n->lock);
+			neigh_release(n);
+			continue;
+		}
+		write_unlock(&n->lock);
+
+next_elt:
+		np = &n->next;
+	}
+
+ 	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
+ 	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
+ 	 * base_reachable_time.
+	 */
+	expire = tbl->parms.base_reachable_time >> 1;
+	expire /= (tbl->hash_mask + 1);
+	if (!expire)
+		expire = 1;
+
+ 	mod_timer(&tbl->gc_timer, now + expire);
+
+	write_unlock(&tbl->lock);
+}
+
+static __inline__ int neigh_max_probes(struct neighbour *n)
+{
+	struct neigh_parms *p = n->parms;
+	return (n->nud_state & NUD_PROBE ?
+		p->ucast_probes :
+		p->ucast_probes + p->app_probes + p->mcast_probes);
+}
+
+
+/* Called when a timer expires for a neighbour entry. */
+
+static void neigh_timer_handler(unsigned long arg)
+{
+	unsigned long now, next;
+	struct neighbour *neigh = (struct neighbour *)arg;
+	unsigned state;
+	int notify = 0;
+
+	write_lock(&neigh->lock);
+
+	state = neigh->nud_state;
+	now = jiffies;
+	next = now + HZ;
+
+	if (!(state & NUD_IN_TIMER)) {
+#ifndef CONFIG_SMP
+		printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
+#endif
+		goto out;
+	}
+
+	if (state & NUD_REACHABLE) {
+		if (time_before_eq(now, 
+				   neigh->confirmed + neigh->parms->reachable_time)) {
+			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
+			next = neigh->confirmed + neigh->parms->reachable_time;
+		} else if (time_before_eq(now,
+					  neigh->used + neigh->parms->delay_probe_time)) {
+			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+			neigh->nud_state = NUD_DELAY;
+			neigh_suspect(neigh);
+			next = now + neigh->parms->delay_probe_time;
+		} else {
+			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
+			neigh->nud_state = NUD_STALE;
+			neigh_suspect(neigh);
+		}
+	} else if (state & NUD_DELAY) {
+		if (time_before_eq(now, 
+				   neigh->confirmed + neigh->parms->delay_probe_time)) {
+			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
+			neigh->nud_state = NUD_REACHABLE;
+			neigh_connect(neigh);
+			next = neigh->confirmed + neigh->parms->reachable_time;
+		} else {
+			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
+			neigh->nud_state = NUD_PROBE;
+			atomic_set(&neigh->probes, 0);
+			next = now + neigh->parms->retrans_time;
+		}
+	} else {
+		/* NUD_PROBE|NUD_INCOMPLETE */
+		next = now + neigh->parms->retrans_time;
+	}
+
+	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
+	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
+		struct sk_buff *skb;
+
+		neigh->nud_state = NUD_FAILED;
+		notify = 1;
+		NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
+		NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
+
+		/* It is very thin place. report_unreachable is very complicated
+		   routine. Particularly, it can hit the same neighbour entry!
+
+		   So that, we try to be accurate and avoid dead loop. --ANK
+		 */
+		while (neigh->nud_state == NUD_FAILED &&
+		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
+			write_unlock(&neigh->lock);
+			neigh->ops->error_report(neigh, skb);
+			write_lock(&neigh->lock);
+		}
+		skb_queue_purge(&neigh->arp_queue);
+	}
+
+	if (neigh->nud_state & NUD_IN_TIMER) {
+		neigh_hold(neigh);
+		if (time_before(next, jiffies + HZ/2))
+			next = jiffies + HZ/2;
+		neigh->timer.expires = next;
+		add_timer(&neigh->timer);
+	}
+	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
+		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
+		/* keep skb alive even if arp_queue overflows */
+		if (skb)
+			skb_get(skb);
+		write_unlock(&neigh->lock);
+		neigh->ops->solicit(neigh, skb);
+		atomic_inc(&neigh->probes);
+		if (skb)
+			kfree_skb(skb);
+	} else {
+out:
+		write_unlock(&neigh->lock);
+	}
+
+#ifdef CONFIG_ARPD
+	if (notify && neigh->parms->app_probes)
+		neigh_app_notify(neigh);
+#endif
+	neigh_release(neigh);
+}
+
+int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
+{
+	int rc;
+	unsigned long now;
+
+	write_lock_bh(&neigh->lock);
+
+	rc = 0;
+	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
+		goto out_unlock_bh;
+
+	now = jiffies;
+	
+	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
+		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
+			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
+			neigh->nud_state     = NUD_INCOMPLETE;
+			neigh_hold(neigh);
+			neigh->timer.expires = now + 1;
+			add_timer(&neigh->timer);
+		} else {
+			neigh->nud_state = NUD_FAILED;
+			write_unlock_bh(&neigh->lock);
+
+			if (skb)
+				kfree_skb(skb);
+			return 1;
+		}
+	} else if (neigh->nud_state & NUD_STALE) {
+		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
+		neigh_hold(neigh);
+		neigh->nud_state = NUD_DELAY;
+		neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
+		add_timer(&neigh->timer);
+	}
+
+	if (neigh->nud_state == NUD_INCOMPLETE) {
+		if (skb) {
+			if (skb_queue_len(&neigh->arp_queue) >=
+			    neigh->parms->queue_len) {
+				struct sk_buff *buff;
+				buff = neigh->arp_queue.next;
+				__skb_unlink(buff, &neigh->arp_queue);
+				kfree_skb(buff);
+			}
+			__skb_queue_tail(&neigh->arp_queue, skb);
+		}
+		rc = 1;
+	}
+out_unlock_bh:
+	write_unlock_bh(&neigh->lock);
+	return rc;
+}
+
+static __inline__ void neigh_update_hhs(struct neighbour *neigh)
+{
+	struct hh_cache *hh;
+	void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
+		neigh->dev->header_cache_update;
+
+	if (update) {
+		for (hh = neigh->hh; hh; hh = hh->hh_next) {
+			write_lock_bh(&hh->hh_lock);
+			update(hh, neigh->dev, neigh->ha);
+			write_unlock_bh(&hh->hh_lock);
+		}
+	}
+}
+
+
+
+/* Generic update routine.
+   -- lladdr is new lladdr or NULL, if it is not supplied.
+   -- new    is new state.
+   -- flags
+	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
+				if it is different.
+	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
+				lladdr instead of overriding it 
+				if it is different.
+				It also allows to retain current state
+				if lladdr is unchanged.
+	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
+
+	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing 
+				NTF_ROUTER flag.
+	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
+				a router.
+
+   Caller MUST hold reference count on the entry.
+ */
+
+int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
+		 u32 flags)
+{
+	u8 old;
+	int err;
+#ifdef CONFIG_ARPD
+	int notify = 0;
+#endif
+	struct net_device *dev;
+	int update_isrouter = 0;
+
+	write_lock_bh(&neigh->lock);
+
+	dev    = neigh->dev;
+	old    = neigh->nud_state;
+	err    = -EPERM;
+
+	if (!(flags & NEIGH_UPDATE_F_ADMIN) && 
+	    (old & (NUD_NOARP | NUD_PERMANENT)))
+		goto out;
+
+	if (!(new & NUD_VALID)) {
+		neigh_del_timer(neigh);
+		if (old & NUD_CONNECTED)
+			neigh_suspect(neigh);
+		neigh->nud_state = new;
+		err = 0;
+#ifdef CONFIG_ARPD
+		notify = old & NUD_VALID;
+#endif
+		goto out;
+	}
+
+	/* Compare new lladdr with cached one */
+	if (!dev->addr_len) {
+		/* First case: device needs no address. */
+		lladdr = neigh->ha;
+	} else if (lladdr) {
+		/* The second case: if something is already cached
+		   and a new address is proposed:
+		   - compare new & old
+		   - if they are different, check override flag
+		 */
+		if ((old & NUD_VALID) && 
+		    !memcmp(lladdr, neigh->ha, dev->addr_len))
+			lladdr = neigh->ha;
+	} else {
+		/* No address is supplied; if we know something,
+		   use it, otherwise discard the request.
+		 */
+		err = -EINVAL;
+		if (!(old & NUD_VALID))
+			goto out;
+		lladdr = neigh->ha;
+	}
+
+	if (new & NUD_CONNECTED)
+		neigh->confirmed = jiffies;
+	neigh->updated = jiffies;
+
+	/* If entry was valid and address is not changed,
+	   do not change entry state, if new one is STALE.
+	 */
+	err = 0;
+	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
+	if (old & NUD_VALID) {
+		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
+			update_isrouter = 0;
+			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
+			    (old & NUD_CONNECTED)) {
+				lladdr = neigh->ha;
+				new = NUD_STALE;
+			} else
+				goto out;
+		} else {
+			if (lladdr == neigh->ha && new == NUD_STALE &&
+			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
+			     (old & NUD_CONNECTED))
+			    )
+				new = old;
+		}
+	}
+
+	if (new != old) {
+		neigh_del_timer(neigh);
+		if (new & NUD_IN_TIMER) {
+			neigh_hold(neigh);
+			neigh->timer.expires = jiffies + 
+						((new & NUD_REACHABLE) ? 
+						 neigh->parms->reachable_time : 0);
+			add_timer(&neigh->timer);
+		}
+		neigh->nud_state = new;
+	}
+
+	if (lladdr != neigh->ha) {
+		memcpy(&neigh->ha, lladdr, dev->addr_len);
+		neigh_update_hhs(neigh);
+		if (!(new & NUD_CONNECTED))
+			neigh->confirmed = jiffies -
+				      (neigh->parms->base_reachable_time << 1);
+#ifdef CONFIG_ARPD
+		notify = 1;
+#endif
+	}
+	if (new == old)
+		goto out;
+	if (new & NUD_CONNECTED)
+		neigh_connect(neigh);
+	else
+		neigh_suspect(neigh);
+	if (!(old & NUD_VALID)) {
+		struct sk_buff *skb;
+
+		/* Again: avoid dead loop if something went wrong */
+
+		while (neigh->nud_state & NUD_VALID &&
+		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
+			struct neighbour *n1 = neigh;
+			write_unlock_bh(&neigh->lock);
+			/* On shaper/eql skb->dst->neighbour != neigh :( */
+			if (skb->dst && skb->dst->neighbour)
+				n1 = skb->dst->neighbour;
+			n1->output(skb);
+			write_lock_bh(&neigh->lock);
+		}
+		skb_queue_purge(&neigh->arp_queue);
+	}
+out:
+	if (update_isrouter) {
+		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
+			(neigh->flags | NTF_ROUTER) :
+			(neigh->flags & ~NTF_ROUTER);
+	}
+	write_unlock_bh(&neigh->lock);
+#ifdef CONFIG_ARPD
+	if (notify && neigh->parms->app_probes)
+		neigh_app_notify(neigh);
+#endif
+	return err;
+}
+
+struct neighbour *neigh_event_ns(struct neigh_table *tbl,
+				 u8 *lladdr, void *saddr,
+				 struct net_device *dev)
+{
+	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
+						 lladdr || !dev->addr_len);
+	if (neigh)
+		neigh_update(neigh, lladdr, NUD_STALE, 
+			     NEIGH_UPDATE_F_OVERRIDE);
+	return neigh;
+}
+
+static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
+			  u16 protocol)
+{
+	struct hh_cache	*hh;
+	struct net_device *dev = dst->dev;
+
+	for (hh = n->hh; hh; hh = hh->hh_next)
+		if (hh->hh_type == protocol)
+			break;
+
+	if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
+		memset(hh, 0, sizeof(struct hh_cache));
+		rwlock_init(&hh->hh_lock);
+		hh->hh_type = protocol;
+		atomic_set(&hh->hh_refcnt, 0);
+		hh->hh_next = NULL;
+		if (dev->hard_header_cache(n, hh)) {
+			kfree(hh);
+			hh = NULL;
+		} else {
+			atomic_inc(&hh->hh_refcnt);
+			hh->hh_next = n->hh;
+			n->hh	    = hh;
+			if (n->nud_state & NUD_CONNECTED)
+				hh->hh_output = n->ops->hh_output;
+			else
+				hh->hh_output = n->ops->output;
+		}
+	}
+	if (hh)	{
+		atomic_inc(&hh->hh_refcnt);
+		dst->hh = hh;
+	}
+}
+
+/* This function can be used in contexts, where only old dev_queue_xmit
+   worked, f.e. if you want to override normal output path (eql, shaper),
+   but resolution is not made yet.
+ */
+
+int neigh_compat_output(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+
+	__skb_pull(skb, skb->nh.raw - skb->data);
+
+	if (dev->hard_header &&
+	    dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
+		    	     skb->len) < 0 &&
+	    dev->rebuild_header(skb))
+		return 0;
+
+	return dev_queue_xmit(skb);
+}
+
+/* Slow and careful. */
+
+int neigh_resolve_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct neighbour *neigh;
+	int rc = 0;
+
+	if (!dst || !(neigh = dst->neighbour))
+		goto discard;
+
+	__skb_pull(skb, skb->nh.raw - skb->data);
+
+	if (!neigh_event_send(neigh, skb)) {
+		int err;
+		struct net_device *dev = neigh->dev;
+		if (dev->hard_header_cache && !dst->hh) {
+			write_lock_bh(&neigh->lock);
+			if (!dst->hh)
+				neigh_hh_init(neigh, dst, dst->ops->protocol);
+			err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+					       neigh->ha, NULL, skb->len);
+			write_unlock_bh(&neigh->lock);
+		} else {
+			read_lock_bh(&neigh->lock);
+			err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+					       neigh->ha, NULL, skb->len);
+			read_unlock_bh(&neigh->lock);
+		}
+		if (err >= 0)
+			rc = neigh->ops->queue_xmit(skb);
+		else
+			goto out_kfree_skb;
+	}
+out:
+	return rc;
+discard:
+	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
+		      dst, dst ? dst->neighbour : NULL);
+out_kfree_skb:
+	rc = -EINVAL;
+	kfree_skb(skb);
+	goto out;
+}
+
+/* As fast as possible without hh cache */
+
+int neigh_connected_output(struct sk_buff *skb)
+{
+	int err;
+	struct dst_entry *dst = skb->dst;
+	struct neighbour *neigh = dst->neighbour;
+	struct net_device *dev = neigh->dev;
+
+	__skb_pull(skb, skb->nh.raw - skb->data);
+
+	read_lock_bh(&neigh->lock);
+	err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+			       neigh->ha, NULL, skb->len);
+	read_unlock_bh(&neigh->lock);
+	if (err >= 0)
+		err = neigh->ops->queue_xmit(skb);
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/core/neighbour.c