From 06c7427021f1cc83703f14659d8405ca773ba1ef Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 23 Aug 2005 22:06:09 -0700
Subject: [FIB_TRIE]: Don't ignore negative results from fib_semantic_match

When a semantic match occurs either success, not found or an error
(for matching unreachable routes/blackholes) is returned. fib_trie
ignores the errors and looks for a different matching route. Treat
results other than "no match" as success and end lookup.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index a701405fab0..45efd5f4741 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1333,9 +1333,9 @@ err:;
 }
 
 static inline int check_leaf(struct trie *t, struct leaf *l,  t_key key, int *plen, const struct flowi *flp,
-			     struct fib_result *res, int *err)
+			     struct fib_result *res)
 {
-	int i;
+	int err, i;
 	t_key mask;
 	struct leaf_info *li;
 	struct hlist_head *hhead = &l->list;
@@ -1348,18 +1348,18 @@ static inline int check_leaf(struct trie *t, struct leaf *l,  t_key key, int *pl
 		if (l->key != (key & mask))
 			continue;
 
-		if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) {
+		if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
 			*plen = i;
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 			t->stats.semantic_match_passed++;
 #endif
-			return 1;
+			return err;
 		}
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 		t->stats.semantic_match_miss++;
 #endif
 	}
-	return 0;
+	return 1;
 }
 
 static int
@@ -1386,7 +1386,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
 
 	/* Just a leaf? */
 	if (IS_LEAF(n)) {
-		if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
+		if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
 			goto found;
 		goto failed;
 	}
@@ -1508,7 +1508,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
 		       continue;
 		}
 		if (IS_LEAF(n)) {
-			if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
+			if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
 				goto found;
 	       }
 backtrace:
-- 
cgit v1.2.3-18-g5258


From bf3a46aa9b96f6eb3a49a568f72a2801c3e830c0 Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@netfilter.org>
Date: Tue, 9 Aug 2005 19:22:01 -0700
Subject: [NETFILTER]: convert nfmark and conntrack mark to 32bit

As discussed at netconf'05, we convert nfmark and conntrack-mark to be
32bits even on 64bit architectures.

Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_conntrack_standalone.c |  2 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c           |  2 +-
 net/ipv4/netfilter/ipt_CONNMARK.c            | 11 ++++++++---
 net/ipv4/netfilter/ipt_MARK.c                | 12 ++++++++++++
 net/ipv4/netfilter/ipt_connmark.c            |  7 +++++++
 net/ipv4/netfilter/ipt_mark.c                |  7 +++++++
 6 files changed, 36 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 61798c46e91..dccd4abab7a 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -185,7 +185,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 			return -ENOSPC;
 
 #if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-	if (seq_printf(s, "mark=%lu ", conntrack->mark))
+	if (seq_printf(s, "mark=%u ", conntrack->mark))
 		return -ENOSPC;
 #endif
 
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 6706d3a1bc4..2d05cafec22 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -367,7 +367,7 @@ target(struct sk_buff **pskb,
 #ifdef DEBUG_CLUSTERP
 	DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 #endif
-	DEBUGP("hash=%u ct_hash=%lu ", hash, ct->mark);
+	DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
 	if (!clusterip_responsible(cipinfo->config, hash)) {
 		DEBUGP("not responsible\n");
 		return NF_DROP;
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
index 30ddd3e18eb..8ed744157b1 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -40,9 +40,9 @@ target(struct sk_buff **pskb,
        void *userinfo)
 {
 	const struct ipt_connmark_target_info *markinfo = targinfo;
-	unsigned long diff;
-	unsigned long nfmark;
-	unsigned long newmark;
+	u_int32_t diff;
+	u_int32_t nfmark;
+	u_int32_t newmark;
 
 	enum ip_conntrack_info ctinfo;
 	struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo);
@@ -94,6 +94,11 @@ checkentry(const char *tablename,
 	    }
 	}
 
+	if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) {
+		printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
+		return 0;
+	}
+
 	return 1;
 }
 
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c
index 33c6f9b63b8..8526398346c 100644
--- a/net/ipv4/netfilter/ipt_MARK.c
+++ b/net/ipv4/netfilter/ipt_MARK.c
@@ -76,6 +76,8 @@ checkentry_v0(const char *tablename,
 	      unsigned int targinfosize,
 	      unsigned int hook_mask)
 {
+	struct ipt_mark_target_info *markinfo = targinfo;
+
 	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) {
 		printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n",
 		       targinfosize,
@@ -88,6 +90,11 @@ checkentry_v0(const char *tablename,
 		return 0;
 	}
 
+	if (markinfo->mark > 0xffffffff) {
+		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
+		return 0;
+	}
+
 	return 1;
 }
 
@@ -120,6 +127,11 @@ checkentry_v1(const char *tablename,
 		return 0;
 	}
 
+	if (markinfo->mark > 0xffffffff) {
+		printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
+		return 0;
+	}
+
 	return 1;
 }
 
diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c
index 2706f96cea5..bf8de47ce00 100644
--- a/net/ipv4/netfilter/ipt_connmark.c
+++ b/net/ipv4/netfilter/ipt_connmark.c
@@ -54,9 +54,16 @@ checkentry(const char *tablename,
 	   unsigned int matchsize,
 	   unsigned int hook_mask)
 {
+	struct ipt_connmark_info *cm = 
+				(struct ipt_connmark_info *)matchinfo;
 	if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info)))
 		return 0;
 
+	if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) {
+		printk(KERN_WARNING "connmark: only support 32bit mark\n");
+		return 0;
+	}
+
 	return 1;
 }
 
diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c
index 8955728127b..00bef6cdd3f 100644
--- a/net/ipv4/netfilter/ipt_mark.c
+++ b/net/ipv4/netfilter/ipt_mark.c
@@ -37,9 +37,16 @@ checkentry(const char *tablename,
            unsigned int matchsize,
            unsigned int hook_mask)
 {
+	struct ipt_mark_info *minfo = (struct ipt_mark_info *) matchinfo;
+
 	if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info)))
 		return 0;
 
+	if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) {
+		printk(KERN_WARNING "mark: only supports 32bit mark\n");
+		return 0;
+	}
+
 	return 1;
 }
 
-- 
cgit v1.2.3-18-g5258


From 6869c4d8e066e21623c812c448a05f1ed931c9c6 Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@netfilter.org>
Date: Tue, 9 Aug 2005 19:24:19 -0700
Subject: [NETFILTER]: reduce netfilter sk_buff enlargement

As discussed at netconf'05, we're trying to save every bit in sk_buff.
The patch below makes sk_buff 8 bytes smaller.  I did some basic
testing on my notebook and it seems to work.

The only real in-tree user of nfcache was IPVS, who only needs a
single bit.  Unfortunately I couldn't find some other free bit in
sk_buff to stuff that bit into, so I introduced a separate field for
them.  Maybe the IPVS guys can resolve that to further save space.

Initially I wanted to shrink pkt_type to three bits (PACKET_HOST and
alike are only 6 values defined), but unfortunately the bluetooth code
overloads pkt_type :(

The conntrack-event-api (out-of-tree) uses nfcache, but Rusty just
came up with a way how to do it without any skb fields, so it's safe
to remove it.

- remove all never-implemented 'nfcache' code
- don't have ipvs code abuse 'nfcache' field. currently get's their own
  compile-conditional skb->ipvs_property field.  IPVS maintainers can
  decide to move this bit elswhere, but nfcache needs to die.
- remove skb->nfcache field to save 4 bytes
- move skb->nfctinfo into three unused bits to save further 4 bytes

Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebt_mark.c        |  5 ++---
 net/core/skbuff.c                      |  2 --
 net/ipv4/ip_output.c                   |  1 -
 net/ipv4/ipvs/ip_vs_core.c             |  9 +++++----
 net/ipv4/ipvs/ip_vs_xmit.c             |  2 +-
 net/ipv4/netfilter/ip_conntrack_core.c |  7 +------
 net/ipv4/netfilter/ip_nat_core.c       |  1 -
 net/ipv4/netfilter/ip_nat_standalone.c |  2 --
 net/ipv4/netfilter/ip_queue.c          |  1 -
 net/ipv4/netfilter/ip_tables.c         |  1 -
 net/ipv4/netfilter/ipt_CLASSIFY.c      |  4 +---
 net/ipv4/netfilter/ipt_CONNMARK.c      |  4 +---
 net/ipv4/netfilter/ipt_DSCP.c          |  1 -
 net/ipv4/netfilter/ipt_ECN.c           |  2 --
 net/ipv4/netfilter/ipt_MARK.c          | 10 ++++------
 net/ipv4/netfilter/ipt_REJECT.c        |  1 -
 net/ipv4/netfilter/ipt_TCPMSS.c        |  1 -
 net/ipv4/netfilter/ipt_TOS.c           |  1 -
 net/ipv6/ip6_output.c                  | 16 ++--------------
 net/ipv6/netfilter/ip6_queue.c         |  1 -
 net/ipv6/netfilter/ip6_tables.c        |  1 -
 net/ipv6/netfilter/ip6t_MARK.c         |  5 ++---
 22 files changed, 19 insertions(+), 59 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 02c632b4d32..c93d35ab95c 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -23,10 +23,9 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
 {
 	struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
 
-	if ((*pskb)->nfmark != info->mark) {
+	if ((*pskb)->nfmark != info->mark)
 		(*pskb)->nfmark = info->mark;
-		(*pskb)->nfcache |= NFC_ALTERED;
-	}
+
 	return info->target;
 }
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7eab867ede5..096991cb09d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -361,7 +361,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 	n->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	C(nfmark);
-	C(nfcache);
 	C(nfct);
 	nf_conntrack_get(skb->nfct);
 	C(nfctinfo);
@@ -424,7 +423,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->destructor = NULL;
 #ifdef CONFIG_NETFILTER
 	new->nfmark	= old->nfmark;
-	new->nfcache	= old->nfcache;
 	new->nfct	= old->nfct;
 	nf_conntrack_get(old->nfct);
 	new->nfctinfo	= old->nfctinfo;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 80d13103b2b..766564cb420 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -392,7 +392,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 #endif
 #ifdef CONFIG_NETFILTER
 	to->nfmark = from->nfmark;
-	to->nfcache = from->nfcache;
 	/* Connection association is same as pre-frag packet */
 	nf_conntrack_put(to->nfct);
 	to->nfct = from->nfct;
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 5fb257dd07c..3ac7eeca04a 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -22,6 +22,7 @@
  *
  * Changes:
  *	Paul `Rusty' Russell		properly handle non-linear skbs
+ *	Harald Welte			don't use nfcache
  *
  */
 
@@ -529,7 +530,7 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum,
 				       const struct net_device *out,
 				       int (*okfn)(struct sk_buff *))
 {
-	if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY))
+	if (!((*pskb)->ipvs_property))
 		return NF_ACCEPT;
 
 	/* The packet was sent from IPVS, exit this chain */
@@ -701,7 +702,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 	/* do the statistics and put it back */
 	ip_vs_out_stats(cp, skb);
 
-	skb->nfcache |= NFC_IPVS_PROPERTY;
+	skb->ipvs_property = 1;
 	verdict = NF_ACCEPT;
 
   out:
@@ -739,7 +740,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 
 	EnterFunction(11);
 
-	if (skb->nfcache & NFC_IPVS_PROPERTY)
+	if (skb->ipvs_property)
 		return NF_ACCEPT;
 
 	iph = skb->nh.iph;
@@ -821,7 +822,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
 	ip_vs_conn_put(cp);
 
-	skb->nfcache |= NFC_IPVS_PROPERTY;
+	skb->ipvs_property = 1;
 
 	LeaveFunction(11);
 	return NF_ACCEPT;
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index a8512a3fd08..3b87482049c 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -127,7 +127,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
 
 #define IP_VS_XMIT(skb, rt)				\
 do {							\
-	(skb)->nfcache |= NFC_IPVS_PROPERTY;		\
+	(skb)->ipvs_property = 1;			\
 	(skb)->ip_summed = CHECKSUM_NONE;		\
 	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL,	\
 		(rt)->u.dst.dev, dst_output);		\
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index a7f0c821a9b..04c3414361d 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -625,9 +625,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 		return NF_DROP;
 	}
 
-	/* FIXME: Do this right please. --RR */
-	(*pskb)->nfcache |= NFC_UNKNOWN;
-
 /* Doesn't cover locally-generated broadcast, so not worth it. */
 #if 0
 	/* Ignore broadcast: no `connection'. */
@@ -943,10 +940,8 @@ ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
 	skb = ip_defrag(skb, user);
 	local_bh_enable();
 
-	if (skb) {
+	if (skb)
 		ip_send_check(skb->nh.iph);
-		skb->nfcache |= NFC_ALTERED;
-	}
 	return skb;
 }
 
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 739b6dde1c8..ed4d731880f 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -321,7 +321,6 @@ manip_pkt(u_int16_t proto,
 {
 	struct iphdr *iph;
 
-	(*pskb)->nfcache |= NFC_ALTERED;
 	if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph)))
 		return 0;
 
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 91d5ea1dbbc..9ecba979033 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -73,8 +73,6 @@ ip_nat_fn(unsigned int hooknum,
 	IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
 		       & htons(IP_MF|IP_OFFSET)));
 
-	(*pskb)->nfcache |= NFC_UNKNOWN;
-
 	/* If we had a hardware checksum before, it's now invalid */
 	if ((*pskb)->ip_summed == CHECKSUM_HW)
 		if (skb_checksum_help(*pskb, (out == NULL)))
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index c6baa817438..bc0af8d8e91 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -392,7 +392,6 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 		return -ENOMEM;
 	memcpy(e->skb->data, v->payload, v->data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
-	e->skb->nfcache |= NFC_ALTERED;
 
 	/*
 	 * Extra routing may needed on local out, as the QUEUE target never
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index c88dfcd38c5..ff8d85d2070 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -312,7 +312,6 @@ ipt_do_table(struct sk_buff **pskb,
 	do {
 		IP_NF_ASSERT(e);
 		IP_NF_ASSERT(back);
-		(*pskb)->nfcache |= e->nfcache;
 		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
 			struct ipt_entry_target *t;
 
diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c
index 9842e6e2318..dab78d8bd49 100644
--- a/net/ipv4/netfilter/ipt_CLASSIFY.c
+++ b/net/ipv4/netfilter/ipt_CLASSIFY.c
@@ -32,10 +32,8 @@ target(struct sk_buff **pskb,
 {
 	const struct ipt_classify_target_info *clinfo = targinfo;
 
-	if((*pskb)->priority != clinfo->priority) {
+	if((*pskb)->priority != clinfo->priority) 
 		(*pskb)->priority = clinfo->priority;
-		(*pskb)->nfcache |= NFC_ALTERED;
-	}
 
 	return IPT_CONTINUE;
 }
diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c
index 8ed744157b1..13463802133 100644
--- a/net/ipv4/netfilter/ipt_CONNMARK.c
+++ b/net/ipv4/netfilter/ipt_CONNMARK.c
@@ -61,10 +61,8 @@ target(struct sk_buff **pskb,
 	    case IPT_CONNMARK_RESTORE:
 		nfmark = (*pskb)->nfmark;
 		diff = (ct->mark ^ nfmark) & markinfo->mask;
-		if (diff != 0) {
+		if (diff != 0)
 		    (*pskb)->nfmark = nfmark ^ diff;
-		    (*pskb)->nfcache |= NFC_ALTERED;
-		}
 		break;
 	    }
 	}
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
index 3ea4509099f..975476fef27 100644
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ b/net/ipv4/netfilter/ipt_DSCP.c
@@ -51,7 +51,6 @@ target(struct sk_buff **pskb,
 						 sizeof(diffs),
 						 (*pskb)->nh.iph->check
 						 ^ 0xFFFF));
-		(*pskb)->nfcache |= NFC_ALTERED;
 	}
 	return IPT_CONTINUE;
 }
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 94a0ce1c1c9..f63a9bc0e4d 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -43,7 +43,6 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 						 sizeof(diffs),
 						 (*pskb)->nh.iph->check
 						 ^0xFFFF));
-		(*pskb)->nfcache |= NFC_ALTERED;
 	} 
 	return 1;
 }
@@ -87,7 +86,6 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
 		tcph->check = csum_fold(csum_partial((char *)diffs,
 						     sizeof(diffs),
 						     tcph->check^0xFFFF));
-	(*pskb)->nfcache |= NFC_ALTERED;
 	return 1;
 }
 
diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c
index 8526398346c..52b4f2c296b 100644
--- a/net/ipv4/netfilter/ipt_MARK.c
+++ b/net/ipv4/netfilter/ipt_MARK.c
@@ -29,10 +29,9 @@ target_v0(struct sk_buff **pskb,
 {
 	const struct ipt_mark_target_info *markinfo = targinfo;
 
-	if((*pskb)->nfmark != markinfo->mark) {
+	if((*pskb)->nfmark != markinfo->mark)
 		(*pskb)->nfmark = markinfo->mark;
-		(*pskb)->nfcache |= NFC_ALTERED;
-	}
+
 	return IPT_CONTINUE;
 }
 
@@ -61,10 +60,9 @@ target_v1(struct sk_buff **pskb,
 		break;
 	}
 
-	if((*pskb)->nfmark != mark) {
+	if((*pskb)->nfmark != mark)
 		(*pskb)->nfmark = mark;
-		(*pskb)->nfcache |= NFC_ALTERED;
-	}
+
 	return IPT_CONTINUE;
 }
 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 91569644602..f115a84a4ac 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -156,7 +156,6 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 
 	/* This packet will not be the same as the other: clear nf fields */
 	nf_reset(nskb);
-	nskb->nfcache = 0;
 	nskb->nfmark = 0;
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(nskb->nf_bridge);
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index 7b84a254440..949288319ca 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -190,7 +190,6 @@ ipt_tcpmss_target(struct sk_buff **pskb,
 	       newmss);
 
  retmodified:
-	(*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED;
 	return IPT_CONTINUE;
 }
 
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 85c70d240f8..49abb7eef0a 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -46,7 +46,6 @@ target(struct sk_buff **pskb,
 						 sizeof(diffs),
 						 (*pskb)->nh.iph->check
 						 ^0xFFFF));
-		(*pskb)->nfcache |= NFC_ALTERED;
 	}
 	return IPT_CONTINUE;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ae652ca14bc..590d2b79719 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -185,19 +185,6 @@ int ip6_route_me_harder(struct sk_buff *skb)
 }
 #endif
 
-static inline int ip6_maybe_reroute(struct sk_buff *skb)
-{
-#ifdef CONFIG_NETFILTER
-	if (skb->nfcache & NFC_ALTERED){
-		if (ip6_route_me_harder(skb) != 0){
-			kfree_skb(skb);
-			return -EINVAL;
-		}
-	}
-#endif /* CONFIG_NETFILTER */
-	return dst_output(skb);
-}
-
 /*
  *	xmit an sk_buff (used by TCP)
  */
@@ -266,7 +253,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	mtu = dst_mtu(dst);
 	if ((skb->len <= mtu) || ipfragok) {
 		IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
-		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
+		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
+				dst_output);
 	}
 
 	if (net_ratelimit())
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index a16df5b27c8..83ccedceed1 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -388,7 +388,6 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 		return -ENOMEM;
 	memcpy(e->skb->data, v->payload, v->data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
-	e->skb->nfcache |= NFC_ALTERED;
 
 	/*
 	 * Extra routing may needed on local out, as the QUEUE target never
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 73034511c8d..41a67cf6e33 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -401,7 +401,6 @@ ip6t_do_table(struct sk_buff **pskb,
 	do {
 		IP_NF_ASSERT(e);
 		IP_NF_ASSERT(back);
-		(*pskb)->nfcache |= e->nfcache;
 		if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
 			&protoff, &offset)) {
 			struct ip6t_entry_target *t;
diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c
index d09ceb05013..81924fcc585 100644
--- a/net/ipv6/netfilter/ip6t_MARK.c
+++ b/net/ipv6/netfilter/ip6t_MARK.c
@@ -28,10 +28,9 @@ target(struct sk_buff **pskb,
 {
 	const struct ip6t_mark_target_info *markinfo = targinfo;
 
-	if((*pskb)->nfmark != markinfo->mark) {
+	if((*pskb)->nfmark != markinfo->mark)
 		(*pskb)->nfmark = markinfo->mark;
-		(*pskb)->nfcache |= NFC_ALTERED;
-	}
+
 	return IP6T_CONTINUE;
 }
 
-- 
cgit v1.2.3-18-g5258


From 8728b834b226ffcf2c94a58530090e292af2a7bf Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 9 Aug 2005 19:25:21 -0700
Subject: [NET]: Kill skb->list

Remove the "list" member of struct sk_buff, as it is entirely
redundant.  All SKB list removal callers know which list the
SKB is on, so storing this in sk_buff does nothing other than
taking up some space.

Two tricky bits were SCTP, which I took care of, and two ATM
drivers which Francois Romieu <romieu@fr.zoreil.com> fixed
up.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
---
 net/atm/ipcommon.c      |  3 ---
 net/ax25/ax25_subr.c    |  2 +-
 net/core/skbuff.c       | 57 +++++++++++++++++++-------------------------
 net/decnet/af_decnet.c  |  2 +-
 net/decnet/dn_nsp_out.c |  2 +-
 net/econet/af_econet.c  |  4 ++--
 net/ipv4/tcp.c          |  2 +-
 net/ipv4/tcp_input.c    | 29 +++++++++++++----------
 net/ipv4/tcp_output.c   |  6 ++---
 net/irda/irlap_frame.c  |  6 -----
 net/lapb/lapb_subr.c    |  2 +-
 net/llc/af_llc.c        |  2 +-
 net/llc/llc_conn.c      |  6 ++++-
 net/netrom/nr_subr.c    |  2 +-
 net/rose/rose_subr.c    |  2 +-
 net/sctp/socket.c       |  4 ++--
 net/sctp/ulpqueue.c     | 63 +++++++++++++++++++++++++++++--------------------
 net/unix/garbage.c      | 12 +++++-----
 net/x25/x25_subr.c      |  2 +-
 19 files changed, 105 insertions(+), 103 deletions(-)

(limited to 'net')

diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c
index 181a3002d8a..4b1faca5013 100644
--- a/net/atm/ipcommon.c
+++ b/net/atm/ipcommon.c
@@ -34,7 +34,6 @@
 
 void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to)
 {
-	struct sk_buff *skb;
 	unsigned long flags;
 	struct sk_buff *skb_from = (struct sk_buff *) from;
 	struct sk_buff *skb_to = (struct sk_buff *) to;
@@ -47,8 +46,6 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to)
 	prev->next = skb_to;
 	to->prev->next = from->next;
 	to->prev = from->prev;
-	for (skb = from->next; skb != skb_to; skb = skb->next)
-		skb->list = to;
 	to->qlen += from->qlen;
 	spin_unlock(&to->lock);
 	from->prev = skb_from;
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 99694b57f6f..eb7343c10a9 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -76,7 +76,7 @@ void ax25_requeue_frames(ax25_cb *ax25)
 		if (skb_prev == NULL)
 			skb_queue_head(&ax25->write_queue, skb);
 		else
-			skb_append(skb_prev, skb);
+			skb_append(skb_prev, skb, &ax25->write_queue);
 		skb_prev = skb;
 	}
 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 096991cb09d..e6564b0a683 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -281,8 +281,6 @@ void kfree_skbmem(struct sk_buff *skb)
 
 void __kfree_skb(struct sk_buff *skb)
 {
-	BUG_ON(skb->list != NULL);
-
 	dst_release(skb->dst);
 #ifdef CONFIG_XFRM
 	secpath_put(skb->sp);
@@ -333,7 +331,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 #define C(x) n->x = skb->x
 
 	n->next = n->prev = NULL;
-	n->list = NULL;
 	n->sk = NULL;
 	C(stamp);
 	C(dev);
@@ -403,7 +400,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	 */
 	unsigned long offset = new->data - old->data;
 
-	new->list	= NULL;
 	new->sk		= NULL;
 	new->dev	= old->dev;
 	new->real_dev	= old->real_dev;
@@ -1342,50 +1338,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
 	__skb_queue_tail(list, newsk);
 	spin_unlock_irqrestore(&list->lock, flags);
 }
+
 /**
  *	skb_unlink	-	remove a buffer from a list
  *	@skb: buffer to remove
+ *	@list: list to use
  *
- *	Place a packet after a given packet in a list. The list locks are taken
- *	and this function is atomic with respect to other list locked calls
+ *	Remove a packet from a list. The list locks are taken and this
+ *	function is atomic with respect to other list locked calls
  *
- *	Works even without knowing the list it is sitting on, which can be
- *	handy at times. It also means that THE LIST MUST EXIST when you
- *	unlink. Thus a list must have its contents unlinked before it is
- *	destroyed.
+ *	You must know what list the SKB is on.
  */
-void skb_unlink(struct sk_buff *skb)
+void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
 {
-	struct sk_buff_head *list = skb->list;
-
-	if (list) {
-		unsigned long flags;
+	unsigned long flags;
 
-		spin_lock_irqsave(&list->lock, flags);
-		if (skb->list == list)
-			__skb_unlink(skb, skb->list);
-		spin_unlock_irqrestore(&list->lock, flags);
-	}
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_unlink(skb, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 }
 
-
 /**
  *	skb_append	-	append a buffer
  *	@old: buffer to insert after
  *	@newsk: buffer to insert
+ *	@list: list to use
  *
  *	Place a packet after a given packet in a list. The list locks are taken
  *	and this function is atomic with respect to other list locked calls.
  *	A buffer cannot be placed on two lists at the same time.
  */
-
-void skb_append(struct sk_buff *old, struct sk_buff *newsk)
+void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&old->list->lock, flags);
-	__skb_append(old, newsk);
-	spin_unlock_irqrestore(&old->list->lock, flags);
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_append(old, newsk, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 }
 
 
@@ -1393,19 +1382,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk)
  *	skb_insert	-	insert a buffer
  *	@old: buffer to insert before
  *	@newsk: buffer to insert
+ *	@list: list to use
+ *
+ *	Place a packet before a given packet in a list. The list locks are
+ * 	taken and this function is atomic with respect to other list locked
+ *	calls.
  *
- *	Place a packet before a given packet in a list. The list locks are taken
- *	and this function is atomic with respect to other list locked calls
  *	A buffer cannot be placed on two lists at the same time.
  */
-
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
+void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&old->list->lock, flags);
-	__skb_insert(newsk, old->prev, old, old->list);
-	spin_unlock_irqrestore(&old->list->lock, flags);
+	spin_lock_irqsave(&list->lock, flags);
+	__skb_insert(newsk, old->prev, old, list);
+	spin_unlock_irqrestore(&list->lock, flags);
 }
 
 #if 0
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index acdd18e6adb..0c30409fe9e 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1763,7 +1763,7 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
 		nskb = skb->next;
 
 		if (skb->len == 0) {
-			skb_unlink(skb);
+			skb_unlink(skb, queue);
 			kfree_skb(skb);
 			/* 
 			 * N.B. Don't refer to skb or cb after this point
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 8cce1fdbda9..e0bebf4bbca 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -479,7 +479,7 @@ int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff
 		xmit_count = cb2->xmit_count;
 		segnum = cb2->segnum;
 		/* Remove and drop ack'ed packet */
-		skb_unlink(ack);
+		skb_unlink(ack, q);
 		kfree_skb(ack);
 		ack = NULL;
 
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index de691e119e1..b807a314269 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -869,7 +869,7 @@ static void aun_tx_ack(unsigned long seq, int result)
 
 foundit:
 	tx_result(skb->sk, eb->cookie, result);
-	skb_unlink(skb);
+	skb_unlink(skb, &aun_queue);
 	spin_unlock_irqrestore(&aun_queue_lock, flags);
 	kfree_skb(skb);
 }
@@ -947,7 +947,7 @@ static void ab_cleanup(unsigned long h)
 		{
 			tx_result(skb->sk, eb->cookie, 
 				  ECTYPE_TRANSMIT_NOT_PRESENT);
-			skb_unlink(skb);
+			skb_unlink(skb, &aun_queue);
 			kfree_skb(skb);
 		}
 		skb = newskb;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 69b1fcf7007..d2696af46c7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -975,7 +975,7 @@ do_fault:
 	if (!skb->len) {
 		if (sk->sk_send_head == skb)
 			sk->sk_send_head = NULL;
-		__skb_unlink(skb, skb->list);
+		__skb_unlink(skb, &sk->sk_write_queue);
 		sk_stream_free_skb(sk, skb);
 	}
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 53a8a5399f1..ffa24025cd0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2085,7 +2085,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
 			seq_rtt = now - scb->when;
 		tcp_dec_pcount_approx(&tp->fackets_out, skb);
 		tcp_packets_out_dec(tp, skb);
-		__skb_unlink(skb, skb->list);
+		__skb_unlink(skb, &sk->sk_write_queue);
 		sk_stream_free_skb(sk, skb);
 	}
 
@@ -2853,7 +2853,7 @@ static void tcp_ofo_queue(struct sock *sk)
 
 		if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
 			SOCK_DEBUG(sk, "ofo packet was already received \n");
-			__skb_unlink(skb, skb->list);
+			__skb_unlink(skb, &tp->out_of_order_queue);
 			__kfree_skb(skb);
 			continue;
 		}
@@ -2861,7 +2861,7 @@ static void tcp_ofo_queue(struct sock *sk)
 			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
 			   TCP_SKB_CB(skb)->end_seq);
 
-		__skb_unlink(skb, skb->list);
+		__skb_unlink(skb, &tp->out_of_order_queue);
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if(skb->h.th->fin)
@@ -3027,7 +3027,7 @@ drop:
 		u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
 		if (seq == TCP_SKB_CB(skb1)->end_seq) {
-			__skb_append(skb1, skb);
+			__skb_append(skb1, skb, &tp->out_of_order_queue);
 
 			if (!tp->rx_opt.num_sacks ||
 			    tp->selective_acks[0].end_seq != seq)
@@ -3071,7 +3071,7 @@ drop:
 			       tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq);
 			       break;
 		       }
-		       __skb_unlink(skb1, skb1->list);
+		       __skb_unlink(skb1, &tp->out_of_order_queue);
 		       tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq);
 		       __kfree_skb(skb1);
 		}
@@ -3088,8 +3088,9 @@ add_sack:
  * simplifies code)
  */
 static void
-tcp_collapse(struct sock *sk, struct sk_buff *head,
-	     struct sk_buff *tail, u32 start, u32 end)
+tcp_collapse(struct sock *sk, struct sk_buff_head *list,
+	     struct sk_buff *head, struct sk_buff *tail,
+	     u32 start, u32 end)
 {
 	struct sk_buff *skb;
 
@@ -3099,7 +3100,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head,
 		/* No new bits? It is possible on ofo queue. */
 		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
 			struct sk_buff *next = skb->next;
-			__skb_unlink(skb, skb->list);
+			__skb_unlink(skb, list);
 			__kfree_skb(skb);
 			NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
 			skb = next;
@@ -3145,7 +3146,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head,
 		nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head);
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
-		__skb_insert(nskb, skb->prev, skb, skb->list);
+		__skb_insert(nskb, skb->prev, skb, list);
 		sk_stream_set_owner_r(nskb, sk);
 
 		/* Copy data, releasing collapsed skbs. */
@@ -3164,7 +3165,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head,
 			}
 			if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
 				struct sk_buff *next = skb->next;
-				__skb_unlink(skb, skb->list);
+				__skb_unlink(skb, list);
 				__kfree_skb(skb);
 				NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
 				skb = next;
@@ -3200,7 +3201,8 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 		if (skb == (struct sk_buff *)&tp->out_of_order_queue ||
 		    after(TCP_SKB_CB(skb)->seq, end) ||
 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
-			tcp_collapse(sk, head, skb, start, end);
+			tcp_collapse(sk, &tp->out_of_order_queue,
+				     head, skb, start, end);
 			head = skb;
 			if (skb == (struct sk_buff *)&tp->out_of_order_queue)
 				break;
@@ -3237,7 +3239,8 @@ static int tcp_prune_queue(struct sock *sk)
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
 	tcp_collapse_ofo_queue(sk);
-	tcp_collapse(sk, sk->sk_receive_queue.next,
+	tcp_collapse(sk, &sk->sk_receive_queue,
+		     sk->sk_receive_queue.next,
 		     (struct sk_buff*)&sk->sk_receive_queue,
 		     tp->copied_seq, tp->rcv_nxt);
 	sk_stream_mem_reclaim(sk);
@@ -3462,7 +3465,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
 		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
 		tp->copied_seq++;
 		if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
-			__skb_unlink(skb, skb->list);
+			__skb_unlink(skb, &sk->sk_receive_queue);
 			__kfree_skb(skb);
 		}
 	}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dd30dd137b7..a4d1eb9a092 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -505,7 +505,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned
 
 	/* Link BUFF into the send queue. */
 	skb_header_release(buff);
-	__skb_append(skb, buff);
+	__skb_append(skb, buff, &sk->sk_write_queue);
 
 	return 0;
 }
@@ -893,7 +893,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* Link BUFF into the send queue. */
 	skb_header_release(buff);
-	__skb_append(skb, buff);
+	__skb_append(skb, buff, &sk->sk_write_queue);
 
 	return 0;
 }
@@ -1238,7 +1238,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		       tcp_skb_pcount(next_skb) != 1);
 
 		/* Ok.  We will be able to collapse the packet. */
-		__skb_unlink(next_skb, next_skb->list);
+		__skb_unlink(next_skb, &sk->sk_write_queue);
 
 		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
 
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 6dafbb43b52..eb65b4925b5 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -988,9 +988,6 @@ void irlap_resend_rejected_frames(struct irlap_cb *self, int command)
 			IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__);
 			return;
 		}
-		/* Unlink tx_skb from list */
-		tx_skb->next = tx_skb->prev = NULL;
-		tx_skb->list = NULL;
 
 		/* Clear old Nr field + poll bit */
 		tx_skb->data[1] &= 0x0f;
@@ -1063,9 +1060,6 @@ void irlap_resend_rejected_frame(struct irlap_cb *self, int command)
 			IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__);
 			return;
 		}
-		/* Unlink tx_skb from list */
-		tx_skb->next = tx_skb->prev = NULL;
-		tx_skb->list = NULL;
 
 		/* Clear old Nr field + poll bit */
 		tx_skb->data[1] &= 0x0f;
diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c
index 5de05a0bc0f..8b5eefd70f0 100644
--- a/net/lapb/lapb_subr.c
+++ b/net/lapb/lapb_subr.c
@@ -78,7 +78,7 @@ void lapb_requeue_frames(struct lapb_cb *lapb)
 		if (!skb_prev)
 			skb_queue_head(&lapb->write_queue, skb);
 		else
-			skb_append(skb_prev, skb);
+			skb_append(skb_prev, skb, &lapb->write_queue);
 		skb_prev = skb;
 	}
 }
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 20b4cfebd74..f49b82da826 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -714,7 +714,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (uaddr)
 		memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr));
 	msg->msg_namelen = sizeof(*uaddr);
-	if (!skb->list) {
+	if (!skb->next) {
 dgram_free:
 		kfree_skb(skb);
 	}
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index eba812a9c69..57154861946 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -71,7 +71,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
 
 	if (!ev->ind_prim && !ev->cfm_prim) {
 		/* indicate or confirm not required */
-		if (!skb->list)
+		/* XXX this is not very pretty, perhaps we should store
+		 * XXX indicate/confirm-needed state in the llc_conn_state_ev
+		 * XXX control block of the SKB instead? -DaveM
+		 */
+		if (!skb->next)
 			goto out_kfree_skb;
 		goto out_skb_put;
 	}
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index 0627347b14b..252c1b3ecd7 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -77,7 +77,7 @@ void nr_requeue_frames(struct sock *sk)
 		if (skb_prev == NULL)
 			skb_queue_head(&sk->sk_write_queue, skb);
 		else
-			skb_append(skb_prev, skb);
+			skb_append(skb_prev, skb, &sk->sk_write_queue);
 		skb_prev = skb;
 	}
 }
diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c
index 7db7e1cedc3..ae135e27799 100644
--- a/net/rose/rose_subr.c
+++ b/net/rose/rose_subr.c
@@ -74,7 +74,7 @@ void rose_requeue_frames(struct sock *sk)
 		if (skb_prev == NULL)
 			skb_queue_head(&sk->sk_write_queue, skb);
 		else
-			skb_append(skb_prev, skb);
+			skb_append(skb_prev, skb, &sk->sk_write_queue);
 		skb_prev = skb;
 	}
 }
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 091a66f06a3..4454afe4727 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4892,7 +4892,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) {
 		event = sctp_skb2event(skb);
 		if (event->asoc == assoc) {
-			__skb_unlink(skb, skb->list);
+			__skb_unlink(skb, &oldsk->sk_receive_queue);
 			__skb_queue_tail(&newsk->sk_receive_queue, skb);
 		}
 	}
@@ -4921,7 +4921,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 		sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) {
 			event = sctp_skb2event(skb);
 			if (event->asoc == assoc) {
-				__skb_unlink(skb, skb->list);
+				__skb_unlink(skb, &oldsp->pd_lobby);
 				__skb_queue_tail(queue, skb);
 			}
 		}
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 8bbc279d6c9..ec2c857eae7 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -50,9 +50,9 @@
 
 /* Forward declarations for internal helpers.  */
 static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq,
-						struct sctp_ulpevent *);
+					      struct sctp_ulpevent *);
 static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *,
-						struct sctp_ulpevent *);
+					      struct sctp_ulpevent *);
 
 /* 1st Level Abstractions */
 
@@ -125,7 +125,9 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 		event = sctp_ulpq_order(ulpq, event);
 	}
 
-	/* Send event to the ULP.  */
+	/* Send event to the ULP.  'event' is the sctp_ulpevent for
+	 * very first SKB on the 'temp' list.
+	 */
 	if (event)
 		sctp_ulpq_tail_event(ulpq, event);
 
@@ -158,14 +160,18 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
 	return sctp_clear_pd(ulpq->asoc->base.sk);
 }
 
-
-
+/* If the SKB of 'event' is on a list, it is the first such member
+ * of that list.
+ */
 int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
 {
 	struct sock *sk = ulpq->asoc->base.sk;
-	struct sk_buff_head *queue;
+	struct sk_buff_head *queue, *skb_list;
+	struct sk_buff *skb = sctp_event2skb(event);
 	int clear_pd = 0;
 
+	skb_list = (struct sk_buff_head *) skb->prev;
+
 	/* If the socket is just going to throw this away, do not
 	 * even try to deliver it.
 	 */
@@ -197,10 +203,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
 	/* If we are harvesting multiple skbs they will be
 	 * collected on a list.
 	 */
-	if (sctp_event2skb(event)->list)
-		sctp_skb_list_tail(sctp_event2skb(event)->list, queue);
+	if (skb_list)
+		sctp_skb_list_tail(skb_list, queue);
 	else
-		__skb_queue_tail(queue, sctp_event2skb(event));
+		__skb_queue_tail(queue, skb);
 
 	/* Did we just complete partial delivery and need to get
 	 * rolling again?  Move pending data to the receive
@@ -214,10 +220,11 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
 	return 1;
 
 out_free:
-	if (sctp_event2skb(event)->list)
-		sctp_queue_purge_ulpevents(sctp_event2skb(event)->list);
+	if (skb_list)
+		sctp_queue_purge_ulpevents(skb_list);
 	else
 		sctp_ulpevent_free(event);
+
 	return 0;
 }
 
@@ -269,7 +276,7 @@ static inline void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
  * payload was fragmented on the way and ip had to reassemble them.
  * We add the rest of skb's to the first skb's fraglist.
  */
-static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, struct sk_buff *l_frag)
+static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag)
 {
 	struct sk_buff *pos;
 	struct sctp_ulpevent *event;
@@ -294,7 +301,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag,
 		skb_shinfo(f_frag)->frag_list = pos;
 
 	/* Remove the first fragment from the reassembly queue.  */
-	__skb_unlink(f_frag, f_frag->list);
+	__skb_unlink(f_frag, queue);
 	while (pos) {
 
 		pnext = pos->next;
@@ -304,7 +311,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag,
 		f_frag->data_len += pos->len;
 
 		/* Remove the fragment from the reassembly queue.  */
-		__skb_unlink(pos, pos->list);
+		__skb_unlink(pos, queue);
 	
 		/* Break if we have reached the last fragment.  */
 		if (pos == l_frag)
@@ -375,7 +382,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
 done:
 	return retval;
 found:
-	retval = sctp_make_reassembled_event(first_frag, pos);
+	retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos);
 	if (retval)
 		retval->msg_flags |= MSG_EOR;
 	goto done;
@@ -435,7 +442,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq
 	 * further.
 	 */
 done:
-	retval = sctp_make_reassembled_event(first_frag, last_frag);
+	retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag);
 	if (retval && is_last)
 		retval->msg_flags |= MSG_EOR;
 
@@ -527,7 +534,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u
 	 * further.
 	 */
 done:
-	retval = sctp_make_reassembled_event(first_frag, last_frag);
+	retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag);
 	return retval;
 }
 
@@ -537,6 +544,7 @@ done:
 static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
 					      struct sctp_ulpevent *event)
 {
+	struct sk_buff_head *event_list;
 	struct sk_buff *pos, *tmp;
 	struct sctp_ulpevent *cevent;
 	struct sctp_stream *in;
@@ -547,6 +555,8 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
 	ssn = event->ssn;
 	in  = &ulpq->asoc->ssnmap->in;
 
+	event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev;
+
 	/* We are holding the chunks by stream, by SSN.  */
 	sctp_skb_for_each(pos, &ulpq->lobby, tmp) {
 		cevent = (struct sctp_ulpevent *) pos->cb;
@@ -567,10 +577,10 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq,
 		/* Found it, so mark in the ssnmap. */
 		sctp_ssn_next(in, sid);
 
-		__skb_unlink(pos, pos->list);
+		__skb_unlink(pos, &ulpq->lobby);
 
 		/* Attach all gathered skbs to the event.  */
-		__skb_queue_tail(sctp_event2skb(event)->list, pos);
+		__skb_queue_tail(event_list, pos);
 	}
 }
 
@@ -626,7 +636,7 @@ static inline void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq,
 }
 
 static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq,
-						struct sctp_ulpevent *event)
+					     struct sctp_ulpevent *event)
 {
 	__u16 sid, ssn;
 	struct sctp_stream *in;
@@ -667,7 +677,7 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq)
 {
 	struct sk_buff *pos, *tmp;
 	struct sctp_ulpevent *cevent;
-	struct sctp_ulpevent *event = NULL;
+	struct sctp_ulpevent *event;
 	struct sctp_stream *in;
 	struct sk_buff_head temp;
 	__u16 csid, cssn;
@@ -675,6 +685,8 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq)
 	in  = &ulpq->asoc->ssnmap->in;
 
 	/* We are holding the chunks by stream, by SSN.  */
+	skb_queue_head_init(&temp);
+	event = NULL;
 	sctp_skb_for_each(pos, &ulpq->lobby, tmp) {
 		cevent = (struct sctp_ulpevent *) pos->cb;
 		csid = cevent->stream;
@@ -686,19 +698,20 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq)
 		/* Found it, so mark in the ssnmap. */	       
 		sctp_ssn_next(in, csid);
 
-		__skb_unlink(pos, pos->list);
+		__skb_unlink(pos, &ulpq->lobby);
 		if (!event) {						
 			/* Create a temporary list to collect chunks on.  */
 			event = sctp_skb2event(pos);
-			skb_queue_head_init(&temp);
 			__skb_queue_tail(&temp, sctp_event2skb(event));
 		} else {
 			/* Attach all gathered skbs to the event.  */
-			__skb_queue_tail(sctp_event2skb(event)->list, pos);
+			__skb_queue_tail(&temp, pos);
 		}
 	}
 
-	/* Send event to the ULP.  */
+	/* Send event to the ULP.  'event' is the sctp_ulpevent for
+	 * very first SKB on the 'temp' list.
+	 */
 	if (event)
 		sctp_ulpq_tail_event(ulpq, event);
 }
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 4bd95c8f593..46252d2807b 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -286,16 +286,16 @@ void unix_gc(void)
 			skb = skb_peek(&s->sk_receive_queue);
 			while (skb &&
 			       skb != (struct sk_buff *)&s->sk_receive_queue) {
-				nextsk=skb->next;
+				nextsk = skb->next;
 				/*
 				 *	Do we have file descriptors ?
 				 */
-				if(UNIXCB(skb).fp)
-				{
-					__skb_unlink(skb, skb->list);
-					__skb_queue_tail(&hitlist,skb);
+				if (UNIXCB(skb).fp) {
+					__skb_unlink(skb,
+						     &s->sk_receive_queue);
+					__skb_queue_tail(&hitlist, skb);
 				}
-				skb=nextsk;
+				skb = nextsk;
 			}
 			spin_unlock(&s->sk_receive_queue.lock);
 		}
diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c
index 7fd872ad0c2..e20cfadad4d 100644
--- a/net/x25/x25_subr.c
+++ b/net/x25/x25_subr.c
@@ -80,7 +80,7 @@ void x25_requeue_frames(struct sock *sk)
 		if (!skb_prev)
 			skb_queue_head(&sk->sk_write_queue, skb);
 		else
-			skb_append(skb_prev, skb);
+			skb_append(skb_prev, skb, &sk->sk_write_queue);
 		skb_prev = skb;
 	}
 }
-- 
cgit v1.2.3-18-g5258


From abc3bc58047efa72ee9c2e208cbeb73d261ad703 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 9 Aug 2005 19:25:56 -0700
Subject: [NET]: Kill skb->tc_classid

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c   | 2 --
 net/sched/act_api.c | 7 +------
 net/sched/gact.c    | 2 +-
 net/sched/ipt.c     | 2 +-
 net/sched/mirred.c  | 2 +-
 net/sched/pedit.c   | 2 +-
 net/sched/police.c  | 3 ++-
 net/sched/simple.c  | 2 +-
 8 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e6564b0a683..8896e6f8aa4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -300,7 +300,6 @@ void __kfree_skb(struct sk_buff *skb)
 	skb->tc_index = 0;
 #ifdef CONFIG_NET_CLS_ACT
 	skb->tc_verd = 0;
-	skb->tc_classid = 0;
 #endif
 #endif
 
@@ -376,7 +375,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
 	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
 	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
 	C(input_dev);
-	C(tc_classid);
 #endif
 
 #endif
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 249c61936ea..c896a0118a3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -165,7 +165,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
 	while ((a = act) != NULL) {
 repeat:
 		if (a->ops && a->ops->act) {
-			ret = a->ops->act(&skb, a);
+			ret = a->ops->act(&skb, a, res);
 			if (TC_MUNGED & skb->tc_verd) {
 				/* copied already, allow trampling */
 				skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
@@ -179,11 +179,6 @@ repeat:
 		act = a->next;
 	}
 exec_done:
-	if (skb->tc_classid > 0) {
-		res->classid = skb->tc_classid;
-		res->class = 0;
-		skb->tc_classid = 0;
-	}
 	return ret;
 }
 
diff --git a/net/sched/gact.c b/net/sched/gact.c
index a811c89fef7..d1c6d542912 100644
--- a/net/sched/gact.c
+++ b/net/sched/gact.c
@@ -135,7 +135,7 @@ tcf_gact_cleanup(struct tc_action *a, int bind)
 }
 
 static int
-tcf_gact(struct sk_buff **pskb, struct tc_action *a)
+tcf_gact(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
 {
 	struct tcf_gact *p = PRIV(a, gact);
 	struct sk_buff *skb = *pskb;
diff --git a/net/sched/ipt.c b/net/sched/ipt.c
index b114d994d52..f50136eed21 100644
--- a/net/sched/ipt.c
+++ b/net/sched/ipt.c
@@ -201,7 +201,7 @@ tcf_ipt_cleanup(struct tc_action *a, int bind)
 }
 
 static int
-tcf_ipt(struct sk_buff **pskb, struct tc_action *a)
+tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
 {
 	int ret = 0, result = 0;
 	struct tcf_ipt *p = PRIV(a, ipt);
diff --git a/net/sched/mirred.c b/net/sched/mirred.c
index f309ce33680..20d06916dc0 100644
--- a/net/sched/mirred.c
+++ b/net/sched/mirred.c
@@ -158,7 +158,7 @@ tcf_mirred_cleanup(struct tc_action *a, int bind)
 }
 
 static int
-tcf_mirred(struct sk_buff **pskb, struct tc_action *a)
+tcf_mirred(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
 {
 	struct tcf_mirred *p = PRIV(a, mirred);
 	struct net_device *dev;
diff --git a/net/sched/pedit.c b/net/sched/pedit.c
index 678be6a645f..767d24f4610 100644
--- a/net/sched/pedit.c
+++ b/net/sched/pedit.c
@@ -130,7 +130,7 @@ tcf_pedit_cleanup(struct tc_action *a, int bind)
 }
 
 static int
-tcf_pedit(struct sk_buff **pskb, struct tc_action *a)
+tcf_pedit(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
 {
 	struct tcf_pedit *p = PRIV(a, pedit);
 	struct sk_buff *skb = *pskb;
diff --git a/net/sched/police.c b/net/sched/police.c
index c03545faf52..eb39fb2f39b 100644
--- a/net/sched/police.c
+++ b/net/sched/police.c
@@ -284,7 +284,8 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind)
 	return 0;
 }
 
-static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a)
+static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a,
+                          struct tcf_result *res)
 {
 	psched_time_t now;
 	struct sk_buff *skb = *pskb;
diff --git a/net/sched/simple.c b/net/sched/simple.c
index 3ab4c675ab5..8a6ae4f491e 100644
--- a/net/sched/simple.c
+++ b/net/sched/simple.c
@@ -44,7 +44,7 @@ static DEFINE_RWLOCK(simp_lock);
 #include <net/pkt_act.h>
 #include <net/act_generic.h>
 
-static int tcf_simp(struct sk_buff **pskb, struct tc_action *a)
+static int tcf_simp(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res)
 {
 	struct sk_buff *skb = *pskb;
 	struct tcf_defact *p = PRIV(a, defact);
-- 
cgit v1.2.3-18-g5258


From ac3247baf8ecadf168642e3898b0212c29c79715 Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@netfilter.org>
Date: Tue, 9 Aug 2005 19:28:03 -0700
Subject: [NETFILTER]: connection tracking event notifiers

This adds a notifier chain based event mechanism for ip_conntrack state
changes.  As opposed to the previous implementations in patch-o-matic, we
do no longer need a field in the skb to achieve this.

Thanks to the valuable input from Patrick McHardy and Rusty on the idea
of a per_cpu implementation.

Signed-off-by: Harald Welte <laforge@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/Kconfig                   |  10 +++
 net/ipv4/netfilter/ip_conntrack_core.c       | 122 +++++++++++++++++++++++++--
 net/ipv4/netfilter/ip_conntrack_ftp.c        |  12 ++-
 net/ipv4/netfilter/ip_conntrack_proto_icmp.c |   1 +
 net/ipv4/netfilter/ip_conntrack_proto_sctp.c |   2 +
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c  |   4 +
 net/ipv4/netfilter/ip_conntrack_proto_udp.c  |   3 +-
 net/ipv4/netfilter/ip_conntrack_standalone.c |  10 +++
 8 files changed, 154 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 46d4cb1c06f..ff3393eba92 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -40,6 +40,16 @@ config IP_NF_CONNTRACK_MARK
 	  of packets, but this mark value is kept in the conntrack session
 	  instead of the individual packets.
 	
+config IP_NF_CONNTRACK_EVENTS
+	bool "Connection tracking events"
+	depends on IP_NF_CONNTRACK
+	help
+	  If this option is enabled, the connection tracking code will
+	  provide a notifier chain that can be used by other kernel code
+	  to get notified about changes in the connection tracking state.
+	  
+	  IF unsure, say `N'.
+
 config IP_NF_CT_PROTO_SCTP
 	tristate  'SCTP protocol connection tracking support (EXPERIMENTAL)'
 	depends on IP_NF_CONNTRACK && EXPERIMENTAL
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 04c3414361d..caf89deae11 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -37,6 +37,7 @@
 #include <linux/err.h>
 #include <linux/percpu.h>
 #include <linux/moduleparam.h>
+#include <linux/notifier.h>
 
 /* ip_conntrack_lock protects the main hash table, protocol/helper/expected
    registrations, conntrack timers*/
@@ -49,7 +50,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_core.h>
 #include <linux/netfilter_ipv4/listhelp.h>
 
-#define IP_CONNTRACK_VERSION	"2.1"
+#define IP_CONNTRACK_VERSION	"2.2"
 
 #if 0
 #define DEBUGP printk
@@ -76,6 +77,81 @@ unsigned int ip_ct_log_invalid;
 static LIST_HEAD(unconfirmed);
 static int ip_conntrack_vmalloc;
 
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+struct notifier_block *ip_conntrack_chain;
+struct notifier_block *ip_conntrack_expect_chain;
+
+DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
+
+static inline void __deliver_cached_events(struct ip_conntrack_ecache *ecache)
+{
+	if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
+		notifier_call_chain(&ip_conntrack_chain, ecache->events,
+				    ecache->ct);
+	ecache->events = 0;
+}
+
+void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
+{
+	__deliver_cached_events(ecache);
+}
+
+/* Deliver all cached events for a particular conntrack. This is called
+ * by code prior to async packet handling or freeing the skb */
+void 
+ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct)
+{
+	struct ip_conntrack_ecache *ecache = 
+					&__get_cpu_var(ip_conntrack_ecache);
+
+	if (!ct)
+		return;
+
+	if (ecache->ct == ct) {
+		DEBUGP("ecache: delivering event for %p\n", ct);
+		__deliver_cached_events(ecache);
+	} else {
+		if (net_ratelimit())
+			printk(KERN_WARNING "ecache: want to deliver for %p, "
+				"but cache has %p\n", ct, ecache->ct);
+	}
+
+	/* signalize that events have already been delivered */
+	ecache->ct = NULL;
+}
+
+/* Deliver cached events for old pending events, if current conntrack != old */
+void ip_conntrack_event_cache_init(const struct sk_buff *skb)
+{
+	struct ip_conntrack *ct = (struct ip_conntrack *) skb->nfct;
+	struct ip_conntrack_ecache *ecache = 
+					&__get_cpu_var(ip_conntrack_ecache);
+
+	/* take care of delivering potentially old events */
+	if (ecache->ct != ct) {
+		enum ip_conntrack_info ctinfo;
+		/* we have to check, since at startup the cache is NULL */
+		if (likely(ecache->ct)) {
+			DEBUGP("ecache: entered for different conntrack: "
+			       "ecache->ct=%p, skb->nfct=%p. delivering "
+			       "events\n", ecache->ct, ct);
+			__deliver_cached_events(ecache);
+			ip_conntrack_put(ecache->ct);
+		} else {
+			DEBUGP("ecache: entered for conntrack %p, "
+				"cache was clean before\n", ct);
+		}
+
+		/* initialize for this conntrack/packet */
+		ecache->ct = ip_conntrack_get(skb, &ctinfo);
+		/* ecache->events cleared by __deliver_cached_devents() */
+	} else {
+		DEBUGP("ecache: re-entered for conntrack %p.\n", ct);
+	}
+}
+
+#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
+
 DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
 
 void 
@@ -223,6 +299,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
 	IP_NF_ASSERT(!timer_pending(&ct->timeout));
 
+	set_bit(IPS_DYING_BIT, &ct->status);
+
 	/* To make sure we don't get any weird locking issues here:
 	 * destroy_conntrack() MUST NOT be called with a write lock
 	 * to ip_conntrack_lock!!! -HW */
@@ -261,6 +339,7 @@ static void death_by_timeout(unsigned long ul_conntrack)
 {
 	struct ip_conntrack *ct = (void *)ul_conntrack;
 
+	ip_conntrack_event(IPCT_DESTROY, ct);
 	write_lock_bh(&ip_conntrack_lock);
 	/* Inside lock so preempt is disabled on module removal path.
 	 * Otherwise we can get spurious warnings. */
@@ -374,6 +453,16 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
 		set_bit(IPS_CONFIRMED_BIT, &ct->status);
 		CONNTRACK_STAT_INC(insert);
 		write_unlock_bh(&ip_conntrack_lock);
+		if (ct->helper)
+			ip_conntrack_event_cache(IPCT_HELPER, *pskb);
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+		if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
+		    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
+			ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
+#endif
+		ip_conntrack_event_cache(master_ct(ct) ?
+					 IPCT_RELATED : IPCT_NEW, *pskb);
+
 		return NF_ACCEPT;
 	}
 
@@ -607,7 +696,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
 	struct ip_conntrack_protocol *proto;
-	int set_reply;
+	int set_reply = 0;
 	int ret;
 
 	/* Previously seen (loopback or untracked)?  Ignore. */
@@ -666,6 +755,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 
 	IP_NF_ASSERT((*pskb)->nfct);
 
+	ip_conntrack_event_cache_init(*pskb);
+
 	ret = proto->packet(ct, *pskb, ctinfo);
 	if (ret < 0) {
 		/* Invalid: inverse of the return code tells
@@ -676,8 +767,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 		return -ret;
 	}
 
-	if (set_reply)
-		set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
+	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
+		ip_conntrack_event_cache(IPCT_STATUS, *pskb);
 
 	return ret;
 }
@@ -824,6 +915,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
 		evict_oldest_expect(expect->master);
 
 	ip_conntrack_expect_insert(expect);
+	ip_conntrack_expect_event(IPEXP_NEW, expect);
 	ret = 0;
 out:
 	write_unlock_bh(&ip_conntrack_lock);
@@ -861,8 +953,10 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
 static inline int unhelp(struct ip_conntrack_tuple_hash *i,
 			 const struct ip_conntrack_helper *me)
 {
-	if (tuplehash_to_ctrack(i)->helper == me)
+	if (tuplehash_to_ctrack(i)->helper == me) {
+ 		ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
 		tuplehash_to_ctrack(i)->helper = NULL;
+	}
 	return 0;
 }
 
@@ -924,6 +1018,7 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
 		if (del_timer(&ct->timeout)) {
 			ct->timeout.expires = jiffies + extra_jiffies;
 			add_timer(&ct->timeout);
+			ip_conntrack_event_cache(IPCT_REFRESH, skb);
 		}
 		ct_add_counters(ct, ctinfo, skb);
 		write_unlock_bh(&ip_conntrack_lock);
@@ -1012,6 +1107,23 @@ ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
 
 		ip_conntrack_put(ct);
 	}
+
+#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
+	{
+		/* we need to deliver all cached events in order to drop
+		 * the reference counts */
+		int cpu;
+		for_each_cpu(cpu) {
+			struct ip_conntrack_ecache *ecache = 
+					&per_cpu(ip_conntrack_ecache, cpu);
+			if (ecache->ct) {
+				__ip_ct_deliver_cached_events(ecache);
+				ip_conntrack_put(ecache->ct);
+				ecache->ct = NULL;
+			}
+		}
+	}
+#endif
 }
 
 /* Fast function for those who don't want to parse /proc (and I don't
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 7a3b773be3f..9658896f899 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -262,7 +262,8 @@ static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
 }
 
 /* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir)
+static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
+			  struct sk_buff *skb)
 {
 	unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
 
@@ -276,10 +277,13 @@ static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir)
 			oldest = i;
 	}
 
-	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER)
+	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
 		info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
-	else if (oldest != NUM_SEQ_TO_REMEMBER)
+		ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+	} else if (oldest != NUM_SEQ_TO_REMEMBER) {
 		info->seq_aft_nl[dir][oldest] = nl_seq;
+		ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
+	}
 }
 
 static int help(struct sk_buff **pskb,
@@ -439,7 +443,7 @@ out_update_nl:
 	/* Now if this ends in \n, update ftp info.  Seq may have been
 	 * adjusted by NAT code. */
 	if (ends_in_nl)
-		update_nl_seq(seq, ct_ftp_info,dir);
+		update_nl_seq(seq, ct_ftp_info,dir, *pskb);
  out:
 	spin_unlock_bh(&ip_ftp_lock);
 	return ret;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 602c74db325..dca1f63d6f5 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -102,6 +102,7 @@ static int icmp_packet(struct ip_conntrack *ct,
 			ct->timeout.function((unsigned long)ct);
 	} else {
 		atomic_inc(&ct->proto.icmp.count);
+		ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
 		ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
 	}
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 31d75390bf1..3d5f878a07d 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -404,6 +404,8 @@ static int sctp_packet(struct ip_conntrack *conntrack,
 		}
 
 		conntrack->proto.sctp.state = newconntrack;
+		if (oldsctpstate != newconntrack)
+			ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
 		write_unlock_bh(&sctp_lock);
 	}
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 809dfed766d..a569ad1ee4d 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -973,6 +973,10 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 		  ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
 	write_unlock_bh(&tcp_lock);
 
+	ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
+	if (new_state != old_state)
+		ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
+
 	if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
 		/* If only reply is a RST, we can consider ourselves not to
 		   have an established connection: this is a fairly common
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 8c1eaba098d..6066eaf4d82 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -73,7 +73,8