aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-06-17 12:52:15 +0200
committerIngo Molnar <mingo@elte.hu>2009-06-17 12:56:49 +0200
commiteadb8a091b27a840de7450f84ecff5ef13476424 (patch)
tree58c3782d40def63baa8167f3d31e3048cb4c7660 /net/ipv4
parent73874005cd8800440be4299bd095387fff4b90ac (diff)
parent65795efbd380a832ae508b04dba8f8e53f0b84d9 (diff)
Merge branch 'linus' into tracing/hw-breakpoints
Conflicts: arch/x86/Kconfig arch/x86/kernel/traps.c arch/x86/power/cpu.c arch/x86/power/cpu_32.c kernel/Makefile Semantic conflict: arch/x86/kernel/hw_breakpoint.c Merge reason: Resolve the conflicts, move from put_cpu_no_sched() to put_cpu() in arch/x86/kernel/hw_breakpoint.c. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig37
-rw-r--r--net/ipv4/af_inet.c25
-rw-r--r--net/ipv4/arp.c6
-rw-r--r--net/ipv4/devinet.c3
-rw-r--r--net/ipv4/fib_frontend.c1
-rw-r--r--net/ipv4/fib_hash.c1
-rw-r--r--net/ipv4/fib_lookup.h3
-rw-r--r--net/ipv4/fib_rules.c4
-rw-r--r--net/ipv4/fib_semantics.c3
-rw-r--r--net/ipv4/fib_trie.c52
-rw-r--r--net/ipv4/icmp.c20
-rw-r--r--net/ipv4/igmp.c8
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/inet_timewait_sock.c26
-rw-r--r--net/ipv4/ip_forward.c6
-rw-r--r--net/ipv4/ip_fragment.c6
-rw-r--r--net/ipv4/ip_gre.c28
-rw-r--r--net/ipv4/ip_input.c21
-rw-r--r--net/ipv4/ip_options.c18
-rw-r--r--net/ipv4/ip_output.c49
-rw-r--r--net/ipv4/ip_sockglue.c86
-rw-r--r--net/ipv4/ipconfig.c53
-rw-r--r--net/ipv4/ipip.c16
-rw-r--r--net/ipv4/ipmr.c48
-rw-r--r--net/ipv4/netfilter.c28
-rw-r--r--net/ipv4/netfilter/arp_tables.c109
-rw-r--r--net/ipv4/netfilter/ip_queue.c2
-rw-r--r--net/ipv4/netfilter/ip_tables.c172
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c14
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c7
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c17
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_sctp.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c7
-rw-r--r--net/ipv4/proc.c10
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c114
-rw-r--r--net/ipv4/syncookies.c5
-rw-r--r--net/ipv4/tcp.c52
-rw-r--r--net/ipv4/tcp_input.c100
-rw-r--r--net/ipv4/tcp_ipv4.c12
-rw-r--r--net/ipv4/tcp_output.c4
-rw-r--r--net/ipv4/tcp_vegas.c11
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv4/xfrm4_input.c2
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c4
-rw-r--r--net/ipv4/xfrm4_output.c6
47 files changed, 673 insertions, 540 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 9d26a3da37e..70491d9035e 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -273,29 +273,20 @@ config IP_PIMSM_V2
you want to play with it.
config ARPD
- bool "IP: ARP daemon support (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ bool "IP: ARP daemon support"
---help---
- Normally, the kernel maintains an internal cache which maps IP
- addresses to hardware addresses on the local network, so that
- Ethernet/Token Ring/ etc. frames are sent to the proper address on
- the physical networking layer. For small networks having a few
- hundred directly connected hosts or less, keeping this address
- resolution (ARP) cache inside the kernel works well. However,
- maintaining an internal ARP cache does not work well for very large
- switched networks, and will use a lot of kernel memory if TCP/IP
- connections are made to many machines on the network.
-
- If you say Y here, the kernel's internal ARP cache will never grow
- to more than 256 entries (the oldest entries are expired in a LIFO
- manner) and communication will be attempted with the user space ARP
- daemon arpd. Arpd then answers the address resolution request either
- from its own cache or by asking the net.
-
- This code is experimental and also obsolete. If you want to use it,
- you need to find a version of the daemon arpd on the net somewhere,
- and you should also say Y to "Kernel/User network link driver",
- below. If unsure, say N.
+ The kernel maintains an internal cache which maps IP addresses to
+ hardware addresses on the local network, so that Ethernet/Token Ring/
+ etc. frames are sent to the proper address on the physical networking
+ layer. Normally, kernel uses the ARP protocol to resolve these
+ mappings.
+
+ Saying Y here adds support to have an user space daemon to do this
+ resolution instead. This is useful for implementing an alternate
+ address resolution protocol (e.g. NHRP on mGRE tunnels) and also for
+ testing purposes.
+
+ If unsure, say N.
config SYN_COOKIES
bool "IP: TCP syncookie support (disabled per default)"
@@ -408,7 +399,7 @@ config INET_XFRM_MODE_BEET
config INET_LRO
bool "Large Receive Offload (ipv4/tcp)"
-
+ default y
---help---
Support for Large Receive Offload (ipv4/tcp).
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7f03373b8c0..566ea6c4321 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -116,7 +116,6 @@
#include <linux/mroute.h>
#endif
-extern void ip_mc_drop_socket(struct sock *sk);
/* The inetsw table contains everything that inet_create needs to
* build a new socket.
@@ -375,6 +374,7 @@ lookup_protocol:
inet->uc_ttl = -1;
inet->mc_loop = 1;
inet->mc_ttl = 1;
+ inet->mc_all = 1;
inet->mc_index = 0;
inet->mc_list = NULL;
@@ -1003,8 +1003,6 @@ void inet_register_protosw(struct inet_protosw *p)
out:
spin_unlock_bh(&inetsw_lock);
- synchronize_net();
-
return;
out_permanent:
@@ -1248,13 +1246,20 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
struct sk_buff **pp = NULL;
struct sk_buff *p;
struct iphdr *iph;
+ unsigned int hlen;
+ unsigned int off;
+ unsigned int id;
int flush = 1;
int proto;
- int id;
- iph = skb_gro_header(skb, sizeof(*iph));
- if (unlikely(!iph))
- goto out;
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*iph);
+ iph = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ iph = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!iph))
+ goto out;
+ }
proto = iph->protocol & (MAX_INET_PROTOS - 1);
@@ -1269,9 +1274,9 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto out_unlock;
- flush = ntohs(iph->tot_len) != skb_gro_len(skb) ||
- iph->frag_off != htons(IP_DF);
- id = ntohs(iph->id);
+ id = ntohl(*(u32 *)&iph->id);
+ flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
+ id >>= 16;
for (p = *head; p; p = p->next) {
struct iphdr *iph2;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f11931c1838..8a3881e28ac 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -468,13 +468,13 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
__be32 paddr;
struct neighbour *n;
- if (!skb->dst) {
+ if (!skb_dst(skb)) {
printk(KERN_DEBUG "arp_find is called with dst==NULL\n");
kfree_skb(skb);
return 1;
}
- paddr = skb->rtable->rt_gateway;
+ paddr = skb_rtable(skb)->rt_gateway;
if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev))
return 0;
@@ -817,7 +817,7 @@ static int arp_process(struct sk_buff *skb)
if (arp->ar_op == htons(ARPOP_REQUEST) &&
ip_route_input(skb, tip, sip, 0, dev) == 0) {
- rt = skb->rtable;
+ rt = skb_rtable(skb);
addr_type = rt->rt_type;
if (addr_type == RTN_LOCAL) {
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 126bb911880..3863c3a4223 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1347,7 +1347,8 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
struct net *net = ctl->extra2;
if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
- rtnl_lock();
+ if (!rtnl_trylock())
+ return restart_syscall();
if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
inet_forward_change(net);
} else if (*valp) {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cafcc49d099..e2f95059256 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -40,7 +40,6 @@
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
-#include <net/icmp.h>
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index ded8c44fb84..ecd39454235 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -263,7 +263,6 @@ fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
err = fib_semantic_match(&f->fn_alias,
flp, res,
- f->fn_key, fz->fz_mask,
fz->fz_order);
if (err <= 0)
goto out;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 2c1623d2768..637b133973b 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -22,8 +22,7 @@ struct fib_alias {
/* Exported by fib_semantics.c */
extern int fib_semantic_match(struct list_head *head,
const struct flowi *flp,
- struct fib_result *res, __be32 zone, __be32 mask,
- int prefixlen);
+ struct fib_result *res, int prefixlen);
extern void fib_release_info(struct fib_info *);
extern struct fib_info *fib_create_info(struct fib_config *cfg);
extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 6080d712082..92d9d97ec5e 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -134,7 +134,7 @@ static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
};
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+ struct fib_rule_hdr *frh,
struct nlattr **tb)
{
struct net *net = sock_net(skb->sk);
@@ -209,7 +209,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
}
static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct fib_rule_hdr *frh)
+ struct fib_rule_hdr *frh)
{
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f831df50090..9b096d6ff3f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -866,8 +866,7 @@ failure:
/* Note! fib_semantic_match intentionally uses RCU list functions. */
int fib_semantic_match(struct list_head *head, const struct flowi *flp,
- struct fib_result *res, __be32 zone, __be32 mask,
- int prefixlen)
+ struct fib_result *res, int prefixlen)
{
struct fib_alias *fa;
int nh_sel = 0;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ec0ae490f0b..d1a39b1277d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -123,6 +123,7 @@ struct tnode {
union {
struct rcu_head rcu;
struct work_struct work;
+ struct tnode *tnode_free;
};
struct node *child[0];
};
@@ -161,6 +162,8 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
static struct node *resize(struct trie *t, struct tnode *tn);
static struct tnode *inflate(struct trie *t, struct tnode *tn);
static struct tnode *halve(struct trie *t, struct tnode *tn);
+/* tnodes to free after resize(); protected by RTNL */
+static struct tnode *tnode_free_head;
static struct kmem_cache *fn_alias_kmem __read_mostly;
static struct kmem_cache *trie_leaf_kmem __read_mostly;
@@ -385,6 +388,29 @@ static inline void tnode_free(struct tnode *tn)
call_rcu(&tn->rcu, __tnode_free_rcu);
}
+static void tnode_free_safe(struct tnode *tn)
+{
+ BUG_ON(IS_LEAF(tn));
+
+ if (node_parent((struct node *) tn)) {
+ tn->tnode_free = tnode_free_head;
+ tnode_free_head = tn;
+ } else {
+ tnode_free(tn);
+ }
+}
+
+static void tnode_free_flush(void)
+{
+ struct tnode *tn;
+
+ while ((tn = tnode_free_head)) {
+ tnode_free_head = tn->tnode_free;
+ tn->tnode_free = NULL;
+ tnode_free(tn);
+ }
+}
+
static struct leaf *leaf_new(void)
{
struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
@@ -495,7 +521,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
/* No children */
if (tn->empty_children == tnode_child_length(tn)) {
- tnode_free(tn);
+ tnode_free_safe(tn);
return NULL;
}
/* One child */
@@ -509,7 +535,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
/* compress one level */
node_set_parent(n, NULL);
- tnode_free(tn);
+ tnode_free_safe(tn);
return n;
}
/*
@@ -670,7 +696,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
/* compress one level */
node_set_parent(n, NULL);
- tnode_free(tn);
+ tnode_free_safe(tn);
return n;
}
@@ -756,7 +782,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
put_child(t, tn, 2*i, inode->child[0]);
put_child(t, tn, 2*i+1, inode->child[1]);
- tnode_free(inode);
+ tnode_free_safe(inode);
continue;
}
@@ -801,9 +827,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
put_child(t, tn, 2*i, resize(t, left));
put_child(t, tn, 2*i+1, resize(t, right));
- tnode_free(inode);
+ tnode_free_safe(inode);
}
- tnode_free(oldtnode);
+ tnode_free_safe(oldtnode);
return tn;
nomem:
{
@@ -885,7 +911,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
put_child(t, newBinNode, 1, right);
put_child(t, tn, i/2, resize(t, newBinNode));
}
- tnode_free(oldtnode);
+ tnode_free_safe(oldtnode);
return tn;
nomem:
{
@@ -986,9 +1012,11 @@ fib_find_node(struct trie *t, u32 key)
static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
{
int wasfull;
- t_key cindex, key = tn->key;
+ t_key cindex, key;
struct tnode *tp;
+ key = tn->key;
+
while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
@@ -998,14 +1026,17 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
(struct node *)tn, wasfull);
tp = node_parent((struct node *) tn);
+ tnode_free_flush();
if (!tp)
break;
tn = tp;
}
/* Handle last (top) tnode */
- if (IS_TNODE(tn))
+ if (IS_TNODE(tn)) {
tn = (struct tnode *)resize(t, (struct tnode *)tn);
+ tnode_free_flush();
+ }
return (struct node *)tn;
}
@@ -1347,8 +1378,7 @@ static int check_leaf(struct trie *t, struct leaf *l,
if (l->key != (key & ntohl(mask)))
continue;
- err = fib_semantic_match(&li->falh, flp, res,
- htonl(l->key), mask, plen);
+ err = fib_semantic_match(&li->falh, flp, res, plen);
#ifdef CONFIG_IP_FIB_TRIE_STATS
if (err <= 0)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 3f50807237e..97c410e8438 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -356,7 +356,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
{
struct ipcm_cookie ipc;
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->u.dst.dev);
struct sock *sk;
struct inet_sock *inet;
@@ -416,7 +416,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
struct iphdr *iph;
int room;
struct icmp_bxm icmp_param;
- struct rtable *rt = skb_in->rtable;
+ struct rtable *rt = skb_rtable(skb_in);
struct ipcm_cookie ipc;
__be32 saddr;
u8 tos;
@@ -591,13 +591,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
goto relookup_failed;
/* Ugh! */
- odst = skb_in->dst;
+ odst = skb_dst(skb_in);
err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
RT_TOS(tos), rt2->u.dst.dev);
dst_release(&rt2->u.dst);
- rt2 = skb_in->rtable;
- skb_in->dst = odst;
+ rt2 = skb_rtable(skb_in);
+ skb_dst_set(skb_in, odst);
}
if (err)
@@ -659,7 +659,7 @@ static void icmp_unreach(struct sk_buff *skb)
u32 info = 0;
struct net *net;
- net = dev_net(skb->dst->dev);
+ net = dev_net(skb_dst(skb)->dev);
/*
* Incomplete header ?
@@ -822,7 +822,7 @@ static void icmp_echo(struct sk_buff *skb)
{
struct net *net;
- net = dev_net(skb->dst->dev);
+ net = dev_net(skb_dst(skb)->dev);
if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
struct icmp_bxm icmp_param;
@@ -873,7 +873,7 @@ static void icmp_timestamp(struct sk_buff *skb)
out:
return;
out_err:
- ICMP_INC_STATS_BH(dev_net(skb->dst->dev), ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
goto out;
}
@@ -926,7 +926,7 @@ static void icmp_address(struct sk_buff *skb)
static void icmp_address_reply(struct sk_buff *skb)
{
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
struct net_device *dev = skb->dev;
struct in_device *in_dev;
struct in_ifaddr *ifa;
@@ -970,7 +970,7 @@ static void icmp_discard(struct sk_buff *skb)
int icmp_rcv(struct sk_buff *skb)
{
struct icmphdr *icmph;
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->u.dst.dev);
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 9eb6219af61..01b4284ed69 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -311,7 +311,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
return NULL;
}
- skb->dst = &rt->u.dst;
+ skb_dst_set(skb, &rt->u.dst);
skb->dev = dev;
skb_reserve(skb, LL_RESERVED_SPACE(dev));
@@ -659,7 +659,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
return -1;
}
- skb->dst = &rt->u.dst;
+ skb_dst_set(skb, &rt->u.dst);
skb_reserve(skb, LL_RESERVED_SPACE(dev));
@@ -948,7 +948,7 @@ int igmp_rcv(struct sk_buff *skb)
case IGMPV2_HOST_MEMBERSHIP_REPORT:
case IGMPV3_HOST_MEMBERSHIP_REPORT:
/* Is it our report looped back? */
- if (skb->rtable->fl.iif == 0)
+ if (skb_rtable(skb)->fl.iif == 0)
break;
/* don't rely on MC router hearing unicast reports */
if (skb->pkt_type == PACKET_MULTICAST ||
@@ -2196,7 +2196,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif)
break;
}
if (!pmc)
- return 1;
+ return inet->mc_all;
psl = pmc->sflist;
if (!psl)
return pmc->sfmode == MCAST_EXCLUDE;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 588a7796e3e..b0b273503e2 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -198,8 +198,6 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
tmo = 0;
r->idiag_family = tw->tw_family;
- r->idiag_state = tw->tw_state;
- r->idiag_timer = 0;
r->idiag_retrans = 0;
r->id.idiag_if = tw->tw_bound_dev_if;
r->id.idiag_cookie[0] = (u32)(unsigned long)tw;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 8554d0ea171..61283f92882 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -9,6 +9,7 @@
*/
#include <linux/kernel.h>
+#include <linux/kmemcheck.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/ip.h>
@@ -49,19 +50,22 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
inet_twsk_put(tw);
}
-void inet_twsk_put(struct inet_timewait_sock *tw)
+static noinline void inet_twsk_free(struct inet_timewait_sock *tw)
{
- if (atomic_dec_and_test(&tw->tw_refcnt)) {
- struct module *owner = tw->tw_prot->owner;
- twsk_destructor((struct sock *)tw);
+ struct module *owner = tw->tw_prot->owner;
+ twsk_destructor((struct sock *)tw);
#ifdef SOCK_REFCNT_DEBUG
- printk(KERN_DEBUG "%s timewait_sock %p released\n",
- tw->tw_prot->name, tw);
+ pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw);
#endif
- release_net(twsk_net(tw));
- kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
- module_put(owner);
- }
+ release_net(twsk_net(tw));
+ kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
+ module_put(owner);
+}
+
+void inet_twsk_put(struct inet_timewait_sock *tw)
+{
+ if (atomic_dec_and_test(&tw->tw_refcnt))
+ inet_twsk_free(tw);
}
EXPORT_SYMBOL_GPL(inet_twsk_put);
@@ -117,6 +121,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
if (tw != NULL) {
const struct inet_sock *inet = inet_sk(sk);
+ kmemcheck_annotate_bitfield(tw, flags);
+
/* Give us an identity. */
tw->tw_daddr = inet->daddr;
tw->tw_rcv_saddr = inet->rcv_saddr;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index df3fe50bbf0..a2991bc8e32 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -42,7 +42,7 @@ static int ip_forward_finish(struct sk_buff *skb)
{
struct ip_options * opt = &(IPCB(skb)->opt);
- IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
if (unlikely(opt->optlen))
ip_forward_options(skb);
@@ -81,7 +81,7 @@ int ip_forward(struct sk_buff *skb)
if (!xfrm4_route_forward(skb))
goto drop;
- rt = skb->rtable;
+ rt = skb_rtable(skb);
if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
goto sr_failed;
@@ -123,7 +123,7 @@ sr_failed:
too_many_hops:
/* Tell the sender its packet died... */
- IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
drop:
kfree_skb(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 7985346653b..575f9bd51cc 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -507,7 +507,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
- if (skb_shinfo(head)->frag_list) {
+ if (skb_has_frags(head)) {
struct sk_buff *clone;
int i, plen = 0;
@@ -516,7 +516,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
clone->next = head->next;
head->next = clone;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_shinfo(head)->frag_list = NULL;
+ skb_frag_list_init(head);
for (i=0; i<skb_shinfo(head)->nr_frags; i++)
plen += skb_shinfo(head)->frags[i].size;
clone->len = clone->data_len = head->data_len - plen;
@@ -573,7 +573,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
struct ipq *qp;
struct net *net;
- net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev);
+ net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
/* Start by cleaning up the memory. */
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index e62510d5ea5..44e2a3d2359 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -602,7 +602,7 @@ static int ipgre_rcv(struct sk_buff *skb)
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
/* Looped back packet, drop it! */
- if (skb->rtable->fl.iif == 0)
+ if (skb_rtable(skb)->fl.iif == 0)
goto drop;
stats->multicast++;
skb->pkt_type = PACKET_BROADCAST;
@@ -643,8 +643,7 @@ static int ipgre_rcv(struct sk_buff *skb)
stats->rx_packets++;
stats->rx_bytes += len;
skb->dev = tunnel->dev;
- dst_release(skb->dst);
- skb->dst = NULL;
+ skb_dst_drop(skb);
nf_reset(skb);
skb_reset_network_header(skb);
@@ -698,13 +697,13 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if ((dst = tiph->daddr) == 0) {
/* NBMA tunnel */
- if (skb->dst == NULL) {
+ if (skb_dst(skb) == NULL) {
stats->tx_fifo_errors++;
goto tx_error;
}
if (skb->protocol == htons(ETH_P_IP)) {
- rt = skb->rtable;
+ rt = skb_rtable(skb);
if ((dst = rt->rt_gateway) == 0)
goto tx_error_icmp;
}
@@ -712,7 +711,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
else if (skb->protocol == htons(ETH_P_IPV6)) {
struct in6_addr *addr6;
int addr_type;
- struct neighbour *neigh = skb->dst->neighbour;
+ struct neighbour *neigh = skb_dst(skb)->neighbour;
if (neigh == NULL)
goto tx_error;
@@ -766,10 +765,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (df)
mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
else
- mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
+ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
- if (skb->dst)
- skb->dst->ops->update_pmtu(skb->dst, mtu);
+ if (skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
if (skb->protocol == htons(ETH_P_IP)) {
df |= (old_iph->frag_off&htons(IP_DF));
@@ -783,14 +782,14 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
}
#ifdef CONFIG_IPV6
else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct rt6_info *rt6 = (struct rt6_info *)skb->dst;
+ struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
- if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
+ if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
if ((tunnel->parms.iph.daddr &&
!ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
rt6->rt6i_dst.plen == 128) {
rt6->rt6i_flags |= RTF_MODIFIED;
- skb->dst->metrics[RTAX_MTU-1] = mtu;
+ skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
}
}
@@ -837,8 +836,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
IPSKB_REROUTED);
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
/*
* Push down and install the IPIP header.
@@ -1238,6 +1237,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
dev->iflink = 0;
dev->addr_len = 4;
dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
static int ipgre_tunnel_init(struct net_device *dev)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 1a58a6fa1dc..490ce20faf3 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -329,7 +329,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
* Initialise the virtual path cache for the packet. It describes
* how the packet travels inside Linux networking.
*/
- if (skb->dst == NULL) {
+ if (skb_dst(skb) == NULL) {
int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
skb->dev);
if (unlikely(err)) {
@@ -344,9 +344,9 @@ static int ip_rcv_finish(struct sk_buff *skb)
}
#ifdef CONFIG_NET_CLS_ROUTE
- if (unlikely(skb->dst->tclassid)) {
+ if (unlikely(skb_dst(skb)->tclassid)) {
struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id());
- u32 idx = skb->dst->tclassid;
+ u32 idx = skb_dst(skb)->tclassid;
st[idx&0xFF].o_packets++;
st[idx&0xFF].o_bytes += skb->len;
st[(idx>>16)&0xFF].i_packets++;
@@ -357,11 +357,13 @@ static int ip_rcv_finish(struct sk_buff *skb)
if (iph->ihl > 5 && ip_rcv_options(skb))
goto drop;
- rt = skb->rtable;
- if (rt->rt_type == RTN_MULTICAST)
- IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCASTPKTS);
- else if (rt->rt_type == RTN_BROADCAST)
- IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCASTPKTS);
+ rt = skb_rtable(skb);
+ if (rt->rt_type == RTN_MULTICAST) {
+ IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST,
+ skb->len);
+ } else if (rt->rt_type == RTN_BROADCAST)
+ IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST,
+ skb->len);
return dst_input(skb);
@@ -384,7 +386,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
if (skb->pkt_type == PACKET_OTHERHOST)
goto drop;
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES);
+
+ IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 2c88da6e786..94bf105ef3c 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -102,7 +102,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
sptr = skb_network_header(skb);
dptr = dopt->__data;
- daddr = skb->rtable->rt_spec_dst;
+ daddr = skb_rtable(skb)->rt_spec_dst;
if (sopt->rr) {
optlen = sptr[sopt->rr+1];
@@ -143,7 +143,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
__be32 addr;
memcpy(&addr, sptr+soffset-1, 4);
- if (inet_addr_type(dev_net(skb->dst->dev), addr) != RTN_LOCAL) {
+ if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_LOCAL) {
dopt->ts_needtime = 1;
soffset += 8;
}
@@ -257,7 +257,7 @@ int ip_options_compile(struct net *net,
struct rtable *rt = NULL;
if (skb != NULL) {
- rt = skb->rtable;
+ rt = skb_rtable(skb);
optptr = (unsigned char *)&(ip_hdr(skb)[1]);
} else
optptr = opt->__data;
@@ -550,7 +550,7 @@ void ip_forward_options(struct sk_buff *skb)
{
struct ip_options * opt = &(IPCB(skb)->opt);
unsigned char * optptr;
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
unsigned char *raw = skb_network_header(skb);
if (opt->rr_needaddr) {
@@ -598,7 +598,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
__be32 nexthop;
struct iphdr *iph = ip_hdr(skb);
unsigned char *optptr = skb_network_header(skb) + opt->srr;
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
struct rtable *rt2;
int err;
@@ -623,13 +623,13 @@ int ip_options_rcv_srr(struct sk_buff *skb)
}
memcpy(&nexthop, &optptr[srrptr-1], 4);
- rt = skb->rtable;
- skb->rtable = NULL;
+ rt = skb_rtable(skb);
+ skb_dst_set(skb, NULL);
err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
- rt2 = skb->rtable;
+ rt2 = skb_rtable(skb);
if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
ip_rt_put(rt2);
- skb->rtable = rt;
+ skb_dst_set(skb, &rt->u.dst);
return -EINVAL;
}
ip_rt_put(rt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3e7e910c7c0..24702628266 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -95,7 +95,7 @@ int __ip_local_out(struct sk_buff *skb)
iph->tot_len = htons(skb->len);
ip_send_check(iph);
- return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
+ return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
dst_output);
}
@@ -118,7 +118,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
__skb_pull(newskb, skb_network_offset(newskb));
newskb->pkt_type = PACKET_LOOPBACK;
newskb->ip_summed = CHECKSUM_UNNECESSARY;
- WARN_ON(!newskb->dst);
+ WARN_ON(!skb_dst(newskb));
netif_rx(newskb);
return 0;
}
@@ -140,7 +140,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
__be32 saddr, __be32 daddr, struct ip_options *opt)
{
struct inet_sock *inet = inet_sk(sk);
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
struct iphdr *iph;
/* Build the IP header. */
@@ -176,15 +176,15 @@ EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
static inline int ip_finish_output2(struct sk_buff *skb)
{
- struct dst_entry *dst = skb->dst;
+ struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = (struct rtable *)dst;
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
- if (rt->rt_type == RTN_MULTICAST)
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTMCASTPKTS);
- else if (rt->rt_type == RTN_BROADCAST)
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTBCASTPKTS);
+ if (rt->rt_type == RTN_MULTICAST) {
+ IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
+ } else if (rt->rt_type == RTN_BROADCAST)
+ IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len);
/* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
@@ -217,14 +217,14 @@ static inline int ip_skb_dst_mtu(struct sk_buff *skb)
struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
- skb->dst->dev->mtu : dst_mtu(skb->dst);
+ skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
}
static int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
- if (skb->dst->xfrm != NULL) {
+ if (skb_dst(skb)->xfrm != NULL) {
IPCB(skb)->flags |= IPSKB_REROUTED;
return dst_output(skb);
}
@@ -238,13 +238,13 @@ static int ip_finish_output(struct sk_buff *skb)
int ip_mc_output(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
struct net_device *dev = rt->u.dst.dev;
/*
* If the indicated interface is up and running, send the packet.
*/
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS);
+ IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -296,9 +296,9 @@ int ip_mc_output(struct sk_buff *skb)
int ip_output(struct sk_buff *skb)
{
- struct net_device *dev = skb->dst->dev;
+ struct net_device *dev = skb_dst(skb)->dev;
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS);
+ IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -319,7 +319,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
/* Skip all of this if the packet is already routed,
* f.e. by something like SCTP.
*/
- rt = skb->rtable;
+ rt = skb_rtable(skb);
if (rt != NULL)
goto packet_routed;
@@ -355,7 +355,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
}
sk_setup_caps(sk, &rt->u.dst);
}
- skb->dst = dst_clone(&rt->u.dst);
+ skb_dst_set(skb, dst_clone(&rt->u.dst));
packet_routed:
if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
@@ -401,8 +401,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->pkt_type = from->pkt_type;
to->priority = from->priority;
to->protocol = from->protocol;
- dst_release(to->dst);
- to->dst = dst_clone(from->dst);
+ skb_dst_drop(to);
+ skb_dst_set(to, dst_clone(skb_dst(from)));
to->dev = from->dev;
to->mark = from->mark;
@@ -440,7 +440,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
unsigned int mtu, hlen, left, len, ll_rs, pad;
int offset;
__be16 not_last_frag;
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
int err = 0;
dev = rt->u.dst.dev;
@@ -474,7 +474,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
* LATER: this step can be merged to real generation of fragments,
* we can switch to copy when see the first bad fragment.
*/
- if (skb_shinfo(skb)->frag_list) {
+ if (skb_has_frags(skb)) {
struct sk_buff *frag;
int first_len = skb_pagelen(skb);
int truesizes = 0;
@@ -485,7 +485,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
skb_cloned(skb))
goto slow_path;
- for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
+ skb_walk_frags(skb, frag) {
/* Correct geometry. */
if (frag->len > mtu ||
((frag->len & 7) && frag->next) ||
@@ -498,7 +498,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
BUG_ON(frag->sk);
if (skb->sk) {
- sock_hold(skb->sk);
frag->sk = skb->sk;
frag->destructor = sock_wfree;
truesizes += frag->truesize;
@@ -510,7 +509,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
err = 0;
offset = 0;
frag = skb_shinfo(skb)->frag_list;
- skb_shinfo(skb)->frag_list = NULL;
+ skb_frag_list_init(skb);
skb->data_len = first_len - skb_headlen(skb);
skb->truesize -= truesizes;
skb->len = first_len;
@@ -1294,7 +1293,7 @@ int ip_push_pending_frames(struct sock *sk)
* on dst refcount
*/
inet->cork.dst = NULL;
- skb->dst = &rt->u.dst;
+ skb_dst_set(skb, &rt->u.dst);
if (iph->protocol == IPPROTO_ICMP)
icmp_out_count(net, ((struct icmphdr *)
@@ -1362,7 +1361,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
} replyopts;
struct ipcm_cookie ipc;
__be32 daddr;
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
if (ip_options_echo(&replyopts.opt, skb))
return;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 43c05854d75..fc7993e9061 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -57,7 +57,7 @@
static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
struct in_pktinfo info;
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
if (rt) {
@@ -157,38 +157,39 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
/* Ordered by supposed usage frequency */
if (flags & 1)
ip_cmsg_recv_pktinfo(msg, skb);
- if ((flags>>=1) == 0)
+ if ((flags >>= 1) == 0)
return;
if (flags & 1)
ip_cmsg_recv_ttl(msg, skb);
- if ((flags>>=1) == 0)
+ if ((flags >>= 1) == 0)
return;
if (flags & 1)
ip_cmsg_recv_tos(msg, skb);
- if ((flags>>=1) == 0)
+ if ((flags >>= 1) == 0)
return;
if (flags & 1)
ip_cmsg_recv_opts(msg, skb);
- if ((flags>>=1) == 0)
+ if ((flags >>= 1) == 0)
return;
if (flags & 1)
ip_cmsg_recv_retopts(msg, skb);
- if ((flags>>=1) == 0)
+ if ((flags >>= 1) == 0)
return;
if (flags & 1)
ip_cmsg_recv_security(msg, skb);
- if ((flags>>=1) == 0)
+ if ((flags >>= 1) == 0)
return;
if (flags & 1)
ip_cmsg_recv_dstaddr(msg, skb);
}
+EXPORT_SYMBOL(ip_cmsg_recv);
int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
{
@@ -203,7 +204,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
switch (cmsg->cmsg_type) {
case IP_RETOPTS:
err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
- err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40);
+ err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
+ err < 40 ? err : 40);
if (err)
return err;
break;
@@ -238,7 +240,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
struct ip_ra_chain *ip_ra_chain;
DEFINE_RWLOCK(ip_ra_lock);
-int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *))
+int ip_ra_control(struct sock *sk, unsigned char on,
+ void (*destructor)(struct sock *))
{
struct ip_ra_chain *ra, *new_ra, **rap;
@@ -248,7 +251,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s
new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
write_lock_bh(&ip_ra_lock);
- for (rap = &ip_ra_chain; (ra=*rap) != NULL; rap = &ra->next) {
+ for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
if (ra->sk == sk) {
if (on) {
write_unlock_bh(&ip_ra_lock);
@@ -416,7 +419,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
/* Reset and regenerate socket error */
spin_lock_bh(&sk->sk_error_queue.lock);
sk->sk_err = 0;
- if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
+ skb2 = skb_peek(&sk->sk_error_queue);
+ if (skb2 != NULL) {
sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
spin_unlock_bh(&sk->sk_error_queue.lock);
sk->sk_error_report(sk);
@@ -431,8 +435,8 @@ out:
/*
- * Socket option code for IP. This is the end of the line after any TCP,UDP etc options on
- * an IP socket.
+ * Socket option code for IP. This is the end of the line after any
+ * TCP,UDP etc options on an IP socket.
*/
static int do_ip_setsockopt(struct sock *sk, int level,
@@ -449,6 +453,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
(1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
(1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) ||
optname == IP_MULTICAST_TTL ||
+ optname == IP_MULTICAST_ALL ||
optname == IP_MULTICAST_LOOP ||
optname == IP_RECVORIGDSTADDR) {
if (optlen >= sizeof(int)) {
@@ -474,7 +479,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
switch (optname) {
case IP_OPTIONS:
{
- struct ip_options * opt = NULL;
+ struct ip_options *opt = NULL;
if (optlen > 40 || optlen < 0)
goto e_inval;
err = ip_options_get_from_user(sock_net(sk), &opt,
@@ -556,9 +561,9 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
break;
case IP_TTL:
- if (optlen<1)
+ if (optlen < 1)
goto e_inval;
- if (val != -1 && (val < 1 || val>255))
+ if (val != -1 && (val < 0 || val > 255))
goto e_inval;
inet->uc_ttl = val;
break;
@@ -570,7 +575,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet->hdrincl = val ? 1 : 0;
break;
case IP_MTU_DISCOVER:
- if (val<0 || val>3)
+ if (val < 0 || val > 3)
goto e_inval;
inet->pmtudisc = val;
break;
@@ -582,7 +587,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_MULTICAST_TTL:
if (sk->sk_type == SOCK_STREAM)
goto e_inval;
- if (optlen<1)
+ if (optlen < 1)
goto e_inval;
if (val == -1)
val = 1;
@@ -591,7 +596,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
inet->mc_ttl = val;
break;
case IP_MULTICAST_LOOP:
- if (optlen<1)
+ if (optlen < 1)
goto e_inval;
inet->mc_loop = !!val;
break;
@@ -613,7 +618,8 @@ static int do_ip_setsockopt(struct sock *sk, int level,
} else {
memset(&mreq, 0, sizeof(mreq));
if (optlen >= sizeof(struct in_addr) &&
- copy_from_user(&mreq.imr_address, optval, sizeof(struct in_addr)))
+ copy_from_user(&mreq.imr_address, optval,
+ sizeof(struct in_addr)))
break;
}
@@ -677,7 +683,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
case IP_MSFILTER:
{
- extern int sysctl_igmp_max_msf;
struct ip_msfilter *msf;
if (optlen < IP_MSFILTER_SIZE(0))
@@ -831,7 +836,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
case MCAST_MSFILTER:
{
- extern int sysctl_igmp_max_msf;
struct sockaddr_in *psin;
struct ip_msfilter *msf = NULL;
struct group_filter *gsf = NULL;
@@ -849,9 +853,9 @@ static int do_ip_setsockopt(struct sock *sk, int level,
break;
}
err = -EFAULT;
- if (copy_from_user(gsf, optval, optlen)) {
+ if (copy_from_user(gsf, optval, optlen))
goto mc_msf_out;
- }
+
/* numsrc >= (4G-140)/128 overflow in 32 bits */
if (gsf->gf_numsrc >= 0x1ffffff ||
gsf->gf_numsrc > sysctl_igmp_max_msf) {
@@ -879,7 +883,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
msf->imsf_fmode = gsf->gf_fmode;
msf->imsf_numsrc = gsf->gf_numsrc;
err = -EADDRNOTAVAIL;
- for (i=0; i<gsf->gf_numsrc; ++i) {
+ for (i = 0; i < gsf->gf_numsrc; ++i) {
psin = (struct sockaddr_in *)&gsf->gf_slist[i];
if (psin->sin_family != AF_INET)
@@ -890,17 +894,24 @@ static int do_ip_setsockopt(struct sock *sk, int level,
gsf = NULL;
err = ip_mc_msfilter(sk, msf, ifindex);
- mc_msf_out:
+mc_msf_out:
kfree(msf);
kfree(gsf);
break;
}
+ case IP_MULTICAST_ALL:
+ if (optlen < 1)
+ goto e_inval;
+ if (val != 0 && val != 1)
+ goto e_inval;
+ inet->mc_all = val;
+ break;
case IP_ROUTER_ALERT:
err = ip_ra_control(sk, val ? 1 : 0, NULL);
break;
case IP_FREEBIND:
- if (optlen<1)
+ if (optlen < 1)
goto e_inval;
inet->freebind = !!val;
break;
@@ -957,6 +968,7 @@ int ip_setsockopt(struct sock *sk, int level,
#endif
return err;
}
+EXPORT_SYMBOL(ip_setsockopt);
#ifdef CONFIG_COMPAT
int compat_ip_setsockopt(struct sock *sk, int level, int optname,
@@ -986,13 +998,12 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
#endif
return err;
}
-
EXPORT_SYMBOL(compat_ip_setsockopt);
#endif
/*
- * Get the options. Note for future reference. The GET of IP options gets the
- * _received_ ones. The set sets the _sent_ ones.
+ * Get the options. Note for future reference. The GET of IP options gets
+ * the _received_ ones. The set sets the _sent_ ones.
*/
static int do_ip_getsockopt(struct sock *sk, int level, int optname,
@@ -1143,10 +1154,14 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
}
err = ip_mc_gsfget(sk, &gsf,
- (struct group_filter __user *)optval, optlen);
+ (struct group_filter __user *)optval,
+ optlen);
release_sock(sk);
return err;
}
+ case IP_MULTICAST_ALL:
+ val = inet->mc_all;
+ break;
case IP_PKTOPTIONS:
{
struct msghdr msg;
@@ -1187,7 +1202,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
}
release_sock(sk);
- if (len < sizeof(int) && len > 0 && val>=0 && val<=255) {
+ if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
unsigned char ucval = (unsigned char)val;
len = 1;
if (put_user(len, optlen))
@@ -1230,6 +1245,7 @@ int ip_getsockopt(struct sock *sk, int level,
#endif
return err;
}
+EXPORT_SYMBOL(ip_getsockopt);
#ifdef CONFIG_COMPAT
int compat_ip_getsockopt(struct sock *sk, int level, int optname,
@@ -1262,11 +1278,5 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
#endif
return err;
}
-
EXPORT_SYMBOL(compat_ip_getsockopt);
#endif
-
-EXPORT_SYMBOL(ip_cmsg_recv);
-
-EXPORT_SYMBOL(ip_getsockopt);
-EXPORT_SYMBOL(ip_setsockopt);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 90d22ae0a41..f8d04c25645 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -139,6 +139,8 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */
__be32 root_server_addr = NONE; /* Address of NFS server */
u8 root_server_path[256] = { 0, }; /* Path to mount as root */
+u32 ic_dev_xid; /* Device under configuration */
+
/* vendor class identifier */
static char vendor_class_identifier[253] __initdata;
@@ -158,6 +160,9 @@ static char user_dev_name[IFNAMSIZ] __initdata = { 0, };
/* Protocols supported by available interfaces */
static int ic_proto_have_if __initdata = 0;
+/* MTU for boot device */
+static int ic_dev_mtu __initdata = 0;
+
#ifdef IPCONFIG_DYNAMIC
static DEFINE_SPINLOCK(ic_recv_lock);
static volatile int ic_got_reply __initdata = 0; /* Proto(s) that replied */
@@ -284,7 +289,7 @@ set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port)
sin->sin_port = port;
}
-static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
+static int __init ic_devinet_ioctl(unsigned int cmd, struct ifreq *arg)
{
int res;
@@ -295,6 +300,17 @@ static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
return res;
}
+static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
+{
+ int res;
+
+ mm_segment_t oldfs = get_fs();
+ set_fs(get_ds());
+ res = dev_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
+ set_fs(oldfs);
+ return res;
+}
+
static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg)
{
int res;
@@ -319,20 +335,31 @@ static int __init ic_setup_if(void)
memset(&ir, 0, sizeof(ir));
strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name);
set_sockaddr(sin, ic_myaddr, 0);
- if ((err = ic_dev_ioctl(SIOCSIFADDR, &ir)) < 0) {
+ if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) {
printk(KERN_ERR "IP-Config: Unable to set interface address (%d).\n", err);
return -1;
}
set_sockaddr(sin, ic_netmask, 0);
- if ((err = ic_dev_ioctl(SIOCSIFNETMASK, &ir)) < 0) {
+ if ((err = ic_devinet_ioctl(SIOCSIFNETMASK, &ir)) < 0) {
printk(KERN_ERR "IP-Config: Unable to set interface netmask (%d).\n", err);
return -1;
}
set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0);
- if ((err = ic_dev_ioctl(SIOCSIFBRDADDR, &ir)) < 0) {
+ if ((err = ic_devinet_ioctl(SIOCSIFBRDADDR, &ir)) < 0) {
printk(KERN_ERR "IP-Config: Unable to set interface broadcast address (%d).\n", err);
return -1;
}
+ /* Handle the case where we need non-standard MTU on the boot link (a network
+ * using jumbo frames, for instance). If we can't set the mtu, don't error
+ * out, we'll try to muddle along.
+ */
+ if (ic_dev_mtu != 0) {
+ strcpy(ir.ifr_name, ic_dev->name);
+ ir.ifr_mtu = ic_dev_mtu;
+ if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0)
+ printk(KERN_ERR "IP-Config: Unable to set interface mtu to %d (%d).\n",
+ ic_dev_mtu, err);
+ }
return 0;
}
@@ -621,6 +648,7 @@ ic_dhcp_init_options(u8 *options)
12, /* Host name */
15, /* Domain name */
17, /* Boot path */
+ 26, /* MTU */
40, /* NIS domain name */
};
@@ -796,6 +824,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
{
u8 servers;
int i;
+ u16 mtu;
#ifdef IPCONFIG_DEBUG
u8 *c;
@@ -835,6 +864,10 @@ static void __init ic_do_bootp_ext(u8 *ext)
if (!root_server_path[0])
ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path));
break;
+ case 26: /* Interface MTU */
+ memcpy(&mtu, ext+1, sizeof(mtu));
+ ic_dev_mtu = ntohs(mtu);
+ break;
case 40: /* NIS Domain name (_not_ DNS) */
ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN);
break;
@@ -932,6 +965,13 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
goto drop_unlock;
}
+ /* Is it a reply for the device we are configuring? */
+ if (b->xid != ic_dev_xid) {
+ if (net_ratelimit())
+ printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n");
+ goto drop_unlock;
+ }
+
/* Parse extensions */
if (ext_len >= 4 &&
!memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */
@@ -1115,6 +1155,9 @@ static int __init ic_dynamic(void)
get_random_bytes(&timeout, sizeof(timeout));
timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
for (;;) {
+ /* Track the device we are configuring */
+ ic_dev_xid = d->xid;
+
#ifdef IPCONFIG_BOOTP
if (do_bootp && (d->able & IC_BOOTP))
ic_bootp_send_if(d, jiffies - start_jiffies);
@@ -1391,6 +1434,8 @@ static int __init ip_auto_config(void)
printk(",\n bootserver=%pI4", &ic_servaddr);
printk(", rootserver=%pI4", &root_server_addr);
printk(", rootpath=%s", root_server_path);
+ if (ic_dev_mtu)
+ printk(", mtu=%d", ic_dev_mtu);
printk("\n");
#endif /* !SILENT */
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 9054139795a..93e2b787da2 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -370,8 +370,7 @@ static int ipip_rcv(struct sk_buff *skb)
tunnel->dev->stats.rx_packets++;
tunnel->dev->stats.rx_bytes += skb->len;
skb->dev = tunnel->dev;
- dst_release(skb->dst);
- skb->dst = NULL;
+ skb_dst_drop(skb);
nf_reset(skb);
ipip_ecn_decapsulate(iph, skb);
netif_rx(skb);
@@ -416,7 +415,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (!dst) {
/* NBMA tunnel */
- if ((rt = skb->rtable) == NULL) {
+ if ((rt = skb_rtable(skb)) == NULL) {
stats->tx_fifo_errors++;
goto tx_error;
}
@@ -447,15 +446,15 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (tiph->frag_off)
mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
else
- mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
+ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
if (mtu < 68) {
stats->collisions++;
ip_rt_put(rt);
goto tx_error;
}
- if (skb->dst)
- skb->dst->ops->update_pmtu(skb->dst, mtu);
+ if (skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
df |= (old_iph->frag_off&htons(IP_DF));
@@ -502,8 +501,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
IPSKB_REROUTED);
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
/*
* Push down and install the IPIP header.
@@ -713,6 +712,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
dev->iflink = 0;
dev->addr_len = 4;
dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
static void ipip_tunnel_init(struct net_device *dev)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 13e9dd3012b..9a8da5ed92b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -226,9 +226,10 @@ static void reg_vif_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->netdev_ops = &reg_vif_netdev_ops,
dev->destructor = free_netdev;
+ dev->features |= NETIF_F_NETNS_LOCAL;
}
-static struct net_device *ipmr_reg_vif(void)
+static struct net_device *ipmr_reg_vif(struct net *net)
{
struct net_device *dev;
struct in_device *in_dev;
@@ -238,6 +239,8 @@ static struct net_device *ipmr_reg_vif(void)
if (dev == NULL)
return NULL;
+ dev_net_set(dev, net);
+
if (register_netdevice(dev)) {
free_netdev(dev);
return NULL;
@@ -448,7 +451,7 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
*/
if (net->ipv4.mroute_reg_vif_num >= 0)
return -EADDRINUSE;
- dev = ipmr_reg_vif();
+ dev = ipmr_reg_vif(net);
if (!dev)
return -ENOBUFS;
err = dev_set_allmulti(dev, 1);
@@ -651,7 +654,7 @@ static int ipmr_cache_report(struct net *net,
ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
msg = (struct igmpmsg *)skb_network_header(skb);
msg->im_vif = vifi;
- skb->dst = dst_clone(pkt->dst);
+ skb_dst_set(skb, dst_clone(skb_dst(pkt)));
/*
* Add our header
@@ -1031,16 +1034,6 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
if (v != net->ipv4.mroute_do_pim) {
net->ipv4.mroute_do_pim = v;
net->ipv4.mroute_do_assert = v;
-#ifdef CONFIG_IP_PIMSM_V2
- if (net->ipv4.mroute_do_pim)
- ret = inet_add_protocol(&pim_protocol,
- IPPROTO_PIM);
- else
- ret = inet_del_protocol(&pim_protocol,
- IPPROTO_PIM);
- if (ret < 0)
- ret = -EAGAIN;
-#endif
}
rtnl_unlock();
return ret;
@@ -1201,7 +1194,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
iph->protocol = IPPROTO_IPIP;
iph->ihl = 5;
iph->tot_len = htons(skb->len);
- ip_select_ident(iph, skb->dst, NULL);
+ ip_select_ident(iph, skb_dst(skb), NULL);
ip_send_check(iph);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1212,7 +1205,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
{
struct ip_options * opt = &(IPCB(skb)->opt);
- IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
if (unlikely(opt->optlen))
ip_forward_options(skb);
@@ -1290,8 +1283,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
vif->pkt_out++;
vif->bytes_out += skb->len;
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
ip_decrease_ttl(ip_hdr(skb));
/* FIXME: forward and output firewalls used to be called here.
@@ -1354,7 +1347,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
if (net->ipv4.vif_table[vif].dev != skb->dev) {
int true_vifi;
- if (skb->rtable->fl.iif == 0) {
+ if (skb_rtable(skb)->fl.iif == 0) {
/* It is our own packet, looped back.
Very complicated situation...
@@ -1430,7 +1423,7 @@ int ip_mr_input(struct sk_buff *skb)
{
struct mfc_cache *cache;
struct net *net = dev_net(skb->dev);
- int local = skb->rtable->rt_flags&RTCF_LOCAL;
+ int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
/* Packet is looped back after forward, it should not be
forwarded second time, but still can be delivered locally.
@@ -1543,8 +1536,7 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
skb->protocol = htons(ETH_P_IP);
skb->ip_summed = 0;
skb->pkt_type = PACKET_HOST;
- dst_release(skb->dst);
- skb->dst = NULL;
+ skb_dst_drop(skb);
reg_dev->stats.rx_bytes += skb->len;
reg_dev->stats.rx_packets++;
nf_reset(skb);
@@ -1646,7 +1638,7 @@ int ipmr_get_route(struct net *net,
{
int err;
struct mfc_cache *cache;
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
read_lock(&mrt_lock);
cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
@@ -1955,6 +1947,7 @@ static const struct file_operations ipmr_mfc_fops = {
#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol = {
.handler = pim_rcv,
+ .netns_ok = 1,
};
#endif
@@ -2041,8 +2034,19 @@ int __init ip_mr_init(void)
err = register_netdevice_notifier(&ip_mr_notifier);
if (err)
goto reg_notif_fail;
+#ifdef CONFIG_IP_PIMSM_V2
+ if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
+ printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
+ err = -EAGAIN;
+ goto add_proto_fail;
+ }
+#endif
return 0;
+#ifdef CONFIG_IP_PIMSM_V2
+add_proto_fail:
+ unregister_netdevice_notifier(&ip_mr_notifier);
+#endif
reg_notif_fail:
del_timer(&ipmr_expire_timer);
unregister_pernet_subsys(&ipmr_net_ops);
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index fdf6811c31a..1725dc0ef68 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -12,7 +12,7 @@
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
{
- struct net *net = dev_net(skb->dst->dev);
+ struct net *net = dev_net(skb_dst(skb)->dev);
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct flowi fl = {};
@@ -41,8 +41,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
return -1;
/* Drop old route. */
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
} else {
/* non-local src, find valid iif to satisfy
* rp-filter when calling ip_route_input. */
@@ -50,7 +50,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
if (ip_route_output_key(net, &rt, &fl) != 0)
return -1;
- odst = skb->dst;
+ odst = skb_dst(skb);
if (ip_route_input(skb, iph->daddr, iph->saddr,
RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
dst_release(&rt->u.dst);
@@ -60,18 +60,22 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
dst_release(odst);
}
- if (skb->dst->error)
+ if (skb_dst(skb)->error)
return -1;
#ifdef CONFIG_XFRM
if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
- xfrm_decode_session(skb, &fl, AF_INET) == 0)
- if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0))
+ xfrm_decode_session(skb, &fl, AF_INET) == 0) {
+ struct dst_entry *dst = skb_dst(skb);
+ skb_dst_set(skb, NULL);
+ if (xfrm_lookup(net, &dst, &fl, skb->sk, 0))
return -1;
+ skb_dst_set(skb, dst);
+ }
#endif
/* Change in oif may mean change in hh_len. */
- hh_len = skb->dst->dev->hard_header_len;
+ hh_len = skb_dst(skb)->dev->hard_header_len;
if (skb_headroom(skb) < hh_len &&
pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
return -1;
@@ -92,7 +96,7 @@ int ip_xfrm_me_harder(struct sk_buff *skb)
if (xfrm_decode_session(skb, &fl, AF_INET) < 0)
return -1;
- dst = skb->dst;
+ dst = skb_dst(skb);
if (dst->xfrm)
dst = ((struct xfrm_dst *)dst)->route;
dst_hold(dst);
@@ -100,11 +104,11 @@ int ip_xfrm_me_harder(struct sk_buff *skb)
if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0)
return -1;
- dst_release(skb->dst);
- skb->dst = dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
/* Change in oif may mean change in hh_len. */
- hh_len = skb->dst->dev->hard_header_len;
+ hh_len = skb_dst(skb)->dev->hard_header_len;
if (skb_headroom(skb) < hh_len &&
pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
return -1;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 831fe1879dc..7505dff4ffd 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -231,6 +231,12 @@ static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
return (struct arpt_entry *)(base + offset);
}
+static inline __pure
+struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
+}
+
unsigned int arpt_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
@@ -267,67 +273,64 @@ unsigned int arpt_do_table(struct sk_buff *skb,
arp = arp_hdr(skb);
do {
- if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
- struct arpt_entry_target *t;
- int hdr_len;
-
- hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
- (2 * skb->dev->addr_len);
+ struct arpt_entry_target *t;
+ int hdr_len;
- ADD_COUNTER(e->counters, hdr_len, 1);
+ if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
+ e = arpt_next_entry(e);
+ continue;
+ }
- t = arpt_get_target(e);
+ hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
+ (2 * skb->dev->addr_len);
+ ADD_COUNTER(e->counters, hdr_len, 1);
- /* Standard target? */
- if (!t->u.kernel.target->target) {
- int v;
+ t = arpt_get_target(e);
- v = ((struct arpt_standard_target *)t)->verdict;
- if (v < 0) {
- /* Pop from stack? */
- if (v != ARPT_RETURN) {
- verdict = (unsigned)(-v) - 1;
- break;
- }
- e = back;
- back = get_entry(table_base,
- back->comefrom);
- continue;
- }
- if (table_base + v
- != (void *)e + e->next_offset) {
- /* Save old back ptr in next entry */
- struct arpt_entry *next
- = (void *)e + e->next_offset;
- next->comefrom =
- (void *)back - table_base;
-
- /* set back pointer to next entry */
- back = next;
- }
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
- e = get_entry(table_base, v);
- } else {
- /* Targets which reenter must return
- * abs. verdicts
- */
- tgpar.target = t->u.kernel.target;
- tgpar.targinfo = t->data;
- verdict = t->u.kernel.target->target(skb,
- &tgpar);
-
- /* Target might have changed stuff. */
- arp = arp_hdr(skb);
-
- if (verdict == ARPT_CONTINUE)
- e = (void *)e + e->next_offset;
- else
- /* Verdict */
+ v = ((struct arpt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != ARPT_RETURN) {
+ verdict = (unsigned)(-v) - 1;
break;
+ }
+ e = back;
+ back = get_entry(table_base, back->comefrom);
+ continue;
}
- } else {
- e = (void *)e + e->next_offset;
+ if (table_base + v
+ != arpt_next_entry(e)) {
+ /* Save old back ptr in next entry */
+ struct arpt_entry *next = arpt_next_entry(e);
+ next->comefrom = (void *)back - table_base;
+
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ continue;
}
+
+ /* Targets which reenter must return
+ * abs. verdicts
+ */
+ tgpar.target = t->u.kernel.target;
+ tgpar.targinfo = t->data;
+ verdict = t->u.kernel.target->target(skb, &tgpar);
+
+ /* Target might have changed stuff. */
+ arp = arp_hdr(skb);
+
+ if (verdict == ARPT_CONTINUE)
+ e = arpt_next_entry(e);
+ else
+ /* Verdict */
+ break;
} while (!hotdrop);
xt_info_rdunlock_bh();
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 5f22c91c6e1..c156db21598 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -596,7 +596,7 @@ static int __init ip_queue_init(void)
#ifdef CONFIG_SYSCTL
ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
#endif
- status = nf_register_queue_handler(PF_INET, &nfqh);
+ status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);
if (status < 0) {
printk(KERN_ERR "ip_queue: failed to register queue handler\n");
goto cleanup_sysctl;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2ec8d7290c4..fdefae6b5df 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -238,8 +238,8 @@ static struct nf_loginfo trace_loginfo = {
/* Mildly perf critical (only if packet tracing is on) */
static inline int
get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
- char *hookname, char **chainname,
- char **comment, unsigned int *rulenum)
+ const char *hookname, const char **chainname,
+ const char **comment, unsigned int *rulenum)
{
struct ipt_standard_target *t = (void *)ipt_get_target(s);
@@ -257,8 +257,8 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
&& unconditional(&s->ip)) {
/* Tail of chains: STANDARD target (return/policy) */
*comment = *chainname == hookname
- ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY]
- : (char *)comments[NF_IP_TRACE_COMMENT_RETURN];
+ ? comments[NF_IP_TRACE_COMMENT_POLICY]
+ : comments[NF_IP_TRACE_COMMENT_RETURN];
}
return 1;
} else
@@ -277,14 +277,14 @@ static void trace_packet(struct sk_buff *skb,
{
void *table_base;
const struct ipt_entry *root;
- char *hookname, *chainname, *comment;
+ const char *hookname, *chainname, *comment;
unsigned int rulenum = 0;
- table_base = (void *)private->entries[smp_processor_id()];
+ table_base = private->entries[smp_processor_id()];
root = get_entry(table_base, private->hook_entry[hook]);
- hookname = chainname = (char *)hooknames[hook];
- comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE];
+ hookname = chainname = hooknames[hook];
+ comment = comments[NF_IP_TRACE_COMMENT_RULE];
IPT_ENTRY_ITERATE(root,
private->size - private->hook_entry[hook],
@@ -297,6 +297,12 @@ static void trace_packet(struct sk_buff *skb,
}
#endif
+static inline __pure
+struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
+}
+
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ipt_do_table(struct sk_buff *skb,
@@ -305,6 +311,8 @@ ipt_do_table(struct sk_buff *skb,
const struct net_device *out,
struct xt_table *table)
{
+#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
+
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
const struct iphdr *ip;
u_int16_t datalen;
@@ -335,7 +343,7 @@ ipt_do_table(struct sk_buff *skb,
mtpar.in = tgpar.in = in;
mtpar.out = tgpar.out = out;
mtpar.family = tgpar.family = NFPROTO_IPV4;
- tgpar.hooknum = hook;
+ mtpar.hooknum = tgpar.hooknum = hook;
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
xt_info_rdlock_bh();
@@ -348,92 +356,84 @@ ipt_do_table(struct sk_buff *skb,
back = get_entry(table_base, private->underflow[hook]);
do {
+ struct ipt_entry_target *t;
+
IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
- if (ip_packet_match(ip, indev, outdev,
- &e->ip, mtpar.fragoff)) {
- struct ipt_entry_target *t;
-
- if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
- goto no_match;
+ if (!ip_packet_match(ip, indev, outdev,
+ &e->ip, mtpar.fragoff) ||
+ IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) {
+ e = ipt_next_entry(e);
+ continue;
+ }
- ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
+ ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
- t = ipt_get_target(e);
- IP_NF_ASSERT(t->u.kernel.target);
+ t = ipt_get_target(e);
+ IP_NF_ASSERT(t->u.kernel.target);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
- /* The packet is traced: log it */
- if (unlikely(skb->nf_trace))
- trace_packet(skb, hook, in, out,
- table->name, private, e);
+ /* The packet is traced: log it */
+ if (unlikely(skb->nf_trace))
+ trace_packet(skb, hook, in, out,
+ table->name, private, e);
#endif
- /* Standard target? */
- if (!t->u.kernel.target->target) {
- int v;
-
- v = ((struct ipt_standard_target *)t)->verdict;
- if (v < 0) {
- /* Pop from stack? */
- if (v != IPT_RETURN) {
- verdict = (unsigned)(-v) - 1;
- break;
- }
- e = back;
- back = get_entry(table_base,
- back->comefrom);
- continue;
- }
- if (table_base + v != (void *)e + e->next_offset
- && !(e->ip.flags & IPT_F_GOTO)) {
- /* Save old back ptr in next entry */
- struct ipt_entry *next
- = (void *)e + e->next_offset;
- next->comefrom
- = (void *)back - table_base;
- /* set back pointer to next entry */
- back = next;
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct ipt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != IPT_RETURN) {
+ verdict = (unsigned)(-v) - 1;
+ break;
}
+ e = back;
+ back = get_entry(table_base, back->comefrom);
+ continue;
+ }
+ if (table_base + v != ipt_next_entry(e)
+ && !(e->ip.flags & IPT_F_GOTO)) {
+ /* Save old back ptr in next entry */
+ struct ipt_entry *next = ipt_next_entry(e);
+ next->comefrom = (void *)back - table_base;
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ continue;
+ }
+
+ /* Targets which reenter must return
+ abs. verdicts */
+ tgpar.target = t->u.kernel.target;
+ tgpar.targinfo = t->data;
+
- e = get_entry(table_base, v);
- } else {
- /* Targets which reenter must return
- abs. verdicts */
- tgpar.target = t->u.kernel.target;
- tgpar.targinfo = t->data;
#ifdef CONFIG_NETFILTER_DEBUG
- ((struct ipt_entry *)table_base)->comefrom
- = 0xeeeeeeec;
+ tb_comefrom = 0xeeeeeeec;
#endif
- verdict = t->u.kernel.target->target(skb,
- &tgpar);
+ verdict = t->u.kernel.target->target(skb, &tgpar);
#ifdef CONFIG_NETFILTER_DEBUG
- if (((struct ipt_entry *)table_base)->comefrom
- != 0xeeeeeeec
- && verdict == IPT_CONTINUE) {
- printk("Target %s reentered!\n",
- t->u.kernel.target->name);
- verdict = NF_DROP;
- }
- ((struct ipt_entry *)table_base)->comefrom
- = 0x57acc001;
+ if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
+ printk("Target %s reentered!\n",
+ t->u.kernel.target->name);
+ verdict = NF_DROP;
+ }
+ tb_comefrom = 0x57acc001;
#endif
- /* Target might have changed stuff. */
- ip = ip_hdr(skb);
- datalen = skb->len - ip->ihl * 4;
-
- if (verdict == IPT_CONTINUE)
- e = (void *)e + e->next_offset;
- else
- /* Verdict */
- break;
- }
- } else {
+ /* Target might have changed stuff. */
+ ip = ip_hdr(skb);
+ datalen = skb->len - ip->ihl * 4;
- no_match:
- e = (void *)e + e->next_offset;
- }
+ if (verdict == IPT_CONTINUE)
+ e = ipt_next_entry(e);
+ else
+ /* Verdict */
+ break;
} while (!hotdrop);
xt_info_rdunlock_bh();
@@ -444,6 +444,8 @@ ipt_do_table(struct sk_buff *skb,
return NF_DROP;
else return verdict;
#endif
+
+#undef tb_comefrom
}
/* Figures out from what hook each rule can be called: returns 0 if
@@ -2158,7 +2160,7 @@ static bool icmp_checkentry(const struct xt_mtchk_param *par)
static struct xt_target ipt_standard_target __read_mostly = {
.name = IPT_STANDARD_TARGET,
.targetsize = sizeof(int),
- .family = AF_INET,
+ .family = NFPROTO_IPV4,
#ifdef CONFIG_COMPAT
.compatsize = sizeof(compat_int_t),
.compat_from_user = compat_standard_from_user,
@@ -2170,7 +2172,7 @@ static struct xt_target ipt_error_target __read_mostly = {
.name = IPT_ERROR_TARGET,
.target = ipt_error,
.targetsize = IPT_FUNCTION_MAXNAMELEN,
- .family = AF_INET,
+ .family = NFPROTO_IPV4,
};
static struct nf_sockopt_ops ipt_sockopts = {
@@ -2196,17 +2198,17 @@ static struct xt_match icmp_matchstruct __read_mostly = {
.matchsize = sizeof(struct ipt_icmp),
.checkentry = icmp_checkentry,
.proto = IPPROTO_ICMP,
- .family = AF_INET,
+ .family = NFPROTO_IPV4,
};
static int __net_init ip_tables_net_init(struct net *net)
{
- return xt_proto_init(net, AF_INET);
+ return xt_proto_init(net, NFPROTO_IPV4);
}
static void __net_exit ip_tables_net_exit(struct net *net)
{
- xt_proto_fini(net, AF_INET);
+ xt_proto_fini(net, NFPROTO_IPV4);
}
static struct pernet_operations ip_tables_net_ops = {
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index f389f60cb10..dada0863946 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -27,9 +27,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
-/* Lock protects masq region inside conntrack */
-static DEFINE_RWLOCK(masq_lock);
-
/* FIXME: Multiple targets. --RR */
static bool masquerade_tg_check(const struct xt_tgchk_param *par)
{
@@ -72,16 +69,14 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
return NF_ACCEPT;
mr = par->targinfo;
- rt = skb->rtable;
+ rt = skb_rtable(skb);
newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
if (!newsrc) {
printk("MASQUERADE: %s ate my IP address\n", par->out->name);
return NF_DROP;
}
- write_lock_bh(&masq_lock);
nat->masq_index = par->out->ifindex;
- write_unlock_bh(&masq_lock);
/* Transfer from original range. */
newrange = ((struct nf_nat_range)
@@ -97,16 +92,11 @@ static int
device_cmp(struct nf_conn *i, void *ifindex)
{
const struct nf_conn_nat *nat = nfct_nat(i);
- int ret;
if (!nat)
return 0;
- read_lock_bh(&masq_lock);
- ret = (nat->masq_index == (int)(long)ifindex);
- read_unlock_bh(&masq_lock);
-
- return ret;
+ return nat->masq_index == (int)(long)ifindex;
}
static int masq_device_event(struct notifier_block *this,
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 0b4b6e0ff2b..c93ae44bff2 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -108,17 +108,16 @@ static void send_reset(struct sk_buff *oldskb, int hook)
addr_type = RTN_LOCAL;
/* ip_route_me_harder expects skb->dst to be set */
- dst_hold(oldskb->dst);
- nskb->dst = oldskb->dst;
+ skb_dst_set(nskb, dst_clone(skb_dst(oldskb)));
if (ip_route_me_harder(nskb, addr_type))
goto free_nskb;
- niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
+ niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT);
nskb->ip_summed = CHECKSUM_NONE;
/* "Never happens" */
- if (nskb->len > dst_mtu(nskb->dst))
+ if (nskb->len > dst_mtu(skb_dst(nskb)))
goto free_nskb;
nf_ct_attach(nskb, oldskb);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 23b2c2ee869..d71ba767734 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -82,18 +82,10 @@ static int icmp_packet(struct nf_conn *ct,
u_int8_t pf,
unsigned int hooknum)
{
- /* Try to delete connection immediately after all replies:
- won't actually vanish as we still have skb, and del_timer
- means this will only run once even if count hits zero twice
- (theoretically possible with SMP) */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- if (atomic_dec_and_test(&ct->proto.icmp.count))
- nf_ct_kill_acct(ct, ctinfo, skb);
- } else {
- atomic_inc(&ct->proto.icmp.count);
- nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
- nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
- }
+ /* Do not immediately delete the connection after the first
+ successful reply to avoid excessive conntrackd traffic
+ and also to handle correctly ICMP echo reply duplicates. */
+ nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
return NF_ACCEPT;
}
@@ -117,7 +109,6 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
return false;
}
- atomic_set(&ct->proto.icmp.count, 0);
return true;
}
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index cf7a42bf982..155c008626c 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -140,7 +140,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
const char *rep_buffer,
unsigned int rep_len)
{
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
struct iphdr *iph;
struct tcphdr *tcph;
int oldlen, datalen;
@@ -218,7 +218,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
const char *rep_buffer,
unsigned int rep_len)
{
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
struct iphdr *iph;
struct udphdr *udph;
int datalen, oldlen;
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index 65e470bc612..3fc598eeeb1 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -33,6 +33,7 @@ sctp_manip_pkt(struct sk_buff *skb,
enum nf_nat_manip_type maniptype)
{
const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ struct sk_buff *frag;
sctp_sctphdr_t *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
__be32 oldip, newip;
@@ -57,8 +58,8 @@ sctp_manip_pkt(struct sk_buff *skb,
}
crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
- for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next)
- crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb),
+ skb_walk_frags(skb, frag)
+ crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag),
crc32);
crc32 = sctp_end_cksum(crc32);
hdr->checksum = crc32;
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index b7dd695691a..5567bd0d075 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -167,10 +167,9 @@ nf_nat_in(unsigned int hooknum,
ret = nf_nat_fn(hooknum, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
- daddr != ip_hdr(skb)->daddr) {
- dst_release(skb->dst);
- skb->dst = NULL;
- }
+ daddr != ip_hdr(skb)->daddr)
+ skb_dst_drop(skb);
+
return ret;
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index cf0cdeeb1db..f25542c48b7 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -90,14 +90,14 @@ static const struct file_operations sockstat_seq_fops = {
/* snmp items */
static const struct snmp_mib snmp4_ipstats_list[] = {
- SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES),
+ SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INPKTS),
SNMP_MIB_ITEM("InHdrErrors", IPSTATS_MIB_INHDRERRORS),
SNMP_MIB_ITEM("InAddrErrors", IPSTATS_MIB_INADDRERRORS),
SNMP_MIB_ITEM("ForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS),
SNMP_MIB_ITEM("InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS),
SNMP_MIB_ITEM("InDiscards", IPSTATS_MIB_INDISCARDS),
SNMP_MIB_ITEM("InDelivers", IPSTATS_MIB_INDELIVERS),
- SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTREQUESTS),
+ SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTPKTS),
SNMP_MIB_ITEM("OutDiscards", IPSTATS_MIB_OUTDISCARDS),
SNMP_MIB_ITEM("OutNoRoutes", IPSTATS_MIB_OUTNOROUTES),
SNMP_MIB_ITEM("ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT),
@@ -118,6 +118,12 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
SNMP_MIB_ITEM("OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS),
SNMP_MIB_ITEM("InBcastPkts", IPSTATS_MIB_INBCASTPKTS),
SNMP_MIB_ITEM("OutBcastPkts", IPSTATS_MIB_OUTBCASTPKTS),
+ SNMP_MIB_ITEM("InOctets", IPSTATS_MIB_INOCTETS),
+ SNMP_MIB_ITEM("OutOctets", IPSTATS_MIB_OUTOCTETS),
+ SNMP_MIB_ITEM("InMcastOctets", IPSTATS_MIB_INMCASTOCTETS),
+ SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
+ SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
+ SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index f774651f0a4..3dc9171a272 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -343,7 +343,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb->dst = dst_clone(&rt->u.dst);
+ skb_dst_set(skb, dst_clone(&rt->u.dst));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c4c60e9f068..cd76b3cb709 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -131,8 +131,8 @@ static int ip_rt_min_advmss __read_mostly = 256;
static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
static int rt_chain_length_max __read_mostly = 20;
-static void rt_worker_func(struct work_struct *work);
-static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
+static struct delayed_work expires_work;
+static unsigned long expires_ljiffies;
/*
* Interface to generic destination cache.
@@ -784,20 +784,23 @@ static void rt_check_expire(void)
{
static unsigned int rover;
unsigned int i = rover, goal;
- struct rtable *rth, **rthp;
- unsigned long length = 0, samples = 0;
+ struct rtable *rth, *aux, **rthp;
+ unsigned long samples = 0;
unsigned long sum = 0, sum2 = 0;
+ unsigned long delta;
u64 mult;
- mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
+ delta = jiffies - expires_ljiffies;
+ expires_ljiffies = jiffies;
+ mult = ((u64)delta) << rt_hash_log;
if (ip_rt_gc_timeout > 1)
do_div(mult, ip_rt_gc_timeout);
goal = (unsigned int)mult;
if (goal > rt_hash_mask)
goal = rt_hash_mask + 1;
- length = 0;
for (; goal > 0; goal--) {
unsigned long tmo = ip_rt_gc_timeout;
+ unsigned long length;
i = (i + 1) & rt_hash_mask;
rthp = &rt_hash_table[i].chain;
@@ -809,8 +812,10 @@ static void rt_check_expire(void)
if (*rthp == NULL)
continue;
+ length = 0;
spin_lock_bh(rt_hash_lock_addr(i));
while ((rth = *rthp) != NULL) {
+ prefetch(rth->u.dst.rt_next);
if (rt_is_expired(rth)) {
*rthp = rth->u.dst.rt_next;
rt_free(rth);
@@ -819,33 +824,30 @@ static void rt_check_expire(void)
if (rth->u.dst.expires) {
/* Entry is expired even if it is in use */
if (time_before_eq(jiffies, rth->u.dst.expires)) {
+nofree:
tmo >>= 1;
rthp = &rth->u.dst.rt_next;
/*
- * Only bump our length if the hash
- * inputs on entries n and n+1 are not
- * the same, we only count entries on
+ * We only count entries on
* a chain with equal hash inputs once
* so that entries for different QOS
* levels, and other non-hash input
* attributes don't unfairly skew
* the length computation
*/
- if ((*rthp == NULL) ||
- !compare_hash_inputs(&(*rthp)->fl,
- &rth->fl))
- length += ONE;
+ for (aux = rt_hash_table[i].chain;;) {
+ if (aux == rth) {
+ length += ONE;
+ break;
+ }
+ if (compare_hash_inputs(&aux->fl, &rth->fl))
+ break;
+ aux = aux->u.dst.rt_next;
+ }
continue;
}
- } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
- tmo >>= 1;
- rthp = &rth->u.dst.rt_next;
- if ((*rthp == NULL) ||
- !compare_hash_inputs(&(*rthp)->fl,
- &rth->fl))
- length += ONE;
- continue;
- }
+ } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
+ goto nofree;
/* Cleanup aged off entries. */
*rthp = rth->u.dst.rt_next;
@@ -1065,10 +1067,10 @@ work_done:
out: return 0;
}
-static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
+static int rt_intern_hash(unsigned hash, struct rtable *rt,
+ struct rtable **rp, struct sk_buff *skb)
{
struct rtable *rth, **rthp;
- struct rtable *rthi;
unsigned long now;
struct rtable *cand, **candp;
u32 min_score;
@@ -1088,7 +1090,6 @@ restart:
}
rthp = &rt_hash_table[hash].chain;
- rthi = NULL;
spin_lock_bh(rt_hash_lock_addr(hash));
while ((rth = *rthp) != NULL) {
@@ -1117,7 +1118,10 @@ restart:
spin_unlock_bh(rt_hash_lock_addr(hash));
rt_drop(rt);
- *rp = rth;
+ if (rp)
+ *rp = rth;
+ else
+ skb_dst_set(skb, &rth->u.dst);
return 0;
}
@@ -1134,17 +1138,6 @@ restart:
chain_length++;
rthp = &rth->u.dst.rt_next;
-
- /*
- * check to see if the next entry in the chain
- * contains the same hash input values as rt. If it does
- * This is where we will insert into the list, instead of
- * at the head. This groups entries that differ by aspects not
- * relvant to the hash function together, which we use to adjust
- * our chain length
- */
- if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl))
- rthi = rth;
}
if (cand) {
@@ -1205,10 +1198,7 @@ restart:
}
}
- if (rthi)
- rt->u.dst.rt_next = rthi->u.dst.rt_next;
- else
- rt->u.dst.rt_next = rt_hash_table[hash].chain;
+ rt->u.dst.rt_next = rt_hash_table[hash].chain;
#if RT_CACHE_DEBUG >= 2
if (rt->u.dst.rt_next) {
@@ -1224,13 +1214,13 @@ restart:
* previous writes to rt are comitted to memory
* before making rt visible to other CPUS.
*/
- if (rthi)
- rcu_assign_pointer(rthi->u.dst.rt_next, rt);
- else
- rcu_assign_pointer(rt_hash_table[hash].chain, rt);
+ rcu_assign_pointer(rt_hash_table[hash].chain, rt);
spin_unlock_bh(rt_hash_lock_addr(hash));
- *rp = rt;
+ if (rp)
+ *rp = rt;
+ else
+ skb_dst_set(skb, &rt->u.dst);
return 0;
}
@@ -1427,7 +1417,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
&netevent);
rt_del(hash, rth);
- if (!rt_intern_hash(hash, rt, &rt))
+ if (!rt_intern_hash(hash, rt, &rt, NULL))
ip_rt_put(rt);
goto do_next;
}
@@ -1493,7 +1483,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
void ip_rt_send_redirect(struct sk_buff *skb)
{
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
if (!in_dev)
@@ -1541,7 +1531,7 @@ out:
static int ip_error(struct sk_buff *skb)
{
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
unsigned long now;
int code;
@@ -1718,7 +1708,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
- rt = skb->rtable;
+ rt = skb_rtable(skb);
if (rt)
dst_set_expires(&rt->u.dst, 0);
}
@@ -1878,7 +1868,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
in_dev_put(in_dev);
hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
- return rt_intern_hash(hash, rth, &skb->rtable);
+ return rt_intern_hash(hash, rth, NULL, skb);
e_nobufs:
in_dev_put(in_dev);
@@ -2039,7 +2029,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
/* put it into the cache */
hash = rt_hash(daddr, saddr, fl->iif,
rt_genid(dev_net(rth->u.dst.dev)));
- return rt_intern_hash(hash, rth, &skb->rtable);
+ return rt_intern_hash(hash, rth, NULL, skb);
}
/*
@@ -2195,7 +2185,7 @@ local_input:
}
rth->rt_type = res.type;
hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
- err = rt_intern_hash(hash, rth, &skb->rtable);
+ err = rt_intern_hash(hash, rth, NULL, skb);
goto done;
no_route:
@@ -2264,7 +2254,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(in_hit);
rcu_read_unlock();
- skb->rtable = rth;
+ skb_dst_set(skb, &rth->u.dst);
return 0;
}
RT_CACHE_STAT_INC(in_hlist_search);
@@ -2440,7 +2430,7 @@ static int ip_mkroute_output(struct rtable **rp,
if (err == 0) {
hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
rt_genid(dev_net(dev_out)));
- err = rt_intern_hash(hash, rth, rp);
+ err = rt_intern_hash(hash, rth, rp, NULL);
}
return err;
@@ -2783,7 +2773,7 @@ static int rt_fill_info(struct net *net,
struct sk_buff *skb, u32 pid, u32 seq, int event,
int nowait, unsigned int flags)
{
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
struct rtmsg *r;
struct nlmsghdr *nlh;
long expires;
@@ -2927,7 +2917,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
local_bh_enable();
- rt = skb->rtable;
+ rt = skb_rtable(skb);
if (err == 0 && rt->u.dst.error)
err = -rt->u.dst.error;
} else {
@@ -2947,7 +2937,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
if (err)
goto errout_free;
- skb->rtable = rt;
+ skb_dst_set(skb, &rt->u.dst);
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
@@ -2988,15 +2978,15 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (rt_is_expired(rt))
continue;
- skb->dst = dst_clone(&rt->u.dst);
+ skb_dst_set(skb, dst_clone(&rt->u.dst));
if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE,
1, NLM_F_MULTI) <= 0) {
- dst_release(xchg(&skb->dst, NULL));
+ skb_dst_drop(skb);
rcu_read_unlock_bh();
goto done;
}
- dst_release(xchg(&skb->dst, NULL));
+ skb_dst_drop(skb);
}
rcu_read_unlock_bh();
}
@@ -3410,6 +3400,8 @@ int __init ip_rt_init(void)
/* All the timers, started at system startup tend
to synchronize. Perturb it a bit.
*/
+ INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
+ expires_ljiffies = jiffies;
schedule_delayed_work(&expires_work,
net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b35a950d2e0..cd2b97f1b6e 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -161,13 +161,12 @@ static __u16 const msstab[] = {
*/
__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
{
- struct tcp_sock *tp = tcp_sk(sk);
const struct iphdr *iph = ip_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
int mssind;
const __u16 mss = *mssp;
- tp->last_synq_overflow = jiffies;
+ tcp_synq_overflow(sk);
/* XXX sort msstab[] by probability? Binary search? */
for (mssind = 0; mss > msstab[mssind + 1]; mssind++)
@@ -268,7 +267,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
if (!sysctl_tcp_syncookies || !th->ack)
goto out;
- if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
+ if (tcp_synq_no_recent_overflow(sk) ||
(mss = cookie_check(skb, cookie)) == 0) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1d7f49c6f0c..17b89c523f9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -439,12 +439,14 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
!tp->urg_data ||
before(tp->urg_seq, tp->copied_seq) ||
!before(tp->urg_seq, tp->rcv_nxt)) {
+ struct sk_buff *skb;
+
answ = tp->rcv_nxt - tp->copied_seq;
/* Subtract 1, if FIN is in queue. */
- if (answ && !skb_queue_empty(&sk->sk_receive_queue))
- answ -=
- tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin;
+ skb = skb_peek_tail(&sk->sk_receive_queue);
+ if (answ && skb)
+ answ -= tcp_hdr(skb)->fin;
} else
answ = tp->urg_seq - tp->copied_seq;
release_sock(sk);
@@ -1321,6 +1323,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct task_struct *user_recv = NULL;
int copied_early = 0;
struct sk_buff *skb;
+ u32 urg_hole = 0;
lock_sock(sk);
@@ -1381,11 +1384,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
/* Next get a buffer. */
- skb = skb_peek(&sk->sk_receive_queue);
- do {
- if (!skb)
- break;
-
+ skb_queue_walk(&sk->sk_receive_queue, skb) {
/* Now that we have two receive queues this
* shouldn't happen.
*/
@@ -1402,8 +1401,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (tcp_hdr(skb)->fin)
goto found_fin_ok;
WARN_ON(!(flags & MSG_PEEK));
- skb = skb->next;
- } while (skb != (struct sk_buff *)&sk->sk_receive_queue);
+ }
/* Well, if we have backlog, try to process it now yet. */
@@ -1532,7 +1530,8 @@ do_prequeue:
}
}
}
- if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) {
+ if ((flags & MSG_PEEK) &&
+ (peek_seq - copied - urg_hole != tp->copied_seq)) {
if (net_ratelimit())
printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
current->comm, task_pid_nr(current));
@@ -1553,6 +1552,7 @@ do_prequeue:
if (!urg_offset) {
if (!sock_flag(sk, SOCK_URGINLINE)) {
++*seq;
+ urg_hole++;
offset++;
used--;
if (!used)
@@ -2515,20 +2515,30 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
unsigned int thlen;
unsigned int flags;
unsigned int mss = 1;
+ unsigned int hlen;
+ unsigned int off;
int flush = 1;
int i;
- th = skb_gro_header(skb, sizeof(*th));
- if (unlikely(!th))
- goto out;
+ off = skb_gro_offset(skb);
+ hlen = off + sizeof(*th);
+ th = skb_gro_header_fast(skb, off);
+ if (skb_gro_header_hard(skb, hlen)) {
+ th = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!th))
+ goto out;
+ }
thlen = th->doff * 4;
if (thlen < sizeof(*th))
goto out;
- th = skb_gro_header(skb, thlen);
- if (unlikely(!th))
- goto out;
+ hlen = off + thlen;
+ if (skb_gro_header_hard(skb, hlen)) {
+ th = skb_gro_header_slow(skb, hlen, off);
+ if (unlikely(!th))
+ goto out;
+ }
skb_gro_pull(skb, thlen);
@@ -2541,7 +2551,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
th2 = tcp_hdr(p);
- if ((th->source ^ th2->source) | (th->dest ^ th2->dest)) {
+ if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
@@ -2556,14 +2566,14 @@ found:
flush |= flags & TCP_FLAG_CWR;
flush |= (flags ^ tcp_flag_word(th2)) &
~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH);
- flush |= (th->ack_seq ^ th2->ack_seq) | (th->window ^ th2->window);
- for (i = sizeof(*th); !flush && i < thlen; i += 4)
+ flush |= th->ack_seq ^ th2->ack_seq;
+ for (i = sizeof(*th); i < thlen; i += 4)
flush |= *(u32 *)((u8 *)th + i) ^
*(u32 *)((u8 *)th2 + i);
mss = skb_shinfo(p)->gso_size;
- flush |= (len > mss) | !len;
+ flush |= (len - 1) >= mss;
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
if (flush || skb_gro_receive(head, skb)) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eec3e6f9956..2bdb0da237e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -77,7 +77,7 @@ int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
-int sysctl_tcp_ecn __read_mostly;
+int sysctl_tcp_ecn __read_mostly = 2;
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
int sysctl_tcp_adv_win_scale __read_mostly = 2;
@@ -4426,7 +4426,7 @@ drop:
}
__skb_queue_head(&tp->out_of_order_queue, skb);
} else {
- struct sk_buff *skb1 = tp->out_of_order_queue.prev;
+ struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue);
u32 seq = TCP_SKB_CB(skb)->seq;
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
@@ -4443,15 +4443,18 @@ drop:
}
/* Find place to insert this segment. */
- do {
+ while (1) {
if (!after(TCP_SKB_CB(skb1)->seq, seq))
break;
- } while ((skb1 = skb1->prev) !=
- (struct sk_buff *)&tp->out_of_order_queue);
+ if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
+ skb1 = NULL;
+ break;
+ }
+ skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
+ }
/* Do skb overlap to previous one? */
- if (skb1 != (struct sk_buff *)&tp->out_of_order_queue &&
- before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+ if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
/* All the bits are present. Drop. */
__kfree_skb(skb);
@@ -4463,15 +4466,26 @@ drop:
tcp_dsack_set(sk, seq,
TCP_SKB_CB(skb1)->end_seq);
} else {
- skb1 = skb1->prev;
+ if (skb_queue_is_first(&tp->out_of_order_queue,
+ skb1))
+ skb1 = NULL;
+ else
+ skb1 = skb_queue_prev(
+ &tp->out_of_order_queue,
+ skb1);
}
}
- __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+ if (!skb1)
+ __skb_queue_head(&tp->out_of_order_queue, skb);
+ else
+ __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
/* And clean segments covered by new one as whole. */
- while ((skb1 = skb->next) !=
- (struct sk_buff *)&tp->out_of_order_queue &&
- after(end_seq, TCP_SKB_CB(skb1)->seq)) {
+ while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
+ skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+
+ if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
+ break;
if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
end_seq);
@@ -4492,7 +4506,10 @@ add_sack:
static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
struct sk_buff_head *list)
{
- struct sk_buff *next = skb->next;
+ struct sk_buff *next = NULL;
+
+ if (!skb_queue_is_last(list, skb))
+ next = skb_queue_next(list, skb);
__skb_unlink(skb, list);
__kfree_skb(skb);
@@ -4503,6 +4520,9 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
/* Collapse contiguous sequence of skbs head..tail with
* sequence numbers start..end.
+ *
+ * If tail is NULL, this means until the end of the list.
+ *
* Segments with FIN/SYN are not collapsed (only because this
* simplifies code)
*/
@@ -4511,15 +4531,23 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
struct sk_buff *head, struct sk_buff *tail,
u32 start, u32 end)
{
- struct sk_buff *skb;
+ struct sk_buff *skb, *n;
+ bool end_of_skbs;
/* First, check that queue is collapsible and find
* the point where collapsing can be useful. */
- for (skb = head; skb != tail;) {
+ skb = head;
+restart:
+ end_of_skbs = true;
+ skb_queue_walk_from_safe(list, skb, n) {
+ if (skb == tail)
+ break;
/* No new bits? It is possible on ofo queue. */
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
skb = tcp_collapse_one(sk, skb, list);
- continue;
+ if (!skb)
+ break;
+ goto restart;
}
/* The first skb to collapse is:
@@ -4529,16 +4557,24 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
*/
if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
(tcp_win_from_space(skb->truesize) > skb->len ||
- before(TCP_SKB_CB(skb)->seq, start) ||
- (skb->next != tail &&
- TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq)))
+ before(TCP_SKB_CB(skb)->seq, start))) {
+ end_of_skbs = false;
break;
+ }
+
+ if (!skb_queue_is_last(list, skb)) {
+ struct sk_buff *next = skb_queue_next(list, skb);
+ if (next != tail &&
+ TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
+ end_of_skbs = false;
+ break;
+ }
+ }
/* Decided to skip this, advance start seq. */
start = TCP_SKB_CB(skb)->end_seq;
- skb = skb->next;
}
- if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
+ if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
return;
while (before(start, end)) {
@@ -4583,7 +4619,8 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
}
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
skb = tcp_collapse_one(sk, skb, list);
- if (skb == tail ||
+ if (!skb ||
+ skb == tail ||
tcp_hdr(skb)->syn ||
tcp_hdr(skb)->fin)
return;
@@ -4610,17 +4647,21 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
head = skb;
for (;;) {
- skb = skb->next;
+ struct sk_buff *next = NULL;
+
+ if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
+ next = skb_queue_next(&tp->out_of_order_queue, skb);
+ skb = next;
/* Segment is terminated when we see gap or when
* we are at the end of all the queue. */
- if (skb == (struct sk_buff *)&tp->out_of_order_queue ||
+ if (!skb ||
after(TCP_SKB_CB(skb)->seq, end) ||
before(TCP_SKB_CB(skb)->end_seq, start)) {
tcp_collapse(sk, &tp->out_of_order_queue,
head, skb, start, end);
head = skb;
- if (skb == (struct sk_buff *)&tp->out_of_order_queue)
+ if (!skb)
break;
/* Start new segment */
start = TCP_SKB_CB(skb)->seq;
@@ -4681,10 +4722,11 @@ static int tcp_prune_queue(struct sock *sk)
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
tcp_collapse_ofo_queue(sk);
- tcp_collapse(sk, &sk->sk_receive_queue,
- sk->sk_receive_queue.next,
- (struct sk_buff *)&sk->sk_receive_queue,
- tp->copied_seq, tp->rcv_nxt);
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ tcp_collapse(sk, &sk->sk_receive_queue,
+ skb_peek(&sk->sk_receive_queue),
+ NULL,
+ tp->copied_seq, tp->rcv_nxt);
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5d427f86b41..5a1ca2698c8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -546,7 +546,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
if (th->rst)
return;
- if (skb->rtable->rt_type != RTN_LOCAL)
+ if (skb_rtable(skb)->rt_type != RTN_LOCAL)
return;
/* Swap the send and the receive. */
@@ -590,7 +590,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
- net = dev_net(skb->dst->dev);
+ net = dev_net(skb_dst(skb)->dev);
ip_send_reply(net->ipv4.tcp_sock, skb,
&arg, arg.iov[0].iov_len);
@@ -617,7 +617,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
];
} rep;
struct ip_reply_arg arg;
- struct net *net = dev_net(skb->dst->dev);
+ struct net *net = dev_net(skb_dst(skb)->dev);
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
@@ -1185,7 +1185,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
#endif
/* Never answer to SYNs send to broadcast or multicast */
- if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+ if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
goto drop;
/* TW buckets are converted to open requests without
@@ -1593,7 +1593,7 @@ process:
#endif
{
if (!tcp_prequeue(sk, skb))
- ret = tcp_v4_do_rcv(sk, skb);
+ ret = tcp_v4_do_rcv(sk, skb);
}
} else
sk_add_backlog(sk, skb);
@@ -2343,7 +2343,7 @@ void tcp4_proc_exit(void)
struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
- struct iphdr *iph = ip_hdr(skb);
+ struct iphdr *iph = skb_gro_network_header(skb);
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 59aec609cec..416fc4c2e7e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -288,7 +288,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
tp->ecn_flags = 0;
- if (sysctl_tcp_ecn) {
+ if (sysctl_tcp_ecn == 1) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
tp->ecn_flags = TCP_ECN_OK;
}
@@ -2202,7 +2202,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
/* Reserve space for headers. */
skb_reserve(skb, MAX_TCP_HEADER);
- skb->dst = dst_clone(dst);
+ skb_dst_set(skb, dst_clone(dst));
mss = dst_metric(dst, RTAX_ADVMSS);
if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a453aac91bd..c6743eec9b7 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,6 +158,11 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
}
EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
+static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
+{
+ return min(tp->snd_ssthresh, tp->snd_cwnd-1);
+}
+
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -221,11 +226,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
*/
diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
- if (diff > gamma && tp->snd_ssthresh > 2 ) {
+ if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) {
/* Going too fast. Time to slow down
* and switch to congestion avoidance.
*/
- tp->snd_ssthresh = 2;
/* Set cwnd to match the actual rate
* exactly:
@@ -235,6 +239,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
* utilization.
*/
tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
+ tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
} else if (tp->snd_cwnd <= tp->snd_ssthresh) {
/* Slow start. */
@@ -250,6 +255,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
* we slow down.
*/
tp->snd_cwnd--;
+ tp->snd_ssthresh
+ = tcp_vegas_ssthresh(tp);
} else if (diff < alpha) {
/* We don't have enough extra packets
* in the network, so speed up.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7a1d1ce22e6..8f4158d7c9a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -328,7 +328,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
if (unlikely(sk = skb_steal_sock(skb)))
return sk;
else
- return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport,
+ return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport,
iph->daddr, dport, inet_iif(skb),
udptable);
}
@@ -1237,7 +1237,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
struct sock *sk;
struct udphdr *uh;
unsigned short ulen;
- struct rtable *rt = (struct rtable*)skb->dst;
+ struct rtable *rt = skb_rtable(skb);
__be32 saddr, daddr;
struct net *net = dev_net(skb->dev);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 4ec2162a437..f9f922a0ba8 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -23,7 +23,7 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
{
- if (skb->dst == NULL) {
+ if (skb_dst(skb) == NULL) {
const struct iphdr *iph = ip_hdr(skb);
if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 7135279f3f8..3444f3b34ec 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -28,7 +28,7 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
*/
static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
{
- struct dst_entry *dst = skb->dst;
+ struct dst_entry *dst = skb_dst(skb);
struct iphdr *top_iph;
int flags;
@@ -41,7 +41,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->ihl = 5;
top_iph->version = 4;
- top_iph->protocol = xfrm_af2proto(skb->dst->ops->family);
+ top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
/* DS disclosed */
top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 8c3180adddb..c908bd99bcb 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -29,7 +29,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
goto out;
- dst = skb->dst;
+ dst = skb_dst(skb);
mtu = dst_mtu(dst);
if (skb->len > mtu) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
@@ -72,7 +72,7 @@ EXPORT_SYMBOL(xfrm4_prepare_output);
static int xfrm4_output_finish(struct sk_buff *skb)
{
#ifdef CONFIG_NETFILTER
- if (!skb->dst->xfrm) {
+ if (!skb_dst(skb)->xfrm) {
IPCB(skb)->flags |= IPSKB_REROUTED;
return dst_output(skb);
}
@@ -87,6 +87,6 @@ static int xfrm4_output_finish(struct sk_buff *skb)
int xfrm4_output(struct sk_buff *skb)
{
return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb,
- NULL, skb->dst->dev, xfrm4_output_finish,
+ NULL, skb_dst(skb)->dev, xfrm4_output_finish,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}