aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-05-20 13:43:21 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-05-20 13:43:21 -0700
commit06f4e926d256d902dd9a53dcb400fd74974ce087 (patch)
tree0b438b67f5f0eff6fd617bc497a9dace6164a488 /net/ipv4
parent8e7bfcbab3825d1b404d615cb1b54f44ff81f981 (diff)
parentd93515611bbc70c2fe4db232e5feb448ed8e4cc9 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1446 commits) macvlan: fix panic if lowerdev in a bond tg3: Add braces around 5906 workaround. tg3: Fix NETIF_F_LOOPBACK error macvlan: remove one synchronize_rcu() call networking: NET_CLS_ROUTE4 depends on INET irda: Fix error propagation in ircomm_lmp_connect_response() irda: Kill set but unused variable 'bytes' in irlan_check_command_param() irda: Kill set but unused variable 'clen' in ircomm_connect_indication() rxrpc: Fix set but unused variable 'usage' in rxrpc_get_transport() be2net: Kill set but unused variable 'req' in lancer_fw_download() irda: Kill set but unused vars 'saddr' and 'daddr' in irlan_provider_connect_indication() atl1c: atl1c_resume() is only used when CONFIG_PM_SLEEP is defined. rxrpc: Fix set but unused variable 'usage' in rxrpc_get_peer(). rxrpc: Kill set but unused variable 'local' in rxrpc_UDP_error_handler() rxrpc: Kill set but unused variable 'sp' in rxrpc_process_connection() rxrpc: Kill set but unused variable 'sp' in rxrpc_rotate_tx_window() pkt_sched: Kill set but unused variable 'protocol' in tc_classify() isdn: capi: Use pr_debug() instead of ifdefs. tg3: Update version to 3.119 tg3: Apply rx_discards fix to 5719/5720 ... Fix up trivial conflicts in arch/x86/Kconfig and net/mac80211/agg-tx.c as per Davem.
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c53
-rw-r--r--net/ipv4/ah4.c7
-rw-r--r--net/ipv4/cipso_ipv4.c113
-rw-r--r--net/ipv4/datagram.c22
-rw-r--r--net/ipv4/devinet.c4
-rw-r--r--net/ipv4/esp4.c7
-rw-r--r--net/ipv4/fib_frontend.c16
-rw-r--r--net/ipv4/fib_trie.c110
-rw-r--r--net/ipv4/icmp.c133
-rw-r--r--net/ipv4/igmp.c22
-rw-r--r--net/ipv4/inet_connection_sock.c59
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/inet_lro.c4
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_fragment.c58
-rw-r--r--net/ipv4/ip_gre.c70
-rw-r--r--net/ipv4/ip_input.c4
-rw-r--r--net/ipv4/ip_options.c57
-rw-r--r--net/ipv4/ip_output.c158
-rw-r--r--net/ipv4/ip_sockglue.c37
-rw-r--r--net/ipv4/ipcomp.c4
-rw-r--r--net/ipv4/ipconfig.c35
-rw-r--r--net/ipv4/ipip.c36
-rw-r--r--net/ipv4/ipmr.c39
-rw-r--r--net/ipv4/netfilter/arp_tables.c18
-rw-r--r--net/ipv4/netfilter/ip_tables.c28
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c2
-rw-r--r--net/ipv4/ping.c935
-rw-r--r--net/ipv4/raw.c92
-rw-r--r--net/ipv4/route.c385
-rw-r--r--net/ipv4/syncookies.c22
-rw-r--r--net/ipv4/sysctl_net_ipv4.c68
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_ipv4.c98
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/udp.c78
-rw-r--r--net/ipv4/xfrm4_policy.c38
-rw-r--r--net/ipv4/xfrm4_state.c2
39 files changed, 1999 insertions, 830 deletions
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 0dc772d0d12..f2dc69cffb5 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -11,7 +11,7 @@ obj-y := route.o inetpeer.o protocol.o \
datagram.o raw.o udp.o udplite.o \
arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o \
- inet_fragment.o
+ inet_fragment.o ping.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 807d83c02ef..cc1463156cd 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -105,6 +105,7 @@
#include <net/tcp.h>
#include <net/udp.h>
#include <net/udplite.h>
+#include <net/ping.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
@@ -153,7 +154,7 @@ void inet_sock_destruct(struct sock *sk)
WARN_ON(sk->sk_wmem_queued);
WARN_ON(sk->sk_forward_alloc);
- kfree(inet->opt);
+ kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
sk_refcnt_debug_dec(sk);
}
@@ -1008,6 +1009,14 @@ static struct inet_protosw inetsw_array[] =
.flags = INET_PROTOSW_PERMANENT,
},
+ {
+ .type = SOCK_DGRAM,
+ .protocol = IPPROTO_ICMP,
+ .prot = &ping_prot,
+ .ops = &inet_dgram_ops,
+ .no_check = UDP_CSUM_DEFAULT,
+ .flags = INET_PROTOSW_REUSE,
+ },
{
.type = SOCK_RAW,
@@ -1103,14 +1112,19 @@ static int inet_sk_reselect_saddr(struct sock *sk)
struct inet_sock *inet = inet_sk(sk);
__be32 old_saddr = inet->inet_saddr;
__be32 daddr = inet->inet_daddr;
+ struct flowi4 *fl4;
struct rtable *rt;
__be32 new_saddr;
+ struct ip_options_rcu *inet_opt;
- if (inet->opt && inet->opt->srr)
- daddr = inet->opt->faddr;
+ inet_opt = rcu_dereference_protected(inet->inet_opt,
+ sock_owned_by_user(sk));
+ if (inet_opt && inet_opt->opt.srr)
+ daddr = inet_opt->opt.faddr;
/* Query new route. */
- rt = ip_route_connect(daddr, 0, RT_CONN_FLAGS(sk),
+ fl4 = &inet->cork.fl.u.ip4;
+ rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk),
sk->sk_bound_dev_if, sk->sk_protocol,
inet->inet_sport, inet->inet_dport, sk, false);
if (IS_ERR(rt))
@@ -1118,7 +1132,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
sk_setup_caps(sk, &rt->dst);
- new_saddr = rt->rt_src;
+ new_saddr = fl4->saddr;
if (new_saddr == old_saddr)
return 0;
@@ -1147,6 +1161,8 @@ int inet_sk_rebuild_header(struct sock *sk)
struct inet_sock *inet = inet_sk(sk);
struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
__be32 daddr;
+ struct ip_options_rcu *inet_opt;
+ struct flowi4 *fl4;
int err;
/* Route is OK, nothing to do. */
@@ -1154,10 +1170,14 @@ int inet_sk_rebuild_header(struct sock *sk)
return 0;
/* Reroute. */
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
daddr = inet->inet_daddr;
- if (inet->opt && inet->opt->srr)
- daddr = inet->opt->faddr;
- rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr,
+ if (inet_opt && inet_opt->opt.srr)
+ daddr = inet_opt->opt.faddr;
+ rcu_read_unlock();
+ fl4 = &inet->cork.fl.u.ip4;
+ rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr,
inet->inet_dport, inet->inet_sport,
sk->sk_protocol, RT_CONN_FLAGS(sk),
sk->sk_bound_dev_if);
@@ -1186,7 +1206,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header);
static int inet_gso_send_check(struct sk_buff *skb)
{
- struct iphdr *iph;
+ const struct iphdr *iph;
const struct net_protocol *ops;
int proto;
int ihl;
@@ -1293,7 +1313,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
const struct net_protocol *ops;
struct sk_buff **pp = NULL;
struct sk_buff *p;
- struct iphdr *iph;
+ const struct iphdr *iph;
unsigned int hlen;
unsigned int off;
unsigned int id;
@@ -1516,6 +1536,7 @@ static const struct net_protocol udp_protocol = {
static const struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
+ .err_handler = ping_err,
.no_policy = 1,
.netns_ok = 1,
};
@@ -1631,6 +1652,10 @@ static int __init inet_init(void)
if (rc)
goto out_unregister_udp_proto;
+ rc = proto_register(&ping_prot, 1);
+ if (rc)
+ goto out_unregister_raw_proto;
+
/*
* Tell SOCKET that we are alive...
*/
@@ -1686,6 +1711,8 @@ static int __init inet_init(void)
/* Add UDP-Lite (RFC 3828) */
udplite4_register();
+ ping_init();
+
/*
* Set the ICMP layer up
*/
@@ -1716,6 +1743,8 @@ static int __init inet_init(void)
rc = 0;
out:
return rc;
+out_unregister_raw_proto:
+ proto_unregister(&raw_prot);
out_unregister_udp_proto:
proto_unregister(&udp_prot);
out_unregister_tcp_proto:
@@ -1740,11 +1769,15 @@ static int __init ipv4_proc_init(void)
goto out_tcp;
if (udp4_proc_init())
goto out_udp;
+ if (ping_proc_init())
+ goto out_ping;
if (ip_misc_proc_init())
goto out_misc;
out:
return rc;
out_misc:
+ ping_proc_exit();
+out_ping:
udp4_proc_exit();
out_udp:
tcp4_proc_exit();
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 4286fd3cc0e..c1f4154552f 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -73,7 +73,7 @@ static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash,
* into IP header for icv calculation. Options are already checked
* for validity, so paranoia is not required. */
-static int ip_clear_mutable_options(struct iphdr *iph, __be32 *daddr)
+static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr)
{
unsigned char * optptr = (unsigned char*)(iph+1);
int l = iph->ihl*4 - sizeof(struct iphdr);
@@ -396,7 +396,7 @@ out:
static void ah4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
- struct iphdr *iph = (struct iphdr *)skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
@@ -404,7 +404,8 @@ static void ah4_err(struct sk_buff *skb, u32 info)
icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
- x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ ah->spi, IPPROTO_AH, AF_INET);
if (!x)
return;
printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index a0af7ea8787..2b3c23c287c 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1857,6 +1857,11 @@ static int cipso_v4_genopt(unsigned char *buf, u32 buf_len,
return CIPSO_V4_HDR_LEN + ret_val;
}
+static void opt_kfree_rcu(struct rcu_head *head)
+{
+ kfree(container_of(head, struct ip_options_rcu, rcu));
+}
+
/**
* cipso_v4_sock_setattr - Add a CIPSO option to a socket
* @sk: the socket
@@ -1879,7 +1884,7 @@ int cipso_v4_sock_setattr(struct sock *sk,
unsigned char *buf = NULL;
u32 buf_len;
u32 opt_len;
- struct ip_options *opt = NULL;
+ struct ip_options_rcu *old, *opt = NULL;
struct inet_sock *sk_inet;
struct inet_connection_sock *sk_conn;
@@ -1915,22 +1920,25 @@ int cipso_v4_sock_setattr(struct sock *sk,
ret_val = -ENOMEM;
goto socket_setattr_failure;
}
- memcpy(opt->__data, buf, buf_len);
- opt->optlen = opt_len;
- opt->cipso = sizeof(struct iphdr);
+ memcpy(opt->opt.__data, buf, buf_len);
+ opt->opt.optlen = opt_len;
+ opt->opt.cipso = sizeof(struct iphdr);
kfree(buf);
buf = NULL;
sk_inet = inet_sk(sk);
+
+ old = rcu_dereference_protected(sk_inet->inet_opt, sock_owned_by_user(sk));
if (sk_inet->is_icsk) {
sk_conn = inet_csk(sk);
- if (sk_inet->opt)
- sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen;
- sk_conn->icsk_ext_hdr_len += opt->optlen;
+ if (old)
+ sk_conn->icsk_ext_hdr_len -= old->opt.optlen;
+ sk_conn->icsk_ext_hdr_len += opt->opt.optlen;
sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie);
}
- opt = xchg(&sk_inet->opt, opt);
- kfree(opt);
+ rcu_assign_pointer(sk_inet->inet_opt, opt);
+ if (old)
+ call_rcu(&old->rcu, opt_kfree_rcu);
return 0;
@@ -1960,7 +1968,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
unsigned char *buf = NULL;
u32 buf_len;
u32 opt_len;
- struct ip_options *opt = NULL;
+ struct ip_options_rcu *opt = NULL;
struct inet_request_sock *req_inet;
/* We allocate the maximum CIPSO option size here so we are probably
@@ -1988,15 +1996,16 @@ int cipso_v4_req_setattr(struct request_sock *req,
ret_val = -ENOMEM;
goto req_setattr_failure;
}
- memcpy(opt->__data, buf, buf_len);
- opt->optlen = opt_len;
- opt->cipso = sizeof(struct iphdr);
+ memcpy(opt->opt.__data, buf, buf_len);
+ opt->opt.optlen = opt_len;
+ opt->opt.cipso = sizeof(struct iphdr);
kfree(buf);
buf = NULL;
req_inet = inet_rsk(req);
opt = xchg(&req_inet->opt, opt);
- kfree(opt);
+ if (opt)
+ call_rcu(&opt->rcu, opt_kfree_rcu);
return 0;
@@ -2016,34 +2025,34 @@ req_setattr_failure:
* values on failure.
*
*/
-static int cipso_v4_delopt(struct ip_options **opt_ptr)
+static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
{
int hdr_delta = 0;
- struct ip_options *opt = *opt_ptr;
+ struct ip_options_rcu *opt = *opt_ptr;
- if (opt->srr || opt->rr || opt->ts || opt->router_alert) {
+ if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
u8 cipso_len;
u8 cipso_off;
unsigned char *cipso_ptr;
int iter;
int optlen_new;
- cipso_off = opt->cipso - sizeof(struct iphdr);
- cipso_ptr = &opt->__data[cipso_off];
+ cipso_off = opt->opt.cipso - sizeof(struct iphdr);
+ cipso_ptr = &opt->opt.__data[cipso_off];
cipso_len = cipso_ptr[1];
- if (opt->srr > opt->cipso)
- opt->srr -= cipso_len;
- if (opt->rr > opt->cipso)
- opt->rr -= cipso_len;
- if (opt->ts > opt->cipso)
- opt->ts -= cipso_len;
- if (opt->router_alert > opt->cipso)
- opt->router_alert -= cipso_len;
- opt->cipso = 0;
+ if (opt->opt.srr > opt->opt.cipso)
+ opt->opt.srr -= cipso_len;
+ if (opt->opt.rr > opt->opt.cipso)
+ opt->opt.rr -= cipso_len;
+ if (opt->opt.ts > opt->opt.cipso)
+ opt->opt.ts -= cipso_len;
+ if (opt->opt.router_alert > opt->opt.cipso)
+ opt->opt.router_alert -= cipso_len;
+ opt->opt.cipso = 0;
memmove(cipso_ptr, cipso_ptr + cipso_len,
- opt->optlen - cipso_off - cipso_len);
+ opt->opt.optlen - cipso_off - cipso_len);
/* determining the new total option length is tricky because of
* the padding necessary, the only thing i can think to do at
@@ -2052,21 +2061,21 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr)
* from there we can determine the new total option length */
iter = 0;
optlen_new = 0;
- while (iter < opt->optlen)
- if (opt->__data[iter] != IPOPT_NOP) {
- iter += opt->__data[iter + 1];
+ while (iter < opt->opt.optlen)
+ if (opt->opt.__data[iter] != IPOPT_NOP) {
+ iter += opt->opt.__data[iter + 1];
optlen_new = iter;
} else
iter++;
- hdr_delta = opt->optlen;
- opt->optlen = (optlen_new + 3) & ~3;
- hdr_delta -= opt->optlen;
+ hdr_delta = opt->opt.optlen;
+ opt->opt.optlen = (optlen_new + 3) & ~3;
+ hdr_delta -= opt->opt.optlen;
} else {
/* only the cipso option was present on the socket so we can
* remove the entire option struct */
*opt_ptr = NULL;
- hdr_delta = opt->optlen;
- kfree(opt);
+ hdr_delta = opt->opt.optlen;
+ call_rcu(&opt->rcu, opt_kfree_rcu);
}
return hdr_delta;
@@ -2083,15 +2092,15 @@ static int cipso_v4_delopt(struct ip_options **opt_ptr)
void cipso_v4_sock_delattr(struct sock *sk)
{
int hdr_delta;
- struct ip_options *opt;
+ struct ip_options_rcu *opt;
struct inet_sock *sk_inet;
sk_inet = inet_sk(sk);
- opt = sk_inet->opt;
- if (opt == NULL || opt->cipso == 0)
+ opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
+ if (opt == NULL || opt->opt.cipso == 0)
return;
- hdr_delta = cipso_v4_delopt(&sk_inet->opt);
+ hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
if (sk_inet->is_icsk && hdr_delta > 0) {
struct inet_connection_sock *sk_conn = inet_csk(sk);
sk_conn->icsk_ext_hdr_len -= hdr_delta;
@@ -2109,12 +2118,12 @@ void cipso_v4_sock_delattr(struct sock *sk)
*/
void cipso_v4_req_delattr(struct request_sock *req)
{
- struct ip_options *opt;
+ struct ip_options_rcu *opt;
struct inet_request_sock *req_inet;
req_inet = inet_rsk(req);
opt = req_inet->opt;
- if (opt == NULL || opt->cipso == 0)
+ if (opt == NULL || opt->opt.cipso == 0)
return;
cipso_v4_delopt(&req_inet->opt);
@@ -2184,14 +2193,18 @@ getattr_return:
*/
int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr)
{
- struct ip_options *opt;
+ struct ip_options_rcu *opt;
+ int res = -ENOMSG;
- opt = inet_sk(sk)->opt;
- if (opt == NULL || opt->cipso == 0)
- return -ENOMSG;
-
- return cipso_v4_getattr(opt->__data + opt->cipso - sizeof(struct iphdr),
- secattr);
+ rcu_read_lock();
+ opt = rcu_dereference(inet_sk(sk)->inet_opt);
+ if (opt && opt->opt.cipso)
+ res = cipso_v4_getattr(opt->opt.__data +
+ opt->opt.cipso -
+ sizeof(struct iphdr),
+ secattr);
+ rcu_read_unlock();
+ return res;
}
/**
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 85bd24ca4f6..424fafbc8cb 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -24,6 +24,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
+ struct flowi4 *fl4;
struct rtable *rt;
__be32 saddr;
int oif;
@@ -38,6 +39,8 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
+ lock_sock(sk);
+
oif = sk->sk_bound_dev_if;
saddr = inet->inet_saddr;
if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
@@ -46,7 +49,8 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (!saddr)
saddr = inet->mc_addr;
}
- rt = ip_route_connect(usin->sin_addr.s_addr, saddr,
+ fl4 = &inet->cork.fl.u.ip4;
+ rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
RT_CONN_FLAGS(sk), oif,
sk->sk_protocol,
inet->inet_sport, usin->sin_port, sk, true);
@@ -54,26 +58,30 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
err = PTR_ERR(rt);
if (err == -ENETUNREACH)
IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
- return err;
+ goto out;
}
if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) {
ip_rt_put(rt);
- return -EACCES;
+ err = -EACCES;
+ goto out;
}
if (!inet->inet_saddr)
- inet->inet_saddr = rt->rt_src; /* Update source address */
+ inet->inet_saddr = fl4->saddr; /* Update source address */
if (!inet->inet_rcv_saddr) {
- inet->inet_rcv_saddr = rt->rt_src;
+ inet->inet_rcv_saddr = fl4->saddr;
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
}
- inet->inet_daddr = rt->rt_dst;
+ inet->inet_daddr = fl4->daddr;
inet->inet_dport = usin->sin_port;
sk->sk_state = TCP_ESTABLISHED;
inet->inet_id = jiffies;
sk_dst_set(sk, &rt->dst);
- return 0;
+ err = 0;
+out:
+ release_sock(sk);
+ return err;
}
EXPORT_SYMBOL(ip4_datagram_connect);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cd9ca0811cf..0d4a184af16 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1369,7 +1369,7 @@ errout:
static size_t inet_get_link_af_size(const struct net_device *dev)
{
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
if (!in_dev)
return 0;
@@ -1379,7 +1379,7 @@ static size_t inet_get_link_af_size(const struct net_device *dev)
static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
{
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
struct nlattr *nla;
int i;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 03f994bcf7d..a5b413416da 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -276,7 +276,7 @@ error:
static int esp_input_done2(struct sk_buff *skb, int err)
{
- struct iphdr *iph;
+ const struct iphdr *iph;
struct xfrm_state *x = xfrm_input_state(skb);
struct esp_data *esp = x->data;
struct crypto_aead *aead = esp->aead;
@@ -484,7 +484,7 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
static void esp4_err(struct sk_buff *skb, u32 info)
{
struct net *net = dev_net(skb->dev);
- struct iphdr *iph = (struct iphdr *)skb->data;
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
struct xfrm_state *x;
@@ -492,7 +492,8 @@ static void esp4_err(struct sk_buff *skb, u32 info)
icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return;
- x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
+ x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
+ esph->spi, IPPROTO_ESP, AF_INET);
if (!x)
return;
NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 451088330bb..22524716fe7 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -44,6 +44,7 @@
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
+#include <net/xfrm.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -188,9 +189,9 @@ EXPORT_SYMBOL(inet_dev_addr_type);
* - check, that packet arrived from expected physical interface.
* called with rcu_read_lock()
*/
-int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
- struct net_device *dev, __be32 *spec_dst,
- u32 *itag, u32 mark)
+int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
+ int oif, struct net_device *dev, __be32 *spec_dst,
+ u32 *itag)
{
struct in_device *in_dev;
struct flowi4 fl4;
@@ -202,7 +203,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
fl4.flowi4_oif = 0;
fl4.flowi4_iif = oif;
- fl4.flowi4_mark = mark;
fl4.daddr = src;
fl4.saddr = dst;
fl4.flowi4_tos = tos;
@@ -212,10 +212,12 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
in_dev = __in_dev_get_rcu(dev);
if (in_dev) {
no_addr = in_dev->ifa_list == NULL;
- rpf = IN_DEV_RPFILTER(in_dev);
+
+ /* Ignore rp_filter for packets protected by IPsec. */
+ rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(in_dev);
+
accept_local = IN_DEV_ACCEPT_LOCAL(in_dev);
- if (mark && !IN_DEV_SRC_VMARK(in_dev))
- fl4.flowi4_mark = 0;
+ fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
}
if (in_dev == NULL)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 11d4d28190b..c779ce96e5b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -126,7 +126,7 @@ struct tnode {
struct work_struct work;
struct tnode *tnode_free;
};
- struct rt_trie_node *child[0];
+ struct rt_trie_node __rcu *child[0];
};
#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -151,7 +151,7 @@ struct trie_stat {
};
struct trie {
- struct rt_trie_node *trie;
+ struct rt_trie_node __rcu *trie;
#ifdef CONFIG_IP_FIB_TRIE_STATS
struct trie_use_stats stats;
#endif
@@ -177,16 +177,29 @@ static const int sync_pages = 128;
static struct kmem_cache *fn_alias_kmem __read_mostly;
static struct kmem_cache *trie_leaf_kmem __read_mostly;
-static inline struct tnode *node_parent(struct rt_trie_node *node)
+/*
+ * caller must hold RTNL
+ */
+static inline struct tnode *node_parent(const struct rt_trie_node *node)
{
- return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
+ unsigned long parent;
+
+ parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held());
+
+ return (struct tnode *)(parent & ~NODE_TYPE_MASK);
}
-static inline struct tnode *node_parent_rcu(struct rt_trie_node *node)
+/*
+ * caller must hold RCU read lock or RTNL
+ */
+static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
{
- struct tnode *ret = node_parent(node);
+ unsigned long parent;
+
+ parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() ||
+ lockdep_rtnl_is_held());
- return rcu_dereference_rtnl(ret);
+ return (struct tnode *)(parent & ~NODE_TYPE_MASK);
}
/* Same as rcu_assign_pointer
@@ -198,18 +211,24 @@ static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
node->parent = (unsigned long)ptr | NODE_TYPE(node);
}
-static inline struct rt_trie_node *tnode_get_child(struct tnode *tn, unsigned int i)
+/*
+ * caller must hold RTNL
+ */
+static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
{
BUG_ON(i >= 1U << tn->bits);
- return tn->child[i];
+ return rtnl_dereference(tn->child[i]);
}
-static inline struct rt_trie_node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
+/*
+ * caller must hold RCU read lock or RTNL
+ */
+static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
{
- struct rt_trie_node *ret = tnode_get_child(tn, i);
+ BUG_ON(i >= 1U << tn->bits);
- return rcu_dereference_rtnl(ret);
+ return rcu_dereference_rtnl(tn->child[i]);
}
static inline int tnode_child_length(const struct tnode *tn)
@@ -482,7 +501,7 @@ static inline void put_child(struct trie *t, struct tnode *tn, int i,
static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
int wasfull)
{
- struct rt_trie_node *chi = tn->child[i];
+ struct rt_trie_node *chi = rtnl_dereference(tn->child[i]);
int isfull;
BUG_ON(i >= 1<<tn->bits);
@@ -660,7 +679,7 @@ one_child:
for (i = 0; i < tnode_child_length(tn); i++) {
struct rt_trie_node *n;
- n = tn->child[i];
+ n = rtnl_dereference(tn->child[i]);
if (!n)
continue;
@@ -674,6 +693,20 @@ one_child:
return (struct rt_trie_node *) tn;
}
+
+static void tnode_clean_free(struct tnode *tn)
+{
+ int i;
+ struct tnode *tofree;
+
+ for (i = 0; i < tnode_child_length(tn); i++) {
+ tofree = (struct tnode *)rtnl_dereference(tn->child[i]);
+ if (tofree)
+ tnode_free(tofree);
+ }
+ tnode_free(tn);
+}
+
static struct tnode *inflate(struct trie *t, struct tnode *tn)
{
struct tnode *oldtnode = tn;
@@ -750,8 +783,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
inode = (struct tnode *) node;
if (inode->bits == 1) {
- put_child(t, tn, 2*i, inode->child[0]);
- put_child(t, tn, 2*i+1, inode->child[1]);
+ put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
+ put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
tnode_free_safe(inode);
continue;
@@ -792,8 +825,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
size = tnode_child_length(left);
for (j = 0