From b71d1d426d263b0b6cb5760322efebbfc89d4463 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Apr 2011 04:53:02 +0000 Subject: inet: constify ip headers and in6_addr Add const qualifiers to structs iphdr, ipv6hdr and in6_addr pointers where possible, to make code intention more obvious. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index de1b7e37ad5..73add237324 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -54,8 +54,8 @@ static void dccp_v6_hash(struct sock *sk) /* add pseudo-header to DCCP checksum stored in skb->csum */ static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, - struct in6_addr *saddr, - struct in6_addr *daddr) + const struct in6_addr *saddr, + const struct in6_addr *daddr) { return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum); } @@ -87,7 +87,7 @@ static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; + const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct ipv6_pinfo *np; @@ -296,7 +296,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { - struct ipv6hdr *rxip6h; + const struct ipv6hdr *rxip6h; struct sk_buff *skb; struct flowi6 fl6; struct net *net = dev_net(skb_dst(rxskb)->dev); -- cgit v1.2.3-18-g5258 From 2d7192d6cbab20e153c47fa1559ffd41ceef0e79 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Tue, 26 Apr 2011 13:28:44 -0700 Subject: ipv4: Sanitize and simplify ip_route_{connect,newports}() These functions are used together as a unit for route resolution during connect(). They address the chicken-and-egg problem that exists when ports need to be allocated during connect() processing, yet such port allocations require addressing information from the routing code. It's currently more heavy-handed than it needs to be, and in particular we allocate and initialize a flow object twice. Let the callers provide the on-stack flow object. That way we only need to initialize it once in the ip_route_connect() call. Later, if ip_route_newports() needs to do anything, it re-uses that flow object as-is except for the ports which it updates before the route re-lookup. Also, describe why this set of facilities is needed and how it works in a big comment. Signed-off-by: David S. Miller Reviewed-by: Eric Dumazet --- net/dccp/ipv4.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ae451c6d83b..b92ab655d44 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -40,12 +40,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; __be16 orig_sport, orig_dport; - struct rtable *rt; __be32 daddr, nexthop; + struct flowi4 fl4; + struct rtable *rt; int err; dp->dccps_role = DCCP_ROLE_CLIENT; @@ -65,7 +66,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) orig_sport = inet->inet_sport; orig_dport = usin->sin_port; - rt = ip_route_connect(nexthop, inet->inet_saddr, + rt = ip_route_connect(&fl4, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, orig_sport, orig_dport, sk, true); @@ -101,8 +102,7 @@ 
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err != 0) goto failure; - rt = ip_route_newports(rt, IPPROTO_DCCP, - orig_sport, orig_dport, + rt = ip_route_newports(&fl4, rt, orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) { rt = NULL; -- cgit v1.2.3-18-g5258 From f6d8bd051c391c1c0458a30b2a7abcd939329259 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Apr 2011 09:45:37 +0000 Subject: inet: add RCU protection to inet->opt We lack proper synchronization to manipulate inet->opt ip_options. The problem is ip_make_skb() calls ip_setup_cork() and ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options), without any protection against another thread manipulating inet->opt. Another thread can change inet->opt pointer and free old one under us. Use RCU to protect inet->opt (changed to inet->inet_opt). Instead of handling atomic refcounts, just copy ip_options when necessary, to avoid cache line dirtying. We can't insert an rcu_head in struct ip_options since it's included in skb->cb[], so this patch is large because I had to introduce a new ip_options_rcu structure. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 16 ++++++++++------ net/dccp/ipv6.c | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b92ab655d44..cbbcc6c036e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -48,6 +48,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct flowi4 fl4; struct rtable *rt; int err; + struct ip_options_rcu *inet_opt; dp->dccps_role = DCCP_ROLE_CLIENT; @@ -58,10 +59,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return -EAFNOSUPPORT; nexthop = daddr = usin->sin_addr.s_addr; - if (inet->opt != NULL && inet->opt->srr) { + + inet_opt = rcu_dereference_protected(inet->inet_opt, + sock_owned_by_user(sk)); + if (inet_opt != NULL && inet_opt->opt.srr) { if (daddr == 0) return -EINVAL; - nexthop = inet->opt->faddr; + nexthop = inet_opt->opt.faddr; } orig_sport = inet->inet_sport; @@ -78,7 +82,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return -ENETUNREACH; } - if (inet->opt == NULL || !inet->opt->srr) + if (inet_opt == NULL || !inet_opt->opt.srr) daddr = rt->rt_dst; if (inet->inet_saddr == 0) @@ -89,8 +93,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; - if (inet->opt != NULL) - inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; + if (inet_opt) + inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; /* * Socket identity is still unknown (sport may be zero). 
* However we set state to DCCP_REQUESTING and not releasing socket @@ -405,7 +409,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->inet_daddr = ireq->rmt_addr; newinet->inet_rcv_saddr = ireq->loc_addr; newinet->inet_saddr = ireq->loc_addr; - newinet->opt = ireq->opt; + newinet->inet_opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 73add237324..8dc4348774a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -573,7 +573,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, First: no IPv4 options. */ - newinet->opt = NULL; + newinet->inet_opt = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; -- cgit v1.2.3-18-g5258 From 91ab0b60a12833b4715b838474f23496af8de30c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Apr 2011 23:49:30 -0700 Subject: ipv4: Get route daddr from flow key in dccp_v4_connect(). Now that output route lookups update the flow with destination address selection, we can fetch it from fl4->daddr instead of rt->rt_dst Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index cbbcc6c036e..f4254bb4745 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -83,7 +83,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (inet_opt == NULL || !inet_opt->opt.srr) - daddr = rt->rt_dst; + daddr = fl4.daddr; if (inet->inet_saddr == 0) inet->inet_saddr = rt->rt_src; -- cgit v1.2.3-18-g5258 From f1390160ddcd64a3cfd48b3280d0a616a31b9520 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 3 May 2011 20:06:41 -0700 Subject: dccp: Use flowi4->saddr in dccp_v4_connect() Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f4254bb4745..36700a46b24 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -86,7 +86,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) daddr = fl4.daddr; if (inet->inet_saddr == 0) - inet->inet_saddr = rt->rt_src; + inet->inet_saddr = fl4.saddr; inet->inet_rcv_saddr = inet->inet_saddr; inet->inet_dport = usin->sin_port; -- cgit v1.2.3-18-g5258 From 2c42758cf6683e9c1657d20dcf2a7edd323d98ca Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 6 May 2011 16:10:41 -0700 Subject: dccp: Use cork flow in dccp_v4_connect() Since this is invoked from inet_stream_connect() the socket is locked and therefore this usage is safe. Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 36700a46b24..4ac1a728083 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -45,7 +45,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct dccp_sock *dp = dccp_sk(sk); __be16 orig_sport, orig_dport; __be32 daddr, nexthop; - struct flowi4 fl4; + struct flowi4 *fl4; struct rtable *rt; int err; struct ip_options_rcu *inet_opt; @@ -70,7 +70,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) orig_sport = inet->inet_sport; orig_dport = usin->sin_port; - rt = ip_route_connect(&fl4, nexthop, inet->inet_saddr, + fl4 = &inet->cork.fl.u.ip4; + rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, orig_sport, orig_dport, sk, true); @@ -83,10 +84,10 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (inet_opt == NULL || !inet_opt->opt.srr) - daddr = fl4.daddr; + daddr = fl4->daddr; if (inet->inet_saddr == 0) - inet->inet_saddr = 
fl4.saddr; + inet->inet_saddr = fl4->saddr; inet->inet_rcv_saddr = inet->inet_saddr; inet->inet_dport = usin->sin_port; @@ -106,7 +107,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err != 0) goto failure; - rt = ip_route_newports(&fl4, rt, orig_sport, orig_dport, + rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) { rt = NULL; -- cgit v1.2.3-18-g5258 From 0e734419923bd8e599858f8fc196c7804bb85564 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 8 May 2011 15:28:03 -0700 Subject: ipv4: Use inet_csk_route_child_sock() in DCCP and TCP. Operation order is now transposed, we first create the child socket then we try to hook up the route. Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4ac1a728083..46b15e9e9b5 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -396,15 +396,10 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto exit_overflow; - if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) - goto exit; - newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto exit_nonewsk; - sk_setup_caps(newsk, dst); - newinet = inet_sk(newsk); ireq = inet_rsk(req); newinet->inet_daddr = ireq->rmt_addr; @@ -416,12 +411,15 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->inet_id = jiffies; + if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) + goto put_and_exit; + + sk_setup_caps(newsk, dst); + dccp_sync_mss(newsk, dst_mtu(dst)); - if (__inet_inherit_port(sk, newsk) < 0) { - sock_put(newsk); - goto exit; - } + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; __inet_hash_nolisten(newsk, NULL); return newsk; @@ -433,6 
+431,9 @@ exit_nonewsk: exit: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; +put_and_exit: + sock_put(newsk); + goto exit; } EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); -- cgit v1.2.3-18-g5258 From d9d8da805dcb503ef8ee49918a94d49085060f23 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 6 May 2011 22:23:20 -0700 Subject: inet: Pass flowi to ->queue_xmit(). This allows us to acquire the exact route keying information from the protocol, however that might be managed. It handles all of the possibilities, from the simplest case of storing the key in inet->cork.fl to the more complex setup SCTP has where individual transports determine the flow. Signed-off-by: David S. Miller --- net/dccp/output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/dccp') diff --git a/net/dccp/output.c b/net/dccp/output.c index 136d41cbcd0..fab108e51e5 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -43,7 +43,7 @@ static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb) static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if (likely(skb != NULL)) { - const struct inet_sock *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); @@ -136,7 +136,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb); + err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl); return net_xmit_eval(err); } return -ENOBUFS; -- cgit v1.2.3-18-g5258 From 6bd023f3dddfc7c5f660089598c10e1f4167083b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 18 May 2011 18:32:03 -0400 Subject: ipv4: Make caller provide flowi4 key to inet_csk_route_req(). This way the caller can get at the fully resolved fl4->{daddr,saddr} etc. Signed-off-by: David S. 
Miller --- net/dccp/ipv4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/dccp') diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 46b15e9e9b5..8c36adfd191 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -497,8 +497,9 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, int err = -1; struct sk_buff *skb; struct dst_entry *dst; + struct flowi4 fl4; - dst = inet_csk_route_req(sk, req); + dst = inet_csk_route_req(sk, &fl4, req); if (dst == NULL) goto out; -- cgit v1.2.3-18-g5258