diff options
Diffstat (limited to 'net/ipv6/udp.c')
| -rw-r--r-- | net/ipv6/udp.c | 786 |
1 files changed, 405 insertions, 381 deletions
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9a009c66c8a..7092ff78fd8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -45,27 +45,50 @@ #include <net/tcp_states.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> +#include <net/inet6_hashtables.h> +#include <net/busy_poll.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <trace/events/skb.h> #include "udp_impl.h" +static unsigned int udp6_ehashfn(struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) +{ + static u32 udp6_ehash_secret __read_mostly; + static u32 udp_ipv6_hash_secret __read_mostly; + + u32 lhash, fhash; + + net_get_random_once(&udp6_ehash_secret, + sizeof(udp6_ehash_secret)); + net_get_random_once(&udp_ipv6_hash_secret, + sizeof(udp_ipv6_hash_secret)); + + lhash = (__force u32)laddr->s6_addr32[3]; + fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret); + + return __inet6_ehashfn(lhash, lport, fhash, fport, + udp_ipv6_hash_secret + net_hash_mix(net)); +} + int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { - const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk1_rcv_saddr = sk_rcv_saddr(sk); - __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(sk_rcv_saddr6); + int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; /* if both are mapped, treat as IPv4 */ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) return (!sk2_ipv6only && - (!sk1_rcv_saddr || !sk2_rcv_saddr || - sk1_rcv_saddr == sk2_rcv_saddr)); + (!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr || + sk->sk_rcv_saddr == sk2->sk_rcv_saddr)); if (addr_type2 == IPV6_ADDR_ANY && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) @@ -76,7 +99,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 1; if (sk2_rcv_saddr6 && - ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) + ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6)) return 1; return 0; @@ -103,8 +126,8 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); - unsigned int hash2_partial = - udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); + unsigned int hash2_partial = + udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0); /* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; @@ -114,7 +137,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) static void udp_v6_rehash(struct sock *sk) { u16 new_hash = udp6_portaddr_hash(sock_net(sk), - &inet6_sk(sk)->rcv_saddr, + &sk->sk_v6_rcv_saddr, inet_sk(sk)->inet_num); udp_lib_rehash(sk, new_hash); @@ -130,7 +153,6 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); score = 0; @@ -139,13 +161,13 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score++; } - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score++; } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } @@ -168,10 +190,9 @@ static inline int compute_score2(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; score = 0; if (inet->inet_dport) { @@ -179,8 +200,8 @@ static inline int compute_score2(struct sock *sk, struct net *net, return -1; score++; } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) return -1; score++; } @@ -202,7 +223,8 @@ static struct sock *udp6_lib_lookup2(struct net *net, { struct sock *sk, *result; struct hlist_nulls_node *node; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; begin: result = NULL; @@ -213,8 +235,18 @@ begin: if (score > badness) { result = sk; badness = score; - if (score == SCORE2_MAX) + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = udp6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } else if (score == SCORE2_MAX) goto exact_match; + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -238,7 +270,7 @@ exact_match: return result; } -static struct sock *__udp6_lib_lookup(struct net *net, +struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, int dif, struct udp_table *udptable) @@ -248,7 +280,8 @@ static struct sock *__udp6_lib_lookup(struct net *net, unsigned short hnum = ntohs(dport); unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; - int score, badness; + int score, badness, matches = 0, reuseport = 0; + u32 hash = 0; rcu_read_lock(); if (hslot->count > 10) { @@ -283,6 +316,17 @@ begin: if (score > badness) { result = sk; badness = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + hash = udp6_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == badness && reuseport) { + matches++; + if (((u64)hash * matches) >> 32 == 0) + result = sk; + hash = next_pseudo_random32(hash); } } /* @@ -305,13 +349,14 @@ begin: rcu_read_unlock(); return result; } +EXPORT_SYMBOL_GPL(__udp6_lib_lookup); static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct udp_table *udptable) { struct sock *sk; - struct ipv6hdr *iph = ipv6_hdr(skb); + const struct ipv6hdr *iph = ipv6_hdr(skb); if (unlikely(sk = skb_steal_sock(skb))) return sk; @@ -340,32 +385,30 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; - unsigned int ulen; - int peeked; + unsigned int ulen, copied; + int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); int is_udp4; bool slow; - if (addr_len) - *addr_len=sizeof(struct sockaddr_in6); - if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &err); + &peeked, &off, &err); if (!skb) goto out; ulen = skb->len - sizeof(struct udphdr); - if (len > ulen) - len = ulen; - else if (len < ulen) + copied = len; + if (copied > ulen) + copied = ulen; + else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; is_udp4 = (skb->protocol == htons(ETH_P_IP)); @@ -376,22 +419,34 @@ try_again: * coverage checksum (UDP-Lite), do it before the copy. */ - if (len < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { if (udp_lib_checksum_complete(skb)) goto csum_copy_err; } if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov,len); + msg->msg_iov, copied); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) goto csum_copy_err; } - if (err) + if (unlikely(err)) { + trace_kfree_skb(skb, udpv6_recvmsg); + if (!peeked) { + atomic_inc(&sk->sk_drops); + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, + is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, + is_udplite); + } goto out_free; - + } if (!peeked) { if (is_udp4) UDP_INC_STATS_USER(sock_net(sk), @@ -405,34 +460,36 @@ try_again: /* Copy the address. */ if (msg->msg_name) { - struct sockaddr_in6 *sin6; - - sin6 = (struct sockaddr_in6 *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); sin6->sin6_family = AF_INET6; sin6->sin6_port = udp_hdr(skb)->source; sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (is_udp4) + if (is_udp4) { ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin6->sin6_addr); - else { - ipv6_addr_copy(&sin6->sin6_addr, - &ipv6_hdr(skb)->saddr); - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = IP6CB(skb)->iif; + sin6->sin6_scope_id = 0; + } else { + sin6->sin6_addr = ipv6_hdr(skb)->saddr; + sin6->sin6_scope_id = + ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); } - + *addr_len = sizeof(*sin6); } + + if (np->rxopt.all) + ip6_datagram_recv_common_ctl(sk, msg, skb); + if (is_udp4) { if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } else { if (np->rxopt.all) - datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_specific_ctl(sk, msg, skb); } - err = len; + err = copied; if (flags & MSG_TRUNC) err = ulen; @@ -444,17 +501,25 @@ out: csum_copy_err: slow = lock_sock_fast(sk); if (!skb_kill_datagram(sk, skb, flags)) { - if (is_udp4) + if (is_udp4) { + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_CSUMERRORS, is_udplite); UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - else + } else { + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_CSUMERRORS, is_udplite); UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + } } unlock_sock_fast(sk, slow); - if (flags & MSG_DONTWAIT) + if (noblock) return -EAGAIN; + + /* starting over for a new packet */ + msg->msg_flags &= ~MSG_TRUNC; goto try_again; } @@ -463,9 +528,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct udp_table *udptable) { struct ipv6_pinfo *np; - struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; - struct in6_addr *saddr = &hdr->saddr; - struct in6_addr *daddr = &hdr->daddr; + const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; + const struct in6_addr *saddr = &hdr->saddr; + const struct in6_addr *daddr = &hdr->daddr; struct udphdr *uh = (struct udphdr*)(skb->data+offset); struct sock *sk; int err; @@ -475,6 +540,16 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk == NULL) return; + if (type == ICMPV6_PKT_TOOBIG) { + if (!ip6_sk_accept_pmtu(sk)) + goto out; + ip6_sk_update_pmtu(skb, sk, info); + } + if (type == NDISC_REDIRECT) { + ip6_sk_redirect(skb, sk); + goto out; + } + np = inet6_sk(sk); if (!icmpv6_err_convert(type, code, &err) && !np->recverr) @@ -492,6 +567,30 @@ out: sock_put(sk); } +static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + int rc; + + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + sock_rps_save_rxhash(sk, skb); + sk_mark_napi_id(sk, skb); + } + + rc = sock_queue_rcv_skb(sk, skb); + if (rc < 0) { + int is_udplite = IS_UDPLITE(sk); + + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); + UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; + } + return 0; +} + static __inline__ void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info ) @@ -499,7 +598,15 @@ static __inline__ void udpv6_err(struct sk_buff *skb, __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } -int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +static struct static_key udpv6_encap_needed __read_mostly; +void udpv6_encap_enable(void) +{ + if (!static_key_enabled(&udpv6_encap_needed)) + static_key_slow_inc(&udpv6_encap_needed); +} +EXPORT_SYMBOL(udpv6_encap_enable); + +int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int rc; @@ -508,6 +615,41 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; + if (static_key_false(&udpv6_encap_needed) && up->encap_type) { + int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); + + /* + * This is an encapsulation socket so pass the skb to + * the socket's udp_encap_rcv() hook. Otherwise, just + * fall through and pass this up the UDP socket. + * up->encap_rcv() returns the following value: + * =0 if skb was successfully passed to the encap + * handler or was discarded by it. + * >0 if skb should be passed on to UDP. + * <0 if skb should be resubmitted as proto -N + */ + + /* if we're overly short, let UDP handle it */ + encap_rcv = ACCESS_ONCE(up->encap_rcv); + if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { + int ret; + + /* Verify checksum before giving to encap */ + if (udp_lib_checksum_complete(skb)) + goto csum_error; + + ret = encap_rcv(sk, skb); + if (ret <= 0) { + UDP_INC_STATS_BH(sock_net(sk), + UDP_MIB_INDATAGRAMS, + is_udplite); + return -ret; + } + } + + /* FALLTHROUGH -- it's a UDP Packet */ + } + /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). */ @@ -527,64 +669,74 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) } } - if (rcu_dereference_raw(sk->sk_filter)) { + if (rcu_access_pointer(sk->sk_filter)) { if (udp_lib_checksum_complete(skb)) - goto drop; + goto csum_error; } - if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); - goto drop_no_sk_drops_inc; + if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); + goto drop; } - return 0; + skb_dst_drop(skb); + + bh_lock_sock(sk); + rc = 0; + if (!sock_owned_by_user(sk)) + rc = __udpv6_queue_rcv_skb(sk, skb); + else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { + bh_unlock_sock(sk); + goto drop; + } + bh_unlock_sock(sk); + + return rc; + +csum_error: + UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: - atomic_inc(&sk->sk_drops); -drop_no_sk_drops_inc: UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + atomic_inc(&sk->sk_drops); kfree_skb(skb); return -1; } static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, - __be16 loc_port, struct in6_addr *loc_addr, - __be16 rmt_port, struct in6_addr *rmt_addr, + __be16 loc_port, const struct in6_addr *loc_addr, + __be16 rmt_port, const struct in6_addr *rmt_addr, int dif) { struct hlist_nulls_node *node; - struct sock *s = sk; unsigned short num = ntohs(loc_port); - sk_nulls_for_each_from(s, node) { - struct inet_sock *inet = inet_sk(s); + sk_nulls_for_each_from(sk, node) { + struct inet_sock *inet = inet_sk(sk); - if (!net_eq(sock_net(s), net)) + if (!net_eq(sock_net(sk), net)) continue; - if (udp_sk(s)->udp_port_hash == num && - s->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(s); + if (udp_sk(sk)->udp_port_hash == num && + sk->sk_family == PF_INET6) { if (inet->inet_dport) { if (inet->inet_dport != rmt_port) continue; } - if (!ipv6_addr_any(&np->daddr) && - !ipv6_addr_equal(&np->daddr, rmt_addr)) + if (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; - if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) continue; } - if (!inet6_mc_check(s, loc_addr, rmt_addr)) + if (!inet6_mc_check(sk, loc_addr, rmt_addr)) continue; - return s; + return sk; } } return NULL; @@ -593,44 +745,45 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, static void flush_stack(struct sock **stack, unsigned int count, struct sk_buff *skb, unsigned int final) { - unsigned int i; + struct sk_buff *skb1 = NULL; struct sock *sk; - struct sk_buff *skb1; + unsigned int i; for (i = 0; i < count; i++) { - skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); - sk = stack[i]; - if (skb1) { - if (sk_rcvqueues_full(sk, skb1)) { - kfree_skb(skb1); - goto drop; - } - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - udpv6_queue_rcv_skb(sk, skb1); - else if (sk_add_backlog(sk, skb1)) { - kfree_skb(skb1); - bh_unlock_sock(sk); - goto drop; - } - bh_unlock_sock(sk); - continue; + if (likely(skb1 == NULL)) + skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); + if (!skb1) { + atomic_inc(&sk->sk_drops); + UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, + IS_UDPLITE(sk)); + UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, + IS_UDPLITE(sk)); } -drop: - atomic_inc(&sk->sk_drops); - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk)); - UDP6_INC_STATS_BH(sock_net(sk), - UDP_MIB_INERRORS, IS_UDPLITE(sk)); + + if (skb1 && udpv6_queue_rcv_skb(sk, skb1) <= 0) + skb1 = NULL; } + if (unlikely(skb1)) + kfree_skb(skb1); +} + +static void udp6_csum_zero_error(struct sk_buff *skb) +{ + /* RFC 2460 section 8.1 says that we SHOULD log + * this error. Well, it is reasonable. + */ + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", + &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), + &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); } + /* * Note: called only from the BH handler context, * so we don't need to lock the hashes. */ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, - struct in6_addr *saddr, struct in6_addr *daddr, + const struct in6_addr *saddr, const struct in6_addr *daddr, struct udp_table *udptable) { struct sock *sk, *stack[256 / sizeof(struct sock *)]; @@ -644,7 +797,12 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, dif = inet6_iif(skb); sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); while (sk) { - stack[count++] = sk; + /* If zero checksum and no_check is not on for + * the socket then skip it. + */ + if (uh->check || udp_sk(sk)->no_check6_rx) + stack[count++] = sk; + sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, uh->source, saddr, dif); if (unlikely(count == ARRAY_SIZE(stack))) { @@ -673,47 +831,13 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, return 0; } -static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, - int proto) -{ - int err; - - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (proto == IPPROTO_UDPLITE) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - } - - if (uh->check == 0) { - /* RFC 2460 section 8.1 says that we SHOULD log - this error. Well, it is reasonable. - */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); - return 1; - } - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, - skb->len, proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - - if (!skb_csum_unnecessary(skb)) - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, proto, 0)); - - return 0; -} - int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { struct net *net = dev_net(skb->dev); struct sock *sk; struct udphdr *uh; - struct in6_addr *saddr, *daddr; + const struct in6_addr *saddr, *daddr; u32 ulen = 0; if (!pskb_may_pull(skb, sizeof(struct udphdr))) @@ -747,7 +871,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, } if (udp6_csum_init(skb, uh, proto)) - goto discard; + goto csum_error; /* * Multicast receive code @@ -763,39 +887,42 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, * for sock caches... i'll skip this for now. */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); + if (sk != NULL) { + int ret; - if (sk == NULL) { - if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto discard; + if (!uh->check && !udp_sk(sk)->no_check6_rx) { + sock_put(sk); + udp6_csum_zero_error(skb); + goto csum_error; + } - if (udp_lib_checksum_complete(skb)) - goto discard; - UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, - proto == IPPROTO_UDPLITE); + ret = udpv6_queue_rcv_skb(sk, skb); + sock_put(sk); - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + /* a return value > 0 means to resubmit the input, but + * it wants the return to be -protocol, or 0 + */ + if (ret > 0) + return -ret; - kfree_skb(skb); return 0; } - /* deliver */ - - if (sk_rcvqueues_full(sk, skb)) { - sock_put(sk); - goto discard; + if (!uh->check) { + udp6_csum_zero_error(skb); + goto csum_error; } - bh_lock_sock(sk); - if (!sock_owned_by_user(sk)) - udpv6_queue_rcv_skb(sk, skb); - else if (sk_add_backlog(sk, skb)) { - atomic_inc(&sk->sk_drops); - bh_unlock_sock(sk); - sock_put(sk); + + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - } - bh_unlock_sock(sk); - sock_put(sk); + + if (udp_lib_checksum_complete(skb)) + goto csum_error; + + UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + + kfree_skb(skb); return 0; short_packet: @@ -807,7 +934,9 @@ short_packet: skb->len, daddr, ntohs(uh->dest)); - + goto discard; +csum_error: + UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); discard: UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); @@ -886,11 +1015,16 @@ static int udp_v6_push_pending_frames(struct sock *sk) struct udphdr *uh; struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; + struct flowi6 *fl6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; + if (up->pending == AF_INET) + return udp_push_pending_frames(sk); + + fl6 = &inet->cork.fl.u.ip6; + /* Grab the skbuff where UDP header space exists. */ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) goto out; @@ -899,23 +1033,26 @@ static int udp_v6_push_pending_frames(struct sock *sk) * Create a UDP header */ uh = udp_hdr(skb); - uh->source = fl->fl_ip_sport; - uh->dest = fl->fl_ip_dport; + uh->source = fl6->fl6_sport; + uh->dest = fl6->fl6_dport; uh->len = htons(up->len); uh->check = 0; if (is_udplite) csum = udplite_csum_outgoing(sk, skb); - else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst, + else if (up->no_check6_tx) { /* UDP csum disabled */ + skb->ip_summed = CHECKSUM_NONE; + goto send; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ + udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, up->len); goto send; } else csum = udp_csum_outgoing(sk, skb); /* add protocol-dependent pseudo-header */ - uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, - up->len, fl->proto, csum ); + uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, + up->len, fl6->flowi6_proto, csum); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -943,11 +1080,11 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int addr_len = msg->msg_namelen; int ulen = len; @@ -984,7 +1121,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, } else if (!up->pending) { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - daddr = &np->daddr; + daddr = &sk->sk_v6_daddr; } else daddr = NULL; @@ -1030,22 +1167,21 @@ do_udp_sendmsg: } ulen += sizeof(struct udphdr); - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; - fl.fl_ip_dport = sin6->sin6_port; + fl6.fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { - fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } @@ -1054,44 +1190,44 @@ do_udp_sendmsg: * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && - ipv6_addr_equal(daddr, &np->daddr)) - daddr = &np->daddr; + ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) + daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && - ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl.oif = sin6->sin6_scope_id; + __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) + fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl.fl_ip_dport = inet->inet_dport; - daddr = &np->daddr; - fl.fl6_flowlabel = np->flow_label; + fl6.fl6_dport = inet->inet_dport; + daddr = &sk->sk_v6_daddr; + fl6.flowlabel = np->flow_label; connected = 1; } - if (!fl.oif) - fl.oif = sk->sk_bound_dev_if; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = sk->sk_bound_dev_if; - if (!fl.oif) - fl.oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl.mark = sk->sk_mark; + fl6.flowi6_mark = sk->sk_mark; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, - &tclass, &dontfrag); + err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, opt, + &hlimit, &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; } @@ -1105,55 +1241,40 @@ do_udp_sendmsg: opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl.proto = sk->sk_protocol; + fl6.flowi6_proto = sk->sk_protocol; if (!ipv6_addr_any(daddr)) - ipv6_addr_copy(&fl.fl6_dst, daddr); + fl6.daddr = *daddr; else - fl.fl6_dst.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ - if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.fl_ip_sport = inet->inet_sport; + fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ + if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) + fl6.saddr = np->saddr; + fl6.fl6_sport = inet->inet_sport; - final_p = fl6_update_dst(&fl, opt, &final); + final_p = fl6_update_dst(&fl6, opt, &final); if (final_p) connected = 0; - if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { - fl.oif = np->mcast_oif; + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) { + fl6.flowi6_oif = np->mcast_oif; connected = 0; - } + } else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; - security_sk_classify_flow(sk, &fl); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - err = ip6_sk_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; goto out; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); - if (err < 0) { - if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl.fl6_dst)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } + if (hlimit < 0) + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (tclass < 0) tclass = np->tclass; - if (dontfrag < 0) - dontfrag = np->dontfrag; - if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: @@ -1172,10 +1293,12 @@ back_from_confirm: up->pending = AF_INET6; do_append_data: + if (dontfrag < 0) + dontfrag = np->dontfrag; up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), hlimit, tclass, opt, &fl, + sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info*)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); if (err) @@ -1188,10 +1311,10 @@ do_append_data: if (dst) { if (connected) { ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? - &np->daddr : NULL, + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? + &sk->sk_v6_daddr : NULL, #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? + ipv6_addr_equal(&fl6.saddr, &np->saddr) ? &np->saddr : #endif NULL); @@ -1232,10 +1355,18 @@ do_confirm: void udpv6_destroy_sock(struct sock *sk) { + struct udp_sock *up = udp_sk(sk); lock_sock(sk); udp_v6_flush_pending_frames(sk); release_sock(sk); + if (static_key_false(&udpv6_encap_needed) && up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = ACCESS_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + inet6_destroy_sock(sk); } @@ -1280,158 +1411,41 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, } #endif -static int udp6_ufo_send_check(struct sk_buff *skb) -{ - struct ipv6hdr *ipv6h; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(*uh))) - return -EINVAL; - - ipv6h = ipv6_hdr(skb); - uh = udp_hdr(skb); - - uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - return 0; -} - -static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, int features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - unsigned int unfrag_ip6hlen, unfrag_len; - struct frag_hdr *fptr; - u8 *mac_start, *prevhdr; - u8 nexthdr; - u8 frag_hdr_sz = sizeof(struct frag_hdr); - int offset; - __wsum csum; - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || - !(type & (SKB_GSO_UDP)))) - goto out; - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. - */ - offset = skb->csum_start - skb_headroom(skb); - csum = skb_checksum(skb, offset, skb->len- offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - - /* Check if there is enough headroom to insert fragment header. */ - if ((skb_headroom(skb) < frag_hdr_sz) && - pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC)) - goto out; - - /* Find the unfragmentable header and shift it left by frag_hdr_sz - * bytes to insert fragment header. - */ - unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); - nexthdr = *prevhdr; - *prevhdr = NEXTHDR_FRAGMENT; - unfrag_len = skb_network_header(skb) - skb_mac_header(skb) + - unfrag_ip6hlen; - mac_start = skb_mac_header(skb); - memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len); - - skb->mac_header -= frag_hdr_sz; - skb->network_header -= frag_hdr_sz; - - fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); - fptr->nexthdr = nexthdr; - fptr->reserved = 0; - ipv6_select_ident(fptr); - - /* Fragment the skb. ipv6 header and the remaining fields of the - * fragment header are updated in ipv6_gso_segment() - */ - segs = skb_segment(skb, features); - -out: - return segs; -} - static const struct inet6_protocol udpv6_protocol = { .handler = udpv6_rcv, .err_handler = udpv6_err, - .gso_send_check = udp6_ufo_send_check, - .gso_segment = udp6_ufo_fragment, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS - -static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket) -{ - struct inet_sock *inet = inet_sk(sp); - struct ipv6_pinfo *np = inet6_sk(sp); - struct in6_addr *dest, *src; - __u16 destp, srcp; - - dest = &np->daddr; - src = &np->rcv_saddr; - destp = ntohs(inet->inet_dport); - srcp = ntohs(inet->inet_sport); - seq_printf(seq, - "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", - bucket, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], srcp, - dest->s6_addr32[0], dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], destp, - sp->sk_state, - sk_wmem_alloc_get(sp), - sk_rmem_alloc_get(sp), - 0, 0L, 0, - sock_i_uid(sp), 0, - sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, - atomic_read(&sp->sk_drops)); -} - int udp6_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) - seq_printf(seq, - " sl " - "local_address " - "remote_address " - "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode ref pointer drops\n"); - else - udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + int bucket = ((struct udp_iter_state *)seq->private)->bucket; + struct inet_sock *inet = inet_sk(v); + __u16 srcp = ntohs(inet->inet_sport); + __u16 destp = ntohs(inet->inet_dport); + ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); + } return 0; } +static const struct file_operations udp6_afinfo_seq_fops = { + .owner = THIS_MODULE, + .open = udp_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net +}; + static struct udp_seq_afinfo udp6_seq_afinfo = { .name = "udp6", .family = AF_INET6, .udp_table = &udp_table, - .seq_fops = { - .owner = THIS_MODULE, - }, + .seq_fops = &udp6_afinfo_seq_fops, .seq_ops = { .show = udp6_seq_show, }, @@ -1447,6 +1461,17 @@ void udp6_proc_exit(struct net *net) { } #endif /* CONFIG_PROC_FS */ +void udp_v6_clear_sk(struct sock *sk, int size) +{ + struct inet_sock *inet = inet_sk(sk); + + /* we do not want to clear pinet6 field, because of RCU lookups */ + sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6)); + + size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6); + memset(&inet->pinet6 + 1, 0, size); +} + /* ------------------------------------------------------------------------ */ struct proto udpv6_prot = { @@ -1461,7 +1486,7 @@ struct proto udpv6_prot = { .getsockopt = udpv6_getsockopt, .sendmsg = udpv6_sendmsg, .recvmsg = udpv6_recvmsg, - .backlog_rcv = udpv6_queue_rcv_skb, + .backlog_rcv = __udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v6_rehash, @@ -1477,7 +1502,7 @@ struct proto udpv6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, + .clear_sk = udp_v6_clear_sk, }; static struct inet_protosw udpv6_protosw = { @@ -1485,7 +1510,6 @@ static struct inet_protosw udpv6_protosw = { .protocol = IPPROTO_UDP, .prot = &udpv6_prot, .ops = &inet6_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }; |
