diff options
Diffstat (limited to 'net/ipv4/udp.c')
| -rw-r--r-- | net/ipv4/udp.c | 443 | 
1 files changed, 340 insertions, 103 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 74d2c95db57..7d5a8661df7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -103,6 +103,7 @@  #include <linux/seq_file.h>  #include <net/net_namespace.h>  #include <net/icmp.h> +#include <net/inet_hashtables.h>  #include <net/route.h>  #include <net/checksum.h>  #include <net/xfrm.h> @@ -219,10 +220,10 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,  		unsigned short first, last;  		DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); -		inet_get_local_port_range(&low, &high); +		inet_get_local_port_range(net, &low, &high);  		remaining = (high - low) + 1; -		rand = net_random(); +		rand = prandom_u32();  		first = (((u64)rand * remaining) >> 32) + low;  		/*  		 * force rand to be an odd multiple of UDP_HTABLE_SIZE @@ -245,7 +246,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,  			do {  				if (low <= snum && snum <= high &&  				    !test_bit(snum >> udptable->log, bitmap) && -				    !inet_is_reserved_local_port(snum)) +				    !inet_is_local_reserved_port(net, snum))  					goto found;  				snum += rand;  			} while (snum != first); @@ -406,6 +407,18 @@ static inline int compute_score2(struct sock *sk, struct net *net,  	return score;  } +static unsigned int udp_ehashfn(struct net *net, const __be32 laddr, +				 const __u16 lport, const __be32 faddr, +				 const __be16 fport) +{ +	static u32 udp_ehash_secret __read_mostly; + +	net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret)); + +	return __inet_ehashfn(laddr, lport, faddr, fport, +			      udp_ehash_secret + net_hash_mix(net)); +} +  /* called with read_rcu_lock() */  static struct sock *udp4_lib_lookup2(struct net *net, @@ -429,8 +442,8 @@ begin:  			badness = score;  			reuseport = sk->sk_reuseport;  			if (reuseport) { -				hash = inet_ehashfn(net, daddr, hnum, -						    saddr, sport); +				hash = udp_ehashfn(net, daddr, hnum, +						   saddr, sport);  				matches = 1;  			}  		} else if (score == badness && reuseport) { @@ -510,8 +523,8 @@ begin:  			badness = score;  			reuseport = sk->sk_reuseport;  			if (reuseport) { -				hash = inet_ehashfn(net, daddr, hnum, -						    saddr, sport); +				hash = udp_ehashfn(net, daddr, hnum, +						   saddr, sport);  				matches = 1;  			}  		} else if (score == badness && reuseport) { @@ -547,15 +560,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,  						 __be16 sport, __be16 dport,  						 struct udp_table *udptable)  { -	struct sock *sk;  	const struct iphdr *iph = ip_hdr(skb); -	if (unlikely(sk = skb_steal_sock(skb))) -		return sk; -	else -		return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, -					 iph->daddr, dport, inet_iif(skb), -					 udptable); +	return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, +				 iph->daddr, dport, inet_iif(skb), +				 udptable);  }  struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, @@ -565,6 +574,26 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,  }  EXPORT_SYMBOL_GPL(udp4_lib_lookup); +static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, +				       __be16 loc_port, __be32 loc_addr, +				       __be16 rmt_port, __be32 rmt_addr, +				       int dif, unsigned short hnum) +{ +	struct inet_sock *inet = inet_sk(sk); + +	if (!net_eq(sock_net(sk), net) || +	    udp_sk(sk)->udp_port_hash != hnum || +	    (inet->inet_daddr && inet->inet_daddr != rmt_addr) || +	    (inet->inet_dport != rmt_port && inet->inet_dport) || +	    (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) || +	    ipv6_only_sock(sk) || +	    (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) +		return false; +	if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif)) +		return false; +	return true; +} +  static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,  					     __be16 loc_port, __be32 loc_addr,  					     __be16 rmt_port, __be32 rmt_addr, @@ -575,20 +604,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,  	unsigned short hnum = ntohs(loc_port);  	sk_nulls_for_each_from(s, node) { -		struct inet_sock *inet = inet_sk(s); - -		if (!net_eq(sock_net(s), net) || -		    udp_sk(s)->udp_port_hash != hnum || -		    (inet->inet_daddr && inet->inet_daddr != rmt_addr) || -		    (inet->inet_dport != rmt_port && inet->inet_dport) || -		    (inet->inet_rcv_saddr && -		     inet->inet_rcv_saddr != loc_addr) || -		    ipv6_only_sock(s) || -		    (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) -			continue; -		if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) -			continue; -		goto found; +		if (__udp_is_mcast_sock(net, s, +					loc_port, loc_addr, +					rmt_port, rmt_addr, +					dif, hnum)) +			goto found;  	}  	s = NULL;  found: @@ -658,7 +678,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)  		break;  	case ICMP_REDIRECT:  		ipv4_sk_redirect(skb, sk); -		break; +		goto out;  	}  	/* @@ -707,13 +727,12 @@ EXPORT_SYMBOL(udp_flush_pending_frames);  void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)  {  	struct udphdr *uh = udp_hdr(skb); -	struct sk_buff *frags = skb_shinfo(skb)->frag_list;  	int offset = skb_transport_offset(skb);  	int len = skb->len - offset;  	int hlen = len;  	__wsum csum = 0; -	if (!frags) { +	if (!skb_has_frag_list(skb)) {  		/*  		 * Only one fragment on the socket.  		 */ @@ -722,15 +741,17 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)  		uh->check = ~csum_tcpudp_magic(src, dst, len,  					       IPPROTO_UDP, 0);  	} else { +		struct sk_buff *frags; +  		/*  		 * HW-checksum won't work as there are two or more  		 * fragments on the socket so that all csums of sk_buffs  		 * should be together  		 */ -		do { +		skb_walk_frags(skb, frags) {  			csum = csum_add(csum, frags->csum);  			hlen -= frags->len; -		} while ((frags = frags->next)); +		}  		csum = skb_checksum(skb, offset, hlen, csum);  		skb->ip_summed = CHECKSUM_NONE; @@ -742,6 +763,43 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)  }  EXPORT_SYMBOL_GPL(udp4_hwcsum); +/* Function to set UDP checksum for an IPv4 UDP packet. This is intended + * for the simple case like when setting the checksum for a UDP tunnel. + */ +void udp_set_csum(bool nocheck, struct sk_buff *skb, +		  __be32 saddr, __be32 daddr, int len) +{ +	struct udphdr *uh = udp_hdr(skb); + +	if (nocheck) +		uh->check = 0; +	else if (skb_is_gso(skb)) +		uh->check = ~udp_v4_check(len, saddr, daddr, 0); +	else if (skb_dst(skb) && skb_dst(skb)->dev && +		 (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) { + +		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + +		skb->ip_summed = CHECKSUM_PARTIAL; +		skb->csum_start = skb_transport_header(skb) - skb->head; +		skb->csum_offset = offsetof(struct udphdr, check); +		uh->check = ~udp_v4_check(len, saddr, daddr, 0); +	} else { +		__wsum csum; + +		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + +		uh->check = 0; +		csum = skb_checksum(skb, 0, len, 0); +		uh->check = udp_v4_check(len, saddr, daddr, csum); +		if (uh->check == 0) +			uh->check = CSUM_MANGLED_0; + +		skb->ip_summed = CHECKSUM_UNNECESSARY; +	} +} +EXPORT_SYMBOL(udp_set_csum); +  static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)  {  	struct sock *sk = skb->sk; @@ -765,7 +823,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)  	if (is_udplite)  				 /*     UDP-Lite      */  		csum = udplite_csum(skb); -	else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */ +	else if (sk->sk_no_check_tx) {   /* UDP csum disabled */  		skb->ip_summed = CHECKSUM_NONE;  		goto send; @@ -855,6 +913,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	ipc.opt = NULL;  	ipc.tx_flags = 0; +	ipc.ttl = 0; +	ipc.tos = -1;  	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; @@ -880,7 +940,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	 *	Get and verify the address.  	 */  	if (msg->msg_name) { -		struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; +		DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name);  		if (msg->msg_namelen < sizeof(*usin))  			return -EINVAL;  		if (usin->sin_family != AF_INET) { @@ -909,7 +969,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	sock_tx_timestamp(sk, &ipc.tx_flags);  	if (msg->msg_controllen) { -		err = ip_cmsg_send(sock_net(sk), msg, &ipc); +		err = ip_cmsg_send(sock_net(sk), msg, &ipc, +				   sk->sk_family == AF_INET6);  		if (err)  			return err;  		if (ipc.opt) @@ -938,7 +999,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  		faddr = ipc.opt->opt.faddr;  		connected = 0;  	} -	tos = RT_TOS(inet->tos); +	tos = get_rttos(&ipc, inet);  	if (sock_flag(sk, SOCK_LOCALROUTE) ||  	    (msg->msg_flags & MSG_DONTROUTE) ||  	    (ipc.opt && ipc.opt->opt.is_strictroute)) { @@ -964,7 +1025,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  		fl4 = &fl4_stack;  		flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,  				   RT_SCOPE_UNIVERSE, sk->sk_protocol, -				   inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, +				   inet_sk_flowi_flags(sk),  				   faddr, saddr, dport, inet->inet_sport);  		security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); @@ -973,7 +1034,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  			err = PTR_ERR(rt);  			rt = NULL;  			if (err == -ENETUNREACH) -				IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); +				IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);  			goto out;  		} @@ -1072,6 +1133,9 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,  	struct udp_sock *up = udp_sk(sk);  	int ret; +	if (flags & MSG_SENDPAGE_NOTLAST) +		flags |= MSG_MORE; +  	if (!up->pending) {  		struct msghdr msg = {	.msg_flags = flags|MSG_MORE }; @@ -1201,7 +1265,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  		size_t len, int noblock, int flags, int *addr_len)  {  	struct inet_sock *inet = inet_sk(sk); -	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; +	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);  	struct sk_buff *skb;  	unsigned int ulen, copied;  	int peeked, off = 0; @@ -1209,14 +1273,8 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	int is_udplite = IS_UDPLITE(sk);  	bool slow; -	/* -	 *	Check any passed addresses -	 */ -	if (addr_len) -		*addr_len = sizeof(*sin); -  	if (flags & MSG_ERRQUEUE) -		return ip_recv_error(sk, msg, len); +		return ip_recv_error(sk, msg, len, addr_len);  try_again:  	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), @@ -1276,6 +1334,7 @@ try_again:  		sin->sin_port = udp_hdr(skb)->source;  		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;  		memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); +		*addr_len = sizeof(*sin);  	}  	if (inet->cmsg_flags)  		ip_cmsg_recv(msg, skb); @@ -1403,8 +1462,10 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  {  	int rc; -	if (inet_sk(sk)->inet_daddr) +	if (inet_sk(sk)->inet_daddr) {  		sock_rps_save_rxhash(sk, skb); +		sk_mark_napi_id(sk, skb); +	}  	rc = sock_queue_rcv_skb(sk, skb);  	if (rc < 0) { @@ -1472,6 +1533,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  		if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {  			int ret; +			/* Verify checksum before giving to encap */ +			if (udp_lib_checksum_complete(skb)) +				goto csum_error; +  			ret = encap_rcv(sk, skb);  			if (ret <= 0) {  				UDP_INC_STATS_BH(sock_net(sk), @@ -1523,12 +1588,15 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  		goto csum_error; -	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) +	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { +		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, +				 is_udplite);  		goto drop; +	}  	rc = 0; -	ipv4_pktinfo_prepare(skb); +	ipv4_pktinfo_prepare(sk, skb);  	bh_lock_sock(sk);  	if (!sock_owned_by_user(sk))  		rc = __udp_queue_rcv_skb(sk, skb); @@ -1577,6 +1645,18 @@ static void flush_stack(struct sock **stack, unsigned int count,  		kfree_skb(skb1);  } +/* For TCP sockets, sk_rx_dst is protected by socket lock + * For UDP, we use xchg() to guard against concurrent changes. + */ +static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) +{ +	struct dst_entry *old; + +	dst_hold(dst); +	old = xchg(&sk->sk_rx_dst, dst); +	dst_release(old); +} +  /*   *	Multicasts and broadcasts go to each listener.   * @@ -1637,7 +1717,6 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,  static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,  				 int proto)  { -	const struct iphdr *iph;  	int err;  	UDP_SKB_CB(skb)->partial_cov = 0; @@ -1649,22 +1728,8 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,  			return err;  	} -	iph = ip_hdr(skb); -	if (uh->check == 0) { -		skb->ip_summed = CHECKSUM_UNNECESSARY; -	} else if (skb->ip_summed == CHECKSUM_COMPLETE) { -		if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, -				      proto, skb->csum)) -			skb->ip_summed = CHECKSUM_UNNECESSARY; -	} -	if (!skb_csum_unnecessary(skb)) -		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, -					       skb->len, proto, 0); -	/* Probably, we should checksum udp header (it should be in cache -	 * in any case) and data in tiny packets (< rx copybreak). -	 */ - -	return 0; +	return skb_checksum_init_zero_check(skb, proto, uh->check, +					    inet_compute_pseudo);  }  /* @@ -1705,16 +1770,33 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,  	if (udp4_csum_init(skb, uh, proto))  		goto csum_error; -	if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) -		return __udp4_lib_mcast_deliver(net, skb, uh, -				saddr, daddr, udptable); +	sk = skb_steal_sock(skb); +	if (sk) { +		struct dst_entry *dst = skb_dst(skb); +		int ret; + +		if (unlikely(sk->sk_rx_dst != dst)) +			udp_sk_rx_dst_set(sk, dst); -	sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); +		ret = udp_queue_rcv_skb(sk, skb); +		sock_put(sk); +		/* a return value > 0 means to resubmit the input, but +		 * it wants the return to be -protocol, or 0 +		 */ +		if (ret > 0) +			return -ret; +		return 0; +	} else { +		if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) +			return __udp4_lib_mcast_deliver(net, skb, uh, +					saddr, daddr, udptable); + +		sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); +	}  	if (sk != NULL) {  		int ret; -		sk_mark_napi_id(sk, skb);  		ret = udp_queue_rcv_skb(sk, skb);  		sock_put(sk); @@ -1768,6 +1850,142 @@ drop:  	return 0;  } +/* We can only early demux multicast if there is a single matching socket. + * If more than one socket found returns NULL + */ +static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, +						  __be16 loc_port, __be32 loc_addr, +						  __be16 rmt_port, __be32 rmt_addr, +						  int dif) +{ +	struct sock *sk, *result; +	struct hlist_nulls_node *node; +	unsigned short hnum = ntohs(loc_port); +	unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask); +	struct udp_hslot *hslot = &udp_table.hash[slot]; + +	/* Do not bother scanning a too big list */ +	if (hslot->count > 10) +		return NULL; + +	rcu_read_lock(); +begin: +	count = 0; +	result = NULL; +	sk_nulls_for_each_rcu(sk, node, &hslot->head) { +		if (__udp_is_mcast_sock(net, sk, +					loc_port, loc_addr, +					rmt_port, rmt_addr, +					dif, hnum)) { +			result = sk; +			++count; +		} +	} +	/* +	 * if the nulls value we got at the end of this lookup is +	 * not the expected one, we must restart lookup. +	 * We probably met an item that was moved to another chain. +	 */ +	if (get_nulls_value(node) != slot) +		goto begin; + +	if (result) { +		if (count != 1 || +		    unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) +			result = NULL; +		else if (unlikely(!__udp_is_mcast_sock(net, result, +						       loc_port, loc_addr, +						       rmt_port, rmt_addr, +						       dif, hnum))) { +			sock_put(result); +			result = NULL; +		} +	} +	rcu_read_unlock(); +	return result; +} + +/* For unicast we should only early demux connected sockets or we can + * break forwarding setups.  The chains here can be long so only check + * if the first socket is an exact match and if not move on. + */ +static struct sock *__udp4_lib_demux_lookup(struct net *net, +					    __be16 loc_port, __be32 loc_addr, +					    __be16 rmt_port, __be32 rmt_addr, +					    int dif) +{ +	struct sock *sk, *result; +	struct hlist_nulls_node *node; +	unsigned short hnum = ntohs(loc_port); +	unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); +	unsigned int slot2 = hash2 & udp_table.mask; +	struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; +	INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); +	const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); + +	rcu_read_lock(); +	result = NULL; +	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { +		if (INET_MATCH(sk, net, acookie, +			       rmt_addr, loc_addr, ports, dif)) +			result = sk; +		/* Only check first socket in chain */ +		break; +	} + +	if (result) { +		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) +			result = NULL; +		else if (unlikely(!INET_MATCH(sk, net, acookie, +					      rmt_addr, loc_addr, +					      ports, dif))) { +			sock_put(result); +			result = NULL; +		} +	} +	rcu_read_unlock(); +	return result; +} + +void udp_v4_early_demux(struct sk_buff *skb) +{ +	struct net *net = dev_net(skb->dev); +	const struct iphdr *iph; +	const struct udphdr *uh; +	struct sock *sk; +	struct dst_entry *dst; +	int dif = skb->dev->ifindex; + +	/* validate the packet */ +	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) +		return; + +	iph = ip_hdr(skb); +	uh = udp_hdr(skb); + +	if (skb->pkt_type == PACKET_BROADCAST || +	    skb->pkt_type == PACKET_MULTICAST) +		sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, +						   uh->source, iph->saddr, dif); +	else if (skb->pkt_type == PACKET_HOST) +		sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, +					     uh->source, iph->saddr, dif); +	else +		return; + +	if (!sk) +		return; + +	skb->sk = sk; +	skb->destructor = sock_edemux; +	dst = sk->sk_rx_dst; + +	if (dst) +		dst = dst_check(dst, 0); +	if (dst) +		skb_dst_set_noref(skb, dst); +} +  int udp_rcv(struct sk_buff *skb)  {  	return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); @@ -1795,7 +2013,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,  		       int (*push_pending_frames)(struct sock *))  {  	struct udp_sock *up = udp_sk(sk); -	int val; +	int val, valbool;  	int err = 0;  	int is_udplite = IS_UDPLITE(sk); @@ -1805,6 +2023,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,  	if (get_user(val, (int __user *)optval))  		return -EFAULT; +	valbool = val ? 1 : 0; +  	switch (optname) {  	case UDP_CORK:  		if (val != 0) { @@ -1834,6 +2054,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,  		}  		break; +	case UDP_NO_CHECK6_TX: +		up->no_check6_tx = valbool; +		break; + +	case UDP_NO_CHECK6_RX: +		up->no_check6_rx = valbool; +		break; +  	/*  	 * 	UDP-Lite's partial checksum coverage (RFC 3828).  	 */ @@ -1916,6 +2144,14 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,  		val = up->encap_type;  		break; +	case UDP_NO_CHECK6_TX: +		val = up->no_check6_tx; +		break; + +	case UDP_NO_CHECK6_RX: +		val = up->no_check6_rx; +		break; +  	/* The following two cannot be changed on UDP sockets, the return is  	 * always 0 (which corresponds to the full checksum coverage of UDP). */  	case UDPLITE_SEND_CSCOV: @@ -2150,7 +2386,7 @@ EXPORT_SYMBOL(udp_proc_unregister);  /* ------------------------------------------------------------------------ */  static void udp4_format_sock(struct sock *sp, struct seq_file *f, -		int bucket, int *len) +		int bucket)  {  	struct inet_sock *inet = inet_sk(sp);  	__be32 dest = inet->inet_daddr; @@ -2159,7 +2395,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,  	__u16 srcp	  = ntohs(inet->inet_sport);  	seq_printf(f, "%5d: %08X:%04X %08X:%04X" -		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n", +		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d",  		bucket, src, srcp, dest, destp, sp->sk_state,  		sk_wmem_alloc_get(sp),  		sk_rmem_alloc_get(sp), @@ -2167,23 +2403,22 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,  		from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),  		0, sock_i_ino(sp),  		atomic_read(&sp->sk_refcnt), sp, -		atomic_read(&sp->sk_drops), len); +		atomic_read(&sp->sk_drops));  }  int udp4_seq_show(struct seq_file *seq, void *v)  { +	seq_setwidth(seq, 127);  	if (v == SEQ_START_TOKEN) -		seq_printf(seq, "%-127s\n", -			   "  sl  local_address rem_address   st tx_queue " +		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "  			   "rx_queue tr tm->when retrnsmt   uid  timeout "  			   "inode ref pointer drops");  	else {  		struct udp_iter_state *state = seq->private; -		int len; -		udp4_format_sock(v, seq, state->bucket, &len); -		seq_printf(seq, "%*s\n", 127 - len, ""); +		udp4_format_sock(v, seq, state->bucket);  	} +	seq_pad(seq, '\n');  	return 0;  } @@ -2296,11 +2531,16 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,  				       netdev_features_t features)  {  	struct sk_buff *segs = ERR_PTR(-EINVAL); +	u16 mac_offset = skb->mac_header;  	int mac_len = skb->mac_len;  	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);  	__be16 protocol = skb->protocol;  	netdev_features_t enc_features; -	int outer_hlen; +	int udp_offset, outer_hlen; +	unsigned int oldlen; +	bool need_csum; + +	oldlen = (u16)~skb->len;  	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))  		goto out; @@ -2312,17 +2552,25 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,  	skb->mac_len = skb_inner_network_offset(skb);  	skb->protocol = htons(ETH_P_TEB); +	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); +	if (need_csum) +		skb->encap_hdr_csum = 1; +  	/* segment inner packet. */  	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);  	segs = skb_mac_gso_segment(skb, enc_features); -	if (!segs || IS_ERR(segs)) +	if (!segs || IS_ERR(segs)) { +		skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, +				     mac_len);  		goto out; +	}  	outer_hlen = skb_tnl_header_len(skb); +	udp_offset = outer_hlen - tnl_hlen;  	skb = segs;  	do {  		struct udphdr *uh; -		int udp_offset = outer_hlen - tnl_hlen; +		int len;  		skb_reset_inner_headers(skb);  		skb->encapsulation = 1; @@ -2333,31 +2581,20 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,  		skb_reset_mac_header(skb);  		skb_set_network_header(skb, mac_len);  		skb_set_transport_header(skb, udp_offset); +		len = skb->len - udp_offset;  		uh = udp_hdr(skb); -		uh->len = htons(skb->len - udp_offset); - -		/* csum segment if tunnel sets skb with csum. */ -		if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) { -			struct iphdr *iph = ip_hdr(skb); +		uh->len = htons(len); -			uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, -						       skb->len - udp_offset, -						       IPPROTO_UDP, 0); -			uh->check = csum_fold(skb_checksum(skb, udp_offset, -							   skb->len - udp_offset, 0)); -			if (uh->check == 0) -				uh->check = CSUM_MANGLED_0; +		if (need_csum) { +			__be32 delta = htonl(oldlen + len); -		} else if (protocol == htons(ETH_P_IPV6)) { -			struct ipv6hdr *ipv6h = ipv6_hdr(skb); -			u32 len = skb->len - udp_offset; +			uh->check = ~csum_fold((__force __wsum) +					       ((__force u32)uh->check + +						(__force u32)delta)); +			uh->check = gso_make_checksum(skb, ~uh->check); -			uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, -						     len, IPPROTO_UDP, 0); -			uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0));  			if (uh->check == 0)  				uh->check = CSUM_MANGLED_0; -			skb->ip_summed = CHECKSUM_NONE;  		}  		skb->protocol = protocol;  | 
