diff options
Diffstat (limited to 'net/ipv6/ip6_output.c')
| -rw-r--r-- | net/ipv6/ip6_output.c | 211 | 
1 files changed, 138 insertions, 73 deletions
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a54c45ce4a4..45702b8cd14 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb)  	}  	rcu_read_lock_bh(); -	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); +	nexthop = rt6_nexthop((struct rt6_info *)dst);  	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);  	if (unlikely(!neigh))  		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); @@ -116,8 +116,8 @@ static int ip6_finish_output2(struct sk_buff *skb)  	}  	rcu_read_unlock_bh(); -	IP6_INC_STATS_BH(dev_net(dst->dev), -			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); +	IP6_INC_STATS(dev_net(dst->dev), +		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);  	kfree_skb(skb);  	return -EINVAL;  } @@ -125,13 +125,14 @@ static int ip6_finish_output2(struct sk_buff *skb)  static int ip6_finish_output(struct sk_buff *skb)  {  	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || -	    dst_allfrag(skb_dst(skb))) +	    dst_allfrag(skb_dst(skb)) || +	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))  		return ip6_fragment(skb, ip6_finish_output2);  	else  		return ip6_finish_output2(skb);  } -int ip6_output(struct sk_buff *skb) +int ip6_output(struct sock *sk, struct sk_buff *skb)  {  	struct net_device *dev = skb_dst(skb)->dev;  	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); @@ -218,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,  	skb->mark = sk->sk_mark;  	mtu = dst_mtu(dst); -	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { +	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {  		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),  			      IPSTATS_MIB_OUT, skb->len);  		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, @@ -320,6 +321,45 @@ static inline int ip6_forward_finish(struct sk_buff *skb)  	return dst_output(skb);  } +static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +{ +	unsigned int mtu; +	struct inet6_dev *idev; + +	if (dst_metric_locked(dst, RTAX_MTU)) { +		mtu = dst_metric_raw(dst, RTAX_MTU); +		if (mtu) +			return mtu; +	} + +	mtu = IPV6_MIN_MTU; +	rcu_read_lock(); +	idev = __in6_dev_get(dst->dev); +	if (idev) +		mtu = idev->cnf.mtu6; +	rcu_read_unlock(); + +	return mtu; +} + +static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +{ +	if (skb->len <= mtu) +		return false; + +	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */ +	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) +		return true; + +	if (skb->ignore_df) +		return false; + +	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) +		return false; + +	return true; +} +  int ip6_forward(struct sk_buff *skb)  {  	struct dst_entry *dst = skb_dst(skb); @@ -331,17 +371,18 @@ int ip6_forward(struct sk_buff *skb)  	if (net->ipv6.devconf_all->forwarding == 0)  		goto error; +	if (skb->pkt_type != PACKET_HOST) +		goto drop; +  	if (skb_warn_if_lro(skb))  		goto drop;  	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { -		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_INDISCARDS);  		goto drop;  	} -	if (skb->pkt_type != PACKET_HOST) -		goto drop; -  	skb_forward_csum(skb);  	/* @@ -369,8 +410,8 @@ int ip6_forward(struct sk_buff *skb)  		/* Force OUTPUT device used as source address */  		skb->dev = dst->dev;  		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); -		IP6_INC_STATS_BH(net, -				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_INHDRERRORS);  		kfree_skb(skb);  		return -ETIMEDOUT; @@ -383,14 +424,15 @@ int ip6_forward(struct sk_buff *skb)  		if (proxied > 0)  			return ip6_input(skb);  		else if (proxied < 0) { -			IP6_INC_STATS(net, ip6_dst_idev(dst), -				      IPSTATS_MIB_INDISCARDS); +			IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +					 IPSTATS_MIB_INDISCARDS);  			goto drop;  		}  	}  	if (!xfrm6_route_forward(skb)) { -		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_INDISCARDS);  		goto drop;  	}  	dst = skb_dst(skb); @@ -438,25 +480,25 @@ int ip6_forward(struct sk_buff *skb)  		}  	} -	mtu = dst_mtu(dst); +	mtu = ip6_dst_mtu_forward(dst);  	if (mtu < IPV6_MIN_MTU)  		mtu = IPV6_MIN_MTU; -	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || -	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { +	if (ip6_pkt_too_big(skb, mtu)) {  		/* Again, force OUTPUT device used as source address */  		skb->dev = dst->dev;  		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -		IP6_INC_STATS_BH(net, -				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); -		IP6_INC_STATS_BH(net, -				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_INTOOBIGERRORS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_FRAGFAILS);  		kfree_skb(skb);  		return -EMSGSIZE;  	}  	if (skb_cow(skb, dst->dev->hard_header_len)) { -		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_OUTDISCARDS);  		goto drop;  	} @@ -492,12 +534,23 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)  	to->tc_index = from->tc_index;  #endif  	nf_copy(to, from); -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) -	to->nf_trace = from->nf_trace; -#endif  	skb_copy_secmark(to, from);  } +static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ +	static u32 ip6_idents_hashrnd __read_mostly; +	u32 hash, id; + +	net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + +	hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); +	hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); + +	id = ip_idents_reserve(hash, 1); +	fhdr->identification = htonl(id); +} +  int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))  {  	struct sk_buff *frag; @@ -520,7 +573,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))  	/* We must not fragment if the socket is set to force MTU discovery  	 * or if the skb it not generated by a local socket.  	 */ -	if (unlikely(!skb->local_df && skb->len > mtu) || +	if (unlikely(!skb->ignore_df && skb->len > mtu) ||  		     (IP6CB(skb)->frag_max_size &&  		      IP6CB(skb)->frag_max_size > mtu)) {  		if (skb->sk && dst_allfrag(skb_dst(skb))) @@ -874,7 +927,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,  	 */  	rt = (struct rt6_info *) *dst;  	rcu_read_lock_bh(); -	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); +	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));  	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;  	rcu_read_unlock_bh(); @@ -909,7 +962,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,  out_err_release:  	if (err == -ENETUNREACH) -		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); +		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);  	dst_release(*dst);  	*dst = NULL;  	return err; @@ -937,7 +990,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);   *	@sk: socket which provides route info   *	@fl6: flow to lookup   *	@final_dst: final destination address for ipsec lookup - *	@can_sleep: we are in a sleepable context   *   *	This function performs a route lookup on the given flow.   * @@ -945,8 +997,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);   *	error code.   */  struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, -				      const struct in6_addr *final_dst, -				      bool can_sleep) +				      const struct in6_addr *final_dst)  {  	struct dst_entry *dst = NULL;  	int err; @@ -956,8 +1007,6 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,  		return ERR_PTR(err);  	if (final_dst)  		fl6->daddr = *final_dst; -	if (can_sleep) -		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;  	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);  } @@ -968,7 +1017,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);   *	@sk: socket which provides the dst cache and route info   *	@fl6: flow to lookup   *	@final_dst: final destination address for ipsec lookup - *	@can_sleep: we are in a sleepable context   *   *	This function performs a route lookup on the given flow with the   *	possibility of using the cached route in the socket if it is valid. @@ -979,8 +1027,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);   *	error code.   */  struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, -					 const struct in6_addr *final_dst, -					 bool can_sleep) +					 const struct in6_addr *final_dst)  {  	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);  	int err; @@ -992,8 +1039,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,  		return ERR_PTR(err);  	if (final_dst)  		fl6->daddr = *final_dst; -	if (can_sleep) -		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;  	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);  } @@ -1008,6 +1053,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,  {  	struct sk_buff *skb; +	struct frag_hdr fhdr;  	int err;  	/* There is support for UDP large send offload by network @@ -1015,8 +1061,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,  	 * udp datagram  	 */  	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { -		struct frag_hdr fhdr; -  		skb = sock_alloc_send_skb(sk,  			hh_len + fragheaderlen + transhdrlen + 20,  			(flags & MSG_DONTWAIT), &err); @@ -1036,20 +1080,24 @@ static inline int ip6_ufo_append_data(struct sock *sk,  		skb->transport_header = skb->network_header + fragheaderlen;  		skb->protocol = htons(ETH_P_IPV6); -		skb->ip_summed = CHECKSUM_PARTIAL;  		skb->csum = 0; -		/* Specify the length of each IPv6 datagram fragment. -		 * It has to be a multiple of 8. -		 */ -		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - -					     sizeof(struct frag_hdr)) & ~7; -		skb_shinfo(skb)->gso_type = SKB_GSO_UDP; -		ipv6_select_ident(&fhdr, rt); -		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;  		__skb_queue_tail(&sk->sk_write_queue, skb); +	} else if (skb_is_gso(skb)) { +		goto append;  	} +	skb->ip_summed = CHECKSUM_PARTIAL; +	/* Specify the length of each IPv6 datagram fragment. +	 * It has to be a multiple of 8. +	 */ +	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - +				     sizeof(struct frag_hdr)) & ~7; +	skb_shinfo(skb)->gso_type = SKB_GSO_UDP; +	ipv6_select_ident(&fhdr, rt); +	skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + +append:  	return skb_append_datato_frags(sk, skb, getfrag, from,  				       (length - transhdrlen));  } @@ -1071,21 +1119,19 @@ static void ip6_append_data_mtu(unsigned int *mtu,  				unsigned int fragheaderlen,  				struct sk_buff *skb,  				struct rt6_info *rt, -				bool pmtuprobe) +				unsigned int orig_mtu)  {  	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {  		if (skb == NULL) {  			/* first fragment, reserve header_len */ -			*mtu = *mtu - rt->dst.header_len; +			*mtu = orig_mtu - rt->dst.header_len;  		} else {  			/*  			 * this fragment is not first, the headers  			 * space is regarded as data space.  			 */ -			*mtu = min(*mtu, pmtuprobe ? -				   rt->dst.dev->mtu : -				   dst_mtu(rt->dst.path)); +			*mtu = orig_mtu;  		}  		*maxfraglen = ((*mtu - fragheaderlen) & ~7)  			      + fragheaderlen - sizeof(struct frag_hdr); @@ -1102,7 +1148,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  	struct ipv6_pinfo *np = inet6_sk(sk);  	struct inet_cork *cork;  	struct sk_buff *skb, *skb_prev = NULL; -	unsigned int maxfraglen, fragheaderlen, mtu; +	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;  	int exthdrlen;  	int dst_exthdrlen;  	int hh_len; @@ -1158,10 +1204,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  		np->cork.hop_limit = hlimit;  		np->cork.tclass = tclass;  		if (rt->dst.flags & DST_XFRM_TUNNEL) -			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? +			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?  			      rt->dst.dev->mtu : dst_mtu(&rt->dst);  		else -			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? +			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?  			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);  		if (np->frag_size < mtu) {  			if (np->frag_size) @@ -1184,16 +1230,43 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  		dst_exthdrlen = 0;  		mtu = cork->fragsize;  	} +	orig_mtu = mtu;  	hh_len = LL_RESERVED_SPACE(rt->dst.dev);  	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +  			(opt ? opt->opt_nflen : 0); -	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); +	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - +		     sizeof(struct frag_hdr);  	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { -		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { -			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); +		unsigned int maxnonfragsize, headersize; + +		headersize = sizeof(struct ipv6hdr) + +			     (opt ? opt->opt_flen + opt->opt_nflen : 0) + +			     (dst_allfrag(&rt->dst) ? +			      sizeof(struct frag_hdr) : 0) + +			     rt->rt6i_nfheader_len; + +		if (ip6_sk_ignore_df(sk)) +			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; +		else +			maxnonfragsize = mtu; + +		/* dontfrag active */ +		if ((cork->length + length > mtu - headersize) && dontfrag && +		    (sk->sk_protocol == IPPROTO_UDP || +		     sk->sk_protocol == IPPROTO_RAW)) { +			ipv6_local_rxpmtu(sk, fl6, mtu - headersize + +						   sizeof(struct ipv6hdr)); +			goto emsgsize; +		} + +		if (cork->length + length > maxnonfragsize - headersize) { +emsgsize: +			ipv6_local_error(sk, EMSGSIZE, fl6, +					 mtu - headersize + +					 sizeof(struct ipv6hdr));  			return -EMSGSIZE;  		}  	} @@ -1218,12 +1291,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  	 * --yoshfuji  	 */ -	if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || -					   sk->sk_protocol == IPPROTO_RAW)) { -		ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); -		return -EMSGSIZE; -	} -  	skb = skb_peek_tail(&sk->sk_write_queue);  	cork->length += length;  	if (((length > mtu) || @@ -1263,8 +1330,7 @@ alloc_new_skb:  			if (skb == NULL || skb_prev == NULL)  				ip6_append_data_mtu(&mtu, &maxfraglen,  						    fragheaderlen, skb, rt, -						    np->pmtudisc == -						    IPV6_PMTUDISC_PROBE); +						    orig_mtu);  			skb_prev = skb; @@ -1492,8 +1558,7 @@ int ip6_push_pending_frames(struct sock *sk)  	}  	/* Allow local fragmentation. */ -	if (np->pmtudisc < IPV6_PMTUDISC_DO) -		skb->local_df = 1; +	skb->ignore_df = ip6_sk_ignore_df(sk);  	*final_dst = fl6->daddr;  	__skb_pull(skb, skb_network_header_len(skb)); @@ -1520,8 +1585,8 @@ int ip6_push_pending_frames(struct sock *sk)  	if (proto == IPPROTO_ICMPV6) {  		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); -		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); -		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); +		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); +		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);  	}  	err = ip6_local_out(skb);  | 
