Diffstat (limited to 'net/ipv6/ip6_output.c')
-rw-r--r--  net/ipv6/ip6_output.c | 791
1 file changed, 431 insertions, 360 deletions
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 99157b4cd56..45702b8cd14 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,50 +56,13 @@  #include <net/checksum.h>  #include <linux/mroute6.h> -static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); - -int __ip6_local_out(struct sk_buff *skb) -{ -	int len; - -	len = skb->len - sizeof(struct ipv6hdr); -	if (len > IPV6_MAXPLEN) -		len = 0; -	ipv6_hdr(skb)->payload_len = htons(len); - -	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, -		       skb_dst(skb)->dev, dst_output); -} - -int ip6_local_out(struct sk_buff *skb) -{ -	int err; - -	err = __ip6_local_out(skb); -	if (likely(err == 1)) -		err = dst_output(skb); - -	return err; -} -EXPORT_SYMBOL_GPL(ip6_local_out); - -/* dev_loopback_xmit for use with netfilter. */ -static int ip6_dev_loopback_xmit(struct sk_buff *newskb) -{ -	skb_reset_mac_header(newskb); -	__skb_pull(newskb, skb_network_offset(newskb)); -	newskb->pkt_type = PACKET_LOOPBACK; -	newskb->ip_summed = CHECKSUM_UNNECESSARY; -	WARN_ON(!skb_dst(newskb)); - -	netif_rx_ni(newskb); -	return 0; -} -  static int ip6_finish_output2(struct sk_buff *skb)  {  	struct dst_entry *dst = skb_dst(skb);  	struct net_device *dev = dst->dev; +	struct neighbour *neigh; +	struct in6_addr *nexthop; +	int ret;  	skb->protocol = htons(ETH_P_IPV6);  	skb->dev = dev; @@ -120,7 +83,7 @@ static int ip6_finish_output2(struct sk_buff *skb)  			if (newskb)  				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,  					newskb, NULL, newskb->dev, -					ip6_dev_loopback_xmit); +					dev_loopback_xmit);  			if (ipv6_hdr(skb)->hop_limit == 0) {  				IP6_INC_STATS(dev_net(dev), idev, @@ -132,37 +95,44 @@ static int ip6_finish_output2(struct sk_buff *skb)  		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,  				skb->len); + +		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= +		    IPV6_ADDR_SCOPE_NODELOCAL && +		    !(dev->flags & IFF_LOOPBACK)) { +			kfree_skb(skb); +			return 0; +		}  	} -	if (dst->hh) -		return neigh_hh_output(dst->hh, skb); -	else if (dst->neighbour) -		return dst->neighbour->output(skb); +	rcu_read_lock_bh(); +	nexthop = rt6_nexthop((struct rt6_info *)dst); +	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); +	if (unlikely(!neigh)) +		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); +	if (!IS_ERR(neigh)) { +		ret = dst_neigh_output(dst, neigh, skb); +		rcu_read_unlock_bh(); +		return ret; +	} +	rcu_read_unlock_bh(); -	IP6_INC_STATS_BH(dev_net(dst->dev), -			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); +	IP6_INC_STATS(dev_net(dst->dev), +		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);  	kfree_skb(skb);  	return -EINVAL;  } -static inline int ip6_skb_dst_mtu(struct sk_buff *skb) -{ -	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; - -	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? 
-	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); -} -  static int ip6_finish_output(struct sk_buff *skb)  {  	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || -	    dst_allfrag(skb_dst(skb))) +	    dst_allfrag(skb_dst(skb)) || +	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))  		return ip6_fragment(skb, ip6_finish_output2);  	else  		return ip6_finish_output2(skb);  } -int ip6_output(struct sk_buff *skb) +int ip6_output(struct sock *sk, struct sk_buff *skb)  {  	struct net_device *dev = skb_dst(skb)->dev;  	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); @@ -182,18 +152,17 @@ int ip6_output(struct sk_buff *skb)   *	xmit an sk_buff (used by TCP, SCTP and DCCP)   */ -int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, -	     struct ipv6_txoptions *opt) +int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, +	     struct ipv6_txoptions *opt, int tclass)  {  	struct net *net = sock_net(sk);  	struct ipv6_pinfo *np = inet6_sk(sk); -	struct in6_addr *first_hop = &fl->fl6_dst; +	struct in6_addr *first_hop = &fl6->daddr;  	struct dst_entry *dst = skb_dst(skb);  	struct ipv6hdr *hdr; -	u8  proto = fl->proto; +	u8  proto = fl6->flowi6_proto;  	int seg_len = skb->len;  	int hlimit = -1; -	int tclass = 0;  	u32 mtu;  	if (opt) { @@ -214,7 +183,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,  				kfree_skb(skb);  				return -ENOBUFS;  			} -			kfree_skb(skb); +			consume_skb(skb);  			skb = skb2;  			skb_set_owner_w(skb, sk);  		} @@ -231,37 +200,34 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,  	/*  	 *	Fill in the IPv6 header  	 */ -	if (np) { -		tclass = np->tclass; +	if (np)  		hlimit = np->hop_limit; -	}  	if (hlimit < 0)  		hlimit = ip6_dst_hoplimit(dst); -	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; +	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);  	hdr->payload_len = htons(seg_len);  	hdr->nexthdr = proto;  	hdr->hop_limit = hlimit; -	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); -	ipv6_addr_copy(&hdr->daddr, first_hop); +	hdr->saddr = fl6->saddr; +	hdr->daddr = *first_hop; +	skb->protocol = htons(ETH_P_IPV6);  	skb->priority = sk->sk_priority;  	skb->mark = sk->sk_mark;  	mtu = dst_mtu(dst); -	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { +	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {  		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),  			      IPSTATS_MIB_OUT, skb->len);  		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,  			       dst->dev, dst_output);  	} -	if (net_ratelimit()) -		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");  	skb->dev = dst->dev; -	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); +	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);  	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);  	kfree_skb(skb);  	return -EMSGSIZE; @@ -269,42 +235,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,  EXPORT_SYMBOL(ip6_xmit); -/* - *	To avoid extra problems ND packets are send through this - *	routine. 
It's code duplication but I really want to avoid - *	extra checks since ipv6_build_header is used by TCP (which - *	is for us performance critical) - */ - -int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, -	       const struct in6_addr *saddr, const struct in6_addr *daddr, -	       int proto, int len) -{ -	struct ipv6_pinfo *np = inet6_sk(sk); -	struct ipv6hdr *hdr; -	int totlen; - -	skb->protocol = htons(ETH_P_IPV6); -	skb->dev = dev; - -	totlen = len + sizeof(struct ipv6hdr); - -	skb_reset_network_header(skb); -	skb_put(skb, sizeof(struct ipv6hdr)); -	hdr = ipv6_hdr(skb); - -	*(__be32*)hdr = htonl(0x60000000); - -	hdr->payload_len = htons(len); -	hdr->nexthdr = proto; -	hdr->hop_limit = np->hop_limit; - -	ipv6_addr_copy(&hdr->saddr, saddr); -	ipv6_addr_copy(&hdr->daddr, daddr); - -	return 0; -} -  static int ip6_call_ra_chain(struct sk_buff *skb, int sel)  {  	struct ip6_ra_chain *ra; @@ -338,10 +268,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)  {  	struct ipv6hdr *hdr = ipv6_hdr(skb);  	u8 nexthdr = hdr->nexthdr; +	__be16 frag_off;  	int offset;  	if (ipv6_ext_hdr(nexthdr)) { -		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); +		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);  		if (offset < 0)  			return 0;  	} else @@ -390,6 +321,45 @@ static inline int ip6_forward_finish(struct sk_buff *skb)  	return dst_output(skb);  } +static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +{ +	unsigned int mtu; +	struct inet6_dev *idev; + +	if (dst_metric_locked(dst, RTAX_MTU)) { +		mtu = dst_metric_raw(dst, RTAX_MTU); +		if (mtu) +			return mtu; +	} + +	mtu = IPV6_MIN_MTU; +	rcu_read_lock(); +	idev = __in6_dev_get(dst->dev); +	if (idev) +		mtu = idev->cnf.mtu6; +	rcu_read_unlock(); + +	return mtu; +} + +static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +{ +	if (skb->len <= mtu) +		return false; + +	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */ +	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) +		return true; + +	if (skb->ignore_df) +		return false; + +	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) +		return false; + +	return true; +} +  int ip6_forward(struct sk_buff *skb)  {  	struct dst_entry *dst = skb_dst(skb); @@ -401,11 +371,15 @@ int ip6_forward(struct sk_buff *skb)  	if (net->ipv6.devconf_all->forwarding == 0)  		goto error; +	if (skb->pkt_type != PACKET_HOST) +		goto drop; +  	if (skb_warn_if_lro(skb))  		goto drop;  	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { -		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_INDISCARDS);  		goto drop;  	} @@ -424,9 +398,8 @@ int ip6_forward(struct sk_buff *skb)  	 *	cannot be fragmented, because there is no warranty  	 *	that different fragments will go along one path. 
--ANK  	 */ -	if (opt->ra) { -		u8 *ptr = skb_network_header(skb) + opt->ra; -		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) +	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { +		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))  			return 0;  	} @@ -437,8 +410,8 @@ int ip6_forward(struct sk_buff *skb)  		/* Force OUTPUT device used as source address */  		skb->dev = dst->dev;  		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); -		IP6_INC_STATS_BH(net, -				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_INHDRERRORS);  		kfree_skb(skb);  		return -ETIMEDOUT; @@ -451,14 +424,15 @@ int ip6_forward(struct sk_buff *skb)  		if (proxied > 0)  			return ip6_input(skb);  		else if (proxied < 0) { -			IP6_INC_STATS(net, ip6_dst_idev(dst), -				      IPSTATS_MIB_INDISCARDS); +			IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +					 IPSTATS_MIB_INDISCARDS);  			goto drop;  		}  	}  	if (!xfrm6_route_forward(skb)) { -		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_INDISCARDS);  		goto drop;  	}  	dst = skb_dst(skb); @@ -467,11 +441,10 @@ int ip6_forward(struct sk_buff *skb)  	   send redirects to source routed frames.  	   We don't send redirects to frames decapsulated from IPsec.  	 */ -	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && -	    !skb_sec_path(skb)) { +	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {  		struct in6_addr *target = NULL; +		struct inet_peer *peer;  		struct rt6_info *rt; -		struct neighbour *n = dst->neighbour;  		/*  		 *	incoming and outgoing devices are the same @@ -479,16 +452,20 @@ int ip6_forward(struct sk_buff *skb)  		 */  		rt = (struct rt6_info *) dst; -		if ((rt->rt6i_flags & RTF_GATEWAY)) -			target = (struct in6_addr*)&n->primary_key; +		if (rt->rt6i_flags & RTF_GATEWAY) +			target = &rt->rt6i_gateway;  		else  			target = &hdr->daddr; +		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); +  		/* Limit redirects both by destination (here)  		   and by source (inside ndisc_send_redirect)  		 */ -		if (xrlim_allow(dst, 1*HZ)) -			ndisc_send_redirect(skb, n, target); +		if (inet_peer_xrlim_allow(peer, 1*HZ)) +			ndisc_send_redirect(skb, target); +		if (peer) +			inet_putpeer(peer);  	} else {  		int addrtype = ipv6_addr_type(&hdr->saddr); @@ -503,24 +480,25 @@ int ip6_forward(struct sk_buff *skb)  		}  	} -	mtu = dst_mtu(dst); +	mtu = ip6_dst_mtu_forward(dst);  	if (mtu < IPV6_MIN_MTU)  		mtu = IPV6_MIN_MTU; -	if (skb->len > mtu && !skb_is_gso(skb)) { +	if (ip6_pkt_too_big(skb, mtu)) {  		/* Again, force OUTPUT device used as source address */  		skb->dev = dst->dev;  		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -		IP6_INC_STATS_BH(net, -				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); -		IP6_INC_STATS_BH(net, -				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_INTOOBIGERRORS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_FRAGFAILS);  		kfree_skb(skb);  		return -EMSGSIZE;  	}  	if (skb_cow(skb, dst->dev->hard_header_len)) { -		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_OUTDISCARDS);  		goto drop;  	} @@ -531,6 +509,7 @@ int ip6_forward(struct sk_buff *skb)  	hdr->hop_limit--;  	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); +	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), 
IPSTATS_MIB_OUTOCTETS, skb->len);  	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,  		       ip6_forward_finish); @@ -555,53 +534,24 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)  	to->tc_index = from->tc_index;  #endif  	nf_copy(to, from); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ -    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) -	to->nf_trace = from->nf_trace; -#endif  	skb_copy_secmark(to, from);  } -int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) +static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)  { -	u16 offset = sizeof(struct ipv6hdr); -	struct ipv6_opt_hdr *exthdr = -				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); -	unsigned int packet_len = skb->tail - skb->network_header; -	int found_rhdr = 0; -	*nexthdr = &ipv6_hdr(skb)->nexthdr; +	static u32 ip6_idents_hashrnd __read_mostly; +	u32 hash, id; -	while (offset + 1 <= packet_len) { - -		switch (**nexthdr) { - -		case NEXTHDR_HOP: -			break; -		case NEXTHDR_ROUTING: -			found_rhdr = 1; -			break; -		case NEXTHDR_DEST: -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) -				break; -#endif -			if (found_rhdr) -				return offset; -			break; -		default : -			return offset; -		} +	net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); -		offset += ipv6_optlen(exthdr); -		*nexthdr = &exthdr->nexthdr; -		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + -						 offset); -	} +	hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); +	hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); -	return offset; +	id = ip_idents_reserve(hash, 1); +	fhdr->identification = htonl(id);  } -static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))  {  	struct sk_buff *frag;  	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); @@ -609,6 +559,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))  	struct ipv6hdr *tmp_hdr;  	struct frag_hdr *fh;  	unsigned int mtu, hlen, left, len; +	int hroom, troom;  	__be32 frag_id = 0;  	int ptr, offset = 0, err=0;  	u8 *prevhdr, nexthdr = 0; @@ -622,7 +573,12 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))  	/* We must not fragment if the socket is set to force MTU discovery  	 * or if the skb it not generated by a local socket.  	 
*/ -	if (!skb->local_df && skb->len > mtu) { +	if (unlikely(!skb->ignore_df && skb->len > mtu) || +		     (IP6CB(skb)->frag_max_size && +		      IP6CB(skb)->frag_max_size > mtu)) { +		if (skb->sk && dst_allfrag(skb_dst(skb))) +			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); +  		skb->dev = skb_dst(skb)->dev;  		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);  		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), @@ -685,7 +641,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))  		skb_reset_network_header(skb);  		memcpy(skb_network_header(skb), tmp_hdr, hlen); -		ipv6_select_ident(fh); +		ipv6_select_ident(fh, rt);  		fh->nexthdr = nexthdr;  		fh->reserved = 0;  		fh->frag_off = htons(IP6_MF); @@ -741,7 +697,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))  		if (err == 0) {  			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),  				      IPSTATS_MIB_FRAGOKS); -			dst_release(&rt->dst); +			ip6_rt_put(rt);  			return 0;  		} @@ -753,7 +709,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))  		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),  			      IPSTATS_MIB_FRAGFAILS); -		dst_release(&rt->dst); +		ip6_rt_put(rt);  		return err;  slow_path_clean: @@ -767,6 +723,10 @@ slow_path_clean:  	}  slow_path: +	if ((skb->ip_summed == CHECKSUM_PARTIAL) && +	    skb_checksum_help(skb)) +		goto fail; +  	left = skb->len - hlen;		/* Space per frame */  	ptr = hlen;			/* Where to start from */ @@ -775,6 +735,8 @@ slow_path:  	 */  	*prevhdr = NEXTHDR_FRAGMENT; +	hroom = LL_RESERVED_SPACE(rt->dst.dev); +	troom = rt->dst.dev->needed_tailroom;  	/*  	 *	Keep copying data until we run out. @@ -784,7 +746,7 @@ slow_path:  		/* IF: it doesn't fit, use 'mtu' - the data space left */  		if (len > mtu)  			len = mtu; -		/* IF: we are not sending upto and including the packet end +		/* IF: we are not sending up to and including the packet end  		   then align the next start on an eight byte boundary */  		if (len < left)	{  			len &= ~7; @@ -793,7 +755,8 @@ slow_path:  		 *	Allocate buffer.  		 
*/ -		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) { +		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + +				      hroom + troom, GFP_ATOMIC)) == NULL) {  			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");  			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),  				      IPSTATS_MIB_FRAGFAILS); @@ -806,7 +769,7 @@ slow_path:  		 */  		ip6_copy_metadata(frag, skb); -		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev)); +		skb_reserve(frag, hroom);  		skb_put(frag, len + hlen + sizeof(struct frag_hdr));  		skb_reset_network_header(frag);  		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); @@ -831,7 +794,7 @@ slow_path:  		fh->nexthdr = nexthdr;  		fh->reserved = 0;  		if (!frag_id) { -			ipv6_select_ident(fh); +			ipv6_select_ident(fh, rt);  			frag_id = fh->identification;  		} else  			fh->identification = frag_id; @@ -864,7 +827,7 @@ slow_path:  	}  	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),  		      IPSTATS_MIB_FRAGOKS); -	kfree_skb(skb); +	consume_skb(skb);  	return err;  fail: @@ -874,9 +837,9 @@ fail:  	return err;  } -static inline int ip6_rt_check(struct rt6key *rt_key, -			       struct in6_addr *fl_addr, -			       struct in6_addr *addr_cache) +static inline int ip6_rt_check(const struct rt6key *rt_key, +			       const struct in6_addr *fl_addr, +			       const struct in6_addr *addr_cache)  {  	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&  		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); @@ -884,14 +847,20 @@ static inline int ip6_rt_check(struct rt6key *rt_key,  static struct dst_entry *ip6_sk_dst_check(struct sock *sk,  					  struct dst_entry *dst, -					  struct flowi *fl) +					  const struct flowi6 *fl6)  {  	struct ipv6_pinfo *np = inet6_sk(sk); -	struct rt6_info *rt = (struct rt6_info *)dst; +	struct rt6_info *rt;  	if (!dst)  		goto out; +	if (dst->ops->family != AF_INET6) { +		dst_release(dst); +		return NULL; +	} + +	rt = (struct rt6_info *)dst;  	/* Yes, checking route validity in not connected  	 * case is not very simple. Take into account,  	 * that we do not support routing by source, TOS, @@ -909,11 +878,11 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,  	 *    sockets.  	 * 2. oif also should be the same.  	 */ -	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || +	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||  #ifdef CONFIG_IPV6_SUBTREES -	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || +	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||  #endif -	    (fl->oif && fl->oif != dst->dev->ifindex)) { +	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {  		dst_release(dst);  		dst = NULL;  	} @@ -923,22 +892,26 @@ out:  }  static int ip6_dst_lookup_tail(struct sock *sk, -			       struct dst_entry **dst, struct flowi *fl) +			       struct dst_entry **dst, struct flowi6 *fl6)  { -	int err;  	struct net *net = sock_net(sk); +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD +	struct neighbour *n; +	struct rt6_info *rt; +#endif +	int err;  	if (*dst == NULL) -		*dst = ip6_route_output(net, sk, fl); +		*dst = ip6_route_output(net, sk, fl6);  	if ((err = (*dst)->error))  		goto out_err_release; -	if (ipv6_addr_any(&fl->fl6_src)) { -		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, -					 &fl->fl6_dst, -					 sk ? 
inet6_sk(sk)->srcprefs : 0, -					 &fl->fl6_src); +	if (ipv6_addr_any(&fl6->saddr)) { +		struct rt6_info *rt = (struct rt6_info *) *dst; +		err = ip6_route_get_saddr(net, rt, &fl6->daddr, +					  sk ? inet6_sk(sk)->srcprefs : 0, +					  &fl6->saddr);  		if (err)  			goto out_err_release;  	} @@ -952,12 +925,18 @@ static int ip6_dst_lookup_tail(struct sock *sk,  	 * dst entry and replace it instead with the  	 * dst entry of the nexthop router  	 */ -	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { +	rt = (struct rt6_info *) *dst; +	rcu_read_lock_bh(); +	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); +	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; +	rcu_read_unlock_bh(); + +	if (err) {  		struct inet6_ifaddr *ifp; -		struct flowi fl_gw; +		struct flowi6 fl_gw6;  		int redirect; -		ifp = ipv6_get_ifaddr(net, &fl->fl6_src, +		ifp = ipv6_get_ifaddr(net, &fl6->saddr,  				      (*dst)->dev, 1);  		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); @@ -970,9 +949,9 @@ static int ip6_dst_lookup_tail(struct sock *sk,  			 * default router instead  			 */  			dst_release(*dst); -			memcpy(&fl_gw, fl, sizeof(struct flowi)); -			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); -			*dst = ip6_route_output(net, sk, &fl_gw); +			memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); +			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); +			*dst = ip6_route_output(net, sk, &fl_gw6);  			if ((err = (*dst)->error))  				goto out_err_release;  		} @@ -983,7 +962,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,  out_err_release:  	if (err == -ENETUNREACH) -		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); +		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);  	dst_release(*dst);  	*dst = NULL;  	return err; @@ -993,52 +972,88 @@ out_err_release:   *	ip6_dst_lookup - perform route lookup on flow   *	@sk: socket which provides route info   *	@dst: pointer to dst_entry * for result - *	@fl: flow to lookup + *	@fl6: flow to lookup   *   *	This function performs a route lookup on the given flow.   *   *	It returns zero on success, or a standard errno code on error.   */ -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)  {  	*dst = NULL; -	return ip6_dst_lookup_tail(sk, dst, fl); +	return ip6_dst_lookup_tail(sk, dst, fl6);  }  EXPORT_SYMBOL_GPL(ip6_dst_lookup);  /** - *	ip6_sk_dst_lookup - perform socket cached route lookup on flow + *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec + *	@sk: socket which provides route info + *	@fl6: flow to lookup + *	@final_dst: final destination address for ipsec lookup + * + *	This function performs a route lookup on the given flow. + * + *	It returns a valid dst pointer on success, or a pointer encoded + *	error code. 
+ */ +struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, +				      const struct in6_addr *final_dst) +{ +	struct dst_entry *dst = NULL; +	int err; + +	err = ip6_dst_lookup_tail(sk, &dst, fl6); +	if (err) +		return ERR_PTR(err); +	if (final_dst) +		fl6->daddr = *final_dst; + +	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); +} +EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); + +/** + *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow   *	@sk: socket which provides the dst cache and route info - *	@dst: pointer to dst_entry * for result - *	@fl: flow to lookup + *	@fl6: flow to lookup + *	@final_dst: final destination address for ipsec lookup   *   *	This function performs a route lookup on the given flow with the   *	possibility of using the cached route in the socket if it is valid.   *	It will take the socket dst lock when operating on the dst cache.   *	As a result, this function can only be used in process context.   * - *	It returns zero on success, or a standard errno code on error. + *	It returns a valid dst pointer on success, or a pointer encoded + *	error code.   */ -int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, +					 const struct in6_addr *final_dst)  { -	*dst = NULL; -	if (sk) { -		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); -		*dst = ip6_sk_dst_check(sk, *dst, fl); -	} +	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); +	int err; + +	dst = ip6_sk_dst_check(sk, dst, fl6); + +	err = ip6_dst_lookup_tail(sk, &dst, fl6); +	if (err) +		return ERR_PTR(err); +	if (final_dst) +		fl6->daddr = *final_dst; -	return ip6_dst_lookup_tail(sk, dst, fl); +	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);  } -EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); +EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);  static inline int ip6_ufo_append_data(struct sock *sk,  			int getfrag(void *from, char *to, int offset, int len,  			int odd, struct sk_buff *skb),  			void *from, int length, int hh_len, int fragheaderlen, -			int transhdrlen, int mtu,unsigned int flags) +			int transhdrlen, int mtu,unsigned int flags, +			struct rt6_info *rt)  {  	struct sk_buff *skb; +	struct frag_hdr fhdr;  	int err;  	/* There is support for UDP large send offload by network @@ -1050,7 +1065,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,  			hh_len + fragheaderlen + transhdrlen + 20,  			(flags & MSG_DONTWAIT), &err);  		if (skb == NULL) -			return -ENOMEM; +			return err;  		/* reserve space for Hardware header */  		skb_reserve(skb, hh_len); @@ -1064,34 +1079,27 @@ static inline int ip6_ufo_append_data(struct sock *sk,  		/* initialize protocol header pointer */  		skb->transport_header = skb->network_header + fragheaderlen; -		skb->ip_summed = CHECKSUM_PARTIAL; +		skb->protocol = htons(ETH_P_IPV6);  		skb->csum = 0; -		sk->sk_sndmsg_off = 0; -	} -	err = skb_append_datato_frags(sk,skb, getfrag, from, -				      (length - transhdrlen)); -	if (!err) { -		struct frag_hdr fhdr; - -		/* Specify the length of each IPv6 datagram fragment. -		 * It has to be a multiple of 8. 
-		 */ -		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - -					     sizeof(struct frag_hdr)) & ~7; -		skb_shinfo(skb)->gso_type = SKB_GSO_UDP; -		ipv6_select_ident(&fhdr); -		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;  		__skb_queue_tail(&sk->sk_write_queue, skb); - -		return 0; +	} else if (skb_is_gso(skb)) { +		goto append;  	} -	/* There is not enough support do UPD LSO, -	 * so follow normal path -	 */ -	kfree_skb(skb); -	return err; +	skb->ip_summed = CHECKSUM_PARTIAL; +	/* Specify the length of each IPv6 datagram fragment. +	 * It has to be a multiple of 8. +	 */ +	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - +				     sizeof(struct frag_hdr)) & ~7; +	skb_shinfo(skb)->gso_type = SKB_GSO_UDP; +	ipv6_select_ident(&fhdr, rt); +	skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + +append: +	return skb_append_datato_frags(sk, skb, getfrag, from, +				       (length - transhdrlen));  }  static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, @@ -1106,26 +1114,52 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,  	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;  } +static void ip6_append_data_mtu(unsigned int *mtu, +				int *maxfraglen, +				unsigned int fragheaderlen, +				struct sk_buff *skb, +				struct rt6_info *rt, +				unsigned int orig_mtu) +{ +	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { +		if (skb == NULL) { +			/* first fragment, reserve header_len */ +			*mtu = orig_mtu - rt->dst.header_len; + +		} else { +			/* +			 * this fragment is not first, the headers +			 * space is regarded as data space. +			 */ +			*mtu = orig_mtu; +		} +		*maxfraglen = ((*mtu - fragheaderlen) & ~7) +			      + fragheaderlen - sizeof(struct frag_hdr); +	} +} +  int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  	int offset, int len, int odd, struct sk_buff *skb),  	void *from, int length, int transhdrlen, -	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, +	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,  	struct rt6_info *rt, unsigned int flags, int dontfrag)  {  	struct inet_sock *inet = inet_sk(sk);  	struct ipv6_pinfo *np = inet6_sk(sk); -	struct sk_buff *skb; -	unsigned int maxfraglen, fragheaderlen; +	struct inet_cork *cork; +	struct sk_buff *skb, *skb_prev = NULL; +	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;  	int exthdrlen; +	int dst_exthdrlen;  	int hh_len; -	int mtu;  	int copy;  	int err;  	int offset = 0; -	int csummode = CHECKSUM_NONE; +	__u8 tx_flags = 0;  	if (flags&MSG_PROBE)  		return 0; +	cork = &inet->cork.base;  	if (skb_queue_empty(&sk->sk_write_queue)) {  		/*  		 * setup for corking @@ -1134,7 +1168,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  			if (WARN_ON(np->cork.opt))  				return -EINVAL; -			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); +			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);  			if (unlikely(np->cork.opt == NULL))  				return -ENOBUFS; @@ -1165,48 +1199,82 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  			/* need source address above miyazawa*/  		}  		dst_hold(&rt->dst); -		inet->cork.dst = &rt->dst; -		inet->cork.fl = *fl; +		cork->dst = &rt->dst; +		inet->cork.fl.u.ip6 = *fl6;  		np->cork.hop_limit = hlimit;  		np->cork.tclass = tclass; -		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? -		      rt->dst.dev->mtu : dst_mtu(rt->dst.path); +		if (rt->dst.flags & DST_XFRM_TUNNEL) +			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 
+			      rt->dst.dev->mtu : dst_mtu(&rt->dst); +		else +			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? +			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);  		if (np->frag_size < mtu) {  			if (np->frag_size)  				mtu = np->frag_size;  		} -		inet->cork.fragsize = mtu; +		cork->fragsize = mtu;  		if (dst_allfrag(rt->dst.path)) -			inet->cork.flags |= IPCORK_ALLFRAG; -		inet->cork.length = 0; -		sk->sk_sndmsg_page = NULL; -		sk->sk_sndmsg_off = 0; -		exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) - -			    rt->rt6i_nfheader_len; +			cork->flags |= IPCORK_ALLFRAG; +		cork->length = 0; +		exthdrlen = (opt ? opt->opt_flen : 0);  		length += exthdrlen;  		transhdrlen += exthdrlen; +		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;  	} else { -		rt = (struct rt6_info *)inet->cork.dst; -		fl = &inet->cork.fl; +		rt = (struct rt6_info *)cork->dst; +		fl6 = &inet->cork.fl.u.ip6;  		opt = np->cork.opt;  		transhdrlen = 0;  		exthdrlen = 0; -		mtu = inet->cork.fragsize; +		dst_exthdrlen = 0; +		mtu = cork->fragsize;  	} +	orig_mtu = mtu;  	hh_len = LL_RESERVED_SPACE(rt->dst.dev);  	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +  			(opt ? opt->opt_nflen : 0); -	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); +	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - +		     sizeof(struct frag_hdr);  	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { -		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { -			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); +		unsigned int maxnonfragsize, headersize; + +		headersize = sizeof(struct ipv6hdr) + +			     (opt ? opt->opt_flen + opt->opt_nflen : 0) + +			     (dst_allfrag(&rt->dst) ? +			      sizeof(struct frag_hdr) : 0) + +			     rt->rt6i_nfheader_len; + +		if (ip6_sk_ignore_df(sk)) +			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; +		else +			maxnonfragsize = mtu; + +		/* dontfrag active */ +		if ((cork->length + length > mtu - headersize) && dontfrag && +		    (sk->sk_protocol == IPPROTO_UDP || +		     sk->sk_protocol == IPPROTO_RAW)) { +			ipv6_local_rxpmtu(sk, fl6, mtu - headersize + +						   sizeof(struct ipv6hdr)); +			goto emsgsize; +		} + +		if (cork->length + length > maxnonfragsize - headersize) { +emsgsize: +			ipv6_local_error(sk, EMSGSIZE, fl6, +					 mtu - headersize + +					 sizeof(struct ipv6hdr));  			return -EMSGSIZE;  		}  	} +	/* For UDP, check if TX timestamp is enabled */ +	if (sk->sk_type == SOCK_DGRAM) +		sock_tx_timestamp(sk, &tx_flags); +  	/*  	 * Let's try using as much space as possible.  	 * Use MTU if total length of the message fits into the MTU. 
@@ -1223,32 +1291,26 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  	 * --yoshfuji  	 */ -	inet->cork.length += length; -	if (length > mtu) { -		int proto = sk->sk_protocol; -		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ -			ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen); -			return -EMSGSIZE; -		} - -		if (proto == IPPROTO_UDP && -		    (rt->dst.dev->features & NETIF_F_UFO)) { - -			err = ip6_ufo_append_data(sk, getfrag, from, length, -						  hh_len, fragheaderlen, -						  transhdrlen, mtu, flags); -			if (err) -				goto error; -			return 0; -		} +	skb = skb_peek_tail(&sk->sk_write_queue); +	cork->length += length; +	if (((length > mtu) || +	     (skb && skb_is_gso(skb))) && +	    (sk->sk_protocol == IPPROTO_UDP) && +	    (rt->dst.dev->features & NETIF_F_UFO)) { +		err = ip6_ufo_append_data(sk, getfrag, from, length, +					  hh_len, fragheaderlen, +					  transhdrlen, mtu, flags, rt); +		if (err) +			goto error; +		return 0;  	} -	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) +	if (!skb)  		goto alloc_new_skb;  	while (length > 0) {  		/* Check if the remaining data fits into current packet. */ -		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; +		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;  		if (copy < length)  			copy = maxfraglen - skb->len; @@ -1258,38 +1320,46 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  			unsigned int fraglen;  			unsigned int fraggap;  			unsigned int alloclen; -			struct sk_buff *skb_prev;  alloc_new_skb: -			skb_prev = skb; -  			/* There's no room in the current skb */ -			if (skb_prev) -				fraggap = skb_prev->len - maxfraglen; +			if (skb) +				fraggap = skb->len - maxfraglen;  			else  				fraggap = 0; +			/* update mtu and maxfraglen if necessary */ +			if (skb == NULL || skb_prev == NULL) +				ip6_append_data_mtu(&mtu, &maxfraglen, +						    fragheaderlen, skb, rt, +						    orig_mtu); + +			skb_prev = skb;  			/*  			 * If remaining data exceeds the mtu,  			 * we know we need more fragment(s).  			 */  			datalen = length + fraggap; -			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) -				datalen = maxfraglen - fragheaderlen; -			fraglen = datalen + fragheaderlen; +			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) +				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;  			if ((flags & MSG_MORE) &&  			    !(rt->dst.dev->features&NETIF_F_SG))  				alloclen = mtu;  			else  				alloclen = datalen + fragheaderlen; -			/* -			 * The last fragment gets additional space at tail. -			 * Note: we overallocate on fragments with MSG_MODE -			 * because we have no idea if we're the last one. -			 */ -			if (datalen == length + fraggap) -				alloclen += rt->dst.trailer_len; +			alloclen += dst_exthdrlen; + +			if (datalen != length + fraggap) { +				/* +				 * this is not the last fragment, the trailer +				 * space is regarded as data space. +				 */ +				datalen += rt->dst.trailer_len; +			} + +			alloclen += rt->dst.trailer_len; +			fraglen = datalen + fragheaderlen;  			/*  			 * We just reserve space for fragment header. @@ -1311,16 +1381,27 @@ alloc_new_skb:  							   sk->sk_allocation);  				if (unlikely(skb == NULL))  					err = -ENOBUFS; +				else { +					/* Only the initial fragment +					 * is time stamped. 
+					 */ +					tx_flags = 0; +				}  			}  			if (skb == NULL)  				goto error;  			/*  			 *	Fill in the control structures  			 */ -			skb->ip_summed = csummode; +			skb->protocol = htons(ETH_P_IPV6); +			skb->ip_summed = CHECKSUM_NONE;  			skb->csum = 0; -			/* reserve for fragmentation */ -			skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); +			/* reserve for fragmentation and ipsec header */ +			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + +				    dst_exthdrlen); + +			if (sk->sk_type == SOCK_DGRAM) +				skb_shinfo(skb)->tx_flags = tx_flags;  			/*  			 *	Find where to start putting bytes @@ -1340,6 +1421,7 @@ alloc_new_skb:  				pskb_trim_unique(skb_prev, maxfraglen);  			}  			copy = datalen - transhdrlen - fraggap; +  			if (copy < 0) {  				err = -EINVAL;  				kfree_skb(skb); @@ -1354,7 +1436,7 @@ alloc_new_skb:  			length -= datalen - fraggap;  			transhdrlen = 0;  			exthdrlen = 0; -			csummode = CHECKSUM_NONE; +			dst_exthdrlen = 0;  			/*  			 * Put the packet on the pending queue @@ -1378,46 +1460,31 @@ alloc_new_skb:  			}  		} else {  			int i = skb_shinfo(skb)->nr_frags; -			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; -			struct page *page = sk->sk_sndmsg_page; -			int off = sk->sk_sndmsg_off; -			unsigned int left; - -			if (page && (left = PAGE_SIZE - off) > 0) { -				if (copy >= left) -					copy = left; -				if (page != frag->page) { -					if (i == MAX_SKB_FRAGS) { -						err = -EMSGSIZE; -						goto error; -					} -					get_page(page); -					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); -					frag = &skb_shinfo(skb)->frags[i]; -				} -			} else if(i < MAX_SKB_FRAGS) { -				if (copy > PAGE_SIZE) -					copy = PAGE_SIZE; -				page = alloc_pages(sk->sk_allocation, 0); -				if (page == NULL) { -					err = -ENOMEM; -					goto error; -				} -				sk->sk_sndmsg_page = page; -				sk->sk_sndmsg_off = 0; +			struct page_frag *pfrag = sk_page_frag(sk); -				skb_fill_page_desc(skb, i, page, 0, 0); -				frag = &skb_shinfo(skb)->frags[i]; -			} else { -				err = -EMSGSIZE; -				goto error; -			} -			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { -				err = -EFAULT; +			err = -ENOMEM; +			if (!sk_page_frag_refill(sk, pfrag))  				goto error; + +			if (!skb_can_coalesce(skb, i, pfrag->page, +					      pfrag->offset)) { +				err = -EMSGSIZE; +				if (i == MAX_SKB_FRAGS) +					goto error; + +				__skb_fill_page_desc(skb, i, pfrag->page, +						     pfrag->offset, 0); +				skb_shinfo(skb)->nr_frags = ++i; +				get_page(pfrag->page);  			} -			sk->sk_sndmsg_off += copy; -			frag->size += copy; +			copy = min_t(int, copy, pfrag->size - pfrag->offset); +			if (getfrag(from, +				    page_address(pfrag->page) + pfrag->offset, +				    offset, copy, skb->len, skb) < 0) +				goto error_efault; + +			pfrag->offset += copy; +			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);  			skb->len += copy;  			skb->data_len += copy;  			skb->truesize += copy; @@ -1426,12 +1493,17 @@ alloc_new_skb:  		offset += copy;  		length -= copy;  	} +  	return 0; + +error_efault: +	err = -EFAULT;  error: -	inet->cork.length -= length; +	cork->length -= length;  	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);  	return err;  } +EXPORT_SYMBOL_GPL(ip6_append_data);  static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)  { @@ -1444,10 +1516,10 @@ static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)  		np->cork.opt = NULL;  	} -	if (inet->cork.dst) { -		
dst_release(inet->cork.dst); -		inet->cork.dst = NULL; -		inet->cork.flags &= ~IPCORK_ALLFRAG; +	if (inet->cork.base.dst) { +		dst_release(inet->cork.base.dst); +		inet->cork.base.dst = NULL; +		inet->cork.base.flags &= ~IPCORK_ALLFRAG;  	}  	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));  } @@ -1462,9 +1534,9 @@ int ip6_push_pending_frames(struct sock *sk)  	struct net *net = sock_net(sk);  	struct ipv6hdr *hdr;  	struct ipv6_txoptions *opt = np->cork.opt; -	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; -	struct flowi *fl = &inet->cork.fl; -	unsigned char proto = fl->proto; +	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; +	struct flowi6 *fl6 = &inet->cork.fl.u.ip6; +	unsigned char proto = fl6->flowi6_proto;  	int err = 0;  	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) @@ -1486,10 +1558,9 @@ int ip6_push_pending_frames(struct sock *sk)  	}  	/* Allow local fragmentation. */ -	if (np->pmtudisc < IPV6_PMTUDISC_DO) -		skb->local_df = 1; +	skb->ignore_df = ip6_sk_ignore_df(sk); -	ipv6_addr_copy(final_dst, &fl->fl6_dst); +	*final_dst = fl6->daddr;  	__skb_pull(skb, skb_network_header_len(skb));  	if (opt && opt->opt_flen)  		ipv6_push_frag_opts(skb, opt, &proto); @@ -1500,13 +1571,11 @@ int ip6_push_pending_frames(struct sock *sk)  	skb_reset_network_header(skb);  	hdr = ipv6_hdr(skb); -	*(__be32*)hdr = fl->fl6_flowlabel | -		     htonl(0x60000000 | ((int)np->cork.tclass << 20)); - +	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);  	hdr->hop_limit = np->cork.hop_limit;  	hdr->nexthdr = proto; -	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); -	ipv6_addr_copy(&hdr->daddr, final_dst); +	hdr->saddr = fl6->saddr; +	hdr->daddr = *final_dst;  	skb->priority = sk->sk_priority;  	skb->mark = sk->sk_mark; @@ -1516,8 +1585,8 @@ int ip6_push_pending_frames(struct sock *sk)  	if (proto == IPPROTO_ICMPV6) {  		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); -		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); -		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); +		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); +		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);  	}  	err = ip6_local_out(skb); @@ -1535,6 +1604,7 @@ error:  	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);  	goto out;  } +EXPORT_SYMBOL_GPL(ip6_push_pending_frames);  void ip6_flush_pending_frames(struct sock *sk)  { @@ -1549,3 +1619,4 @@ void ip6_flush_pending_frames(struct sock *sk)  	ip6_cork_release(inet_sk(sk), inet6_sk(sk));  } +EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);  | 
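
Note on the ip6_sk_dst_lookup() -> ip6_sk_dst_lookup_flow() conversion in the hunks above: the new helper no longer fills in a struct dst_entry ** and returns an errno; after the xfrm_lookup() step it hands back either a usable dst or an ERR_PTR()-encoded error. The following is a minimal sketch of the resulting calling convention, not code from this commit; the function name example_route_output() and its body are illustrative only.

/* Sketch only: calling convention introduced by this change.
 * ip6_sk_dst_lookup_flow() returns a dst_entry pointer or an
 * ERR_PTR()-encoded errno instead of filling in a dst_entry **.
 */
#include <linux/err.h>
#include <net/dst.h>
#include <net/ipv6.h>

static int example_route_output(struct sock *sk, struct flowi6 *fl6,
				const struct in6_addr *final_dst)
{
	struct dst_entry *dst;

	dst = ip6_sk_dst_lookup_flow(sk, fl6, final_dst);
	if (IS_ERR(dst))
		return PTR_ERR(dst);	/* no route, or xfrm policy failure */

	/* ... use the route: attach it to the socket, build and send skbs ... */

	dst_release(dst);
	return 0;
}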

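Similarly, ip6_xmit() now takes the flow as a struct flowi6 and the traffic class as an explicit argument instead of reading np->tclass internally. A hedged sketch of an updated call site follows; the wrapper name example_xmit() is hypothetical and only shows how a transport caller would pass the socket's traffic class through.

/* Sketch only: callers of ip6_xmit() now supply the traffic class
 * themselves; the socket's np->tclass is the usual source.
 */
#include <linux/ipv6.h>
#include <net/ipv6.h>

static int example_xmit(struct sock *sk, struct sk_buff *skb,
			struct flowi6 *fl6, struct ipv6_txoptions *opt)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	return ip6_xmit(sk, skb, fl6, opt, np->tclass);
}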