Diffstat (limited to 'net/core/skbuff.c')
 -rw-r--r--  net/core/skbuff.c | 749
 1 file changed, 591 insertions(+), 158 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d81cff119f7..c1a33033cbe 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -47,6 +47,8 @@
 #include <linux/in.h>
 #include <linux/inet.h>
 #include <linux/slab.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
 #include <linux/netdevice.h>
 #ifdef CONFIG_NET_CLS_ACT
 #include <net/pkt_sched.h>
@@ -65,6 +67,7 @@
 #include <net/dst.h>
 #include <net/sock.h>
 #include <net/checksum.h>
+#include <net/ip6_checksum.h>
 #include <net/xfrm.h>
 
 #include <asm/uaccess.h>
@@ -74,36 +77,6 @@
 struct kmem_cache *skbuff_head_cache __read_mostly;
 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 
-static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
-				  struct pipe_buffer *buf)
-{
-	put_page(buf->page);
-}
-
-static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
-				struct pipe_buffer *buf)
-{
-	get_page(buf->page);
-}
-
-static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
-			       struct pipe_buffer *buf)
-{
-	return 1;
-}
-
-
-/* Pipe buffer operations for a socket. */
-static const struct pipe_buf_operations sock_pipe_buf_ops = {
-	.can_merge = 0,
-	.map = generic_pipe_buf_map,
-	.unmap = generic_pipe_buf_unmap,
-	.confirm = generic_pipe_buf_confirm,
-	.release = sock_pipe_buf_release,
-	.steal = sock_pipe_buf_steal,
-	.get = sock_pipe_buf_get,
-};
-
 /**
  *	skb_panic - private function for out-of-line support
  *	@skb:	buffer
@@ -476,6 +449,18 @@ void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 }
 EXPORT_SYMBOL(skb_add_rx_frag);
 
+void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
+			  unsigned int truesize)
+{
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+	skb_frag_size_add(frag, size);
+	skb->len += size;
+	skb->data_len += size;
+	skb->truesize += truesize;
+}
+EXPORT_SYMBOL(skb_coalesce_rx_frag);
+
 static void skb_drop_list(struct sk_buff **listp)
 {
 	kfree_skb_list(*listp);
@@ -580,9 +565,6 @@ static void skb_release_head_state(struct sk_buff *skb)
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	nf_conntrack_put(skb->nfct);
 #endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
-	nf_conntrack_put_reasm(skb->nfct_reasm);
-#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(skb->nf_bridge);
 #endif
@@ -703,17 +685,19 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->inner_network_header = old->inner_network_header;
 	new->inner_mac_header = old->inner_mac_header;
 	skb_dst_copy(new, old);
-	new->rxhash		= old->rxhash;
+	skb_copy_hash(new, old);
 	new->ooo_okay		= old->ooo_okay;
-	new->l4_rxhash		= old->l4_rxhash;
 	new->no_fcs		= old->no_fcs;
 	new->encapsulation	= old->encapsulation;
+	new->encap_hdr_csum	= old->encap_hdr_csum;
+	new->csum_valid		= old->csum_valid;
+	new->csum_complete_sw	= old->csum_complete_sw;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->csum		= old->csum;
-	new->local_df		= old->local_df;
+	new->ignore_df		= old->ignore_df;
 	new->pkt_type		= old->pkt_type;
 	new->ip_summed		= old->ip_summed;
 	skb_copy_queue_mapping(new, old);
@@ -726,9 +710,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->mark		= old->mark;
 	new->skb_iif		= old->skb_iif;
 	__nf_copy(new, old);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
-	new->nf_trace		= old->nf_trace;
-#endif
 #ifdef CONFIG_NET_SCHED
 	new->tc_index		= old->tc_index;
 #ifdef CONFIG_NET_CLS_ACT
@@ -903,6 +884,9 @@ EXPORT_SYMBOL(skb_clone);
 
 static void skb_headers_offset_update(struct sk_buff *skb, int off)
 {
+	/* Only adjust this if it actually is csum_start rather than csum */
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		skb->csum_start += off;
 	/* {transport,network,mac}_header and tail are relative to skb->head */
 	skb->transport_header += off;
 	skb->network_header   += off;
@@ -970,10 +954,13 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 EXPORT_SYMBOL(skb_copy);
 
 /**
- *	__pskb_copy	-	create copy of an sk_buff with private head.
+ *	__pskb_copy_fclone	-  create copy of an sk_buff with private head.
 *	@skb: buffer to copy
 *	@headroom: headroom of new skb
 *	@gfp_mask: allocation priority
+ *	@fclone: if true allocate the copy of the skb from the fclone
+ *	cache instead of the head cache; it is recommended to set this
+ *	to true for the cases where the copy will likely be cloned
 *
 *	Make a copy of both an &sk_buff and part of its data, located
 *	in header. Fragmented data remain shared. This is used when
@@ -983,11 +970,12 @@ EXPORT_SYMBOL(skb_copy);
 *	The returned buffer has a reference count of 1.
 */
-struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
+struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
+				   gfp_t gfp_mask, bool fclone)
 {
 	unsigned int size = skb_headlen(skb) + headroom;
-	struct sk_buff *n = __alloc_skb(size, gfp_mask,
-					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
+	int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);
 
 	if (!n)
 		goto out;
@@ -1027,7 +1015,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 out:
 	return n;
 }
-EXPORT_SYMBOL(__pskb_copy);
+EXPORT_SYMBOL(__pskb_copy_fclone);
 
 /**
 *	pskb_expand_head - reallocate header of &sk_buff
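The new @fclone hint matters on paths that copy an skb and expect the copy itself to be cloned right away; allocating the copy from the fclone cache makes the later clone come from the paired slab object. A minimal kernel-context sketch of such a caller — hypothetical, not part of this patch:

/* Hypothetical caller: copy an skb whose copy will likely be cloned
 * (e.g. before queueing for possible retransmission). Passing
 * fclone=true requests the fclone cache, per the kernel-doc above.
 */
static struct sk_buff *copy_for_later_clone(struct sk_buff *skb, gfp_t gfp)
{
	return __pskb_copy_fclone(skb, skb_headroom(skb), gfp, true);
}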
@@ -1036,8 +1024,8 @@
 *	@ntail: room to add at tail
 *	@gfp_mask: allocation priority
 *
- *	Expands (or creates identical copy, if &nhead and &ntail are zero)
- *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
+ *	Expands (or creates identical copy, if @nhead and @ntail are zero)
+ *	header of @skb. &sk_buff itself is not changed. &sk_buff MUST have
 *	reference count of 1. Returns zero in the case of success or error,
 *	if expansion failed. In the last case, &sk_buff is not changed.
 *
@@ -1109,9 +1097,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 #endif
 	skb->tail	      += off;
 	skb_headers_offset_update(skb, nhead);
-	/* Only adjust this if it actually is csum_start rather than csum */
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		skb->csum_start += nhead;
 	skb->cloned   = 0;
 	skb->hdr_len  = 0;
 	skb->nohdr    = 0;
@@ -1176,7 +1161,6 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 					NUMA_NO_NODE);
 	int oldheadroom = skb_headroom(skb);
 	int head_copy_len, head_copy_off;
-	int off;
 
 	if (!n)
 		return NULL;
@@ -1200,11 +1184,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 
 	copy_skb_header(n, skb);
 
-	off                  = newheadroom - oldheadroom;
-	if (n->ip_summed == CHECKSUM_PARTIAL)
-		n->csum_start += off;
-
-	skb_headers_offset_update(n, off);
+	skb_headers_offset_update(n, newheadroom - oldheadroom);
 
 	return n;
 }
@@ -1257,6 +1237,29 @@ free_skb:
 EXPORT_SYMBOL(skb_pad);
 
 /**
+ *	pskb_put - add data to the tail of a potentially fragmented buffer
+ *	@skb: start of the buffer to use
+ *	@tail: tail fragment of the buffer to use
+ *	@len: amount of data to add
+ *
+ *	This function extends the used data area of the potentially
+ *	fragmented buffer. @tail must be the last fragment of @skb -- or
+ *	@skb itself. If this would exceed the total buffer size the kernel
+ *	will panic. A pointer to the first byte of the extra data is
+ *	returned.
+ */
+
+unsigned char *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
+{
+	if (tail != skb) {
+		skb->data_len += len;
+		skb->len += len;
+	}
+	return skb_put(tail, len);
+}
+EXPORT_SYMBOL_GPL(pskb_put);
+
+/**
 *	skb_put - add data to a buffer
 *	@skb: buffer to use
 *	@len: amount of data to add
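As the kernel-doc says, pskb_put() grows the parent's len/data_len accounting and then skb_put()s the actual tail fragment. A hedged kernel-context sketch of a caller (hypothetical; the caller must already know the last fragment):

/* Hypothetical caller: append len bytes of trailer data to a buffer
 * whose payload continues in a frag_list. 'tail' is the last fragment
 * of skb, or skb itself. pskb_put() panics if tailroom is exceeded.
 */
static void append_trailer(struct sk_buff *skb, struct sk_buff *tail,
			   const void *data, int len)
{
	memcpy(pskb_put(skb, tail, len), data, len);
}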
@@ -1803,7 +1806,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 		.partial = partial,
 		.nr_pages_max = MAX_SKB_FRAGS,
 		.flags = flags,
-		.ops = &sock_pipe_buf_ops,
+		.ops = &nosteal_pipe_buf_ops,
 		.spd_release = sock_spd_release,
 	};
 	struct sk_buff *frag_iter;
@@ -1933,9 +1936,8 @@ fault:
 EXPORT_SYMBOL(skb_store_bits);
 
 /* Checksum skb data. */
-
-__wsum skb_checksum(const struct sk_buff *skb, int offset,
-			  int len, __wsum csum)
+__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
+		      __wsum csum, const struct skb_checksum_ops *ops)
 {
 	int start = skb_headlen(skb);
 	int i, copy = start - offset;
@@ -1946,7 +1948,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
-		csum = csum_partial(skb->data + offset, copy, csum);
+		csum = ops->update(skb->data + offset, copy, csum);
 		if ((len -= copy) == 0)
 			return csum;
 		offset += copy;
@@ -1967,10 +1969,10 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 			if (copy > len)
 				copy = len;
 			vaddr = kmap_atomic(skb_frag_page(frag));
-			csum2 = csum_partial(vaddr + frag->page_offset +
-					     offset - start, copy, 0);
+			csum2 = ops->update(vaddr + frag->page_offset +
+					    offset - start, copy, 0);
 			kunmap_atomic(vaddr);
-			csum = csum_block_add(csum, csum2, pos);
+			csum = ops->combine(csum, csum2, pos, copy);
 			if (!(len -= copy))
 				return csum;
 			offset += copy;
@@ -1989,9 +1991,9 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 			__wsum csum2;
 			if (copy > len)
 				copy = len;
-			csum2 = skb_checksum(frag_iter, offset - start,
-					     copy, 0);
-			csum = csum_block_add(csum, csum2, pos);
+			csum2 = __skb_checksum(frag_iter, offset - start,
+					       copy, 0, ops);
+			csum = ops->combine(csum, csum2, pos, copy);
 			if ((len -= copy) == 0)
 				return csum;
 			offset += copy;
@@ -2003,6 +2005,18 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 
 	return csum;
 }
+EXPORT_SYMBOL(__skb_checksum);
+
+__wsum skb_checksum(const struct sk_buff *skb, int offset,
+		    int len, __wsum csum)
+{
+	const struct skb_checksum_ops ops = {
+		.update  = csum_partial_ext,
+		.combine = csum_block_add_ext,
+	};
+
+	return __skb_checksum(skb, offset, len, csum, &ops);
+}
 EXPORT_SYMBOL(skb_checksum);
 
 /* Both of above in one bottle. */
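The refactor turns the skb walk into a template parameterized by an update/combine pair, so callers can plug in checksums other than the plain Internet checksum. The arithmetic being abstracted is ones-complement folding, where a block starting at an odd offset contributes byte-swapped. A standalone userspace C sketch of that update/combine split — an illustration only, not the kernel's csum_partial/csum_block_add:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's csum helpers: a 32-bit
 * accumulator over big-endian 16-bit words, folded at the end
 * (RFC 1071 style).
 */
struct checksum_ops {
	uint32_t (*update)(const uint8_t *buf, size_t len, uint32_t sum);
	uint32_t (*combine)(uint32_t sum, uint32_t sum2, size_t offset, size_t len);
};

static uint32_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

static uint32_t sum_update(const uint8_t *buf, size_t len, uint32_t sum)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)buf[i] << 8 | buf[i + 1];
	if (len & 1)	/* trailing byte is padded with a zero byte */
		sum += (uint32_t)buf[len - 1] << 8;
	return sum;
}

static uint32_t sum_combine(uint32_t sum, uint32_t sum2, size_t offset, size_t len)
{
	sum2 = fold(sum2);
	if (offset & 1)	/* blocks at odd offsets contribute byte-swapped */
		sum2 = ((sum2 & 0xff) << 8) | (sum2 >> 8);
	return sum + sum2;
}

int main(void)
{
	const struct checksum_ops ops = { .update = sum_update, .combine = sum_combine };
	uint8_t data[] = "abcdefghij";
	uint32_t whole, part;

	whole = fold(ops.update(data, 10, 0));

	/* Walk the buffer in two "fragments", as __skb_checksum does,
	 * stitching the partial results with ops.combine.
	 */
	part = ops.update(data, 3, 0);
	part = ops.combine(part, ops.update(data + 3, 7, 0), 3, 7);
	part = fold(part);

	/* the two values must match (prints "faff faff") */
	printf("%04x %04x\n", (unsigned)whole, (unsigned)part);
	return 0;
}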
@@ -2084,6 +2098,104 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 }
 EXPORT_SYMBOL(skb_copy_and_csum_bits);
 
+/**
+ *	skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
+ *	@from: source buffer
+ *
+ *	Calculates the amount of linear headroom needed in the 'to' skb passed
+ *	into skb_zerocopy().
+ */
+unsigned int
+skb_zerocopy_headlen(const struct sk_buff *from)
+{
+	unsigned int hlen = 0;
+
+	if (!from->head_frag ||
+	    skb_headlen(from) < L1_CACHE_BYTES ||
+	    skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+		hlen = skb_headlen(from);
+
+	if (skb_has_frag_list(from))
+		hlen = from->len;
+
+	return hlen;
+}
+EXPORT_SYMBOL_GPL(skb_zerocopy_headlen);
+
+/**
+ *	skb_zerocopy - Zero copy skb to skb
+ *	@to: destination buffer
+ *	@from: source buffer
+ *	@len: number of bytes to copy from source buffer
+ *	@hlen: size of linear headroom in destination buffer
+ *
+ *	Copies up to `len` bytes from `from` to `to` by creating references
+ *	to the frags in the source buffer.
+ *
+ *	The `hlen` as calculated by skb_zerocopy_headlen() specifies the
+ *	headroom in the `to` buffer.
+ *
+ *	Return value:
+ *	0: everything is OK
+ *	-ENOMEM: couldn't orphan frags of @from due to lack of memory
+ *	-EFAULT: skb_copy_bits() found some problem with skb geometry
+ */
+int
+skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
+{
+	int i, j = 0;
+	int plen = 0; /* length of skb->head fragment */
+	int ret;
+	struct page *page;
+	unsigned int offset;
+
+	BUG_ON(!from->head_frag && !hlen);
+
+	/* don't bother with small payloads */
+	if (len <= skb_tailroom(to))
+		return skb_copy_bits(from, 0, skb_put(to, len), len);
+
+	if (hlen) {
+		ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
+		if (unlikely(ret))
+			return ret;
+		len -= hlen;
+	} else {
+		plen = min_t(int, skb_headlen(from), len);
+		if (plen) {
+			page = virt_to_head_page(from->head);
+			offset = from->data - (unsigned char *)page_address(page);
+			__skb_fill_page_desc(to, 0, page, offset, plen);
+			get_page(page);
+			j = 1;
+			len -= plen;
+		}
+	}
+
+	to->truesize += len + plen;
+	to->len += len + plen;
+	to->data_len += len + plen;
+
+	if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) {
+		skb_tx_error(from);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
+		if (!len)
+			break;
+		skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
+		skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
+		len -= skb_shinfo(to)->frags[j].size;
+		skb_frag_ref(to, j);
+		j++;
+	}
+	skb_shinfo(to)->nr_frags = j;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(skb_zerocopy);
+
 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 {
 	__wsum csum;
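The intended calling pattern pairs the two new exports: size the destination's linear part with skb_zerocopy_headlen(), allocate, then let skb_zerocopy() either copy outright (small payloads) or take references on the source frags. A kernel-context sketch of that pairing — a hypothetical caller, not code from this patch:

/* Hypothetical caller: build a new skb carrying len bytes of 'from',
 * referencing frags instead of copying where possible.
 */
static struct sk_buff *zerocopy_payload(struct sk_buff *from, int len, gfp_t gfp)
{
	unsigned int hlen = skb_zerocopy_headlen(from);
	struct sk_buff *to;

	to = alloc_skb(hlen, gfp);
	if (!to)
		return NULL;

	/* copies hlen linear bytes, then references the remaining frags */
	if (skb_zerocopy(to, from, len, hlen)) {
		kfree_skb(to);
		return NULL;
	}
	return to;
}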
@@ -2522,14 +2634,14 @@ EXPORT_SYMBOL(skb_prepare_seq_read);
 * @data: destination pointer for data to be returned
 * @st: state variable
 *
- * Reads a block of skb data at &consumed relative to the
+ * Reads a block of skb data at @consumed relative to the
 * lower offset specified to skb_prepare_seq_read(). Assigns
- * the head of the data block to &data and returns the length
+ * the head of the data block to @data and returns the length
 * of the block or 0 if the end of the skb data or the upper
 * offset has been reached.
 *
 * The caller is not required to consume all of the data
- * returned, i.e. &consumed is typically set to the number
+ * returned, i.e. @consumed is typically set to the number
 * of bytes already consumed and the next call to
 * skb_seq_read() will return the remaining part of the block.
 *
@@ -2746,67 +2858,96 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
 
 /**
 *	skb_segment - Perform protocol segmentation on skb.
- *	@skb: buffer to segment
+ *	@head_skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function performs segmentation on the given skb.  It returns
 *	a pointer to the first in a list of new skbs for the segments.
 *	In case of error it returns ERR_PTR(err).
 */
-struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
+struct sk_buff *skb_segment(struct sk_buff *head_skb,
+			    netdev_features_t features)
 {
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
-	struct sk_buff *fskb = skb_shinfo(skb)->frag_list;
-	unsigned int mss = skb_shinfo(skb)->gso_size;
-	unsigned int doffset = skb->data - skb_mac_header(skb);
+	struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
+	skb_frag_t *frag = skb_shinfo(head_skb)->frags;
+	unsigned int mss = skb_shinfo(head_skb)->gso_size;
+	unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
+	struct sk_buff *frag_skb = head_skb;
 	unsigned int offset = doffset;
-	unsigned int tnl_hlen = skb_tnl_header_len(skb);
+	unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
 	unsigned int headroom;
 	unsigned int len;
 	__be16 proto;
 	bool csum;
 	int sg = !!(features & NETIF_F_SG);
-	int nfrags = skb_shinfo(skb)->nr_frags;
+	int nfrags = skb_shinfo(head_skb)->nr_frags;
 	int err = -ENOMEM;
 	int i = 0;
 	int pos;
+	int dummy;
 
-	proto = skb_network_protocol(skb);
+	__skb_push(head_skb, doffset);
+	proto = skb_network_protocol(head_skb, &dummy);
 	if (unlikely(!proto))
 		return ERR_PTR(-EINVAL);
 
-	csum = !!can_checksum_protocol(features, proto);
-	__skb_push(skb, doffset);
-	headroom = skb_headroom(skb);
-	pos = skb_headlen(skb);
+	csum = !head_skb->encap_hdr_csum &&
+	    !!can_checksum_protocol(features, proto);
+
+	headroom = skb_headroom(head_skb);
+	pos = skb_headlen(head_skb);
 
 	do {
 		struct sk_buff *nskb;
-		skb_frag_t *frag;
+		skb_frag_t *nskb_frag;
 		int hsize;
 		int size;
 
-		len = skb->len - offset;
+		len = head_skb->len - offset;
 		if (len > mss)
 			len = mss;
 
-		hsize = skb_headlen(skb) - offset;
+		hsize = skb_headlen(head_skb) - offset;
 		if (hsize < 0)
 			hsize = 0;
 		if (hsize > len || !sg)
 			hsize = len;
 
-		if (!hsize && i >= nfrags) {
-			BUG_ON(fskb->len != len);
+		if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
+		    (skb_headlen(list_skb) == len || sg)) {
+			BUG_ON(skb_headlen(list_skb) > len);
+
+			i = 0;
+			nfrags = skb_shinfo(list_skb)->nr_frags;
+			frag = skb_shinfo(list_skb)->frags;
+			frag_skb = list_skb;
+			pos += skb_headlen(list_skb);
+
+			while (pos < offset + len) {
+				BUG_ON(i >= nfrags);
 
-			pos += len;
-			nskb = skb_clone(fskb, GFP_ATOMIC);
-			fskb = fskb->next;
+				size = skb_frag_size(frag);
+				if (pos + size > offset + len)
+					break;
+
+				i++;
+				pos += size;
+				frag++;
+			}
+
+			nskb = skb_clone(list_skb, GFP_ATOMIC);
+			list_skb = list_skb->next;
 
 			if (unlikely(!nskb))
 				goto err;
 
+			if (unlikely(pskb_trim(nskb, len))) {
+				kfree_skb(nskb);
+				goto err;
+			}
+
 			hsize = skb_end_offset(nskb);
 			if (skb_cow_head(nskb, doffset + headroom)) {
 				kfree_skb(nskb);
@@ -2818,7 +2959,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 			__skb_push(nskb, doffset);
 		} else {
 			nskb = __alloc_skb(hsize + doffset + headroom,
-					   GFP_ATOMIC, skb_alloc_rx_flag(skb),
+					   GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
 					   NUMA_NO_NODE);
 
 			if (unlikely(!nskb))
@@ -2834,80 +2975,82 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 			segs = nskb;
 		tail = nskb;
 
-		__copy_skb_header(nskb, skb);
-		nskb->mac_len = skb->mac_len;
+		__copy_skb_header(nskb, head_skb);
+		nskb->mac_len = head_skb->mac_len;
 
-		/* nskb and skb might have different headroom */
-		if (nskb->ip_summed == CHECKSUM_PARTIAL)
-			nskb->csum_start += skb_headroom(nskb) - headroom;
+		skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
 
-		skb_reset_mac_header(nskb);
-		skb_set_network_header(nskb, skb->mac_len);
-		nskb->transport_header = (nskb->network_header +
-					  skb_network_header_len(skb));
-
-		skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+		skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
 						 nskb->data - tnl_hlen,
 						 doffset + tnl_hlen);
 
-		if (fskb != skb_shinfo(skb)->frag_list)
+		if (nskb->len == len + doffset)
 			goto perform_csum_check;
 
 		if (!sg) {
 			nskb->ip_summed = CHECKSUM_NONE;
-			nskb->csum = skb_copy_and_csum_bits(skb, offset,
+			nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
 							    skb_put(nskb, len),
 							    len, 0);
+			SKB_GSO_CB(nskb)->csum_start =
+			    skb_headroom(nskb) + doffset;
 			continue;
 		}
 
-		frag = skb_shinfo(nskb)->frags;
+		nskb_frag = skb_shinfo(nskb)->frags;
 
-		skb_copy_from_linear_data_offset(skb, offset,
+		skb_copy_from_linear_data_offset(head_skb, offset,
 						 skb_put(nskb, hsize), hsize);
 
-		skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
+		skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
+			SKBTX_SHARED_FRAG;
+
+		while (pos < offset + len) {
+			if (i >= nfrags) {
+				BUG_ON(skb_headlen(list_skb));
 
-		while (pos < offset + len && i < nfrags) {
-			*frag = skb_shinfo(skb)->frags[i];
-			__skb_frag_ref(frag);
-			size = skb_frag_size(frag);
+				i = 0;
+				nfrags = skb_shinfo(list_skb)->nr_frags;
+				frag = skb_shinfo(list_skb)->frags;
+				frag_skb = list_skb;
+
+				BUG_ON(!nfrags);
+
+				list_skb = list_skb->next;
+			}
+
+			if (unlikely(skb_shinfo(nskb)->nr_frags >=
+				     MAX_SKB_FRAGS)) {
+				net_warn_ratelimited(
+					"skb_segment: too many frags: %u %u\n",
+					pos, mss);
+				goto err;
+			}
+
+			if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
+				goto err;
+
+			*nskb_frag = *frag;
+			__skb_frag_ref(nskb_frag);
+			size = skb_frag_size(nskb_frag);
 
 			if (pos < offset) {
-				frag->page_offset += offset - pos;
-				skb_frag_size_sub(frag, offset - pos);
+				nskb_frag->page_offset += offset - pos;
+				skb_frag_size_sub(nskb_frag, offset - pos);
 			}
 
 			skb_shinfo(nskb)->nr_frags++;
 
 			if (pos + size <= offset + len) {
 				i++;
+				frag++;
 				pos += size;
 			} else {
-				skb_frag_size_sub(frag, pos + size - (offset + len));
+				skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
 				goto skip_fraglist;
 			}
 
-			frag++;
-		}
-
-		if (pos < offset + len) {
-			struct sk_buff *fskb2 = fskb;
-
-			BUG_ON(pos + fskb->len != offset + len);
-
-			pos += fskb->len;
-			fskb = fskb->next;
-
-			if (fskb2->next) {
-				fskb2 = skb_clone(fskb2, GFP_ATOMIC);
-				if (!fskb2)
-					goto err;
-			} else
-				skb_get(fskb2);
-
-			SKB_FRAG_ASSERT(nskb);
-			skb_shinfo(nskb)->frag_list = fskb2;
+			nskb_frag++;
 		}
 
 skip_fraglist:
@@ -2920,48 +3063,45 @@ perform_csum_check:
 			nskb->csum = skb_checksum(nskb, doffset,
 						  nskb->len - doffset, 0);
 			nskb->ip_summed = CHECKSUM_NONE;
+			SKB_GSO_CB(nskb)->csum_start =
+			    skb_headroom(nskb) + doffset;
 		}
-	} while ((offset += len) < skb->len);
+	} while ((offset += len) < head_skb->len);
 
 	return segs;
 
 err:
-	while ((skb = segs)) {
-		segs = skb->next;
-		kfree_skb(skb);
-	}
+	kfree_skb_list(segs);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(skb_segment);
 
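Stripped of skb bookkeeping, the outer loop above is cursor arithmetic: offset starts past the replicated headers and advances by at most mss per output skb. A standalone, runnable C sketch of just that arithmetic (plain integers stand in for skb geometry):

#include <stdio.h>

/* Standalone sketch of skb_segment()'s outer-loop arithmetic: walk a
 * payload that sits behind doffset bytes of headers, emitting at most
 * mss payload bytes per segment, each segment re-carrying the headers.
 */
int main(void)
{
	unsigned int doffset = 54;		/* e.g. Ethernet + IPv4 + TCP */
	unsigned int total_len = 54 + 3000;	/* headers + payload */
	unsigned int mss = 1448;
	unsigned int offset = doffset;
	unsigned int len;
	int seg = 0;

	do {
		len = total_len - offset;
		if (len > mss)
			len = mss;

		printf("segment %d: %u header + %u payload bytes\n",
		       ++seg, doffset, len);
	} while ((offset += len) < total_len);	/* 1448, 1448, 104 */

	return 0;
}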
 int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 {
-	struct sk_buff *p = *head;
-	struct sk_buff *nskb;
-	struct skb_shared_info *skbinfo = skb_shinfo(skb);
-	struct skb_shared_info *pinfo = skb_shinfo(p);
-	unsigned int headroom;
-	unsigned int len = skb_gro_len(skb);
+	struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb);
 	unsigned int offset = skb_gro_offset(skb);
 	unsigned int headlen = skb_headlen(skb);
+	struct sk_buff *nskb, *lp, *p = *head;
+	unsigned int len = skb_gro_len(skb);
 	unsigned int delta_truesize;
+	unsigned int headroom;
 
-	if (p->len + len >= 65536)
+	if (unlikely(p->len + len >= 65536))
 		return -E2BIG;
 
-	if (pinfo->frag_list)
-		goto merge;
-	else if (headlen <= offset) {
+	lp = NAPI_GRO_CB(p)->last;
+	pinfo = skb_shinfo(lp);
+
+	if (headlen <= offset) {
 		skb_frag_t *frag;
 		skb_frag_t *frag2;
 		int i = skbinfo->nr_frags;
 		int nr_frags = pinfo->nr_frags + i;
 
-		offset -= headlen;
-
 		if (nr_frags > MAX_SKB_FRAGS)
-			return -E2BIG;
+			goto merge;
 
+		offset -= headlen;
 		pinfo->nr_frags = nr_frags;
 		skbinfo->nr_frags = 0;
@@ -2992,7 +3132,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 		unsigned int first_offset;
 
 		if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS)
-			return -E2BIG;
+			goto merge;
 
 		first_offset = skb->data -
 			       (unsigned char *)page_address(page) +
@@ -3010,7 +3150,10 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 		delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
 		NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
 		goto done;
-	} else if (skb_gro_len(p) != pinfo->gso_size)
+	}
+	if (pinfo->frag_list)
+		goto merge;
+	if (skb_gro_len(p) != pinfo->gso_size)
 		return -E2BIG;
 
 	headroom = skb_headroom(p);
@@ -3062,16 +3205,24 @@ merge:
 
 	__skb_pull(skb, offset);
 
-	NAPI_GRO_CB(p)->last->next = skb;
+	if (NAPI_GRO_CB(p)->last == p)
+		skb_shinfo(p)->frag_list = skb;
+	else
+		NAPI_GRO_CB(p)->last->next = skb;
 	NAPI_GRO_CB(p)->last = skb;
 	skb_header_release(skb);
+	lp = p;
 
 done:
 	NAPI_GRO_CB(p)->count++;
 	p->data_len += len;
 	p->truesize += delta_truesize;
 	p->len += len;
-
+	if (lp != p) {
+		lp->data_len += len;
+		lp->truesize += delta_truesize;
+		lp->len += len;
+	}
 	NAPI_GRO_CB(skb)->same_flow = 1;
 	return 0;
 }
@@ -3162,6 +3313,32 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 	return elt;
 }
 
+/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only maps skb to a given
+ * sglist without marking the sg which contains the last skb data as the end.
+ * So the caller can manipulate the sg list at will when padding new data after
+ * the first call, without calling sg_unmark_end to extend the sg list.
+ *
+ * Scenario to use skb_to_sgvec_nomark:
+ * 1. sg_init_table
+ * 2. skb_to_sgvec_nomark(payload1)
+ * 3. skb_to_sgvec_nomark(payload2)
+ *
+ * This is equivalent to:
+ * 1. sg_init_table
+ * 2. skb_to_sgvec(payload1)
+ * 3. sg_unmark_end
+ * 4. skb_to_sgvec(payload2)
+ *
+ * When mapping multiple payloads conditionally, skb_to_sgvec_nomark
+ * is preferable.
+ */
+int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
+			int offset, int len)
+{
+	return __skb_to_sgvec(skb, sg, offset, len);
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
+
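The scenario from the comment, written out as a kernel-context sketch — a hypothetical caller (the sg table must be sized for both ranges, and the caller terminates the list itself, once, at the true end):

/* Hypothetical caller: map two skb byte ranges into one scatterlist.
 * Neither call sets an end marker, so the second appends freely.
 */
static int map_two_ranges(struct sk_buff *skb, struct scatterlist *sg,
			  unsigned int n_entries,
			  int off1, int len1, int off2, int len2)
{
	int nsg1, nsg2;

	sg_init_table(sg, n_entries);

	nsg1 = skb_to_sgvec_nomark(skb, sg, off1, len1);
	nsg2 = skb_to_sgvec_nomark(skb, sg + nsg1, off2, len2);

	/* terminate the combined list exactly once */
	sg_mark_end(&sg[nsg1 + nsg2 - 1]);
	return nsg1 + nsg2;
}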
 int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 {
 	int nsg = __skb_to_sgvec(skb, sg, offset, len);
@@ -3294,8 +3471,6 @@ static void sock_rmem_free(struct sk_buff *skb)
 */
 int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 {
-	int len = skb->len;
-
 	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
 	    (unsigned int)sk->sk_rcvbuf)
 		return -ENOMEM;
@@ -3310,7 +3485,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 
 	skb_queue_tail(&sk->sk_error_queue, skb);
 	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_data_ready(sk, len);
+		sk->sk_data_ready(sk);
 	return 0;
 }
 EXPORT_SYMBOL(sock_queue_err_skb);
@@ -3403,6 +3578,238 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
 }
 EXPORT_SYMBOL_GPL(skb_partial_csum_set);
 
+static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len,
+			       unsigned int max)
+{
+	if (skb_headlen(skb) >= len)
+		return 0;
+
+	/* If we need to pullup then pullup to the max, so we
+	 * won't need to do it again.
+	 */
+	if (max > skb->len)
+		max = skb->len;
+
+	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
+		return -ENOMEM;
+
+	if (skb_headlen(skb) < len)
+		return -EPROTO;
+
+	return 0;
+}
+
+#define MAX_TCP_HDR_LEN (15 * 4)
+
+static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
+				      typeof(IPPROTO_IP) proto,
+				      unsigned int off)
+{
+	switch (proto) {
+		int err;
+
+	case IPPROTO_TCP:
+		err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
+					  off + MAX_TCP_HDR_LEN);
+		if (!err && !skb_partial_csum_set(skb, off,
+						  offsetof(struct tcphdr,
+							   check)))
+			err = -EPROTO;
+		return err ? ERR_PTR(err) : &tcp_hdr(skb)->check;
+
+	case IPPROTO_UDP:
+		err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr),
+					  off + sizeof(struct udphdr));
+		if (!err && !skb_partial_csum_set(skb, off,
+						  offsetof(struct udphdr,
+							   check)))
+			err = -EPROTO;
+		return err ? ERR_PTR(err) : &udp_hdr(skb)->check;
+	}
+
+	return ERR_PTR(-EPROTO);
+}
+
+/* This value should be large enough to cover a tagged ethernet header plus
+ * maximally sized IP and TCP or UDP headers.
+ */
+#define MAX_IP_HDR_LEN 128
+
+static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
+{
+	unsigned int off;
+	bool fragment;
+	__sum16 *csum;
+	int err;
+
+	fragment = false;
+
+	err = skb_maybe_pull_tail(skb,
+				  sizeof(struct iphdr),
+				  MAX_IP_HDR_LEN);
+	if (err < 0)
+		goto out;
+
+	if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
+		fragment = true;
+
+	off = ip_hdrlen(skb);
+
+	err = -EPROTO;
+
+	if (fragment)
+		goto out;
+
+	csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off);
+	if (IS_ERR(csum))
+		return PTR_ERR(csum);
+
+	if (recalculate)
+		*csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+					   ip_hdr(skb)->daddr,
+					   skb->len - off,
+					   ip_hdr(skb)->protocol, 0);
+	err = 0;
+
+out:
+	return err;
+}
+
+/* This value should be large enough to cover a tagged ethernet header plus
+ * an IPv6 header, all options, and a maximal TCP or UDP header.
+ */
+#define MAX_IPV6_HDR_LEN 256
+
+#define OPT_HDR(type, skb, off) \
+	(type *)(skb_network_header(skb) + (off))
+
+static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
+{
+	int err;
+	u8 nexthdr;
+	unsigned int off;
+	unsigned int len;
+	bool fragment;
+	bool done;
+	__sum16 *csum;
+
+	fragment = false;
+	done = false;
+
+	off = sizeof(struct ipv6hdr);
+
+	err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
+	if (err < 0)
+		goto out;
+
+	nexthdr = ipv6_hdr(skb)->nexthdr;
+
+	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
+	while (off <= len && !done) {
+		switch (nexthdr) {
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING: {
+			struct ipv6_opt_hdr *hp;
+
+			err = skb_maybe_pull_tail(skb,
+						  off +
+						  sizeof(struct ipv6_opt_hdr),
+						  MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
+			nexthdr = hp->nexthdr;
+			off += ipv6_optlen(hp);
+			break;
+		}
+		case IPPROTO_AH: {
+			struct ip_auth_hdr *hp;
+
+			err = skb_maybe_pull_tail(skb,
+						  off +
+						  sizeof(struct ip_auth_hdr),
+						  MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			hp = OPT_HDR(struct ip_auth_hdr, skb, off);
+			nexthdr = hp->nexthdr;
+			off += ipv6_authlen(hp);
+			break;
+		}
+		case IPPROTO_FRAGMENT: {
+			struct frag_hdr *hp;
+
+			err = skb_maybe_pull_tail(skb,
+						  off +
+						  sizeof(struct frag_hdr),
+						  MAX_IPV6_HDR_LEN);
+			if (err < 0)
+				goto out;
+
+			hp = OPT_HDR(struct frag_hdr, skb, off);
+
+			if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
+				fragment = true;
+
+			nexthdr = hp->nexthdr;
+			off += sizeof(struct frag_hdr);
+			break;
+		}
+		default:
+			done = true;
+			break;
+		}
+	}
+
+	err = -EPROTO;
+
+	if (!done || fragment)
+		goto out;
+
+	csum = skb_checksum_setup_ip(skb, nexthdr, off);
+	if (IS_ERR(csum))
+		return PTR_ERR(csum);
+
+	if (recalculate)
+		*csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+					 &ipv6_hdr(skb)->daddr,
+					 skb->len - off, nexthdr, 0);
+	err = 0;
+
+out:
+	return err;
+}
+
+/**
+ * skb_checksum_setup - set up partial checksum offset
+ * @skb: the skb to set up
+ * @recalculate: if true the pseudo-header checksum will be recalculated
+ */
+int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
+{
+	int err;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		err = skb_checksum_setup_ipv4(skb, recalculate);
+		break;
+
+	case htons(ETH_P_IPV6):
+		err = skb_checksum_setup_ipv6(skb, recalculate);
+		break;
+
+	default:
+		err = -EPROTO;
+		break;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(skb_checksum_setup);
+
 void __skb_warn_lro_forwarding(const struct sk_buff *skb)
 {
 	net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
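The recalculate step seeds the L4 checksum field with ~csum_tcpudp_magic(...), i.e. the folded but uncomplemented pseudo-header sum over saddr, daddr, protocol, and L4 length; the NIC then extends that over the payload and complements the result when it finishes CHECKSUM_PARTIAL. A runnable userspace rendering of that arithmetic — an illustration, not the kernel's implementation:

#include <stdint.h>
#include <stdio.h>

/* Ones-complement sum of the IPv4 pseudo-header. The folded,
 * uncomplemented value returned here corresponds to what the patch
 * stores via ~csum_tcpudp_magic(). Addresses and lengths are taken
 * in host byte order for simplicity.
 */
static uint16_t pseudo_hdr_sum(uint32_t saddr, uint32_t daddr,
			       uint8_t proto, uint16_t l4len)
{
	uint64_t sum = 0;

	sum += (saddr >> 16) + (saddr & 0xffff);
	sum += (daddr >> 16) + (daddr & 0xffff);
	sum += proto;
	sum += l4len;

	while (sum >> 16)	/* fold carries back into 16 bits */
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)sum;
}

int main(void)
{
	/* 192.0.2.1 -> 192.0.2.2, TCP (protocol 6), 1480 L4 bytes */
	printf("0x%04x\n",
	       pseudo_hdr_sum(0xc0000201, 0xc0000202, 6, 1480));
	return 0;
}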
@@ -3519,6 +3926,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	skb->tstamp.tv64 = 0;
 	skb->pkt_type = PACKET_HOST;
 	skb->skb_iif = 0;
+	skb->ignore_df = 0;
 	skb_dst_drop(skb);
 	skb->mark = 0;
 	secpath_reset(skb);
@@ -3526,3 +3934,28 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	nf_reset_trace(skb);
 }
 EXPORT_SYMBOL_GPL(skb_scrub_packet);
+
+/**
+ * skb_gso_transport_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_transport_seglen is used to determine the real size of the
+ * individual segments, including Layer4 headers (TCP/UDP).
+ *
+ * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
+ */
+unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+		return tcp_hdrlen(skb) + shinfo->gso_size;
+
+	/* UFO sets gso_size to the size of the fragmentation
+	 * payload, i.e. the size of the L4 (UDP) header is already
+	 * accounted for.
+	 */
+	return shinfo->gso_size;
+}
+EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
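A quick numeric check of the helper's contract — for TCP GSO the per-segment transport size is L4 header plus gso_size, from which a caller can bound per-segment sizes or count segments. Runnable userspace C; the plain numbers stand in for tcp_hdrlen() and shinfo->gso_size:

#include <stdio.h>

/* Standalone illustration of skb_gso_transport_seglen()'s TCP case. */
int main(void)
{
	unsigned int tcp_hdr_len = 20;	/* no TCP options */
	unsigned int gso_size = 1448;	/* MSS */
	unsigned int payload = 10000;	/* total L4 payload in the GSO skb */

	unsigned int seglen = tcp_hdr_len + gso_size;
	unsigned int nsegs = (payload + gso_size - 1) / gso_size;

	/* prints "seglen=1468, segments=7" */
	printf("seglen=%u, segments=%u\n", seglen, nsegs);
	return 0;
}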
