diff options
Diffstat (limited to 'net/ipv6/reassembly.c')
| -rw-r--r-- | net/ipv6/reassembly.c | 240 | 
1 files changed, 119 insertions, 121 deletions
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 07beeb06f75..cc85a9ba501 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -26,6 +26,9 @@   *	YOSHIFUJI,H. @USAGI	Always remove fragment header to   *				calculate ICV correctly.   */ + +#define pr_fmt(fmt) "IPv6: " fmt +  #include <linux/errno.h>  #include <linux/types.h>  #include <linux/string.h> @@ -42,6 +45,7 @@  #include <linux/jhash.h>  #include <linux/skbuff.h>  #include <linux/slab.h> +#include <linux/export.h>  #include <net/sock.h>  #include <net/snmp.h> @@ -54,6 +58,7 @@  #include <net/ndisc.h>  #include <net/addrconf.h>  #include <net/inet_frag.h> +#include <net/inet_ecn.h>  struct ip6frag_skb_cb  { @@ -63,36 +68,12 @@ struct ip6frag_skb_cb  #define FRAG6_CB(skb)	((struct ip6frag_skb_cb*)((skb)->cb)) - -/* - *	Equivalent of ipv4 struct ipq - */ - -struct frag_queue -{ -	struct inet_frag_queue	q; - -	__be32			id;		/* fragment id		*/ -	u32			user; -	struct in6_addr		saddr; -	struct in6_addr		daddr; - -	int			iif; -	unsigned int		csum; -	__u16			nhoffset; -}; - -static struct inet_frags ip6_frags; - -int ip6_frag_nqueues(struct net *net) +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)  { -	return net->ipv6.frags.nqueues; +	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);  } -int ip6_frag_mem(struct net *net) -{ -	return atomic_read(&net->ipv6.frags.mem); -} +static struct inet_frags ip6_frags;  static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,  			  struct net_device *dev); @@ -101,47 +82,36 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,   * callers should be careful not to use the hash value outside the ipfrag_lock   * as doing so could race with ipfrag_hash_rnd being recalculated.   */ -unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, -			     const struct in6_addr *daddr, u32 rnd) +static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, +				    const struct in6_addr *daddr)  {  	u32 c; -	c = jhash_3words((__force u32)saddr->s6_addr32[0], -			 (__force u32)saddr->s6_addr32[1], -			 (__force u32)saddr->s6_addr32[2], -			 rnd); - -	c = jhash_3words((__force u32)saddr->s6_addr32[3], -			 (__force u32)daddr->s6_addr32[0], -			 (__force u32)daddr->s6_addr32[1], -			 c); - -	c =  jhash_3words((__force u32)daddr->s6_addr32[2], -			  (__force u32)daddr->s6_addr32[3], -			  (__force u32)id, -			  c); +	net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd)); +	c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), +			 (__force u32)id, ip6_frags.rnd);  	return c & (INETFRAGS_HASHSZ - 1);  } -EXPORT_SYMBOL_GPL(inet6_hash_frag);  static unsigned int ip6_hashfn(struct inet_frag_queue *q)  {  	struct frag_queue *fq;  	fq = container_of(q, struct frag_queue, q); -	return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd); +	return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);  } -int ip6_frag_match(struct inet_frag_queue *q, void *a) +bool ip6_frag_match(struct inet_frag_queue *q, void *a)  {  	struct frag_queue *fq;  	struct ip6_create_arg *arg = a;  	fq = container_of(q, struct frag_queue, q); -	return (fq->id == arg->id && fq->user == arg->user && -			ipv6_addr_equal(&fq->saddr, arg->src) && -			ipv6_addr_equal(&fq->daddr, arg->dst)); +	return	fq->id == arg->id && +		fq->user == arg->user && +		ipv6_addr_equal(&fq->saddr, arg->src) && +		ipv6_addr_equal(&fq->daddr, arg->dst);  }  EXPORT_SYMBOL(ip6_frag_match); @@ -152,51 +122,24 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)  	fq->id = arg->id;  	fq->user = arg->user; -	ipv6_addr_copy(&fq->saddr, arg->src); -	ipv6_addr_copy(&fq->daddr, arg->dst); +	fq->saddr = *arg->src; +	fq->daddr = *arg->dst; +	fq->ecn = arg->ecn;  }  EXPORT_SYMBOL(ip6_frag_init); -/* Destruction primitives. */ - -static __inline__ void fq_put(struct frag_queue *fq) -{ -	inet_frag_put(&fq->q, &ip6_frags); -} - -/* Kill fq entry. It is not destroyed immediately, - * because caller (and someone more) holds reference count. - */ -static __inline__ void fq_kill(struct frag_queue *fq) -{ -	inet_frag_kill(&fq->q, &ip6_frags); -} - -static void ip6_evictor(struct net *net, struct inet6_dev *idev) -{ -	int evicted; - -	evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags); -	if (evicted) -		IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted); -} - -static void ip6_frag_expire(unsigned long data) +void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, +			   struct inet_frags *frags)  { -	struct frag_queue *fq;  	struct net_device *dev = NULL; -	struct net *net; - -	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);  	spin_lock(&fq->q.lock);  	if (fq->q.last_in & INET_FRAG_COMPLETE)  		goto out; -	fq_kill(fq); +	inet_frag_kill(&fq->q, frags); -	net = container_of(fq->q.net, struct net, ipv6.frags);  	rcu_read_lock();  	dev = dev_get_by_index_rcu(net, fq->iif);  	if (!dev) @@ -220,11 +163,24 @@ out_rcu_unlock:  	rcu_read_unlock();  out:  	spin_unlock(&fq->q.lock); -	fq_put(fq); +	inet_frag_put(&fq->q, frags); +} +EXPORT_SYMBOL(ip6_expire_frag_queue); + +static void ip6_frag_expire(unsigned long data) +{ +	struct frag_queue *fq; +	struct net *net; + +	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); +	net = container_of(fq->q.net, struct net, ipv6.frags); + +	ip6_expire_frag_queue(net, fq, &ip6_frags);  }  static __inline__ struct frag_queue * -fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst) +fq_find(struct net *net, __be32 id, const struct in6_addr *src, +	const struct in6_addr *dst, u8 ecn)  {  	struct inet_frag_queue *q;  	struct ip6_create_arg arg; @@ -234,14 +190,16 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst)  	arg.user = IP6_DEFRAG_LOCAL_DELIVER;  	arg.src = src;  	arg.dst = dst; +	arg.ecn = ecn;  	read_lock(&ip6_frags.lock); -	hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); +	hash = inet6_hash_frag(id, src, dst);  	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); -	if (q == NULL) +	if (IS_ERR_OR_NULL(q)) { +		inet_frag_maybe_warn_overflow(q, pr_fmt());  		return NULL; - +	}  	return container_of(q, struct frag_queue, q);  } @@ -252,6 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,  	struct net_device *dev;  	int offset, end;  	struct net *net = dev_net(skb_dst(skb)->dev); +	u8 ecn;  	if (fq->q.last_in & INET_FRAG_COMPLETE)  		goto err; @@ -269,6 +228,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,  		return -1;  	} +	ecn = ip6_frag_ecn(ipv6_hdr(skb)); +  	if (skb->ip_summed == CHECKSUM_COMPLETE) {  		const unsigned char *nh = skb_network_header(skb);  		skb->csum = csum_sub(skb->csum, @@ -335,12 +296,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,  	}  found: -	/* RFC5722, Section 4: -	 *                                  When reassembling an IPv6 datagram, if +	/* RFC5722, Section 4, amended by Errata ID : 3089 +	 *                          When reassembling an IPv6 datagram, if  	 *   one or more its constituent fragments is determined to be an  	 *   overlapping fragment, the entire datagram (and any constituent -	 *   fragments, including those not yet received) MUST be silently -	 *   discarded. +	 *   fragments) MUST be silently discarded.  	 */  	/* Check for overlap with preceding fragment. */ @@ -370,7 +330,8 @@ found:  	}  	fq->q.stamp = skb->tstamp;  	fq->q.meat += skb->len; -	atomic_add(skb->truesize, &fq->q.net->mem); +	fq->ecn |= ecn; +	add_frag_mem_limit(&fq->q, skb->truesize);  	/* The first fragment.  	 * nhoffset is obtained from the first fragment, of course. @@ -381,16 +342,22 @@ found:  	}  	if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && -	    fq->q.meat == fq->q.len) -		return ip6_frag_reasm(fq, prev, dev); +	    fq->q.meat == fq->q.len) { +		int res; +		unsigned long orefdst = skb->_skb_refdst; + +		skb->_skb_refdst = 0UL; +		res = ip6_frag_reasm(fq, prev, dev); +		skb->_skb_refdst = orefdst; +		return res; +	} -	write_lock(&ip6_frags.lock); -	list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); -	write_unlock(&ip6_frags.lock); +	skb_dst_drop(skb); +	inet_frag_lru_move(&fq->q);  	return -1;  discard_fq: -	fq_kill(fq); +	inet_frag_kill(&fq->q, &ip6_frags);  err:  	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),  		      IPSTATS_MIB_REASMFAILS); @@ -414,8 +381,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,  	struct sk_buff *fp, *head = fq->q.fragments;  	int    payload_len;  	unsigned int nhoff; +	int sum_truesize; +	u8 ecn; + +	inet_frag_kill(&fq->q, &ip6_frags); -	fq_kill(fq); +	ecn = ip_frag_ecn_table[fq->ecn]; +	if (unlikely(ecn == 0xff)) +		goto out_fail;  	/* Make the one we just received the head. */  	if (prev) { @@ -433,7 +406,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,  		skb_morph(head, fq->q.fragments);  		head->next = fq->q.fragments->next; -		kfree_skb(fq->q.fragments); +		consume_skb(fq->q.fragments);  		fq->q.fragments = head;  	} @@ -448,7 +421,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,  		goto out_oversize;  	/* Head of list must not be cloned. */ -	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) +	if (skb_unclone(head, GFP_ATOMIC))  		goto out_oom;  	/* If the first fragment is fragmented itself, we split @@ -464,14 +437,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,  		head->next = clone;  		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;  		skb_frag_list_init(head); -		for (i=0; i<skb_shinfo(head)->nr_frags; i++) -			plen += skb_shinfo(head)->frags[i].size; +		for (i = 0; i < skb_shinfo(head)->nr_frags; i++) +			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);  		clone->len = clone->data_len = head->data_len - plen;  		head->data_len -= clone->len;  		head->len -= clone->len;  		clone->csum = 0;  		clone->ip_summed = head->ip_summed; -		atomic_add(clone->truesize, &fq->q.net->mem); +		add_frag_mem_limit(&fq->q, clone->truesize);  	}  	/* We have to remove fragment header from datagram and to relocate @@ -483,26 +456,41 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,  	head->mac_header += sizeof(struct frag_hdr);  	head->network_header += sizeof(struct frag_hdr); -	skb_shinfo(head)->frag_list = head->next;  	skb_reset_transport_header(head);  	skb_push(head, head->data - skb_network_header(head)); -	for (fp=head->next; fp; fp = fp->next) { -		head->data_len += fp->len; -		head->len += fp->len; +	sum_truesize = head->truesize; +	for (fp = head->next; fp;) { +		bool headstolen; +		int delta; +		struct sk_buff *next = fp->next; + +		sum_truesize += fp->truesize;  		if (head->ip_summed != fp->ip_summed)  			head->ip_summed = CHECKSUM_NONE;  		else if (head->ip_summed == CHECKSUM_COMPLETE)  			head->csum = csum_add(head->csum, fp->csum); -		head->truesize += fp->truesize; + +		if (skb_try_coalesce(head, fp, &headstolen, &delta)) { +			kfree_skb_partial(fp, headstolen); +		} else { +			if (!skb_shinfo(head)->frag_list) +				skb_shinfo(head)->frag_list = fp; +			head->data_len += fp->len; +			head->len += fp->len; +			head->truesize += fp->truesize; +		} +		fp = next;  	} -	atomic_sub(head->truesize, &fq->q.net->mem); +	sub_frag_mem_limit(&fq->q, sum_truesize);  	head->next = NULL;  	head->dev = dev;  	head->tstamp = fq->q.stamp;  	ipv6_hdr(head)->payload_len = htons(payload_len); +	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);  	IP6CB(head)->nhoff = nhoff; +	IP6CB(head)->flags |= IP6SKB_FRAGMENTED;  	/* Yes, and fold redundant checksum back. 8) */  	if (head->ip_summed == CHECKSUM_COMPLETE) @@ -518,12 +506,10 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,  	return 1;  out_oversize: -	if (net_ratelimit()) -		printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len); +	net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len);  	goto out_fail;  out_oom: -	if (net_ratelimit()) -		printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); +	net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");  out_fail:  	rcu_read_lock();  	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); @@ -535,8 +521,12 @@ static int ipv6_frag_rcv(struct sk_buff *skb)  {  	struct frag_hdr *fhdr;  	struct frag_queue *fq; -	struct ipv6hdr *hdr = ipv6_hdr(skb); +	const struct ipv6hdr *hdr = ipv6_hdr(skb);  	struct net *net = dev_net(skb_dst(skb)->dev); +	int evicted; + +	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) +		goto fail_hdr;  	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); @@ -558,13 +548,17 @@ static int ipv6_frag_rcv(struct sk_buff *skb)  				 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);  		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); +		IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;  		return 1;  	} -	if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) -		ip6_evictor(net, ip6_dst_idev(skb_dst(skb))); +	evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false); +	if (evicted) +		IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), +				 IPSTATS_MIB_REASMFAILS, evicted); -	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); +	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, +		     ip6_frag_ecn(hdr));  	if (fq != NULL) {  		int ret; @@ -573,7 +567,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)  		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);  		spin_unlock(&fq->q.lock); -		fq_put(fq); +		inet_frag_put(&fq->q, &ip6_frags);  		return ret;  	} @@ -644,9 +638,13 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)  		table[0].data = &net->ipv6.frags.high_thresh;  		table[1].data = &net->ipv6.frags.low_thresh;  		table[2].data = &net->ipv6.frags.timeout; + +		/* Don't export sysctls to unprivileged users */ +		if (net->user_ns != &init_user_ns) +			table[0].procname = NULL;  	} -	hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table); +	hdr = register_net_sysctl(net, "net/ipv6", table);  	if (hdr == NULL)  		goto err_reg; @@ -674,7 +672,7 @@ static struct ctl_table_header *ip6_ctl_header;  static int ip6_frags_sysctl_register(void)  { -	ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path, +	ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6",  			ip6_frags_ctl_table);  	return ip6_ctl_header == NULL ? -ENOMEM : 0;  }  | 
