Diffstat (limited to 'net/ipv4/inet_fragment.c')
-rw-r--r--	net/ipv4/inet_fragment.c	165
1 file changed, 120 insertions(+), 45 deletions(-)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 5ff2a51b6d0..3b01959bf4b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -21,7 +21,30 @@
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
 
+#include <net/sock.h>
 #include <net/inet_frag.h>
+#include <net/inet_ecn.h>
+
+/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
+ * Value : 0xff if frame should be dropped.
+ *         0 or INET_ECN_CE value, to be ORed in to final iph->tos field
+ */
+const u8 ip_frag_ecn_table[16] = {
+	/* at least one fragment had CE, and others ECT_0 or ECT_1 */
+	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]			= INET_ECN_CE,
+	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]			= INET_ECN_CE,
+	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]	= INET_ECN_CE,
+
+	/* invalid combinations : drop frame */
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
+};
+EXPORT_SYMBOL(ip_frag_ecn_table);
 
 static void inet_frag_secret_rebuild(unsigned long dummy)
 {
@@ -29,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
 	unsigned long now = jiffies;
 	int i;
 
+	/* Per bucket lock NOT needed here, due to write lock protection */
 	write_lock(&f->lock);
+
 	get_random_bytes(&f->rnd, sizeof(u32));
 	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+		struct inet_frag_bucket *hb;
 		struct inet_frag_queue *q;
-		struct hlist_node *p, *n;
+		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) {
+		hb = &f->hash[i];
+		hlist_for_each_entry_safe(q, n, &hb->chain, list) {
 			unsigned int hval = f->hashfn(q);
 
 			if (hval != i) {
+				struct inet_frag_bucket *hb_dest;
+
 				hlist_del(&q->list);
 
 				/* Relink to new hash chain. */
-				hlist_add_head(&q->list, &f->hash[hval]);
+				hb_dest = &f->hash[hval];
+				hlist_add_head(&q->list, &hb_dest->chain);
 			}
 		}
 	}
@@ -55,14 +85,14 @@ void inet_frags_init(struct inet_frags *f)
 {
 	int i;
 
-	for (i = 0; i < INETFRAGS_HASHSZ; i++)
-		INIT_HLIST_HEAD(&f->hash[i]);
+	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+		struct inet_frag_bucket *hb = &f->hash[i];
 
+		spin_lock_init(&hb->chain_lock);
+		INIT_HLIST_HEAD(&hb->chain);
+	}
 	rwlock_init(&f->lock);
 
-	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
-				   (jiffies ^ (jiffies >> 6)));
-
 	setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
 			(unsigned long)f);
 	f->secret_timer.expires = jiffies + f->secret_interval;
@@ -73,8 +103,9 @@ EXPORT_SYMBOL(inet_frags_init);
 
 void inet_frags_init_net(struct netns_frags *nf)
 {
 	nf->nqueues = 0;
-	atomic_set(&nf->mem, 0);
+	init_frag_mem_limit(nf);
 	INIT_LIST_HEAD(&nf->lru_list);
+	spin_lock_init(&nf->lru_lock);
 }
 EXPORT_SYMBOL(inet_frags_init_net);
@@ -89,18 +120,28 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
 	nf->low_thresh = 0;
 
 	local_bh_disable();
-	inet_frag_evictor(nf, f);
+	inet_frag_evictor(nf, f, true);
 	local_bh_enable();
+
+	percpu_counter_destroy(&nf->mem);
 }
 EXPORT_SYMBOL(inet_frags_exit_net);
 
 static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
 {
-	write_lock(&f->lock);
+	struct inet_frag_bucket *hb;
+	unsigned int hash;
+
+	read_lock(&f->lock);
+	hash = f->hashfn(fq);
+	hb = &f->hash[hash];
+
+	spin_lock(&hb->chain_lock);
 	hlist_del(&fq->list);
-	list_del(&fq->lru_list);
-	fq->net->nqueues--;
-	write_unlock(&f->lock);
+	spin_unlock(&hb->chain_lock);
+
+	read_unlock(&f->lock);
+	inet_frag_lru_del(fq);
 }
 
 void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -117,12 +158,8 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
 EXPORT_SYMBOL(inet_frag_kill);
 
 static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
-		struct sk_buff *skb, int *work)
+		struct sk_buff *skb)
 {
-	if (work)
-		*work -= skb->truesize;
-
-	atomic_sub(skb->truesize, &nf->mem);
 	if (f->skb_free)
 		f->skb_free(skb);
 	kfree_skb(skb);
@@ -133,6 +170,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 {
 	struct sk_buff *fp;
 	struct netns_frags *nf;
+	unsigned int sum, sum_truesize = 0;
 
 	WARN_ON(!(q->last_in & INET_FRAG_COMPLETE));
 	WARN_ON(del_timer(&q->timer) != 0);
@@ -143,13 +181,14 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 	while (fp) {
 		struct sk_buff *xp = fp->next;
 
-		frag_kfree_skb(nf, f, fp, work);
+		sum_truesize += fp->truesize;
+		frag_kfree_skb(nf, f, fp);
 		fp = xp;
 	}
-
+	sum = sum_truesize + f->qsize;
 	if (work)
-		*work -= f->qsize;
-	atomic_sub(f->qsize, &nf->mem);
+		*work -= sum;
+	sub_frag_mem_limit(q, sum);
 
 	if (f->destructor)
 		f->destructor(q);
@@ -158,23 +197,32 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 }
 EXPORT_SYMBOL(inet_frag_destroy);
 
-int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f)
+int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
 {
 	struct inet_frag_queue *q;
 	int work, evicted = 0;
 
-	work = atomic_read(&nf->mem) - nf->low_thresh;
-	while (work > 0) {
-		read_lock(&f->lock);
+	if (!force) {
+		if (frag_mem_limit(nf) <= nf->high_thresh)
+			return 0;
+	}
+
+	work = frag_mem_limit(nf) - nf->low_thresh;
+	while (work > 0 || force) {
+		spin_lock(&nf->lru_lock);
+
 		if (list_empty(&nf->lru_list)) {
-			read_unlock(&f->lock);
+			spin_unlock(&nf->lru_lock);
 			break;
 		}
 
 		q = list_first_entry(&nf->lru_list,
 				struct inet_frag_queue, lru_list);
 		atomic_inc(&q->refcnt);
-		read_unlock(&f->lock);
+		/* Remove q from list to avoid several CPUs grabbing it */
+		list_del_init(&q->lru_list);
+
+		spin_unlock(&nf->lru_lock);
 
 		spin_lock(&q->lock);
 		if (!(q->last_in & INET_FRAG_COMPLETE))
@@ -194,28 +242,30 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 		struct inet_frag_queue *qp_in, struct inet_frags *f,
 		void *arg)
 {
+	struct inet_frag_bucket *hb;
 	struct inet_frag_queue *qp;
-#ifdef CONFIG_SMP
-	struct hlist_node *n;
-#endif
 	unsigned int hash;
 
-	write_lock(&f->lock);
+	read_lock(&f->lock); /* Protects against hash rebuild */
 	/*
 	 * While we stayed w/o the lock other CPU could update
 	 * the rnd seed, so we need to re-calculate the hash
 	 * chain. Fortunatelly the qp_in can be used to get one.
 	 */
 	hash = f->hashfn(qp_in);
+	hb = &f->hash[hash];
+	spin_lock(&hb->chain_lock);
+
 #ifdef CONFIG_SMP
 	/* With SMP race we have to recheck hash table, because
 	 * such entry could be created on other cpu, while we
-	 * promoted read lock to write lock.
+	 * released the hash bucket lock.
 	 */
-	hlist_for_each_entry(qp, n, &f->hash[hash], list) {
+	hlist_for_each_entry(qp, &hb->chain, list) {
 		if (qp->net == nf && f->match(qp, arg)) {
 			atomic_inc(&qp->refcnt);
-			write_unlock(&f->lock);
+			spin_unlock(&hb->chain_lock);
+			read_unlock(&f->lock);
 			qp_in->last_in |= INET_FRAG_COMPLETE;
 			inet_frag_put(qp_in, f);
 			return qp;
@@ -227,10 +277,11 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 		atomic_inc(&qp->refcnt);
 
 	atomic_inc(&qp->refcnt);
-	hlist_add_head(&qp->list, &f->hash[hash]);
-	list_add_tail(&qp->lru_list, &nf->lru_list);
-	nf->nqueues++;
-	write_unlock(&f->lock);
+	hlist_add_head(&qp->list, &hb->chain);
+	inet_frag_lru_add(nf, qp);
+	spin_unlock(&hb->chain_lock);
+	read_unlock(&f->lock);
+
 	return qp;
 }
 
@@ -243,12 +294,14 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 	if (q == NULL)
 		return NULL;
 
+	q->net = nf;
 	f->constructor(q, arg);
-	atomic_add(f->qsize, &nf->mem);
+	add_frag_mem_limit(q, f->qsize);
+
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
 	atomic_set(&q->refcnt, 1);
-	q->net = nf;
+	INIT_LIST_HEAD(&q->lru_list);
 
 	return q;
 }
@@ -269,18 +322,40 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		struct inet_frags *f, void *key, unsigned int hash)
 	__releases(&f->lock)
 {
+	struct inet_frag_bucket *hb;
 	struct inet_frag_queue *q;
-	struct hlist_node *n;
+	int depth = 0;
+
+	hb = &f->hash[hash];
 
-	hlist_for_each_entry(q, n, &f->hash[hash], list) {
+	spin_lock(&hb->chain_lock);
+	hlist_for_each_entry(q, &hb->chain, list) {
 		if (q->net == nf && f->match(q, key)) {
 			atomic_inc(&q->refcnt);
+			spin_unlock(&hb->chain_lock);
 			read_unlock(&f->lock);
 			return q;
 		}
+		depth++;
 	}
+	spin_unlock(&hb->chain_lock);
 	read_unlock(&f->lock);
 
-	return inet_frag_create(nf, f, key);
+	if (depth <= INETFRAGS_MAXDEPTH)
+		return inet_frag_create(nf, f, key);
+	else
+		return ERR_PTR(-ENOBUFS);
 }
 EXPORT_SYMBOL(inet_frag_find);
+
+void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
+				   const char *prefix)
+{
+	static const char msg[] = "inet_frag_find: Fragment hash bucket"
+		" list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
+		". Dropping fragment.\n";
+
+	if (PTR_ERR(q) == -ENOBUFS)
+		LIMIT_NETDEBUG(KERN_WARNING "%s%s", prefix, msg);
+}
+EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
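
A few notes on the interfaces this diff introduces, with sketches. First, ip_frag_ecn_table: it is indexed by the OR of one bit per ECN codepoint seen across all fragments, and yields either 0xff (drop) or bits to OR into the reassembled header's tos. Below is a minimal standalone sketch of that decision logic, assuming the IPFRAG_ECN_* bit layout from include/net/inet_frag.h (NOT_ECT=0x01, ECT_1=0x02, ECT_0=0x04, CE=0x08); frag_ecn_bit() and reassembled_ecn() are illustrative names, not kernel API, and the table body is copied from the diff:

/* Standalone sketch of the RFC 3168 sec. 5.3 reassembly decision. */
#include <stdint.h>
#include <stdio.h>

#define INET_ECN_MASK	3	/* low two bits of tos */
#define INET_ECN_CE	3

#define IPFRAG_ECN_NOT_ECT	0x01	/* (tos & 3) == 0 */
#define IPFRAG_ECN_ECT_1	0x02	/* (tos & 3) == 1 */
#define IPFRAG_ECN_ECT_0	0x04	/* (tos & 3) == 2 */
#define IPFRAG_ECN_CE		0x08	/* (tos & 3) == 3 */

static const uint8_t ip_frag_ecn_table[16] = {
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]			= INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]			= INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]	= INET_ECN_CE,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
};

/* One bit per seen codepoint, mirroring the kernel's ip4_frag_ecn() */
static uint8_t frag_ecn_bit(uint8_t tos)
{
	return 1 << (tos & INET_ECN_MASK);
}

/* Returns -1 to drop the reassembled frame, otherwise the value to
 * OR into the final header's tos (0 or INET_ECN_CE). */
static int reassembled_ecn(const uint8_t *tos, int nfrags)
{
	uint8_t ecn = 0;
	int i;

	for (i = 0; i < nfrags; i++)
		ecn |= frag_ecn_bit(tos[i]);

	return ip_frag_ecn_table[ecn] == 0xff ? -1 : ip_frag_ecn_table[ecn];
}

int main(void)
{
	uint8_t frags[] = { 2 /* ECT(0) */, 3 /* CE */ };

	printf("%d\n", reassembled_ecn(frags, 2));	/* prints 3 (CE) */
	return 0;
}

The table bakes the RFC into data: CE on any fragment propagates to the whole datagram, while mixing Not-ECT with any ECT or CE marking is invalid and drops the frame.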
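Second, the hash buckets: chains now live behind a small per-bucket structure whose definition belongs to the companion include/net/inet_frag.h change, not shown in this diff. It plausibly looks like this:

struct inet_frag_bucket {
	struct hlist_head	chain;		/* frag queues hashed here */
	spinlock_t		chain_lock;	/* serializes chain edits */
};

The f->lock rwlock survives only to fence the secret-timer rehash: lookup, intern and unlink take it shared and then the bucket's chain_lock, while inet_frag_secret_rebuild takes it exclusive, which is why that function can relink entries across buckets without touching the per-bucket locks.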
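Third, memory accounting: atomic_read/atomic_add/atomic_sub on nf->mem give way to frag_mem_limit()/add_frag_mem_limit()/sub_frag_mem_limit(). A hedged sketch of how those helpers are plausibly built on a percpu_counter follows; the real inlines live in include/net/inet_frag.h, and the batch value here is a guess:

/* Sketch: per-netns fragment memory accounting on a percpu_counter
 * instead of one contended atomic_t.  Assumes nf->mem is now a
 * struct percpu_counter. */
static int frag_percpu_counter_batch = 130000;	/* assumed: ~ one large skb */

static inline void init_frag_mem_limit(struct netns_frags *nf)
{
	percpu_counter_init(&nf->mem, 0);
}

static inline unsigned int frag_mem_limit(struct netns_frags *nf)
{
	/* percpu_counter_read() is cheap but may lag the true sum by up
	 * to nr_cpus * batch; good enough for threshold checks. */
	return percpu_counter_read(&nf->mem);
}

static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i)
{
	__percpu_counter_add(&q->net->mem, i, frag_percpu_counter_batch);
}

static inline void sub_frag_mem_limit(struct inet_frag_queue *q, int i)
{
	__percpu_counter_add(&q->net->mem, -i, frag_percpu_counter_batch);
}

This is also why inet_frags_exit_net() grows a percpu_counter_destroy(&nf->mem), and why q->net must be assigned before the first add_frag_mem_limit() call in inet_frag_alloc().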
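Fourth, LRU maintenance moves off f->lock onto the new nf->lru_lock, which is what lets the evictor do list_del_init() under that lock so several CPUs cannot grab the same queue. A sketch of what the inet_frag_lru_add()/inet_frag_lru_del() helpers presumably do (again, the real inlines are in include/net/inet_frag.h):

static inline void inet_frag_lru_del(struct inet_frag_queue *q)
{
	spin_lock(&q->net->lru_lock);
	list_del_init(&q->lru_list);	/* no-op if the evictor unlinked us */
	q->net->nqueues--;
	spin_unlock(&q->net->lru_lock);
}

static inline void inet_frag_lru_add(struct netns_frags *nf,
				     struct inet_frag_queue *q)
{
	spin_lock(&nf->lru_lock);
	list_add_tail(&q->lru_list, &nf->lru_list);
	nf->nqueues++;
	spin_unlock(&nf->lru_lock);
}

This also explains the new INIT_LIST_HEAD(&q->lru_list) in inet_frag_alloc(): a queue the evictor has already pulled off the list can still pass through fq_unlink() and inet_frag_lru_del() without corrupting the list.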
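Finally, inet_frag_find() can now return ERR_PTR(-ENOBUFS) once a bucket chain exceeds INETFRAGS_MAXDEPTH, so callers must check for an error pointer rather than only NULL and may report it via inet_frag_maybe_warn_overflow(). A sketch of the caller-side pattern, modeled on ipv4's ip_find(); struct ipq, struct ip4_create_arg and ipqhashfn() are ipv4-side names used here only for illustration:

/* Look up (or create) the reassembly queue for one fragment. */
static struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
{
	struct inet_frag_queue *q;
	struct ip4_create_arg arg;
	unsigned int hash;

	arg.iph = iph;
	arg.user = user;

	read_lock(&ip4_frags.lock);	/* inet_frag_find() releases it */
	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);

	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
	if (IS_ERR_OR_NULL(q)) {
		/* ERR_PTR(-ENOBUFS): chain depth > INETFRAGS_MAXDEPTH;
		 * the helper rate-limits the warning for us. */
		inet_frag_maybe_warn_overflow(q, pr_fmt());
		return NULL;
	}
	return container_of(q, struct ipq, q);
}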
