diff options
Diffstat (limited to 'net/ipv4/inet_hashtables.c')
| -rw-r--r-- | net/ipv4/inet_hashtables.c | 168 | 
1 files changed, 96 insertions, 72 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 1b344f30b46..43116e8c8e1 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -21,8 +21,34 @@  #include <net/inet_connection_sock.h>  #include <net/inet_hashtables.h> +#include <net/secure_seq.h>  #include <net/ip.h> +static unsigned int inet_ehashfn(struct net *net, const __be32 laddr, +				 const __u16 lport, const __be32 faddr, +				 const __be16 fport) +{ +	static u32 inet_ehash_secret __read_mostly; + +	net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); + +	return __inet_ehashfn(laddr, lport, faddr, fport, +			      inet_ehash_secret + net_hash_mix(net)); +} + + +static unsigned int inet_sk_ehashfn(const struct sock *sk) +{ +	const struct inet_sock *inet = inet_sk(sk); +	const __be32 laddr = inet->inet_rcv_saddr; +	const __u16 lport = inet->inet_num; +	const __be32 faddr = inet->inet_daddr; +	const __be16 fport = inet->inet_dport; +	struct net *net = sock_net(sk); + +	return inet_ehashfn(net, laddr, lport, faddr, fport); +} +  /*   * Allocate and initialize a new local port bind bucket.   * The bindhash mutex for snum's hash chain must be held here. @@ -38,6 +64,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,  		write_pnet(&tb->ib_net, hold_net(net));  		tb->port      = snum;  		tb->fastreuse = 0; +		tb->fastreuseport = 0;  		tb->num_owners = 0;  		INIT_HLIST_HEAD(&tb->owners);  		hlist_add_head(&tb->node, &head->chain); @@ -118,13 +145,12 @@ int __inet_inherit_port(struct sock *sk, struct sock *child)  		 * that the listener socket's icsk_bind_hash is the same  		 * as that of the child socket. We have to look up or  		 * create a new bind bucket for the child here. */ -		struct hlist_node *node; -		inet_bind_bucket_for_each(tb, node, &head->chain) { +		inet_bind_bucket_for_each(tb, &head->chain) {  			if (net_eq(ib_net(tb), sock_net(sk)) &&  			    tb->port == port)  				break;  		} -		if (!node) { +		if (!tb) {  			tb = inet_bind_bucket_create(table->bind_bucket_cachep,  						     sock_net(sk), head, port);  			if (!tb) { @@ -133,8 +159,7 @@ int __inet_inherit_port(struct sock *sk, struct sock *child)  			}  		}  	} -	sk_add_bind_node(child, &tb->owners); -	inet_csk(child)->icsk_bind_hash = tb; +	inet_bind_hash(child, tb, port);  	spin_unlock(&head->lock);  	return 0; @@ -151,16 +176,16 @@ static inline int compute_score(struct sock *sk, struct net *net,  	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&  			!ipv6_only_sock(sk)) {  		__be32 rcv_saddr = inet->inet_rcv_saddr; -		score = sk->sk_family == PF_INET ? 1 : 0; +		score = sk->sk_family == PF_INET ? 2 : 1;  		if (rcv_saddr) {  			if (rcv_saddr != daddr)  				return -1; -			score += 2; +			score += 4;  		}  		if (sk->sk_bound_dev_if) {  			if (sk->sk_bound_dev_if != dif)  				return -1; -			score += 2; +			score += 4;  		}  	}  	return score; @@ -176,6 +201,7 @@ static inline int compute_score(struct sock *sk, struct net *net,  struct sock *__inet_lookup_listener(struct net *net,  				    struct inet_hashinfo *hashinfo, +				    const __be32 saddr, __be16 sport,  				    const __be32 daddr, const unsigned short hnum,  				    const int dif)  { @@ -183,17 +209,29 @@ struct sock *__inet_lookup_listener(struct net *net,  	struct hlist_nulls_node *node;  	unsigned int hash = inet_lhashfn(net, hnum);  	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; -	int score, hiscore; +	int score, hiscore, matches = 0, reuseport = 0; +	u32 phash = 0;  	rcu_read_lock();  begin:  	result = NULL; -	hiscore = -1; +	hiscore = 0;  	sk_nulls_for_each_rcu(sk, node, &ilb->head) {  		score = compute_score(sk, net, hnum, daddr, dif);  		if (score > hiscore) {  			result = sk;  			hiscore = score; +			reuseport = sk->sk_reuseport; +			if (reuseport) { +				phash = inet_ehashfn(net, daddr, hnum, +						     saddr, sport); +				matches = 1; +			} +		} else if (score == hiscore && reuseport) { +			matches++; +			if (((u64)phash * matches) >> 32 == 0) +				result = sk; +			phash = next_pseudo_random32(phash);  		}  	}  	/* @@ -217,13 +255,26 @@ begin:  }  EXPORT_SYMBOL_GPL(__inet_lookup_listener); -struct sock * __inet_lookup_established(struct net *net, +/* All sockets share common refcount, but have different destructors */ +void sock_gen_put(struct sock *sk) +{ +	if (!atomic_dec_and_test(&sk->sk_refcnt)) +		return; + +	if (sk->sk_state == TCP_TIME_WAIT) +		inet_twsk_free(inet_twsk(sk)); +	else +		sk_free(sk); +} +EXPORT_SYMBOL_GPL(sock_gen_put); + +struct sock *__inet_lookup_established(struct net *net,  				  struct inet_hashinfo *hashinfo,  				  const __be32 saddr, const __be16 sport,  				  const __be32 daddr, const u16 hnum,  				  const int dif)  { -	INET_ADDR_COOKIE(acookie, saddr, daddr) +	INET_ADDR_COOKIE(acookie, saddr, daddr);  	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);  	struct sock *sk;  	const struct hlist_nulls_node *node; @@ -237,16 +288,18 @@ struct sock * __inet_lookup_established(struct net *net,  	rcu_read_lock();  begin:  	sk_nulls_for_each_rcu(sk, node, &head->chain) { -		if (INET_MATCH(sk, net, hash, acookie, -					saddr, daddr, ports, dif)) { +		if (sk->sk_hash != hash) +			continue; +		if (likely(INET_MATCH(sk, net, acookie, +				      saddr, daddr, ports, dif))) {  			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) -				goto begintw; -			if (unlikely(!INET_MATCH(sk, net, hash, acookie, -				saddr, daddr, ports, dif))) { -				sock_put(sk); +				goto out; +			if (unlikely(!INET_MATCH(sk, net, acookie, +						 saddr, daddr, ports, dif))) { +				sock_gen_put(sk);  				goto begin;  			} -			goto out; +			goto found;  		}  	}  	/* @@ -256,33 +309,9 @@ begin:  	 */  	if (get_nulls_value(node) != slot)  		goto begin; - -begintw: -	/* Must check for a TIME_WAIT'er before going to listener hash. */ -	sk_nulls_for_each_rcu(sk, node, &head->twchain) { -		if (INET_TW_MATCH(sk, net, hash, acookie, -					saddr, daddr, ports, dif)) { -			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { -				sk = NULL; -				goto out; -			} -			if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie, -				 saddr, daddr, ports, dif))) { -				sock_put(sk); -				goto begintw; -			} -			goto out; -		} -	} -	/* -	 * if the nulls value we got at the end of this lookup is -	 * not the expected one, we must restart lookup. -	 * We probably met an item that was moved to another chain. -	 */ -	if (get_nulls_value(node) != slot) -		goto begintw; -	sk = NULL;  out: +	sk = NULL; +found:  	rcu_read_unlock();  	return sk;  } @@ -298,7 +327,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,  	__be32 daddr = inet->inet_rcv_saddr;  	__be32 saddr = inet->inet_daddr;  	int dif = sk->sk_bound_dev_if; -	INET_ADDR_COOKIE(acookie, saddr, daddr) +	INET_ADDR_COOKIE(acookie, saddr, daddr);  	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);  	struct net *net = sock_net(sk);  	unsigned int hash = inet_ehashfn(net, daddr, lport, @@ -307,35 +336,29 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,  	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);  	struct sock *sk2;  	const struct hlist_nulls_node *node; -	struct inet_timewait_sock *tw; +	struct inet_timewait_sock *tw = NULL;  	int twrefcnt = 0;  	spin_lock(lock); -	/* Check TIME-WAIT sockets first. */ -	sk_nulls_for_each(sk2, node, &head->twchain) { -		tw = inet_twsk(sk2); - -		if (INET_TW_MATCH(sk2, net, hash, acookie, -					saddr, daddr, ports, dif)) { -			if (twsk_unique(sk, sk2, twp)) -				goto unique; -			else -				goto not_unique; -		} -	} -	tw = NULL; - -	/* And established part... */  	sk_nulls_for_each(sk2, node, &head->chain) { -		if (INET_MATCH(sk2, net, hash, acookie, -					saddr, daddr, ports, dif)) +		if (sk2->sk_hash != hash) +			continue; + +		if (likely(INET_MATCH(sk2, net, acookie, +					 saddr, daddr, ports, dif))) { +			if (sk2->sk_state == TCP_TIME_WAIT) { +				tw = inet_twsk(sk2); +				if (twsk_unique(sk, sk2, twp)) +					break; +			}  			goto not_unique; +		}  	} -unique:  	/* Must record num and sport now. Otherwise we will see -	 * in hash table socket with a funny identity. */ +	 * in hash table socket with a funny identity. +	 */  	inet->inet_num = lport;  	inet->inet_sport = htons(lport);  	sk->sk_hash = hash; @@ -444,7 +467,7 @@ void inet_unhash(struct sock *sk)  		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);  	spin_lock_bh(lock); -	done =__sk_nulls_del_node_init_rcu(sk); +	done = __sk_nulls_del_node_init_rcu(sk);  	if (done)  		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);  	spin_unlock_bh(lock); @@ -469,16 +492,15 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,  		int i, remaining, low, high, port;  		static u32 hint;  		u32 offset = hint + port_offset; -		struct hlist_node *node;  		struct inet_timewait_sock *tw = NULL; -		inet_get_local_port_range(&low, &high); +		inet_get_local_port_range(net, &low, &high);  		remaining = (high - low) + 1;  		local_bh_disable();  		for (i = 1; i <= remaining; i++) {  			port = low + (i + offset) % remaining; -			if (inet_is_reserved_local_port(port)) +			if (inet_is_local_reserved_port(net, port))  				continue;  			head = &hinfo->bhash[inet_bhashfn(net, port,  					hinfo->bhash_size)]; @@ -488,10 +510,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,  			 * because the established check is already  			 * unique enough.  			 */ -			inet_bind_bucket_for_each(tb, node, &head->chain) { +			inet_bind_bucket_for_each(tb, &head->chain) {  				if (net_eq(ib_net(tb), net) &&  				    tb->port == port) { -					if (tb->fastreuse >= 0) +					if (tb->fastreuse >= 0 || +					    tb->fastreuseport >= 0)  						goto next_port;  					WARN_ON(hlist_empty(&tb->owners));  					if (!check_established(death_row, sk, @@ -508,6 +531,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,  				break;  			}  			tb->fastreuse = -1; +			tb->fastreuseport = -1;  			goto ok;  		next_port:  | 
