diff options
Diffstat (limited to 'net/ipv4/inet_hashtables.c')
| -rw-r--r-- | net/ipv4/inet_hashtables.c | 154 |
1 files changed, 84 insertions, 70 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fa3ae814871..43116e8c8e1 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -24,6 +24,31 @@ #include <net/secure_seq.h> #include <net/ip.h> +static unsigned int inet_ehashfn(struct net *net, const __be32 laddr, + const __u16 lport, const __be32 faddr, + const __be16 fport) +{ + static u32 inet_ehash_secret __read_mostly; + + net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); + + return __inet_ehashfn(laddr, lport, faddr, fport, + inet_ehash_secret + net_hash_mix(net)); +} + + +static unsigned int inet_sk_ehashfn(const struct sock *sk) +{ + const struct inet_sock *inet = inet_sk(sk); + const __be32 laddr = inet->inet_rcv_saddr; + const __u16 lport = inet->inet_num; + const __be32 faddr = inet->inet_daddr; + const __be16 fport = inet->inet_dport; + struct net *net = sock_net(sk); + + return inet_ehashfn(net, laddr, lport, faddr, fport); +} + /* * Allocate and initialize a new local port bind bucket. * The bindhash mutex for snum's hash chain must be held here. @@ -39,6 +64,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, write_pnet(&tb->ib_net, hold_net(net)); tb->port = snum; tb->fastreuse = 0; + tb->fastreuseport = 0; tb->num_owners = 0; INIT_HLIST_HEAD(&tb->owners); hlist_add_head(&tb->node, &head->chain); @@ -119,13 +145,12 @@ int __inet_inherit_port(struct sock *sk, struct sock *child) * that the listener socket's icsk_bind_hash is the same * as that of the child socket. We have to look up or * create a new bind bucket for the child here. */ - struct hlist_node *node; - inet_bind_bucket_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, &head->chain) { if (net_eq(ib_net(tb), sock_net(sk)) && tb->port == port) break; } - if (!node) { + if (!tb) { tb = inet_bind_bucket_create(table->bind_bucket_cachep, sock_net(sk), head, port); if (!tb) { @@ -151,16 +176,16 @@ static inline int compute_score(struct sock *sk, struct net *net, if (net_eq(sock_net(sk), net) && inet->inet_num == hnum && !ipv6_only_sock(sk)) { __be32 rcv_saddr = inet->inet_rcv_saddr; - score = sk->sk_family == PF_INET ? 1 : 0; + score = sk->sk_family == PF_INET ? 2 : 1; if (rcv_saddr) { if (rcv_saddr != daddr) return -1; - score += 2; + score += 4; } if (sk->sk_bound_dev_if) { if (sk->sk_bound_dev_if != dif) return -1; - score += 2; + score += 4; } } return score; @@ -176,6 +201,7 @@ static inline int compute_score(struct sock *sk, struct net *net, struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, + const __be32 saddr, __be16 sport, const __be32 daddr, const unsigned short hnum, const int dif) { @@ -183,17 +209,29 @@ struct sock *__inet_lookup_listener(struct net *net, struct hlist_nulls_node *node; unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; - int score, hiscore; + int score, hiscore, matches = 0, reuseport = 0; + u32 phash = 0; rcu_read_lock(); begin: result = NULL; - hiscore = -1; + hiscore = 0; sk_nulls_for_each_rcu(sk, node, &ilb->head) { score = compute_score(sk, net, hnum, daddr, dif); if (score > hiscore) { result = sk; hiscore = score; + reuseport = sk->sk_reuseport; + if (reuseport) { + phash = inet_ehashfn(net, daddr, hnum, + saddr, sport); + matches = 1; + } + } else if (score == hiscore && reuseport) { + matches++; + if (((u64)phash * matches) >> 32 == 0) + result = sk; + phash = next_pseudo_random32(phash); } } /* @@ -217,13 +255,26 @@ begin: } EXPORT_SYMBOL_GPL(__inet_lookup_listener); +/* All sockets share common refcount, but have different destructors */ +void sock_gen_put(struct sock *sk) +{ + if (!atomic_dec_and_test(&sk->sk_refcnt)) + return; + + if (sk->sk_state == TCP_TIME_WAIT) + inet_twsk_free(inet_twsk(sk)); + else + sk_free(sk); +} +EXPORT_SYMBOL_GPL(sock_gen_put); + struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, const int dif) { - INET_ADDR_COOKIE(acookie, saddr, daddr) + INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; const struct hlist_nulls_node *node; @@ -242,13 +293,13 @@ begin: if (likely(INET_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) - goto begintw; + goto out; if (unlikely(!INET_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { - sock_put(sk); + sock_gen_put(sk); goto begin; } - goto out; + goto found; } } /* @@ -258,37 +309,9 @@ begin: */ if (get_nulls_value(node) != slot) goto begin; - -begintw: - /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_nulls_for_each_rcu(sk, node, &head->twchain) { - if (sk->sk_hash != hash) - continue; - if (likely(INET_TW_MATCH(sk, net, acookie, - saddr, daddr, ports, - dif))) { - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { - sk = NULL; - goto out; - } - if (unlikely(!INET_TW_MATCH(sk, net, acookie, - saddr, daddr, ports, - dif))) { - sock_put(sk); - goto begintw; - } - goto out; - } - } - /* - * if the nulls value we got at the end of this lookup is - * not the expected one, we must restart lookup. - * We probably met an item that was moved to another chain. - */ - if (get_nulls_value(node) != slot) - goto begintw; - sk = NULL; out: + sk = NULL; +found: rcu_read_unlock(); return sk; } @@ -304,7 +327,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, __be32 daddr = inet->inet_rcv_saddr; __be32 saddr = inet->inet_daddr; int dif = sk->sk_bound_dev_if; - INET_ADDR_COOKIE(acookie, saddr, daddr) + INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); unsigned int hash = inet_ehashfn(net, daddr, lport, @@ -313,39 +336,29 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; - struct inet_timewait_sock *tw; + struct inet_timewait_sock *tw = NULL; int twrefcnt = 0; spin_lock(lock); - /* Check TIME-WAIT sockets first. */ - sk_nulls_for_each(sk2, node, &head->twchain) { - if (sk2->sk_hash != hash) - continue; - - if (likely(INET_TW_MATCH(sk2, net, acookie, - saddr, daddr, ports, dif))) { - tw = inet_twsk(sk2); - if (twsk_unique(sk, sk2, twp)) - goto unique; - else - goto not_unique; - } - } - tw = NULL; - - /* And established part... */ sk_nulls_for_each(sk2, node, &head->chain) { if (sk2->sk_hash != hash) continue; + if (likely(INET_MATCH(sk2, net, acookie, - saddr, daddr, ports, dif))) + saddr, daddr, ports, dif))) { + if (sk2->sk_state == TCP_TIME_WAIT) { + tw = inet_twsk(sk2); + if (twsk_unique(sk, sk2, twp)) + break; + } goto not_unique; + } } -unique: /* Must record num and sport now. Otherwise we will see - * in hash table socket with a funny identity. */ + * in hash table socket with a funny identity. + */ inet->inet_num = lport; inet->inet_sport = htons(lport); sk->sk_hash = hash; @@ -454,7 +467,7 @@ void inet_unhash(struct sock *sk) lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock_bh(lock); - done =__sk_nulls_del_node_init_rcu(sk); + done = __sk_nulls_del_node_init_rcu(sk); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(lock); @@ -479,16 +492,15 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, int i, remaining, low, high, port; static u32 hint; u32 offset = hint + port_offset; - struct hlist_node *node; struct inet_timewait_sock *tw = NULL; - inet_get_local_port_range(&low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; local_bh_disable(); for (i = 1; i <= remaining; i++) { port = low + (i + offset) % remaining; - if (inet_is_reserved_local_port(port)) + if (inet_is_local_reserved_port(net, port)) continue; head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; @@ -498,10 +510,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * because the established check is already * unique enough. */ - inet_bind_bucket_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, &head->chain) { if (net_eq(ib_net(tb), net) && tb->port == port) { - if (tb->fastreuse >= 0) + if (tb->fastreuse >= 0 || + tb->fastreuseport >= 0) goto next_port; WARN_ON(hlist_empty(&tb->owners)); if (!check_established(death_row, sk, @@ -518,6 +531,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, break; } tb->fastreuse = -1; + tb->fastreuseport = -1; goto ok; next_port: |
