aboutsummaryrefslogtreecommitdiff
path: root/net/ipv6/inet6_hashtables.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/inet6_hashtables.c')
-rw-r--r--net/ipv6/inet6_hashtables.c307
1 files changed, 196 insertions, 111 deletions
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 99fd25f7f00..262e13c02ec 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -20,31 +20,75 @@
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
+#include <net/secure_seq.h>
#include <net/ip.h>
-void __inet6_hash(struct sock *sk)
+static unsigned int inet6_ehashfn(struct net *net,
+ const struct in6_addr *laddr,
+ const u16 lport,
+ const struct in6_addr *faddr,
+ const __be16 fport)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo;
- struct hlist_head *list;
- rwlock_t *lock;
+ static u32 inet6_ehash_secret __read_mostly;
+ static u32 ipv6_hash_secret __read_mostly;
- BUG_TRAP(sk_unhashed(sk));
+ u32 lhash, fhash;
+
+ net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
+ net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
+
+ lhash = (__force u32)laddr->s6_addr32[3];
+ fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+
+ return __inet6_ehashfn(lhash, lport, fhash, fport,
+ inet6_ehash_secret + net_hash_mix(net));
+}
+
+static int inet6_sk_ehashfn(const struct sock *sk)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr;
+ const struct in6_addr *faddr = &sk->sk_v6_daddr;
+ const __u16 lport = inet->inet_num;
+ const __be16 fport = inet->inet_dport;
+ struct net *net = sock_net(sk);
+
+ return inet6_ehashfn(net, laddr, lport, faddr, fport);
+}
+
+int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)
+{
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+ int twrefcnt = 0;
+
+ WARN_ON(!sk_unhashed(sk));
if (sk->sk_state == TCP_LISTEN) {
- list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
- lock = &hashinfo->lhash_lock;
- inet_listen_wlock(hashinfo);
+ struct inet_listen_hashbucket *ilb;
+
+ ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ spin_lock(&ilb->lock);
+ __sk_nulls_add_node_rcu(sk, &ilb->head);
+ spin_unlock(&ilb->lock);
} else {
unsigned int hash;
+ struct hlist_nulls_head *list;
+ spinlock_t *lock;
+
sk->sk_hash = hash = inet6_sk_ehashfn(sk);
list = &inet_ehash_bucket(hashinfo, hash)->chain;
lock = inet_ehash_lockp(hashinfo, hash);
- write_lock(lock);
+ spin_lock(lock);
+ __sk_nulls_add_node_rcu(sk, list);
+ if (tw) {
+ WARN_ON(sk->sk_hash != tw->tw_hash);
+ twrefcnt = inet_twsk_unhash(tw);
+ }
+ spin_unlock(lock);
}
- __sk_add_node(sk, list);
- sock_prot_inuse_add(sk->sk_prot, 1);
- write_unlock(lock);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ return twrefcnt;
}
EXPORT_SYMBOL(__inet6_hash);
@@ -63,76 +107,119 @@ struct sock *__inet6_lookup_established(struct net *net,
const int dif)
{
struct sock *sk;
- const struct hlist_node *node;
+ const struct hlist_nulls_node *node;
const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
- unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport);
- struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
- rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
-
- prefetch(head->chain.first);
- read_lock(lock);
- sk_for_each(sk, node, &head->chain) {
- /* For IPV6 do the cheaper port and family tests first. */
- if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif))
- goto hit; /* You sunk my battleship! */
- }
- /* Must check for a TIME_WAIT'er before going to listener hash. */
- sk_for_each(sk, node, &head->twchain) {
- if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif))
- goto hit;
- }
- read_unlock(lock);
- return NULL;
+ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+ unsigned int slot = hash & hashinfo->ehash_mask;
+ struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+
-hit:
- sock_hold(sk);
- read_unlock(lock);
+ rcu_read_lock();
+begin:
+ sk_nulls_for_each_rcu(sk, node, &head->chain) {
+ if (sk->sk_hash != hash)
+ continue;
+ if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))
+ continue;
+ if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))
+ goto out;
+
+ if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) {
+ sock_gen_put(sk);
+ goto begin;
+ }
+ goto found;
+ }
+ if (get_nulls_value(node) != slot)
+ goto begin;
+out:
+ sk = NULL;
+found:
+ rcu_read_unlock();
return sk;
}
EXPORT_SYMBOL(__inet6_lookup_established);
+static inline int compute_score(struct sock *sk, struct net *net,
+ const unsigned short hnum,
+ const struct in6_addr *daddr,
+ const int dif)
+{
+ int score = -1;
+
+ if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
+ sk->sk_family == PF_INET6) {
+
+ score = 1;
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ return -1;
+ score++;
+ }
+ if (sk->sk_bound_dev_if) {
+ if (sk->sk_bound_dev_if != dif)
+ return -1;
+ score++;
+ }
+ }
+ return score;
+}
+
struct sock *inet6_lookup_listener(struct net *net,
- struct inet_hashinfo *hashinfo, const struct in6_addr *daddr,
+ struct inet_hashinfo *hashinfo, const struct in6_addr *saddr,
+ const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif)
{
struct sock *sk;
- const struct hlist_node *node;
- struct sock *result = NULL;
- int score, hiscore = 0;
-
- read_lock(&hashinfo->lhash_lock);
- sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
- if (sk->sk_net == net && inet_sk(sk)->num == hnum &&
- sk->sk_family == PF_INET6) {
- const struct ipv6_pinfo *np = inet6_sk(sk);
-
- score = 1;
- if (!ipv6_addr_any(&np->rcv_saddr)) {
- if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
- continue;
- score++;
- }
- if (sk->sk_bound_dev_if) {
- if (sk->sk_bound_dev_if != dif)
- continue;
- score++;
- }
- if (score == 3) {
- result = sk;
- break;
+ const struct hlist_nulls_node *node;
+ struct sock *result;
+ int score, hiscore, matches = 0, reuseport = 0;
+ u32 phash = 0;
+ unsigned int hash = inet_lhashfn(net, hnum);
+ struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
+
+ rcu_read_lock();
+begin:
+ result = NULL;
+ hiscore = 0;
+ sk_nulls_for_each(sk, node, &ilb->head) {
+ score = compute_score(sk, net, hnum, daddr, dif);
+ if (score > hiscore) {
+ hiscore = score;
+ result = sk;
+ reuseport = sk->sk_reuseport;
+ if (reuseport) {
+ phash = inet6_ehashfn(net, daddr, hnum,
+ saddr, sport);
+ matches = 1;
}
- if (score > hiscore) {
- hiscore = score;
+ } else if (score == hiscore && reuseport) {
+ matches++;
+ if (((u64)phash * matches) >> 32 == 0)
result = sk;
- }
+ phash = next_pseudo_random32(phash);
}
}
- if (result)
- sock_hold(result);
- read_unlock(&hashinfo->lhash_lock);
+ /*
+ * if the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE)
+ goto begin;
+ if (result) {
+ if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
+ result = NULL;
+ else if (unlikely(compute_score(result, net, hnum, daddr,
+ dif) < hiscore)) {
+ sock_put(result);
+ goto begin;
+ }
+ }
+ rcu_read_unlock();
return result;
}
@@ -160,77 +247,75 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_sock *inet = inet_sk(sk);
- const struct ipv6_pinfo *np = inet6_sk(sk);
- const struct in6_addr *daddr = &np->rcv_saddr;
- const struct in6_addr *saddr = &np->daddr;
+ const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
+ const struct in6_addr *saddr = &sk->sk_v6_daddr;
const int dif = sk->sk_bound_dev_if;
- const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
- const unsigned int hash = inet6_ehashfn(daddr, lport, saddr,
- inet->dport);
+ const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
+ struct net *net = sock_net(sk);
+ const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
+ inet->inet_dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
- rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
+ spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
- const struct hlist_node *node;
- struct inet_timewait_sock *tw;
- struct net *net = sk->sk_net;
-
- prefetch(head->chain.first);
- write_lock(lock);
-
- /* Check TIME-WAIT sockets first. */
- sk_for_each(sk2, node, &head->twchain) {
- tw = inet_twsk(sk2);
-
- if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) {
- if (twsk_unique(sk, sk2, twp))
- goto unique;
- else
- goto not_unique;
- }
- }
- tw = NULL;
+ const struct hlist_nulls_node *node;
+ struct inet_timewait_sock *tw = NULL;
+ int twrefcnt = 0;
- /* And established part... */
- sk_for_each(sk2, node, &head->chain) {
- if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif))
+ spin_lock(lock);
+
+ sk_nulls_for_each(sk2, node, &head->chain) {
+ if (sk2->sk_hash != hash)
+ continue;
+
+ if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) {
+ if (sk2->sk_state == TCP_TIME_WAIT) {
+ tw = inet_twsk(sk2);
+ if (twsk_unique(sk, sk2, twp))
+ break;
+ }
goto not_unique;
+ }
}
-unique:
/* Must record num and sport now. Otherwise we will see
- * in hash table socket with a funny identity. */
- inet->num = lport;
- inet->sport = htons(lport);
- BUG_TRAP(sk_unhashed(sk));
- __sk_add_node(sk, &head->chain);
+ * in hash table socket with a funny identity.
+ */
+ inet->inet_num = lport;
+ inet->inet_sport = htons(lport);
sk->sk_hash = hash;
- sock_prot_inuse_add(sk->sk_prot, 1);
- write_unlock(lock);
+ WARN_ON(!sk_unhashed(sk));
+ __sk_nulls_add_node_rcu(sk, &head->chain);
+ if (tw) {
+ twrefcnt = inet_twsk_unhash(tw);
+ NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
+ }
+ spin_unlock(lock);
+ if (twrefcnt)
+ inet_twsk_put(tw);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- if (twp != NULL) {
+ if (twp) {
*twp = tw;
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
- } else if (tw != NULL) {
+ } else if (tw) {
/* Silly. Should hash-dance instead... */
inet_twsk_deschedule(tw, death_row);
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
inet_twsk_put(tw);
}
return 0;
not_unique:
- write_unlock(lock);
+ spin_unlock(lock);
return -EADDRNOTAVAIL;
}
static inline u32 inet6_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
- const struct ipv6_pinfo *np = inet6_sk(sk);
- return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
- np->daddr.s6_addr32,
- inet->dport);
+
+ return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
+ inet->inet_dport);
}
int inet6_hash_connect(struct inet_timewait_death_row *death_row,