diff options
Diffstat (limited to 'net/ipv4/inet_timewait_sock.c')
| -rw-r--r-- | net/ipv4/inet_timewait_sock.c | 220 |
1 files changed, 147 insertions, 73 deletions
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 8554d0ea171..6d592f8555f 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -9,62 +9,119 @@ */ #include <linux/kernel.h> +#include <linux/kmemcheck.h> +#include <linux/slab.h> +#include <linux/module.h> #include <net/inet_hashtables.h> #include <net/inet_timewait_sock.h> #include <net/ip.h> + +/** + * inet_twsk_unhash - unhash a timewait socket from established hash + * @tw: timewait socket + * + * unhash a timewait socket from established hash, if hashed. + * ehash lock must be held by caller. + * Returns 1 if caller should call inet_twsk_put() after lock release. + */ +int inet_twsk_unhash(struct inet_timewait_sock *tw) +{ + if (hlist_nulls_unhashed(&tw->tw_node)) + return 0; + + hlist_nulls_del_rcu(&tw->tw_node); + sk_nulls_node_init(&tw->tw_node); + /* + * We cannot call inet_twsk_put() ourself under lock, + * caller must call it for us. + */ + return 1; +} + +/** + * inet_twsk_bind_unhash - unhash a timewait socket from bind hash + * @tw: timewait socket + * @hashinfo: hashinfo pointer + * + * unhash a timewait socket from bind hash, if hashed. + * bind hash lock must be held by caller. + * Returns 1 if caller should call inet_twsk_put() after lock release. + */ +int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo) +{ + struct inet_bind_bucket *tb = tw->tw_tb; + + if (!tb) + return 0; + + __hlist_del(&tw->tw_bind_node); + tw->tw_tb = NULL; + inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + /* + * We cannot call inet_twsk_put() ourself under lock, + * caller must call it for us. + */ + return 1; +} + /* Must be called with locally disabled BHs. */ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { struct inet_bind_hashbucket *bhead; - struct inet_bind_bucket *tb; + int refcnt; /* Unlink from established hashes. */ spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); spin_lock(lock); - if (hlist_nulls_unhashed(&tw->tw_node)) { - spin_unlock(lock); - return; - } - hlist_nulls_del_rcu(&tw->tw_node); - sk_nulls_node_init(&tw->tw_node); + refcnt = inet_twsk_unhash(tw); spin_unlock(lock); /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, hashinfo->bhash_size)]; + spin_lock(&bhead->lock); - tb = tw->tw_tb; - __hlist_del(&tw->tw_bind_node); - tw->tw_tb = NULL; - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + refcnt += inet_twsk_bind_unhash(tw, hashinfo); spin_unlock(&bhead->lock); + + BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt)); + atomic_sub(refcnt, &tw->tw_refcnt); +} + +void inet_twsk_free(struct inet_timewait_sock *tw) +{ + struct module *owner = tw->tw_prot->owner; + twsk_destructor((struct sock *)tw); #ifdef SOCK_REFCNT_DEBUG - if (atomic_read(&tw->tw_refcnt) != 1) { - printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", - tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); - } + pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw); #endif - inet_twsk_put(tw); + release_net(twsk_net(tw)); + kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); + module_put(owner); } void inet_twsk_put(struct inet_timewait_sock *tw) { - if (atomic_dec_and_test(&tw->tw_refcnt)) { - struct module *owner = tw->tw_prot->owner; - twsk_destructor((struct sock *)tw); -#ifdef SOCK_REFCNT_DEBUG - printk(KERN_DEBUG "%s timewait_sock %p released\n", - tw->tw_prot->name, tw); -#endif - release_net(twsk_net(tw)); - kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); - module_put(owner); - } + if (atomic_dec_and_test(&tw->tw_refcnt)) + inet_twsk_free(tw); } EXPORT_SYMBOL_GPL(inet_twsk_put); +static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, + struct hlist_nulls_head *list) +{ + hlist_nulls_add_head_rcu(&tw->tw_node, list); +} + +static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_bind_node, list); +} + /* * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it @@ -82,7 +139,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ - bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num, + bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = icsk->icsk_bind_hash; @@ -93,20 +150,23 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, spin_lock(lock); /* - * Step 2: Hash TW into TIMEWAIT chain. - * Should be done before removing sk from established chain - * because readers are lockless and search established first. + * Step 2: Hash TW into tcp ehash chain. + * Notes : + * - tw_refcnt is set to 3 because : + * - We have one reference from bhash chain. + * - We have one reference from ehash chain. + * We can use atomic_set() because prior spin_lock()/spin_unlock() + * committed into memory all tw fields. */ - atomic_inc(&tw->tw_refcnt); - inet_twsk_add_node_rcu(tw, &ehead->twchain); + atomic_set(&tw->tw_refcnt, 1 + 1 + 1); + inet_twsk_add_node_rcu(tw, &ehead->chain); - /* Step 3: Remove SK from established hash. */ + /* Step 3: Remove SK from hash chain */ if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock(lock); } - EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) @@ -117,15 +177,18 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat if (tw != NULL) { const struct inet_sock *inet = inet_sk(sk); + kmemcheck_annotate_bitfield(tw, flags); + /* Give us an identity. */ - tw->tw_daddr = inet->daddr; - tw->tw_rcv_saddr = inet->rcv_saddr; + tw->tw_daddr = inet->inet_daddr; + tw->tw_rcv_saddr = inet->inet_rcv_saddr; tw->tw_bound_dev_if = sk->sk_bound_dev_if; - tw->tw_num = inet->num; + tw->tw_tos = inet->tos; + tw->tw_num = inet->inet_num; tw->tw_state = TCP_TIME_WAIT; tw->tw_substate = state; - tw->tw_sport = inet->sport; - tw->tw_dport = inet->dport; + tw->tw_sport = inet->inet_sport; + tw->tw_dport = inet->inet_dport; tw->tw_family = sk->sk_family; tw->tw_reuse = sk->sk_reuse; tw->tw_hash = sk->sk_hash; @@ -133,14 +196,18 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; twsk_net_set(tw, hold_net(sock_net(sk))); - atomic_set(&tw->tw_refcnt, 1); + /* + * Because we use RCU lookups, we should not set tw_refcnt + * to a non null value before everything is setup for this + * timewait socket. + */ + atomic_set(&tw->tw_refcnt, 0); inet_twsk_dead_node_init(tw); __module_get(tw->tw_prot->owner); } return tw; } - EXPORT_SYMBOL_GPL(inet_twsk_alloc); /* Returns non-zero if quota exceeded. */ @@ -148,7 +215,6 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, const int slot) { struct inet_timewait_sock *tw; - struct hlist_node *node; unsigned int killed; int ret; @@ -161,7 +227,7 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, killed = 0; ret = 0; rescan: - inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { + inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) { __inet_twsk_del_dead_node(tw); spin_unlock(&twdr->death_lock); __inet_twsk_kill(tw, twdr->hashinfo); @@ -195,7 +261,7 @@ rescan: void inet_twdr_hangman(unsigned long data) { struct inet_timewait_death_row *twdr; - int unsigned need_timer; + unsigned int need_timer; twdr = (struct inet_timewait_death_row *)data; spin_lock(&twdr->death_lock); @@ -212,14 +278,13 @@ void inet_twdr_hangman(unsigned long data) /* We purged the entire slot, anything left? */ if (twdr->tw_count) need_timer = 1; + twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); } - twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); if (need_timer) mod_timer(&twdr->tw_timer, jiffies + twdr->period); out: spin_unlock(&twdr->death_lock); } - EXPORT_SYMBOL_GPL(inet_twdr_hangman); void inet_twdr_twkill_work(struct work_struct *work) @@ -250,7 +315,6 @@ void inet_twdr_twkill_work(struct work_struct *work) spin_unlock_bh(&twdr->death_lock); } } - EXPORT_SYMBOL_GPL(inet_twdr_twkill_work); /* These are always called from BH context. See callers in @@ -270,7 +334,6 @@ void inet_twsk_deschedule(struct inet_timewait_sock *tw, spin_unlock(&twdr->death_lock); __inet_twsk_kill(tw, twdr->hashinfo); } - EXPORT_SYMBOL(inet_twsk_deschedule); void inet_twsk_schedule(struct inet_timewait_sock *tw, @@ -323,11 +386,11 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, if (slot >= INET_TWDR_TWKILL_SLOTS) slot = INET_TWDR_TWKILL_SLOTS - 1; } - tw->tw_ttd = jiffies + timeo; + tw->tw_ttd = inet_tw_time_stamp() + timeo; slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); list = &twdr->cells[slot]; } else { - tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK); + tw->tw_ttd = inet_tw_time_stamp() + (slot << INET_TWDR_RECYCLE_TICK); if (twdr->twcal_hand < 0) { twdr->twcal_hand = 0; @@ -351,7 +414,6 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, mod_timer(&twdr->tw_timer, jiffies + twdr->period); spin_unlock(&twdr->death_lock); } - EXPORT_SYMBOL_GPL(inet_twsk_schedule); void inet_twdr_twcal_tick(unsigned long data) @@ -374,10 +436,10 @@ void inet_twdr_twcal_tick(unsigned long data) for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { if (time_before_eq(j, now)) { - struct hlist_node *node, *safe; + struct hlist_node *safe; struct inet_timewait_sock *tw; - inet_twsk_for_each_inmate_safe(tw, node, safe, + inet_twsk_for_each_inmate_safe(tw, safe, &twdr->twcal_row[slot]) { __inet_twsk_del_dead_node(tw); __inet_twsk_kill(tw, twdr->hashinfo); @@ -412,40 +474,52 @@ out: #endif spin_unlock(&twdr->death_lock); } - EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); -void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, +void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family) { struct inet_timewait_sock *tw; struct sock *sk; struct hlist_nulls_node *node; - int h; + unsigned int slot; - local_bh_disable(); - for (h = 0; h < (hashinfo->ehash_size); h++) { - struct inet_ehash_bucket *head = - inet_ehash_bucket(hashinfo, h); - spinlock_t *lock = inet_ehash_lockp(hashinfo, h); + for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; +restart_rcu: + rcu_read_lock(); restart: - spin_lock(lock); - sk_nulls_for_each(sk, node, &head->twchain) { - + sk_nulls_for_each_rcu(sk, node, &head->chain) { + if (sk->sk_state != TCP_TIME_WAIT) + continue; tw = inet_twsk(sk); - if (!net_eq(twsk_net(tw), net) || - tw->tw_family != family) + if ((tw->tw_family != family) || + atomic_read(&twsk_net(tw)->count)) + continue; + + if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt))) continue; - atomic_inc(&tw->tw_refcnt); - spin_unlock(lock); + if (unlikely((tw->tw_family != family) || + atomic_read(&twsk_net(tw)->count))) { + inet_twsk_put(tw); + goto restart; + } + + rcu_read_unlock(); + local_bh_disable(); inet_twsk_deschedule(tw, twdr); + local_bh_enable(); inet_twsk_put(tw); - - goto restart; + goto restart_rcu; } - spin_unlock(lock); + /* If the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto restart; + rcu_read_unlock(); } - local_bh_enable(); } EXPORT_SYMBOL_GPL(inet_twsk_purge); |
