Diffstat (limited to 'net/ipv4/inet_timewait_sock.c')
-rw-r--r--   net/ipv4/inet_timewait_sock.c   132
1 file changed, 80 insertions, 52 deletions
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 0fdf45e4c90..6d592f8555f 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -10,14 +10,20 @@
 #include <linux/kernel.h>
 #include <linux/kmemcheck.h>
+#include <linux/slab.h>
+#include <linux/module.h>
 #include <net/inet_hashtables.h>
 #include <net/inet_timewait_sock.h>
 #include <net/ip.h>
 
-/*
- * unhash a timewait socket from established hash
- * lock must be hold by caller
+/**
+ * inet_twsk_unhash - unhash a timewait socket from established hash
+ * @tw: timewait socket
+ *
+ * unhash a timewait socket from established hash, if hashed.
+ * ehash lock must be held by caller.
+ * Returns 1 if caller should call inet_twsk_put() after lock release.
  */
 int inet_twsk_unhash(struct inet_timewait_sock *tw)
 {
@@ -26,6 +32,37 @@ int inet_twsk_unhash(struct inet_timewait_sock *tw)
 	hlist_nulls_del_rcu(&tw->tw_node);
 	sk_nulls_node_init(&tw->tw_node);
+	/*
+	 * We cannot call inet_twsk_put() ourself under lock,
+	 * caller must call it for us.
+	 */
+	return 1;
+}
+
+/**
+ * inet_twsk_bind_unhash - unhash a timewait socket from bind hash
+ * @tw: timewait socket
+ * @hashinfo: hashinfo pointer
+ *
+ * unhash a timewait socket from bind hash, if hashed.
+ * bind hash lock must be held by caller.
+ * Returns 1 if caller should call inet_twsk_put() after lock release.
+ */
+int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
+			  struct inet_hashinfo *hashinfo)
+{
+	struct inet_bind_bucket *tb = tw->tw_tb;
+
+	if (!tb)
+		return 0;
+
+	__hlist_del(&tw->tw_bind_node);
+	tw->tw_tb = NULL;
+	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+	/*
+	 * We cannot call inet_twsk_put() ourself under lock,
+	 * caller must call it for us.
+	 */
 	return 1;
 }
 
@@ -34,7 +71,6 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 			     struct inet_hashinfo *hashinfo)
 {
 	struct inet_bind_hashbucket *bhead;
-	struct inet_bind_bucket *tb;
 	int refcnt;
 	/* Unlink from established hashes. */
 	spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
@@ -46,28 +82,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 	/* Disassociate with bind bucket. */
 	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
 			hashinfo->bhash_size)];
+
 	spin_lock(&bhead->lock);
-	tb = tw->tw_tb;
-	if (tb) {
-		__hlist_del(&tw->tw_bind_node);
-		tw->tw_tb = NULL;
-		inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
-		refcnt++;
-	}
+	refcnt += inet_twsk_bind_unhash(tw, hashinfo);
 	spin_unlock(&bhead->lock);
-#ifdef SOCK_REFCNT_DEBUG
-	if (atomic_read(&tw->tw_refcnt) != 1) {
-		printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n",
-		       tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
-	}
-#endif
-	while (refcnt) {
-		inet_twsk_put(tw);
-		refcnt--;
-	}
+
+	BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt));
+	atomic_sub(refcnt, &tw->tw_refcnt);
 }
 
-static noinline void inet_twsk_free(struct inet_timewait_sock *tw)
+void inet_twsk_free(struct inet_timewait_sock *tw)
 {
 	struct module *owner = tw->tw_prot->owner;
 	twsk_destructor((struct sock *)tw);
@@ -86,6 +110,18 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
 }
 EXPORT_SYMBOL_GPL(inet_twsk_put);
 
+static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
+				   struct hlist_nulls_head *list)
+{
+	hlist_nulls_add_head_rcu(&tw->tw_node, list);
+}
+
+static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
+				    struct hlist_head *list)
+{
+	hlist_add_head(&tw->tw_bind_node, list);
+}
+
 /*
  * Enter the time wait state. This is called with locally disabled BH.
  * Essentially we whip up a timewait bucket, copy the relevant info into it
@@ -114,29 +150,23 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 
 	spin_lock(lock);
 
 	/*
-	 * Step 2: Hash TW into TIMEWAIT chain.
-	 * Should be done before removing sk from established chain
-	 * because readers are lockless and search established first.
+	 * Step 2: Hash TW into tcp ehash chain.
+	 * Notes :
+	 * - tw_refcnt is set to 3 because :
+	 * - We have one reference from bhash chain.
+	 * - We have one reference from ehash chain.
+	 * We can use atomic_set() because prior spin_lock()/spin_unlock()
+	 * committed into memory all tw fields.
 	 */
-	inet_twsk_add_node_rcu(tw, &ehead->twchain);
+	atomic_set(&tw->tw_refcnt, 1 + 1 + 1);
+	inet_twsk_add_node_rcu(tw, &ehead->chain);
 
-	/* Step 3: Remove SK from established hash. */
+	/* Step 3: Remove SK from hash chain */
 	if (__sk_nulls_del_node_init_rcu(sk))
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 
-	/*
-	 * Notes :
-	 * - We initially set tw_refcnt to 0 in inet_twsk_alloc()
-	 * - We add one reference for the bhash link
-	 * - We add one reference for the ehash link
-	 * - We want this refcnt update done before allowing other
-	 *   threads to find this tw in ehash chain.
-	 */
-	atomic_add(1 + 1 + 1, &tw->tw_refcnt);
-
 	spin_unlock(lock);
 }
-
 EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
 
 struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
@@ -153,6 +183,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
 		tw->tw_daddr	    = inet->inet_daddr;
 		tw->tw_rcv_saddr    = inet->inet_rcv_saddr;
 		tw->tw_bound_dev_if = sk->sk_bound_dev_if;
+		tw->tw_tos	    = inet->tos;
 		tw->tw_num	    = inet->inet_num;
 		tw->tw_state	    = TCP_TIME_WAIT;
 		tw->tw_substate	    = state;
@@ -177,7 +208,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
 
 	return tw;
 }
-
 EXPORT_SYMBOL_GPL(inet_twsk_alloc);
 
 /* Returns non-zero if quota exceeded.  */
@@ -185,7 +215,6 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
 				    const int slot)
 {
 	struct inet_timewait_sock *tw;
-	struct hlist_node *node;
 	unsigned int killed;
 	int ret;
 
@@ -198,7 +227,7 @@ static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
 	killed = 0;
 	ret = 0;
 rescan:
-	inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
+	inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) {
 		__inet_twsk_del_dead_node(tw);
 		spin_unlock(&twdr->death_lock);
 		__inet_twsk_kill(tw, twdr->hashinfo);
@@ -232,7 +261,7 @@ rescan:
 void inet_twdr_hangman(unsigned long data)
 {
 	struct inet_timewait_death_row *twdr;
-	int unsigned need_timer;
+	unsigned int need_timer;
 
 	twdr = (struct inet_timewait_death_row *)data;
 	spin_lock(&twdr->death_lock);
@@ -256,7 +285,6 @@ void inet_twdr_hangman(unsigned long data)
 out:
 	spin_unlock(&twdr->death_lock);
 }
-
 EXPORT_SYMBOL_GPL(inet_twdr_hangman);
 
 void inet_twdr_twkill_work(struct work_struct *work)
@@ -287,7 +315,6 @@ void inet_twdr_twkill_work(struct work_struct *work)
 		spin_unlock_bh(&twdr->death_lock);
 	}
 }
-
 EXPORT_SYMBOL_GPL(inet_twdr_twkill_work);
 
 /* These are always called from BH context.  See callers in
@@ -307,7 +334,6 @@ void inet_twsk_deschedule(struct inet_timewait_sock *tw,
 	spin_unlock(&twdr->death_lock);
 	__inet_twsk_kill(tw, twdr->hashinfo);
 }
-
 EXPORT_SYMBOL(inet_twsk_deschedule);
 
 void inet_twsk_schedule(struct inet_timewait_sock *tw,
@@ -360,11 +386,11 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw,
 			if (slot >= INET_TWDR_TWKILL_SLOTS)
 				slot = INET_TWDR_TWKILL_SLOTS - 1;
 		}
-		tw->tw_ttd = jiffies + timeo;
+		tw->tw_ttd = inet_tw_time_stamp() + timeo;
 		slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
 		list = &twdr->cells[slot];
 	} else {
-		tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);
+		tw->tw_ttd = inet_tw_time_stamp() + (slot << INET_TWDR_RECYCLE_TICK);
 
 		if (twdr->twcal_hand < 0) {
 			twdr->twcal_hand = 0;
@@ -388,7 +414,6 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw,
 		mod_timer(&twdr->tw_timer, jiffies + twdr->period);
 	spin_unlock(&twdr->death_lock);
 }
-
 EXPORT_SYMBOL_GPL(inet_twsk_schedule);
 
 void inet_twdr_twcal_tick(unsigned long data)
@@ -411,10 +436,10 @@ void inet_twdr_twcal_tick(unsigned long data)
 
 	for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
 		if (time_before_eq(j, now)) {
-			struct hlist_node *node, *safe;
+			struct hlist_node *safe;
 			struct inet_timewait_sock *tw;
 
-			inet_twsk_for_each_inmate_safe(tw, node, safe,
+			inet_twsk_for_each_inmate_safe(tw, safe,
 						       &twdr->twcal_row[slot]) {
 				__inet_twsk_del_dead_node(tw);
 				__inet_twsk_kill(tw, twdr->hashinfo);
@@ -449,7 +474,6 @@ out:
 #endif
 	spin_unlock(&twdr->death_lock);
 }
-
 EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
 
 void inet_twsk_purge(struct inet_hashinfo *hashinfo,
@@ -465,7 +489,9 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo,
 restart_rcu:
 		rcu_read_lock();
 restart:
-		sk_nulls_for_each_rcu(sk, node, &head->twchain) {
+		sk_nulls_for_each_rcu(sk, node, &head->chain) {
+			if (sk->sk_state != TCP_TIME_WAIT)
+				continue;
 			tw = inet_twsk(sk);
 			if ((tw->tw_family != family) ||
 			    atomic_read(&twsk_net(tw)->count))
@@ -481,7 +507,9 @@ restart:
 		}
 
 		rcu_read_unlock();
+		local_bh_disable();
 		inet_twsk_deschedule(tw, twdr);
+		local_bh_enable();
 		inet_twsk_put(tw);
 		goto restart_rcu;
 	}
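
A note on the two helpers added at the top of the diff: inet_twsk_unhash() and inet_twsk_bind_unhash() intentionally do not drop the hash table's reference themselves. inet_twsk_put() may free the socket, so it must not run while the ehash or bhash spinlock is still held; the helpers return 1 and defer the put to the caller. A minimal sketch of that contract, assuming only the kernel APIs already used in this file (the function name is hypothetical; the real caller is __inet_twsk_kill() in the diff):

	/* Sketch only: illustrates the deferred inet_twsk_put() contract. */
	static void twsk_unhash_sketch(struct inet_timewait_sock *tw,
				       struct inet_hashinfo *hashinfo)
	{
		spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
		int refcnt;

		spin_lock(lock);
		refcnt = inet_twsk_unhash(tw);	/* 1 if a ref must be dropped */
		spin_unlock(lock);

		/* Safe only after the unlock: inet_twsk_put() may free tw. */
		while (refcnt--)
			inet_twsk_put(tw);
	}

The rewritten __inet_twsk_kill() goes one step further: because its caller still holds a reference (the BUG_ON asserts refcnt stays below tw_refcnt), it can fold the deferred puts into a single atomic_sub() that can never reach zero and therefore never frees.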
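
The __inet_twsk_hashdance() hunk also inverts the refcount ordering: the old code inserted tw into the ehash chain first and bumped tw_refcnt afterwards with atomic_add(); the new code initializes the count with atomic_set() before the RCU insertion. An annotated excerpt of the new ordering (comments added here for explanation; per the patch comment, lockless readers can reach tw only through the ehash chain):

	/*
	 * tw is not yet visible to any other CPU, so a plain atomic_set()
	 * is safe; the prior spin_lock()/spin_unlock() pair has already
	 * committed all tw fields to memory.
	 */
	atomic_set(&tw->tw_refcnt, 1 + 1 + 1);
	/*
	 * Publication point: once tw is on the RCU chain, other CPUs may
	 * take and drop references concurrently, so from here on the
	 * count may only move via atomic inc/dec/sub, never atomic_set().
	 */
	inet_twsk_add_node_rcu(tw, &ehead->chain);

The patch comment accounts for two of the three references (bhash and ehash); the third is presumably the one the calling code keeps while it goes on to schedule the timewait timer.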
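
Finally, the inet_twsk_purge() hunks follow from timewait sockets now sharing the regular ehash chain with established sockets: the walker has to filter on sk_state, and because the purge loop runs in process context while inet_twsk_deschedule() is otherwise entered from BH context (see the comment above its definition), the call is now bracketed by local_bh_disable()/local_bh_enable(). Annotated excerpt (comments added here for explanation):

	sk_nulls_for_each_rcu(sk, node, &head->chain) {
		/* The chain now mixes established and timewait sockets. */
		if (sk->sk_state != TCP_TIME_WAIT)
			continue;
		...
	}

	rcu_read_unlock();
	local_bh_disable();	/* deschedule expects BHs off */
	inet_twsk_deschedule(tw, twdr);
	local_bh_enable();
	inet_twsk_put(tw);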
