/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Implementation of the Transmission Control Protocol(TCP).
*
* Version: $Id: tcp_minisocks.c,v 1.15 2002/02/01 22:01:04 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
* Corey Minyard <wf-rch!minyard@relay.EU.net>
* Florian La Roche, <flla@stud.uni-sb.de>
* Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
* Linus Torvalds, <torvalds@cs.helsinki.fi>
* Alan Cox, <gw4pts@gw4pts.ampr.org>
* Matthew Dillon, <dillon@apollo.west.oic.com>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
* Jorge Cwik, <jorge@laser.satlink.net>
*/
#include <linux/config.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/workqueue.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
/*
 * SYNC_INIT is the initial value given to sysctl_tcp_syncookies below:
 * 0 when the kernel is built with CONFIG_SYSCTL, 1 otherwise.
 */
#ifdef CONFIG_SYSCTL
#define SYNC_INIT 0 /* let the user enable it */
#else
#define SYNC_INIT 1
#endif
/*
 * Global tunables defined by this file.  Those without an initializer
 * start out as zero.
 * NOTE(review): what each tunable controls is decided by code outside the
 * section visible here - confirm against the users of each variable
 * before relying on the names alone.
 */
int sysctl_tcp_tw_recycle;
/* Initial value is twice NR_FILE. */
int sysctl_tcp_max_tw_buckets = NR_FILE*2;
/* Initial value is SYNC_INIT, i.e. it depends on CONFIG_SYSCTL. */
int sysctl_tcp_syncookies = SYNC_INIT;
int sysctl_tcp_abort_on_overflow;
/* Forward declaration of a file-local function defined further down. */
static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo);
/*
 * Check a segment spanning seq..end_seq against a window spanning
 * s_win..e_win.  Returns 1 when the segment is accepted, 0 otherwise.
 *
 * The segment is accepted when any one of these holds:
 *  - it starts exactly on the left window edge (seq == s_win);
 *  - after(end_seq, s_win) and before(seq, e_win) are both true;
 *  - it is empty (seq == end_seq) and sits exactly on the right
 *    window edge (seq == e_win).
 *
 * The conditions are evaluated left to right with short-circuiting.
 */
static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
{
	return seq == s_win ||
	       (after(end_seq, s_win) && before(seq, e_win)) ||
	       (seq == e_win && seq == end_seq);
}
/* New-style handling of TIME_WAIT sockets. */
/*
 * NOTE(review): presumably the number of TIME-WAIT buckets currently
 * alive; it is not read or written in the section visible here - confirm
 * against the code that updates it.
 */
int tcp_tw_count;
/*
 * Unlink a TIME-WAIT bucket from the hash tables and release it.
 *
 * Step 1: under the write lock of its established-hash chain, remove
 *         tw from that chain.  If tw is already unhashed, unlock and
 *         return without doing anything else.
 * Step 2: under the spinlock of its bind-hash chain, detach tw from its
 *         bind bucket, clear tw->tw_tb and hand the bind bucket to
 *         tcp_bucket_destroy().
 * Step 3: call tcp_tw_put() on tw (presumably dropping the reference the
 *         hash tables held - confirm against tcp_tw_put()).
 *
 * Must be called with locally disabled BHs.
 */
static void tcp_timewait_kill(struct tcp_tw_bucket *tw)
{
	struct tcp_ehash_bucket *echain = &tcp_ehash[tw->tw_hashent];
	struct tcp_bind_hashbucket *bchain;
	struct tcp_bind_bucket *bind_bucket;
	int already_unhashed;

	/* Unlink from established hashes. */
	write_lock(&echain->lock);
	already_unhashed = hlist_unhashed(&tw->tw_node);
	if (!already_unhashed) {
		__hlist_del(&tw->tw_node);
		sk_node_init(&tw->tw_node);
	}
	write_unlock(&echain->lock);

	/* Already off the established chain: nothing further to do here. */
	if (already_unhashed)
		return;

	/* Disassociate with bind bucket. */
	bchain = &tcp_bhash[tcp_bhashfn(tw->tw_num)];
	spin_lock(&bchain->lock);
	bind_bucket = tw->tw_tb;
	__hlist_del(&tw->tw_bind_node);
	tw->tw_tb = NULL;
	tcp_bucket_destroy(bind_bucket);
	spin_unlock(&bchain->lock);

#ifdef INET_REFCNT_DEBUG
	/* Debug aid: report any reference count other than 1 at this point. */
	if (atomic_read(&tw->tw_refcnt) != 1) {
		printk(KERN_DEBUG "tw_bucket %p refcnt=%d\n", tw,
		       atomic_read(&tw->tw_refcnt));
	}
#endif
	tcp_tw_put(tw);
}
/*
* * The main purpose of the TIME-WAIT state is to close the connection
* gracefully when one of the ends sits in LAST-ACK or CLOSING, retransmitting
* its FIN (and, probably, a tail of data), and one or more of our ACKs are lost.
* * What is the TIME-WAIT timeout? It is associated with the maximal packet
* lifetime in the internet, which leads to the wrong conclusion that
* it is set to catch "old duplicate segments" wandering off their path.
* That is not quite correct. The timeout is calculated so that it exceeds
* the maximal retransmission timeout by enough to tolerate the loss of one
* (or more) segments sent by the peer and of our ACKs. This time may be
* calculated from the RTO.
* * When TIME-WAIT socket receives RST, it means that another end
* finally closed and we are allowed to kill TIME-WAIT too.
* * The second purpose of TIME-WAIT is catching old duplicate segments.
* Well, certainly it is pure paranoia, but if we load TIME-WAIT
* with these semantics, we MUST NOT kill the TIME-WAIT state with RSTs.
* * If we invented some cleverer way to catch duplicates
* (e.g. based on PAWS), we could truncate TIME-WAIT to several RTOs.
*
* The algorithm below is based on FORMAL INTERPRETATION of RFCs.
* When you compare it to RFCs, please, read section SEGMENT ARRIVES
* from the very beginning.
*
* NOTE. With recycling (and later with fin-wait-2) the TW bucket
* is _not_ stateless. This means that, strictly speaking, we must
* spinlock it. I do not want to! Well, the probability of misbehaviour
* is ridiculously low and, it seems, we could use some mb() tricks
* to avoid misreading sequence numbers, states etc. --ANK
*/
enum tcp_tw_status
tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
struct tcp_options_received tmp_opt;
int paws_reject = 0;
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr) >> 2) && tw->tw_ts_recent_stamp) {
tcp_parse_options(skb, &tmp_opt, 0);
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = tw->tw_ts_recent;
tmp_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
paws_reject = tcp_paws_check(&tmp_opt, th->rst);
}
}
if (tw->tw_substate == TCP_FIN_WAIT2) {
/* Just repeat all the checks of tcp_rcv_state_process() */
/* Out of window, send ACK */
if (paws_reject ||
!tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
tw->tw_rcv_nxt,
tw->tw_rcv_nxt + tw->