Diffstat (limited to 'net/ipv4/tcp_timer.c')
-rw-r--r-- | net/ipv4/tcp_timer.c | 169
1 file changed, 109 insertions, 60 deletions
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 74a6aa00365..286227abed1 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -32,17 +32,6 @@ int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
 int sysctl_tcp_orphan_retries __read_mostly;
 int sysctl_tcp_thin_linear_timeouts __read_mostly;
 
-static void tcp_write_timer(unsigned long);
-static void tcp_delack_timer(unsigned long);
-static void tcp_keepalive_timer (unsigned long data);
-
-void tcp_init_xmit_timers(struct sock *sk)
-{
-	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
-				  &tcp_keepalive_timer);
-}
-EXPORT_SYMBOL(tcp_init_xmit_timers);
-
 static void tcp_write_err(struct sock *sk)
 {
 	sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
@@ -77,10 +66,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
 	if (sk->sk_err_soft)
 		shift++;
 
-	if (tcp_too_many_orphans(sk, shift)) {
-		if (net_ratelimit())
-			printk(KERN_INFO "Out of socket memory\n");
-
+	if (tcp_check_oom(sk, shift)) {
 		/* Catch exceptional cases, when connection requires reset.
 		 *      1. Last segment was sent recently. */
 		if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
@@ -170,14 +156,21 @@ static bool retransmits_timed_out(struct sock *sk,
 static int tcp_write_timeout(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
 	int retry_until;
-	bool do_reset, syn_set = 0;
+	bool do_reset, syn_set = false;
 
 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
-		if (icsk->icsk_retransmits)
+		if (icsk->icsk_retransmits) {
 			dst_negative_advice(sk);
+			if (tp->syn_fastopen || tp->syn_data)
+				tcp_fastopen_cache_set(sk, 0, NULL, true);
+			if (tp->syn_data)
+				NET_INC_STATS_BH(sock_net(sk),
+						 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+		}
 		retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
-		syn_set = 1;
+		syn_set = true;
 	} else {
 		if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
 			/* Black hole detection */
@@ -208,21 +201,11 @@ static int tcp_write_timeout(struct sock *sk)
 	return 0;
 }
 
-static void tcp_delack_timer(unsigned long data)
+void tcp_delack_timer_handler(struct sock *sk)
 {
-	struct sock *sk = (struct sock *)data;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	bh_lock_sock(sk);
-	if (sock_owned_by_user(sk)) {
-		/* Try again later. */
-		icsk->icsk_ack.blocked = 1;
-		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
-		sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
-		goto out_unlock;
-	}
-
 	sk_mem_reclaim_partial(sk);
 
 	if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
@@ -259,12 +242,26 @@ static void tcp_delack_timer(unsigned long data)
 		tcp_send_ack(sk);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
 	}
-	TCP_CHECK_TIMER(sk);
 
 out:
-	if (tcp_memory_pressure)
+	if (sk_under_memory_pressure(sk))
 		sk_mem_reclaim(sk);
-out_unlock:
+}
+
+static void tcp_delack_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock *)data;
+
+	bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk)) {
+		tcp_delack_timer_handler(sk);
+	} else {
+		inet_csk(sk)->icsk_ack.blocked = 1;
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
+		/* delegate our work to tcp_release_cb() */
+		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+			sock_hold(sk);
+	}
 	bh_unlock_sock(sk);
 	sock_put(sk);
 }
@@ -315,6 +312,35 @@ static void tcp_probe_timer(struct sock *sk)
 }
 
 /*
+ *	Timer for Fast Open socket to retransmit SYNACK. Note that the
+ *	sk here is the child socket, not the parent (listener) socket.
+ */
+static void tcp_fastopen_synack_timer(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int max_retries = icsk->icsk_syn_retries ? :
+	    sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
+	struct request_sock *req;
+
+	req = tcp_sk(sk)->fastopen_rsk;
+	req->rsk_ops->syn_ack_timeout(sk, req);
+
+	if (req->num_timeout >= max_retries) {
+		tcp_write_err(sk);
+		return;
+	}
+	/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
+	 * returned from rtx_syn_ack() to make it more persistent like
+	 * regular retransmit because if the child socket has been accepted
+	 * it's not good to give up too easily.
+	 */
+	inet_rtx_syn_ack(sk, req);
+	req->num_timeout++;
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+			  TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
+}
+
+/*
  *	The TCP retransmit timer.
  */
 
@@ -323,11 +349,22 @@
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
+	if (tp->fastopen_rsk) {
+		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
+			     sk->sk_state != TCP_FIN_WAIT1);
+		tcp_fastopen_synack_timer(sk);
+		/* Before we receive ACK to our SYN-ACK don't retransmit
+		 * anything else (e.g., data or FIN segments).
+		 */
+		return;
+	}
 	if (!tp->packets_out)
 		goto out;
 
 	WARN_ON(tcp_write_queue_empty(sk));
 
+	tp->tlp_high_seq = 0;
+
 	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
 	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
 		/* Receiver dastardly shrinks window. Our retransmits
@@ -335,22 +372,21 @@
 		 * connection. If the socket is an orphan, time it out,
 		 * we cannot allow such beasts to hang infinitely.
 		 */
-#ifdef TCP_DEBUG
 		struct inet_sock *inet = inet_sk(sk);
 		if (sk->sk_family == AF_INET) {
-			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
-			       &inet->inet_daddr, ntohs(inet->inet_dport),
-			       inet->inet_num, tp->snd_una, tp->snd_nxt);
+			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"),
+				       &inet->inet_daddr,
+				       ntohs(inet->inet_dport), inet->inet_num,
+				       tp->snd_una, tp->snd_nxt);
 		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		else if (sk->sk_family == AF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
-			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
-			       &np->daddr, ntohs(inet->inet_dport),
-			       inet->inet_num, tp->snd_una, tp->snd_nxt);
+			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"),
+				       &sk->sk_v6_daddr,
+				       ntohs(inet->inet_dport), inet->inet_num,
+				       tp->snd_una, tp->snd_nxt);
 		}
 #endif
-#endif
 		if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
 			tcp_write_err(sk);
 			goto out;
@@ -386,11 +422,7 @@ void tcp_retransmit_timer(struct sock *sk)
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 	}
 
-	if (tcp_use_frto(sk)) {
-		tcp_enter_frto(sk);
-	} else {
-		tcp_enter_loss(sk, 0);
-	}
+	tcp_enter_loss(sk, 0);
 
 	if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
 		/* Retransmission failed because of local congestion,
@@ -449,19 +481,11 @@ out_reset_timer:
 out:;
 }
 
-static void tcp_write_timer(unsigned long data)
+void tcp_write_timer_handler(struct sock *sk)
 {
-	struct sock *sk = (struct sock *)data;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	int event;
 
-	bh_lock_sock(sk);
-	if (sock_owned_by_user(sk)) {
-		/* Try again later */
-		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20));
-		goto out_unlock;
-	}
-
 	if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
 		goto out;
 
@@ -471,21 +495,40 @@ static void tcp_write_timer(unsigned long data)
 	}
 
 	event = icsk->icsk_pending;
-	icsk->icsk_pending = 0;
 
 	switch (event) {
+	case ICSK_TIME_EARLY_RETRANS:
+		tcp_resume_early_retransmit(sk);
+		break;
+	case ICSK_TIME_LOSS_PROBE:
+		tcp_send_loss_probe(sk);
+		break;
 	case ICSK_TIME_RETRANS:
+		icsk->icsk_pending = 0;
 		tcp_retransmit_timer(sk);
 		break;
 	case ICSK_TIME_PROBE0:
+		icsk->icsk_pending = 0;
 		tcp_probe_timer(sk);
 		break;
 	}
-	TCP_CHECK_TIMER(sk);
 
 out:
 	sk_mem_reclaim(sk);
-out_unlock:
+}
+
+static void tcp_write_timer(unsigned long data)
+{
+	struct sock *sk = (struct sock *)data;
+
+	bh_lock_sock(sk);
+	if (!sock_owned_by_user(sk)) {
+		tcp_write_timer_handler(sk);
+	} else {
+		/* delegate our work to tcp_release_cb() */
+		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+			sock_hold(sk);
+	}
	bh_unlock_sock(sk);
 	sock_put(sk);
 }
@@ -589,7 +632,6 @@ static void tcp_keepalive_timer (unsigned long data)
 		elapsed = keepalive_time_when(tp) - elapsed;
 	}
 
-	TCP_CHECK_TIMER(sk);
 	sk_mem_reclaim(sk);
 
 resched:
@@ -603,3 +645,10 @@ out:
 	bh_unlock_sock(sk);
 	sock_put(sk);
 }
+
+void tcp_init_xmit_timers(struct sock *sk)
+{
+	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
+				  &tcp_keepalive_timer);
+}
+EXPORT_SYMBOL(tcp_init_xmit_timers);
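
A note on the main structural change above: tcp_write_timer() and tcp_delack_timer() are each split into a bare handler plus a thin timer callback. Previously a timer that fired while user context held the socket simply rearmed itself and retried later; now the callback sets a TCP_WRITE_TIMER_DEFERRED or TCP_DELACK_TIMER_DEFERRED bit in tsq_flags, takes a reference with sock_hold(), and tcp_release_cb() runs the handler as soon as the owner releases the socket. The userspace C sketch below illustrates only the pattern: pseudo_sock, write_timer(), release_sock() and PSEUDO_WRITE_TIMER_DEFERRED are invented names, a mutex trylock stands in for bh_lock_sock() plus sock_owned_by_user(), and the kernel's refcounting is left out.

/*
 * Userspace sketch of the "delegate to the lock owner" pattern.
 * Not kernel code. Build: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define PSEUDO_WRITE_TIMER_DEFERRED (1u << 0)	/* invented stand-in for TCP_WRITE_TIMER_DEFERRED */

struct pseudo_sock {
	pthread_mutex_t lock;	/* stands in for the socket lock */
	atomic_uint deferred;	/* stands in for tp->tsq_flags */
};

/* Plays the tcp_write_timer_handler() role: the actual work. */
static void write_timer_handler(struct pseudo_sock *sk)
{
	printf("retransmit work runs\n");
}

/* Plays the tcp_write_timer() role: do the work now if the "socket"
 * is free, otherwise just record a bit for the lock owner. */
static void write_timer(struct pseudo_sock *sk)
{
	if (pthread_mutex_trylock(&sk->lock) == 0) {
		write_timer_handler(sk);
		pthread_mutex_unlock(&sk->lock);
	} else {
		/* fetch_or plays the test_and_set_bit() role, so the same
		 * work is never queued twice; the kernel additionally takes
		 * a socket reference (sock_hold()), omitted here. */
		atomic_fetch_or(&sk->deferred, PSEUDO_WRITE_TIMER_DEFERRED);
	}
}

/* Plays the tcp_release_cb() role: the owner picks up deferred work
 * at unlock time instead of letting the timer retry later. */
static void release_sock(struct pseudo_sock *sk)
{
	unsigned int flags = atomic_exchange(&sk->deferred, 0);

	if (flags & PSEUDO_WRITE_TIMER_DEFERRED)
		write_timer_handler(sk);
	pthread_mutex_unlock(&sk->lock);
}

int main(void)
{
	struct pseudo_sock sk = { PTHREAD_MUTEX_INITIALIZER, 0 };

	pthread_mutex_lock(&sk.lock);	/* user context owns the socket */
	write_timer(&sk);		/* timer fires: only the bit is set */
	release_sock(&sk);		/* deferred handler runs here */
	return 0;
}

The point of the design is latency: the deferred work runs the moment the lock is dropped, rather than after another timer period (the old delack path rearmed for TCP_DELACK_MIN, the old write-timer path for HZ/20).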
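
The new tcp_fastopen_synack_timer() retransmits the SYN-ACK with the usual exponential backoff: req->num_timeout is incremented and the timer is rearmed with TCP_TIMEOUT_INIT << req->num_timeout, which inet_csk_reset_xmit_timer() clamps to TCP_RTO_MAX, and one extra retry is allowed on top of sysctl_tcp_synack_retries because the child socket may already have been accepted. A minimal standalone sketch of that arithmetic, assuming the kernel defaults of a 1 s initial RTO, a 120 s cap and HZ == 1000 (synack_rto() is our name, not a kernel symbol):

/* Standalone sketch of the SYN-ACK retransmit backoff. Not kernel code. */
#include <stdio.h>

#define HZ			1000		/* assumption: 1000 jiffies per second */
#define TCP_TIMEOUT_INIT	(1 * HZ)	/* 1 s initial RTO (kernel default) */
#define TCP_RTO_MAX		(120 * HZ)	/* 120 s cap (kernel default) */

/* Timeout armed after the Nth expiry, mirroring
 * TCP_TIMEOUT_INIT << req->num_timeout clamped to TCP_RTO_MAX. */
static unsigned long synack_rto(unsigned int num_timeout)
{
	unsigned long rto = (unsigned long)TCP_TIMEOUT_INIT << num_timeout;

	return rto > TCP_RTO_MAX ? TCP_RTO_MAX : rto;
}

int main(void)
{
	/* num_timeout is incremented before rearming, so the first
	 * retransmission is followed by a 2 s timeout, then 4 s, ... */
	for (unsigned int n = 1; n <= 8; n++)
		printf("num_timeout=%u -> next SYN-ACK RTO %lu ms\n",
		       n, synack_rto(n));
	return 0;
}

With the default sysctl_tcp_synack_retries of 5, max_retries is 6, so the Fast Open child gets six SYN-ACK retransmissions before tcp_write_err() gives up on it.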
