diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
| -rw-r--r-- | net/ipv4/tcp_input.c | 296 | 
1 files changed, 180 insertions, 116 deletions
| diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bef9f04c22b..52b5c2d0ecd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -206,7 +206,7 @@ static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)  		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;  } -static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb) +static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)  {  	if (tcp_hdr(skb)->cwr)  		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; @@ -217,32 +217,41 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp)  	tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;  } -static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) +static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)  { -	if (tp->ecn_flags & TCP_ECN_OK) { -		if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) -			tp->ecn_flags |= TCP_ECN_DEMAND_CWR; +	if (!(tp->ecn_flags & TCP_ECN_OK)) +		return; + +	switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) { +	case INET_ECN_NOT_ECT:  		/* Funny extension: if ECT is not set on a segment, -		 * it is surely retransmit. It is not in ECN RFC, -		 * but Linux follows this rule. */ -		else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) +		 * and we already seen ECT on a previous segment, +		 * it is probably a retransmit. +		 */ +		if (tp->ecn_flags & TCP_ECN_SEEN)  			tcp_enter_quickack_mode((struct sock *)tp); +		break; +	case INET_ECN_CE: +		tp->ecn_flags |= TCP_ECN_DEMAND_CWR; +		/* fallinto */ +	default: +		tp->ecn_flags |= TCP_ECN_SEEN;  	}  } -static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th) +static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)  {  	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))  		tp->ecn_flags &= ~TCP_ECN_OK;  } -static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th) +static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)  {  	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))  		tp->ecn_flags &= ~TCP_ECN_OK;  } -static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) +static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)  {  	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))  		return 1; @@ -256,14 +265,11 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)  static void tcp_fixup_sndbuf(struct sock *sk)  { -	int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + -		     sizeof(struct sk_buff); +	int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); -	if (sk->sk_sndbuf < 3 * sndmem) { -		sk->sk_sndbuf = 3 * sndmem; -		if (sk->sk_sndbuf > sysctl_tcp_wmem[2]) -			sk->sk_sndbuf = sysctl_tcp_wmem[2]; -	} +	sndmem *= TCP_INIT_CWND; +	if (sk->sk_sndbuf < sndmem) +		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);  }  /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -309,7 +315,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)  	return 0;  } -static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) +static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)  {  	struct tcp_sock *tp = tcp_sk(sk); @@ -339,17 +345,24 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)  static void tcp_fixup_rcvbuf(struct sock *sk)  { -	struct tcp_sock *tp = tcp_sk(sk); -	int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); +	u32 mss = tcp_sk(sk)->advmss; +	u32 icwnd = TCP_DEFAULT_INIT_RCVWND; +	int rcvmem; -	/* Try to select rcvbuf so that 4 mss-sized segments -	 * will fit to window and corresponding skbs will fit to our rcvbuf. -	 * (was 3; 4 is minimum to allow fast retransmit to work.) +	/* Limit to 10 segments if mss <= 1460, +	 * or 14600/mss segments, with a minimum of two segments.  	 */ -	while (tcp_win_from_space(rcvmem) < tp->advmss) +	if (mss > 1460) +		icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); + +	rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER); +	while (tcp_win_from_space(rcvmem) < mss)  		rcvmem += 128; -	if (sk->sk_rcvbuf < 4 * rcvmem) -		sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]); + +	rcvmem *= icwnd; + +	if (sk->sk_rcvbuf < rcvmem) +		sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);  }  /* 4. Try to fixup all. It is made immediately after connection enters @@ -416,7 +429,7 @@ static void tcp_clamp_window(struct sock *sk)   */  void tcp_initialize_rcv_mss(struct sock *sk)  { -	struct tcp_sock *tp = tcp_sk(sk); +	const struct tcp_sock *tp = tcp_sk(sk);  	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);  	hint = min(hint, tp->rcv_wnd / 2); @@ -531,8 +544,7 @@ void tcp_rcv_space_adjust(struct sock *sk)  			space /= tp->advmss;  			if (!space)  				space = 1; -			rcvmem = (tp->advmss + MAX_TCP_HEADER + -				  16 + sizeof(struct sk_buff)); +			rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);  			while (tcp_win_from_space(rcvmem) < tp->advmss)  				rcvmem += 128;  			space *= rcvmem; @@ -812,7 +824,7 @@ void tcp_update_metrics(struct sock *sk)  	}  } -__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) +__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)  {  	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); @@ -880,6 +892,11 @@ static void tcp_init_metrics(struct sock *sk)  		tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);  		if (tp->snd_ssthresh > tp->snd_cwnd_clamp)  			tp->snd_ssthresh = tp->snd_cwnd_clamp; +	} else { +		/* ssthresh may have been reduced unnecessarily during. +		 * 3WHS. Restore it back to its initial default. +		 */ +		tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;  	}  	if (dst_metric(dst, RTAX_REORDERING) &&  	    tp->reordering != dst_metric(dst, RTAX_REORDERING)) { @@ -887,10 +904,7 @@ static void tcp_init_metrics(struct sock *sk)  		tp->reordering = dst_metric(dst, RTAX_REORDERING);  	} -	if (dst_metric(dst, RTAX_RTT) == 0) -		goto reset; - -	if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3)) +	if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0)  		goto reset;  	/* Initial rtt is determined from SYN,SYN-ACK. @@ -916,19 +930,26 @@ static void tcp_init_metrics(struct sock *sk)  		tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));  	}  	tcp_set_rto(sk); -	if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) {  reset: -		/* Play conservative. If timestamps are not -		 * supported, TCP will fail to recalculate correct -		 * rtt, if initial rto is too small. FORGET ALL AND RESET! +	if (tp->srtt == 0) { +		/* RFC2988bis: We've failed to get a valid RTT sample from +		 * 3WHS. This is most likely due to retransmission, +		 * including spurious one. Reset the RTO back to 3secs +		 * from the more aggressive 1sec to avoid more spurious +		 * retransmission.  		 */ -		if (!tp->rx_opt.saw_tstamp && tp->srtt) { -			tp->srtt = 0; -			tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; -			inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; -		} +		tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK; +		inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;  	} -	tp->snd_cwnd = tcp_init_cwnd(tp, dst); +	/* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been +	 * retransmitted. In light of RFC2988bis' more aggressive 1sec +	 * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK +	 * retransmission has occurred. +	 */ +	if (tp->total_retrans > 1) +		tp->snd_cwnd = 1; +	else +		tp->snd_cwnd = tcp_init_cwnd(tp, dst);  	tp->snd_cwnd_stamp = tcp_time_stamp;  } @@ -1115,7 +1136,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack,  		return 0;  	/* ...Then it's D-SACK, and must reside below snd_una completely */ -	if (!after(end_seq, tp->snd_una)) +	if (after(end_seq, tp->snd_una))  		return 0;  	if (!before(start_seq, tp->undo_marker)) @@ -1195,7 +1216,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)  		tp->lost_retrans_low = new_low_seq;  } -static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, +static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,  			   struct tcp_sack_block_wire *sp, int num_sacks,  			   u32 prior_snd_una)  { @@ -1289,7 +1310,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,  	return in_sack;  } -static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, +static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,  			  struct tcp_sacktag_state *state,  			  int dup_sack, int pcount)  { @@ -1380,9 +1401,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,  	BUG_ON(!pcount); -	/* Tweak before seqno plays */ -	if (!tcp_is_fack(tp) && tcp_is_sack(tp) && tp->lost_skb_hint && -	    !before(TCP_SKB_CB(tp->lost_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) +	if (skb == tp->lost_skb_hint)  		tp->lost_cnt_hint += pcount;  	TCP_SKB_CB(prev)->end_seq += shifted; @@ -1431,7 +1450,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,  		tp->lost_cnt_hint -= tcp_skb_pcount(prev);  	} -	TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; +	TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;  	if (skb == tcp_highest_sack(sk))  		tcp_advance_highest_sack(sk, skb); @@ -1446,13 +1465,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,  /* I wish gso_size would have a bit more sane initialization than   * something-or-zero which complicates things   */ -static int tcp_skb_seglen(struct sk_buff *skb) +static int tcp_skb_seglen(const struct sk_buff *skb)  {  	return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);  }  /* Shifting pages past head area doesn't work */ -static int skb_can_shift(struct sk_buff *skb) +static int skb_can_shift(const struct sk_buff *skb)  {  	return !skb_headlen(skb) && skb_is_nonlinear(skb);  } @@ -1701,19 +1720,19 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,  	return skb;  } -static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache) +static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)  {  	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);  }  static int -tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, +tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,  			u32 prior_snd_una)  {  	const struct inet_connection_sock *icsk = inet_csk(sk);  	struct tcp_sock *tp = tcp_sk(sk); -	unsigned char *ptr = (skb_transport_header(ack_skb) + -			      TCP_SKB_CB(ack_skb)->sacked); +	const unsigned char *ptr = (skb_transport_header(ack_skb) + +				    TCP_SKB_CB(ack_skb)->sacked);  	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);  	struct tcp_sack_block sp[TCP_NUM_SACKS];  	struct tcp_sack_block *cache; @@ -2277,7 +2296,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag)  	return 0;  } -static inline int tcp_fackets_out(struct tcp_sock *tp) +static inline int tcp_fackets_out(const struct tcp_sock *tp)  {  	return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;  } @@ -2297,19 +2316,20 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)   * they differ. Since neither occurs due to loss, TCP should really   * ignore them.   */ -static inline int tcp_dupack_heuristics(struct tcp_sock *tp) +static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)  {  	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;  } -static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) +static inline int tcp_skb_timedout(const struct sock *sk, +				   const struct sk_buff *skb)  {  	return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;  } -static inline int tcp_head_timedout(struct sock *sk) +static inline int tcp_head_timedout(const struct sock *sk)  { -	struct tcp_sock *tp = tcp_sk(sk); +	const struct tcp_sock *tp = tcp_sk(sk);  	return tp->packets_out &&  	       tcp_skb_timedout(sk, tcp_write_queue_head(sk)); @@ -2620,7 +2640,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag)  /* Nothing was retransmitted or returned timestamp is less   * than timestamp of the first retransmission.   */ -static inline int tcp_packet_delayed(struct tcp_sock *tp) +static inline int tcp_packet_delayed(const struct tcp_sock *tp)  {  	return !tp->retrans_stamp ||  		(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && @@ -2681,7 +2701,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)  	tp->snd_cwnd_stamp = tcp_time_stamp;  } -static inline int tcp_may_undo(struct tcp_sock *tp) +static inline int tcp_may_undo(const struct tcp_sock *tp)  {  	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));  } @@ -2745,9 +2765,9 @@ static void tcp_try_undo_dsack(struct sock *sk)   * that successive retransmissions of a segment must not advance   * retrans_stamp under any conditions.   */ -static int tcp_any_retrans_done(struct sock *sk) +static int tcp_any_retrans_done(const struct sock *sk)  { -	struct tcp_sock *tp = tcp_sk(sk); +	const struct tcp_sock *tp = tcp_sk(sk);  	struct sk_buff *skb;  	if (tp->retrans_out) @@ -2821,9 +2841,13 @@ static int tcp_try_undo_loss(struct sock *sk)  static inline void tcp_complete_cwr(struct sock *sk)  {  	struct tcp_sock *tp = tcp_sk(sk); -	/* Do not moderate cwnd if it's already undone in cwr or recovery */ -	if (tp->undo_marker && tp->snd_cwnd > tp->snd_ssthresh) { -		tp->snd_cwnd = tp->snd_ssthresh; + +	/* Do not moderate cwnd if it's already undone in cwr or recovery. */ +	if (tp->undo_marker) { +		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) +			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); +		else /* PRR */ +			tp->snd_cwnd = tp->snd_ssthresh;  		tp->snd_cwnd_stamp = tcp_time_stamp;  	}  	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); @@ -2941,6 +2965,38 @@ void tcp_simple_retransmit(struct sock *sk)  }  EXPORT_SYMBOL(tcp_simple_retransmit); +/* This function implements the PRR algorithm, specifcally the PRR-SSRB + * (proportional rate reduction with slow start reduction bound) as described in + * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt. + * It computes the number of packets to send (sndcnt) based on packets newly + * delivered: + *   1) If the packets in flight is larger than ssthresh, PRR spreads the + *	cwnd reductions across a full RTT. + *   2) If packets in flight is lower than ssthresh (such as due to excess + *	losses and/or application stalls), do not perform any further cwnd + *	reductions, but instead slow start up to ssthresh. + */ +static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, +					int fast_rexmit, int flag) +{ +	struct tcp_sock *tp = tcp_sk(sk); +	int sndcnt = 0; +	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + +	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { +		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered + +			       tp->prior_cwnd - 1; +		sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out; +	} else { +		sndcnt = min_t(int, delta, +			       max_t(int, tp->prr_delivered - tp->prr_out, +				     newly_acked_sacked) + 1); +	} + +	sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); +	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; +} +  /* Process an event, which can update packets-in-flight not trivially.   * Main goal of this function is to calculate new estimate for left_out,   * taking into account both packets sitting in receiver's buffer and @@ -2952,7 +3008,8 @@ EXPORT_SYMBOL(tcp_simple_retransmit);   * It does _not_ decide what to send, it is made in function   * tcp_xmit_retransmit_queue().   */ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) +static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, +				  int newly_acked_sacked, int flag)  {  	struct inet_connection_sock *icsk = inet_csk(sk);  	struct tcp_sock *tp = tcp_sk(sk); @@ -3102,22 +3159,27 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)  		tp->bytes_acked = 0;  		tp->snd_cwnd_cnt = 0; +		tp->prior_cwnd = tp->snd_cwnd; +		tp->prr_delivered = 0; +		tp->prr_out = 0;  		tcp_set_ca_state(sk, TCP_CA_Recovery);  		fast_rexmit = 1;  	}  	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))  		tcp_update_scoreboard(sk, fast_rexmit); -	tcp_cwnd_down(sk, flag); +	tp->prr_delivered += newly_acked_sacked; +	tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);  	tcp_xmit_retransmit_queue(sk);  } -static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt) +void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)  {  	tcp_rtt_estimator(sk, seq_rtt);  	tcp_set_rto(sk);  	inet_csk(sk)->icsk_backoff = 0;  } +EXPORT_SYMBOL(tcp_valid_rtt_meas);  /* Read draft-ietf-tcplw-high-performance before mucking   * with this code. (Supersedes RFC1323) @@ -3184,7 +3246,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)   */  static void tcp_rearm_rto(struct sock *sk)  { -	struct tcp_sock *tp = tcp_sk(sk); +	const struct tcp_sock *tp = tcp_sk(sk);  	if (!tp->packets_out) {  		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); @@ -3288,7 +3350,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,  		 * connection startup slow start one packet too  		 * quickly.  This is severely frowned upon behavior.  		 */ -		if (!(scb->flags & TCPHDR_SYN)) { +		if (!(scb->tcp_flags & TCPHDR_SYN)) {  			flag |= FLAG_DATA_ACKED;  		} else {  			flag |= FLAG_SYN_ACKED; @@ -3436,7 +3498,7 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,   * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2   * and in FreeBSD. NetBSD's one is even worse.) is wrong.   */ -static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack, +static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,  				 u32 ack_seq)  {  	struct tcp_sock *tp = tcp_sk(sk); @@ -3612,7 +3674,7 @@ static int tcp_process_frto(struct sock *sk, int flag)  }  /* This routine deals with incoming acks, but not outgoing ones. */ -static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) +static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)  {  	struct inet_connection_sock *icsk = inet_csk(sk);  	struct tcp_sock *tp = tcp_sk(sk); @@ -3622,6 +3684,8 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)  	u32 prior_in_flight;  	u32 prior_fackets;  	int prior_packets; +	int prior_sacked = tp->sacked_out; +	int newly_acked_sacked = 0;  	int frto_cwnd = 0;  	/* If the ack is older than previous acks @@ -3693,6 +3757,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)  	/* See if we can take anything off of the retransmit queue. */  	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); +	newly_acked_sacked = (prior_packets - prior_sacked) - +			     (tp->packets_out - tp->sacked_out); +  	if (tp->frto_counter)  		frto_cwnd = tcp_process_frto(sk, flag);  	/* Guarantee sacktag reordering detection against wrap-arounds */ @@ -3705,7 +3772,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)  		    tcp_may_raise_cwnd(sk, flag))  			tcp_cong_avoid(sk, ack, prior_in_flight);  		tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, -				      flag); +				      newly_acked_sacked, flag);  	} else {  		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)  			tcp_cong_avoid(sk, ack, prior_in_flight); @@ -3744,14 +3811,14 @@ old_ack:   * But, this can also be called on packets in the established flow when   * the fast version below fails.   */ -void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, -		       u8 **hvpp, int estab) +void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, +		       const u8 **hvpp, int estab)  { -	unsigned char *ptr; -	struct tcphdr *th = tcp_hdr(skb); +	const unsigned char *ptr; +	const struct tcphdr *th = tcp_hdr(skb);  	int length = (th->doff * 4) - sizeof(struct tcphdr); -	ptr = (unsigned char *)(th + 1); +	ptr = (const unsigned char *)(th + 1);  	opt_rx->saw_tstamp = 0;  	while (length > 0) { @@ -3862,9 +3929,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,  }  EXPORT_SYMBOL(tcp_parse_options); -static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) +static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)  { -	__be32 *ptr = (__be32 *)(th + 1); +	const __be32 *ptr = (const __be32 *)(th + 1);  	if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)  			  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { @@ -3881,8 +3948,9 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)  /* Fast parse options. This hopes to only see timestamps.   * If it is wrong it falls back on tcp_parse_options().   */ -static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, -				  struct tcp_sock *tp, u8 **hvpp) +static int tcp_fast_parse_options(const struct sk_buff *skb, +				  const struct tcphdr *th, +				  struct tcp_sock *tp, const u8 **hvpp)  {  	/* In the spirit of fast parsing, compare doff directly to constant  	 * values.  Because equality is used, short doff can be ignored here. @@ -3903,10 +3971,10 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,  /*   * Parse MD5 Signature option   */ -u8 *tcp_parse_md5sig_option(struct tcphdr *th) +const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)  { -	int length = (th->doff << 2) - sizeof (*th); -	u8 *ptr = (u8*)(th + 1); +	int length = (th->doff << 2) - sizeof(*th); +	const u8 *ptr = (const u8 *)(th + 1);  	/* If the TCP option is too short, we can short cut */  	if (length < TCPOLEN_MD5SIG) @@ -3983,8 +4051,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)  static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)  { -	struct tcp_sock *tp = tcp_sk(sk); -	struct tcphdr *th = tcp_hdr(skb); +	const struct tcp_sock *tp = tcp_sk(sk); +	const struct tcphdr *th = tcp_hdr(skb);  	u32 seq = TCP_SKB_CB(skb)->seq;  	u32 ack = TCP_SKB_CB(skb)->ack_seq; @@ -4023,7 +4091,7 @@ static inline int tcp_paws_discard(const struct sock *sk,   * (borrowed from freebsd)   */ -static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq) +static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)  {  	return	!before(end_seq, tp->rcv_wup) &&  		!after(seq, tp->rcv_nxt + tcp_receive_window(tp)); @@ -4068,7 +4136,7 @@ static void tcp_reset(struct sock *sk)   *   *	If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.   */ -static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) +static void tcp_fin(struct sock *sk)  {  	struct tcp_sock *tp = tcp_sk(sk); @@ -4180,7 +4248,7 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)  		tcp_sack_extend(tp->duplicate_sack, seq, end_seq);  } -static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) +static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)  {  	struct tcp_sock *tp = tcp_sk(sk); @@ -4339,7 +4407,7 @@ static void tcp_ofo_queue(struct sock *sk)  		__skb_queue_tail(&sk->sk_receive_queue, skb);  		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;  		if (tcp_hdr(skb)->fin) -			tcp_fin(skb, sk, tcp_hdr(skb)); +			tcp_fin(sk);  	}  } @@ -4367,7 +4435,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)  static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)  { -	struct tcphdr *th = tcp_hdr(skb); +	const struct tcphdr *th = tcp_hdr(skb);  	struct tcp_sock *tp = tcp_sk(sk);  	int eaten = -1; @@ -4421,7 +4489,7 @@ queue_and_out:  		if (skb->len)  			tcp_event_data_recv(sk, skb);  		if (th->fin) -			tcp_fin(skb, sk, th); +			tcp_fin(sk);  		if (!skb_queue_empty(&tp->out_of_order_queue)) {  			tcp_ofo_queue(sk); @@ -4851,9 +4919,9 @@ void tcp_cwnd_application_limited(struct sock *sk)  	tp->snd_cwnd_stamp = tcp_time_stamp;  } -static int tcp_should_expand_sndbuf(struct sock *sk) +static int tcp_should_expand_sndbuf(const struct sock *sk)  { -	struct tcp_sock *tp = tcp_sk(sk); +	const struct tcp_sock *tp = tcp_sk(sk);  	/* If the user specified a specific send buffer setting, do  	 * not modify it. @@ -4887,8 +4955,10 @@ static void tcp_new_space(struct sock *sk)  	struct tcp_sock *tp = tcp_sk(sk);  	if (tcp_should_expand_sndbuf(sk)) { -		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + -			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); +		int sndmem = SKB_TRUESIZE(max_t(u32, +						tp->rx_opt.mss_clamp, +						tp->mss_cache) + +					  MAX_TCP_HEADER);  		int demanded = max_t(unsigned int, tp->snd_cwnd,  				     tp->reordering + 1);  		sndmem *= 2 * demanded; @@ -4960,7 +5030,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)   *	either form (or just set the sysctl tcp_stdurg).   */ -static void tcp_check_urg(struct sock *sk, struct tcphdr *th) +static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)  {  	struct tcp_sock *tp = tcp_sk(sk);  	u32 ptr = ntohs(th->urg_ptr); @@ -5026,7 +5096,7 @@ static void tcp_check_urg(struct sock *sk, struct tcphdr *th)  }  /* This is the 'fast' part of urgent handling. */ -static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) +static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)  {  	struct tcp_sock *tp = tcp_sk(sk); @@ -5147,9 +5217,9 @@ out:   * play significant role here.   */  static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, -			      struct tcphdr *th, int syn_inerr) +			      const struct tcphdr *th, int syn_inerr)  { -	u8 *hash_location; +	const u8 *hash_location;  	struct tcp_sock *tp = tcp_sk(sk);  	/* RFC1323: H1. Apply PAWS check first. */ @@ -5230,7 +5300,7 @@ discard:   *	tcp_data_queue when everything is OK.   */  int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, -			struct tcphdr *th, unsigned len) +			const struct tcphdr *th, unsigned int len)  {  	struct tcp_sock *tp = tcp_sk(sk);  	int res; @@ -5441,9 +5511,9 @@ discard:  EXPORT_SYMBOL(tcp_rcv_established);  static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, -					 struct tcphdr *th, unsigned len) +					 const struct tcphdr *th, unsigned int len)  { -	u8 *hash_location; +	const u8 *hash_location;  	struct inet_connection_sock *icsk = inet_csk(sk);  	struct tcp_sock *tp = tcp_sk(sk);  	struct tcp_cookie_values *cvp = tp->cookie_values; @@ -5718,7 +5788,7 @@ reset_and_undo:   */  int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, -			  struct tcphdr *th, unsigned len) +			  const struct tcphdr *th, unsigned int len)  {  	struct tcp_sock *tp = tcp_sk(sk);  	struct inet_connection_sock *icsk = inet_csk(sk); @@ -5806,12 +5876,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,  					      tp->rx_opt.snd_wscale;  				tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); -				/* tcp_ack considers this ACK as duplicate -				 * and does not calculate rtt. -				 * Force it here. -				 */ -				tcp_ack_update_rtt(sk, 0, 0); -  				if (tp->rx_opt.tstamp_ok)  					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 
