From b9d86585dc6c9265aa373c7036458fe8aa7627c6 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 15 Nov 2007 19:33:31 -0800 Subject: [TCP]: Move !in_sack test earlier in sacktag & reorganize if()s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All intermediate conditions include it already, make them simpler as well. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b39f0d86e44..901240f4068 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1406,28 +1406,25 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (unlikely(in_sack < 0)) break; + if (!in_sack) { + fack_count += tcp_skb_pcount(skb); + continue; + } + sacked = TCP_SKB_CB(skb)->sacked; /* Account D-SACK for retransmitted packet. */ - if ((dup_sack && in_sack) && - (sacked & TCPCB_RETRANS) && - after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) - tp->undo_retrans--; - - /* The frame is ACKed. */ - if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) { - if (sacked&TCPCB_RETRANS) { - if ((dup_sack && in_sack) && - (sacked&TCPCB_SACKED_ACKED)) - reord = min(fack_count, reord); - } - - /* Nothing to do; acked frame is about to be dropped. */ - fack_count += tcp_skb_pcount(skb); - continue; + if (dup_sack && (sacked & TCPCB_RETRANS)) { + if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) + tp->undo_retrans--; + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una) && + (sacked & TCPCB_SACKED_ACKED)) + reord = min(fack_count, reord); } - if (!in_sack) { + + /* Nothing to do; acked frame is about to be dropped (was ACKed). */ + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) { fack_count += tcp_skb_pcount(skb); continue; } -- cgit v1.2.3-70-g09d2 From f577111302677e6d1448475821cc19ba8835f60e Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 15 Nov 2007 19:35:11 -0800 Subject: [TCP]: Extend reordering detection to cover CA_Loss partially MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements more accurately what is stated in sacktag's overall comment: "Both of these heuristics are not used in Loss state, when we cannot account for retransmits accurately." When CA_Loss state is entered, the state changer ensures that undo_marker is only set if no TCPCB_RETRANS skbs were found, thus having non-zero undo_marker in CA_Loss basically tells that the R-bits still accurately reflect the current state of TCP. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 901240f4068..26713e5d89d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1511,7 +1511,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tcp_verify_left_out(tp); - if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss && + if ((reord < tp->fackets_out) && + ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) tcp_update_reordering(sk, tp->fackets_out - reord, 0); -- cgit v1.2.3-70-g09d2 From 85cc391c0e4584db594bfc4005c63c07c76c5077 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 15 Nov 2007 19:39:31 -0800 Subject: [TCP]: non-FACK SACK follows conservative SACK loss recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

Many assumptions that are true when no reordering or other strange events happen are not part of RFC3517; the FACK implementation is based on such assumptions. Previously (before the rewrite) the non-FACK SACK basically did a fast rexmit and then timed out all skbs when the first cumulative ACK arrived, which cannot really be called SACK-based recovery :-).

RFC3517 SACK disables these things:
- Per-SKB timeouts & head-timeout entry to recovery
- Marking more than one skb while in recovery (RFC3517 does this only for the fast retransmission but not for the other skbs when cumulative ACKs arrive in the recovery)
- Sacktag's loss detection flavors B and C (see comment before tcp_sacktag_write_queue)

This does not implement the "last resort" rule 3 of NextSeg, which allows retransmissions also when not enough SACK blocks have yet arrived above a segment for IsLost to return true [RFC3517].

The implementation differs from RFC3517 in these points:
- Rate-halving is used instead of FlightSize / 2
- Instead of using dupACKs to trigger the recovery, the number of SACK blocks is used, as FACK does with SACK blocks+holes (which provides a more accurate number). It seems that the difference can have a negative effect only if the receiver does not generate SACK blocks at all even though it claimed to be SACK-capable.
- Dupthresh is not a constant. Dynamic adjustments include both holes and SACKed segments (equal to what FACK has) due to the complexity involved in determining the number of SACKed blocks between highest_sack and the reordered segment. Thus it will be an over-estimate.

Implementation note: tcp_clean_rtx_queue doesn't need a lost_cnt tweak because the head skb at that point cannot be SACKED_ACKED (nor would such a situation last long enough to cause problems).

Signed-off-by: Ilpo Järvinen Signed-off-by: David S.
Miller --- net/ipv4/tcp_input.c | 80 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 18 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 26713e5d89d..c0e8f2b1fa7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -863,6 +863,9 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) */ static void tcp_disable_fack(struct tcp_sock *tp) { + /* RFC3517 uses different metric in lost marker => reset on change */ + if (tcp_is_fack(tp)) + tp->lost_skb_hint = NULL; tp->rx_opt.sack_ok &= ~2; } @@ -1470,6 +1473,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->sacked_out += tcp_skb_pcount(skb); fack_count += tcp_skb_pcount(skb); + + /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ + if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->lost_skb_hint)->seq)) + tp->lost_cnt_hint += tcp_skb_pcount(skb); + if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; @@ -1504,7 +1514,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ flag &= ~FLAG_ONLY_ORIG_SACKED; } - if (tp->retrans_out && + if (tcp_is_fack(tp) && tp->retrans_out && after(highest_sack_end_seq, tp->lost_retrans_low) && icsk->icsk_ca_state == TCP_CA_Recovery) flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq); @@ -1858,6 +1868,26 @@ static inline int tcp_fackets_out(struct tcp_sock *tp) return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out; } +/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs + * counter when SACK is enabled (without SACK, sacked_out is used for + * that purpose). + * + * Instead, with FACK TCP uses fackets_out that includes both SACKed + * segments up to the highest received SACK block so far and holes in + * between them. + * + * With reordering, holes may still be in flight, so RFC3517 recovery + * uses pure sacked_out (total number of SACKed segments) even though + * it violates the RFC that uses duplicate ACKs, often these are equal + * but when e.g. out-of-window ACKs or packet duplication occurs, + * they differ. Since neither occurs due to loss, TCP should really + * ignore them. + */ +static inline int tcp_dupack_heurestics(struct tcp_sock *tp) +{ + return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; +} + static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) { return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); @@ -1978,13 +2008,13 @@ static int tcp_time_to_recover(struct sock *sk) return 1; /* Not-A-Trick#2 : Classic rule... */ - if (tcp_fackets_out(tp) > tp->reordering) + if (tcp_dupack_heurestics(tp) > tp->reordering) return 1; /* Trick#3 : when we use RFC2988 timer restart, fast * retransmit can be triggered by timeout of queue head. */ - if (tcp_head_timedout(sk)) + if (tcp_is_fack(tp) && tcp_head_timedout(sk)) return 1; /* Trick#4: It is still not OK... But will it be useful to delay @@ -2017,8 +2047,10 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, tp->retransmit_skb_hint = NULL; } -/* Mark head of queue up as lost. */ -static void tcp_mark_head_lost(struct sock *sk, int packets) +/* Mark head of queue up as lost. 
With RFC3517 SACK, the packets is + * is against sacked "cnt", otherwise it's against facked "cnt" + */ +static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -2040,8 +2072,13 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) /* this is not the most efficient way to do this... */ tp->lost_skb_hint = skb; tp->lost_cnt_hint = cnt; - cnt += tcp_skb_pcount(skb); - if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) + + if (tcp_is_fack(tp) || + (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) + cnt += tcp_skb_pcount(skb); + + if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) || + after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) break; if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; @@ -2054,17 +2091,22 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) /* Account newly detected lost packet(s) */ -static void tcp_update_scoreboard(struct sock *sk) +static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_is_fack(tp)) { + if (tcp_is_reno(tp)) { + tcp_mark_head_lost(sk, 1, fast_rexmit); + } else if (tcp_is_fack(tp)) { int lost = tp->fackets_out - tp->reordering; if (lost <= 0) lost = 1; - tcp_mark_head_lost(sk, lost); + tcp_mark_head_lost(sk, lost, fast_rexmit); } else { - tcp_mark_head_lost(sk, 1); + int sacked_upto = tp->sacked_out - tp->reordering; + if (sacked_upto < 0) + sacked_upto = 0; + tcp_mark_head_lost(sk, sacked_upto, fast_rexmit); } /* New heuristics: it is possible only after we switched @@ -2072,7 +2114,7 @@ static void tcp_update_scoreboard(struct sock *sk) * Hence, we can detect timed out packets during fast * retransmit without falling to slow start. */ - if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) { + if (tcp_is_fack(tp) && tcp_head_timedout(sk)) { struct sk_buff *skb; skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint @@ -2245,7 +2287,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) { struct tcp_sock *tp = tcp_sk(sk); /* Partial ACK arrived. Force Hoe's retransmit. */ - int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering; + int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering); if (tcp_may_undo(tp)) { /* Plain luck! Hole if filled with delayed @@ -2379,7 +2421,8 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) struct tcp_sock *tp = tcp_sk(sk); int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP)); int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) && - (tp->fackets_out > tp->reordering)); + (tcp_fackets_out(tp) > tp->reordering)); + int fast_rexmit = 0; /* Some technical things: * 1. Reno does not count dupacks (sacked_out) automatically. */ @@ -2399,11 +2442,11 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) return; /* C. Process data loss notification, provided it is valid. 
*/ - if ((flag&FLAG_DATA_LOST) && + if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) && before(tp->snd_una, tp->high_seq) && icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering); + tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, 0); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } @@ -2522,10 +2565,11 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tp->bytes_acked = 0; tp->snd_cwnd_cnt = 0; tcp_set_ca_state(sk, TCP_CA_Recovery); + fast_rexmit = 1; } - if (do_lost || tcp_head_timedout(sk)) - tcp_update_scoreboard(sk); + if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) + tcp_update_scoreboard(sk, fast_rexmit); tcp_cwnd_down(sk, flag); tcp_xmit_retransmit_queue(sk); } -- cgit v1.2.3-70-g09d2 From a47e5a988a575e64c8c9bae65a1dfe3dca7cba32 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 15 Nov 2007 19:41:46 -0800 Subject: [TCP]: Convert highest_sack to sk_buff to allow direct access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is going to replace the sack fastpath hint quite soon... :-) Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++-- include/net/tcp.h | 10 ++++++++++ net/ipv4/tcp_input.c | 12 ++++++------ net/ipv4/tcp_output.c | 19 ++++++++++--------- 4 files changed, 30 insertions(+), 17 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bac17c59b24..34acee66223 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -332,8 +332,10 @@ struct tcp_sock { struct tcp_sack_block_wire recv_sack_cache[4]; - u32 highest_sack; /* Start seq of globally highest revd SACK - * (validity guaranteed only if sacked_out > 0) */ + struct sk_buff *highest_sack; /* highest skb with SACK received + * (validity guaranteed only if + * sacked_out > 0) + */ /* from STCP, retrans queue hinting */ struct sk_buff* lost_skb_hint; diff --git a/include/net/tcp.h b/include/net/tcp.h index d893b448076..0ede804b16d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1312,6 +1312,16 @@ static inline int tcp_write_queue_empty(struct sock *sk) return skb_queue_empty(&sk->sk_write_queue); } +/* Start sequence of the highest skb with SACKed bit, valid only if + * sacked > 0 or when the caller has ensured validity by itself. 
+ */ +static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp) +{ + if (!tp->sacked_out) + return tp->snd_una; + return TCP_SKB_CB(tp->highest_sack)->seq; +} + /* /proc */ enum tcp_seq_states { TCP_SEQ_STATE_LISTENING, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c0e8f2b1fa7..31294b52ad4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1245,7 +1245,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int reord = tp->packets_out; int prior_fackets; - u32 highest_sack_end_seq = tp->lost_retrans_low; + u32 highest_sack_end_seq; int flag = 0; int found_dup_sack = 0; int cached_fack_count; @@ -1256,7 +1256,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (!tp->sacked_out) { if (WARN_ON(tp->fackets_out)) tp->fackets_out = 0; - tp->highest_sack = tp->snd_una; + tp->highest_sack = tcp_write_queue_head(sk); } prior_fackets = tp->fackets_out; @@ -1483,10 +1483,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; - if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) { - tp->highest_sack = TCP_SKB_CB(skb)->seq; - highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq; - } + if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) + tp->highest_sack = skb; + } else { if (dup_sack && (sacked&TCPCB_RETRANS)) reord = min(fack_count, reord); @@ -1514,6 +1513,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ flag &= ~FLAG_ONLY_ORIG_SACKED; } + highest_sack_end_seq = TCP_SKB_CB(tp->highest_sack)->end_seq; if (tcp_is_fack(tp) && tp->retrans_out && after(highest_sack_end_seq, tp->lost_retrans_low) && icsk->icsk_ca_state == TCP_CA_Recovery) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f4c1eef89af..ce506af5ce0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -657,13 +657,15 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned * tweak SACK fastpath hint too as it would overwrite all changes unless * hint is also changed. */ -static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb, +static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, int decr) { + struct tcp_sock *tp = tcp_sk(sk); + if (!tp->sacked_out || tcp_is_reno(tp)) return; - if (!before(tp->highest_sack, TCP_SKB_CB(skb)->seq)) + if (!before(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq)) tp->fackets_out -= decr; /* cnt_hint is "off-by-one" compared with fackets_out (see sacktag) */ @@ -712,9 +714,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; - if (tcp_is_sack(tp) && tp->sacked_out && - (TCP_SKB_CB(skb)->seq == tp->highest_sack)) - tp->highest_sack = TCP_SKB_CB(buff)->seq; + if (tcp_is_sack(tp) && tp->sacked_out && (skb == tp->highest_sack)) + tp->highest_sack = buff; /* PSH and FIN should only be set in the second packet. */ flags = TCP_SKB_CB(skb)->flags; @@ -772,7 +773,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss tcp_dec_pcount_approx_int(&tp->sacked_out, diff); tcp_verify_left_out(tp); } - tcp_adjust_fackets_out(tp, skb, diff); + tcp_adjust_fackets_out(sk, skb, diff); } /* Link BUFF into the send queue. 
*/ @@ -1712,7 +1713,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m tcp_skb_pcount(next_skb) != 1); if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out && - (TCP_SKB_CB(next_skb)->seq == tp->highest_sack))) + (next_skb == tp->highest_sack))) return; /* Ok. We will be able to collapse the packet. */ @@ -1747,7 +1748,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m if (tcp_is_reno(tp) && tp->sacked_out) tcp_dec_pcount_approx(&tp->sacked_out, next_skb); - tcp_adjust_fackets_out(tp, next_skb, tcp_skb_pcount(next_skb)); + tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb)); tp->packets_out -= tcp_skb_pcount(next_skb); /* changed transmit queue under us so clear hints */ @@ -2028,7 +2029,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) break; tp->forward_skb_hint = skb; - if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) + if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) break; if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) -- cgit v1.2.3-70-g09d2 From 9f58f3b721f52a4d3f497ea57f830ccd307f1d76 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 15 Nov 2007 19:42:54 -0800 Subject: [TCP]: Make lost retrans detection more self-contained MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Highest_sack_end_seq is no longer calculated in the loop, thus it can be pushed to the worker function altogether making that function independent of the sacktag. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 31294b52ad4..7a2bfd85fff 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1115,16 +1115,23 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, * * Search retransmitted skbs from write_queue that were sent when snd_nxt was * less than what is now known to be received by the other end (derived from - * SACK blocks by the caller). Also calculate the lowest snd_nxt among the - * remaining retransmitted skbs to avoid some costly processing per ACKs. + * highest SACK block). Also calculate the lowest snd_nxt among the remaining + * retransmitted skbs to avoid some costly processing per ACKs. 
*/ -static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto) +static int tcp_mark_lost_retrans(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int flag = 0; int cnt = 0; u32 new_low_seq = tp->snd_nxt; + u32 received_upto = TCP_SKB_CB(tp->highest_sack)->end_seq; + + if (!tcp_is_fack(tp) || !tp->retrans_out || + !after(received_upto, tp->lost_retrans_low) || + icsk->icsk_ca_state != TCP_CA_Recovery) + return flag; tcp_for_write_queue(skb, sk) { u32 ack_seq = TCP_SKB_CB(skb)->ack_seq; @@ -1245,7 +1252,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int reord = tp->packets_out; int prior_fackets; - u32 highest_sack_end_seq; int flag = 0; int found_dup_sack = 0; int cached_fack_count; @@ -1513,11 +1519,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ flag &= ~FLAG_ONLY_ORIG_SACKED; } - highest_sack_end_seq = TCP_SKB_CB(tp->highest_sack)->end_seq; - if (tcp_is_fack(tp) && tp->retrans_out && - after(highest_sack_end_seq, tp->lost_retrans_low) && - icsk->icsk_ca_state == TCP_CA_Recovery) - flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq); + flag |= tcp_mark_lost_retrans(sk); tcp_verify_left_out(tp); -- cgit v1.2.3-70-g09d2 From b7d4815f35ab1d0f1eef2521a94a7d4c789290a2 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 15 Nov 2007 19:43:56 -0800 Subject: [TCP]: Prior_fackets can be replaced by highest_sack seq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7a2bfd85fff..5e01ac2c003 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1251,7 +1251,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ struct sk_buff *cached_skb; int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int reord = tp->packets_out; - int prior_fackets; int flag = 0; int found_dup_sack = 0; int cached_fack_count; @@ -1264,7 +1263,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->fackets_out = 0; tp->highest_sack = tcp_write_queue_head(sk); } - prior_fackets = tp->fackets_out; found_dup_sack = tcp_check_dsack(tp, ack_skb, sp, num_sacks, prior_snd_una); @@ -1457,7 +1455,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* New sack for not retransmitted frame, * which was in hole. It is reordering. */ - if (fack_count < prior_fackets) + if (before(TCP_SKB_CB(skb)->seq, + tcp_highest_sack_seq(tp))) reord = min(fack_count, reord); /* SACK enhanced F-RTO (RFC4138; Appendix B) */ -- cgit v1.2.3-70-g09d2 From 9e10c47cb9fe3154416787523f7a0df02133063f Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 15 Nov 2007 19:44:56 -0800 Subject: [TCP]: Create tcp_sacktag_one(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Worker function that implements the main logic of the inner-most loop of tcp_sacktag_write_queue(). Idea was originally presented by David S. Miller. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 192 +++++++++++++++++++++++++-------------------------- 1 file changed, 96 insertions(+), 96 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5e01ac2c003..a62e0f90f56 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1240,6 +1240,99 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, return in_sack; } +static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp, + int *reord, int dup_sack, int fack_count) +{ + u8 sacked = TCP_SKB_CB(skb)->sacked; + int flag = 0; + + /* Account D-SACK for retransmitted packet. */ + if (dup_sack && (sacked & TCPCB_RETRANS)) { + if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) + tp->undo_retrans--; + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una) && + (sacked & TCPCB_SACKED_ACKED)) + *reord = min(fack_count, *reord); + } + + /* Nothing to do; acked frame is about to be dropped (was ACKed). */ + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) + return flag; + + if (!(sacked & TCPCB_SACKED_ACKED)) { + if (sacked & TCPCB_SACKED_RETRANS) { + /* If the segment is not tagged as lost, + * we do not clear RETRANS, believing + * that retransmission is still in flight. + */ + if (sacked & TCPCB_LOST) { + TCP_SKB_CB(skb)->sacked &= + ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); + tp->lost_out -= tcp_skb_pcount(skb); + tp->retrans_out -= tcp_skb_pcount(skb); + + /* clear lost hint */ + tp->retransmit_skb_hint = NULL; + } + } else { + if (!(sacked & TCPCB_RETRANS)) { + /* New sack for not retransmitted frame, + * which was in hole. It is reordering. + */ + if (before(TCP_SKB_CB(skb)->seq, + tcp_highest_sack_seq(tp))) + *reord = min(fack_count, *reord); + + /* SACK enhanced F-RTO (RFC4138; Appendix B) */ + if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) + flag |= FLAG_ONLY_ORIG_SACKED; + } + + if (sacked & TCPCB_LOST) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; + tp->lost_out -= tcp_skb_pcount(skb); + + /* clear lost hint */ + tp->retransmit_skb_hint = NULL; + } + } + + TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; + flag |= FLAG_DATA_SACKED; + tp->sacked_out += tcp_skb_pcount(skb); + + fack_count += tcp_skb_pcount(skb); + + /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ + if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->lost_skb_hint)->seq)) + tp->lost_cnt_hint += tcp_skb_pcount(skb); + + if (fack_count > tp->fackets_out) + tp->fackets_out = fack_count; + + if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) + tp->highest_sack = skb; + + } else { + if (dup_sack && (sacked & TCPCB_RETRANS)) + *reord = min(fack_count, *reord); + } + + /* D-SACK. We can detect redundant retransmission in S|R and plain R + * frames and clear it. undo_retrans is decreased above, L|R frames + * are accounted above as well. 
+ */ + if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= tcp_skb_pcount(skb); + tp->retransmit_skb_hint = NULL; + } + + return flag; +} + static int tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) { @@ -1375,7 +1468,6 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tcp_for_write_queue_from(skb, sk) { int in_sack = 0; - u8 sacked; if (skb == tcp_send_head(sk)) break; @@ -1413,102 +1505,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (unlikely(in_sack < 0)) break; - if (!in_sack) { - fack_count += tcp_skb_pcount(skb); - continue; - } - - sacked = TCP_SKB_CB(skb)->sacked; - - /* Account D-SACK for retransmitted packet. */ - if (dup_sack && (sacked & TCPCB_RETRANS)) { - if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) - tp->undo_retrans--; - if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una) && - (sacked & TCPCB_SACKED_ACKED)) - reord = min(fack_count, reord); - } - - - /* Nothing to do; acked frame is about to be dropped (was ACKed). */ - if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) { - fack_count += tcp_skb_pcount(skb); - continue; - } - - if (!(sacked&TCPCB_SACKED_ACKED)) { - if (sacked & TCPCB_SACKED_RETRANS) { - /* If the segment is not tagged as lost, - * we do not clear RETRANS, believing - * that retransmission is still in flight. - */ - if (sacked & TCPCB_LOST) { - TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); - tp->lost_out -= tcp_skb_pcount(skb); - tp->retrans_out -= tcp_skb_pcount(skb); - - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; - } - } else { - if (!(sacked & TCPCB_RETRANS)) { - /* New sack for not retransmitted frame, - * which was in hole. It is reordering. - */ - if (before(TCP_SKB_CB(skb)->seq, - tcp_highest_sack_seq(tp))) - reord = min(fack_count, reord); - - /* SACK enhanced F-RTO (RFC4138; Appendix B) */ - if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) - flag |= FLAG_ONLY_ORIG_SACKED; - } - - if (sacked & TCPCB_LOST) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - tp->lost_out -= tcp_skb_pcount(skb); - - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; - } - } - - TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; - flag |= FLAG_DATA_SACKED; - tp->sacked_out += tcp_skb_pcount(skb); - - fack_count += tcp_skb_pcount(skb); - - /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ - if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->lost_skb_hint)->seq)) - tp->lost_cnt_hint += tcp_skb_pcount(skb); - - if (fack_count > tp->fackets_out) - tp->fackets_out = fack_count; + if (in_sack) + flag |= tcp_sacktag_one(skb, tp, &reord, dup_sack, fack_count); - if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) - tp->highest_sack = skb; - - } else { - if (dup_sack && (sacked&TCPCB_RETRANS)) - reord = min(fack_count, reord); - - fack_count += tcp_skb_pcount(skb); - } - - /* D-SACK. We can detect redundant retransmission - * in S|R and plain R frames and clear it. - * undo_retrans is decreased above, L|R frames - * are accounted above as well. 
- */ - if (dup_sack && - (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - tp->retransmit_skb_hint = NULL; - } + fack_count += tcp_skb_pcount(skb); } /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct -- cgit v1.2.3-70-g09d2 From fd6dad616d4fe2f08d690f25ca76b0102158fb3a Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 15 Nov 2007 19:49:47 -0800 Subject: [TCP]: Earlier SACK block verification & simplify access to them MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- net/ipv4/tcp_input.c | 85 +++++++++++++++++++++++++++++++--------------------- 2 files changed, 52 insertions(+), 35 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 34acee66223..794497c7d75 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -330,7 +330,7 @@ struct tcp_sock { struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ - struct tcp_sack_block_wire recv_sack_cache[4]; + struct tcp_sack_block recv_sack_cache[4]; struct sk_buff *highest_sack; /* highest skb with SACK received * (validity guaranteed only if diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a62e0f90f56..a287747e9dd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1340,9 +1340,11 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ struct tcp_sock *tp = tcp_sk(sk); unsigned char *ptr = (skb_transport_header(ack_skb) + TCP_SKB_CB(ack_skb)->sacked); - struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2); + struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); + struct tcp_sack_block sp[4]; struct sk_buff *cached_skb; int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; + int used_sacks; int reord = tp->packets_out; int flag = 0; int found_dup_sack = 0; @@ -1357,7 +1359,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->highest_sack = tcp_write_queue_head(sk); } - found_dup_sack = tcp_check_dsack(tp, ack_skb, sp, + found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire, num_sacks, prior_snd_una); if (found_dup_sack) flag |= FLAG_DSACKING_ACK; @@ -1372,14 +1374,49 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (!tp->packets_out) goto out; + used_sacks = 0; + first_sack_index = 0; + for (i = 0; i < num_sacks; i++) { + int dup_sack = !i && found_dup_sack; + + sp[used_sacks].start_seq = ntohl(get_unaligned(&sp_wire[i].start_seq)); + sp[used_sacks].end_seq = ntohl(get_unaligned(&sp_wire[i].end_seq)); + + if (!tcp_is_sackblock_valid(tp, dup_sack, + sp[used_sacks].start_seq, + sp[used_sacks].end_seq)) { + if (dup_sack) { + if (!tp->undo_marker) + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO); + else + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD); + } else { + /* Don't count olds caused by ACK reordering */ + if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) && + !after(sp[used_sacks].end_seq, tp->snd_una)) + continue; + NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD); + } + if (i == 0) + first_sack_index = -1; + continue; + } + + /* Ignore very old stuff early */ + if (!after(sp[used_sacks].end_seq, prior_snd_una)) + continue; + + used_sacks++; + } + /* SACK fastpath: * if the only SACK change is the increase of 
the end_seq of * the first block then only apply that SACK block * and use retrans queue hinting otherwise slowpath */ force_one_sack = 1; - for (i = 0; i < num_sacks; i++) { - __be32 start_seq = sp[i].start_seq; - __be32 end_seq = sp[i].end_seq; + for (i = 0; i < used_sacks; i++) { + u32 start_seq = sp[i].start_seq; + u32 end_seq = sp[i].end_seq; if (i == 0) { if (tp->recv_sack_cache[i].start_seq != start_seq) @@ -1398,19 +1435,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->recv_sack_cache[i].end_seq = 0; } - first_sack_index = 0; if (force_one_sack) - num_sacks = 1; + used_sacks = 1; else { int j; tp->fastpath_skb_hint = NULL; /* order SACK blocks to allow in order walk of the retrans queue */ - for (i = num_sacks-1; i > 0; i--) { + for (i = used_sacks - 1; i > 0; i--) { for (j = 0; j < i; j++){ - if (after(ntohl(sp[j].start_seq), - ntohl(sp[j+1].start_seq))){ - struct tcp_sack_block_wire tmp; + if (after(sp[j].start_seq, sp[j+1].start_seq)) { + struct tcp_sack_block tmp; tmp = sp[j]; sp[j] = sp[j+1]; @@ -1433,32 +1468,14 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ cached_fack_count = 0; } - for (i = 0; i < num_sacks; i++) { + for (i = 0; i < used_sacks; i++) { struct sk_buff *skb; - __u32 start_seq = ntohl(sp->start_seq); - __u32 end_seq = ntohl(sp->end_seq); + u32 start_seq = sp[i].start_seq; + u32 end_seq = sp[i].end_seq; int fack_count; int dup_sack = (found_dup_sack && (i == first_sack_index)); int next_dup = (found_dup_sack && (i+1 == first_sack_index)); - sp++; - - if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) { - if (dup_sack) { - if (!tp->undo_marker) - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO); - else - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD); - } else { - /* Don't count olds caused by ACK reordering */ - if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) && - !after(end_seq, tp->snd_una)) - continue; - NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD); - } - continue; - } - skb = cached_skb; fack_count = cached_fack_count; @@ -1489,8 +1506,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* Due to sorting DSACK may reside within this SACK block! */ if (next_dup) { - u32 dup_start = ntohl(sp->start_seq); - u32 dup_end = ntohl(sp->end_seq); + u32 dup_start = sp[i+1].start_seq; + u32 dup_end = sp[i+1].end_seq; if (before(TCP_SKB_CB(skb)->seq, dup_end)) { in_sack = tcp_match_skb_to_sack(sk, skb, dup_start, dup_end); -- cgit v1.2.3-70-g09d2 From 68f8353b480e5f2e136c38a511abdbb88eaa8ce2 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 15 Nov 2007 19:50:37 -0800 Subject: [TCP]: Rewrite SACK block processing & sack_recv_cache use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Key points of this patch are: - In case new SACK information is advance only type, no skb processing below previously discovered highest point is done - Optimize cases below highest point too since there's no need to always go up to highest point (which is very likely still present in that SACK), this is not entirely true though because I'm dropping the fastpath_skb_hint which could previously optimize those cases even better. Whether that's significant, I'm not too sure. Currently it will provide skipping by walking. Combined with RB-tree, all skipping would become fast too regardless of window size (can be done incrementally later). 
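A rough standalone sketch of that head/tail splitting against a cached block follows (illustrative only, not kernel code: before()/after() mimic the kernel's wrapping sequence-number compares, everything else here is hypothetical). Only the ranges reported as "walk" would get the per-skb tagging; the rest is skipped or fast-forwarded past.

    #include <stdint.h>
    #include <stdio.h>

    static int before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
    static int after(uint32_t a, uint32_t b)  { return before(b, a); }

    struct sack_block { uint32_t start_seq, end_seq; };

    /* Decide which parts of a newly reported SACK block still need the
     * expensive per-skb walk, given one block remembered from earlier ACKs.
     */
    static void process_block(struct sack_block sb, struct sack_block cache)
    {
        if (!after(sb.end_seq, cache.start_seq) ||
            !before(sb.start_seq, cache.end_seq)) {
            printf("no overlap with cache: walk %u..%u in full\n",
                   sb.start_seq, sb.end_seq);
            return;
        }
        if (before(sb.start_seq, cache.start_seq))  /* new range below cache */
            printf("walk head %u..%u\n", sb.start_seq, cache.start_seq);
        if (after(sb.end_seq, cache.end_seq))       /* new range above cache */
            printf("skip to %u, then walk tail %u..%u\n",
                   cache.end_seq, cache.end_seq, sb.end_seq);
        else
            printf("remainder already covered by cache: skip it\n");
    }

    int main(void)
    {
        struct sack_block cache = { 1000, 4000 };
        process_block((struct sack_block){ 500, 4500 }, cache); /* new head and tail */
        process_block((struct sack_block){ 2000, 3000 }, cache); /* fully covered */
        return 0;
    }
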
Previously a number of cases in TCP SACK processing failed to take advantage of the costly stored information in sack_recv_cache, most importantly on expected events such as cumulative ACKs and new hole ACKs. Processing such ACKs resulted in rather long walks that build up latency (which easily gets nasty when the window is huge). Those latencies are often completely unnecessary compared with the amount of _new_ information received; usually a cumulative ACK carries no new information at all, yet TCP walks the whole queue unnecessarily, potentially taking a number of costly cache misses on the way, etc.!

Since the inclusion of highest_sack, there is a lot of information that is very likely redundant (the SACK fastpath hint stuff, fackets_out, highest_sack), though there is no ultimate guarantee that it will remain the same the whole time (in all unearthly scenarios). Take advantage of this knowledge here: drop the fastpath hint and use direct access to the highest SACKed skb as a replacement. Effectively the "special cased" fastpath is dropped. This change adds some complexity to introduce a better-covered "fastpath", though the added complexity should make TCP behave in a more cache-friendly way.

The current ACK's SACK blocks are compared against each cached block individually, and only the ranges that are new are then scanned by the expensive walk. For other parts of the write queue, even when in a previously known part of the SACK blocks, a faster skip function is used (if necessary at all). In addition, whenever possible, TCP fast-forwards to the highest_sack skb that was made available by an earlier patch. In the typical case nothing but this fast-forward and the mandatory markings after it occur, making the access pattern quite similar to the former fastpath "special case".

DSACKs are a special case that must always be walked. The copying into the local recv_sack_cache could be more intelligent w.r.t. DSACKs, which are likely to be there only once, but that is left to a separate patch.

Signed-off-by: Ilpo Järvinen Signed-off-by: David S.
Miller --- include/linux/tcp.h | 3 - include/net/tcp.h | 1 - net/ipv4/tcp_input.c | 271 +++++++++++++++++++++++++++++++------------------- net/ipv4/tcp_output.c | 14 +-- 4 files changed, 172 insertions(+), 117 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 794497c7d75..08027f1d7f3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -343,10 +343,7 @@ struct tcp_sock { struct sk_buff *scoreboard_skb_hint; struct sk_buff *retransmit_skb_hint; struct sk_buff *forward_skb_hint; - struct sk_buff *fastpath_skb_hint; - int fastpath_cnt_hint; /* Lags behind by current skb's pcount - * compared to respective fackets_out */ int lost_cnt_hint; int retransmit_cnt_hint; diff --git a/include/net/tcp.h b/include/net/tcp.h index 0ede804b16d..f0c5e7a2940 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1081,7 +1081,6 @@ static inline void tcp_clear_retrans_hints_partial(struct tcp_sock *tp) static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) { tcp_clear_retrans_hints_partial(tp); - tp->fastpath_skb_hint = NULL; } /* MD5 Signature */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a287747e9dd..3ad6a19ad30 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1333,6 +1333,88 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp, return flag; } +static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, + struct tcp_sack_block *next_dup, + u32 start_seq, u32 end_seq, + int dup_sack_in, int *fack_count, + int *reord, int *flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tcp_for_write_queue_from(skb, sk) { + int in_sack = 0; + int dup_sack = dup_sack_in; + + if (skb == tcp_send_head(sk)) + break; + + /* queue is in-order => we can short-circuit the walk early */ + if (!before(TCP_SKB_CB(skb)->seq, end_seq)) + break; + + if ((next_dup != NULL) && + before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) { + in_sack = tcp_match_skb_to_sack(sk, skb, + next_dup->start_seq, + next_dup->end_seq); + if (in_sack > 0) + dup_sack = 1; + } + + if (in_sack <= 0) + in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq); + if (unlikely(in_sack < 0)) + break; + + if (in_sack) + *flag |= tcp_sacktag_one(skb, tp, reord, dup_sack, *fack_count); + + *fack_count += tcp_skb_pcount(skb); + } + return skb; +} + +/* Avoid all extra work that is being done by sacktag while walking in + * a normal way + */ +static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, + u32 skip_to_seq) +{ + tcp_for_write_queue_from(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + + if (before(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) + break; + } + return skb; +} + +static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, + struct sock *sk, + struct tcp_sack_block *next_dup, + u32 skip_to_seq, + int *fack_count, int *reord, + int *flag) +{ + if (next_dup == NULL) + return skb; + + if (before(next_dup->start_seq, skip_to_seq)) { + skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq); + tcp_sacktag_walk(skb, sk, NULL, + next_dup->start_seq, next_dup->end_seq, + 1, fack_count, reord, flag); + } + + return skb; +} + +static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache) +{ + return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache); +} + static int tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) { @@ -1342,16 +1424,16 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 
prior_snd_ TCP_SKB_CB(ack_skb)->sacked); struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); struct tcp_sack_block sp[4]; - struct sk_buff *cached_skb; + struct tcp_sack_block *cache; + struct sk_buff *skb; int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int used_sacks; int reord = tp->packets_out; int flag = 0; int found_dup_sack = 0; - int cached_fack_count; - int i; + int fack_count; + int i, j; int first_sack_index; - int force_one_sack; if (!tp->sacked_out) { if (WARN_ON(tp->fackets_out)) @@ -1409,132 +1491,123 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ used_sacks++; } - /* SACK fastpath: - * if the only SACK change is the increase of the end_seq of - * the first block then only apply that SACK block - * and use retrans queue hinting otherwise slowpath */ - force_one_sack = 1; - for (i = 0; i < used_sacks; i++) { - u32 start_seq = sp[i].start_seq; - u32 end_seq = sp[i].end_seq; - - if (i == 0) { - if (tp->recv_sack_cache[i].start_seq != start_seq) - force_one_sack = 0; - } else { - if ((tp->recv_sack_cache[i].start_seq != start_seq) || - (tp->recv_sack_cache[i].end_seq != end_seq)) - force_one_sack = 0; - } - tp->recv_sack_cache[i].start_seq = start_seq; - tp->recv_sack_cache[i].end_seq = end_seq; - } - /* Clear the rest of the cache sack blocks so they won't match mistakenly. */ - for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) { - tp->recv_sack_cache[i].start_seq = 0; - tp->recv_sack_cache[i].end_seq = 0; - } + /* order SACK blocks to allow in order walk of the retrans queue */ + for (i = used_sacks - 1; i > 0; i--) { + for (j = 0; j < i; j++){ + if (after(sp[j].start_seq, sp[j+1].start_seq)) { + struct tcp_sack_block tmp; - if (force_one_sack) - used_sacks = 1; - else { - int j; - tp->fastpath_skb_hint = NULL; - - /* order SACK blocks to allow in order walk of the retrans queue */ - for (i = used_sacks - 1; i > 0; i--) { - for (j = 0; j < i; j++){ - if (after(sp[j].start_seq, sp[j+1].start_seq)) { - struct tcp_sack_block tmp; - - tmp = sp[j]; - sp[j] = sp[j+1]; - sp[j+1] = tmp; - - /* Track where the first SACK block goes to */ - if (j == first_sack_index) - first_sack_index = j+1; - } + tmp = sp[j]; + sp[j] = sp[j+1]; + sp[j+1] = tmp; + /* Track where the first SACK block goes to */ + if (j == first_sack_index) + first_sack_index = j+1; } } } - /* Use SACK fastpath hint if valid */ - cached_skb = tp->fastpath_skb_hint; - cached_fack_count = tp->fastpath_cnt_hint; - if (!cached_skb) { - cached_skb = tcp_write_queue_head(sk); - cached_fack_count = 0; + skb = tcp_write_queue_head(sk); + fack_count = 0; + i = 0; + + if (!tp->sacked_out) { + /* It's already past, so skip checking against it */ + cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache); + } else { + cache = tp->recv_sack_cache; + /* Skip empty blocks in at head of the cache */ + while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq && + !cache->end_seq) + cache++; } - for (i = 0; i < used_sacks; i++) { - struct sk_buff *skb; + while (i < used_sacks) { u32 start_seq = sp[i].start_seq; u32 end_seq = sp[i].end_seq; - int fack_count; int dup_sack = (found_dup_sack && (i == first_sack_index)); - int next_dup = (found_dup_sack && (i+1 == first_sack_index)); + struct tcp_sack_block *next_dup = NULL; - skb = cached_skb; - fack_count = cached_fack_count; + if (found_dup_sack && ((i + 1) == first_sack_index)) + next_dup = &sp[i + 1]; /* Event "B" in the comment above. 
*/ if (after(end_seq, tp->high_seq)) flag |= FLAG_DATA_LOST; - tcp_for_write_queue_from(skb, sk) { - int in_sack = 0; - - if (skb == tcp_send_head(sk)) - break; - - cached_skb = skb; - cached_fack_count = fack_count; - if (i == first_sack_index) { - tp->fastpath_skb_hint = skb; - tp->fastpath_cnt_hint = fack_count; + /* Skip too early cached blocks */ + while (tcp_sack_cache_ok(tp, cache) && + !before(start_seq, cache->end_seq)) + cache++; + + /* Can skip some work by looking recv_sack_cache? */ + if (tcp_sack_cache_ok(tp, cache) && !dup_sack && + after(end_seq, cache->start_seq)) { + + /* Head todo? */ + if (before(start_seq, cache->start_seq)) { + skb = tcp_sacktag_skip(skb, sk, start_seq); + skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, + cache->start_seq, dup_sack, + &fack_count, &reord, &flag); } - /* The retransmission queue is always in order, so - * we can short-circuit the walk early. - */ - if (!before(TCP_SKB_CB(skb)->seq, end_seq)) - break; - - dup_sack = (found_dup_sack && (i == first_sack_index)); + /* Rest of the block already fully processed? */ + if (!after(end_seq, cache->end_seq)) { + skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq, + &fack_count, &reord, &flag); + goto advance_sp; + } - /* Due to sorting DSACK may reside within this SACK block! */ - if (next_dup) { - u32 dup_start = sp[i+1].start_seq; - u32 dup_end = sp[i+1].end_seq; + /* ...tail remains todo... */ + if (TCP_SKB_CB(tp->highest_sack)->end_seq == cache->end_seq) { + /* ...but better entrypoint exists! Check that DSACKs are + * properly accounted while skipping here + */ + tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq, + &fack_count, &reord, &flag); - if (before(TCP_SKB_CB(skb)->seq, dup_end)) { - in_sack = tcp_match_skb_to_sack(sk, skb, dup_start, dup_end); - if (in_sack > 0) - dup_sack = 1; - } + skb = tcp_write_queue_next(sk, tp->highest_sack); + fack_count = tp->fackets_out; + cache++; + goto walk; } - /* DSACK info lost if out-of-mem, try SACK still */ - if (in_sack <= 0) - in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq); - if (unlikely(in_sack < 0)) - break; - - if (in_sack) - flag |= tcp_sacktag_one(skb, tp, &reord, dup_sack, fack_count); + skb = tcp_sacktag_skip(skb, sk, cache->end_seq); + /* Check overlap against next cached too (past this one already) */ + cache++; + continue; + } - fack_count += tcp_skb_pcount(skb); + if (!before(start_seq, tcp_highest_sack_seq(tp))) { + skb = tcp_write_queue_next(sk, tp->highest_sack); + fack_count = tp->fackets_out; } + skb = tcp_sacktag_skip(skb, sk, start_seq); + +walk: + skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq, + dup_sack, &fack_count, &reord, &flag); +advance_sp: /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct * due to in-order walk */ if (after(end_seq, tp->frto_highmark)) flag &= ~FLAG_ONLY_ORIG_SACKED; + + i++; } + /* Clear the head of the cache sack blocks so we can skip it next time */ + for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) { + tp->recv_sack_cache[i].start_seq = 0; + tp->recv_sack_cache[i].end_seq = 0; + } + for (j = 0; j < used_sacks; j++) + tp->recv_sack_cache[i++] = sp[j]; + flag |= tcp_mark_lost_retrans(sk); tcp_verify_left_out(tp); @@ -2821,9 +2894,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, } tp->fackets_out -= min(pkts_acked, tp->fackets_out); - /* hint's skb might be NULL but we don't need to care */ - tp->fastpath_cnt_hint -= min_t(u32, pkts_acked, - tp->fastpath_cnt_hint); + if (ca_ops->pkts_acked) { s32 
rtt_us = -1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ce506af5ce0..030fc69ea21 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -653,9 +653,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned } /* When a modification to fackets out becomes necessary, we need to check - * skb is counted to fackets_out or not. Another important thing is to - * tweak SACK fastpath hint too as it would overwrite all changes unless - * hint is also changed. + * skb is counted to fackets_out or not. */ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, int decr) @@ -667,11 +665,6 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, if (!before(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq)) tp->fackets_out -= decr; - - /* cnt_hint is "off-by-one" compared with fackets_out (see sacktag) */ - if (tp->fastpath_skb_hint != NULL && - after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) - tp->fastpath_cnt_hint -= decr; } /* Function to create two new TCP segments. Shrinks the given segment @@ -1753,11 +1746,6 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m /* changed transmit queue under us so clear hints */ tcp_clear_retrans_hints_partial(tp); - /* manually tune sacktag skb hint */ - if (tp->fastpath_skb_hint == next_skb) { - tp->fastpath_skb_hint = skb; - tp->fastpath_cnt_hint -= tcp_skb_pcount(skb); - } sk_stream_free_skb(sk, next_skb); } -- cgit v1.2.3-70-g09d2 From 20de20beba6e9bd2e1c83696bfefa3b16cda9a74 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 16 Nov 2007 16:17:05 -0800 Subject: [TCP]: Correct DSACK check placing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously one of the in-block skip branches was missing it. Also, drop it from tail-fully-processed case because the next iteration will do exactly the same thing, i.e., process the SACK block that contains the DSACK information. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3ad6a19ad30..79996b16b94 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1554,20 +1554,15 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } /* Rest of the block already fully processed? */ - if (!after(end_seq, cache->end_seq)) { - skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq, - &fack_count, &reord, &flag); + if (!after(end_seq, cache->end_seq)) goto advance_sp; - } + + skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq, + &fack_count, &reord, &flag); /* ...tail remains todo... */ if (TCP_SKB_CB(tp->highest_sack)->end_seq == cache->end_seq) { - /* ...but better entrypoint exists! Check that DSACKs are - * properly accounted while skipping here - */ - tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq, - &fack_count, &reord, &flag); - + /* ...but better entrypoint exists! */ skb = tcp_write_queue_next(sk, tp->highest_sack); fack_count = tp->fackets_out; cache++; -- cgit v1.2.3-70-g09d2 From 8d8ad9d7c4bfe79bc91b7fc419ecfb9dcdfe6a51 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 26 Nov 2007 20:10:50 +0800 Subject: [NET]: Name magic constants in sock_wake_async() The sock_wake_async() performs a bit different actions depending on "how" argument. 
Unfortunately this argument ony has numerical magic values. I propose to give names to their constants to help people reading this function callers understand what's going on without looking into this function all the time. I suppose this is 2.6.25 material, but if it's not (or the naming seems poor/bad/awful), I can rework it against the current net-2.6 tree. Signed-off-by: Pavel Emelyanov Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/net.h | 7 +++++++ net/atm/common.c | 2 +- net/core/sock.c | 8 ++++---- net/core/stream.c | 2 +- net/dccp/input.c | 8 ++++---- net/dccp/output.c | 2 +- net/ipv4/tcp_input.c | 12 ++++++------ net/rxrpc/af_rxrpc.c | 2 +- net/sctp/socket.c | 3 ++- net/socket.c | 9 ++++----- net/unix/af_unix.c | 8 ++++---- 11 files changed, 35 insertions(+), 28 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/linux/net.h b/include/linux/net.h index 0235d917d5c..f95f12c5840 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -186,6 +186,13 @@ struct net_proto_family { struct iovec; struct kvec; +enum { + SOCK_WAKE_IO, + SOCK_WAKE_WAITD, + SOCK_WAKE_SPACE, + SOCK_WAKE_URG, +}; + extern int sock_wake_async(struct socket *sk, int how, int band); extern int sock_register(const struct net_proto_family *fam); extern void sock_unregister(int family); diff --git a/net/atm/common.c b/net/atm/common.c index eba09a04f6b..c865517ba44 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -113,7 +113,7 @@ static void vcc_write_space(struct sock *sk) if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); - sk_wake_async(sk, 2, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); diff --git a/net/core/sock.c b/net/core/sock.c index eac7aa0721d..118214047ed 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1498,7 +1498,7 @@ static void sock_def_error_report(struct sock *sk) read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); - sk_wake_async(sk,0,POLL_ERR); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); } @@ -1507,7 +1507,7 @@ static void sock_def_readable(struct sock *sk, int len) read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); - sk_wake_async(sk,1,POLL_IN); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&sk->sk_callback_lock); } @@ -1524,7 +1524,7 @@ static void sock_def_write_space(struct sock *sk) /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) - sk_wake_async(sk, 2, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); @@ -1539,7 +1539,7 @@ void sk_send_sigurg(struct sock *sk) { if (sk->sk_socket && sk->sk_socket->file) if (send_sigurg(&sk->sk_socket->file->f_owner)) - sk_wake_async(sk, 3, POLL_PRI); + sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); } void sk_reset_timer(struct sock *sk, struct timer_list* timer, diff --git a/net/core/stream.c b/net/core/stream.c index b2fb846f42a..5586879bb9b 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -35,7 +35,7 @@ void sk_stream_write_space(struct sock *sk) if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) - sock_wake_async(sock, 2, POLL_OUT); + sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); } } diff --git a/net/dccp/input.c 
b/net/dccp/input.c index df0fb2c149a..ef299fbd7c2 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -37,7 +37,7 @@ static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); dccp_fin(sk, skb); dccp_set_state(sk, DCCP_CLOSED); - sk_wake_async(sk, 1, POLL_HUP); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); } static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) @@ -90,7 +90,7 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) dccp_fin(sk, skb); if (err && !sock_flag(sk, SOCK_DEAD)) - sk_wake_async(sk, 0, POLL_ERR); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); dccp_time_wait(sk, DCCP_TIME_WAIT, 0); } @@ -416,7 +416,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); - sk_wake_async(sk, 0, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); } if (sk->sk_write_pending || icsk->icsk_ack.pingpong || @@ -624,7 +624,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, switch (old_state) { case DCCP_PARTOPEN: sk->sk_state_change(sk); - sk_wake_async(sk, 0, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); break; } } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { diff --git a/net/dccp/output.c b/net/dccp/output.c index f49544618f2..33ce737ef3a 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -170,7 +170,7 @@ void dccp_write_space(struct sock *sk) wake_up_interruptible(sk->sk_sleep); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) - sk_wake_async(sk, 2, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); read_unlock(&sk->sk_callback_lock); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 79996b16b94..97ea3eda206 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3595,9 +3595,9 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) /* Do not send POLL_HUP for half duplex close. */ if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) - sk_wake_async(sk, 1, POLL_HUP); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); else - sk_wake_async(sk, 1, POLL_IN); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); } } @@ -4956,7 +4956,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); - sk_wake_async(sk, 0, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); } if (sk->sk_write_pending || @@ -5186,9 +5186,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * are not waked up, because sk->sk_sleep == * NULL and sk->sk_socket == NULL. 
*/ - if (sk->sk_socket) { - sk_wake_async(sk,0,POLL_OUT); - } + if (sk->sk_socket) + sk_wake_async(sk, + SOCK_WAKE_IO, POLL_OUT); tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = ntohs(th->window) << diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index d6389450c4b..5e82f1c0afb 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk) if (rxrpc_writable(sk)) { if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); - sk_wake_async(sk, 2, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ea9649ca0b2..dc2f9221f09 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6008,7 +6008,8 @@ static void __sctp_write_space(struct sctp_association *asoc) */ if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) - sock_wake_async(sock, 2, POLL_OUT); + sock_wake_async(sock, + SOCK_WAKE_SPACE, POLL_OUT); } } } diff --git a/net/socket.c b/net/socket.c index aeeab388cc3..9ebca5c695d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1070,20 +1070,19 @@ int sock_wake_async(struct socket *sock, int how, int band) if (!sock || !sock->fasync_list) return -1; switch (how) { - case 1: - + case SOCK_WAKE_WAITD: if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) break; goto call_kill; - case 2: + case SOCK_WAKE_SPACE: if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) break; /* fall through */ - case 0: + case SOCK_WAKE_IO: call_kill: __kill_fasync(sock->fasync_list, SIGIO, band); break; - case 3: + case SOCK_WAKE_URG: __kill_fasync(sock->fasync_list, SIGURG, band); } return 0; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0f1ecbf86d0..393197afb19 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -317,7 +317,7 @@ static void unix_write_space(struct sock *sk) if (unix_writable(sk)) { if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible_sync(sk->sk_sleep); - sk_wake_async(sk, 2, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); } @@ -403,7 +403,7 @@ static int unix_release_sock (struct sock *sk, int embrion) unix_state_unlock(skpair); skpair->sk_state_change(skpair); read_lock(&skpair->sk_callback_lock); - sk_wake_async(skpair,1,POLL_HUP); + sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); read_unlock(&skpair->sk_callback_lock); } sock_put(skpair); /* It may now die */ @@ -1900,9 +1900,9 @@ static int unix_shutdown(struct socket *sock, int mode) other->sk_state_change(other); read_lock(&other->sk_callback_lock); if (peer_mode == SHUTDOWN_MASK) - sk_wake_async(other,1,POLL_HUP); + sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); else if (peer_mode & RCV_SHUTDOWN) - sk_wake_async(other,1,POLL_IN); + sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&other->sk_callback_lock); } if (other) -- cgit v1.2.3-70-g09d2 From ea4f76ae13b4240dac304ed50636391d6b22e9c5 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 30 Nov 2007 00:59:07 +1100 Subject: [TCP]: Two fixes to new sacktag code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1) Skip condition used to be wrong way around which made SACK processing very broken, missed many blocks because of that. 2) Use highest_sack advancement only if some skbs are already sacked because otherwise tcp_write_queue_next may move things too far (occurs mainly with GSO). 
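For readers lining this description up with the two hunks below, the corrected conditions read roughly as follows. This is an illustrative sketch, not a third hunk of the patch: it only restates the two changed tests using helpers already visible in this series (tcp_for_write_queue_from(), tcp_send_head(), tcp_highest_sack_seq(), tcp_write_queue_next()) and adds comments on why each matters.

	/* 1) tcp_sacktag_skip(): stop at the first skb that still reaches
	 *    skip_to_seq.  The old, inverted test kept walking past such
	 *    skbs, so later processing missed many SACK blocks.
	 */
	tcp_for_write_queue_from(skb, sk) {
		if (skb == tcp_send_head(sk))
			break;
		if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
			break;
	}

	/* 2) Fast-forward from highest_sack only if something is already
	 *    SACKed; otherwise highest_sack just points at the queue head
	 *    and tcp_write_queue_next() can jump past unsacked data,
	 *    typically a large GSO skb.
	 */
	if (tp->sacked_out && !before(start_seq, tcp_highest_sack_seq(tp))) {
		skb = tcp_write_queue_next(sk, tp->highest_sack);
		fack_count = tp->fackets_out;
	}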
The other, similar advancement is not a problem because highest_sack was previously put to point to a sacked skb. These problems were located because of a problem report from Matt Mathis. Signed-off-by: Ilpo Järvinen Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 97ea3eda206..33d284e3f7f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1384,7 +1384,7 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, if (skb == tcp_send_head(sk)) break; - if (before(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) + if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) break; } return skb; @@ -1575,7 +1575,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ continue; } - if (!before(start_seq, tcp_highest_sack_seq(tp))) { + if (tp->sacked_out && !before(start_seq, tcp_highest_sack_seq(tp))) { skb = tcp_write_queue_next(sk, tp->highest_sack); fack_count = tp->fackets_out; } -- cgit v1.2.3-70-g09d2 From bce392f3b02755a8c615d4ced3d3b9cb1d9e3648 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sun, 2 Dec 2007 00:47:56 +0200 Subject: [TCP]: Move LOSTRETRANS MIB outside !(L|S) check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Usually those skbs will have L set; not counting them as lost retransmissions is misleading. Signed-off-by: Ilpo Järvinen Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 33d284e3f7f..6ca77f8bcee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1160,8 +1160,8 @@ static int tcp_mark_lost_retrans(struct sock *sk) tp->lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; flag |= FLAG_DATA_SACKED; - NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); } + NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); } else { if (before(ack_seq, new_low_seq)) new_low_seq = ack_seq; -- cgit v1.2.3-70-g09d2 From 407ef1de03e87225d75a9bed271f35ea6880f5f1 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sun, 2 Dec 2007 00:47:57 +0200 Subject: [TCP]: Remove superfluous FLAG_DATA_SACKED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To get there, highest_sack must have advanced. When it advances, a new skb is SACKed, which already sets that FLAG. Besides, the original purpose of it has puzzled me; I never understood why LOST bit setting of a retransmitted skb is marked with FLAG_DATA_SACKED. Signed-off-by: Ilpo Järvinen Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6ca77f8bcee..87111084280 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1118,12 +1118,11 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, * highest SACK block). Also calculate the lowest snd_nxt among the remaining * retransmitted skbs to avoid some costly processing per ACKs.
*/ -static int tcp_mark_lost_retrans(struct sock *sk) +static void tcp_mark_lost_retrans(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int flag = 0; int cnt = 0; u32 new_low_seq = tp->snd_nxt; u32 received_upto = TCP_SKB_CB(tp->highest_sack)->end_seq; @@ -1131,7 +1130,7 @@ static int tcp_mark_lost_retrans(struct sock *sk) if (!tcp_is_fack(tp) || !tp->retrans_out || !after(received_upto, tp->lost_retrans_low) || icsk->icsk_ca_state != TCP_CA_Recovery) - return flag; + return; tcp_for_write_queue(skb, sk) { u32 ack_seq = TCP_SKB_CB(skb)->ack_seq; @@ -1159,7 +1158,6 @@ static int tcp_mark_lost_retrans(struct sock *sk) if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { tp->lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - flag |= FLAG_DATA_SACKED; } NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); } else { @@ -1171,8 +1169,6 @@ static int tcp_mark_lost_retrans(struct sock *sk) if (tp->retrans_out) tp->lost_retrans_low = new_low_seq; - - return flag; } static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, @@ -1603,7 +1599,7 @@ advance_sp: for (j = 0; j < used_sacks; j++) tp->recv_sack_cache[i++] = sp[j]; - flag |= tcp_mark_lost_retrans(sk); + tcp_mark_lost_retrans(sk); tcp_verify_left_out(tp); -- cgit v1.2.3-70-g09d2 From ede9f3b186bc3eb0fce084bdcab500efc3721a80 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sun, 2 Dec 2007 00:47:58 +0200 Subject: [TCP]: Unite identical code from two seqno split blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bogus seqno compares just mislead, the code is identical for both sides of the seqno compare (and was even executed just once because of return in between). Signed-off-by: Ilpo Järvinen Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 87111084280..d313dea361a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1246,8 +1246,7 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp, if (dup_sack && (sacked & TCPCB_RETRANS)) { if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) tp->undo_retrans--; - if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una) && - (sacked & TCPCB_SACKED_ACKED)) + if (sacked & TCPCB_SACKED_ACKED) *reord = min(fack_count, *reord); } @@ -1310,10 +1309,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp, if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) tp->highest_sack = skb; - - } else { - if (dup_sack && (sacked & TCPCB_RETRANS)) - *reord = min(fack_count, *reord); } /* D-SACK. We can detect redundant retransmission in S|R and plain R -- cgit v1.2.3-70-g09d2 From c3a05c6050a339c92e49fae0ba77dbba0d41fd99 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sun, 2 Dec 2007 00:47:59 +0200 Subject: [TCP]: Cong.ctrl modules: remove unused good_ack from cong_avoid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/net/tcp.h | 4 ++-- net/ipv4/tcp_bic.c | 3 +-- net/ipv4/tcp_cong.c | 2 +- net/ipv4/tcp_cubic.c | 3 +-- net/ipv4/tcp_highspeed.c | 3 +-- net/ipv4/tcp_htcp.c | 3 +-- net/ipv4/tcp_hybla.c | 5 ++--- net/ipv4/tcp_illinois.c | 3 +-- net/ipv4/tcp_input.c | 9 ++++----- net/ipv4/tcp_lp.c | 4 ++-- net/ipv4/tcp_scalable.c | 3 +-- net/ipv4/tcp_vegas.c | 7 +++---- net/ipv4/tcp_veno.c | 7 +++---- net/ipv4/tcp_yeah.c | 3 +-- 14 files changed, 24 insertions(+), 35 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 7e583261f3a..cdd0050b97a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -652,7 +652,7 @@ struct tcp_congestion_ops { /* lower bound for congestion window (optional) */ u32 (*min_cwnd)(const struct sock *sk); /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight, int good_ack); + void (*cong_avoid)(struct sock *sk, u32 ack, u32 in_flight); /* call before changing ca_state (optional) */ void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ @@ -683,7 +683,7 @@ extern void tcp_slow_start(struct tcp_sock *tp); extern struct tcp_congestion_ops tcp_init_congestion_ops; extern u32 tcp_reno_ssthresh(struct sock *sk); -extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag); +extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight); extern u32 tcp_reno_min_cwnd(const struct sock *sk); extern struct tcp_congestion_ops tcp_reno; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 5dba0fc8f57..5212ed9b0c9 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -136,8 +136,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } -static void bictcp_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int data_acked) +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 55fca1820c3..4451750b478 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -324,7 +324,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start); /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. 
*/ -void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 80bd084a9f9..3aa0b23c1ea 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -246,8 +246,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 1; } -static void bictcp_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int data_acked) +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 14a073d8b60..8b6caaf75bb 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -109,8 +109,7 @@ static void hstcp_init(struct sock *sk) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct sock *sk, u32 adk, - u32 in_flight, int data_acked) +static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 5215691f276..af99776146f 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -225,8 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk) return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int data_acked) +static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index b3e55cf5617..44618b67591 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -85,8 +85,7 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int flag) +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct hybla *ca = inet_csk_ca(sk); @@ -103,7 +102,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, return; if (!ca->hybla_en) - return tcp_reno_cong_avoid(sk, ack, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, in_flight); if (ca->rho == 0) hybla_recalc_param(sk); diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 5aa5f5496d6..1eba160b72d 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -256,8 +256,7 @@ static void tcp_illinois_state(struct sock *sk, u8 new_state) /* * Increase window in response to successful acknowledgment. 
*/ -static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int flag) +static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct illinois *ca = inet_csk_ca(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d313dea361a..cb441188870 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2710,11 +2710,10 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, tcp_ack_no_tstamp(sk, seq_rtt, flag); } -static void tcp_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int good) +static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { const struct inet_connection_sock *icsk = inet_csk(sk); - icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight, good); + icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight); tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; } @@ -3238,11 +3237,11 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* Advance CWND, if state allows this. */ if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, prior_in_flight, 0); + tcp_cong_avoid(sk, ack, prior_in_flight); tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) - tcp_cong_avoid(sk, ack, prior_in_flight, 1); + tcp_cong_avoid(sk, ack, prior_in_flight); } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index e7f5ef92cbd..ce3c41ff50b 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -115,12 +115,12 @@ static void tcp_lp_init(struct sock *sk) * Will only call newReno CA when away from inference. * From TCP-LP's paper, this will be handled in additive increasement. */ -static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight, int flag) +static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct lp *lp = inet_csk_ca(sk); if (!(lp->flag & LP_WITHIN_INF)) - tcp_reno_cong_avoid(sk, ack, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight); } /** diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index be27a33a1c6..2747ec7bfb6 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -15,8 +15,7 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int flag) +static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 007304e9984..be24d6ee34b 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -162,14 +162,13 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) } EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); -static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int flag) +static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) - return tcp_reno_cong_avoid(sk, ack, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, in_flight); /* The key players are v_beg_snd_una and v_beg_snd_nxt. * @@ -228,7 +227,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, /* We don't have enough RTT samples to do the Vegas * calculation, so we'll behave like Reno. 
*/ - tcp_reno_cong_avoid(sk, ack, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight); } else { u32 rtt, target_cwnd, diff; diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 8fb2aee0b1a..d16689e9851 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -114,14 +114,13 @@ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) tcp_veno_init(sk); } -static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int flag) +static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct veno *veno = inet_csk_ca(sk); if (!veno->doing_veno_now) - return tcp_reno_cong_avoid(sk, ack, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, in_flight); /* limited by applications */ if (!tcp_is_cwnd_limited(sk, in_flight)) @@ -132,7 +131,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, /* We don't have enough rtt samples to do the Veno * calculation, so we'll behave like Reno. */ - tcp_reno_cong_avoid(sk, ack, in_flight, flag); + tcp_reno_cong_avoid(sk, ack, in_flight); } else { u32 rtt, target_cwnd; diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index c107fba7430..e03b10183a8 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -69,8 +69,7 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, s32 rtt_us) tcp_vegas_pkts_acked(sk, pkts_acked, rtt_us); } -static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, - u32 in_flight, int flag) +static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); struct yeah *yeah = inet_csk_ca(sk); -- cgit v1.2.3-70-g09d2 From 89d478f7f2160780be9d7c9c6bac9bd350d9ed96 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sun, 30 Dec 2007 04:35:27 -0800 Subject: [TCP]: Remove duplicated code block from clean_rtx_queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 50 +++++++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 29 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cb441188870..6bc594a63c9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2801,42 +2801,34 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, tcp_mtup_probe_success(sk, skb); } - if (sacked) { - if (sacked & TCPCB_RETRANS) { - if (sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out -= packets_acked; - flag |= FLAG_RETRANS_DATA_ACKED; - ca_seq_rtt = -1; - seq_rtt = -1; - if ((flag & FLAG_DATA_ACKED) || - (packets_acked > 1)) - flag |= FLAG_NONHEAD_RETRANS_ACKED; - } else { - ca_seq_rtt = now - scb->when; - last_ackt = skb->tstamp; - if (seq_rtt < 0) { - seq_rtt = ca_seq_rtt; - } - if (!(sacked & TCPCB_SACKED_ACKED)) - reord = min(cnt, reord); - } - - if (sacked & TCPCB_SACKED_ACKED) - tp->sacked_out -= packets_acked; - if (sacked & TCPCB_LOST) - tp->lost_out -= packets_acked; - - if ((sacked & TCPCB_URG) && tp->urg_mode && - !before(end_seq, tp->snd_up)) - tp->urg_mode = 0; + if (sacked & TCPCB_RETRANS) { + if (sacked & TCPCB_SACKED_RETRANS) + tp->retrans_out -= packets_acked; + flag |= FLAG_RETRANS_DATA_ACKED; + ca_seq_rtt = -1; + seq_rtt = -1; + if ((flag & FLAG_DATA_ACKED) || + (packets_acked > 1)) + flag |= FLAG_NONHEAD_RETRANS_ACKED; } else { ca_seq_rtt = now - scb->when; last_ackt = skb->tstamp; if (seq_rtt < 0) { seq_rtt = ca_seq_rtt; } - reord = min(cnt, reord); + if (!(sacked & TCPCB_SACKED_ACKED)) + reord = min(cnt, reord); } + + if (sacked & TCPCB_SACKED_ACKED) + tp->sacked_out -= packets_acked; + if (sacked & TCPCB_LOST) + tp->lost_out -= packets_acked; + + if ((sacked & TCPCB_URG) && tp->urg_mode && + !before(end_seq, tp->snd_up)) + tp->urg_mode = 0; + tp->packets_out -= packets_acked; cnt += packets_acked; -- cgit v1.2.3-70-g09d2 From ea60658cde9f4df7d05b95c81a72a95ad07f0701 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sun, 2 Dec 2007 00:48:04 +0200 Subject: [TCP]: Add unlikely() to urgent handling in clean_rtx_queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6bc594a63c9..8e4d74ee31e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2825,8 +2825,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, if (sacked & TCPCB_LOST) tp->lost_out -= packets_acked; - if ((sacked & TCPCB_URG) && tp->urg_mode && - !before(end_seq, tp->snd_up)) + if (unlikely((sacked & TCPCB_URG) && tp->urg_mode && + !before(end_seq, tp->snd_up))) tp->urg_mode = 0; tp->packets_out -= packets_acked; -- cgit v1.2.3-70-g09d2 From 7201883599ac8bff76300117155e299b1a54092f Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sun, 30 Dec 2007 04:37:55 -0800 Subject: [TCP]: Cleanup local variables of clean_rtx_queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8e4d74ee31e..263c536def5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2765,8 +2765,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, u32 now = tcp_time_stamp; int fully_acked = 1; int flag = 0; - int prior_packets = tp->packets_out; - u32 cnt = 0; + u32 pkts_acked = 0; u32 reord = tp->packets_out; s32 seq_rtt = -1; s32 ca_seq_rtt = -1; @@ -2775,7 +2774,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); u32 end_seq; - u32 packets_acked; + u32 acked_pcount; u8 sacked = scb->sacked; /* Determine how many packets and what bytes were acked, tso and else */ @@ -2784,14 +2783,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, !after(tp->snd_una, scb->seq)) break; - packets_acked = tcp_tso_acked(sk, skb); - if (!packets_acked) + acked_pcount = tcp_tso_acked(sk, skb); + if (!acked_pcount) break; fully_acked = 0; end_seq = tp->snd_una; } else { - packets_acked = tcp_skb_pcount(skb); + acked_pcount = tcp_skb_pcount(skb); end_seq = scb->end_seq; } @@ -2803,12 +2802,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, if (sacked & TCPCB_RETRANS) { if (sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out -= packets_acked; + tp->retrans_out -= acked_pcount; flag |= FLAG_RETRANS_DATA_ACKED; ca_seq_rtt = -1; seq_rtt = -1; if ((flag & FLAG_DATA_ACKED) || - (packets_acked > 1)) + (acked_pcount > 1)) flag |= FLAG_NONHEAD_RETRANS_ACKED; } else { ca_seq_rtt = now - scb->when; @@ -2817,20 +2816,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, seq_rtt = ca_seq_rtt; } if (!(sacked & TCPCB_SACKED_ACKED)) - reord = min(cnt, reord); + reord = min(pkts_acked, reord); } if (sacked & TCPCB_SACKED_ACKED) - tp->sacked_out -= packets_acked; + tp->sacked_out -= acked_pcount; if (sacked & TCPCB_LOST) - tp->lost_out -= packets_acked; + tp->lost_out -= acked_pcount; if (unlikely((sacked & TCPCB_URG) && tp->urg_mode && !before(end_seq, tp->snd_up))) tp->urg_mode = 0; - tp->packets_out -= packets_acked; - cnt += packets_acked; + tp->packets_out -= acked_pcount; + pkts_acked += acked_pcount; /* Initial outgoing SYN's get put onto the write_queue * just like anything else we transmit. It is not @@ -2855,7 +2854,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, } if (flag & FLAG_ACKED) { - u32 pkts_acked = prior_packets - tp->packets_out; const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; -- cgit v1.2.3-70-g09d2 From 6859d49475d4f32abe640372117e4b687906e6b6 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sun, 2 Dec 2007 00:48:06 +0200 Subject: [TCP]: Abstract tp->highest_sack accessing & point to next skb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pointing to the next skb is necessary to avoid referencing already SACKed skbs which will soon be on a separate list. Signed-off-by: Ilpo Järvinen Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/net/tcp.h | 35 ++++++++++++++++++++++++++++++++++- net/ipv4/tcp_input.c | 27 +++++++++++++++------------ net/ipv4/tcp_output.c | 11 +++-------- 3 files changed, 52 insertions(+), 21 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 6e392babda4..5ec1cacca8a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1267,8 +1267,12 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb __tcp_add_write_queue_tail(sk, skb); /* Queue it, remembering where we must start sending. */ - if (sk->sk_send_head == NULL) + if (sk->sk_send_head == NULL) { sk->sk_send_head = skb; + + if (tcp_sk(sk)->highest_sack == NULL) + tcp_sk(sk)->highest_sack = skb; + } } static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb) @@ -1318,9 +1322,38 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp) { if (!tp->sacked_out) return tp->snd_una; + + if (tp->highest_sack == NULL) + return tp->snd_nxt; + return TCP_SKB_CB(tp->highest_sack)->seq; } +static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb) +{ + tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL : + tcp_write_queue_next(sk, skb); +} + +static inline struct sk_buff *tcp_highest_sack(struct sock *sk) +{ + return tcp_sk(sk)->highest_sack; +} + +static inline void tcp_highest_sack_reset(struct sock *sk) +{ + tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk); +} + +/* Called when old skb is about to be deleted (to be combined with new skb) */ +static inline void tcp_highest_sack_combine(struct sock *sk, + struct sk_buff *old, + struct sk_buff *new) +{ + if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack)) + tcp_sk(sk)->highest_sack = new; +} + /* /proc */ enum tcp_seq_states { TCP_SEQ_STATE_LISTENING, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 263c536def5..bc2d5f70966 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1125,7 +1125,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) struct sk_buff *skb; int cnt = 0; u32 new_low_seq = tp->snd_nxt; - u32 received_upto = TCP_SKB_CB(tp->highest_sack)->end_seq; + u32 received_upto = tcp_highest_sack_seq(tp); if (!tcp_is_fack(tp) || !tp->retrans_out || !after(received_upto, tp->lost_retrans_low) || @@ -1236,9 +1236,10 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, return in_sack; } -static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp, +static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, int *reord, int dup_sack, int fack_count) { + struct tcp_sock *tp = tcp_sk(sk); u8 sacked = TCP_SKB_CB(skb)->sacked; int flag = 0; @@ -1307,8 +1308,8 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct tcp_sock *tp, if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; - if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) - tp->highest_sack = skb; + if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) + tcp_advance_highest_sack(sk, skb); } /* D-SACK. 
We can detect redundant retransmission in S|R and plain R @@ -1330,8 +1331,6 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, int dup_sack_in, int *fack_count, int *reord, int *flag) { - struct tcp_sock *tp = tcp_sk(sk); - tcp_for_write_queue_from(skb, sk) { int in_sack = 0; int dup_sack = dup_sack_in; @@ -1358,7 +1357,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, break; if (in_sack) - *flag |= tcp_sacktag_one(skb, tp, reord, dup_sack, *fack_count); + *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, *fack_count); *fack_count += tcp_skb_pcount(skb); } @@ -1429,7 +1428,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (!tp->sacked_out) { if (WARN_ON(tp->fackets_out)) tp->fackets_out = 0; - tp->highest_sack = tcp_write_queue_head(sk); + tcp_highest_sack_reset(sk); } found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire, @@ -1552,9 +1551,11 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ &fack_count, &reord, &flag); /* ...tail remains todo... */ - if (TCP_SKB_CB(tp->highest_sack)->end_seq == cache->end_seq) { + if (tcp_highest_sack_seq(tp) == cache->end_seq) { /* ...but better entrypoint exists! */ - skb = tcp_write_queue_next(sk, tp->highest_sack); + skb = tcp_highest_sack(sk); + if (skb == NULL) + break; fack_count = tp->fackets_out; cache++; goto walk; @@ -1566,8 +1567,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ continue; } - if (tp->sacked_out && !before(start_seq, tcp_highest_sack_seq(tp))) { - skb = tcp_write_queue_next(sk, tp->highest_sack); + if (!before(start_seq, tcp_highest_sack_seq(tp))) { + skb = tcp_highest_sack(sk); + if (skb == NULL) + break; fack_count = tp->fackets_out; } skb = tcp_sacktag_skip(skb, sk, start_seq); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7d8583a15d0..9a985b55e7d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -667,7 +667,7 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, if (!tp->sacked_out || tcp_is_reno(tp)) return; - if (!before(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq)) + if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq)) tp->fackets_out -= decr; } @@ -711,9 +711,6 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; - if (tcp_is_sack(tp) && tp->sacked_out && (skb == tp->highest_sack)) - tp->highest_sack = buff; - /* PSH and FIN should only be set in the second packet. */ flags = TCP_SKB_CB(skb)->flags; TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); @@ -1707,9 +1704,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); - if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out && - (next_skb == tp->highest_sack))) - return; + tcp_highest_sack_combine(sk, next_skb, skb); /* Ok. We will be able to collapse the packet. 
*/ tcp_unlink_write_queue(next_skb, sk); @@ -2019,7 +2014,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) break; tp->forward_skb_hint = skb; - if (after(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) + if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) break; if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) -- cgit v1.2.3-70-g09d2 From dfd4f0ae2e111e2b93c295938c0e64ebbb69ae6e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Dec 2007 06:07:53 -0800 Subject: [TCP]: Avoid two divides in __tcp_grow_window() tcp_win_from_space() being signed, compiler might emit an integer divide to compute tcp_win_from_space()/2 . Using right shifts is OK here and less expensive. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bc2d5f70966..519bd24e0d3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -289,8 +289,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ - int truesize = tcp_win_from_space(skb->truesize)/2; - int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2; + int truesize = tcp_win_from_space(skb->truesize) >> 1; + int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) -- cgit v1.2.3-70-g09d2 From c776ee01bd83d9ba58220293a6e76e9772be0e66 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Dec 2007 21:55:39 -0800 Subject: [TCP]: Remove seq_rtt ptr from clean_rtx_queue args MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While checking Gavin's patch I noticed that the returned seq_rtt is not used by the caller. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 519bd24e0d3..efea9873208 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2759,8 +2759,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. */ -static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, - int prior_fackets) +static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2915,7 +2914,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p, } } #endif - *seq_rtt_p = seq_rtt; return flag; } @@ -3152,7 +3150,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) u32 ack = TCP_SKB_CB(skb)->ack_seq; u32 prior_in_flight; u32 prior_fackets; - s32 seq_rtt; int prior_packets; int frto_cwnd = 0; @@ -3218,7 +3215,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, &seq_rtt, prior_fackets); + flag |= tcp_clean_rtx_queue(sk, prior_fackets); if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); -- cgit v1.2.3-70-g09d2 From 3ab224be6d69de912ee21302745ea45a99274dbc Mon Sep 17 00:00:00 2001 From: Hideo Aoki Date: Mon, 31 Dec 2007 00:11:19 -0800 Subject: [NET] CORE: Introducing new memory accounting interface. 
This patch introduces new memory accounting functions for each network protocol. Most of them are renamed from memory accounting functions for stream protocols. At the same time, some stream memory accounting functions are removed since other functions do same thing. Renaming: sk_stream_free_skb() -> sk_wmem_free_skb() __sk_stream_mem_reclaim() -> __sk_mem_reclaim() sk_stream_mem_reclaim() -> sk_mem_reclaim() sk_stream_mem_schedule -> __sk_mem_schedule() sk_stream_pages() -> sk_mem_pages() sk_stream_rmem_schedule() -> sk_rmem_schedule() sk_stream_wmem_schedule() -> sk_wmem_schedule() sk_charge_skb() -> sk_mem_charge() Removeing sk_stream_rfree(): consolidates into sock_rfree() sk_stream_set_owner_r(): consolidates into skb_set_owner_r() sk_stream_mem_schedule() The following functions are added. sk_has_account(): check if the protocol supports accounting sk_mem_uncharge(): do the opposite of sk_mem_charge() In addition, to achieve consolidation, updating sk_wmem_queued is removed from sk_mem_charge(). Next, to consolidate memory accounting functions, this patch adds memory accounting calls to network core functions. Moreover, present memory accounting call is renamed to new accounting call. Finally we replace present memory accounting calls with new interface in TCP and SCTP. Signed-off-by: Takahiro Yasui Signed-off-by: Hideo Aoki Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 3 +- include/net/sock.h | 98 ++++++++++++++++++++++++++------------------- include/net/tcp.h | 4 +- net/core/datagram.c | 2 + net/core/sock.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++ net/core/stream.c | 84 +------------------------------------- net/ipv4/tcp.c | 23 ++++++----- net/ipv4/tcp_input.c | 26 ++++++------ net/ipv4/tcp_output.c | 26 +++++++----- net/ipv4/tcp_timer.c | 8 ++-- net/sctp/protocol.c | 2 +- net/sctp/sm_statefuns.c | 2 +- net/sctp/socket.c | 11 ++--- net/sctp/ulpevent.c | 2 +- net/sctp/ulpqueue.c | 2 +- 15 files changed, 222 insertions(+), 175 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 1b81ede7c2b..4977b0a8153 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -463,8 +463,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) skb->destructor = sctp_sock_rfree; atomic_add(event->rmem_len, &sk->sk_rmem_alloc); /* - * This mimics the behavior of - * sk_stream_set_owner_r + * This mimics the behavior of skb_set_owner_r */ sk->sk_forward_alloc -= event->rmem_len; } diff --git a/include/net/sock.h b/include/net/sock.h index d27ba6fdd03..3d938f6c672 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -460,25 +460,6 @@ static inline int sk_stream_memory_free(struct sock *sk) return sk->sk_wmem_queued < sk->sk_sndbuf; } -extern void sk_stream_rfree(struct sk_buff *skb); - -static inline void sk_stream_set_owner_r(struct sk_buff *skb, struct sock *sk) -{ - skb->sk = sk; - skb->destructor = sk_stream_rfree; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); - sk->sk_forward_alloc -= skb->truesize; -} - -static inline void sk_stream_free_skb(struct sock *sk, struct sk_buff *skb) -{ - skb_truesize_check(skb); - sock_set_flag(sk, SOCK_QUEUE_SHRUNK); - sk->sk_wmem_queued -= skb->truesize; - sk->sk_forward_alloc += skb->truesize; - __kfree_skb(skb); -} - /* The per-socket spinlock must be held here. */ static inline void sk_add_backlog(struct sock *sk, struct sk_buff *skb) { @@ -576,7 +557,7 @@ struct proto { /* * Pressure flag: try to collapse. 
* Technical note: it is used by multiple contexts non atomically. - * All the sk_stream_mem_schedule() is of this nature: accounting + * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ int *memory_pressure; @@ -712,33 +693,73 @@ static inline struct inode *SOCK_INODE(struct socket *socket) return &container_of(socket, struct socket_alloc, socket)->vfs_inode; } -extern void __sk_stream_mem_reclaim(struct sock *sk); -extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind); +/* + * Functions for memory accounting + */ +extern int __sk_mem_schedule(struct sock *sk, int size, int kind); +extern void __sk_mem_reclaim(struct sock *sk); -#define SK_STREAM_MEM_QUANTUM ((int)PAGE_SIZE) -#define SK_STREAM_MEM_QUANTUM_SHIFT ilog2(SK_STREAM_MEM_QUANTUM) +#define SK_MEM_QUANTUM ((int)PAGE_SIZE) +#define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM) +#define SK_MEM_SEND 0 +#define SK_MEM_RECV 1 -static inline int sk_stream_pages(int amt) +static inline int sk_mem_pages(int amt) { - return (amt + SK_STREAM_MEM_QUANTUM - 1) >> SK_STREAM_MEM_QUANTUM_SHIFT; + return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT; } -static inline void sk_stream_mem_reclaim(struct sock *sk) +static inline int sk_has_account(struct sock *sk) { - if (sk->sk_forward_alloc >= SK_STREAM_MEM_QUANTUM) - __sk_stream_mem_reclaim(sk); + /* return true if protocol supports memory accounting */ + return !!sk->sk_prot->memory_allocated; } -static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb) +static inline int sk_wmem_schedule(struct sock *sk, int size) { - return (int)skb->truesize <= sk->sk_forward_alloc || - sk_stream_mem_schedule(sk, skb->truesize, 1); + if (!sk_has_account(sk)) + return 1; + return size <= sk->sk_forward_alloc || + __sk_mem_schedule(sk, size, SK_MEM_SEND); } -static inline int sk_stream_wmem_schedule(struct sock *sk, int size) +static inline int sk_rmem_schedule(struct sock *sk, int size) { + if (!sk_has_account(sk)) + return 1; return size <= sk->sk_forward_alloc || - sk_stream_mem_schedule(sk, size, 0); + __sk_mem_schedule(sk, size, SK_MEM_RECV); +} + +static inline void sk_mem_reclaim(struct sock *sk) +{ + if (!sk_has_account(sk)) + return; + if (sk->sk_forward_alloc >= SK_MEM_QUANTUM) + __sk_mem_reclaim(sk); +} + +static inline void sk_mem_charge(struct sock *sk, int size) +{ + if (!sk_has_account(sk)) + return; + sk->sk_forward_alloc -= size; +} + +static inline void sk_mem_uncharge(struct sock *sk, int size) +{ + if (!sk_has_account(sk)) + return; + sk->sk_forward_alloc += size; +} + +static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) +{ + skb_truesize_check(skb); + sock_set_flag(sk, SOCK_QUEUE_SHRUNK); + sk->sk_wmem_queued -= skb->truesize; + sk_mem_uncharge(sk, skb->truesize); + __kfree_skb(skb); } /* Used by processes to "lock" a socket state, so that @@ -1076,12 +1097,6 @@ static inline int sk_can_gso(const struct sock *sk) extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst); -static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) -{ - sk->sk_wmem_queued += skb->truesize; - sk->sk_forward_alloc -= skb->truesize; -} - static inline int skb_copy_to_page(struct sock *sk, char __user *from, struct sk_buff *skb, struct page *page, int off, int copy) @@ -1101,7 +1116,7 @@ static inline int skb_copy_to_page(struct sock *sk, char __user *from, skb->data_len += copy; skb->truesize += copy; sk->sk_wmem_queued += copy; - sk->sk_forward_alloc -= copy; + 
sk_mem_charge(sk, copy); return 0; } @@ -1127,6 +1142,7 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) skb->sk = sk; skb->destructor = sock_rfree; atomic_add(skb->truesize, &sk->sk_rmem_alloc); + sk_mem_charge(sk, skb->truesize); } extern void sk_reset_timer(struct sock *sk, struct timer_list* timer, diff --git a/include/net/tcp.h b/include/net/tcp.h index 13ebe11a0af..76286e80205 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1196,8 +1196,8 @@ static inline void tcp_write_queue_purge(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) - sk_stream_free_skb(sk, skb); - sk_stream_mem_reclaim(sk); + sk_wmem_free_skb(sk, skb); + sk_mem_reclaim(sk); } static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) diff --git a/net/core/datagram.c b/net/core/datagram.c index 2d733131d7c..8a28fc93b72 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -209,6 +209,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); + sk_mem_reclaim(sk); } /** @@ -248,6 +249,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) } kfree_skb(skb); + sk_mem_reclaim(sk); return err; } diff --git a/net/core/sock.c b/net/core/sock.c index 118214047ed..8c184c4a381 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -282,6 +282,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) goto out; + if (!sk_rmem_schedule(sk, skb->truesize)) { + err = -ENOBUFS; + goto out; + } + skb->dev = NULL; skb_set_owner_r(skb, sk); @@ -1107,7 +1112,9 @@ void sock_rfree(struct sk_buff *skb) { struct sock *sk = skb->sk; + skb_truesize_check(skb); atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + sk_mem_uncharge(skb->sk, skb->truesize); } @@ -1384,6 +1391,103 @@ int sk_wait_data(struct sock *sk, long *timeo) EXPORT_SYMBOL(sk_wait_data); +/** + * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated + * @sk: socket + * @size: memory size to allocate + * @kind: allocation type + * + * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means + * rmem allocation. This function assumes that protocols which have + * memory_pressure use sk_wmem_queued as write buffer accounting. + */ +int __sk_mem_schedule(struct sock *sk, int size, int kind) +{ + struct proto *prot = sk->sk_prot; + int amt = sk_mem_pages(size); + int allocated; + + sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; + allocated = atomic_add_return(amt, prot->memory_allocated); + + /* Under limit. */ + if (allocated <= prot->sysctl_mem[0]) { + if (prot->memory_pressure && *prot->memory_pressure) + *prot->memory_pressure = 0; + return 1; + } + + /* Under pressure. */ + if (allocated > prot->sysctl_mem[1]) + if (prot->enter_memory_pressure) + prot->enter_memory_pressure(); + + /* Over hard limit. 
*/ + if (allocated > prot->sysctl_mem[2]) + goto suppress_allocation; + + /* guarantee minimum buffer size under pressure */ + if (kind == SK_MEM_RECV) { + if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) + return 1; + } else { /* SK_MEM_SEND */ + if (sk->sk_type == SOCK_STREAM) { + if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) + return 1; + } else if (atomic_read(&sk->sk_wmem_alloc) < + prot->sysctl_wmem[0]) + return 1; + } + + if (prot->memory_pressure) { + if (!*prot->memory_pressure || + prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) * + sk_mem_pages(sk->sk_wmem_queued + + atomic_read(&sk->sk_rmem_alloc) + + sk->sk_forward_alloc)) + return 1; + } + +suppress_allocation: + + if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) { + sk_stream_moderate_sndbuf(sk); + + /* Fail only if socket is _under_ its sndbuf. + * In this case we cannot block, so that we have to fail. + */ + if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) + return 1; + } + + /* Alas. Undo changes. */ + sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; + atomic_sub(amt, prot->memory_allocated); + return 0; +} + +EXPORT_SYMBOL(__sk_mem_schedule); + +/** + * __sk_reclaim - reclaim memory_allocated + * @sk: socket + */ +void __sk_mem_reclaim(struct sock *sk) +{ + struct proto *prot = sk->sk_prot; + + atomic_sub(sk->sk_forward_alloc / SK_MEM_QUANTUM, + prot->memory_allocated); + sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; + + if (prot->memory_pressure && *prot->memory_pressure && + (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0])) + *prot->memory_pressure = 0; +} + +EXPORT_SYMBOL(__sk_mem_reclaim); + + /* * Set of default routines for initialising struct proto_ops when * the protocol does not support a particular function. In certain diff --git a/net/core/stream.c b/net/core/stream.c index bf188ffdbdb..4a0ad152c9c 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -172,17 +172,6 @@ do_interrupted: EXPORT_SYMBOL(sk_stream_wait_memory); -void sk_stream_rfree(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - skb_truesize_check(skb); - atomic_sub(skb->truesize, &sk->sk_rmem_alloc); - sk->sk_forward_alloc += skb->truesize; -} - -EXPORT_SYMBOL(sk_stream_rfree); - int sk_stream_error(struct sock *sk, int flags, int err) { if (err == -EPIPE) @@ -194,77 +183,6 @@ int sk_stream_error(struct sock *sk, int flags, int err) EXPORT_SYMBOL(sk_stream_error); -void __sk_stream_mem_reclaim(struct sock *sk) -{ - atomic_sub(sk->sk_forward_alloc >> SK_STREAM_MEM_QUANTUM_SHIFT, - sk->sk_prot->memory_allocated); - sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1; - if (*sk->sk_prot->memory_pressure && - (atomic_read(sk->sk_prot->memory_allocated) < - sk->sk_prot->sysctl_mem[0])) - *sk->sk_prot->memory_pressure = 0; -} - -EXPORT_SYMBOL(__sk_stream_mem_reclaim); - -int sk_stream_mem_schedule(struct sock *sk, int size, int kind) -{ - int amt = sk_stream_pages(size); - struct proto *prot = sk->sk_prot; - - sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM; - atomic_add(amt, prot->memory_allocated); - - /* Under limit. */ - if (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]) { - if (*prot->memory_pressure) - *prot->memory_pressure = 0; - return 1; - } - - /* Over hard limit. */ - if (atomic_read(prot->memory_allocated) > prot->sysctl_mem[2]) { - prot->enter_memory_pressure(); - goto suppress_allocation; - } - - /* Under pressure. 
*/ - if (atomic_read(prot->memory_allocated) > prot->sysctl_mem[1]) - prot->enter_memory_pressure(); - - if (kind) { - if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) - return 1; - } else if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) - return 1; - - if (!*prot->memory_pressure || - prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) * - sk_stream_pages(sk->sk_wmem_queued + - atomic_read(&sk->sk_rmem_alloc) + - sk->sk_forward_alloc)) - return 1; - -suppress_allocation: - - if (!kind) { - sk_stream_moderate_sndbuf(sk); - - /* Fail only if socket is _under_ its sndbuf. - * In this case we cannot block, so that we have to fail. - */ - if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) - return 1; - } - - /* Alas. Undo changes. */ - sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM; - atomic_sub(amt, prot->memory_allocated); - return 0; -} - -EXPORT_SYMBOL(sk_stream_mem_schedule); - void sk_stream_kill_queues(struct sock *sk) { /* First the read buffer. */ @@ -277,7 +195,7 @@ void sk_stream_kill_queues(struct sock *sk) BUG_TRAP(skb_queue_empty(&sk->sk_write_queue)); /* Account for returned memory. */ - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); BUG_TRAP(!sk->sk_wmem_queued); BUG_TRAP(!sk->sk_forward_alloc); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fdaf965a679..2cbfa6df797 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -308,7 +308,7 @@ struct tcp_splice_state { /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. - * All the sk_stream_mem_schedule() is of this nature: accounting + * All the __sk_mem_schedule() is of this nature: accounting * is strict, actions are advisory and have some latency. */ int tcp_memory_pressure __read_mostly; @@ -485,7 +485,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) tcb->sacked = 0; skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); - sk_charge_skb(sk, skb); + sk->sk_wmem_queued += skb->truesize; + sk_mem_charge(sk, skb->truesize); if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; } @@ -638,7 +639,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); if (skb) { - if (sk_stream_wmem_schedule(sk, skb->truesize)) { + if (sk_wmem_schedule(sk, skb->truesize)) { /* * Make sure that we have exactly size bytes * available to the caller, no more, no less. @@ -707,7 +708,7 @@ new_segment: tcp_mark_push(tp, skb); goto new_segment; } - if (!sk_stream_wmem_schedule(sk, copy)) + if (!sk_wmem_schedule(sk, copy)) goto wait_for_memory; if (can_coalesce) { @@ -721,7 +722,7 @@ new_segment: skb->data_len += copy; skb->truesize += copy; sk->sk_wmem_queued += copy; - sk->sk_forward_alloc -= copy; + sk_mem_charge(sk, copy); skb->ip_summed = CHECKSUM_PARTIAL; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; @@ -928,7 +929,7 @@ new_segment: if (copy > PAGE_SIZE - off) copy = PAGE_SIZE - off; - if (!sk_stream_wmem_schedule(sk, copy)) + if (!sk_wmem_schedule(sk, copy)) goto wait_for_memory; if (!page) { @@ -1019,7 +1020,7 @@ do_fault: * reset, where we can be unlinking the send_head. */ tcp_check_send_head(sk, skb); - sk_stream_free_skb(sk, skb); + sk_wmem_free_skb(sk, skb); } do_error: @@ -1738,7 +1739,7 @@ void tcp_close(struct sock *sk, long timeout) __kfree_skb(skb); } - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); /* As outlined in RFC 2525, section 2.17, we send a RST here because * data was lost. 
To witness the awful effects of the old behavior of @@ -1841,7 +1842,7 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); if (tcp_too_many_orphans(sk, atomic_read(sk->sk_prot->orphan_count))) { if (net_ratelimit()) @@ -2658,11 +2659,11 @@ void __init tcp_init(void) limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit); - sysctl_tcp_wmem[0] = SK_STREAM_MEM_QUANTUM; + sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; sysctl_tcp_wmem[1] = 16*1024; sysctl_tcp_wmem[2] = max(64*1024, max_share); - sysctl_tcp_rmem[0] = SK_STREAM_MEM_QUANTUM; + sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; sysctl_tcp_rmem[1] = 87380; sysctl_tcp_rmem[2] = max(87380, max_share); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index efea9873208..722c9cbb91e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -591,7 +591,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) * restart window, so that we send ACKs quickly. */ tcp_incr_quickack(sk); - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); } } icsk->icsk_ack.lrcvtime = now; @@ -2851,7 +2851,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) break; tcp_unlink_write_queue(skb, sk); - sk_stream_free_skb(sk, skb); + sk_wmem_free_skb(sk, skb); tcp_clear_all_retrans_hints(tp); } @@ -3567,7 +3567,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) __skb_queue_purge(&tp->out_of_order_queue); if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); @@ -3850,12 +3850,12 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) queue_and_out: if (eaten < 0 && (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - !sk_stream_rmem_schedule(sk, skb))) { + !sk_rmem_schedule(sk, skb->truesize))) { if (tcp_prune_queue(sk) < 0 || - !sk_stream_rmem_schedule(sk, skb)) + !sk_rmem_schedule(sk, skb->truesize)) goto drop; } - sk_stream_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); __skb_queue_tail(&sk->sk_receive_queue, skb); } tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; @@ -3924,9 +3924,9 @@ drop: TCP_ECN_check_ce(tp, skb); if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || - !sk_stream_rmem_schedule(sk, skb)) { + !sk_rmem_schedule(sk, skb->truesize)) { if (tcp_prune_queue(sk) < 0 || - !sk_stream_rmem_schedule(sk, skb)) + !sk_rmem_schedule(sk, skb->truesize)) goto drop; } @@ -3937,7 +3937,7 @@ drop: SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); - sk_stream_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); if (!skb_peek(&tp->out_of_order_queue)) { /* Initial out of order segment, build 1 SACK. */ @@ -4079,7 +4079,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; __skb_insert(nskb, skb->prev, skb, list); - sk_stream_set_owner_r(nskb, sk); + skb_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. 
*/ while (copy > 0) { @@ -4177,7 +4177,7 @@ static int tcp_prune_queue(struct sock *sk) sk->sk_receive_queue.next, (struct sk_buff*)&sk->sk_receive_queue, tp->copied_seq, tp->rcv_nxt); - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) return 0; @@ -4197,7 +4197,7 @@ static int tcp_prune_queue(struct sock *sk) */ if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); } if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) @@ -4699,7 +4699,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Bulk data transfer: receiver */ __skb_pull(skb,tcp_header_len); __skb_queue_tail(&sk->sk_receive_queue, skb); - sk_stream_set_owner_r(skb, sk); + skb_set_owner_r(skb, sk); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9058e0a2510..7a4834a2ae8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -637,7 +637,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) tp->write_seq = TCP_SKB_CB(skb)->end_seq; skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); - sk_charge_skb(sk, skb); + sk->sk_wmem_queued += skb->truesize; + sk_mem_charge(sk, skb->truesize); } static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) @@ -701,7 +702,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss if (buff == NULL) return -ENOMEM; /* We'll just try again later. */ - sk_charge_skb(sk, buff); + sk->sk_wmem_queued += buff->truesize; + sk_mem_charge(sk, buff->truesize); nlen = skb->len - len - nsize; buff->truesize += nlen; skb->truesize -= nlen; @@ -825,7 +827,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) skb->truesize -= len; sk->sk_wmem_queued -= len; - sk->sk_forward_alloc += len; + sk_mem_uncharge(sk, len); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); /* Any change of skb->len requires recalculation of tso @@ -1197,7 +1199,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, if (unlikely(buff == NULL)) return -ENOMEM; - sk_charge_skb(sk, buff); + sk->sk_wmem_queued += buff->truesize; + sk_mem_charge(sk, buff->truesize); buff->truesize += nlen; skb->truesize -= nlen; @@ -1350,7 +1353,8 @@ static int tcp_mtu_probe(struct sock *sk) /* We're allowed to probe. Build it now. */ if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL) return -1; - sk_charge_skb(sk, nskb); + sk->sk_wmem_queued += nskb->truesize; + sk_mem_charge(sk, nskb->truesize); skb = tcp_send_head(sk); @@ -1377,7 +1381,7 @@ static int tcp_mtu_probe(struct sock *sk) * Throw it away. 
*/ TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; tcp_unlink_write_queue(skb, sk); - sk_stream_free_skb(sk, skb); + sk_wmem_free_skb(sk, skb); } else { TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); @@ -1744,7 +1748,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m /* changed transmit queue under us so clear hints */ tcp_clear_retrans_hints_partial(tp); - sk_stream_free_skb(sk, next_skb); + sk_wmem_free_skb(sk, next_skb); } } @@ -2139,8 +2143,9 @@ int tcp_send_synack(struct sock *sk) tcp_unlink_write_queue(skb, sk); skb_header_release(nskb); __tcp_add_write_queue_head(sk, nskb); - sk_stream_free_skb(sk, skb); - sk_charge_skb(sk, nskb); + sk_wmem_free_skb(sk, skb); + sk->sk_wmem_queued += nskb->truesize; + sk_mem_charge(sk, nskb->truesize); skb = nskb; } @@ -2343,7 +2348,8 @@ int tcp_connect(struct sock *sk) tp->retrans_stamp = TCP_SKB_CB(buff)->when; skb_header_release(buff); __tcp_add_write_queue_tail(sk, buff); - sk_charge_skb(sk, buff); + sk->sk_wmem_queued += buff->truesize; + sk_mem_charge(sk, buff->truesize); tp->packets_out += tcp_skb_pcount(buff); tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ea85bc00c61..17931be6d58 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -186,7 +186,7 @@ static void tcp_delack_timer(unsigned long data) goto out_unlock; } - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; @@ -226,7 +226,7 @@ static void tcp_delack_timer(unsigned long data) out: if (tcp_memory_pressure) - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); out_unlock: bh_unlock_sock(sk); sock_put(sk); @@ -420,7 +420,7 @@ static void tcp_write_timer(unsigned long data) TCP_CHECK_TIMER(sk); out: - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); out_unlock: bh_unlock_sock(sk); sock_put(sk); @@ -514,7 +514,7 @@ static void tcp_keepalive_timer (unsigned long data) } TCP_CHECK_TIMER(sk); - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); resched: inet_csk_reset_keepalive_timer (sk, elapsed); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e466e00b9a9..b9219649502 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1109,7 +1109,7 @@ SCTP_STATIC __init int sctp_init(void) sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); - sysctl_sctp_wmem[0] = SK_STREAM_MEM_QUANTUM; + sysctl_sctp_wmem[0] = SK_MEM_QUANTUM; sysctl_sctp_wmem[1] = 16*1024; sysctl_sctp_wmem[2] = max(64*1024, max_share); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 511d8c9a171..b1267519183 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -5844,7 +5844,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, /* * Also try to renege to limit our memory usage in the event that * we are under memory pressure - * If we can't renege, don't worry about it, the sk_stream_rmem_schedule + * If we can't renege, don't worry about it, the sk_rmem_schedule * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our * memory usage too much */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7a8650f01d0..710df67a678 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -174,7 +174,8 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) sizeof(struct sctp_chunk); atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); - sk_charge_skb(sk, 
chunk->skb); + sk->sk_wmem_queued += chunk->skb->truesize; + sk_mem_charge(sk, chunk->skb->truesize); } /* Verify that this is a valid address. */ @@ -6035,10 +6036,10 @@ static void sctp_wfree(struct sk_buff *skb) atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); /* - * This undoes what is done via sk_charge_skb + * This undoes what is done via sctp_set_owner_w and sk_mem_charge */ sk->sk_wmem_queued -= skb->truesize; - sk->sk_forward_alloc += skb->truesize; + sk_mem_uncharge(sk, skb->truesize); sock_wfree(skb); __sctp_write_space(asoc); @@ -6059,9 +6060,9 @@ void sctp_sock_rfree(struct sk_buff *skb) atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); /* - * Mimic the behavior of sk_stream_rfree + * Mimic the behavior of sock_rfree */ - sk->sk_forward_alloc += event->rmem_len; + sk_mem_uncharge(sk, event->rmem_len); } diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 307314356e1..047c27df98f 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -700,7 +700,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, if (rx_count >= asoc->base.sk->sk_rcvbuf) { if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || - (!sk_stream_rmem_schedule(asoc->base.sk, chunk->skb))) + (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize))) goto fail; } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 1733fa29a50..c25caefa3bc 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -1046,7 +1046,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, sctp_ulpq_partial_delivery(ulpq, chunk, gfp); } - sk_stream_mem_reclaim(asoc->base.sk); + sk_mem_reclaim(asoc->base.sk); return; } -- cgit v1.2.3-70-g09d2 From 3ccd3130b3f681a4aef6392327256786b3b6aa04 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 31 Dec 2007 04:43:32 -0800 Subject: [TCP]: Make invariant check complain about invalid sacked_out MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Earlier resolution for NewReno's sacked_out should now keep it small enough for this to become invariant-like check. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 722c9cbb91e..41f4b862319 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2504,11 +2504,8 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) (tcp_fackets_out(tp) > tp->reordering)); int fast_rexmit = 0; - /* Some technical things: - * 1. Reno does not count dupacks (sacked_out) automatically. */ - if (!tp->packets_out) + if (WARN_ON(!tp->packets_out && tp->sacked_out)) tp->sacked_out = 0; - if (WARN_ON(!tp->sacked_out && tp->fackets_out)) tp->fackets_out = 0; -- cgit v1.2.3-70-g09d2 From 90840defabbd819180c7528e12d550776848f833 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 31 Dec 2007 04:48:41 -0800 Subject: [TCP]: Introduce tcp_wnd_end() to reduce line lengths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- include/net/tcp.h | 6 ++++++ net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_output.c | 20 ++++++++++---------- 3 files changed, 17 insertions(+), 12 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 76286e80205..6a732d4919f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -785,6 +785,12 @@ static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp) return 3; } +/* Returns end sequence number of the receiver's advertised window */ +static inline u32 tcp_wnd_end(const struct tcp_sock *tp) +{ + return tp->snd_una + tp->snd_wnd; +} + /* RFC2861 Check whether we are limited by application or congestion window * This is the inverse of cwnd check in tcp_tso_should_defer */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 41f4b862319..366f63ae4d5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2921,8 +2921,7 @@ static void tcp_ack_probe(struct sock *sk) /* Was it a usable window open? */ - if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, - tp->snd_una + tp->snd_wnd)) { + if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) { icsk->icsk_backoff = 0; inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); /* Socket must be waked up by subsequent tcp_data_snd_check(). diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1ca638b8316..821fae27142 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -89,10 +89,10 @@ static inline __u32 tcp_acceptable_seq(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt)) + if (!before(tcp_wnd_end(tp), tp->snd_nxt)) return tp->snd_nxt; else - return tp->snd_una+tp->snd_wnd; + return tcp_wnd_end(tp); } /* Calculate mss to advertise in SYN segment. @@ -1024,7 +1024,7 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); u32 needed, window, cwnd_len; - window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq); + window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; cwnd_len = mss_now * cwnd; if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk))) @@ -1134,7 +1134,7 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, uns if (skb->len > cur_mss) end_seq = TCP_SKB_CB(skb)->seq + cur_mss; - return !after(end_seq, tp->snd_una + tp->snd_wnd); + return !after(end_seq, tcp_wnd_end(tp)); } /* This checks if the data bearing packet SKB (usually tcp_send_head(sk)) @@ -1251,7 +1251,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight)); - send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq; + send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; /* From in_flight test above, we know that cwnd > in_flight. */ cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache; @@ -1332,7 +1332,7 @@ static int tcp_mtu_probe(struct sock *sk) if (tp->snd_wnd < size_needed) return -1; - if (after(tp->snd_nxt + size_needed, tp->snd_una + tp->snd_wnd)) + if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp))) return 0; /* Do we need to wait to drain cwnd? With none in flight, don't stall */ @@ -1687,7 +1687,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m return; /* Next skb is out of window. 
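
[Editorial aside — a rough, self-contained illustration of the window test that the tcp_wnd_end() conversions above express; this is not part of the patch. The helper simply names snd_una + snd_wnd, and call sites compare a segment's end sequence against it. The struct and the sequence-compare helper below are simplified stand-ins for the kernel's tcp_sock and after() macro.]

#include <stdint.h>

/* Simplified stand-in for the fields of tcp_sock used here (illustration only). */
struct tcp_sock_min {
	uint32_t snd_una;	/* oldest unacknowledged sequence number */
	uint32_t snd_wnd;	/* receiver-advertised window */
};

/* Serial-number comparison in the spirit of the kernel's after(). */
static inline int seq_after(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) < 0;
}

/* Mirrors the new helper: end sequence of the advertised send window. */
static inline uint32_t tcp_wnd_end_sketch(const struct tcp_sock_min *tp)
{
	return tp->snd_una + tp->snd_wnd;
}

/* A segment fits in the window iff its end_seq does not pass tcp_wnd_end(),
 * which is the shape of the converted checks in tcp_snd_wnd_test and friends.
 */
static inline int seg_fits_window(const struct tcp_sock_min *tp, uint32_t end_seq)
{
	return !seq_after(end_seq, tcp_wnd_end_sketch(tp));
}
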
*/ - if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd)) + if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp))) return; /* Punt if not enough space exists in the first SKB for @@ -1831,7 +1831,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * case, when window is shrunk to zero. In this case * our retransmit serves as a zero window probe. */ - if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd) + if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) && TCP_SKB_CB(skb)->seq != tp->snd_una) return -EAGAIN; @@ -2497,10 +2497,10 @@ int tcp_write_wakeup(struct sock *sk) struct sk_buff *skb; if ((skb = tcp_send_head(sk)) != NULL && - before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { + before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { int err; unsigned int mss = tcp_current_mss(sk, 0); - unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq; + unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) tp->pushed_seq = TCP_SKB_CB(skb)->end_seq; -- cgit v1.2.3-70-g09d2 From cadbd0313bc897f5917d013174cdf9077edf4aa5 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 31 Dec 2007 04:49:21 -0800 Subject: [TCP]: Dropped unnecessary skb/sacked accessing in reneging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SACK reneging can be precalculated to a FLAG in clean_rtx_queue which has the right skb looked up. This will help a bit in future because skb->sacked access will be changed eventually, changing it already won't hurt any. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 366f63ae4d5..7bac1fac065 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -105,6 +105,7 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ #define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ +#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) @@ -1918,18 +1919,15 @@ void tcp_enter_loss(struct sock *sk, int how) tp->frto_counter = 0; } -static int tcp_check_sack_reneging(struct sock *sk) +/* If ACK arrived pointing to a remembered SACK, it means that our + * remembered SACKs do not reflect real state of receiver i.e. + * receiver _host_ is heavily congested (or buggy). + * + * Do processing similar to RTO timeout. + */ +static int tcp_check_sack_reneging(struct sock *sk, int flag) { - struct sk_buff *skb; - - /* If ACK arrived pointing to a remembered SACK, - * it means that our remembered SACKs do not reflect - * real state of receiver i.e. - * receiver _host_ is heavily congested (or buggy). - * Do processing similar to RTO timeout. - */ - if ((skb = tcp_write_queue_head(sk)) != NULL && - (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + if (flag & FLAG_SACK_RENEGING) { struct inet_connection_sock *icsk = inet_csk(sk); NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); @@ -2515,7 +2513,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tp->prior_ssthresh = 0; /* B. In all the states check for reneging SACKs. 
*/ - if (tp->sacked_out && tcp_check_sack_reneging(sk)) + if (tcp_check_sack_reneging(sk, flag)) return; /* C. Process data loss notification, provided it is valid. */ @@ -2852,6 +2850,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) tcp_clear_all_retrans_hints(tp); } + if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) + flag |= FLAG_SACK_RENEGING; + if (flag & FLAG_ACKED) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; -- cgit v1.2.3-70-g09d2 From 4828e7f49a402930e8b3e72de695c8d37e0f98ee Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 31 Dec 2007 04:50:19 -0800 Subject: [TCP]: Remove TCPCB_URG & TCPCB_AT_TAIL as unnecessary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The snd_up check should be enough. I suspect this has been there to provide a minor optimization in clean_rtx_queue which used to have a small if (!->sacked) block which could skip snd_up check among the other work. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ---- net/ipv4/tcp.c | 1 - net/ipv4/tcp_input.c | 3 +-- net/ipv4/tcp_output.c | 7 +++---- 4 files changed, 4 insertions(+), 11 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index 6a732d4919f..48081ada92a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -578,10 +578,6 @@ struct tcp_skb_cb { #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ #define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS) -#define TCPCB_URG 0x20 /* Urgent pointer advanced here */ - -#define TCPCB_AT_TAIL (TCPCB_URG) - __u16 urg_ptr; /* Valid w/URG flags is set. */ __u32 ack_seq; /* Sequence number ACK'd */ }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2cbfa6df797..34085e3a409 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -497,7 +497,6 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, if (flags & MSG_OOB) { tp->urg_mode = 1; tp->snd_up = tp->write_seq; - TCP_SKB_CB(skb)->sacked |= TCPCB_URG; } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7bac1fac065..1e7fd811366 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2821,8 +2821,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; - if (unlikely((sacked & TCPCB_URG) && tp->urg_mode && - !before(end_seq, tp->snd_up))) + if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up))) tp->urg_mode = 0; tp->packets_out -= acked_pcount; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 821fae27142..cd21528665f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -711,7 +711,6 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); TCP_SKB_CB(buff)->flags = flags; TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; - TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { /* Copy and checksum data tail into the new buffer. */ @@ -1726,7 +1725,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m /* All done, get rid of second SKB and account for it so * packet counting does not break. 
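
[Editorial aside — a minimal sketch, with hypothetical simplified names, of the precalculation pattern used by the reneging patch above: compute the condition once where the right skb is already looked up (clean_rtx_queue), record it in the flag word, and let the later state-machine check test only the flag. Only FLAG_SACK_RENEGING and its value are taken from the patch; the rest is illustrative.]

#include <stdbool.h>

#define FLAG_SACK_RENEGING 0x2000	/* snd_una advanced to a sacked seq */

/* Hypothetical stand-in for the bit of skb state consulted here. */
struct skb_min {
	bool sacked_acked;	/* skb carried TCPCB_SACKED_ACKED */
};

/* Where the head-of-queue skb is already in hand, fold the reneging
 * test into the flag word once...
 */
static int note_sack_reneging(const struct skb_min *head, int flag)
{
	if (head && head->sacked_acked)
		flag |= FLAG_SACK_RENEGING;
	return flag;
}

/* ...so the later check needs neither the queue nor skb->sacked. */
static bool sack_reneging(int flag)
{
	return flag & FLAG_SACK_RENEGING;
}
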
*/ - TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); + TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS) tp->retrans_out -= tcp_skb_pcount(next_skb); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) @@ -2475,7 +2474,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) skb_reserve(skb, MAX_TCP_HEADER); skb->csum = 0; TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; - TCP_SKB_CB(skb)->sacked = urgent; + TCP_SKB_CB(skb)->sacked = 0; skb_shinfo(skb)->gso_segs = 1; skb_shinfo(skb)->gso_size = 0; skb_shinfo(skb)->gso_type = 0; @@ -2527,7 +2526,7 @@ int tcp_write_wakeup(struct sock *sk) } else { if (tp->urg_mode && between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF)) - tcp_xmit_probe_skb(sk, TCPCB_URG); + tcp_xmit_probe_skb(sk, 1); return tcp_xmit_probe_skb(sk, 0); } } -- cgit v1.2.3-70-g09d2 From 056834d9f6f6eaf4cc7268569e53acab957aac27 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 31 Dec 2007 14:57:14 -0800 Subject: [TCP]: cleanup tcp_{in,out}put.c style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These were manually selected from indent's results which as is are too noisy to be of any use without human reason. In addition, some extra newlines between function and its comment were removed too. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 436 ++++++++++++++++++++++++++------------------------ net/ipv4/tcp_output.c | 146 ++++++++--------- 2 files changed, 300 insertions(+), 282 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1e7fd811366..18e099c6fa6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -121,8 +121,7 @@ int sysctl_tcp_abc __read_mostly; /* Adapt the MSS value used to make delayed ack decision to the * real world. */ -static void tcp_measure_rcv_mss(struct sock *sk, - const struct sk_buff *skb) +static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) { struct inet_connection_sock *icsk = inet_csk(sk); const unsigned int lss = icsk->icsk_ack.last_seg_size; @@ -133,7 +132,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, /* skb->len may jitter because of SACKs, even if peer * sends good full-sized frames. */ - len = skb_shinfo(skb)->gso_size ?: skb->len; + len = skb_shinfo(skb)->gso_size ? 
: skb->len; if (len >= icsk->icsk_ack.rcv_mss) { icsk->icsk_ack.rcv_mss = len; } else { @@ -173,8 +172,8 @@ static void tcp_incr_quickack(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); - if (quickacks==0) - quickacks=2; + if (quickacks == 0) + quickacks = 2; if (quickacks > icsk->icsk_ack.quick) icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); } @@ -199,7 +198,7 @@ static inline int tcp_in_quickack_mode(const struct sock *sk) static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp) { - if (tp->ecn_flags&TCP_ECN_OK) + if (tp->ecn_flags & TCP_ECN_OK) tp->ecn_flags |= TCP_ECN_QUEUE_CWR; } @@ -216,7 +215,7 @@ static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp) static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) { - if (tp->ecn_flags&TCP_ECN_OK) { + if (tp->ecn_flags & TCP_ECN_OK) { if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) tp->ecn_flags |= TCP_ECN_DEMAND_CWR; /* Funny extension: if ECT is not set on a segment, @@ -229,19 +228,19 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th) { - if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || th->cwr)) + if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr)) tp->ecn_flags &= ~TCP_ECN_OK; } static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th) { - if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || !th->cwr)) + if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) tp->ecn_flags &= ~TCP_ECN_OK; } static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) { - if (th->ece && !th->syn && (tp->ecn_flags&TCP_ECN_OK)) + if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) return 1; return 0; } @@ -303,8 +302,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) return 0; } -static void tcp_grow_window(struct sock *sk, - struct sk_buff *skb) +static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -318,12 +316,13 @@ static void tcp_grow_window(struct sock *sk, * will fit to rcvbuf in future. */ if (tcp_win_from_space(skb->truesize) <= skb->len) - incr = 2*tp->advmss; + incr = 2 * tp->advmss; else incr = __tcp_grow_window(sk, skb); if (incr) { - tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); + tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, + tp->window_clamp); inet_csk(sk)->icsk_ack.quick |= 1; } } @@ -398,10 +397,9 @@ static void tcp_clamp_window(struct sock *sk) sysctl_tcp_rmem[2]); } if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) - tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); + tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); } - /* Initialize RCV_MSS value. * RCV_MSS is an our guess about MSS used by the peer. * We haven't any direct information about the MSS. 
@@ -414,7 +412,7 @@ void tcp_initialize_rcv_mss(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); - hint = min(hint, tp->rcv_wnd/2); + hint = min(hint, tp->rcv_wnd / 2); hint = min(hint, TCP_MIN_RCVMSS); hint = max(hint, TCP_MIN_MSS); @@ -471,16 +469,15 @@ static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp) goto new_measure; if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq)) return; - tcp_rcv_rtt_update(tp, - jiffies - tp->rcv_rtt_est.time, - 1); + tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1); new_measure: tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd; tp->rcv_rtt_est.time = tcp_time_stamp; } -static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb) +static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, + const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); if (tp->rx_opt.rcv_tsecr && @@ -503,8 +500,7 @@ void tcp_rcv_space_adjust(struct sock *sk) goto new_measure; time = tcp_time_stamp - tp->rcvq_space.time; - if (time < (tp->rcv_rtt_est.rtt >> 3) || - tp->rcv_rtt_est.rtt == 0) + if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0) return; space = 2 * (tp->copied_seq - tp->rcvq_space.seq); @@ -580,7 +576,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) } else { int m = now - icsk->icsk_ack.lrcvtime; - if (m <= TCP_ATO_MIN/2) { + if (m <= TCP_ATO_MIN / 2) { /* The fastest case is the first. */ icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2; } else if (m < icsk->icsk_ack.ato) { @@ -609,7 +605,7 @@ static u32 tcp_rto_min(struct sock *sk) u32 rto_min = TCP_RTO_MIN; if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) - rto_min = dst->metrics[RTAX_RTO_MIN-1]; + rto_min = dst->metrics[RTAX_RTO_MIN - 1]; return rto_min; } @@ -672,14 +668,14 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) } if (after(tp->snd_una, tp->rtt_seq)) { if (tp->mdev_max < tp->rttvar) - tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2; + tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2; tp->rtt_seq = tp->snd_nxt; tp->mdev_max = tcp_rto_min(sk); } } else { /* no previous measure. */ - tp->srtt = m<<3; /* take the measured time to be rtt */ - tp->mdev = m<<1; /* make sure rto = 3*rtt */ + tp->srtt = m << 3; /* take the measured time to be rtt */ + tp->mdev = m << 1; /* make sure rto = 3*rtt */ tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk)); tp->rtt_seq = tp->snd_nxt; } @@ -733,7 +729,7 @@ void tcp_update_metrics(struct sock *sk) dst_confirm(dst); - if (dst && (dst->flags&DST_HOST)) { + if (dst && (dst->flags & DST_HOST)) { const struct inet_connection_sock *icsk = inet_csk(sk); int m; @@ -743,7 +739,7 @@ void tcp_update_metrics(struct sock *sk) * Reset our results. 
*/ if (!(dst_metric_locked(dst, RTAX_RTT))) - dst->metrics[RTAX_RTT-1] = 0; + dst->metrics[RTAX_RTT - 1] = 0; return; } @@ -755,9 +751,9 @@ void tcp_update_metrics(struct sock *sk) */ if (!(dst_metric_locked(dst, RTAX_RTT))) { if (m <= 0) - dst->metrics[RTAX_RTT-1] = tp->srtt; + dst->metrics[RTAX_RTT - 1] = tp->srtt; else - dst->metrics[RTAX_RTT-1] -= (m>>3); + dst->metrics[RTAX_RTT - 1] -= (m >> 3); } if (!(dst_metric_locked(dst, RTAX_RTTVAR))) { @@ -770,7 +766,7 @@ void tcp_update_metrics(struct sock *sk) m = tp->mdev; if (m >= dst_metric(dst, RTAX_RTTVAR)) - dst->metrics[RTAX_RTTVAR-1] = m; + dst->metrics[RTAX_RTTVAR - 1] = m; else dst->metrics[RTAX_RTTVAR-1] -= (dst->metrics[RTAX_RTTVAR-1] - m)>>2; @@ -784,7 +780,7 @@ void tcp_update_metrics(struct sock *sk) dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1; if (!dst_metric_locked(dst, RTAX_CWND) && tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = tp->snd_cwnd; + dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd; } else if (tp->snd_cwnd > tp->snd_ssthresh && icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ @@ -1353,12 +1349,14 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, } if (in_sack <= 0) - in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq); + in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, + end_seq); if (unlikely(in_sack < 0)) break; if (in_sack) - *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, *fack_count); + *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, + *fack_count); *fack_count += tcp_skb_pcount(skb); } @@ -1407,7 +1405,8 @@ static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache) } static int -tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) +tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, + u32 prior_snd_una) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -1417,7 +1416,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ struct tcp_sack_block sp[4]; struct tcp_sack_block *cache; struct sk_buff *skb; - int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; + int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE) >> 3; int used_sacks; int reord = tp->packets_out; int flag = 0; @@ -1484,17 +1483,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* order SACK blocks to allow in order walk of the retrans queue */ for (i = used_sacks - 1; i > 0; i--) { - for (j = 0; j < i; j++){ - if (after(sp[j].start_seq, sp[j+1].start_seq)) { + for (j = 0; j < i; j++) { + if (after(sp[j].start_seq, sp[j + 1].start_seq)) { struct tcp_sack_block tmp; tmp = sp[j]; - sp[j] = sp[j+1]; - sp[j+1] = tmp; + sp[j] = sp[j + 1]; + sp[j + 1] = tmp; /* Track where the first SACK block goes to */ if (j == first_sack_index) - first_sack_index = j+1; + first_sack_index = j + 1; } } } @@ -1539,17 +1538,21 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* Head todo? */ if (before(start_seq, cache->start_seq)) { skb = tcp_sacktag_skip(skb, sk, start_seq); - skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, - cache->start_seq, dup_sack, - &fack_count, &reord, &flag); + skb = tcp_sacktag_walk(skb, sk, next_dup, + start_seq, + cache->start_seq, + dup_sack, &fack_count, + &reord, &flag); } /* Rest of the block already fully processed? 
*/ if (!after(end_seq, cache->end_seq)) goto advance_sp; - skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq, - &fack_count, &reord, &flag); + skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, + cache->end_seq, + &fack_count, &reord, + &flag); /* ...tail remains todo... */ if (tcp_highest_sack_seq(tp) == cache->end_seq) { @@ -1654,10 +1657,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, int acked) if (acked > 0) { /* One ACK acked hole. The rest eat duplicate ACKs. */ - if (acked-1 >= tp->sacked_out) + if (acked - 1 >= tp->sacked_out) tp->sacked_out = 0; else - tp->sacked_out -= acked-1; + tp->sacked_out -= acked - 1; } tcp_check_reno_reordering(sk, acked); tcp_verify_left_out(tp); @@ -1691,10 +1694,10 @@ int tcp_use_frto(struct sock *sk) tcp_for_write_queue_from(skb, sk) { if (skb == tcp_send_head(sk)) break; - if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) + if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) return 0; /* Short-circuit when first non-SACKed skb has been checked */ - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) + if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) break; } return 1; @@ -1804,7 +1807,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) * Count the retransmission made on RTO correctly (only when * waiting for the first ACK and did not get it)... */ - if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) { + if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) { /* For some reason this R-bit might get cleared? */ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) tp->retrans_out += tcp_skb_pcount(skb); @@ -1817,7 +1820,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) } /* Don't lost mark skbs that were fwd transmitted after RTO */ - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) && + if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) && !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); @@ -1832,7 +1835,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->bytes_acked = 0; tp->reordering = min_t(unsigned int, tp->reordering, - sysctl_tcp_reordering); + sysctl_tcp_reordering); tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->frto_highmark; TCP_ECN_queue_cwr(tp); @@ -1899,7 +1902,7 @@ void tcp_enter_loss(struct sock *sk, int how) if (skb == tcp_send_head(sk)) break; - if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) + if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) tp->undo_marker = 0; TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { @@ -1911,7 +1914,7 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_verify_left_out(tp); tp->reordering = min_t(unsigned int, tp->reordering, - sysctl_tcp_reordering); + sysctl_tcp_reordering); tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); @@ -1943,7 +1946,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag) static inline int tcp_fackets_out(struct tcp_sock *tp) { - return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out; + return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out; } /* Heurestics to calculate number of duplicate ACKs. There's no dupACKs @@ -2116,12 +2119,11 @@ static int tcp_time_to_recover(struct sock *sk) * retransmitted past LOST markings in the first place? I'm not fully sure * about undo and end of connection cases, which can cause R without L? 
*/ -static void tcp_verify_retransmit_hint(struct tcp_sock *tp, - struct sk_buff *skb) +static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) { if ((tp->retransmit_skb_hint != NULL) && before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) tp->retransmit_skb_hint = NULL; } @@ -2156,7 +2158,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int fast_rexmit) cnt += tcp_skb_pcount(skb); if (((!fast_rexmit || (tp->lost_out > 0)) && (cnt > packets)) || - after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) + after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) break; if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; @@ -2223,7 +2225,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) static inline void tcp_moderate_cwnd(struct tcp_sock *tp) { tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp)+tcp_max_burst(tp)); + tcp_packets_in_flight(tp) + tcp_max_burst(tp)); tp->snd_cwnd_stamp = tcp_time_stamp; } @@ -2243,15 +2245,15 @@ static void tcp_cwnd_down(struct sock *sk, int flag) struct tcp_sock *tp = tcp_sk(sk); int decr = tp->snd_cwnd_cnt + 1; - if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) || - (tcp_is_reno(tp) && !(flag&FLAG_NOT_DUP))) { - tp->snd_cwnd_cnt = decr&1; + if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) || + (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) { + tp->snd_cwnd_cnt = decr & 1; decr >>= 1; if (decr && tp->snd_cwnd > tcp_cwnd_min(sk)) tp->snd_cwnd -= decr; - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); + tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); tp->snd_cwnd_stamp = tcp_time_stamp; } } @@ -2295,7 +2297,7 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) if (icsk->icsk_ca_ops->undo_cwnd) tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); else - tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); + tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); if (undo && tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; @@ -2314,8 +2316,7 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) static inline int tcp_may_undo(struct tcp_sock *tp) { - return tp->undo_marker && - (!tp->undo_retrans || tcp_packet_delayed(tp)); + return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); } /* People celebrate: "We love our President!" */ @@ -2434,7 +2435,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) if (tp->retrans_out == 0) tp->retrans_stamp = 0; - if (flag&FLAG_ECE) + if (flag & FLAG_ECE) tcp_enter_cwr(sk, 1); if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { @@ -2480,7 +2481,6 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } - /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2492,13 +2492,12 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). 
*/ -static void -tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) +static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP)); - int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) && + int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); + int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); int fast_rexmit = 0; @@ -2509,7 +2508,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) /* Now state machine starts. * A. ECE, hence prohibit cwnd undoing, the reduction is required. */ - if (flag&FLAG_ECE) + if (flag & FLAG_ECE) tp->prior_ssthresh = 0; /* B. In all the states check for reneging SACKs. */ @@ -2521,7 +2520,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) before(tp->snd_una, tp->high_seq) && icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, 0); + tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } @@ -2581,7 +2580,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) do_lost = tcp_try_undo_partial(sk, pkts_acked); break; case TCP_CA_Loss: - if (flag&FLAG_DATA_ACKED) + if (flag & FLAG_DATA_ACKED) icsk->icsk_retransmits = 0; if (!tcp_try_undo_loss(sk)) { tcp_moderate_cwnd(tp); @@ -2631,7 +2630,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) tp->undo_retrans = tp->retrans_out; if (icsk->icsk_ca_state < TCP_CA_CWR) { - if (!(flag&FLAG_ECE)) + if (!(flag & FLAG_ECE)) tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); TCP_ECN_queue_cwr(tp); @@ -2725,7 +2724,8 @@ static void tcp_rearm_rto(struct sock *sk) if (!tp->packets_out) { inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); } else { - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, TCP_RTO_MAX); } } @@ -2803,8 +2803,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) flag |= FLAG_RETRANS_DATA_ACKED; ca_seq_rtt = -1; seq_rtt = -1; - if ((flag & FLAG_DATA_ACKED) || - (acked_pcount > 1)) + if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1)) flag |= FLAG_NONHEAD_RETRANS_ACKED; } else { ca_seq_rtt = now - scb->when; @@ -2950,8 +2949,9 @@ static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag) /* Check that window update is acceptable. * The function assumes that snd_una<=ack<=snd_next. 
*/ -static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, - const u32 ack_seq, const u32 nwin) +static inline int tcp_may_update_window(const struct tcp_sock *tp, + const u32 ack, const u32 ack_seq, + const u32 nwin) { return (after(ack, tp->snd_una) || after(ack_seq, tp->snd_wl1) || @@ -3020,7 +3020,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk) static void tcp_undo_spur_to_response(struct sock *sk, int flag) { - if (flag&FLAG_ECE) + if (flag & FLAG_ECE) tcp_ratehalving_spur_to_response(sk); else tcp_undo_cwr(sk, 1); @@ -3063,7 +3063,7 @@ static int tcp_process_frto(struct sock *sk, int flag) tcp_verify_left_out(tp); /* Duplicate the behavior from Loss state (fastretrans_alert) */ - if (flag&FLAG_DATA_ACKED) + if (flag & FLAG_DATA_ACKED) inet_csk(sk)->icsk_retransmits = 0; if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || @@ -3080,16 +3080,16 @@ static int tcp_process_frto(struct sock *sk, int flag) * ACK isn't duplicate nor advances window, e.g., opposite dir * data, winupdate */ - if (!(flag&FLAG_ANY_PROGRESS) && (flag&FLAG_NOT_DUP)) + if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) return 1; - if (!(flag&FLAG_DATA_ACKED)) { + if (!(flag & FLAG_DATA_ACKED)) { tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), flag); return 1; } } else { - if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { + if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { /* Prevent sending of new data. */ tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)); @@ -3097,10 +3097,12 @@ static int tcp_process_frto(struct sock *sk, int flag) } if ((tp->frto_counter >= 2) && - (!(flag&FLAG_FORWARD_PROGRESS) || - ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) { + (!(flag & FLAG_FORWARD_PROGRESS) || + ((flag & FLAG_DATA_SACKED) && + !(flag & FLAG_ONLY_ORIG_SACKED)))) { /* RFC4138 shortcoming (see comment above) */ - if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP)) + if (!(flag & FLAG_FORWARD_PROGRESS) && + (flag & FLAG_NOT_DUP)) return 1; tcp_enter_frto_loss(sk, 3, flag); @@ -3166,13 +3168,14 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tp->bytes_acked += ack - prior_snd_una; else if (icsk->icsk_ca_state == TCP_CA_Loss) /* we assume just one segment left network */ - tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache); + tp->bytes_acked += min(ack - prior_snd_una, + tp->mss_cache); } prior_fackets = tp->fackets_out; prior_in_flight = tcp_packets_in_flight(tp); - if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { + if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. * Note, we use the fact that SND.UNA>=SND.WL2. @@ -3224,13 +3227,14 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); - tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag); + tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, + flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight); } - if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) dst_confirm(sk->sk_dst_cache); return 1; @@ -3255,22 +3259,22 @@ uninteresting_ack: return 0; } - /* Look for tcp options. Normally only called on SYN and SYNACK packets. 
* But, this can also be called on packets in the established flow when * the fast version below fails. */ -void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab) +void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, + int estab) { unsigned char *ptr; struct tcphdr *th = tcp_hdr(skb); - int length=(th->doff*4)-sizeof(struct tcphdr); + int length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (unsigned char *)(th + 1); opt_rx->saw_tstamp = 0; while (length > 0) { - int opcode=*ptr++; + int opcode = *ptr++; int opsize; switch (opcode) { @@ -3359,7 +3363,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, struct tcp_sock *tp) { - if (th->doff == sizeof(struct tcphdr)>>2) { + if (th->doff == sizeof(struct tcphdr) >> 2) { tp->rx_opt.saw_tstamp = 0; return 0; } else if (tp->rx_opt.tstamp_ok && @@ -3444,7 +3448,8 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); } -static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb) +static inline int tcp_paws_discard(const struct sock *sk, + const struct sk_buff *skb) { const struct tcp_sock *tp = tcp_sk(sk); return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && @@ -3476,16 +3481,16 @@ static void tcp_reset(struct sock *sk) { /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { - case TCP_SYN_SENT: - sk->sk_err = ECONNREFUSED; - break; - case TCP_CLOSE_WAIT: - sk->sk_err = EPIPE; - break; - case TCP_CLOSE: - return; - default: - sk->sk_err = ECONNRESET; + case TCP_SYN_SENT: + sk->sk_err = ECONNREFUSED; + break; + case TCP_CLOSE_WAIT: + sk->sk_err = EPIPE; + break; + case TCP_CLOSE: + return; + default: + sk->sk_err = ECONNRESET; } if (!sock_flag(sk, SOCK_DEAD)) @@ -3518,43 +3523,43 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) sock_set_flag(sk, SOCK_DONE); switch (sk->sk_state) { - case TCP_SYN_RECV: - case TCP_ESTABLISHED: - /* Move to CLOSE_WAIT */ - tcp_set_state(sk, TCP_CLOSE_WAIT); - inet_csk(sk)->icsk_ack.pingpong = 1; - break; + case TCP_SYN_RECV: + case TCP_ESTABLISHED: + /* Move to CLOSE_WAIT */ + tcp_set_state(sk, TCP_CLOSE_WAIT); + inet_csk(sk)->icsk_ack.pingpong = 1; + break; - case TCP_CLOSE_WAIT: - case TCP_CLOSING: - /* Received a retransmission of the FIN, do - * nothing. - */ - break; - case TCP_LAST_ACK: - /* RFC793: Remain in the LAST-ACK state. */ - break; + case TCP_CLOSE_WAIT: + case TCP_CLOSING: + /* Received a retransmission of the FIN, do + * nothing. + */ + break; + case TCP_LAST_ACK: + /* RFC793: Remain in the LAST-ACK state. */ + break; - case TCP_FIN_WAIT1: - /* This case occurs when a simultaneous close - * happens, we must ack the received FIN and - * enter the CLOSING state. - */ - tcp_send_ack(sk); - tcp_set_state(sk, TCP_CLOSING); - break; - case TCP_FIN_WAIT2: - /* Received a FIN -- send ACK and enter TIME_WAIT. */ - tcp_send_ack(sk); - tcp_time_wait(sk, TCP_TIME_WAIT, 0); - break; - default: - /* Only TCP_LISTEN and TCP_CLOSE are left, in these - * cases we should never reach this piece of code. 
- */ - printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", - __FUNCTION__, sk->sk_state); - break; + case TCP_FIN_WAIT1: + /* This case occurs when a simultaneous close + * happens, we must ack the received FIN and + * enter the CLOSING state. + */ + tcp_send_ack(sk); + tcp_set_state(sk, TCP_CLOSING); + break; + case TCP_FIN_WAIT2: + /* Received a FIN -- send ACK and enter TIME_WAIT. */ + tcp_send_ack(sk); + tcp_time_wait(sk, TCP_TIME_WAIT, 0); + break; + default: + /* Only TCP_LISTEN and TCP_CLOSE are left, in these + * cases we should never reach this piece of code. + */ + printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", + __FUNCTION__, sk->sk_state); + break; } /* It _is_ possible, that we have something out-of-order _after_ FIN. @@ -3577,7 +3582,8 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) } } -static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq) +static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, + u32 end_seq) { if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { if (before(seq, sp->start_seq)) @@ -3600,7 +3606,8 @@ static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) tp->rx_opt.dsack = 1; tp->duplicate_sack[0].start_seq = seq; tp->duplicate_sack[0].end_seq = end_seq; - tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, + 4 - tp->rx_opt.tstamp_ok); } } @@ -3640,12 +3647,12 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) { int this_sack; struct tcp_sack_block *sp = &tp->selective_acks[0]; - struct tcp_sack_block *swalk = sp+1; + struct tcp_sack_block *swalk = sp + 1; /* See if the recent change to the first SACK eats into * or hits the sequence space of other SACK blocks, if so coalesce. */ - for (this_sack = 1; this_sack < tp->rx_opt.num_sacks; ) { + for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) { if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) { int i; @@ -3653,16 +3660,19 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) * Decrease num_sacks. */ tp->rx_opt.num_sacks--; - tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); - for (i=this_sack; i < tp->rx_opt.num_sacks; i++) - sp[i] = sp[i+1]; + tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + + tp->rx_opt.dsack, + 4 - tp->rx_opt.tstamp_ok); + for (i = this_sack; i < tp->rx_opt.num_sacks; i++) + sp[i] = sp[i + 1]; continue; } this_sack++, swalk++; } } -static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) +static inline void tcp_sack_swap(struct tcp_sack_block *sack1, + struct tcp_sack_block *sack2) { __u32 tmp; @@ -3685,11 +3695,11 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) if (!cur_sacks) goto new_sack; - for (this_sack=0; this_sack0; this_sack--, sp--) - tcp_sack_swap(sp, sp-1); + for (; this_sack > 0; this_sack--, sp--) + tcp_sack_swap(sp, sp - 1); if (cur_sacks > 1) tcp_sack_maybe_coalesce(tp); return; @@ -3708,14 +3718,15 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) sp--; } for (; this_sack > 0; this_sack--, sp--) - *sp = *(sp-1); + *sp = *(sp - 1); new_sack: /* Build the new head SACK, and we're done. 
*/ sp->start_seq = seq; sp->end_seq = end_seq; tp->rx_opt.num_sacks++; - tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, + 4 - tp->rx_opt.tstamp_ok); } /* RCV.NXT advances, some SACKs should be eaten. */ @@ -3733,7 +3744,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) return; } - for (this_sack = 0; this_sack < num_sacks; ) { + for (this_sack = 0; this_sack < num_sacks;) { /* Check if the start of the sack is covered by RCV.NXT. */ if (!before(tp->rcv_nxt, sp->start_seq)) { int i; @@ -3752,7 +3763,9 @@ static void tcp_sack_remove(struct tcp_sock *tp) } if (num_sacks != tp->rx_opt.num_sacks) { tp->rx_opt.num_sacks = num_sacks; - tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok); + tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + + tp->rx_opt.dsack, + 4 - tp->rx_opt.tstamp_ok); } } @@ -3805,14 +3818,14 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) goto drop; - __skb_pull(skb, th->doff*4); + __skb_pull(skb, th->doff * 4); TCP_ECN_accept_cwr(tp, skb); if (tp->rx_opt.dsack) { tp->rx_opt.dsack = 0; tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks, - 4 - tp->rx_opt.tstamp_ok); + 4 - tp->rx_opt.tstamp_ok); } /* Queue data for delivery to the user. @@ -3828,7 +3841,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) tp->copied_seq == tp->rcv_nxt && tp->ucopy.len && sock_owned_by_user(sk) && !tp->urg_data) { int chunk = min_t(unsigned int, skb->len, - tp->ucopy.len); + tp->ucopy.len); __set_current_state(TASK_RUNNING); @@ -3945,7 +3958,7 @@ drop: tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq; } - __skb_queue_head(&tp->out_of_order_queue,skb); + __skb_queue_head(&tp->out_of_order_queue, skb); } else { struct sk_buff *skb1 = tp->out_of_order_queue.prev; u32 seq = TCP_SKB_CB(skb)->seq; @@ -3968,10 +3981,10 @@ drop: if (!after(TCP_SKB_CB(skb1)->seq, seq)) break; } while ((skb1 = skb1->prev) != - (struct sk_buff*)&tp->out_of_order_queue); + (struct sk_buff *)&tp->out_of_order_queue); /* Do skb overlap to previous one? */ - if (skb1 != (struct sk_buff*)&tp->out_of_order_queue && + if (skb1 != (struct sk_buff *)&tp->out_of_order_queue && before(seq, TCP_SKB_CB(skb1)->end_seq)) { if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { /* All the bits are present. Drop. */ @@ -3981,7 +3994,8 @@ drop: } if (after(seq, TCP_SKB_CB(skb1)->seq)) { /* Partial overlap. */ - tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq); + tcp_dsack_set(tp, seq, + TCP_SKB_CB(skb1)->end_seq); } else { skb1 = skb1->prev; } @@ -3990,15 +4004,17 @@ drop: /* And clean segments covered by new one as whole. 
*/ while ((skb1 = skb->next) != - (struct sk_buff*)&tp->out_of_order_queue && + (struct sk_buff *)&tp->out_of_order_queue && after(end_seq, TCP_SKB_CB(skb1)->seq)) { - if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq); - break; - } - __skb_unlink(skb1, &tp->out_of_order_queue); - tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); - __kfree_skb(skb1); + if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, + end_seq); + break; + } + __skb_unlink(skb1, &tp->out_of_order_queue); + tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, + TCP_SKB_CB(skb1)->end_seq); + __kfree_skb(skb1); } add_sack: @@ -4021,7 +4037,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, /* First, check that queue is collapsible and find * the point where collapsing can be useful. */ - for (skb = head; skb != tail; ) { + for (skb = head; skb != tail;) { /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { struct sk_buff *next = skb->next; @@ -4059,9 +4075,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, /* Too big header? This can happen with IPv6. */ if (copy < 0) return; - if (end-start < copy) - copy = end-start; - nskb = alloc_skb(copy+header, GFP_ATOMIC); + if (end - start < copy) + copy = end - start; + nskb = alloc_skb(copy + header, GFP_ATOMIC); if (!nskb) return; @@ -4171,7 +4187,7 @@ static int tcp_prune_queue(struct sock *sk) tcp_collapse_ofo_queue(sk); tcp_collapse(sk, &sk->sk_receive_queue, sk->sk_receive_queue.next, - (struct sk_buff*)&sk->sk_receive_queue, + (struct sk_buff *)&sk->sk_receive_queue, tp->copied_seq, tp->rcv_nxt); sk_mem_reclaim(sk); @@ -4210,7 +4226,6 @@ static int tcp_prune_queue(struct sock *sk) return -1; } - /* RFC2861, slow part. Adjust cwnd, after it was not full during one rto. * As additional protections, we do not touch cwnd in retransmission phases, * and if application hit its sndbuf limit recently. @@ -4272,8 +4287,8 @@ static void tcp_new_space(struct sock *sk) int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), demanded = max_t(unsigned int, tp->snd_cwnd, - tp->reordering + 1); - sndmem *= 2*demanded; + tp->reordering + 1); + sndmem *= 2 * demanded; if (sndmem > sk->sk_sndbuf) sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); tp->snd_cwnd_stamp = tcp_time_stamp; @@ -4314,8 +4329,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* We have out of order data. */ - (ofo_possible && - skb_peek(&tp->out_of_order_queue))) { + (ofo_possible && skb_peek(&tp->out_of_order_queue))) { /* Then ack it now */ tcp_send_ack(sk); } else { @@ -4343,7 +4357,7 @@ static inline void tcp_ack_snd_check(struct sock *sk) * either form (or just set the sysctl tcp_stdurg). */ -static void tcp_check_urg(struct sock * sk, struct tcphdr * th) +static void tcp_check_urg(struct sock *sk, struct tcphdr *th) { struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); @@ -4392,8 +4406,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) * buggy users. 
*/ if (tp->urg_seq == tp->copied_seq && tp->urg_data && - !sock_flag(sk, SOCK_URGINLINE) && - tp->copied_seq != tp->rcv_nxt) { + !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) { struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); tp->copied_seq++; if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) { @@ -4402,8 +4415,8 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) } } - tp->urg_data = TCP_URG_NOTYET; - tp->urg_seq = ptr; + tp->urg_data = TCP_URG_NOTYET; + tp->urg_seq = ptr; /* Disable header prediction. */ tp->pred_flags = 0; @@ -4416,7 +4429,7 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) /* Check if we get a new urgent pointer - normally not. */ if (th->urg) - tcp_check_urg(sk,th); + tcp_check_urg(sk, th); /* Do we wait for any urgent data? - normally not... */ if (tp->urg_data == TCP_URG_NOTYET) { @@ -4458,7 +4471,8 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) return err; } -static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) +static __sum16 __tcp_checksum_complete_user(struct sock *sk, + struct sk_buff *skb) { __sum16 result; @@ -4472,14 +4486,16 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb return result; } -static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) +static inline int tcp_checksum_complete_user(struct sock *sk, + struct sk_buff *skb) { return !skb_csum_unnecessary(skb) && - __tcp_checksum_complete_user(sk, skb); + __tcp_checksum_complete_user(sk, skb); } #ifdef CONFIG_NET_DMA -static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen) +static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, + int hlen) { struct tcp_sock *tp = tcp_sk(sk); int chunk = skb->len - hlen; @@ -4495,7 +4511,9 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan, - skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list); + skb, hlen, + tp->ucopy.iov, chunk, + tp->ucopy.pinned_list); if (dma_cookie < 0) goto out; @@ -4577,7 +4595,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, */ if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && - TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { + TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { int tcp_header_len = tp->tcp_header_len; /* Timestamp header prediction: tcp_header_len @@ -4646,7 +4664,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, eaten = 1; } #endif - if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) { + if (tp->ucopy.task == current && + sock_owned_by_user(sk) && !copied_early) { __set_current_state(TASK_RUNNING); if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) @@ -4693,7 +4712,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ - __skb_pull(skb,tcp_header_len); + __skb_pull(skb, tcp_header_len); __skb_queue_tail(&sk->sk_receive_queue, skb); skb_set_owner_r(skb, sk); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; @@ -4725,7 +4744,7 @@ no_ack: } slow_path: - if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb)) + if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) goto csum_error; /* @@ -4975,7 +4994,8 @@ discard: } /* PAWS check. 
*/ - if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0)) + if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && + tcp_paws_check(&tp->rx_opt, 0)) goto discard_and_undo; if (th->syn) { @@ -5010,7 +5030,6 @@ discard: tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); - tcp_send_synack(sk); #if 0 /* Note, we could accept data and URG from this segment. @@ -5042,7 +5061,6 @@ reset_and_undo: return 1; } - /* * This function implements the receiving procedure of RFC 793 for * all states except ESTABLISHED and TIME_WAIT. @@ -5164,7 +5182,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (sk->sk_socket) sk_wake_async(sk, - SOCK_WAKE_IO, POLL_OUT); + SOCK_WAKE_IO, POLL_OUT); tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = ntohs(th->window) << @@ -5176,8 +5194,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * and does not calculate rtt. * Fix it at least with timestamps. */ - if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && - !tp->srtt) + if (tp->rx_opt.saw_tstamp && + tp->rx_opt.rcv_tsecr && !tp->srtt) tcp_ack_saw_tstamp(sk, 0); if (tp->rx_opt.tstamp_ok) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 454cf84b615..bb7e80a284e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -221,14 +221,14 @@ void tcp_select_initial_window(int __space, __u32 mss, * following RFC2414. Senders, not following this RFC, * will be satisfied with 2. */ - if (mss > (1<<*rcv_wscale)) { + if (mss > (1 << *rcv_wscale)) { int init_cwnd = 4; - if (mss > 1460*3) + if (mss > 1460 * 3) init_cwnd = 2; else if (mss > 1460) init_cwnd = 3; - if (*rcv_wnd > init_cwnd*mss) - *rcv_wnd = init_cwnd*mss; + if (*rcv_wnd > init_cwnd * mss) + *rcv_wnd = init_cwnd * mss; } /* Set the clamp no higher than max representable value */ @@ -278,11 +278,10 @@ static u16 tcp_select_window(struct sock *sk) return new_win; } -static inline void TCP_ECN_send_synack(struct tcp_sock *tp, - struct sk_buff *skb) +static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) { TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; - if (!(tp->ecn_flags&TCP_ECN_OK)) + if (!(tp->ecn_flags & TCP_ECN_OK)) TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; } @@ -292,7 +291,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) tp->ecn_flags = 0; if (sysctl_tcp_ecn) { - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR; tp->ecn_flags = TCP_ECN_OK; } } @@ -314,7 +313,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, if (skb->len != tcp_header_len && !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) { INET_ECN_xmit(sk); - if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) { + if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) { tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; tcp_hdr(skb)->cwr = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; @@ -431,7 +430,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); - *md5_hash = (__u8 *) ptr; + *md5_hash = (__u8 *)ptr; } #endif } @@ -447,7 +446,8 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack, * We are working here with either a clone of the original * SKB, or a fresh unique copy made by the retransmit engine. 
*/ -static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask) +static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, + gfp_t gfp_mask) { const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet; @@ -551,8 +551,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->urg_ptr = 0; if (unlikely(tp->urg_mode && - between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) { - th->urg_ptr = htons(tp->snd_up-tcb->seq); + between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { + th->urg_ptr = htons(tp->snd_up - tcb->seq); th->urg = 1; } @@ -616,7 +616,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, #undef SYSCTL_FLAG_SACK } - /* This routine just queue's the buffer * * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, @@ -634,7 +633,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) sk_mem_charge(sk, skb->truesize); } -static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) +static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, + unsigned int mss_now) { if (skb->len <= mss_now || !sk_can_gso(sk)) { /* Avoid the costly divide in the normal @@ -670,7 +670,8 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, * packet to the list. This won't be called frequently, I hope. * Remember, these are still headerless SKBs at this point. */ -int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now) +int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, + unsigned int mss_now) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; @@ -708,13 +709,14 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss /* PSH and FIN should only be set in the second packet. */ flags = TCP_SKB_CB(skb)->flags; - TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); + TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); TCP_SKB_CB(buff)->flags = flags; TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { /* Copy and checksum data tail into the new buffer. */ - buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), + buff->csum = csum_partial_copy_nocheck(skb->data + len, + skb_put(buff, nsize), nsize, 0); skb_trim(skb, len); @@ -781,7 +783,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) eat = len; k = 0; - for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { if (skb_shinfo(skb)->frags[i].size <= eat) { put_page(skb_shinfo(skb)->frags[i].page); eat -= skb_shinfo(skb)->frags[i].size; @@ -804,8 +806,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { - if (skb_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return -ENOMEM; /* If len == headlen, we avoid __skb_pull to preserve alignment. */ @@ -909,7 +910,6 @@ void tcp_mtup_init(struct sock *sk) NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache are READ ONLY outside this function.
--ANK (980731) */ - unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) { struct tcp_sock *tp = tcp_sk(sk); @@ -922,8 +922,8 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) mss_now = tcp_mtu_to_mss(sk, pmtu); /* Bound mss with half of window */ - if (tp->max_window && mss_now > (tp->max_window>>1)) - mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len); + if (tp->max_window && mss_now > (tp->max_window >> 1)) + mss_now = max((tp->max_window >> 1), 68U - tp->tcp_header_len); /* And store cached results */ icsk->icsk_pmtu_cookie = pmtu; @@ -977,8 +977,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) inet_csk(sk)->icsk_ext_hdr_len - tp->tcp_header_len); - if (tp->max_window && - (xmit_size_goal > (tp->max_window >> 1))) + if (tp->max_window && (xmit_size_goal > (tp->max_window >> 1))) xmit_size_goal = max((tp->max_window >> 1), 68U - tp->tcp_header_len); @@ -990,7 +989,6 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) } /* Congestion window validation. (RFC2861) */ - static void tcp_cwnd_validate(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -1017,8 +1015,7 @@ static void tcp_cwnd_validate(struct sock *sk) * per input skb which could be mostly avoided here (if desired). */ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, - unsigned int mss_now, - unsigned int cwnd) + unsigned int mss_now, unsigned int cwnd) { struct tcp_sock *tp = tcp_sk(sk); u32 needed, window, cwnd_len; @@ -1039,7 +1036,8 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, /* Can at least one segment of SKB be sent right now, according to the * congestion window rules? If so, return how many segments are allowed. */ -static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb) +static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, + struct sk_buff *skb) { u32 in_flight, cwnd; @@ -1059,13 +1057,12 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk /* This must be invoked the first time we consider transmitting * SKB onto the wire. */ -static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) +static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, + unsigned int mss_now) { int tso_segs = tcp_skb_pcount(skb); - if (!tso_segs || - (tso_segs > 1 && - tcp_skb_mss(skb) != mss_now)) { + if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) { tcp_set_skb_tso_segs(sk, skb, mss_now); tso_segs = tcp_skb_pcount(skb); } @@ -1085,16 +1082,13 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp) * 4. Or TCP_CORK is not set, and all sent packets are ACKed. * With Minshall's modification: all sent small packets are ACKed. */ - static inline int tcp_nagle_check(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned mss_now, int nonagle) { return (skb->len < mss_now && - ((nonagle&TCP_NAGLE_CORK) || - (!nonagle && - tp->packets_out && - tcp_minshall_check(tp)))); + ((nonagle & TCP_NAGLE_CORK) || + (!nonagle && tp->packets_out && tcp_minshall_check(tp)))); } /* Return non-zero if the Nagle test allows this packet to be @@ -1126,7 +1120,8 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, } /* Does at least the first segment of SKB fit into the send window? 
*/ -static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss) +static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, + unsigned int cur_mss) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -1152,8 +1147,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, return 0; cwnd_quota = tcp_cwnd_test(tp, skb); - if (cwnd_quota && - !tcp_snd_wnd_test(tp, skb, cur_mss)) + if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss)) cwnd_quota = 0; return cwnd_quota; @@ -1177,7 +1171,8 @@ int tcp_may_send_now(struct sock *sk) * know that all the data is in scatter-gather pages, and that the * packet has never been sent out before (and thus is not cloned). */ -static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now) +static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, + unsigned int mss_now) { struct sk_buff *buff; int nlen = skb->len - len; @@ -1203,7 +1198,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* PSH and FIN should only be set in the second packet. */ flags = TCP_SKB_CB(skb)->flags; - TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); + TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); TCP_SKB_CB(buff)->flags = flags; /* This packet was never sent out yet, so no SACK bits. */ @@ -1247,8 +1242,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) in_flight = tcp_packets_in_flight(tp); - BUG_ON(tcp_skb_pcount(skb) <= 1 || - (tp->snd_cwnd <= in_flight)); + BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight)); send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; @@ -1281,7 +1275,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) } /* Ok, it looks like it is advisable to defer. */ - tp->tso_deferred = 1 | (jiffies<<1); + tp->tso_deferred = 1 | (jiffies << 1); return 1; @@ -1293,7 +1287,8 @@ send_now: /* Create a new MTU probe if we are ready. * Returns 0 if we should wait to probe (no cwnd available), * 1 if a probe was sent, - * -1 otherwise */ + * -1 otherwise + */ static int tcp_mtu_probe(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -1318,7 +1313,7 @@ static int tcp_mtu_probe(struct sock *sk) /* Very simple search strategy: just double the MSS. */ mss_now = tcp_current_mss(sk, 0); - probe_size = 2*tp->mss_cache; + probe_size = 2 * tp->mss_cache; size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { /* TODO: set timer for probe_converge_event */ @@ -1366,7 +1361,8 @@ static int tcp_mtu_probe(struct sock *sk) skb_copy_bits(skb, 0, skb_put(nskb, copy), copy); else nskb->csum = skb_copy_and_csum_bits(skb, 0, - skb_put(nskb, copy), copy, nskb->csum); + skb_put(nskb, copy), + copy, nskb->csum); if (skb->len <= copy) { /* We've eaten all the data from this skb. @@ -1380,7 +1376,8 @@ static int tcp_mtu_probe(struct sock *sk) if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); if (skb->ip_summed != CHECKSUM_PARTIAL) - skb->csum = csum_partial(skb->data, skb->len, 0); + skb->csum = csum_partial(skb->data, + skb->len, 0); } else { __pskb_trim_head(skb, copy); tcp_set_skb_tso_segs(sk, skb, mss_now); @@ -1400,7 +1397,7 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->when = tcp_time_stamp; if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) { /* Decrement cwnd here because we are sending - * effectively two packets. 
*/ + * effectively two packets. */ tp->snd_cwnd--; tcp_event_new_data_sent(sk, nskb); @@ -1414,7 +1411,6 @@ static int tcp_mtu_probe(struct sock *sk) return -1; } - /* This routine writes packets to the network. It advances the * send_head. This happens as incoming acks open up the remote * window for us. @@ -1626,7 +1622,8 @@ u32 __tcp_select_window(struct sock *sk) icsk->icsk_ack.quick = 0; if (tcp_memory_pressure) - tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); + tp->rcv_ssthresh = min(tp->rcv_ssthresh, + 4U * tp->advmss); if (free_space < mss) return 0; @@ -1659,7 +1656,7 @@ u32 __tcp_select_window(struct sock *sk) * is too small. */ if (window <= free_space - mss || window > free_space) - window = (free_space/mss)*mss; + window = (free_space / mss) * mss; else if (mss == full_space && free_space > window + (full_space >> 1)) window = free_space; @@ -1669,7 +1666,8 @@ u32 __tcp_select_window(struct sock *sk) } /* Attempt to collapse two adjacent SKB's during retransmission. */ -static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now) +static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, + int mss_now) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); @@ -1762,12 +1760,12 @@ void tcp_simple_retransmit(struct sock *sk) if (skb == tcp_send_head(sk)) break; if (skb->len > mss && - !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { - if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { + !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); } - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) { + if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); lost = 1; @@ -1846,8 +1844,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) (skb->len < (cur_mss >> 1)) && (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && (!tcp_skb_is_last(sk, skb)) && - (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && - (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) && + (skb_shinfo(skb)->nr_frags == 0 && + skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && + (tcp_skb_pcount(skb) == 1 && + tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) && (sysctl_tcp_retrans_collapse != 0)) tcp_retrans_try_collapse(sk, skb, cur_mss); @@ -1885,7 +1885,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) tp->total_retrans++; #if FASTRETRANS_DEBUG > 0 - if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { if (net_ratelimit()) printk(KERN_DEBUG "retrans_out leaked.\n"); } @@ -1927,7 +1927,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (tp->retransmit_skb_hint) { skb = tp->retransmit_skb_hint; packet_cnt = tp->retransmit_cnt_hint; - }else{ + } else { skb = tcp_write_queue_head(sk); packet_cnt = 0; } @@ -1954,7 +1954,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) return; if (sacked & TCPCB_LOST) { - if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { + if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { if (tcp_retransmit_skb(sk, skb)) { tp->retransmit_skb_hint = NULL; return; @@ -2036,7 +2036,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk) } } - /* Send a fin. The caller locks the socket for us. 
This cannot be * allowed to fail queueing a FIN frame under any circumstances. */ @@ -2122,14 +2121,14 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) */ int tcp_send_synack(struct sock *sk) { - struct sk_buff* skb; + struct sk_buff *skb; skb = tcp_write_queue_head(sk); - if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) { + if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) { printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); return -EFAULT; } - if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) { + if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) { if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); if (nskb == NULL) @@ -2153,8 +2152,8 @@ int tcp_send_synack(struct sock *sk) /* * Prepare a SYN-ACK. */ -struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, - struct request_sock *req) +struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, + struct request_sock *req) { struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); @@ -2372,9 +2371,10 @@ void tcp_send_delayed_ack(struct sock *sk) if (ato > TCP_DELACK_MIN) { const struct tcp_sock *tp = tcp_sk(sk); - int max_ato = HZ/2; + int max_ato = HZ / 2; - if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) + if (icsk->icsk_ack.pingpong || + (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) max_ato = TCP_DELACK_MAX; /* Slow path, intersegment interval is "high". */ @@ -2384,7 +2384,7 @@ void tcp_send_delayed_ack(struct sock *sk) * directly. */ if (tp->srtt) { - int rtt = max(tp->srtt>>3, TCP_DELACK_MIN); + int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN); if (rtt < max_ato) max_ato = rtt; -- cgit v1.2.3-70-g09d2 From f038ac8f9b9735358ef410d45f4cff1da810c1cb Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 3 Jan 2008 20:36:55 -0800 Subject: [TCP]: cleanup tcp_parse_options deep indented switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removed case indentation level & combined some nested ifs, mostly within 80 lines now. This is a leftover from indent patch, it just had to be done manually to avoid messing it up completely. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- net/ipv4/tcp_input.c | 135 +++++++++++++++++++++++++-------------------------- 1 file changed, 67 insertions(+), 68 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 18e099c6fa6..fa2c85ca5bc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3278,81 +3278,80 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int opsize; switch (opcode) { - case TCPOPT_EOL: + case TCPOPT_EOL: + return; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + opsize = *ptr++; + if (opsize < 2) /* "silly options" */ return; - case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ - length--; - continue; - default: - opsize=*ptr++; - if (opsize < 2) /* "silly options" */ - return; - if (opsize > length) - return; /* don't parse partial options */ - switch (opcode) { - case TCPOPT_MSS: - if (opsize==TCPOLEN_MSS && th->syn && !estab) { - u16 in_mss = ntohs(get_unaligned((__be16 *)ptr)); - if (in_mss) { - if (opt_rx->user_mss && opt_rx->user_mss < in_mss) - in_mss = opt_rx->user_mss; - opt_rx->mss_clamp = in_mss; - } - } - break; - case TCPOPT_WINDOW: - if (opsize==TCPOLEN_WINDOW && th->syn && !estab) - if (sysctl_tcp_window_scaling) { - __u8 snd_wscale = *(__u8 *) ptr; - opt_rx->wscale_ok = 1; - if (snd_wscale > 14) { - if (net_ratelimit()) - printk(KERN_INFO "tcp_parse_options: Illegal window " - "scaling value %d >14 received.\n", - snd_wscale); - snd_wscale = 14; - } - opt_rx->snd_wscale = snd_wscale; - } - break; - case TCPOPT_TIMESTAMP: - if (opsize==TCPOLEN_TIMESTAMP) { - if ((estab && opt_rx->tstamp_ok) || - (!estab && sysctl_tcp_timestamps)) { - opt_rx->saw_tstamp = 1; - opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr)); - opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4))); - } + if (opsize > length) + return; /* don't parse partial options */ + switch (opcode) { + case TCPOPT_MSS: + if (opsize == TCPOLEN_MSS && th->syn && !estab) { + u16 in_mss = ntohs(get_unaligned((__be16 *)ptr)); + if (in_mss) { + if (opt_rx->user_mss && + opt_rx->user_mss < in_mss) + in_mss = opt_rx->user_mss; + opt_rx->mss_clamp = in_mss; } - break; - case TCPOPT_SACK_PERM: - if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) { - if (sysctl_tcp_sack) { - opt_rx->sack_ok = 1; - tcp_sack_reset(opt_rx); - } + } + break; + case TCPOPT_WINDOW: + if (opsize == TCPOLEN_WINDOW && th->syn && + !estab && sysctl_tcp_window_scaling) { + __u8 snd_wscale = *(__u8 *)ptr; + opt_rx->wscale_ok = 1; + if (snd_wscale > 14) { + if (net_ratelimit()) + printk(KERN_INFO "tcp_parse_options: Illegal window " + "scaling value %d >14 received.\n", + snd_wscale); + snd_wscale = 14; } - break; + opt_rx->snd_wscale = snd_wscale; + } + break; + case TCPOPT_TIMESTAMP: + if ((opsize == TCPOLEN_TIMESTAMP) && + ((estab && opt_rx->tstamp_ok) || + (!estab && sysctl_tcp_timestamps))) { + opt_rx->saw_tstamp = 1; + opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr)); + opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4))); + } + break; + case TCPOPT_SACK_PERM: + if (opsize == TCPOLEN_SACK_PERM && th->syn && + !estab && sysctl_tcp_sack) { + opt_rx->sack_ok = 1; + tcp_sack_reset(opt_rx); + } + break; - case TCPOPT_SACK: - if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) && - !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) && - opt_rx->sack_ok) { - TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; - } - break; + case TCPOPT_SACK: + if ((opsize >= (TCPOLEN_SACK_BASE + 
TCPOLEN_SACK_PERBLOCK)) && + !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) && + opt_rx->sack_ok) { + TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th; + } + break; #ifdef CONFIG_TCP_MD5SIG - case TCPOPT_MD5SIG: - /* - * The MD5 Hash has already been - * checked (see tcp_v{4,6}_do_rcv()). - */ - break; + case TCPOPT_MD5SIG: + /* + * The MD5 Hash has already been + * checked (see tcp_v{4,6}_do_rcv()). + */ + break; #endif - } + } - ptr+=opsize-2; - length-=opsize; + ptr += opsize-2; + length -= opsize; } } } -- cgit v1.2.3-70-g09d2
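
A minimal user-space sketch of the layout the last patch converts tcp_parse_options() to: case labels at the same indentation level as the switch keyword, and the per-option guards merged into single if () conditions. This is illustration only, not kernel code; parse_opts(), the OPT_* constants and the sample buffer are invented for the example, and the real parser remains tcp_parse_options() in net/ipv4/tcp_input.c.

#include <stdint.h>
#include <stdio.h>

#define OPT_EOL		0	/* end of option list */
#define OPT_NOP		1	/* one byte of padding */
#define OPT_MSS		2	/* kind, length 4, 16-bit value */
#define OPT_WSCALE	3	/* kind, length 3, 8-bit shift count */

static void parse_opts(const uint8_t *ptr, int length)
{
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case OPT_EOL:
			return;
		case OPT_NOP:		/* no length field */
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2)		/* "silly options" */
				return;
			if (opsize > length)	/* don't parse partial options */
				return;
			switch (opcode) {
			case OPT_MSS:
				/* guards combined into one condition, as in the patch */
				if (opsize == 4)
					printf("MSS %u\n",
					       (unsigned int)((ptr[0] << 8) | ptr[1]));
				break;
			case OPT_WSCALE:
				if (opsize == 3 && ptr[0] <= 14)
					printf("wscale %u\n", (unsigned int)ptr[0]);
				break;
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}

int main(void)
{
	/* NOP, NOP, MSS(1460), WSCALE(7), EOL */
	const uint8_t opts[] = { 1, 1, 2, 4, 0x05, 0xb4, 3, 3, 7, 0 };

	parse_opts(opts, (int)sizeof(opts));
	return 0;
}

Dropping the one extra indentation level is what lets each option's arm fit within the 80-column limit the changelog refers to; a cleanup of this kind leaves the parsing logic itself unchanged.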