aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c153
1 files changed, 72 insertions, 81 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index da2c3b8794f..53300fa2359 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -441,10 +441,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
*ptr++ = htonl(sp[this_sack].end_seq);
}
- if (tp->rx_opt.dsack) {
- tp->rx_opt.dsack = 0;
- tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
- }
+ tp->rx_opt.dsack = 0;
}
}
@@ -550,6 +547,7 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
struct tcp_sock *tp = tcp_sk(sk);
unsigned size = 0;
+ unsigned int eff_sacks;
#ifdef CONFIG_TCP_MD5SIG
*md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -568,10 +566,11 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
size += TCPOLEN_TSTAMP_ALIGNED;
}
- if (unlikely(tp->rx_opt.eff_sacks)) {
+ eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
+ if (unlikely(eff_sacks)) {
const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
opts->num_sack_blocks =
- min_t(unsigned, tp->rx_opt.eff_sacks,
+ min_t(unsigned, eff_sacks,
(remaining - TCPOLEN_SACK_BASE_ALIGNED) /
TCPOLEN_SACK_PERBLOCK);
size += TCPOLEN_SACK_BASE_ALIGNED +
@@ -663,10 +662,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->urg_ptr = 0;
/* The urg_mode check is necessary during a below snd_una win probe */
- if (unlikely(tcp_urg_mode(tp) &&
- between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
- th->urg_ptr = htons(tp->snd_up - tcb->seq);
- th->urg = 1;
+ if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
+ if (before(tp->snd_up, tcb->seq + 0x10000)) {
+ th->urg_ptr = htons(tp->snd_up - tcb->seq);
+ th->urg = 1;
+ } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
+ th->urg_ptr = 0xFFFF;
+ th->urg = 1;
+ }
}
tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
@@ -751,6 +754,36 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
tp->fackets_out -= decr;
}
+/* Pcount in the middle of the write queue got changed, we need to do various
+ * tweaks to fix counters
+ */
+static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tp->packets_out -= decr;
+
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+ tp->sacked_out -= decr;
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
+ tp->retrans_out -= decr;
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
+ tp->lost_out -= decr;
+
+ /* Reno case is special. Sigh... */
+ if (tcp_is_reno(tp) && decr > 0)
+ tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
+
+ tcp_adjust_fackets_out(sk, skb, decr);
+
+ if (tp->lost_skb_hint &&
+ before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
+ (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
+ tp->lost_cnt_hint -= decr;
+
+ tcp_verify_left_out(tp);
+}
+
/* Function to create two new TCP segments. Shrinks the given segment
* to the specified size and appends a new segment with the rest of the
* packet to the list. This won't be called frequently, I hope.
@@ -763,11 +796,10 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
struct sk_buff *buff;
int nsize, old_factor;
int nlen;
- u16 flags;
+ u8 flags;
BUG_ON(len > skb->len);
- tcp_clear_retrans_hints_partial(tp);
nsize = skb_headlen(skb) - len;
if (nsize < 0)
nsize = 0;
@@ -834,22 +866,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
int diff = old_factor - tcp_skb_pcount(skb) -
tcp_skb_pcount(buff);
- tp->packets_out -= diff;
-
- if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
- tp->sacked_out -= diff;
- if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
- tp->retrans_out -= diff;
-
- if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
- tp->lost_out -= diff;
-
- /* Adjust Reno SACK estimate. */
- if (tcp_is_reno(tp) && diff > 0) {
- tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
- tcp_verify_left_out(tp);
- }
- tcp_adjust_fackets_out(sk, skb, diff);
+ if (diff)
+ tcp_adjust_pcount(sk, skb, diff);
}
/* Link BUFF into the send queue. */
@@ -913,7 +931,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
* factor and mss.
*/
if (tcp_skb_pcount(skb) > 1)
- tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));
+ tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk));
return 0;
}
@@ -974,15 +992,6 @@ void tcp_mtup_init(struct sock *sk)
icsk->icsk_mtup.probe_size = 0;
}
-/* Bound MSS / TSO packet size with the half of the window */
-static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
-{
- if (tp->max_window && pktsize > (tp->max_window >> 1))
- return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
- else
- return pktsize;
-}
-
/* This function synchronize snd mss to current pmtu/exthdr set.
tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
@@ -1029,22 +1038,17 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
/* Compute the current effective MSS, taking SACKs and IP options,
* and even PMTU discovery events into account.
*/
-unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
+unsigned int tcp_current_mss(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct dst_entry *dst = __sk_dst_get(sk);
u32 mss_now;
- u16 xmit_size_goal;
- int doing_tso = 0;
unsigned header_len;
struct tcp_out_options opts;
struct tcp_md5sig_key *md5;
mss_now = tp->mss_cache;
- if (large_allowed && sk_can_gso(sk))
- doing_tso = 1;
-
if (dst) {
u32 mtu = dst_mtu(dst);
if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
@@ -1062,19 +1066,6 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
mss_now -= delta;
}
- xmit_size_goal = mss_now;
-
- if (doing_tso) {
- xmit_size_goal = ((sk->sk_gso_max_size - 1) -
- inet_csk(sk)->icsk_af_ops->net_header_len -
- inet_csk(sk)->icsk_ext_hdr_len -
- tp->tcp_header_len);
-
- xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
- xmit_size_goal -= (xmit_size_goal % mss_now);
- }
- tp->xmit_size_goal = xmit_size_goal;
-
return mss_now;
}
@@ -1256,7 +1247,7 @@ int tcp_may_send_now(struct sock *sk)
struct sk_buff *skb = tcp_send_head(sk);
return (skb &&
- tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
+ tcp_snd_test(sk, skb, tcp_current_mss(sk),
(tcp_skb_is_last(sk, skb) ?
tp->nonagle : TCP_NAGLE_PUSH)));
}
@@ -1273,7 +1264,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
{
struct sk_buff *buff;
int nlen = skb->len - len;
- u16 flags;
+ u8 flags;
/* All of a TSO frame must be composed of paged data. */
if (skb->len != skb->data_len)
@@ -1352,6 +1343,10 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
if (limit >= sk->sk_gso_max_size)
goto send_now;
+ /* Middle in queue won't get any more data, full sendable already? */
+ if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
+ goto send_now;
+
if (sysctl_tcp_tso_win_divisor) {
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
@@ -1405,11 +1400,11 @@ static int tcp_mtu_probe(struct sock *sk)
icsk->icsk_mtup.probe_size ||
inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
tp->snd_cwnd < 11 ||
- tp->rx_opt.eff_sacks)
+ tp->rx_opt.num_sacks || tp->rx_opt.dsack)
return -1;
/* Very simple search strategy: just double the MSS. */
- mss_now = tcp_current_mss(sk, 0);
+ mss_now = tcp_current_mss(sk);
probe_size = 2 * tp->mss_cache;
size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
@@ -1754,11 +1749,9 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
int skb_size, next_skb_size;
- u16 flags;
skb_size = skb->len;
next_skb_size = next_skb->len;
- flags = TCP_SKB_CB(skb)->flags;
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
@@ -1778,30 +1771,21 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
/* Update sequence range on original skb. */
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
- /* Merge over control information. */
- flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
- TCP_SKB_CB(skb)->flags = flags;
+ /* Merge over control information. This moves PSH/FIN etc. over */
+ TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags;
/* All done, get rid of second SKB and account for it so
* packet counting does not break.
*/
TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
- if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS)
- tp->retrans_out -= tcp_skb_pcount(next_skb);
- if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST)
- tp->lost_out -= tcp_skb_pcount(next_skb);
- /* Reno case is special. Sigh... */
- if (tcp_is_reno(tp) && tp->sacked_out)
- tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
-
- tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
- tp->packets_out -= tcp_skb_pcount(next_skb);
/* changed transmit queue under us so clear hints */
tcp_clear_retrans_hints_partial(tp);
if (next_skb == tp->retransmit_skb_hint)
tp->retransmit_skb_hint = skb;
+ tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
+
sk_wmem_free_skb(sk, next_skb);
}
@@ -1894,7 +1878,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
return -EHOSTUNREACH; /* Routing failure or similar. */
- cur_mss = tcp_current_mss(sk, 0);
+ cur_mss = tcp_current_mss(sk);
/* If receiver has shrunk his window, and skb is out of
* new window, do not retransmit it. The exception is the
@@ -1908,6 +1892,13 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (skb->len > cur_mss) {
if (tcp_fragment(sk, skb, cur_mss, cur_mss))
return -ENOMEM; /* We'll try again later. */
+ } else {
+ int oldpcount = tcp_skb_pcount(skb);
+
+ if (unlikely(oldpcount > 1)) {
+ tcp_init_tso_segs(sk, skb, cur_mss);
+ tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
+ }
}
tcp_retrans_try_collapse(sk, skb, cur_mss);
@@ -2061,7 +2052,7 @@ begin_fwd:
goto begin_fwd;
} else if (!(sacked & TCPCB_LOST)) {
- if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS))
+ if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
hole = skb;
continue;
@@ -2100,7 +2091,7 @@ void tcp_send_fin(struct sock *sk)
* unsent frames. But be careful about outgoing SACKS
* and IP options.
*/
- mss_now = tcp_current_mss(sk, 1);
+ mss_now = tcp_current_mss(sk);
if (tcp_send_head(sk) != NULL) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
@@ -2325,7 +2316,7 @@ static void tcp_connect_init(struct sock *sk)
sk->sk_err = 0;
sock_reset_flag(sk, SOCK_DONE);
tp->snd_wnd = 0;
- tcp_init_wl(tp, tp->write_seq, 0);
+ tcp_init_wl(tp, 0);
tp->snd_una = tp->write_seq;
tp->snd_sml = tp->write_seq;
tp->snd_up = tp->write_seq;
@@ -2512,7 +2503,7 @@ int tcp_write_wakeup(struct sock *sk)
if ((skb = tcp_send_head(sk)) != NULL &&
before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
int err;
- unsigned int mss = tcp_current_mss(sk, 0);
+ unsigned int mss = tcp_current_mss(sk);
unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))