aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c66
1 files changed, 37 insertions, 29 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3d61c52bdf7..179b51e6bda 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1074,7 +1074,7 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
* Remember, these are still headerless SKBs at this point.
*/
int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
- unsigned int mss_now)
+ unsigned int mss_now, gfp_t gfp)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
@@ -1089,11 +1089,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
if (nsize < 0)
nsize = 0;
- if (skb_unclone(skb, GFP_ATOMIC))
+ if (skb_unclone(skb, gfp))
return -ENOMEM;
/* Get a new skb... force flag on. */
- buff = sk_stream_alloc_skb(sk, nsize, GFP_ATOMIC);
+ buff = sk_stream_alloc_skb(sk, nsize, gfp);
if (buff == NULL)
return -ENOMEM; /* We'll just try again later. */
@@ -1402,11 +1402,19 @@ static void tcp_cwnd_application_limited(struct sock *sk)
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
+static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
{
struct tcp_sock *tp = tcp_sk(sk);
- tp->lsnd_pending = tp->packets_out + unsent_segs;
+ /* Track the maximum number of outstanding packets in each
+ * window, and remember whether we were cwnd-limited then.
+ */
+ if (!before(tp->snd_una, tp->max_packets_seq) ||
+ tp->packets_out > tp->max_packets_out) {
+ tp->max_packets_out = tp->packets_out;
+ tp->max_packets_seq = tp->snd_nxt;
+ tp->is_cwnd_limited = is_cwnd_limited;
+ }
if (tcp_is_cwnd_limited(sk)) {
/* Network is feed fully. */
@@ -1617,7 +1625,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* All of a TSO frame must be composed of paged data. */
if (skb->len != skb->data_len)
- return tcp_fragment(sk, skb, len, mss_now);
+ return tcp_fragment(sk, skb, len, mss_now, gfp);
buff = sk_stream_alloc_skb(sk, 0, gfp);
if (unlikely(buff == NULL))
@@ -1660,7 +1668,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
*
* This algorithm is from John Heffner.
*/
-static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
+static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
+ bool *is_cwnd_limited)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1724,6 +1733,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
if (!tp->tso_deferred)
tp->tso_deferred = 1 | (jiffies << 1);
+ if (cong_win < send_win && cong_win < skb->len)
+ *is_cwnd_limited = true;
+
return true;
send_now:
@@ -1881,9 +1893,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- unsigned int tso_segs, sent_pkts, unsent_segs = 0;
+ unsigned int tso_segs, sent_pkts;
int cwnd_quota;
int result;
+ bool is_cwnd_limited = false;
sent_pkts = 0;
@@ -1908,6 +1921,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota) {
+ is_cwnd_limited = true;
if (push_one == 2)
/* Force out a loss probe pkt. */
cwnd_quota = 1;
@@ -1924,8 +1938,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
nonagle : TCP_NAGLE_PUSH))))
break;
} else {
- if (!push_one && tcp_tso_should_defer(sk, skb))
- goto compute_unsent_segs;
+ if (!push_one &&
+ tcp_tso_should_defer(sk, skb, &is_cwnd_limited))
+ break;
}
/* TCP Small Queues :
@@ -1946,18 +1961,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
/* It is possible TX completion already happened
* before we set TSQ_THROTTLED, so we must
* test again the condition.
- * We abuse smp_mb__after_clear_bit() because
- * there is no smp_mb__after_set_bit() yet
*/
- smp_mb__after_clear_bit();
- if (atomic_read(&sk->sk_wmem_alloc) > limit) {
- u32 unsent_bytes;
-
-compute_unsent_segs:
- unsent_bytes = tp->write_seq - tp->snd_nxt;
- unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now);
+ smp_mb__after_atomic();
+ if (atomic_read(&sk->sk_wmem_alloc) > limit)
break;
- }
}
limit = mss_now;
@@ -1997,7 +2004,7 @@ repair:
/* Send one loss probe per tail loss episode. */
if (push_one != 2)
tcp_schedule_loss_probe(sk);
- tcp_cwnd_validate(sk, unsent_segs);
+ tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
@@ -2113,7 +2120,8 @@ void tcp_send_loss_probe(struct sock *sk)
goto rearm_timer;
if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
- if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
+ if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+ GFP_ATOMIC)))
goto rearm_timer;
skb = tcp_write_queue_tail(sk);
}
@@ -2121,9 +2129,7 @@ void tcp_send_loss_probe(struct sock *sk)
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;
- /* Probe with zero data doesn't trigger fast recovery. */
- if (skb->len > 0)
- err = __tcp_retransmit_skb(sk, skb);
+ err = __tcp_retransmit_skb(sk, skb);
/* Record snd_nxt for loss detection. */
if (likely(!err))
@@ -2454,7 +2460,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
return -EAGAIN;
if (skb->len > cur_mss) {
- if (tcp_fragment(sk, skb, cur_mss, cur_mss))
+ if (tcp_fragment(sk, skb, cur_mss, cur_mss, GFP_ATOMIC))
return -ENOMEM; /* We'll try again later. */
} else {
int oldpcount = tcp_skb_pcount(skb);
@@ -2519,8 +2525,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (!tp->retrans_stamp)
tp->retrans_stamp = TCP_SKB_CB(skb)->when;
- tp->undo_retrans += tcp_skb_pcount(skb);
-
/* snd_nxt is stored to detect loss of retransmitted segment,
* see tcp_input.c tcp_sacktag_write_queue().
*/
@@ -2528,6 +2532,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
} else if (err != -EBUSY) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
+
+ if (tp->undo_retrans < 0)
+ tp->undo_retrans = 0;
+ tp->undo_retrans += tcp_skb_pcount(skb);
return err;
}
@@ -3235,7 +3243,7 @@ int tcp_write_wakeup(struct sock *sk)
skb->len > mss) {
seg_size = min(seg_size, mss);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
- if (tcp_fragment(sk, skb, seg_size, mss))
+ if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
return -1;
} else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(sk, skb, mss);