From c54b4b7655447c1f24f6d50779c22eba9ee0fd24 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 14 Mar 2011 07:52:13 +0000 Subject: tcp_cubic: fix comparison of jiffies Jiffies wraps around therefore the correct way to compare is to use cast to signed value. Note: cubic is not using full jiffies value on 64 bit arch because using full unsigned long makes struct bictcp grow too large for the available ca_priv area. Includes correction from Sangtae Ha to improve ack train detection. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4/tcp_cubic.c') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 71d5f2f29fa..43bb34c9b8b 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -342,9 +342,11 @@ static void hystart_update(struct sock *sk, u32 delay) u32 curr_jiffies = jiffies; /* first detection parameter - ack-train detection */ - if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { + if ((s32)(curr_jiffies - ca->last_jiffies) <= + msecs_to_jiffies(2)) { ca->last_jiffies = curr_jiffies; - if (curr_jiffies - ca->round_start >= ca->delay_min>>4) + if ((s32) (curr_jiffies - ca->round_start) > + ca->delay_min >> 4) ca->found |= HYSTART_ACK_TRAIN; } -- cgit v1.2.3-18-g5258 From aac46324e12a2bf2e9e0855ad6a287945e34ad39 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 14 Mar 2011 07:52:14 +0000 Subject: tcp_cubic: make ack train delta value a parameter Make the spacing between ACK's that indicates a train a tuneable value like other hystart values. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp_cubic.c') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 43bb34c9b8b..66d3b00233e 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -52,6 +52,7 @@ static int tcp_friendliness __read_mostly = 1; static int hystart __read_mostly = 1; static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; static int hystart_low_window __read_mostly = 16; +static int hystart_ack_delta __read_mostly = 2; static u32 cube_rtt_scale __read_mostly; static u32 beta_scale __read_mostly; @@ -75,6 +76,8 @@ MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms" " 1: packet-train 2: delay 3: both packet-train and delay"); module_param(hystart_low_window, int, 0644); MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); +module_param(hystart_ack_delta, int, 0644); +MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)"); /* BIC TCP Parameters */ struct bictcp { @@ -343,7 +346,7 @@ static void hystart_update(struct sock *sk, u32 delay) /* first detection parameter - ack-train detection */ if ((s32)(curr_jiffies - ca->last_jiffies) <= - msecs_to_jiffies(2)) { + msecs_to_jiffies(hystart_ack_delta)) { ca->last_jiffies = curr_jiffies; if ((s32) (curr_jiffies - ca->round_start) > ca->delay_min >> 4) -- cgit v1.2.3-18-g5258 From 17a6e9f1aa9ba07ca13a1eaf1e631e743af50cca Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 14 Mar 2011 07:52:15 +0000 Subject: tcp_cubic: fix clock dependency The hystart code was written with assumption that HZ=1000. Replace the use of jiffies with bictcp_clock as a millisecond real time clock. Signed-off-by: Stephen Hemminger Reported-by: Lucas Nussbaum Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'net/ipv4/tcp_cubic.c') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 66d3b00233e..f4fb2d4b90a 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -88,7 +88,7 @@ struct bictcp { u32 last_time; /* time when updated last_cwnd */ u32 bic_origin_point;/* origin point of bic function */ u32 bic_K; /* time to origin point from the beginning of the current epoch */ - u32 delay_min; /* min delay */ + u32 delay_min; /* min delay (msec << 3) */ u32 epoch_start; /* beginning of an epoch */ u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ @@ -98,7 +98,7 @@ struct bictcp { u8 found; /* the exit point is found? */ u32 round_start; /* beginning of each round */ u32 end_seq; /* end_seq of the round */ - u32 last_jiffies; /* last time when the ACK spacing is close */ + u32 last_ack; /* last time when the ACK spacing is close */ u32 curr_rtt; /* the minimum rtt of current round */ }; @@ -119,12 +119,21 @@ static inline void bictcp_reset(struct bictcp *ca) ca->found = 0; } +static inline u32 bictcp_clock(void) +{ +#if HZ < 1000 + return ktime_to_ms(ktime_get_real()); +#else + return jiffies_to_msecs(jiffies); +#endif +} + static inline void bictcp_hystart_reset(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); - ca->round_start = ca->last_jiffies = jiffies; + ca->round_start = ca->last_ack = bictcp_clock(); ca->end_seq = tp->snd_nxt; ca->curr_rtt = 0; ca->sample_cnt = 0; @@ -239,8 +248,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) */ /* change the unit from HZ to bictcp_HZ */ - t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start) - << BICTCP_HZ) / HZ; + t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) + - ca->epoch_start) << BICTCP_HZ) / HZ; if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; @@ -342,14 +351,12 @@ static void hystart_update(struct sock *sk, u32 delay) struct bictcp *ca = inet_csk_ca(sk); if (!(ca->found & hystart_detect)) { - u32 curr_jiffies = jiffies; + u32 now = bictcp_clock(); /* first detection parameter - ack-train detection */ - if ((s32)(curr_jiffies - ca->last_jiffies) <= - msecs_to_jiffies(hystart_ack_delta)) { - ca->last_jiffies = curr_jiffies; - if ((s32) (curr_jiffies - ca->round_start) > - ca->delay_min >> 4) + if ((s32)(now - ca->last_ack) <= hystart_ack_delta) { + ca->last_ack = now; + if ((s32)(now - ca->round_start) > ca->delay_min >> 4) ca->found |= HYSTART_ACK_TRAIN; } @@ -396,7 +403,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) return; - delay = usecs_to_jiffies(rtt_us) << 3; + delay = (rtt_us << 3) / USEC_PER_MSEC; if (delay == 0) delay = 1; -- cgit v1.2.3-18-g5258 From 3b585b34493ec9db382d6c325d4ed77b9eb2d2a5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 14 Mar 2011 07:52:16 +0000 Subject: tcp_cubic: enable high resolution ack time if needed This is a refined version of an earlier patch by Lucas Nussbaum. Cubic needs RTT values in milliseconds. If HZ < 1000 then the values will be too coarse. Signed-off-by: Stephen Hemminger Reported-by: Lucas Nussbaum Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/ipv4/tcp_cubic.c') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index f4fb2d4b90a..5e0491d742c 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -459,6 +459,10 @@ static int __init cubictcp_register(void) /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); + /* hystart needs ms clock resolution */ + if (hystart && HZ < 1000) + cubictcp.flags |= TCP_CONG_RTT_STAMP; + return tcp_register_congestion_control(&cubictcp); } -- cgit v1.2.3-18-g5258 From 2b4636a5f8ca547000f6aba24ec1c58f31f4a91d Mon Sep 17 00:00:00 2001 From: Sangtae Ha Date: Mon, 14 Mar 2011 07:52:17 +0000 Subject: tcp_cubic: make the delay threshold of HyStart less sensitive Make HyStart less sensitive to abrupt delay variations due to buffer bloat. Signed-off-by: Sangtae Ha Acked-by: Stephen Hemminger Reported-by: Lucas Nussbaum Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_cubic.c') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 5e0491d742c..7172c129ff1 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -39,7 +39,7 @@ /* Number of delay samples for detecting the increase of delay */ #define HYSTART_MIN_SAMPLES 8 -#define HYSTART_DELAY_MIN (2U<<3) +#define HYSTART_DELAY_MIN (4U<<3) #define HYSTART_DELAY_MAX (16U<<3) #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) -- cgit v1.2.3-18-g5258 From b5ccd07337489fa9c9d32e0b628a2168b7953adf Mon Sep 17 00:00:00 2001 From: Sangtae Ha Date: Mon, 14 Mar 2011 07:52:18 +0000 Subject: tcp_cubic: fix low utilization of CUBIC with HyStart HyStart sets the initial exit point of slow start. Suppose that HyStart exits at 0.5BDP in a BDP network and no history exists. If the BDP of a network is large, CUBIC's initial cwnd growth may be too conservative to utilize the link. CUBIC increases the cwnd 20% per RTT in this case. Signed-off-by: Sangtae Ha Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/ipv4/tcp_cubic.c') diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 7172c129ff1..90d92dd4cf1 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -270,6 +270,13 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) ca->cnt = 100 * cwnd; /* very small increment*/ } + /* + * The initial growth of cubic function may be too conservative + * when the available bandwidth is still unknown. + */ + if (ca->loss_cwnd == 0 && ca->cnt > 20) + ca->cnt = 20; /* increase cwnd 5% per RTT */ + /* TCP Friendly */ if (tcp_friendliness) { u32 scale = beta_scale; -- cgit v1.2.3-18-g5258