diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_bic.c | 46 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 44 | ||||
-rw-r--r-- | net/ipv4/tcp_diag.c | 16 | ||||
-rw-r--r-- | net/ipv4/tcp_highspeed.c | 17 | ||||
-rw-r--r-- | net/ipv4/tcp_htcp.c | 53 | ||||
-rw-r--r-- | net/ipv4/tcp_hybla.c | 31 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 223 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 9 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 36 | ||||
-rw-r--r-- | net/ipv4/tcp_scalable.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 26 | ||||
-rw-r--r-- | net/ipv4/tcp_vegas.c | 44 | ||||
-rw-r--r-- | net/ipv4/tcp_westwood.c | 58 |
16 files changed, 348 insertions, 281 deletions
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 026630a15ea..fe3c6d3d0c9 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -508,7 +508,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; - newicsk->icsk_backoff = 0; + newicsk->icsk_backoff = 0; + newicsk->icsk_probes_out = 0; /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0eed64a1991..02848e72e9c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1671,11 +1671,11 @@ int tcp_disconnect(struct sock *sk, int flags) tp->write_seq = 1; icsk->icsk_backoff = 0; tp->snd_cwnd = 2; - tp->probes_out = 0; + icsk->icsk_probes_out = 0; tp->packets_out = 0; tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_cnt = 0; - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); sk->sk_send_head = NULL; @@ -1718,7 +1718,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, name[val] = 0; lock_sock(sk); - err = tcp_set_congestion_control(tp, name); + err = tcp_set_congestion_control(sk, name); release_sock(sk); return err; } @@ -1886,9 +1886,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) memset(info, 0, sizeof(*info)); info->tcpi_state = sk->sk_state; - info->tcpi_ca_state = tp->ca_state; + info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; - info->tcpi_probes = tp->probes_out; + info->tcpi_probes = icsk->icsk_probes_out; info->tcpi_backoff = icsk->icsk_backoff; if (tp->rx_opt.tstamp_ok) @@ -2016,7 +2016,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, len = min_t(unsigned int, len, TCP_CA_NAME_MAX); if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval, tp->ca_ops->name, len)) + if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) return -EFAULT; return 0; default: diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index ec38d45d664..b940346de4e 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -86,11 +86,11 @@ static inline void bictcp_reset(struct bictcp *ca) ca->delayed_ack = 2 << ACK_RATIO_SHIFT; } -static void bictcp_init(struct tcp_sock *tp) +static void bictcp_init(struct sock *sk) { - bictcp_reset(tcp_ca(tp)); + bictcp_reset(inet_csk_ca(sk)); if (initial_ssthresh) - tp->snd_ssthresh = initial_ssthresh; + tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } /* @@ -156,9 +156,10 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) /* Detect low utilization in congestion avoidance */ -static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) +static inline void bictcp_low_utilization(struct sock *sk, int flag) { - struct bictcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); u32 dist, delay; /* No time stamp */ @@ -208,12 +209,13 @@ static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) } -static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 seq_rtt, u32 in_flight, int data_acked) { - struct bictcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); - bictcp_low_utilization(tp, data_acked); + bictcp_low_utilization(sk, data_acked); if (in_flight < tp->snd_cwnd) return; @@ -242,9 +244,10 @@ static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, * behave like Reno until low_window is reached, * then increase congestion window slowly */ -static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) +static u32 bictcp_recalc_ssthresh(struct sock *sk) { - struct bictcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); ca->epoch_start = 0; /* end of epoch */ @@ -269,31 +272,34 @@ static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); } -static u32 bictcp_undo_cwnd(struct tcp_sock *tp) +static u32 bictcp_undo_cwnd(struct sock *sk) { - struct bictcp *ca = tcp_ca(tp); - + const struct tcp_sock *tp = tcp_sk(sk); + const struct bictcp *ca = inet_csk_ca(sk); return max(tp->snd_cwnd, ca->last_max_cwnd); } -static u32 bictcp_min_cwnd(struct tcp_sock *tp) +static u32 bictcp_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh; } -static void bictcp_state(struct tcp_sock *tp, u8 new_state) +static void bictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) - bictcp_reset(tcp_ca(tp)); + bictcp_reset(inet_csk_ca(sk)); } /* Track delayed acknowledgement ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct tcp_sock *tp, u32 cnt) +static void bictcp_acked(struct sock *sk, u32 cnt) { - if (cnt > 0 && tp->ca_state == TCP_CA_Open) { - struct bictcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + struct bictcp *ca = inet_csk_ca(sk); cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; } @@ -314,7 +320,7 @@ static struct tcp_congestion_ops bictcp = { static int __init bictcp_register(void) { - BUG_ON(sizeof(struct bictcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&bictcp); } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4970d10a778..bbf2d6624e8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -73,33 +73,36 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); /* Assign choice of congestion control. */ -void tcp_init_congestion_control(struct tcp_sock *tp) +void tcp_init_congestion_control(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_congestion_ops *ca; - if (tp->ca_ops != &tcp_init_congestion_ops) + if (icsk->icsk_ca_ops != &tcp_init_congestion_ops) return; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { if (try_module_get(ca->owner)) { - tp->ca_ops = ca; + icsk->icsk_ca_ops = ca; break; } } rcu_read_unlock(); - if (tp->ca_ops->init) - tp->ca_ops->init(tp); + if (icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); } /* Manage refcounts on socket close. */ -void tcp_cleanup_congestion_control(struct tcp_sock *tp) +void tcp_cleanup_congestion_control(struct sock *sk) { - if (tp->ca_ops->release) - tp->ca_ops->release(tp); - module_put(tp->ca_ops->owner); + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->release) + icsk->icsk_ca_ops->release(sk); + module_put(icsk->icsk_ca_ops->owner); } /* Used by sysctl to change default congestion control */ @@ -143,14 +146,15 @@ void tcp_get_default_congestion_control(char *name) } /* Change congestion control for socket */ -int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) +int tcp_set_congestion_control(struct sock *sk, const char *name) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_congestion_ops *ca; int err = 0; rcu_read_lock(); ca = tcp_ca_find(name); - if (ca == tp->ca_ops) + if (ca == icsk->icsk_ca_ops) goto out; if (!ca) @@ -160,10 +164,10 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) err = -EBUSY; else { - tcp_cleanup_congestion_control(tp); - tp->ca_ops = ca; - if (tp->ca_ops->init) - tp->ca_ops->init(tp); + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = ca; + if (icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); } out: rcu_read_unlock(); @@ -177,9 +181,11 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { + struct tcp_sock *tp = tcp_sk(sk); + if (in_flight < tp->snd_cwnd) return; @@ -202,15 +208,17 @@ void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); /* Slow start threshold is half the congestion window (min 2) */ -u32 tcp_reno_ssthresh(struct tcp_sock *tp) +u32 tcp_reno_ssthresh(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return max(tp->snd_cwnd >> 1U, 2U); } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); /* Lower bound on congestion window. */ -u32 tcp_reno_min_cwnd(struct tcp_sock *tp) +u32 tcp_reno_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh/2; } EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 5f4c74f45e8..4288ecfec9a 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -66,10 +66,10 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, if (ext & (1<<(TCPDIAG_INFO-1))) info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info)); - if (ext & (1<<(TCPDIAG_CONG-1))) { - size_t len = strlen(tp->ca_ops->name); + if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) { + size_t len = strlen(icsk->icsk_ca_ops->name); strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1), - tp->ca_ops->name); + icsk->icsk_ca_ops->name); } } r->tcpdiag_family = sk->sk_family; @@ -136,18 +136,17 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->tcpdiag_timer = 4; - r->tcpdiag_retrans = tp->probes_out; + r->tcpdiag_retrans = icsk->icsk_probes_out; r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (timer_pending(&sk->sk_timer)) { r->tcpdiag_timer = 2; - r->tcpdiag_retrans = tp->probes_out; + r->tcpdiag_retrans = icsk->icsk_probes_out; r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); } else { r->tcpdiag_timer = 0; r->tcpdiag_expires = 0; } #undef EXPIRES_IN_MS - r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; r->tcpdiag_uid = sock_i_uid(sk); @@ -163,8 +162,9 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, if (info) tcp_get_info(sk, info); - if (sk->sk_state < TCP_TIME_WAIT && tp->ca_ops->get_info) - tp->ca_ops->get_info(tp, ext, skb); + if (sk->sk_state < TCP_TIME_WAIT && + icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) + icsk->icsk_ca_ops->get_info(sk, ext, skb); nlh->nlmsg_len = skb->tail - b; return skb->len; diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 36c51f8136b..6acc04bde08 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -98,9 +98,10 @@ struct hstcp { u32 ai; }; -static void hstcp_init(struct tcp_sock *tp) +static void hstcp_init(struct sock *sk) { - struct hstcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hstcp *ca = inet_csk_ca(sk); ca->ai = 0; @@ -109,10 +110,11 @@ static void hstcp_init(struct tcp_sock *tp) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, +static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, u32 in_flight, int good) { - struct hstcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hstcp *ca = inet_csk_ca(sk); if (in_flight < tp->snd_cwnd) return; @@ -143,9 +145,10 @@ static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, } } -static u32 hstcp_ssthresh(struct tcp_sock *tp) +static u32 hstcp_ssthresh(struct sock *sk) { - struct hstcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct hstcp *ca = inet_csk_ca(sk); /* Do multiplicative decrease */ return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); @@ -164,7 +167,7 @@ static struct tcp_congestion_ops tcp_highspeed = { static int __init hstcp_register(void) { - BUG_ON(sizeof(struct hstcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_highspeed); } diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 40168275acf..e47b37984e9 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -55,18 +55,21 @@ static inline void htcp_reset(struct htcp *ca) ca->snd_cwnd_cnt2 = 0; } -static u32 htcp_cwnd_undo(struct tcp_sock *tp) +static u32 htcp_cwnd_undo(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); ca->ccount = ca->undo_ccount; ca->maxRTT = ca->undo_maxRTT; ca->old_maxB = ca->undo_old_maxB; return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta); } -static inline void measure_rtt(struct tcp_sock *tp) +static inline void measure_rtt(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); u32 srtt = tp->srtt>>3; /* keep track of minimum RTT seen so far, minRTT is zero at first */ @@ -74,7 +77,7 @@ static inline void measure_rtt(struct tcp_sock *tp) ca->minRTT = srtt; /* max RTT */ - if (tp->ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { + if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { if (ca->maxRTT < ca->minRTT) ca->maxRTT = ca->minRTT; if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50) @@ -82,13 +85,16 @@ static inline void measure_rtt(struct tcp_sock *tp) } } -static void measure_achieved_throughput(struct tcp_sock *tp, u32 pkts_acked) +static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked) { - struct htcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); u32 now = tcp_time_stamp; /* achieved throughput calculations */ - if (tp->ca_state != TCP_CA_Open && tp->ca_state != TCP_CA_Disorder) { + if (icsk->icsk_ca_state != TCP_CA_Open && + icsk->icsk_ca_state != TCP_CA_Disorder) { ca->packetcount = 0; ca->lasttime = now; return; @@ -173,9 +179,9 @@ static inline void htcp_alpha_update(struct htcp *ca) * that point do we really have a real sense of maxRTT (the queues en route * were getting just too full now). */ -static void htcp_param_update(struct tcp_sock *tp) +static void htcp_param_update(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + struct htcp *ca = inet_csk_ca(sk); u32 minRTT = ca->minRTT; u32 maxRTT = ca->maxRTT; @@ -187,17 +193,19 @@ static void htcp_param_update(struct tcp_sock *tp) ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100; } -static u32 htcp_recalc_ssthresh(struct tcp_sock *tp) +static u32 htcp_recalc_ssthresh(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); - htcp_param_update(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct htcp *ca = inet_csk_ca(sk); + htcp_param_update(sk); return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int data_acked) { - struct htcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); if (in_flight < tp->snd_cwnd) return; @@ -207,7 +215,7 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; } else { - measure_rtt(tp); + measure_rtt(sk); /* keep track of number of round-trip times since last backoff event */ if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) { @@ -229,28 +237,29 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, } /* Lower bound on congestion window. */ -static u32 htcp_min_cwnd(struct tcp_sock *tp) +static u32 htcp_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh; } -static void htcp_init(struct tcp_sock *tp) +static void htcp_init(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + struct htcp *ca = inet_csk_ca(sk); memset(ca, 0, sizeof(struct htcp)); ca->alpha = ALPHA_BASE; ca->beta = BETA_MIN; } -static void htcp_state(struct tcp_sock *tp, u8 new_state) +static void htcp_state(struct sock *sk, u8 new_state) { switch (new_state) { case TCP_CA_CWR: case TCP_CA_Recovery: case TCP_CA_Loss: - htcp_reset(tcp_ca(tp)); + htcp_reset(inet_csk_ca(sk)); break; } } @@ -269,7 +278,7 @@ static struct tcp_congestion_ops htcp = { static int __init htcp_register(void) { - BUG_ON(sizeof(struct htcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); BUILD_BUG_ON(BETA_MIN >= BETA_MAX); if (!use_bandwidth_switch) htcp.pkts_acked = NULL; diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 13a66342c30..77add63623d 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -33,19 +33,20 @@ MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)"); /* This is called to refresh values for hybla parameters */ -static inline void hybla_recalc_param (struct tcp_sock *tp) +static inline void hybla_recalc_param (struct sock *sk) { - struct hybla *ca = tcp_ca(tp); + struct hybla *ca = inet_csk_ca(sk); - ca->rho_3ls = max_t(u32, tp->srtt / msecs_to_jiffies(rtt0), 8); + ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); ca->rho = ca->rho_3ls >> 3; ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; ca->rho2 = ca->rho2_7ls >>7; } -static void hybla_init(struct tcp_sock *tp) +static void hybla_init(struct sock *sk) { - struct hybla *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hybla *ca = inet_csk_ca(sk); ca->rho = 0; ca->rho2 = 0; @@ -57,17 +58,16 @@ static void hybla_init(struct tcp_sock *tp) tp->snd_cwnd_clamp = 65535; /* 1st Rho measurement based on initial srtt */ - hybla_recalc_param(tp); + hybla_recalc_param(sk); /* set minimum rtt as this is the 1st ever seen */ ca->minrtt = tp->srtt; tp->snd_cwnd = ca->rho; } -static void hybla_state(struct tcp_sock *tp, u8 ca_state) +static void hybla_state(struct sock *sk, u8 ca_state) { - struct hybla *ca = tcp_ca(tp); - + struct hybla *ca = inet_csk_ca(sk); ca->hybla_en = (ca_state == TCP_CA_Open); } @@ -86,27 +86,28 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { - struct hybla *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hybla *ca = inet_csk_ca(sk); u32 increment, odd, rho_fractions; int is_slowstart = 0; /* Recalculate rho only if this srtt is the lowest */ if (tp->srtt < ca->minrtt){ - hybla_recalc_param(tp); + hybla_recalc_param(sk); ca->minrtt = tp->srtt; } if (!ca->hybla_en) - return tcp_reno_cong_avoid(tp, ack, rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); if (in_flight < tp->snd_cwnd) return; if (ca->rho == 0) - hybla_recalc_param(tp); + hybla_recalc_param(sk); rho_fractions = ca->rho_3ls - (ca->rho << 3); @@ -170,7 +171,7 @@ static struct tcp_congestion_ops tcp_hybla = { static int __init hybla_register(void) { - BUG_ON(sizeof(struct hybla) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_hybla); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 71d456148de..fdd9547fb78 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -325,11 +325,12 @@ static void tcp_init_buffer_space(struct sock *sk) /* 5. Recalculate window clamp after socket hit its memory bounds. */ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) { + struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb; unsigned int app_win = tp->rcv_nxt - tp->copied_seq; int ofo_win = 0; - inet_csk(sk)->icsk_ack.quick = 0; + icsk->icsk_ack.quick = 0; skb_queue_walk(&tp->out_of_order_queue, skb) { ofo_win += skb->len; @@ -350,8 +351,8 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) app_win += ofo_win; if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) app_win >>= 1; - if (app_win > inet_csk(sk)->icsk_ack.rcv_mss) - app_win -= inet_csk(sk)->icsk_ack.rcv_mss; + if (app_win > icsk->icsk_ack.rcv_mss) + app_win -= icsk->icsk_ack.rcv_mss; app_win = max(app_win, 2U*tp->advmss); if (!ofo_win) @@ -549,8 +550,10 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_ * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ -static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) +static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) { + struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); long m = mrtt; /* RTT */ /* The following amusing code comes from Jacobson's @@ -610,8 +613,8 @@ static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) tp->rtt_seq = tp->snd_nxt; } - if (tp->ca_ops->rtt_sample) - tp->ca_ops->rtt_sample(tp, *usrtt); + if (icsk->icsk_ca_ops->rtt_sample) + icsk->icsk_ca_ops->rtt_sample(sk, *usrtt); } /* Calculate rto without backoff. This is the second half of Van Jacobson's @@ -663,9 +666,10 @@ void tcp_update_metrics(struct sock *sk) dst_confirm(dst); if (dst && (dst->flags&DST_HOST)) { + const struct inet_connection_sock *icsk = inet_csk(sk); int m; - if (inet_csk(sk)->icsk_backoff || !tp->srtt) { + if (icsk->icsk_backoff || !tp->srtt) { /* This session failed to estimate rtt. Why? * Probably, no packets returned in time. * Reset our results. @@ -714,7 +718,7 @@ void tcp_update_metrics(struct sock *sk) tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) dst->metrics[RTAX_CWND-1] = tp->snd_cwnd; } else if (tp->snd_cwnd > tp->snd_ssthresh && - tp->ca_state == TCP_CA_Open) { + icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ if (!dst_metric_locked(dst, RTAX_SSTHRESH)) dst->metrics[RTAX_SSTHRESH-1] = @@ -828,8 +832,10 @@ reset: } } -static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) +static void tcp_update_reordering(struct sock *sk, const int metric, + const int ts) { + struct tcp_sock *tp = tcp_sk(sk); if (metric > tp->reordering) { tp->reordering = min(TCP_MAX_REORDERING, metric); @@ -844,7 +850,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); #if FASTRETRANS_DEBUG > 1 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", - tp->rx_opt.sack_ok, tp->ca_state, + tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, tp->reordering, tp->fackets_out, tp->sacked_out, @@ -906,6 +912,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) static int tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); @@ -1071,7 +1078,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ * we have to account for reordering! Ugly, * but should help. */ - if (lost_retrans && tp->ca_state == TCP_CA_Recovery) { + if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) { struct sk_buff *skb; sk_stream_for_retrans_queue(skb, sk) { @@ -1100,8 +1107,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->left_out = tp->sacked_out + tp->lost_out; - if ((reord < tp->fackets_out) && tp->ca_state != TCP_CA_Loss) - tcp_update_reordering(tp, ((tp->fackets_out + 1) - reord), 0); + if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss) + tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0); #if FASTRETRANS_DEBUG > 0 BUG_TRAP((int)tp->sacked_out >= 0); @@ -1118,17 +1125,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ */ void tcp_enter_frto(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; tp->frto_counter = 1; - if (tp->ca_state <= TCP_CA_Disorder || + if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); - tcp_ca_event(tp, CA_EVENT_FRTO); + (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); + tcp_ca_event(sk, CA_EVENT_FRTO); } /* Have to clear retransmission markers here to keep the bookkeeping @@ -1145,7 +1153,7 @@ void tcp_enter_frto(struct sock *sk) } tcp_sync_left_out(tp); - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); tp->frto_highmark = tp->snd_nxt; } @@ -1191,7 +1199,7 @@ static void tcp_enter_frto_loss(struct sock *sk) tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); - tcp_set_ca_state(tp, TCP_CA_Loss); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->frto_highmark; TCP_ECN_queue_cwr(tp); } @@ -1215,16 +1223,17 @@ void tcp_clear_retrans(struct tcp_sock *tp) */ void tcp_enter_loss(struct sock *sk, int how) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int cnt = 0; /* Reduce ssthresh if it has not yet been made inside this window. */ - if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); - tcp_ca_event(tp, CA_EVENT_LOSS); + if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || + (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); + tcp_ca_event(sk, CA_EVENT_LOSS); } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; @@ -1255,7 +1264,7 @@ void tcp_enter_loss(struct sock *sk, int how) tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); - tcp_set_ca_state(tp, TCP_CA_Loss); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); } @@ -1272,13 +1281,14 @@ static int tcp_check_sack_reneging(struct sock *sk) */ if ((skb = skb_peek(&sk->sk_write_queue)) != NULL && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + struct inet_connection_sock *icsk = inet_csk(sk); NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); tcp_enter_loss(sk, 1); - inet_csk(sk)->icsk_retransmits++; + icsk->icsk_retransmits++; tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, TCP_RTO_MAX); + icsk->icsk_rto, TCP_RTO_MAX); return 1; } return 0; @@ -1431,8 +1441,9 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) * in assumption of absent reordering, interpret this as reordering. * The only another reason could be bug in receiver TCP. */ -static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) +static void tcp_check_reno_reordering(struct sock *sk, const int addend) { + struct tcp_sock *tp = tcp_sk(sk); u32 holes; holes = max(tp->lost_out, 1U); @@ -1440,16 +1451,17 @@ static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) if ((tp->sacked_out + holes) > tp->packets_out) { tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(tp, tp->packets_out+addend, 0); + tcp_update_reordering(sk, tp->packets_out + addend, 0); } } /* Emulate SACKs for SACKless connection: account for a new dupack. */ -static void tcp_add_reno_sack(struct tcp_sock *tp) +static void tcp_add_reno_sack(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); tp->sacked_out++; - tcp_check_reno_reordering(tp, 0); + tcp_check_reno_reordering(sk, 0); tcp_sync_left_out(tp); } @@ -1464,7 +1476,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acke else tp->sacked_out -= acked-1; } - tcp_check_reno_reordering(tp, acked); + tcp_check_reno_reordering(sk, acked); tcp_sync_left_out(tp); } @@ -1538,14 +1550,16 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp) } /* Decrease cwnd each second ack. */ -static void tcp_cwnd_down(struct tcp_sock *tp) +static void tcp_cwnd_down(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); int decr = tp->snd_cwnd_cnt + 1; tp->snd_cwnd_cnt = decr&1; decr >>= 1; - if (decr && tp->snd_cwnd > tp->ca_ops->min_cwnd(tp)) + if (decr && tp->snd_cwnd > icsk->icsk_ca_ops->min_cwnd(sk)) tp->snd_cwnd -= decr; tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); @@ -1579,11 +1593,15 @@ static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg) #define DBGUNDO(x...) do { } while (0) #endif -static void tcp_undo_cwr(struct tcp_sock *tp, int undo) +static void tcp_undo_cwr(struct sock *sk, const int undo) { + struct tcp_sock *tp = tcp_sk(sk); + if (tp->prior_ssthresh) { - if (tp->ca_ops->undo_cwnd) - tp->snd_cwnd = tp->ca_ops->undo_cwnd(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops-&g |