diff options
-rw-r--r-- | include/linux/tcp.h | 21 | ||||
-rw-r--r-- | include/net/inet_connection_sock.h | 15 | ||||
-rw-r--r-- | include/net/tcp.h | 74 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp_bic.c | 46 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 44 | ||||
-rw-r--r-- | net/ipv4/tcp_diag.c | 16 | ||||
-rw-r--r-- | net/ipv4/tcp_highspeed.c | 17 | ||||
-rw-r--r-- | net/ipv4/tcp_htcp.c | 53 | ||||
-rw-r--r-- | net/ipv4/tcp_hybla.c | 31 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 223 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 9 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 36 | ||||
-rw-r--r-- | net/ipv4/tcp_scalable.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 26 | ||||
-rw-r--r-- | net/ipv4/tcp_vegas.c | 44 | ||||
-rw-r--r-- | net/ipv4/tcp_westwood.c | 58 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 7 |
20 files changed, 412 insertions, 334 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 62009684074..ac4ca44c75c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,19 +258,15 @@ struct tcp_sock { __u32 mss_cache; /* Cached effective mss, not including SACKS */ __u16 xmit_size_goal; /* Goal for segmenting output packets */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ - __u8 ca_state; /* State of fast-retransmit machine */ - __u8 keepalive_probes; /* num of allowed keep alive probes */ - __u16 advmss; /* Advertised MSS */ __u32 window_clamp; /* Maximal window to advertise */ __u32 rcv_ssthresh; /* Current window clamp */ __u32 frto_highmark; /* snd_nxt when RTO occurred */ __u8 reordering; /* Packet reordering metric. */ __u8 frto_counter; /* Number of new acks after RTO */ - __u8 nonagle; /* Disable Nagle algorithm? */ - /* ONE BYTE HOLE, TRY TO PACK */ + __u8 keepalive_probes; /* num of allowed keep alive probes */ /* RTT measurement */ __u32 srtt; /* smoothed round trip time << 3 */ @@ -311,8 +307,7 @@ struct tcp_sock { struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ - __u8 probes_out; /* unanswered 0 window probes */ - __u8 ecn_flags; /* ECN status bits. */ + __u16 advmss; /* Advertised MSS */ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ __u32 lost_out; /* Lost packets */ __u32 sacked_out; /* SACK'd packets */ @@ -327,7 +322,7 @@ struct tcp_sock { __u32 urg_seq; /* Seq of received urgent pointer */ __u16 urg_data; /* Saved octet of OOB data and control flags */ __u8 urg_mode; /* In urgent mode */ - /* ONE BYTE HOLE, TRY TO PACK! */ + __u8 ecn_flags; /* ECN status bits. */ __u32 snd_up; /* Urgent pointer */ __u32 total_retrans; /* Total retransmits for entire connection */ @@ -351,11 +346,6 @@ struct tcp_sock { __u32 seq; __u32 time; } rcvq_space; - - /* Pluggable TCP congestion control hook */ - struct tcp_congestion_ops *ca_ops; - u32 ca_priv[16]; -#define TCP_CA_PRIV_SIZE (16*sizeof(u32)) }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) @@ -377,11 +367,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) return (struct tcp_timewait_sock *)sk; } -static inline void *tcp_ca(const struct tcp_sock *tp) -{ - return (void *) tp->ca_priv; -} - #endif #endif /* _LINUX_TCP_H */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index bec19d5cff2..4d7e708c07d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -27,6 +27,7 @@ struct inet_bind_bucket; struct inet_hashinfo; +struct tcp_congestion_ops; /** inet_connection_sock - INET connection oriented sock * @@ -35,10 +36,13 @@ struct inet_hashinfo; * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout + * @icsk_ca_ops Pluggable congestion control hook + * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event * @icsk_backoff: Backoff * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries + * @icsk_probes_out: unanswered 0 window probes * @icsk_ack: Delayed ACK control data */ struct inet_connection_sock { @@ -50,10 +54,14 @@ struct inet_connection_sock { struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; + struct tcp_congestion_ops *icsk_ca_ops; + __u8 icsk_ca_state; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; __u8 icsk_syn_retries; + __u8 icsk_probes_out; + /* 2 BYTES HOLE, TRY TO PACK! */ struct { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ @@ -65,6 +73,8 @@ struct inet_connection_sock { __u16 last_seg_size; /* Size of last incoming segment */ __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } icsk_ack; + u32 icsk_ca_priv[16]; +#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ @@ -77,6 +87,11 @@ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) return (struct inet_connection_sock *)sk; } +static inline void *inet_csk_ca(const struct sock *sk) +{ + return (void *)inet_csk(sk)->icsk_ca_priv; +} + extern struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, const unsigned int __nocast priority); diff --git a/include/net/tcp.h b/include/net/tcp.h index d489ac548e4..0b3f7294c5c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -669,29 +669,29 @@ struct tcp_congestion_ops { struct list_head list; /* initialize private data (optional) */ - void (*init)(struct tcp_sock *tp); + void (*init)(struct sock *sk); /* cleanup private data (optional) */ - void (*release)(struct tcp_sock *tp); + void (*release)(struct sock *sk); /* return slow start threshold (required) */ - u32 (*ssthresh)(struct tcp_sock *tp); + u32 (*ssthresh)(struct sock *sk); /* lower bound for congestion window (optional) */ - u32 (*min_cwnd)(struct tcp_sock *tp); + u32 (*min_cwnd)(struct sock *sk); /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct tcp_sock *tp, u32 ack, + void (*cong_avoid)(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int good_ack); /* round trip time sample per acked packet (optional) */ - void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); + void (*rtt_sample)(struct sock *sk, u32 usrtt); /* call before changing ca_state (optional) */ - void (*set_state)(struct tcp_sock *tp, u8 new_state); + void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ - void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); + void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); /* new value of cwnd after loss (optional) */ - u32 (*undo_cwnd)(struct tcp_sock *tp); + u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); + void (*pkts_acked)(struct sock *sk, u32 num_acked); /* get info for tcp_diag (optional) */ - void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); + void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); char name[TCP_CA_NAME_MAX]; struct module *owner; @@ -700,30 +700,34 @@ struct tcp_congestion_ops { extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); -extern void tcp_init_congestion_control(struct tcp_sock *tp); -extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); +extern void tcp_init_congestion_control(struct sock *sk); +extern void tcp_cleanup_congestion_control(struct sock *sk); extern int tcp_set_default_congestion_control(const char *name); extern void tcp_get_default_congestion_control(char *name); -extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name); +extern int tcp_set_congestion_control(struct sock *sk, const char *name); extern struct tcp_congestion_ops tcp_init_congestion_ops; -extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); -extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, +extern u32 tcp_reno_ssthresh(struct sock *sk); +extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag); -extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); +extern u32 tcp_reno_min_cwnd(struct sock *sk); extern struct tcp_congestion_ops tcp_reno; -static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) +static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) { - if (tp->ca_ops->set_state) - tp->ca_ops->set_state(tp, ca_state); - tp->ca_state = ca_state; + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->set_state) + icsk->icsk_ca_ops->set_state(sk, ca_state); + icsk->icsk_ca_state = ca_state; } -static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) +static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { - if (tp->ca_ops->cwnd_event) - tp->ca_ops->cwnd_event(tp, event); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->cwnd_event) + icsk->icsk_ca_ops->cwnd_event(sk, event); } /* This determines how many packets are "in the network" to the best @@ -749,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. */ -static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) +static inline __u32 tcp_current_ssthresh(const struct sock *sk) { - if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) + const struct tcp_sock *tp = tcp_sk(sk); + if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery)) return tp->snd_ssthresh; else return max(tp->snd_ssthresh, @@ -768,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) } /* Set slow start threshold and cwnd not falling to slow start */ -static inline void __tcp_enter_cwr(struct tcp_sock *tp) +static inline void __tcp_enter_cwr(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + tp->undo_marker = 0; - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1U); tp->snd_cwnd_cnt = 0; @@ -780,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp) TCP_ECN_queue_cwr(tp); } -static inline void tcp_enter_cwr(struct tcp_sock *tp) +static inline void tcp_enter_cwr(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + tp->prior_ssthresh = 0; - if (tp->ca_state < TCP_CA_CWR) { - __tcp_enter_cwr(tp); - tcp_set_ca_state(tp, TCP_CA_CWR); + if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { + __tcp_enter_cwr(sk); + tcp_set_ca_state(sk, TCP_CA_CWR); } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 026630a15ea..fe3c6d3d0c9 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -508,7 +508,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; - newicsk->icsk_backoff = 0; + newicsk->icsk_backoff = 0; + newicsk->icsk_probes_out = 0; /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0eed64a1991..02848e72e9c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1671,11 +1671,11 @@ int tcp_disconnect(struct sock *sk, int flags) tp->write_seq = 1; icsk->icsk_backoff = 0; tp->snd_cwnd = 2; - tp->probes_out = 0; + icsk->icsk_probes_out = 0; tp->packets_out = 0; tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_cnt = 0; - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); sk->sk_send_head = NULL; @@ -1718,7 +1718,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, name[val] = 0; lock_sock(sk); - err = tcp_set_congestion_control(tp, name); + err = tcp_set_congestion_control(sk, name); release_sock(sk); return err; } @@ -1886,9 +1886,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) memset(info, 0, sizeof(*info)); info->tcpi_state = sk->sk_state; - info->tcpi_ca_state = tp->ca_state; + info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; - info->tcpi_probes = tp->probes_out; + info->tcpi_probes = icsk->icsk_probes_out; info->tcpi_backoff = icsk->icsk_backoff; if (tp->rx_opt.tstamp_ok) @@ -2016,7 +2016,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, len = min_t(unsigned int, len, TCP_CA_NAME_MAX); if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval, tp->ca_ops->name, len)) + if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) return -EFAULT; return 0; default: diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index ec38d45d664..b940346de4e 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -86,11 +86,11 @@ static inline void bictcp_reset(struct bictcp *ca) ca->delayed_ack = 2 << ACK_RATIO_SHIFT; } -static void bictcp_init(struct tcp_sock *tp) +static void bictcp_init(struct sock *sk) { - bictcp_reset(tcp_ca(tp)); + bictcp_reset(inet_csk_ca(sk)); if (initial_ssthresh) - tp->snd_ssthresh = initial_ssthresh; + tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } /* @@ -156,9 +156,10 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) /* Detect low utilization in congestion avoidance */ -static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) +static inline void bictcp_low_utilization(struct sock *sk, int flag) { - struct bictcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); u32 dist, delay; /* No time stamp */ @@ -208,12 +209,13 @@ static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) } -static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 seq_rtt, u32 in_flight, int data_acked) { - struct bictcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); - bictcp_low_utilization(tp, data_acked); + bictcp_low_utilization(sk, data_acked); if (in_flight < tp->snd_cwnd) return; @@ -242,9 +244,10 @@ static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, * behave like Reno until low_window is reached, * then increase congestion window slowly */ -static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) +static u32 bictcp_recalc_ssthresh(struct sock *sk) { - struct bictcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); ca->epoch_start = 0; /* end of epoch */ @@ -269,31 +272,34 @@ static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); } -static u32 bictcp_undo_cwnd(struct tcp_sock *tp) +static u32 bictcp_undo_cwnd(struct sock *sk) { - struct bictcp *ca = tcp_ca(tp); - + const struct tcp_sock *tp = tcp_sk(sk); + const struct bictcp *ca = inet_csk_ca(sk); return max(tp->snd_cwnd, ca->last_max_cwnd); } -static u32 bictcp_min_cwnd(struct tcp_sock *tp) +static u32 bictcp_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh; } -static void bictcp_state(struct tcp_sock *tp, u8 new_state) +static void bictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) - bictcp_reset(tcp_ca(tp)); + bictcp_reset(inet_csk_ca(sk)); } /* Track delayed acknowledgement ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct tcp_sock *tp, u32 cnt) +static void bictcp_acked(struct sock *sk, u32 cnt) { - if (cnt > 0 && tp->ca_state == TCP_CA_Open) { - struct bictcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + struct bictcp *ca = inet_csk_ca(sk); cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; } @@ -314,7 +320,7 @@ static struct tcp_congestion_ops bictcp = { static int __init bictcp_register(void) { - BUG_ON(sizeof(struct bictcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&bictcp); } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4970d10a778..bbf2d6624e8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -73,33 +73,36 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); /* Assign choice of congestion control. */ -void tcp_init_congestion_control(struct tcp_sock *tp) +void tcp_init_congestion_control(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_congestion_ops *ca; - if (tp->ca_ops != &tcp_init_congestion_ops) + if (icsk->icsk_ca_ops != &tcp_init_congestion_ops) return; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { if (try_module_get(ca->owner)) { - tp->ca_ops = ca; + icsk->icsk_ca_ops = ca; break; } } rcu_read_unlock(); - if (tp->ca_ops->init) - tp->ca_ops->init(tp); + if (icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); } /* Manage refcounts on socket close. */ -void tcp_cleanup_congestion_control(struct tcp_sock *tp) +void tcp_cleanup_congestion_control(struct sock *sk) { - if (tp->ca_ops->release) - tp->ca_ops->release(tp); - module_put(tp->ca_ops->owner); + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->release) + icsk->icsk_ca_ops->release(sk); + module_put(icsk->icsk_ca_ops->owner); } /* Used by sysctl to change default congestion control */ @@ -143,14 +146,15 @@ void tcp_get_default_congestion_control(char *name) } /* Change congestion control for socket */ -int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) +int tcp_set_congestion_control(struct sock *sk, const char *name) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_congestion_ops *ca; int err = 0; rcu_read_lock(); ca = tcp_ca_find(name); - if (ca == tp->ca_ops) + if (ca == icsk->icsk_ca_ops) goto out; if (!ca) @@ -160,10 +164,10 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) err = -EBUSY; else { - tcp_cleanup_congestion_control(tp); - tp->ca_ops = ca; - if (tp->ca_ops->init) - tp->ca_ops->init(tp); + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = ca; + if (icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); } out: rcu_read_unlock(); @@ -177,9 +181,11 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { + struct tcp_sock *tp = tcp_sk(sk); + if (in_flight < tp->snd_cwnd) return; @@ -202,15 +208,17 @@ void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); /* Slow start threshold is half the congestion window (min 2) */ -u32 tcp_reno_ssthresh(struct tcp_sock *tp) +u32 tcp_reno_ssthresh(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return max(tp->snd_cwnd >> 1U, 2U); } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); /* Lower bound on congestion window. */ -u32 tcp_reno_min_cwnd(struct tcp_sock *tp) +u32 tcp_reno_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh/2; } EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 5f4c74f45e8..4288ecfec9a 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -66,10 +66,10 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, if (ext & (1<<(TCPDIAG_INFO-1))) info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info)); - if (ext & (1<<(TCPDIAG_CONG-1))) { - size_t len = strlen(tp->ca_ops->name); + if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) { + size_t len = strlen(icsk->icsk_ca_ops->name); strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1), - tp->ca_ops->name); + icsk->icsk_ca_ops->name); } } r->tcpdiag_family = sk->sk_family; @@ -136,18 +136,17 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->tcpdiag_timer = 4; - r->tcpdiag_retrans = tp->probes_out; + r->tcpdiag_retrans = icsk->icsk_probes_out; r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (timer_pending(&sk->sk_timer)) { r->tcpdiag_timer = 2; - r->tcpdiag_retrans = tp->probes_out; + r->tcpdiag_retrans = icsk->icsk_probes_out; r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); } else { r->tcpdiag_timer = 0; r->tcpdiag_expires = 0; } #undef EXPIRES_IN_MS - r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; r->tcpdiag_uid = sock_i_uid(sk); @@ -163,8 +162,9 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, if (info) tcp_get_info(sk, info); - if (sk->sk_state < TCP_TIME_WAIT && tp->ca_ops->get_info) - tp->ca_ops->get_info(tp, ext, skb); + if (sk->sk_state < TCP_TIME_WAIT && + icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) + icsk->icsk_ca_ops->get_info(sk, ext, skb); nlh->nlmsg_len = skb->tail - b; return skb->len; diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 36c51f8136b..6acc04bde08 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -98,9 +98,10 @@ struct hstcp { u32 ai; }; -static void hstcp_init(struct tcp_sock *tp) +static void hstcp_init(struct sock *sk) { - struct hstcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hstcp *ca = inet_csk_ca(sk); ca->ai = 0; @@ -109,10 +110,11 @@ static void hstcp_init(struct tcp_sock *tp) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, +static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, u32 in_flight, int good) { - struct hstcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hstcp *ca = inet_csk_ca(sk); if (in_flight < tp->snd_cwnd) return; @@ -143,9 +145,10 @@ static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, } } -static u32 hstcp_ssthresh(struct tcp_sock *tp) +static u32 hstcp_ssthresh(struct sock *sk) { - struct hstcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct hstcp *ca = inet_csk_ca(sk); /* Do multiplicative decrease */ return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); @@ -164,7 +167,7 @@ static struct tcp_congestion_ops tcp_highspeed = { static int __init hstcp_register(void) { - BUG_ON(sizeof(struct hstcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_highspeed); } diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 40168275acf..e47b37984e9 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -55,18 +55,21 @@ static inline void htcp_reset(struct htcp *ca) ca->snd_cwnd_cnt2 = 0; } -static u32 htcp_cwnd_undo(struct tcp_sock *tp) +static u32 htcp_cwnd_undo(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); ca->ccount = ca->undo_ccount; ca->maxRTT = ca->undo_maxRTT; ca->old_maxB = ca->undo_old_maxB; return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta); } -static inline void measure_rtt(struct tcp_sock *tp) +static inline void measure_rtt(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); u32 srtt = tp->srtt>>3; /* keep track of minimum RTT seen so far, minRTT is zero at first */ @@ -74,7 +77,7 @@ static inline void measure_rtt(struct tcp_sock *tp) ca->minRTT = srtt; /* max RTT */ - if (tp->ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { + if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { if (ca->maxRTT < ca->minRTT) ca->maxRTT = ca->minRTT; if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50) @@ -82,13 +85,16 @@ static inline void measure_rtt(struct tcp_sock *tp) } } -static void measure_achieved_throughput(struct tcp_sock *tp, u32 pkts_acked) +static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked) { - struct htcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); u32 now = tcp_time_stamp; /* achieved throughput calculations */ - if (tp->ca_state != TCP_CA_Open && tp->ca_state != TCP_CA_Disorder) { + if (icsk->icsk_ca_state != TCP_CA_Open && + icsk->icsk_ca_state != TCP_CA_Disorder) { ca->packetcount = 0; ca->lasttime = now; return; @@ -173,9 +179,9 @@ static inline void htcp_alpha_update(struct htcp *ca) * that point do we really have a real sense of maxRTT (the queues en route * were getting just too full now). */ -static void htcp_param_update(struct tcp_sock *tp) +static void htcp_param_update(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + struct htcp *ca = inet_csk_ca(sk); u32 minRTT = ca->minRTT; u32 maxRTT = ca->maxRTT; @@ -187,17 +193,19 @@ static void htcp_param_update(struct tcp_sock *tp) ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100; } -static u32 htcp_recalc_ssthresh(struct tcp_sock *tp) +static u32 htcp_recalc_ssthresh(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); - htcp_param_update(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct htcp *ca = inet_csk_ca(sk); + htcp_param_update(sk); return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int data_acked) { - struct htcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); if (in_flight < tp->snd_cwnd) return; @@ -207,7 +215,7 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; } else { - measure_rtt(tp); + measure_rtt(sk); /* keep track of number of round-trip times since last backoff event */ if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) { @@ -229,28 +237,29 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, } /* Lower bound on congestion window. */ -static u32 htcp_min_cwnd(struct tcp_sock *tp) +static u32 htcp_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh; } -static void htcp_init(struct tcp_sock *tp) +static void htcp_init(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + struct htcp *ca = inet_csk_ca(sk); memset(ca, 0, sizeof(struct htcp)); ca->alpha = ALPHA_BASE; ca->beta = BETA_MIN; } -static void htcp_state(struct tcp_sock *tp, u8 new_state) +static void htcp_state(struct sock *sk, u8 new_state) { switch (new_state) { case TCP_CA_CWR: case TCP_CA_Recovery: case TCP_CA_Loss: - htcp_reset(tcp_ca(tp)); + htcp_reset(inet_csk_ca(sk)); break; } } @@ -269,7 +278,7 @@ static struct tcp_congestion_ops htcp = { static int __init htcp_register(void) { - BUG_ON(sizeof(struct htcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); BUILD_BUG_ON(BETA_MIN >= BETA_MAX); if (!use_bandwidth_switch) htcp.pkts_acked = NULL; diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 13a66342c30..77add63623d 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -33,19 +33,20 @@ MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)"); /* This is called to refresh values for hybla parameters */ -static inline void hybla_recalc_param (struct tcp_sock *tp) +static inline void hybla_recalc_param (struct sock *sk) { - struct hybla *ca = tcp_ca(tp); + struct hybla *ca = inet_csk_ca(sk); - ca->rho_3ls = max_t(u32, tp->srtt / msecs_to_jiffies(rtt0), 8); + ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); ca->rho = ca->rho_3ls >> 3; ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; ca->rho2 = ca->rho2_7ls >>7; } -static void hybla_init(struct tcp_sock *tp) +static void hybla_init(struct sock *sk) { - struct hybla *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hybla *ca = inet_csk_ca(sk); ca->rho = 0; ca->rho2 = 0; @@ -57,17 +58,16 @@ static void hybla_init(struct tcp_sock *tp) tp->snd_cwnd_clamp = 65535; /* 1st Rho measurement based on initial srtt */ - hybla_recalc_param(tp); + hybla_recalc_param(sk); /* set minimum rtt as this is the 1st ever seen */ ca->minrtt = tp->srtt; tp->snd_cwnd = ca->rho; } -static void hybla_state(struct tcp_sock *tp, u8 ca_state) +static void hybla_state(struct sock *sk, u8 ca_state) { - struct hybla *ca = tcp_ca(tp); - + struct hybla *ca = inet_csk_ca(sk); ca->hybla_en = (ca_state == TCP_CA_Open); } @@ -86,27 +86,28 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { - struct hybla *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hybla *ca = inet_csk_ca(sk); u32 increment, odd, rho_fractions; int is_slowstart = 0; /* Recalculate rho only if this srtt is the lowest */ if (tp->srtt < ca->minrtt){ - hybla_recalc_param(tp); + hybla_recalc_param(sk); ca->minrtt = tp->srtt; } if (!ca->hybla_en) - return tcp_reno_cong_avoid(tp, ack, rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); if (in_flight < tp->snd_cwnd) return; if (ca->rho == 0) - hybla_recalc_param(tp); + hybla_recalc_param(sk); rho_fractions = ca-> |