diff options
Diffstat (limited to 'net/dccp/ccids')
| -rw-r--r-- | net/dccp/ccids/Kconfig | 5 | ||||
| -rw-r--r-- | net/dccp/ccids/ccid2.c | 329 | ||||
| -rw-r--r-- | net/dccp/ccids/ccid2.h | 33 | ||||
| -rw-r--r-- | net/dccp/ccids/ccid3.c | 26 | ||||
| -rw-r--r-- | net/dccp/ccids/lib/loss_interval.c | 1 | ||||
| -rw-r--r-- | net/dccp/ccids/lib/loss_interval.h | 8 | ||||
| -rw-r--r-- | net/dccp/ccids/lib/packet_history.c | 3 | ||||
| -rw-r--r-- | net/dccp/ccids/lib/packet_history.h | 25 | ||||
| -rw-r--r-- | net/dccp/ccids/lib/tfrc.c | 3 | ||||
| -rw-r--r-- | net/dccp/ccids/lib/tfrc.h | 24 | ||||
| -rw-r--r-- | net/dccp/ccids/lib/tfrc_equation.c | 2 |
11 files changed, 281 insertions, 178 deletions
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 0581143cb80..8ba3fc9d6d1 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -1,5 +1,4 @@ -menu "DCCP CCIDs Configuration (EXPERIMENTAL)" - depends on EXPERIMENTAL +menu "DCCP CCIDs Configuration" config IP_DCCP_CCID2_DEBUG bool "CCID-2 debugging messages" @@ -12,7 +11,7 @@ config IP_DCCP_CCID2_DEBUG If in doubt, say N. config IP_DCCP_CCID3 - bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)" + bool "CCID-3 (TCP-Friendly)" def_bool y if (IP_DCCP = y || IP_DCCP = m) ---help--- CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 6576eae9e77..f053198e730 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -29,7 +29,7 @@ #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -static int ccid2_debug; +static bool ccid2_debug; #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) #else #define ccid2_pr_debug(format, a...) @@ -85,7 +85,6 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) { - struct dccp_sock *dp = dccp_sk(sk); u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2); /* @@ -98,14 +97,33 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); val = max_ratio; } - if (val > DCCPF_ACK_RATIO_MAX) - val = DCCPF_ACK_RATIO_MAX; + dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO, + min_t(u32, val, DCCPF_ACK_RATIO_MAX)); +} - if (val == dp->dccps_l_ack_ratio) - return; +static void ccid2_check_l_ack_ratio(struct sock *sk) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - ccid2_pr_debug("changing local ack ratio to %u\n", val); - dp->dccps_l_ack_ratio = val; + /* + * After a loss, idle period, application limited period, or RTO we + * need to check that the ack ratio is still less than the congestion + * window. Otherwise, we will send an entire congestion window of + * packets and got no response because we haven't sent ack ratio + * packets yet. + * If the ack ratio does need to be reduced, we reduce it to half of + * the congestion window (or 1 if that's zero) instead of to the + * congestion window. This prevents problems if one ack is lost. + */ + if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd) + ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U); +} + +static void ccid2_change_l_seq_window(struct sock *sk, u64 val) +{ + dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW, + clamp_val(val, DCCPF_SEQ_WMIN, + DCCPF_SEQ_WMAX)); } static void ccid2_hc_tx_rto_expire(unsigned long data) @@ -153,17 +171,97 @@ out: sock_put(sk); } +/* + * Congestion window validation (RFC 2861). + */ +static bool ccid2_do_cwv = true; +module_param(ccid2_do_cwv, bool, 0644); +MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation"); + +/** + * ccid2_update_used_window - Track how much of cwnd is actually used + * This is done in addition to CWV. The sender needs to have an idea of how many + * packets may be in flight, to set the local Sequence Window value accordingly + * (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the + * maximum-used window. We use an EWMA low-pass filter to filter out noise. + */ +static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd) +{ + hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4; +} + +/* This borrows the code of tcp_cwnd_application_limited() */ +static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + /* don't reduce cwnd below the initial window (IW) */ + u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache), + win_used = max(hc->tx_cwnd_used, init_win); + + if (win_used < hc->tx_cwnd) { + hc->tx_ssthresh = max(hc->tx_ssthresh, + (hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2)); + hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1; + } + hc->tx_cwnd_used = 0; + hc->tx_cwnd_stamp = now; + + ccid2_check_l_ack_ratio(sk); +} + +/* This borrows the code of tcp_cwnd_restart() */ +static void ccid2_cwnd_restart(struct sock *sk, const u32 now) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + u32 cwnd = hc->tx_cwnd, restart_cwnd, + iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); + + hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); + + /* don't reduce cwnd below the initial window (IW) */ + restart_cwnd = min(cwnd, iwnd); + cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto; + hc->tx_cwnd = max(cwnd, restart_cwnd); + + hc->tx_cwnd_stamp = now; + hc->tx_cwnd_used = 0; + + ccid2_check_l_ack_ratio(sk); +} + static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + const u32 now = ccid2_time_stamp; struct ccid2_seq *next; - hc->tx_pipe++; + /* slow-start after idle periods (RFC 2581, RFC 2861) */ + if (ccid2_do_cwv && !hc->tx_pipe && + (s32)(now - hc->tx_lsndtime) >= hc->tx_rto) + ccid2_cwnd_restart(sk, now); + + hc->tx_lsndtime = now; + hc->tx_pipe += 1; + + /* see whether cwnd was fully used (RFC 2861), update expected window */ + if (ccid2_cwnd_network_limited(hc)) { + ccid2_update_used_window(hc, hc->tx_cwnd); + hc->tx_cwnd_used = 0; + hc->tx_cwnd_stamp = now; + } else { + if (hc->tx_pipe > hc->tx_cwnd_used) + hc->tx_cwnd_used = hc->tx_pipe; + + ccid2_update_used_window(hc, hc->tx_cwnd_used); + + if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto) + ccid2_cwnd_application_limited(sk, now); + } hc->tx_seqh->ccid2s_seq = dp->dccps_gss; hc->tx_seqh->ccid2s_acked = 0; - hc->tx_seqh->ccid2s_sent = ccid2_time_stamp; + hc->tx_seqh->ccid2s_sent = now; next = hc->tx_seqh->ccid2s_next; /* check if we need to alloc more space */ @@ -246,68 +344,6 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) #endif } -/* XXX Lame code duplication! - * returns -1 if none was found. - * else returns the next offset to use in the function call. - */ -static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset, - unsigned char **vec, unsigned char *veclen) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); - unsigned char *opt_ptr; - const unsigned char *opt_end = (unsigned char *)dh + - (dh->dccph_doff * 4); - unsigned char opt, len; - unsigned char *value; - - BUG_ON(offset < 0); - options += offset; - opt_ptr = options; - if (opt_ptr >= opt_end) - return -1; - - while (opt_ptr != opt_end) { - opt = *opt_ptr++; - len = 0; - value = NULL; - - /* Check if this isn't a single byte option */ - if (opt > DCCPO_MAX_RESERVED) { - if (opt_ptr == opt_end) - goto out_invalid_option; - - len = *opt_ptr++; - if (len < 3) - goto out_invalid_option; - /* - * Remove the type and len fields, leaving - * just the value size - */ - len -= 2; - value = opt_ptr; - opt_ptr += len; - - if (opt_ptr > opt_end) - goto out_invalid_option; - } - - switch (opt) { - case DCCPO_ACK_VECTOR_0: - case DCCPO_ACK_VECTOR_1: - *vec = value; - *veclen = len; - return offset + (opt_ptr - options); - } - } - - return -1; - -out_invalid_option: - DCCP_BUG("Invalid option - this should not happen (previous parsing)!"); - return -1; -} - /** * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm * This code is almost identical with TCP's tcp_rtt_estimator(), since @@ -391,17 +427,37 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, unsigned int *maxincr) { struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - - if (hc->tx_cwnd < hc->tx_ssthresh) { - if (*maxincr > 0 && ++hc->tx_packets_acked == 2) { + struct dccp_sock *dp = dccp_sk(sk); + int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio; + + if (hc->tx_cwnd < dp->dccps_l_seq_win && + r_seq_used < dp->dccps_r_seq_win) { + if (hc->tx_cwnd < hc->tx_ssthresh) { + if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) { + hc->tx_cwnd += 1; + *maxincr -= 1; + hc->tx_packets_acked = 0; + } + } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { hc->tx_cwnd += 1; - *maxincr -= 1; hc->tx_packets_acked = 0; } - } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { - hc->tx_cwnd += 1; - hc->tx_packets_acked = 0; } + + /* + * Adjust the local sequence window and the ack ratio to allow about + * 5 times the number of packets in the network (RFC 4340 7.5.2) + */ + if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win) + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2); + else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2) + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U); + + if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win) + ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2); + else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2) + ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2); + /* * FIXME: RTT is sampled several times per acknowledgment (for each * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). @@ -427,9 +483,21 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; hc->tx_ssthresh = max(hc->tx_cwnd, 2U); - /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ - if (dccp_sk(sk)->dccps_l_ack_ratio > hc->tx_cwnd) - ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); + ccid2_check_l_ack_ratio(sk); +} + +static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, + u8 option, u8 *optval, u8 optlen) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + + switch (option) { + case DCCPO_ACK_VECTOR_0: + case DCCPO_ACK_VECTOR_1: + return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen, + option - DCCPO_ACK_VECTOR_0); + } + return 0; } static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -437,11 +505,9 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); + struct dccp_ackvec_parsed *avp; u64 ackno, seqno; struct ccid2_seq *seqp; - unsigned char *vector; - unsigned char veclen; - int offset = 0; int done = 0; unsigned int maxincr = 0; @@ -468,24 +534,27 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (hc->tx_rpdupack >= NUMDUPACK) { hc->tx_rpdupack = -1; /* XXX lame */ hc->tx_rpseq = 0; - +#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__ + /* + * FIXME: Ack Congestion Control is broken; in + * the current state instabilities occurred with + * Ack Ratios greater than 1; causing hang-ups + * and long RTO timeouts. This needs to be fixed + * before opening up dynamic changes. -- gerrit + */ ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); +#endif } } } /* check forward path congestion */ - /* still didn't send out new data packets */ - if (hc->tx_seqh == hc->tx_seqt) + if (dccp_packet_without_ack(skb)) return; - switch (DCCP_SKB_CB(skb)->dccpd_type) { - case DCCP_PKT_ACK: - case DCCP_PKT_DATAACK: - break; - default: - return; - } + /* still didn't send out new data packets */ + if (hc->tx_seqh == hc->tx_seqt) + goto done; ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; if (after48(ackno, hc->tx_high_ack)) @@ -509,16 +578,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2); /* go through all ack vectors */ - while ((offset = ccid2_ackvector(sk, skb, offset, - &vector, &veclen)) != -1) { + list_for_each_entry(avp, &hc->tx_av_chunks, node) { /* go through this ack vector */ - while (veclen--) { - const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; - u64 ackno_end_rl = SUB48(ackno, rl); + for (; avp->len--; avp->vec++) { + u64 ackno_end_rl = SUB48(ackno, + dccp_ackvec_runlen(avp->vec)); - ccid2_pr_debug("ackvec start:%llu end:%llu\n", + ccid2_pr_debug("ackvec %llu |%u,%u|\n", (unsigned long long)ackno, - (unsigned long long)ackno_end_rl); + dccp_ackvec_state(avp->vec) >> 6, + dccp_ackvec_runlen(avp->vec)); /* if the seqno we are analyzing is larger than the * current ackno, then move towards the tail of our * seqnos. @@ -537,17 +606,15 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * run length */ while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { - const u8 state = *vector & - DCCP_ACKVEC_STATE_MASK; + const u8 state = dccp_ackvec_state(avp->vec); /* new packet received or marked */ - if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && + if (state != DCCPAV_NOT_RECEIVED && !seqp->ccid2s_acked) { - if (state == - DCCP_ACKVEC_STATE_ECN_MARKED) { + if (state == DCCPAV_ECN_MARKED) ccid2_congestion_event(sk, seqp); - } else + else ccid2_new_ack(sk, seqp, &maxincr); @@ -566,7 +633,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) break; ackno = SUB48(ackno_end_rl, 1); - vector++; } if (done) break; @@ -634,10 +700,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) sk_stop_timer(sk, &hc->tx_rtotimer); else sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); - +done: /* check if incoming Acks allow pending packets to be sent */ if (sender_was_blocked && !ccid2_cwnd_network_limited(hc)) tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); + dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) @@ -651,6 +718,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) /* Use larger initial windows (RFC 4341, section 5). */ hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); + hc->tx_expected_wnd = hc->tx_cwnd; /* Make sure that Ack Ratio is enabled and within bounds. */ max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); @@ -663,9 +731,11 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_rto = DCCP_TIMEOUT_INIT; hc->tx_rpdupack = -1; - hc->tx_last_cong = ccid2_time_stamp; + hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp; + hc->tx_cwnd_used = 0; setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); + INIT_LIST_HEAD(&hc->tx_av_chunks); return 0; } @@ -683,32 +753,29 @@ static void ccid2_hc_tx_exit(struct sock *sk) static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_rx_sock *hc = ccid2_hc_rx_sk(sk); - switch (DCCP_SKB_CB(skb)->dccpd_type) { - case DCCP_PKT_DATA: - case DCCP_PKT_DATAACK: - hc->rx_data++; - if (hc->rx_data >= dp->dccps_r_ack_ratio) { - dccp_send_ack(sk); - hc->rx_data = 0; - } - break; + if (!dccp_data_packet(skb)) + return; + + if (++hc->rx_num_data_pkts >= dccp_sk(sk)->dccps_r_ack_ratio) { + dccp_send_ack(sk); + hc->rx_num_data_pkts = 0; } } struct ccid_operations ccid2_ops = { - .ccid_id = DCCPC_CCID2, - .ccid_name = "TCP-like", - .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), - .ccid_hc_tx_init = ccid2_hc_tx_init, - .ccid_hc_tx_exit = ccid2_hc_tx_exit, - .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, - .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, - .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, - .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), - .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, + .ccid_id = DCCPC_CCID2, + .ccid_name = "TCP-like", + .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), + .ccid_hc_tx_init = ccid2_hc_tx_init, + .ccid_hc_tx_exit = ccid2_hc_tx_exit, + .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet, + .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent, + .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options, + .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv, + .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock), + .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv, }; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 25cb6b216ed..18c97543e52 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -43,6 +43,12 @@ struct ccid2_seq { #define CCID2_SEQBUF_LEN 1024 #define CCID2_SEQBUF_MAX 128 +/* + * Multiple of congestion window to keep the sequence window at + * (RFC 4340 7.5.2) + */ +#define CCID2_WIN_CHANGE_FACTOR 5 + /** * struct ccid2_hc_tx_sock - CCID2 TX half connection * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 @@ -53,8 +59,13 @@ struct ccid2_seq { * @tx_rttvar: moving average/maximum of @mdev_max * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988) * @tx_rtt_seq: to decay RTTVAR at most once per flight + * @tx_cwnd_used: actually used cwnd, W_used of RFC 2861 + * @tx_expected_wnd: moving average of @tx_cwnd_used + * @tx_cwnd_stamp: to track idle periods in CWV + * @tx_lsndtime: last time (in jiffies) a data packet was sent * @tx_rpseq: last consecutive seqno * @tx_rpdupack: dupacks since rpseq + * @tx_av_chunks: list of Ack Vectors received on current skb */ struct ccid2_hc_tx_sock { u32 tx_cwnd; @@ -75,10 +86,17 @@ struct ccid2_hc_tx_sock { u64 tx_rtt_seq:48; struct timer_list tx_rtotimer; + /* Congestion Window validation (optional, RFC 2861) */ + u32 tx_cwnd_used, + tx_expected_wnd, + tx_cwnd_stamp, + tx_lsndtime; + u64 tx_rpseq; int tx_rpdupack; u32 tx_last_cong; u64 tx_high_ack; + struct list_head tx_av_chunks; }; static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc) @@ -86,8 +104,21 @@ static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc) return hc->tx_pipe >= hc->tx_cwnd; } +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * This is based on the numbers specified in RFC 5681, 3.1. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); +} + +/** + * struct ccid2_hc_rx_sock - Receiving end of CCID-2 half-connection + * @rx_num_data_pkts: number of data packets received since last feedback + */ struct ccid2_hc_rx_sock { - int rx_data; + u32 rx_num_data_pkts; }; static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 3d604e1349c..119c04317d4 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -38,7 +38,7 @@ #include <asm/unaligned.h> #ifdef CONFIG_IP_DCCP_CCID3_DEBUG -static int ccid3_debug; +static bool ccid3_debug; #define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a) #else #define ccid3_pr_debug(format, a...) @@ -98,8 +98,9 @@ static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) { hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); + DCCP_BUG_ON(hc->tx_t_ipi == 0); ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi, - hc->tx_s, (unsigned)(hc->tx_x >> 6)); + hc->tx_s, (unsigned int)(hc->tx_x >> 6)); } static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) @@ -112,6 +113,7 @@ static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) /** * ccid3_hc_tx_update_x - Update allowed sending rate X * @stamp: most recent time if available - can be left NULL. + * * This function tracks draft rfc3448bis, check there for latest details. * * Note: X and X_recv are both stored in units of 64 * bytes/second, to support @@ -152,17 +154,19 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) if (hc->tx_x != old_x) { ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, " - "X_recv=%u\n", (unsigned)(old_x >> 6), - (unsigned)(hc->tx_x >> 6), hc->tx_x_calc, - (unsigned)(hc->tx_x_recv >> 6)); + "X_recv=%u\n", (unsigned int)(old_x >> 6), + (unsigned int)(hc->tx_x >> 6), hc->tx_x_calc, + (unsigned int)(hc->tx_x_recv >> 6)); ccid3_update_send_interval(hc); } } -/* - * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1) +/** + * ccid3_hc_tx_update_s - Track the mean packet size `s' * @len: DCCP packet payload size in bytes + * + * cf. RFC 4342, 5.3 and RFC 3448, 4.1 */ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hc, int len) { @@ -236,8 +240,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) * * Note that X_recv is scaled by 2^6 while X_calc is not */ - BUG_ON(hc->tx_p && !hc->tx_x_calc); - if (hc->tx_x_calc > (hc->tx_x_recv >> 5)) hc->tx_x_recv = max(hc->tx_x_recv / 2, @@ -271,6 +273,7 @@ out: /** * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets * @skb: next packet candidate to send on @sk + * * This function uses the convention of ccid_packet_dequeue_eval() and * returns a millisecond-delay value between 0 and t_mbi = 64000 msec. */ @@ -426,8 +429,8 @@ done_computing_x: "p=%u, X_calc=%u, X_recv=%u, X=%u\n", dccp_role(sk), sk, hc->tx_rtt, r_sample, hc->tx_s, hc->tx_p, hc->tx_x_calc, - (unsigned)(hc->tx_x_recv >> 6), - (unsigned)(hc->tx_x >> 6)); + (unsigned int)(hc->tx_x_recv >> 6), + (unsigned int)(hc->tx_x >> 6)); /* unschedule no feedback timer */ sk_stop_timer(sk, &hc->tx_no_feedback_timer); @@ -532,6 +535,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, case DCCP_SOCKOPT_CCID_TX_INFO: if (len < sizeof(tfrc)) return -EINVAL; + memset(&tfrc, 0, sizeof(tfrc)); tfrc.tfrctx_x = hc->tx_x; tfrc.tfrctx_x_recv = hc->tx_x_recv; tfrc.tfrctx_x_calc = hc->tx_x_calc; diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 497723c4d4b..57f9fd78c4d 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -133,6 +133,7 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, * @rh: Receive history containing a fresh loss event * @calc_first_li: Caller-dependent routine to compute length of first interval * @sk: Used by @calc_first_li in caller-specific way (subtyping) + * * Updates I_mean and returns 1 if a new interval has in fact been added to @lh. */ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index d1d2f5383b7..57f631a86cc 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -65,9 +65,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh) struct tfrc_rx_hist; -extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, - u32 (*first_li)(struct sock *), struct sock *); -extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); -extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); +int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, + u32 (*first_li)(struct sock *), struct sock *); +u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); +void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index de8fe294bf0..08df7a3acb3 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -315,6 +315,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h) * @ndp: The NDP count belonging to @skb * @calc_first_li: Caller-dependent computation of first loss interval in @lh * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) + * * Chooses action according to pending loss, updates LI database when a new * loss was detected, and does required post-processing. Returns 1 when caller * should send feedback, 0 otherwise. @@ -387,7 +388,7 @@ static inline struct tfrc_rx_hist_entry * } /** - * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry + * tfrc_rx_hist_rtt_prev_s - previously suitable (wrt rtt_last_s) RTT-sampling entry */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h) diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 7ee4a9d9d33..ee362b0b630 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -60,8 +60,8 @@ static inline struct tfrc_tx_hist_entry * return head; } -extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); -extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); +int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); +void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); /* Subtraction a-b modulo-16, respects circular wrap-around */ #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) @@ -139,20 +139,17 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) return h->loss_count > 0; } -extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, - const struct sk_buff *skb, const u64 ndp); +void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, + const u64 ndp); -extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); +int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); struct tfrc_loss_hist; -extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, - struct tfrc_loss_hist *lh, - struct sk_buff *skb, const u64 ndp, - u32 (*first_li)(struct sock *sk), - struct sock *sk); -extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, - const struct sk_buff *skb); -extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); -extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); +int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh, + struct sk_buff *skb, const u64 ndp, + u32 (*first_li)(struct sock *sk), struct sock *sk); +u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb); +int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); +void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); #endif /* _DCCP_PKT_HIST_ */ diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 4902029854d..62b5828acde 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -4,10 +4,11 @@ * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com> */ +#include <linux/moduleparam.h> #include "tfrc.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -int tfrc_debug; +bool tfrc_debug; module_param(tfrc_debug, bool, 0644); MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages"); #endif diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index f8ee3f54977..40ee7d62b65 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -21,7 +21,7 @@ #include "packet_history.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -extern int tfrc_debug; +extern bool tfrc_debug; #define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a) #else #define tfrc_pr_debug(format, a...) @@ -55,21 +55,21 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval; } -extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); -extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); -extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); +u32 tfrc_calc_x(u16 s, u32 R, u32 p); +u32 tfrc_calc_x_reverse_lookup(u32 fvalue); +u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); -extern int tfrc_tx_packet_history_init(void); -extern void tfrc_tx_packet_history_exit(void); -extern int tfrc_rx_packet_history_init(void); -extern void tfrc_rx_packet_history_exit(void); +int tfrc_tx_packet_history_init(void); +void tfrc_tx_packet_history_exit(void); +int tfrc_rx_packet_history_init(void); +void tfrc_rx_packet_history_exit(void); -extern int tfrc_li_init(void); -extern void tfrc_li_exit(void); +int tfrc_li_init(void); +void tfrc_li_exit(void); #ifdef CONFIG_IP_DCCP_TFRC_LIB -extern int tfrc_lib_init(void); -extern void tfrc_lib_exit(void); +int tfrc_lib_init(void); +void tfrc_lib_exit(void); #else #define tfrc_lib_init() (0) #define tfrc_lib_exit() diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index a052a4377e2..88ef98285be 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -611,6 +611,7 @@ static inline u32 tfrc_binsearch(u32 fval, u8 small) * @s: packet size in bytes * @R: RTT scaled by 1000000 (i.e., microseconds) * @p: loss ratio estimate scaled by 1000000 + * * Returns X_calc in bytes per second (not scaled). */ u32 tfrc_calc_x(u16 s, u32 R, u32 p) @@ -659,6 +660,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) /** * tfrc_calc_x_reverse_lookup - try to find p given f(p) * @fvalue: function value to match, scaled by 1000000 + * * Returns closest match for p, also scaled by 1000000 */ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) |
