diff options
Diffstat (limited to 'net/dccp')
31 files changed, 940 insertions, 630 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index b75968a0401..8c0ef71bed2 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -1,6 +1,6 @@ menuconfig IP_DCCP - tristate "The DCCP Protocol (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL + tristate "The DCCP Protocol" + depends on INET ---help--- Datagram Congestion Control Protocol (RFC 4340) diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 25b7a8d1ad5..ba07824af4c 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -12,6 +12,7 @@ #include "dccp.h" #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/export.h> static struct kmem_cache *dccp_ackvec_slab; static struct kmem_cache *dccp_ackvec_record_slab; diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index e2ab0627a5f..3284bfa988c 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -50,7 +50,8 @@ static inline u8 dccp_ackvec_state(const u8 *cell) return *cell & ~DCCPAV_MAX_RUNLEN; } -/** struct dccp_ackvec - Ack Vector main data structure +/** + * struct dccp_ackvec - Ack Vector main data structure * * This implements a fixed-size circular buffer within an array and is largely * based on Appendix A of RFC 4340. @@ -76,7 +77,8 @@ struct dccp_ackvec { struct list_head av_records; }; -/** struct dccp_ackvec_record - Records information about sent Ack Vectors +/** + * struct dccp_ackvec_record - Records information about sent Ack Vectors * * These list entries define the additional information which the HC-Receiver * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A. @@ -99,16 +101,16 @@ struct dccp_ackvec_record { u8 avr_ack_nonce:1; }; -extern int dccp_ackvec_init(void); -extern void dccp_ackvec_exit(void); +int dccp_ackvec_init(void); +void dccp_ackvec_exit(void); -extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority); -extern void dccp_ackvec_free(struct dccp_ackvec *av); +struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority); +void dccp_ackvec_free(struct dccp_ackvec *av); -extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb); -extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum); -extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno); -extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av); +void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb); +int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum); +void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno); +u16 dccp_ackvec_buflen(const struct dccp_ackvec *av); static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av) { @@ -121,6 +123,7 @@ static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av) * @len: length of @vec * @nonce: whether @vec had an ECN nonce of 0 or 1 * @node: FIFO - arranged in descending order of ack_ackno + * * This structure is used by CCIDs to access Ack Vectors in a received skb. */ struct dccp_ackvec_parsed { @@ -130,7 +133,6 @@ struct dccp_ackvec_parsed { struct list_head node; }; -extern int dccp_ackvec_parsed_add(struct list_head *head, - u8 *vec, u8 len, u8 nonce); -extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks); +int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce); +void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks); #endif /* _ACKVEC_H */ diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 36479ca61e0..597557254dd 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -46,6 +46,7 @@ bool ccid_support_check(u8 const *ccid_array, u8 array_len) * ccid_get_builtin_ccids - Populate a list of built-in CCIDs * @ccid_array: pointer to copy into * @array_len: value to return length into + * * This function allocates memory - caller must see that it is freed after use. */ int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len) @@ -118,7 +119,7 @@ static int ccid_activate(struct ccid_operations *ccid_ops) if (ccid_ops->ccid_hc_tx_slab == NULL) goto out_free_rx_slab; - pr_info("CCID: Activated CCID %d (%s)\n", + pr_info("DCCP: Activated CCID %d (%s)\n", ccid_ops->ccid_id, ccid_ops->ccid_name); err = 0; out: @@ -136,7 +137,7 @@ static void ccid_deactivate(struct ccid_operations *ccid_ops) ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab); ccid_ops->ccid_hc_rx_slab = NULL; - pr_info("CCID: Deactivated CCID %d (%s)\n", + pr_info("DCCP: Deactivated CCID %d (%s)\n", ccid_ops->ccid_id, ccid_ops->ccid_name); } diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 75c3582a767..6eb837a47b5 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -93,8 +93,8 @@ extern struct ccid_operations ccid2_ops; extern struct ccid_operations ccid3_ops; #endif -extern int ccid_initialize_builtins(void); -extern void ccid_cleanup_builtins(void); +int ccid_initialize_builtins(void); +void ccid_cleanup_builtins(void); struct ccid { struct ccid_operations *ccid_ops; @@ -106,12 +106,12 @@ static inline void *ccid_priv(const struct ccid *ccid) return (void *)ccid->ccid_priv; } -extern bool ccid_support_check(u8 const *ccid_array, u8 array_len); -extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); -extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, - char __user *, int __user *); +bool ccid_support_check(u8 const *ccid_array, u8 array_len); +int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); +int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, + char __user *, int __user *); -extern struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx); +struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx); static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp) { @@ -131,8 +131,8 @@ static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp) return ccid->ccid_ops->ccid_id; } -extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); -extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); +void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk); +void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk); /* * Congestion control of queued data packets via CCID decision. @@ -246,7 +246,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, optval, optlen); return rc; @@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, u32 __user *optval, int __user *optlen) { int rc = -ENOPROTOOPT; - if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) + if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, optval, optlen); return rc; diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 0581143cb80..8ba3fc9d6d1 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -1,5 +1,4 @@ -menu "DCCP CCIDs Configuration (EXPERIMENTAL)" - depends on EXPERIMENTAL +menu "DCCP CCIDs Configuration" config IP_DCCP_CCID2_DEBUG bool "CCID-2 debugging messages" @@ -12,7 +11,7 @@ config IP_DCCP_CCID2_DEBUG If in doubt, say N. config IP_DCCP_CCID3 - bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)" + bool "CCID-3 (TCP-Friendly)" def_bool y if (IP_DCCP = y || IP_DCCP = m) ---help--- CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index e96d5e81003..f053198e730 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -29,7 +29,7 @@ #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -static int ccid2_debug; +static bool ccid2_debug; #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) #else #define ccid2_pr_debug(format, a...) @@ -85,7 +85,6 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) { - struct dccp_sock *dp = dccp_sk(sk); u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2); /* @@ -98,14 +97,33 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val) DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio); val = max_ratio; } - if (val > DCCPF_ACK_RATIO_MAX) - val = DCCPF_ACK_RATIO_MAX; + dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO, + min_t(u32, val, DCCPF_ACK_RATIO_MAX)); +} - if (val == dp->dccps_l_ack_ratio) - return; +static void ccid2_check_l_ack_ratio(struct sock *sk) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + + /* + * After a loss, idle period, application limited period, or RTO we + * need to check that the ack ratio is still less than the congestion + * window. Otherwise, we will send an entire congestion window of + * packets and got no response because we haven't sent ack ratio + * packets yet. + * If the ack ratio does need to be reduced, we reduce it to half of + * the congestion window (or 1 if that's zero) instead of to the + * congestion window. This prevents problems if one ack is lost. + */ + if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd) + ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U); +} - ccid2_pr_debug("changing local ack ratio to %u\n", val); - dp->dccps_l_ack_ratio = val; +static void ccid2_change_l_seq_window(struct sock *sk, u64 val) +{ + dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW, + clamp_val(val, DCCPF_SEQ_WMIN, + DCCPF_SEQ_WMAX)); } static void ccid2_hc_tx_rto_expire(unsigned long data) @@ -153,17 +171,97 @@ out: sock_put(sk); } +/* + * Congestion window validation (RFC 2861). + */ +static bool ccid2_do_cwv = true; +module_param(ccid2_do_cwv, bool, 0644); +MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation"); + +/** + * ccid2_update_used_window - Track how much of cwnd is actually used + * This is done in addition to CWV. The sender needs to have an idea of how many + * packets may be in flight, to set the local Sequence Window value accordingly + * (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the + * maximum-used window. We use an EWMA low-pass filter to filter out noise. + */ +static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd) +{ + hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4; +} + +/* This borrows the code of tcp_cwnd_application_limited() */ +static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + /* don't reduce cwnd below the initial window (IW) */ + u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache), + win_used = max(hc->tx_cwnd_used, init_win); + + if (win_used < hc->tx_cwnd) { + hc->tx_ssthresh = max(hc->tx_ssthresh, + (hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2)); + hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1; + } + hc->tx_cwnd_used = 0; + hc->tx_cwnd_stamp = now; + + ccid2_check_l_ack_ratio(sk); +} + +/* This borrows the code of tcp_cwnd_restart() */ +static void ccid2_cwnd_restart(struct sock *sk, const u32 now) +{ + struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + u32 cwnd = hc->tx_cwnd, restart_cwnd, + iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache); + + hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2)); + + /* don't reduce cwnd below the initial window (IW) */ + restart_cwnd = min(cwnd, iwnd); + cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto; + hc->tx_cwnd = max(cwnd, restart_cwnd); + + hc->tx_cwnd_stamp = now; + hc->tx_cwnd_used = 0; + + ccid2_check_l_ack_ratio(sk); +} + static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + const u32 now = ccid2_time_stamp; struct ccid2_seq *next; - hc->tx_pipe++; + /* slow-start after idle periods (RFC 2581, RFC 2861) */ + if (ccid2_do_cwv && !hc->tx_pipe && + (s32)(now - hc->tx_lsndtime) >= hc->tx_rto) + ccid2_cwnd_restart(sk, now); + + hc->tx_lsndtime = now; + hc->tx_pipe += 1; + + /* see whether cwnd was fully used (RFC 2861), update expected window */ + if (ccid2_cwnd_network_limited(hc)) { + ccid2_update_used_window(hc, hc->tx_cwnd); + hc->tx_cwnd_used = 0; + hc->tx_cwnd_stamp = now; + } else { + if (hc->tx_pipe > hc->tx_cwnd_used) + hc->tx_cwnd_used = hc->tx_pipe; + + ccid2_update_used_window(hc, hc->tx_cwnd_used); + + if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto) + ccid2_cwnd_application_limited(sk, now); + } hc->tx_seqh->ccid2s_seq = dp->dccps_gss; hc->tx_seqh->ccid2s_acked = 0; - hc->tx_seqh->ccid2s_sent = ccid2_time_stamp; + hc->tx_seqh->ccid2s_sent = now; next = hc->tx_seqh->ccid2s_next; /* check if we need to alloc more space */ @@ -329,17 +427,37 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp, unsigned int *maxincr) { struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); - - if (hc->tx_cwnd < hc->tx_ssthresh) { - if (*maxincr > 0 && ++hc->tx_packets_acked == 2) { + struct dccp_sock *dp = dccp_sk(sk); + int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio; + + if (hc->tx_cwnd < dp->dccps_l_seq_win && + r_seq_used < dp->dccps_r_seq_win) { + if (hc->tx_cwnd < hc->tx_ssthresh) { + if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) { + hc->tx_cwnd += 1; + *maxincr -= 1; + hc->tx_packets_acked = 0; + } + } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { hc->tx_cwnd += 1; - *maxincr -= 1; hc->tx_packets_acked = 0; } - } else if (++hc->tx_packets_acked >= hc->tx_cwnd) { - hc->tx_cwnd += 1; - hc->tx_packets_acked = 0; } + + /* + * Adjust the local sequence window and the ack ratio to allow about + * 5 times the number of packets in the network (RFC 4340 7.5.2) + */ + if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win) + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2); + else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2) + ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U); + + if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win) + ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2); + else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2) + ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2); + /* * FIXME: RTT is sampled several times per acknowledgment (for each * entry in the Ack Vector), instead of once per Ack (as in TCP SACK). @@ -365,9 +483,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; hc->tx_ssthresh = max(hc->tx_cwnd, 2U); - /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */ - if (dccp_sk(sk)->dccps_l_ack_ratio > hc->tx_cwnd) - ccid2_change_l_ack_ratio(sk, hc->tx_cwnd); + ccid2_check_l_ack_ratio(sk); } static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type, @@ -418,8 +534,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) if (hc->tx_rpdupack >= NUMDUPACK) { hc->tx_rpdupack = -1; /* XXX lame */ hc->tx_rpseq = 0; - +#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__ + /* + * FIXME: Ack Congestion Control is broken; in + * the current state instabilities occurred with + * Ack Ratios greater than 1; causing hang-ups + * and long RTO timeouts. This needs to be fixed + * before opening up dynamic changes. -- gerrit + */ ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio); +#endif } } } @@ -594,6 +718,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) /* Use larger initial windows (RFC 4341, section 5). */ hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); + hc->tx_expected_wnd = hc->tx_cwnd; /* Make sure that Ack Ratio is enabled and within bounds. */ max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); @@ -606,7 +731,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_rto = DCCP_TIMEOUT_INIT; hc->tx_rpdupack = -1; - hc->tx_last_cong = ccid2_time_stamp; + hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp; + hc->tx_cwnd_used = 0; setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk); INIT_LIST_HEAD(&hc->tx_av_chunks); @@ -627,18 +753,14 @@ static void ccid2_hc_tx_exit(struct sock *sk) static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { - const struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_rx_sock *hc = ccid2_hc_rx_sk(sk); - switch (DCCP_SKB_CB(skb)->dccpd_type) { - case DCCP_PKT_DATA: - case DCCP_PKT_DATAACK: - hc->rx_data++; - if (hc->rx_data >= dp->dccps_r_ack_ratio) { - dccp_send_ack(sk); - hc->rx_data = 0; - } - break; + if (!dccp_data_packet(skb)) + return; + + if (++hc->rx_num_data_pkts >= dccp_sk(sk)->dccps_r_ack_ratio) { + dccp_send_ack(sk); + hc->rx_num_data_pkts = 0; } } diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index e9985dafc2c..18c97543e52 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -43,6 +43,12 @@ struct ccid2_seq { #define CCID2_SEQBUF_LEN 1024 #define CCID2_SEQBUF_MAX 128 +/* + * Multiple of congestion window to keep the sequence window at + * (RFC 4340 7.5.2) + */ +#define CCID2_WIN_CHANGE_FACTOR 5 + /** * struct ccid2_hc_tx_sock - CCID2 TX half connection * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 @@ -53,6 +59,10 @@ struct ccid2_seq { * @tx_rttvar: moving average/maximum of @mdev_max * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988) * @tx_rtt_seq: to decay RTTVAR at most once per flight + * @tx_cwnd_used: actually used cwnd, W_used of RFC 2861 + * @tx_expected_wnd: moving average of @tx_cwnd_used + * @tx_cwnd_stamp: to track idle periods in CWV + * @tx_lsndtime: last time (in jiffies) a data packet was sent * @tx_rpseq: last consecutive seqno * @tx_rpdupack: dupacks since rpseq * @tx_av_chunks: list of Ack Vectors received on current skb @@ -76,6 +86,12 @@ struct ccid2_hc_tx_sock { u64 tx_rtt_seq:48; struct timer_list tx_rtotimer; + /* Congestion Window validation (optional, RFC 2861) */ + u32 tx_cwnd_used, + tx_expected_wnd, + tx_cwnd_stamp, + tx_lsndtime; + u64 tx_rpseq; int tx_rpdupack; u32 tx_last_cong; @@ -88,8 +104,21 @@ static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc) return hc->tx_pipe >= hc->tx_cwnd; } +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * This is based on the numbers specified in RFC 5681, 3.1. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); +} + +/** + * struct ccid2_hc_rx_sock - Receiving end of CCID-2 half-connection + * @rx_num_data_pkts: number of data packets received since last feedback + */ struct ccid2_hc_rx_sock { - int rx_data; + u32 rx_num_data_pkts; }; static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 3d604e1349c..119c04317d4 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -38,7 +38,7 @@ #include <asm/unaligned.h> #ifdef CONFIG_IP_DCCP_CCID3_DEBUG -static int ccid3_debug; +static bool ccid3_debug; #define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a) #else #define ccid3_pr_debug(format, a...) @@ -98,8 +98,9 @@ static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) { hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); + DCCP_BUG_ON(hc->tx_t_ipi == 0); ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi, - hc->tx_s, (unsigned)(hc->tx_x >> 6)); + hc->tx_s, (unsigned int)(hc->tx_x >> 6)); } static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) @@ -112,6 +113,7 @@ static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) /** * ccid3_hc_tx_update_x - Update allowed sending rate X * @stamp: most recent time if available - can be left NULL. + * * This function tracks draft rfc3448bis, check there for latest details. * * Note: X and X_recv are both stored in units of 64 * bytes/second, to support @@ -152,17 +154,19 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) if (hc->tx_x != old_x) { ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, " - "X_recv=%u\n", (unsigned)(old_x >> 6), - (unsigned)(hc->tx_x >> 6), hc->tx_x_calc, - (unsigned)(hc->tx_x_recv >> 6)); + "X_recv=%u\n", (unsigned int)(old_x >> 6), + (unsigned int)(hc->tx_x >> 6), hc->tx_x_calc, + (unsigned int)(hc->tx_x_recv >> 6)); ccid3_update_send_interval(hc); } } -/* - * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1) +/** + * ccid3_hc_tx_update_s - Track the mean packet size `s' * @len: DCCP packet payload size in bytes + * + * cf. RFC 4342, 5.3 and RFC 3448, 4.1 */ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hc, int len) { @@ -236,8 +240,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data) * * Note that X_recv is scaled by 2^6 while X_calc is not */ - BUG_ON(hc->tx_p && !hc->tx_x_calc); - if (hc->tx_x_calc > (hc->tx_x_recv >> 5)) hc->tx_x_recv = max(hc->tx_x_recv / 2, @@ -271,6 +273,7 @@ out: /** * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets * @skb: next packet candidate to send on @sk + * * This function uses the convention of ccid_packet_dequeue_eval() and * returns a millisecond-delay value between 0 and t_mbi = 64000 msec. */ @@ -426,8 +429,8 @@ done_computing_x: "p=%u, X_calc=%u, X_recv=%u, X=%u\n", dccp_role(sk), sk, hc->tx_rtt, r_sample, hc->tx_s, hc->tx_p, hc->tx_x_calc, - (unsigned)(hc->tx_x_recv >> 6), - (unsigned)(hc->tx_x >> 6)); + (unsigned int)(hc->tx_x_recv >> 6), + (unsigned int)(hc->tx_x >> 6)); /* unschedule no feedback timer */ sk_stop_timer(sk, &hc->tx_no_feedback_timer); @@ -532,6 +535,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, case DCCP_SOCKOPT_CCID_TX_INFO: if (len < sizeof(tfrc)) return -EINVAL; + memset(&tfrc, 0, sizeof(tfrc)); tfrc.tfrctx_x = hc->tx_x; tfrc.tfrctx_x_recv = hc->tx_x_recv; tfrc.tfrctx_x_calc = hc->tx_x_calc; diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 497723c4d4b..57f9fd78c4d 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -133,6 +133,7 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, * @rh: Receive history containing a fresh loss event * @calc_first_li: Caller-dependent routine to compute length of first interval * @sk: Used by @calc_first_li in caller-specific way (subtyping) + * * Updates I_mean and returns 1 if a new interval has in fact been added to @lh. */ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index d1d2f5383b7..57f631a86cc 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -65,9 +65,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh) struct tfrc_rx_hist; -extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, - u32 (*first_li)(struct sock *), struct sock *); -extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); -extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); +int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *, + u32 (*first_li)(struct sock *), struct sock *); +u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *); +void tfrc_lh_cleanup(struct tfrc_loss_hist *lh); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index de8fe294bf0..08df7a3acb3 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -315,6 +315,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h) * @ndp: The NDP count belonging to @skb * @calc_first_li: Caller-dependent computation of first loss interval in @lh * @sk: Used by @calc_first_li (see tfrc_lh_interval_add) + * * Chooses action according to pending loss, updates LI database when a new * loss was detected, and does required post-processing. Returns 1 when caller * should send feedback, 0 otherwise. @@ -387,7 +388,7 @@ static inline struct tfrc_rx_hist_entry * } /** - * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry + * tfrc_rx_hist_rtt_prev_s - previously suitable (wrt rtt_last_s) RTT-sampling entry */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h) diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 7ee4a9d9d33..ee362b0b630 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -60,8 +60,8 @@ static inline struct tfrc_tx_hist_entry * return head; } -extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); -extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); +int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); +void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); /* Subtraction a-b modulo-16, respects circular wrap-around */ #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) @@ -139,20 +139,17 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) return h->loss_count > 0; } -extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, - const struct sk_buff *skb, const u64 ndp); +void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, + const u64 ndp); -extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); +int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); struct tfrc_loss_hist; -extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, - struct tfrc_loss_hist *lh, - struct sk_buff *skb, const u64 ndp, - u32 (*first_li)(struct sock *sk), - struct sock *sk); -extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, - const struct sk_buff *skb); -extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); -extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); +int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh, + struct sk_buff *skb, const u64 ndp, + u32 (*first_li)(struct sock *sk), struct sock *sk); +u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb); +int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); +void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); #endif /* _DCCP_PKT_HIST_ */ diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 4902029854d..62b5828acde 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -4,10 +4,11 @@ * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com> */ +#include <linux/moduleparam.h> #include "tfrc.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -int tfrc_debug; +bool tfrc_debug; module_param(tfrc_debug, bool, 0644); MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages"); #endif diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index f8ee3f54977..40ee7d62b65 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -21,7 +21,7 @@ #include "packet_history.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG -extern int tfrc_debug; +extern bool tfrc_debug; #define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a) #else #define tfrc_pr_debug(format, a...) @@ -55,21 +55,21 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval; } -extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); -extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); -extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); +u32 tfrc_calc_x(u16 s, u32 R, u32 p); +u32 tfrc_calc_x_reverse_lookup(u32 fvalue); +u32 tfrc_invert_loss_event_rate(u32 loss_event_rate); -extern int tfrc_tx_packet_history_init(void); -extern void tfrc_tx_packet_history_exit(void); -extern int tfrc_rx_packet_history_init(void); -extern void tfrc_rx_packet_history_exit(void); +int tfrc_tx_packet_history_init(void); +void tfrc_tx_packet_history_exit(void); +int tfrc_rx_packet_history_init(void); +void tfrc_rx_packet_history_exit(void); -extern int tfrc_li_init(void); -extern void tfrc_li_exit(void); +int tfrc_li_init(void); +void tfrc_li_exit(void); #ifdef CONFIG_IP_DCCP_TFRC_LIB -extern int tfrc_lib_init(void); -extern void tfrc_lib_exit(void); +int tfrc_lib_init(void); +void tfrc_lib_exit(void); #else #define tfrc_lib_init() (0) #define tfrc_lib_exit() diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index a052a4377e2..88ef98285be 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -611,6 +611,7 @@ static inline u32 tfrc_binsearch(u32 fval, u8 small) * @s: packet size in bytes * @R: RTT scaled by 1000000 (i.e., microseconds) * @p: loss ratio estimate scaled by 1000000 + * * Returns X_calc in bytes per second (not scaled). */ u32 tfrc_calc_x(u16 s, u32 R, u32 p) @@ -659,6 +660,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) /** * tfrc_calc_x_reverse_lookup - try to find p given f(p) * @fvalue: function value to match, scaled by 1000000 + * * Returns closest match for p, also scaled by 1000000 */ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5fdb0722901..c67816647cc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -39,7 +39,7 @@ "%s: " fmt, __func__, ##a) #ifdef CONFIG_IP_DCCP_DEBUG -extern int dccp_debug; +extern bool dccp_debug; #define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a) #define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) #define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a) @@ -53,7 +53,7 @@ extern struct inet_hashinfo dccp_hashinfo; extern struct percpu_counter dccp_orphan_count; -extern void dccp_time_wait(struct sock *sk, int state, int timeo); +void dccp_time_wait(struct sock *sk, int state, int timeo); /* * Set safe upper bounds for header and option length. Since Data Offset is 8 @@ -75,7 +75,7 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); * state, about 60 seconds */ /* RFC 1122, 4.2.3.1 initial RTO value */ -#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ)) +#define DCCP_TIMEOUT_INIT ((unsigned int)(3 * HZ)) /* * The maximum back-off value for retransmissions. This is needed for @@ -84,7 +84,7 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); * - feature-negotiation retransmission (sec. 6.6.3), * - Acks in client-PARTOPEN state (sec. 8.1.5). */ -#define DCCP_RTO_MAX ((unsigned)(64 * HZ)) +#define DCCP_RTO_MAX ((unsigned int)(64 * HZ)) /* * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4 @@ -224,114 +224,108 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb) skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0); } -extern void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb); +void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb); -extern int dccp_retransmit_skb(struct sock *sk); +int dccp_retransmit_skb(struct sock *sk); -extern void dccp_send_ack(struct sock *sk); -extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, - struct request_sock *rsk); +void dccp_send_ack(struct sock *sk); +void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, + struct request_sock *rsk); -extern void dccp_send_sync(struct sock *sk, const u64 seq, - const enum dccp_pkt_type pkt_type); +void dccp_send_sync(struct sock *sk, const u64 seq, + const enum dccp_pkt_type pkt_type); /* * TX Packet Dequeueing Interface */ -extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); -extern bool dccp_qpolicy_full(struct sock *sk); -extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); -extern struct sk_buff *dccp_qpolicy_top(struct sock *sk); -extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk); -extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param); +void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); +bool dccp_qpolicy_full(struct sock *sk); +void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); +struct sk_buff *dccp_qpolicy_top(struct sock *sk); +struct sk_buff *dccp_qpolicy_pop(struct sock *sk); +bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param); /* * TX Packet Output and TX Timers */ -extern void dccp_write_xmit(struct sock *sk); -extern void dccp_write_space(struct sock *sk); -extern void dccp_flush_write_queue(struct sock *sk, long *time_budget); +void dccp_write_xmit(struct sock *sk); +void dccp_write_space(struct sock *sk); +void dccp_flush_write_queue(struct sock *sk, long *time_budget); -extern void dccp_init_xmit_timers(struct sock *sk); +void dccp_init_xmit_timers(struct sock *sk); static inline void dccp_clear_xmit_timers(struct sock *sk) { inet_csk_clear_xmit_timers(sk); } -extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); +unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); -extern const char *dccp_packet_name(const int type); +const char *dccp_packet_name(const int type); -extern void dccp_set_state(struct sock *sk, const int state); -extern void dccp_done(struct sock *sk); +void dccp_set_state(struct sock *sk, const int state); +void dccp_done(struct sock *sk); -extern int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp, - struct sk_buff const *skb); +int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp, + struct sk_buff const *skb); -extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); +int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); -extern struct sock *dccp_create_openreq_child(struct sock *sk, - const struct request_sock *req, - const struct sk_buff *skb); +struct sock *dccp_create_openreq_child(struct sock *sk, + const struct request_sock *req, + const struct sk_buff *skb); -extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); +int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); -extern struct sock *dccp_v4_request_recv_sock(struct sock *sk, - struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst); -extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct request_sock **prev); +struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); +struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct request_sock **prev); -extern int dccp_child_process(struct sock *parent, struct sock *child, - struct sk_buff *skb); -extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct dccp_hdr *dh, unsigned len); -extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct dccp_hdr *dh, const unsigned len); +int dccp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb); +int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + struct dccp_hdr *dh, unsigned int len); +int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct dccp_hdr *dh, const unsigned int len); -extern int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); -extern void dccp_destroy_sock(struct sock *sk); +int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); +void dccp_destroy_sock(struct sock *sk); -extern void dccp_close(struct sock *sk, long timeout); -extern struct sk_buff *dccp_make_response(struct sock *sk, - struct dst_entry *dst, - struct request_sock *req); +void dccp_close(struct sock *sk, long timeout); +struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, + struct request_sock *req); -extern int dccp_connect(struct sock *sk); -extern int dccp_disconnect(struct sock *sk, int flags); -extern int dccp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -extern int dccp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen); +int dccp_connect(struct sock *sk); +int dccp_disconnect(struct sock *sk, int flags); +int dccp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +int dccp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); #ifdef CONFIG_COMPAT -extern int compat_dccp_getsockopt(struct sock *sk, - int level, int optname, - char __user *optval, int __user *optlen); -extern int compat_dccp_setsockopt(struct sock *sk, - int level, int optname, - char __user *optval, unsigned int optlen); +int compat_dccp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen); +int compat_dccp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen); #endif -extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); -extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t size); -extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int nonblock, - int flags, int *addr_len); -extern void dccp_shutdown(struct sock *sk, int how); -extern int inet_dccp_listen(struct socket *sock, int backlog); -extern unsigned int dccp_poll(struct file *file, struct socket *sock, - poll_table *wait); -extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, - int addr_len); - -extern struct sk_buff *dccp_ctl_make_reset(struct sock *sk, - struct sk_buff *skb); -extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); -extern void dccp_send_close(struct sock *sk, const int active); -extern int dccp_invalid_packet(struct sk_buff *skb); -extern u32 dccp_sample_rtt(struct sock *sk, long delta); +int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t size); +int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int nonblock, int flags, + int *addr_len); +void dccp_shutdown(struct sock *sk, int how); +int inet_dccp_listen(struct socket *sock, int backlog); +unsigned int dccp_poll(struct file *file, struct socket *sock, + poll_table *wait); +int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); + +struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *skb); +int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); +void dccp_send_close(struct sock *sk, const int active); +int dccp_invalid_packet(struct sk_buff *skb); +u32 dccp_sample_rtt(struct sock *sk, long delta); static inline int dccp_bad_service_code(const struct sock *sk, const __be32 service) @@ -352,12 +346,13 @@ static inline int dccp_bad_service_code(const struct sock *sk, * @dccpd_opt_len: total length of all options (5.8) in the packet * @dccpd_seq: sequence number * @dccpd_ack_seq: acknowledgment number subheader field value + * * This is used for transmission as well as for reception. */ struct dccp_skb_cb { union { struct inet_skb_parm h4; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct inet6_skb_parm h6; #endif } header; @@ -474,24 +469,24 @@ static inline int dccp_ack_pending(const struct sock *sk) return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk); } -extern int dccp_feat_finalise_settings(struct dccp_sock *dp); -extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); -extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*, - struct sk_buff *skb); -extern int dccp_feat_activate_values(struct sock *sk, struct list_head *fn); -extern void dccp_feat_list_purge(struct list_head *fn_list); - -extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); -extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*); -extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); -extern u32 dccp_timestamp(void); -extern void dccp_timestamping_init(void); -extern int dccp_insert_option(struct sk_buff *skb, unsigned char option, - const void *value, unsigned char len); +int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val); +int dccp_feat_finalise_settings(struct dccp_sock *dp); +int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq); +int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*, + struct sk_buff *skb); +int dccp_feat_activate_values(struct sock *sk, struct list_head *fn); +void dccp_feat_list_purge(struct list_head *fn_list); + +int dccp_insert_options(struct sock *sk, struct sk_buff *skb); +int dccp_insert_options_rsk(struct dccp_request_sock *, struct sk_buff *); +u32 dccp_timestamp(void); +void dccp_timestamping_init(void); +int dccp_insert_option(struct sk_buff *skb, unsigned char option, + const void *value, unsigned char len); #ifdef CONFIG_SYSCTL -extern int dccp_sysctl_init(void); -extern void dccp_sysctl_exit(void); +int dccp_sysctl_init(void); +void dccp_sysctl_exit(void); #else static inline int dccp_sysctl_init(void) { diff --git a/net/dccp/diag.c b/net/dccp/diag.c index b21f261da75..028fc43aacb 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -48,11 +48,23 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, dccp_get_info(sk, _info); } +static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req_v2 *r, struct nlattr *bc) +{ + inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc); +} + +static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, + struct inet_diag_req_v2 *req) +{ + return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req); +} + static const struct inet_diag_handler dccp_diag_handler = { - .idiag_hashinfo = &dccp_hashinfo, + .dump = dccp_diag_dump, + .dump_one = dccp_diag_dump_one, .idiag_get_info = dccp_diag_get_info, - .idiag_type = DCCPDIAG_GETSOCK, - .idiag_info_size = sizeof(struct tcp_info), + .idiag_type = IPPROTO_DCCP, }; static int __init dccp_diag_init(void) @@ -71,4 +83,4 @@ module_exit(dccp_diag_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); MODULE_DESCRIPTION("DCCP inet_diag handler"); -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, DCCPDIAG_GETSOCK); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-33 /* AF_INET - IPPROTO_DCCP */); diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 568def95272..9733ddbc96c 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -12,6 +12,7 @@ * ----------- * o Feature negotiation is coordinated with connection setup (as in TCP), wild * changes of parameters of an established connection are not supported. + * o Changing non-negotiable (NN) values is supported in state OPEN/PARTOPEN. * o All currently known SP features have 1-byte quantities. If in the future * extensions of RFCs 4340..42 define features with item lengths larger than * one byte, a feature-specific extension of the code will be required. @@ -343,6 +344,21 @@ static int __dccp_feat_activate(struct sock *sk, const int idx, return dccp_feat_table[idx].activation_hdlr(sk, val, rx); } +/** + * dccp_feat_activate - Activate feature value on socket + * @sk: fully connected DCCP socket (after handshake is complete) + * @feat_num: feature to activate, one of %dccp_feature_numbers + * @local: whether local (1) or remote (0) @feat_num is meant + * @fval: the value (SP or NN) to activate, or NULL to use the default value + * + * For general use this function is preferable over __dccp_feat_activate(). + */ +static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local, + dccp_feat_val const *fval) +{ + return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval); +} + /* Test for "Req'd" feature (RFC 4340, 6.4) */ static inline int dccp_feat_must_be_understood(u8 feat_num) { @@ -431,6 +447,7 @@ static struct dccp_feat_entry *dccp_feat_list_lookup(struct list_head *fn_list, * @head: list to add to * @feat: feature number * @local: whether the local (1) or remote feature with number @feat is meant + * * This is the only constructor and serves to ensure the above invariants. */ static struct dccp_feat_entry * @@ -475,8 +492,8 @@ static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local, new->feat_num = feat; new->is_local = local; new->state = FEAT_INITIALISING; - new->needs_confirm = 0; - new->empty_confirm = 0; + new->needs_confirm = false; + new->empty_confirm = false; new->val = *fval; new->needs_mandatory = mandatory; @@ -489,6 +506,7 @@ static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local, * @feat: one of %dccp_feature_numbers * @local: whether local (1) or remote (0) @feat_num is being confirmed * @fval: pointer to NN/SP value to be inserted or NULL + * * Returns 0 on success, a Reset code for further processing otherwise. */ static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local, @@ -502,12 +520,12 @@ static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local, new->feat_num = feat; new->is_local = local; new->state = FEAT_STABLE; /* transition in 6.6.2 */ - new->needs_confirm = 1; + new->needs_confirm = true; new->empty_confirm = (fval == NULL); new->val.nn = 0; /* zeroes the whole structure */ if (!new->empty_confirm) new->val = *fval; - new->needs_mandatory = 0; + new->needs_mandatory = false; return 0; } @@ -650,11 +668,22 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq, return -1; if (pos->needs_mandatory && dccp_insert_option_mandatory(skb)) return -1; - /* - * Enter CHANGING after transmitting the Change option (6.6.2). - */ - if (pos->state == FEAT_INITIALISING) - pos->state = FEAT_CHANGING; + + if (skb->sk->sk_state == DCCP_OPEN && + (opt == DCCPO_CONFIRM_R || opt == DCCPO_CONFIRM_L)) { + /* + * Confirms don't get retransmitted (6.6.3) once the + * connection is in state OPEN + */ + dccp_feat_list_pop(pos); + } else { + /* + * Enter CHANGING after transmitting the Change + * option (6.6.2). + */ + if (pos->state == FEAT_INITIALISING) + pos->state = FEAT_CHANGING; + } } return 0; } @@ -665,6 +694,7 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq, * @feat: an NN feature from %dccp_feature_numbers * @mandatory: use Mandatory option if 1 * @nn_val: value to register (restricted to 4 bytes) + * * Note that NN features are local by definition (RFC 4340, 6.3.2). */ static int __feat_register_nn(struct list_head *fn, u8 feat, @@ -730,6 +760,72 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, 0, list, len); } +/** + * dccp_feat_nn_get - Query current/pending value of NN feature + * @sk: DCCP socket of an established connection + * @feat: NN feature number from %dccp_feature_numbers + * + * For a known NN feature, returns value currently being negotiated, or + * current (confirmed) value if no negotiation is going on. + */ +u64 dccp_feat_nn_get(struct sock *sk, u8 feat) +{ + if (dccp_feat_type(feat) == FEAT_NN) { + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_feat_entry *entry; + + entry = dccp_feat_list_lookup(&dp->dccps_featneg, feat, 1); + if (entry != NULL) + return entry->val.nn; + + switch (feat) { + case DCCPF_ACK_RATIO: + return dp->dccps_l_ack_ratio; + case DCCPF_SEQUENCE_WINDOW: + return dp->dccps_l_seq_win; + } + } + DCCP_BUG("attempt to look up unsupported feature %u", feat); + return 0; +} +EXPORT_SYMBOL_GPL(dccp_feat_nn_get); + +/** + * dccp_feat_signal_nn_change - Update NN values for an established connection + * @sk: DCCP socket of an established connection + * @feat: NN feature number from %dccp_feature_numbers + * @nn_val: the new value to use + * + * This function is used to communicate NN updates out-of-band. + */ +int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val) +{ + struct list_head *fn = &dccp_sk(sk)->dccps_featneg; + dccp_feat_val fval = { .nn = nn_val }; + struct dccp_feat_entry *entry; + + if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN) + return 0; + + if (dccp_feat_type(feat) != FEAT_NN || + !dccp_feat_is_valid_nn_val(feat, nn_val)) + return -EINVAL; + + if (nn_val == dccp_feat_nn_get(sk, feat)) + return 0; /* already set or negotiation under way */ + + entry = dccp_feat_list_lookup(fn, feat, 1); + if (entry != NULL) { + dccp_pr_debug("Clobbering existing NN entry %llu -> %llu\n", + (unsigned long long)entry->val.nn, + (unsigned long long)nn_val); + dccp_feat_list_pop(entry); + } + + inet_csk_schedule_ack(sk); + return dccp_feat_push_change(fn, feat, 1, 0, &fval); +} +EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change); /* * Tracking features whose value depend on the choice of CCID @@ -840,6 +936,7 @@ static const struct ccid_dependency *dccp_feat_ccid_deps(u8 ccid, bool is_local) * @fn: feature-negotiation list to update * @id: CCID number to track * @is_local: whether TX CCID (1) or RX CCID (0) is meant + * * This function needs to be called after registering all other features. */ static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local) @@ -863,6 +960,7 @@ static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local) /** * dccp_feat_finalise_settings - Finalise settings before starting negotiation * @dp: client or listening socket (settings will be inherited) + * * This is called after all registrations (socket initialisation, sysctls, and * sockopt calls), and before sending the first packet containing Change options * (ie. client-Request or server-Response), to ensure internal consistency. @@ -1065,7 +1163,7 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt, } if (dccp_feat_reconcile(&entry->val, val, len, server, true)) { - entry->empty_confirm = 0; + entry->empty_confirm = false; } else if (is_mandatory) { return DCCP_RESET_CODE_MANDATORY_ERROR; } else if (entry->state == FEAT_INITIALISING) { @@ -1081,10 +1179,10 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt, defval = dccp_feat_default_value(feat); if (!dccp_feat_reconcile(&entry->val, &defval, 1, server, true)) return DCCP_RESET_CODE_OPTION_ERROR; - entry->empty_confirm = 1; + entry->empty_confirm = true; } - entry->needs_confirm = 1; - entry->needs_mandatory = 0; + entry->needs_confirm = true; + entry->needs_mandatory = false; entry->state = FEAT_STABLE; return 0; @@ -1187,6 +1285,101 @@ confirmation_failed: } /** + * dccp_feat_handle_nn_established - Fast-path reception of NN options + * @sk: socket of an established DCCP connection + * @mandatory: whether @opt was preceded by a Mandatory option + * @opt: %DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only) + * @feat: NN number, one of %dccp_feature_numbers + * @val: NN value + * @len: length of @val in bytes + * + * This function combines the functionality of change_recv/confirm_recv, with + * the following differences (reset codes are the same): + * - cleanup after receiving the Confirm; + * - values are directly activated after successful parsing; + * - deliberately restricted to NN features. + * The restriction to NN features is essential since SP features can have non- + * predictable outcomes (depending on the remote configuration), and are inter- + * dependent (CCIDs for instance cause further dependencies). + */ +static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt, + u8 feat, u8 *val, u8 len) +{ + struct list_head *fn = &dccp_sk(sk)->dccps_featneg; + const bool local = (opt == DCCPO_CONFIRM_R); + struct dccp_feat_entry *entry; + u8 type = dccp_feat_type(feat); + dccp_feat_val fval; + + dccp_feat_print_opt(opt, feat, val, len, mandatory); + + /* Ignore non-mandatory unknown and non-NN features */ + if (type == FEAT_UNKNOWN) { + if (local && !mandatory) + return 0; + goto fast_path_unknown; + } else if (type != FEAT_NN) { + return 0; + } + + /* + * We don't accept empty Confirms, since in fast-path feature + * negotiation the values are enabled immediately after sending + * the Change option. + * Empty Changes on the other hand are invalid (RFC 4340, 6.1). + */ + if (len == 0 || len > sizeof(fval.nn)) + goto fast_path_unknown; + + if (opt == DCCPO_CHANGE_L) { + fval.nn = dccp_decode_value_var(val, len); + if (!dccp_feat_is_valid_nn_val(feat, fval.nn)) + goto fast_path_unknown; + + if (dccp_feat_push_confirm(fn, feat, local, &fval) || + dccp_feat_activate(sk, feat, local, &fval)) + return DCCP_RESET_CODE_TOO_BUSY; + + /* set the `Ack Pending' flag to piggyback a Confirm */ + inet_csk_schedule_ack(sk); + + } else if (opt == DCCPO_CONFIRM_R) { + entry = dccp_feat_list_lookup(fn, feat, local); + if (entry == NULL || entry->state != FEAT_CHANGING) + return 0; + + fval.nn = dccp_decode_value_var(val, len); + /* + * Just ignore a value that doesn't match our current value. + * If the option changes twice within two RTTs, then at least + * one CONFIRM will be received for the old value after a + * new CHANGE was sent. + */ + if (fval.nn != entry->val.nn) + return 0; + + /* Only activate after receiving the Confirm option (6.6.1). */ + dccp_feat_activate(sk, feat, local, &fval); + + /* It has been confirmed - so remove the entry */ + dccp_feat_list_pop(entry); + + } else { + DCCP_WARN("Received illegal option %u\n", opt); + goto fast_path_failed; + } + return 0; + +fast_path_unknown: + if (!mandatory) + return dccp_push_empty_confirm(fn, feat, local); + +fast_path_failed: + return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR + : DCCP_RESET_CODE_OPTION_ERROR; +} + +/** * dccp_feat_parse_options - Process Feature-Negotiation Options * @sk: for general use and used by the client during connection setup * @dreq: used by the server during connection setup @@ -1195,6 +1388,7 @@ confirmation_failed: * @feat: one of %dccp_feature_numbers * @val: value contents of @opt * @len: length of @val in bytes + * * Returns 0 on success, a Reset code for ending the connection otherwise. */ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq, @@ -1221,6 +1415,14 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq, return dccp_feat_confirm_recv(fn, mandatory, opt, feat, val, len, server); } + break; + /* + * Support for exchanging NN options on an established connection. + */ + case DCCP_OPEN: + case DCCP_PARTOPEN: + return dccp_feat_handle_nn_established(sk, mandatory, opt, feat, + val, len); } return 0; /* ignore FN options in all other states */ } diff --git a/net/dccp/feat.h b/net/dccp/feat.h index e56a4e5e634..0e75cebb218 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -107,13 +107,13 @@ extern unsigned long sysctl_dccp_sequence_window; extern int sysctl_dccp_rx_ccid; extern int sysctl_dccp_tx_ccid; -extern int dccp_feat_init(struct sock *sk); -extern void dccp_feat_initialise_sysctls(void); -extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, - u8 const *list, u8 len); -extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, - u8 mand, u8 opt, u8 feat, u8 *val, u8 len); -extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); +int dccp_feat_init(struct sock *sk); +void dccp_feat_initialise_sysctls(void); +int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, + u8 const *list, u8 len); +int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, + u8 mand, u8 opt, u8 feat, u8 *val, u8 len); +int dccp_feat_clone_list(struct list_head const *, struct list_head *); /* * Encoding variable-length options and their maximum length. @@ -127,10 +127,11 @@ extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); */ #define DCCP_OPTVAL_MAXLEN 6 -extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); -extern u64 dccp_decode_value_var(const u8 *bf, const u8 len); +void dccp_encode_value_var(const u64 value, u8 *to, const u8 len); +u64 dccp_decode_value_var(const u8 *bf, const u8 len); +u64 dccp_feat_nn_get(struct sock *sk, u8 feat); -extern int dccp_insert_option_mandatory(struct sk_buff *skb); -extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, - u8 *val, u8 len, bool repeat_first); +int dccp_insert_option_mandatory(struct sk_buff *skb); +int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, u8 *val, u8 len, + bool repeat_first); #endif /* _DCCP_FEAT_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 4222e7a654b..3c8ec7d4a34 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -28,7 +28,7 @@ static void dccp_enqueue_skb(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); __skb_queue_tail(&sk->sk_receive_queue, skb); skb_set_owner_r(skb, sk); - sk->sk_data_ready(sk, 0); + sk->sk_data_ready(sk); } static void dccp_fin(struct sock *sk, struct sk_buff *skb) @@ -285,7 +285,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) } static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct dccp_hdr *dh, const unsigned len) + const struct dccp_hdr *dh, const unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); @@ -366,7 +366,7 @@ discard: } int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct dccp_hdr *dh, const unsigned len) + const struct dccp_hdr *dh, const unsigned int len) { if (dccp_check_seqno(sk, skb)) goto discard; @@ -388,7 +388,7 @@ EXPORT_SYMBOL_GPL(dccp_rcv_established); static int dccp_rcv_request_sent_state_process(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, - const unsigned len) + const unsigned int len) { /* * Step 4: Prepare sequence numbers in REQUEST @@ -521,7 +521,7 @@ unable_to_proceed: static int dccp_rcv_respond_partopen_state_process(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, - const unsigned len) + const unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); u32 sample = dp->dccps_options_received.dccpor_timestamp_echo; @@ -572,7 +572,7 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, } int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct dccp_hdr *dh, unsigned len) + struct dccp_hdr *dh, unsigned int len) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); @@ -619,20 +619,31 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 1; } - if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) { - if (dccp_check_seqno(sk, skb)) - goto discard; - - /* - * Step 8: Process options and mark acknowledgeable - */ - if (dccp_parse_options(sk, NULL, skb)) - return 1; + /* Step 6: Check sequence numbers (omitted in LISTEN/REQUEST state) */ + if (sk->sk_state != DCCP_REQUESTING && dccp_check_seqno(sk, skb)) + goto discard; - dccp_handle_ackvec_processing(sk, skb); - dccp_deliver_input_to_ccids(sk, skb); + /* + * Step 7: Check for unexpected packet types + * If (S.is_server and P.type == Response) + * or (S.is_client and P.type == Request) + * or (S.state == RESPOND and P.type == Data), + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + if ((dp->dccps_role != DCCP_ROLE_CLIENT && + dh->dccph_type == DCCP_PKT_RESPONSE) || + (dp->dccps_role == DCCP_ROLE_CLIENT && + dh->dccph_type == DCCP_PKT_REQUEST) || + (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); + goto discard; } + /* Step 8: Process options */ + if (dccp_parse_options(sk, NULL, skb)) + return 1; + /* * Step 9: Process Reset * If P.type == Reset, @@ -640,31 +651,15 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * S.state := TIMEWAIT * Set TIMEWAIT timer * Drop packet and return - */ + */ if (dh->dccph_type == DCCP_PKT_RESET) { dccp_rcv_reset(sk, skb); return 0; - /* - * Step 7: Check for unexpected packet types - * If (S.is_server and P.type == Response) - * or (S.is_client and P.type == Request) - * or (S.state == RESPOND and P.type == Data), - * Send Sync packet acknowledging P.seqno - * Drop packet and return - */ - } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && - dh->dccph_type == DCCP_PKT_RESPONSE) || - (dp->dccps_role == DCCP_ROLE_CLIENT && - dh->dccph_type == DCCP_PKT_REQUEST) || - (sk->sk_state == DCCP_RESPOND && - dh->dccph_type == DCCP_PKT_DATA)) { - dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); - goto discard; - } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { + } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { /* Step 13 */ if (dccp_rcv_closereq(sk, skb)) return 0; goto discard; - } else if (dh->dccph_type == DCCP_PKT_CLOSE) { + } else if (dh->dccph_type == DCCP_PKT_CLOSE) { /* Step 14 */ if (dccp_rcv_close(sk, skb)) return 0; goto discard; @@ -679,8 +674,12 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, __kfree_skb(skb); return 0; - case DCCP_RESPOND: case DCCP_PARTOPEN: + /* Step 8: if using Ack Vectors, mark packet acknowledgeable */ + dccp_handle_ackvec_processing(sk, skb); + dccp_deliver_input_to_ccids(sk, skb); + /* fall through */ + case DCCP_RESPOND: queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); break; @@ -711,6 +710,7 @@ EXPORT_SYMBOL_GPL(dccp_rcv_state_process); /** * dccp_sample_rtt - Validate and finalise computation of RTT sample * @delta: number of microseconds between packet and acknowledgment + * * The routine is kept generic to work in different contexts. It should be * called immediately when the ACK used for the RTT sample arrives. */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 45a434f9416..6ca645c4b48 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -26,6 +26,7 @@ #include <net/timewait_sock.h> #include <net/tcp_states.h> #include <net/xfrm.h> +#include <net/secure_seq.h> #include "ackvec.h" #include "ccid.h" @@ -40,13 +41,15 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; - struct rtable *rt; + __be16 orig_sport, orig_dport; __be32 daddr, nexthop; - int tmp; + struct flowi4 *fl4; + struct rtable *rt; int err; + struct ip_options_rcu *inet_opt; dp->dccps_role = DCCP_ROLE_CLIENT; @@ -57,37 +60,43 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return -EAFNOSUPPORT; nexthop = daddr = usin->sin_addr.s_addr; - if (inet->opt != NULL && inet->opt->srr) { + + inet_opt = rcu_dereference_protected(inet->inet_opt, + sock_owned_by_user(sk)); + if (inet_opt != NULL && inet_opt->opt.srr) { if (daddr == 0) return -EINVAL; - nexthop = inet->opt->faddr; + nexthop = inet_opt->opt.faddr; } - tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, - IPPROTO_DCCP, - inet->inet_sport, usin->sin_port, sk, 1); - if (tmp < 0) - return tmp; + orig_sport = inet->inet_sport; + orig_dport = usin->sin_port; + fl4 = &inet->cork.fl.u.ip4; + rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, + IPPROTO_DCCP, + orig_sport, orig_dport, sk); + if (IS_ERR(rt)) + return PTR_ERR(rt); if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { ip_rt_put(rt); return -ENETUNREACH; } - if (inet->opt == NULL || !inet->opt->srr) - daddr = rt->rt_dst; + if (inet_opt == NULL || !inet_opt->opt.srr) + daddr = fl4->daddr; if (inet->inet_saddr == 0) - inet->inet_saddr = rt->rt_src; + inet->inet_saddr = fl4->saddr; inet->inet_rcv_saddr = inet->inet_saddr; inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; - if (inet->opt != NULL) - inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; + if (inet_opt) + inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; /* * Socket identity is still unknown (sport may be zero). * However we set state to DCCP_REQUESTING and not releasing socket @@ -99,11 +108,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err != 0) goto failure; - err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport, - inet->inet_dport, sk); - if (err != 0) + rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, + inet->inet_sport, inet->inet_dport, sk); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + rt = NULL; goto failure; - + } /* OK, now commit destination to socket. */ sk_setup_caps(sk, &rt->dst); @@ -150,17 +161,10 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, if (sk->sk_state == DCCP_LISTEN) return; - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) return; - dst->ops->update_pmtu(dst, mtu); - /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. */ @@ -170,6 +174,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, mtu = dst_mtu(dst); if (inet->pmtudisc != IP_PMTUDISC_DONT && + ip_sk_accept_pmtu(sk) && inet_csk(sk)->icsk_pmtu_cookie > mtu) { dccp_sync_mss(sk, mtu); @@ -184,6 +189,14 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, } /* else let the usual retransmit timer handle it */ } +static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) +{ + struct dst_entry *dst = __sk_dst_check(sk, 0); + + if (dst) + dst->ops->redirect(dst, sk, skb); +} + /* * This routine is called by the ICMP module when it gets some sort of error * condition. If err < 0 then the socket should be closed and the error @@ -248,6 +261,9 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) } switch (type) { + case ICMP_REDIRECT: + dccp_do_redirect(skb, sk); + goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ goto out; @@ -289,7 +305,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) */ WARN_ON(req->sk); - if (seq != dccp_rsk(req)->dreq_iss) { + if (!between48(seq, dccp_rsk(req)->dreq_iss, + dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -387,32 +404,30 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto exit_overflow; - if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) - goto exit; - newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto exit_nonewsk; - sk_setup_caps(newsk, dst); - newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->inet_daddr = ireq->rmt_addr; - newinet->inet_rcv_saddr = ireq->loc_addr; - newinet->inet_saddr = ireq->loc_addr; - newinet->opt = ireq->opt; + newinet->inet_daddr = ireq->ir_rmt_addr; + newinet->inet_rcv_saddr = ireq->ir_loc_addr; + newinet->inet_saddr = ireq->ir_loc_addr; + newinet->inet_opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->inet_id = jiffies; + if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) + goto put_and_exit; + + sk_setup_caps(newsk, dst); + dccp_sync_mss(newsk, dst_mtu(dst)); - if (__inet_inherit_port(sk, newsk) < 0) { - sock_put(newsk); - goto exit; - } + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; __inet_hash_nolisten(newsk, NULL); return newsk; @@ -424,6 +439,10 @@ exit_nonewsk: exit: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; +put_and_exit: + inet_csk_prepare_forced_close(newsk); + dccp_done(newsk); + goto exit; } EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); @@ -461,17 +480,20 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; - struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, - .fl4_dst = ip_hdr(skb)->saddr, - .fl4_src = ip_hdr(skb)->daddr, - .fl4_tos = RT_CONN_FLAGS(sk), - .proto = sk->sk_protocol, - .fl_ip_sport = dccp_hdr(skb)->dccph_dport, - .fl_ip_dport = dccp_hdr(skb)->dccph_sport - }; - - security_skb_classify_flow(skb, &fl); - if (ip_route_output_flow(net, &rt, &fl, sk, 0)) { + const struct iphdr *iph = ip_hdr(skb); + struct flowi4 fl4 = { + .flowi4_oif = inet_iif(skb), + .daddr = iph->saddr, + .saddr = iph->daddr, + .flowi4_tos = RT_CONN_FLAGS(sk), + .flowi4_proto = sk->sk_protocol, + .fl4_sport = dccp_hdr(skb)->dccph_dport, + .fl4_dport = dccp_hdr(skb)->dccph_sport, + }; + + security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(net, &fl4, sk); + if (IS_ERR(rt)) { IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } @@ -479,14 +501,14 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return &rt->dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; struct dst_entry *dst; + struct flowi4 fl4; - dst = inet_csk_route_req(sk, req); + dst = inet_csk_route_req(sk, &fl4, req); if (dst == NULL) goto out; @@ -495,10 +517,10 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, const struct inet_request_sock *ireq = inet_rsk(req); struct dccp_hdr *dh = dccp_hdr(skb); - dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr, - ireq->rmt_addr); - err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, - ireq->rmt_addr, + dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, + ireq->ir_rmt_addr); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, ireq->opt); err = net_xmit_eval(err); } @@ -556,6 +578,11 @@ static void dccp_v4_reqsk_destructor(struct request_sock *req) kfree(inet_rsk(req)->opt); } +void dccp_syn_ack_timeout(struct sock *sk, struct request_sock *req) +{ +} +EXPORT_SYMBOL(dccp_syn_ack_timeout); + static struct request_sock_ops dccp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct dccp_request_sock), @@ -563,6 +590,7 @@ static struct request_sock_ops dccp_request_sock_ops __read_mostly = { .send_ack = dccp_reqsk_send_ack, .destructor = dccp_v4_reqsk_destructor, .send_reset = dccp_v4_ctl_send_reset, + .syn_ack_timeout = dccp_syn_ack_timeout, }; int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -614,22 +642,23 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; ireq = inet_rsk(req); - ireq->loc_addr = ip_hdr(skb)->daddr; - ireq->rmt_addr = ip_hdr(skb)->saddr; + ireq->ir_loc_addr = ip_hdr(skb)->daddr; + ireq->ir_rmt_addr = ip_hdr(skb)->saddr; /* * Step 3: Process LISTEN state * * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie * - * In fact we defer setting S.GSR, S.SWL, S.SWH to - * dccp_create_openreq_child. + * Setting S.SWL/S.SWH to is deferred to dccp_create_openreq_child(). */ dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_gsr = dreq->dreq_isr; dreq->dreq_iss = dccp_v4_init_sequence(skb); + dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v4_send_response(sk, req, NULL)) + if (dccp_v4_send_response(sk, req)) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); @@ -960,6 +989,7 @@ static const struct net_protocol dccp_v4_protocol = { .err_handler = dccp_v4_err, .no_policy = 1, .netns_ok = 1, + .icmp_strict_tag_validation = 1, }; static const struct proto_ops inet_dccp_ops = { @@ -994,7 +1024,6 @@ static struct inet_protosw dccp_v4_protosw = { .protocol = IPPROTO_DCCP, .prot = &dccp_v4_prot, .ops = &inet_dccp_ops, - .no_check = 0, .flags = INET_PROTOSW_ICSK, }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index dca711df9b6..4db3c2a1679 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -29,6 +29,7 @@ #include <net/transp_v6.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> +#include <net/secure_seq.h> #include "dccp.h" #include "ipv6.h" @@ -54,8 +55,8 @@ static void dccp_v6_hash(struct sock *sk) /* add pseudo-header to DCCP checksum stored in skb->csum */ static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, - struct in6_addr *saddr, - struct in6_addr *daddr) + const struct in6_addr *saddr, + const struct in6_addr *daddr) { return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum); } @@ -66,16 +67,10 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) struct dccp_hdr *dh = dccp_hdr(skb); dccp_csum_outgoing(skb); - dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr); + dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr); } -static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, - __be16 sport, __be16 dport ) -{ - return secure_tcpv6_sequence_number(saddr, daddr, sport, dport); -} - -static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) +static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) { return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, @@ -87,7 +82,7 @@ static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; + const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct ipv6_pinfo *np; @@ -135,51 +130,31 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); + if (type == NDISC_REDIRECT) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + + if (dst) + dst->ops->redirect(dst, sk, skb); + goto out; + } + if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; + if (!ip6_sk_accept_pmtu(sk)) + goto out; + if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) goto out; - /* icmp should have updated the destination cache entry */ - dst = __sk_dst_check(sk, np->dst_cookie); - if (dst == NULL) { - struct inet_sock *inet = inet_sk(sk); - struct flowi fl; - - /* BUGGG_FUTURE: Again, it is not clear how - to handle rthdr case. Ignore this complexity - for now. - */ - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet->inet_dport; - fl.fl_ip_sport = inet->inet_sport; - security_sk_classify_flow(sk, &fl); - - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) { - sk->sk_err_soft = -err; - goto out; - } - - err = xfrm_lookup(net, &dst, &fl, sk, 0); - if (err < 0) { - sk->sk_err_soft = -err; - goto out; - } - } else - dst_hold(dst); + dst = inet6_csk_update_pmtu(sk, ntohl(info)); + if (!dst) + goto out; - if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) dccp_sync_mss(sk, dst_mtu(dst)); - } /* else let the usual retransmit timer handle it */ - dst_release(dst); goto out; } @@ -204,7 +179,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, */ WARN_ON(req->sk != NULL); - if (seq != dccp_rsk(req)->dreq_iss) { + if (!between48(seq, dccp_rsk(req)->dreq_iss, + dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -241,58 +217,49 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) { - struct inet6_request_sock *ireq6 = inet6_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; - struct ipv6_txoptions *opt = NULL; struct in6_addr *final_p, final; - struct flowi fl; + struct flowi6 fl6; int err = -1; struct dst_entry *dst; - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.fl6_flowlabel = 0; - fl.oif = ireq6->iif; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_req_classify_flow(req, &fl); - - opt = np->opt; - - final_p = fl6_update_dst(&fl, opt, &final); + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_DCCP; + fl6.daddr = ireq->ir_v6_rmt_addr; + fl6.saddr = ireq->ir_v6_loc_addr; + fl6.flowlabel = 0; + fl6.flowi6_oif = ireq->ir_iif; + fl6.fl6_dport = ireq->ir_rmt_port; + fl6.fl6_sport = htons(ireq->ir_num); + security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) - goto done; - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); + final_p = fl6_update_dst(&fl6, np->opt, &final); - err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0); - if (err < 0) + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; goto done; + } skb = dccp_make_response(sk, dst, req); if (skb != NULL) { struct dccp_hdr *dh = dccp_hdr(skb); dh->dccph_checksum = dccp_v6_csum_finish(skb, - &ireq6->loc_addr, - &ireq6->rmt_addr); - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - err = ip6_xmit(sk, skb, &fl, opt); + &ireq->ir_v6_loc_addr, + &ireq->ir_v6_rmt_addr); + fl6.daddr = ireq->ir_v6_rmt_addr; + err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); err = net_xmit_eval(err); } done: - if (opt != NULL && opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); return err; } @@ -300,15 +267,14 @@ done: static void dccp_v6_reqsk_destructor(struct request_sock *req) { dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); - if (inet6_rsk(req)->pktopts != NULL) - kfree_skb(inet6_rsk(req)->pktopts); + kfree_skb(inet_rsk(req)->pktopts); } static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { - struct ipv6hdr *rxip6h; + const struct ipv6hdr *rxip6h; struct sk_buff *skb; - struct flowi fl; + struct flowi6 fl6; struct net *net = dev_net(skb_dst(rxskb)->dev); struct sock *ctl_sk = net->dccp.v6_ctl_sk; struct dst_entry *dst; @@ -327,25 +293,24 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, &rxip6h->daddr); - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); - ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr); + memset(&fl6, 0, sizeof(fl6)); + fl6.daddr = rxip6h->saddr; + fl6.saddr = rxip6h->daddr; - fl.proto = IPPROTO_DCCP; - fl.oif = inet6_iif(rxskb); - fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport; - fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport; - security_skb_classify_flow(rxskb, &fl); + fl6.flowi6_proto = IPPROTO_DCCP; + fl6.flowi6_oif = inet6_iif(rxskb); + fl6.fl6_dport = dccp_hdr(skb)->dccph_dport; + fl6.fl6_sport = dccp_hdr(skb)->dccph_sport; + security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6)); /* sk = NULL, but it is safe for now. RST socket required. */ - if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { - if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { - skb_dst_set(skb, dst); - ip6_xmit(ctl_sk, skb, &fl, NULL); - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); - return; - } + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); + if (!IS_ERR(dst)) { + skb_dst_set(skb, dst); + ip6_xmit(ctl_sk, skb, &fl6, NULL, 0); + DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + return; } kfree_skb(skb); @@ -358,6 +323,7 @@ static struct request_sock_ops dccp6_request_sock_ops = { .send_ack = dccp_reqsk_send_ack, .destructor = dccp_v6_reqsk_destructor, .send_reset = dccp_v6_ctl_send_reset, + .syn_ack_timeout = dccp_syn_ack_timeout, }; static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) @@ -395,7 +361,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct request_sock *req; struct dccp_request_sock *dreq; - struct inet6_request_sock *ireq6; + struct inet_request_sock *ireq; struct ipv6_pinfo *np = inet6_sk(sk); const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); @@ -434,36 +400,37 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; - ireq6 = inet6_rsk(req); - ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr); - ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr); + ireq = inet_rsk(req); + ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; + ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); - ireq6->pktopts = skb; + ireq->pktopts = skb; } - ireq6->iif = sk->sk_bound_dev_if; + ireq->ir_iif = sk->sk_bound_dev_if; /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && - ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq6->iif = inet6_iif(skb); + ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) + ireq->ir_iif = inet6_iif(skb); /* * Step 3: Process LISTEN state * * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie * - * In fact we defer setting S.GSR, S.SWL, S.SWH to - * dccp_create_openreq_child. + * Setting S.SWL/S.SWH to is deferred to dccp_create_openreq_child(). */ dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_gsr = dreq->dreq_isr; dreq->dreq_iss = dccp_v6_init_sequence(skb); + dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v6_send_response(sk, req, NULL)) + if (dccp_v6_send_response(sk, req)) goto drop_and_free; inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); @@ -481,13 +448,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, struct request_sock *req, struct dst_entry *dst) { - struct inet6_request_sock *ireq6 = inet6_rsk(req); + struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct inet_sock *newinet; - struct dccp_sock *newdp; struct dccp6_sock *newdp6; struct sock *newsk; - struct ipv6_txoptions *opt; if (skb->protocol == htons(ETH_P_IP)) { /* @@ -498,18 +463,17 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, return NULL; newdp6 = (struct dccp6_sock *)newsk; - newdp = dccp_sk(newsk); newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); + ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr); + newsk->sk_v6_rcv_saddr = newnp->saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; @@ -533,32 +497,26 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, return newsk; } - opt = np->opt; if (sk_acceptq_is_full(sk)) goto out_overflow; if (dst == NULL) { struct in6_addr *final_p, final; - struct flowi fl; - - memset(&fl, 0, sizeof(fl)); - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); - final_p = fl6_update_dst(&fl, opt, &final); - ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_rsk(req)->loc_port; - security_sk_classify_flow(sk, &fl); - - if (ip6_dst_lookup(sk, &dst, &fl)) - goto out; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_DCCP; + fl6.daddr = ireq->ir_v6_rmt_addr; + final_p = fl6_update_dst(&fl6, np->opt, &final); + fl6.saddr = ireq->ir_v6_loc_addr; + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.fl6_dport = ireq->ir_rmt_port; + fl6.fl6_sport = htons(ireq->ir_num); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); + if (IS_ERR(dst)) goto out; } @@ -578,31 +536,30 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; - newdp = dccp_sk(newsk); newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_copy(&newnp->daddr, &ireq6->rmt_addr); - ipv6_addr_copy(&newnp->saddr, &ireq6->loc_addr); - ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr); - newsk->sk_bound_dev_if = ireq6->iif; + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; + newnp->saddr = ireq->ir_v6_loc_addr; + newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; + newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... First: no IPv4 options. */ - newinet->opt = NULL; + newinet->inet_opt = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (ireq6->pktopts != NULL) { - newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC); - kfree_skb(ireq6->pktopts); - ireq6->pktopts = NULL; + if (ireq->pktopts != NULL) { + newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; if (newnp->pktoptions) skb_set_owner_r(newnp->pktoptions, newsk); } @@ -616,11 +573,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ - if (opt != NULL) { - newnp->opt = ipv6_dup_options(newsk, opt); - if (opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); - } + if (np->opt != NULL) + newnp->opt = ipv6_dup_options(newsk, np->opt); inet_csk(newsk)->icsk_ext_hdr_len = 0; if (newnp->opt != NULL) @@ -633,7 +587,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, newinet->inet_rcv_saddr = LOOPBACK4_IPV6; if (__inet_inherit_port(sk, newsk) < 0) { - sock_put(newsk); + inet_csk_prepare_forced_close(newsk); + dccp_done(newsk); goto out; } __inet6_hash(newsk, NULL); @@ -646,8 +601,6 @@ out_nonewsk: dst_release(dst); out: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); - if (opt != NULL && opt != np->opt) - sock_kfree_s(sk, opt, opt->tot_len); return NULL; } @@ -878,7 +831,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; - struct flowi fl; + struct flowi6 fl6; struct dst_entry *dst; int addr_type; int err; @@ -891,17 +844,16 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - memset(&fl, 0, sizeof(fl)); + memset(&fl6, 0, sizeof(fl6)); if (np->sndflow) { - fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; - IP6_ECN_flow_init(fl.fl6_flowlabel); - if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) { + fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; + IP6_ECN_flow_init(fl6.flowlabel); + if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; - flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); + flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); fl6_sock_release(flowlabel); } } @@ -934,8 +886,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, return -EINVAL; } - ipv6_addr_copy(&np->daddr, &usin->sin6_addr); - np->flow_label = fl.fl6_flowlabel; + sk->sk_v6_daddr = usin->sin6_addr; + np->flow_label = fl6.flowlabel; /* * DCCP over IPv4 @@ -964,46 +916,37 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto failure; } ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); + ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr); return err; } - if (!ipv6_addr_any(&np->rcv_saddr)) - saddr = &np->rcv_saddr; + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) + saddr = &sk->sk_v6_rcv_saddr; - fl.proto = IPPROTO_DCCP; - ipv6_addr_copy(&fl.fl6_dst, &np->daddr); - ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr); - fl.oif = sk->sk_bound_dev_if; - fl.fl_ip_dport = usin->sin6_port; - fl.fl_ip_sport = inet->inet_sport; - security_sk_classify_flow(sk, &fl); + fl6.flowi6_proto = IPPROTO_DCCP; + fl6.daddr = sk->sk_v6_daddr; + fl6.saddr = saddr ? *saddr : np->saddr; + fl6.flowi6_oif = sk->sk_bound_dev_if; + fl6.fl6_dport = usin->sin6_port; + fl6.fl6_sport = inet->inet_sport; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl, np->opt, &final); + final_p = fl6_update_dst(&fl6, np->opt, &final); - err = ip6_dst_lookup(sk, &dst, &fl); - if (err) + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); goto failure; - - if (final_p) - ipv6_addr_copy(&fl.fl6_dst, final_p); - - err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); - if (err < 0) { - if (err == -EREMOTE) - err = ip6_dst_blackhole(sk, &dst, &fl); - if (err < 0) - goto failure; } if (saddr == NULL) { - saddr = &fl.fl6_src; - ipv6_addr_copy(&np->rcv_saddr, saddr); + saddr = &fl6.saddr; + sk->sk_v6_rcv_saddr = *saddr; } /* set the source address */ - ipv6_addr_copy(&np->saddr, saddr); + np->saddr = *saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; __ip6_dst_store(sk, dst, NULL, NULL); @@ -1021,7 +964,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto late_failure; dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, - np->daddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); err = dccp_connect(sk); diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h index 6eef81fdbe5..af259e15e7f 100644 --- a/net/dccp/ipv6.h +++ b/net/dccp/ipv6.h @@ -25,12 +25,10 @@ struct dccp6_sock { struct dccp6_request_sock { struct dccp_request_sock dccp; - struct inet6_request_sock inet6; }; struct dccp6_timewait_sock { struct inet_timewait_sock inet; - struct inet6_timewait_sock tw6; }; #endif /* _DCCP_IPV6_H */ diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index d7041a0963a..c69eb9c4fbb 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -53,15 +53,12 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) if (tw != NULL) { const struct inet_connection_sock *icsk = inet_csk(sk); const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_timewait_sock *tw6; - tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); - tw6 = inet6_twsk((struct sock *)tw); - ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr); - ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr); + tw->tw_v6_daddr = sk->sk_v6_daddr; + tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr; tw->tw_ipv6only = np->ipv6only; } #endif @@ -100,7 +97,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, * (* Generate a new socket and switch to that socket *) * Set S := new socket for this port pair */ - struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); + struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); if (newsk != NULL) { struct dccp_request_sock *dreq = dccp_rsk(req); @@ -127,9 +124,11 @@ struct sock *dccp_create_openreq_child(struct sock *sk, * activation below, as these windows all depend on the local * and remote Sequence Window feature values (7.5.2). */ - newdp->dccps_gss = newdp->dccps_iss = dreq->dreq_iss; + newdp->dccps_iss = dreq->dreq_iss; + newdp->dccps_gss = dreq->dreq_gss; newdp->dccps_gar = newdp->dccps_iss; - newdp->dccps_gsr = newdp->dccps_isr = dreq->dreq_isr; + newdp->dccps_isr = dreq->dreq_isr; + newdp->dccps_gsr = dreq->dreq_gsr; /* * Activate features: initialise CCIDs, sequence windows etc. @@ -164,16 +163,15 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { - if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) { + if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_gsr)) { dccp_pr_debug("Retransmitted REQUEST\n"); - dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; + dreq->dreq_gsr = DCCP_SKB_CB(skb)->dccpd_seq; /* * Send another RESPONSE packet * To protect against Request floods, increment retrans * counter (backoff, monitored by dccp_response_timer). */ - req->retrans++; - req->rsk_ops->rtx_syn_ack(sk, req, NULL); + inet_rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ return NULL; @@ -186,12 +184,14 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, goto drop; /* Invalid ACK */ - if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dreq->dreq_iss) { + if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, + dreq->dreq_iss, dreq->dreq_gss)) { dccp_pr_debug("Invalid ACK number: ack_seq=%llu, " - "dreq_iss=%llu\n", + "dreq_iss=%llu, dreq_gss=%llu\n", (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, - (unsigned long long) dreq->dreq_iss); + (unsigned long long) dreq->dreq_iss, + (unsigned long long) dreq->dreq_gss); goto drop; } @@ -237,7 +237,7 @@ int dccp_child_process(struct sock *parent, struct sock *child, /* Wakeup parent, send SIGIO */ if (state == DCCP_RESPOND && child->sk_state != state) - parent->sk_data_ready(parent, 0); + parent->sk_data_ready(parent); } else { /* Alas, it is possible again, because we do lookup * in main socket hash table and lock on listening @@ -266,10 +266,10 @@ int dccp_reqsk_init(struct request_sock *req, { struct dccp_request_sock *dreq = dccp_rsk(req); - inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; - inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport; - inet_rsk(req)->acked = 0; - dreq->dreq_timestamp_echo = 0; + inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; + inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); + inet_rsk(req)->acked = 0; + dreq->dreq_timestamp_echo = 0; /* inherit feature negotiation options from listening socket */ return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg); diff --git a/net/dccp/options.c b/net/dccp/options.c index f06ffcfc8d7..9bce31886bd 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -123,6 +123,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R: if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */ break; + if (len == 0) + goto out_invalid_option; rc = dccp_feat_parse_options(sk, dreq, mandatory, opt, *value, value + 1, len - 1); if (rc) @@ -341,38 +343,6 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time) return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; } -/* FIXME: This function is currently not used anywhere */ -int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time) -{ - const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); - const int len = 2 + elapsed_time_len; - unsigned char *to; - - if (elapsed_time_len == 0) - return 0; - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) - return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; - - to = skb_push(skb, len); - *to++ = DCCPO_ELAPSED_TIME; - *to++ = len; - - if (elapsed_time_len == 2) { - const __be16 var16 = htons((u16)elapsed_time); - memcpy(to, &var16, 2); - } else { - const __be32 var32 = htonl(elapsed_time); - memcpy(to, &var32, 4); - } - - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); - static int dccp_insert_option_timestamp(struct sk_buff *skb) { __be32 now = htonl(dccp_timestamp()); @@ -525,6 +495,7 @@ int dccp_insert_option_mandatory(struct sk_buff *skb) * @val: NN value or SP array (preferred element first) to copy * @len: true length of @val in bytes (excluding first element repetition) * @repeat_first: whether to copy the first element of @val twice + * * The last argument is used to construct Confirm options, where the preferred * value and the preference list appear separately (RFC 4340, 6.3.1). Preference * lists are kept such that the preferred entry is always first, so we only need @@ -542,7 +513,7 @@ int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, } if (unlikely(val == NULL || len == 0)) - len = repeat_first = 0; + len = repeat_first = false; tot_len = 3 + repeat_first + len; if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) { diff --git a/net/dccp/output.c b/net/dccp/output.c index 784d3021054..0248e8a3460 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -27,11 +27,13 @@ static inline void dccp_event_ack_sent(struct sock *sk) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } -static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb) +/* enqueue @skb on sk_send_head for retransmission, return clone to send now */ +static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb) { skb_set_owner_w(skb, sk); WARN_ON(sk->sk_send_head); sk->sk_send_head = skb; + return skb_clone(sk->sk_send_head, gfp_any()); } /* @@ -43,7 +45,7 @@ static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb) static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if (likely(skb != NULL)) { - const struct inet_sock *inet = inet_sk(sk); + struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); @@ -136,14 +138,14 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb); + err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); return net_xmit_eval(err); } return -ENOBUFS; } /** - * dccp_determine_ccmps - Find out about CCID-specfic packet-size limits + * dccp_determine_ccmps - Find out about CCID-specific packet-size limits * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.), * since the RX CCID is restricted to feedback packets (Acks), which are small * in comparison with the data traffic. A value of 0 means "no current CCMPS". @@ -212,6 +214,7 @@ void dccp_write_space(struct sock *sk) * dccp_wait_for_ccid - Await CCID send permission * @sk: socket to wait for * @delay: timeout in jiffies + * * This is used by CCIDs which need to delay the send time in process context. */ static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay) @@ -406,10 +409,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb_dst_set(skb, dst_clone(dst)); dreq = dccp_rsk(req); - if (inet_rsk(req)->acked) /* increase ISS upon retransmission */ - dccp_inc_seqno(&dreq->dreq_iss); + if (inet_rsk(req)->acked) /* increase GSS upon retransmission */ + dccp_inc_seqno(&dreq->dreq_gss); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; - DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; + DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_gss; /* Resolve feature dependencies resulting from choice of CCID */ if (dccp_feat_server_ccid_dependencies(dreq)) @@ -421,14 +424,14 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, /* Build and checksum header */ dh = dccp_zeroed_hdr(skb, dccp_header_size); - dh->dccph_sport = inet_rsk(req)->loc_port; - dh->dccph_dport = inet_rsk(req)->rmt_port; + dh->dccph_sport = htons(inet_rsk(req)->ir_num); + dh->dccph_dport = inet_rsk(req)->ir_rmt_port; dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESPONSE; dh->dccph_x = 1; - dccp_hdr_set_seq(dh, dreq->dreq_iss); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); + dccp_hdr_set_seq(dh, dreq->dreq_gss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_gsr); dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; dccp_csum_outgoing(skb); @@ -552,8 +555,7 @@ int dccp_connect(struct sock *sk) DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - dccp_skb_entail(sk, skb); - dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); + dccp_transmit_skb(sk, dccp_skb_entail(sk, skb)); DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); /* Timer for repeating the REQUEST until an answer. */ @@ -678,8 +680,7 @@ void dccp_send_close(struct sock *sk, const int active) DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE; if (active) { - dccp_skb_entail(sk, skb); - dccp_transmit_skb(sk, skb_clone(skb, prio)); + skb = dccp_skb_entail(sk, skb); /* * Retransmission timer for active-close: RFC 4340, 8.3 requires * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ @@ -692,6 +693,6 @@ void dccp_send_close(struct sock *sk, const int active) */ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX); - } else - dccp_transmit_skb(sk, skb); + } + dccp_transmit_skb(sk, skb); } diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 33d0e6297c2..595ddf0459d 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -160,18 +160,23 @@ static __init int dccpprobe_init(void) spin_lock_init(&dccpw.lock); if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL)) return ret; - if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) + if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops)) goto err0; - try_then_request_module((ret = register_jprobe(&dccp_send_probe)) == 0, - "dccp"); + ret = register_jprobe(&dccp_send_probe); + if (ret) { + ret = request_module("dccp"); + if (!ret) + ret = register_jprobe(&dccp_send_probe); + } + if (ret) goto err1; pr_info("DCCP watch registered (port=%d)\n", port); return 0; err1: - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); err0: kfifo_free(&dccpw.fifo); return ret; @@ -181,7 +186,7 @@ module_init(dccpprobe_init); static __exit void dccpprobe_exit(void) { kfifo_free(&dccpw.fifo); - proc_net_remove(&init_net, procname); + remove_proc_entry(procname, init_net.proc_net); unregister_jprobe(&dccp_send_probe); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 152975d942d..de2c1e71930 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -184,7 +184,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dp->dccps_rate_last = jiffies; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; - dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; dccp_init_xmit_timers(sk); @@ -337,7 +336,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, mask |= POLLIN | POLLRDNORM; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { - if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { + if (sk_stream_is_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { /* send SIGIO later */ set_bit(SOCK_ASYNC_NOSPACE, @@ -348,7 +347,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, * wspace test but before the flags are set, * IO signal will be lost. */ - if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) + if (sk_stream_is_writeable(sk)) mask |= POLLOUT | POLLWRNORM; } } @@ -849,7 +848,7 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, default: dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); - sk_eat_skb(sk, skb, 0); + sk_eat_skb(sk, skb, false); } verify_sock_status: if (sock_flag(sk, SOCK_DONE)) { @@ -906,7 +905,7 @@ verify_sock_status: len = skb->len; found_fin_ok: if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb, 0); + sk_eat_skb(sk, skb, false); break; } while (1); out: @@ -1085,14 +1084,15 @@ EXPORT_SYMBOL_GPL(dccp_shutdown); static inline int dccp_mib_init(void) { - return snmp_mib_init((void __percpu **)dccp_statistics, - sizeof(struct dccp_mib), - __alignof__(struct dccp_mib)); + dccp_statistics = alloc_percpu(struct dccp_mib); + if (!dccp_statistics) + return -ENOMEM; + return 0; } static inline void dccp_mib_exit(void) { - snmp_mib_free((void __percpu **)dccp_statistics); + free_percpu(dccp_statistics); } static int thash_entries; @@ -1100,7 +1100,7 @@ module_param(thash_entries, int, 0444); MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); #ifdef CONFIG_IP_DCCP_DEBUG -int dccp_debug; +bool dccp_debug; module_param(dccp_debug, bool, 0644); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); @@ -1159,10 +1159,8 @@ static int __init dccp_init(void) goto out_free_bind_bucket_cachep; } - for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) { + for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); - INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i); - } if (inet_ehash_locks_alloc(&dccp_hashinfo)) goto out_free_dccp_ehash; diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 42348824ee3..53731e45403 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -20,6 +20,7 @@ /* Boundary values */ static int zero = 0, + one = 1, u8_max = 0xFF; static unsigned long seqw_min = DCCPF_SEQ_WMIN, seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */ @@ -58,7 +59,7 @@ static struct ctl_table dccp_default_table[] = { .maxlen = sizeof(sysctl_dccp_request_retries), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = &one, .extra2 = &u8_max, }, { @@ -98,18 +99,11 @@ static struct ctl_table dccp_default_table[] = { { } }; -static struct ctl_path dccp_path[] = { - { .procname = "net", }, - { .procname = "dccp", }, - { .procname = "default", }, - { } -}; - static struct ctl_table_header *dccp_table_header; int __init dccp_sysctl_init(void) { - dccp_table_header = register_sysctl_paths(dccp_path, + dccp_table_header = register_net_sysctl(&init_net, "net/dccp/default", dccp_default_table); return dccp_table_header != NULL ? 0 : -ENOMEM; @@ -118,7 +112,7 @@ int __init dccp_sysctl_init(void) void dccp_sysctl_exit(void) { if (dccp_table_header != NULL) { - unregister_sysctl_table(dccp_table_header); + unregister_net_sysctl_table(dccp_table_header); dccp_table_header = NULL; } } diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 7587870b704..1cd46a345cb 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -12,6 +12,7 @@ #include <linux/dccp.h> #include <linux/skbuff.h> +#include <linux/export.h> #include "dccp.h" @@ -279,7 +280,7 @@ static ktime_t dccp_timestamp_seed; */ u32 dccp_timestamp(void) { - s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed); + u64 delta = (u64)ktime_us_delta(ktime_get_real(), dccp_timestamp_seed); do_div(delta, 10); return delta; |
