aboutsummaryrefslogtreecommitdiff
path: root/net/dccp
diff options
context:
space:
mode:
Diffstat (limited to 'net/dccp')
-rw-r--r--net/dccp/Kconfig4
-rw-r--r--net/dccp/ackvec.c1
-rw-r--r--net/dccp/ackvec.h28
-rw-r--r--net/dccp/ccid.c5
-rw-r--r--net/dccp/ccid.h22
-rw-r--r--net/dccp/ccids/Kconfig5
-rw-r--r--net/dccp/ccids/ccid2.c186
-rw-r--r--net/dccp/ccids/ccid2.h31
-rw-r--r--net/dccp/ccids/ccid3.c26
-rw-r--r--net/dccp/ccids/lib/loss_interval.c1
-rw-r--r--net/dccp/ccids/lib/loss_interval.h8
-rw-r--r--net/dccp/ccids/lib/packet_history.c3
-rw-r--r--net/dccp/ccids/lib/packet_history.h25
-rw-r--r--net/dccp/ccids/lib/tfrc.c3
-rw-r--r--net/dccp/ccids/lib/tfrc.h24
-rw-r--r--net/dccp/ccids/lib/tfrc_equation.c2
-rw-r--r--net/dccp/dccp.h193
-rw-r--r--net/dccp/diag.c20
-rw-r--r--net/dccp/feat.c228
-rw-r--r--net/dccp/feat.h25
-rw-r--r--net/dccp/input.c74
-rw-r--r--net/dccp/ipv4.c163
-rw-r--r--net/dccp/ipv6.c321
-rw-r--r--net/dccp/ipv6.h2
-rw-r--r--net/dccp/minisocks.c42
-rw-r--r--net/dccp/options.c37
-rw-r--r--net/dccp/output.c35
-rw-r--r--net/dccp/probe.c15
-rw-r--r--net/dccp/proto.c24
-rw-r--r--net/dccp/sysctl.c14
-rw-r--r--net/dccp/timer.c3
31 files changed, 940 insertions, 630 deletions
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index b75968a0401..8c0ef71bed2 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -1,6 +1,6 @@
menuconfig IP_DCCP
- tristate "The DCCP Protocol (EXPERIMENTAL)"
- depends on INET && EXPERIMENTAL
+ tristate "The DCCP Protocol"
+ depends on INET
---help---
Datagram Congestion Control Protocol (RFC 4340)
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 25b7a8d1ad5..ba07824af4c 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -12,6 +12,7 @@
#include "dccp.h"
#include <linux/kernel.h>
#include <linux/slab.h>
+#include <linux/export.h>
static struct kmem_cache *dccp_ackvec_slab;
static struct kmem_cache *dccp_ackvec_record_slab;
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index e2ab0627a5f..3284bfa988c 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -50,7 +50,8 @@ static inline u8 dccp_ackvec_state(const u8 *cell)
return *cell & ~DCCPAV_MAX_RUNLEN;
}
-/** struct dccp_ackvec - Ack Vector main data structure
+/**
+ * struct dccp_ackvec - Ack Vector main data structure
*
* This implements a fixed-size circular buffer within an array and is largely
* based on Appendix A of RFC 4340.
@@ -76,7 +77,8 @@ struct dccp_ackvec {
struct list_head av_records;
};
-/** struct dccp_ackvec_record - Records information about sent Ack Vectors
+/**
+ * struct dccp_ackvec_record - Records information about sent Ack Vectors
*
* These list entries define the additional information which the HC-Receiver
* keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A.
@@ -99,16 +101,16 @@ struct dccp_ackvec_record {
u8 avr_ack_nonce:1;
};
-extern int dccp_ackvec_init(void);
-extern void dccp_ackvec_exit(void);
+int dccp_ackvec_init(void);
+void dccp_ackvec_exit(void);
-extern struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
-extern void dccp_ackvec_free(struct dccp_ackvec *av);
+struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
+void dccp_ackvec_free(struct dccp_ackvec *av);
-extern void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
-extern int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
-extern void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
-extern u16 dccp_ackvec_buflen(const struct dccp_ackvec *av);
+void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
+int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
+void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
+u16 dccp_ackvec_buflen(const struct dccp_ackvec *av);
static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
{
@@ -121,6 +123,7 @@ static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
* @len: length of @vec
* @nonce: whether @vec had an ECN nonce of 0 or 1
* @node: FIFO - arranged in descending order of ack_ackno
+ *
* This structure is used by CCIDs to access Ack Vectors in a received skb.
*/
struct dccp_ackvec_parsed {
@@ -130,7 +133,6 @@ struct dccp_ackvec_parsed {
struct list_head node;
};
-extern int dccp_ackvec_parsed_add(struct list_head *head,
- u8 *vec, u8 len, u8 nonce);
-extern void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
+int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce);
+void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
#endif /* _ACKVEC_H */
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
index 36479ca61e0..597557254dd 100644
--- a/net/dccp/ccid.c
+++ b/net/dccp/ccid.c
@@ -46,6 +46,7 @@ bool ccid_support_check(u8 const *ccid_array, u8 array_len)
* ccid_get_builtin_ccids - Populate a list of built-in CCIDs
* @ccid_array: pointer to copy into
* @array_len: value to return length into
+ *
* This function allocates memory - caller must see that it is freed after use.
*/
int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len)
@@ -118,7 +119,7 @@ static int ccid_activate(struct ccid_operations *ccid_ops)
if (ccid_ops->ccid_hc_tx_slab == NULL)
goto out_free_rx_slab;
- pr_info("CCID: Activated CCID %d (%s)\n",
+ pr_info("DCCP: Activated CCID %d (%s)\n",
ccid_ops->ccid_id, ccid_ops->ccid_name);
err = 0;
out:
@@ -136,7 +137,7 @@ static void ccid_deactivate(struct ccid_operations *ccid_ops)
ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
ccid_ops->ccid_hc_rx_slab = NULL;
- pr_info("CCID: Deactivated CCID %d (%s)\n",
+ pr_info("DCCP: Deactivated CCID %d (%s)\n",
ccid_ops->ccid_id, ccid_ops->ccid_name);
}
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 75c3582a767..6eb837a47b5 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -93,8 +93,8 @@ extern struct ccid_operations ccid2_ops;
extern struct ccid_operations ccid3_ops;
#endif
-extern int ccid_initialize_builtins(void);
-extern void ccid_cleanup_builtins(void);
+int ccid_initialize_builtins(void);
+void ccid_cleanup_builtins(void);
struct ccid {
struct ccid_operations *ccid_ops;
@@ -106,12 +106,12 @@ static inline void *ccid_priv(const struct ccid *ccid)
return (void *)ccid->ccid_priv;
}
-extern bool ccid_support_check(u8 const *ccid_array, u8 array_len);
-extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
-extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
- char __user *, int __user *);
+bool ccid_support_check(u8 const *ccid_array, u8 array_len);
+int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
+int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
+ char __user *, int __user *);
-extern struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx);
+struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx);
static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp)
{
@@ -131,8 +131,8 @@ static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
return ccid->ccid_ops->ccid_id;
}
-extern void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
-extern void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
+void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
+void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
/*
* Congestion control of queued data packets via CCID decision.
@@ -246,7 +246,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
u32 __user *optval, int __user *optlen)
{
int rc = -ENOPROTOOPT;
- if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
+ if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len,
optval, optlen);
return rc;
@@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
u32 __user *optval, int __user *optlen)
{
int rc = -ENOPROTOOPT;
- if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
+ if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len,
optval, optlen);
return rc;
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 0581143cb80..8ba3fc9d6d1 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -1,5 +1,4 @@
-menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+menu "DCCP CCIDs Configuration"
config IP_DCCP_CCID2_DEBUG
bool "CCID-2 debugging messages"
@@ -12,7 +11,7 @@ config IP_DCCP_CCID2_DEBUG
If in doubt, say N.
config IP_DCCP_CCID3
- bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)"
+ bool "CCID-3 (TCP-Friendly)"
def_bool y if (IP_DCCP = y || IP_DCCP = m)
---help---
CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e96d5e81003..f053198e730 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -29,7 +29,7 @@
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
-static int ccid2_debug;
+static bool ccid2_debug;
#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
#else
#define ccid2_pr_debug(format, a...)
@@ -85,7 +85,6 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
{
- struct dccp_sock *dp = dccp_sk(sk);
u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2);
/*
@@ -98,14 +97,33 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
val = max_ratio;
}
- if (val > DCCPF_ACK_RATIO_MAX)
- val = DCCPF_ACK_RATIO_MAX;
+ dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO,
+ min_t(u32, val, DCCPF_ACK_RATIO_MAX));
+}
- if (val == dp->dccps_l_ack_ratio)
- return;
+static void ccid2_check_l_ack_ratio(struct sock *sk)
+{
+ struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+
+ /*
+ * After a loss, idle period, application limited period, or RTO we
+ * need to check that the ack ratio is still less than the congestion
+ * window. Otherwise, we will send an entire congestion window of
+ * packets and got no response because we haven't sent ack ratio
+ * packets yet.
+ * If the ack ratio does need to be reduced, we reduce it to half of
+ * the congestion window (or 1 if that's zero) instead of to the
+ * congestion window. This prevents problems if one ack is lost.
+ */
+ if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd)
+ ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U);
+}
- ccid2_pr_debug("changing local ack ratio to %u\n", val);
- dp->dccps_l_ack_ratio = val;
+static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
+{
+ dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW,
+ clamp_val(val, DCCPF_SEQ_WMIN,
+ DCCPF_SEQ_WMAX));
}
static void ccid2_hc_tx_rto_expire(unsigned long data)
@@ -153,17 +171,97 @@ out:
sock_put(sk);
}
+/*
+ * Congestion window validation (RFC 2861).
+ */
+static bool ccid2_do_cwv = true;
+module_param(ccid2_do_cwv, bool, 0644);
+MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation");
+
+/**
+ * ccid2_update_used_window - Track how much of cwnd is actually used
+ * This is done in addition to CWV. The sender needs to have an idea of how many
+ * packets may be in flight, to set the local Sequence Window value accordingly
+ * (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the
+ * maximum-used window. We use an EWMA low-pass filter to filter out noise.
+ */
+static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd)
+{
+ hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4;
+}
+
+/* This borrows the code of tcp_cwnd_application_limited() */
+static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now)
+{
+ struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+ /* don't reduce cwnd below the initial window (IW) */
+ u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache),
+ win_used = max(hc->tx_cwnd_used, init_win);
+
+ if (win_used < hc->tx_cwnd) {
+ hc->tx_ssthresh = max(hc->tx_ssthresh,
+ (hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2));
+ hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1;
+ }
+ hc->tx_cwnd_used = 0;
+ hc->tx_cwnd_stamp = now;
+
+ ccid2_check_l_ack_ratio(sk);
+}
+
+/* This borrows the code of tcp_cwnd_restart() */
+static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
+{
+ struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+ u32 cwnd = hc->tx_cwnd, restart_cwnd,
+ iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache);
+
+ hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2));
+
+ /* don't reduce cwnd below the initial window (IW) */
+ restart_cwnd = min(cwnd, iwnd);
+ cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto;
+ hc->tx_cwnd = max(cwnd, restart_cwnd);
+
+ hc->tx_cwnd_stamp = now;
+ hc->tx_cwnd_used = 0;
+
+ ccid2_check_l_ack_ratio(sk);
+}
+
static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+ const u32 now = ccid2_time_stamp;
struct ccid2_seq *next;
- hc->tx_pipe++;
+ /* slow-start after idle periods (RFC 2581, RFC 2861) */
+ if (ccid2_do_cwv && !hc->tx_pipe &&
+ (s32)(now - hc->tx_lsndtime) >= hc->tx_rto)
+ ccid2_cwnd_restart(sk, now);
+
+ hc->tx_lsndtime = now;
+ hc->tx_pipe += 1;
+
+ /* see whether cwnd was fully used (RFC 2861), update expected window */
+ if (ccid2_cwnd_network_limited(hc)) {
+ ccid2_update_used_window(hc, hc->tx_cwnd);
+ hc->tx_cwnd_used = 0;
+ hc->tx_cwnd_stamp = now;
+ } else {
+ if (hc->tx_pipe > hc->tx_cwnd_used)
+ hc->tx_cwnd_used = hc->tx_pipe;
+
+ ccid2_update_used_window(hc, hc->tx_cwnd_used);
+
+ if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto)
+ ccid2_cwnd_application_limited(sk, now);
+ }
hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
hc->tx_seqh->ccid2s_acked = 0;
- hc->tx_seqh->ccid2s_sent = ccid2_time_stamp;
+ hc->tx_seqh->ccid2s_sent = now;
next = hc->tx_seqh->ccid2s_next;
/* check if we need to alloc more space */
@@ -329,17 +427,37 @@ static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
unsigned int *maxincr)
{
struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
- if (hc->tx_cwnd < hc->tx_ssthresh) {
- if (*maxincr > 0 && ++hc->tx_packets_acked == 2) {
+ struct dccp_sock *dp = dccp_sk(sk);
+ int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio;
+
+ if (hc->tx_cwnd < dp->dccps_l_seq_win &&
+ r_seq_used < dp->dccps_r_seq_win) {
+ if (hc->tx_cwnd < hc->tx_ssthresh) {
+ if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) {
+ hc->tx_cwnd += 1;
+ *maxincr -= 1;
+ hc->tx_packets_acked = 0;
+ }
+ } else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
hc->tx_cwnd += 1;
- *maxincr -= 1;
hc->tx_packets_acked = 0;
}
- } else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
- hc->tx_cwnd += 1;
- hc->tx_packets_acked = 0;
}
+
+ /*
+ * Adjust the local sequence window and the ack ratio to allow about
+ * 5 times the number of packets in the network (RFC 4340 7.5.2)
+ */
+ if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win)
+ ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2);
+ else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2)
+ ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U);
+
+ if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win)
+ ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2);
+ else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2)
+ ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2);
+
/*
* FIXME: RTT is sampled several times per acknowledgment (for each
* entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
@@ -365,9 +483,7 @@ static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
- /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
- if (dccp_sk(sk)->dccps_l_ack_ratio > hc->tx_cwnd)
- ccid2_change_l_ack_ratio(sk, hc->tx_cwnd);
+ ccid2_check_l_ack_ratio(sk);
}
static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
@@ -418,8 +534,16 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
if (hc->tx_rpdupack >= NUMDUPACK) {
hc->tx_rpdupack = -1; /* XXX lame */
hc->tx_rpseq = 0;
-
+#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__
+ /*
+ * FIXME: Ack Congestion Control is broken; in
+ * the current state instabilities occurred with
+ * Ack Ratios greater than 1; causing hang-ups
+ * and long RTO timeouts. This needs to be fixed
+ * before opening up dynamic changes. -- gerrit
+ */
ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
+#endif
}
}
}
@@ -594,6 +718,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
/* Use larger initial windows (RFC 4341, section 5). */
hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
+ hc->tx_expected_wnd = hc->tx_cwnd;
/* Make sure that Ack Ratio is enabled and within bounds. */
max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
@@ -606,7 +731,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
hc->tx_rto = DCCP_TIMEOUT_INIT;
hc->tx_rpdupack = -1;
- hc->tx_last_cong = ccid2_time_stamp;
+ hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp;
+ hc->tx_cwnd_used = 0;
setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
(unsigned long)sk);
INIT_LIST_HEAD(&hc->tx_av_chunks);
@@ -627,18 +753,14 @@ static void ccid2_hc_tx_exit(struct sock *sk)
static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
- const struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_rx_sock *hc = ccid2_hc_rx_sk(sk);
- switch (DCCP_SKB_CB(skb)->dccpd_type) {
- case DCCP_PKT_DATA:
- case DCCP_PKT_DATAACK:
- hc->rx_data++;
- if (hc->rx_data >= dp->dccps_r_ack_ratio) {
- dccp_send_ack(sk);
- hc->rx_data = 0;
- }
- break;
+ if (!dccp_data_packet(skb))
+ return;
+
+ if (++hc->rx_num_data_pkts >= dccp_sk(sk)->dccps_r_ack_ratio) {
+ dccp_send_ack(sk);
+ hc->rx_num_data_pkts = 0;
}
}
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index e9985dafc2c..18c97543e52 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -43,6 +43,12 @@ struct ccid2_seq {
#define CCID2_SEQBUF_LEN 1024
#define CCID2_SEQBUF_MAX 128
+/*
+ * Multiple of congestion window to keep the sequence window at
+ * (RFC 4340 7.5.2)
+ */
+#define CCID2_WIN_CHANGE_FACTOR 5
+
/**
* struct ccid2_hc_tx_sock - CCID2 TX half connection
* @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
@@ -53,6 +59,10 @@ struct ccid2_seq {
* @tx_rttvar: moving average/maximum of @mdev_max
* @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
* @tx_rtt_seq: to decay RTTVAR at most once per flight
+ * @tx_cwnd_used: actually used cwnd, W_used of RFC 2861
+ * @tx_expected_wnd: moving average of @tx_cwnd_used
+ * @tx_cwnd_stamp: to track idle periods in CWV
+ * @tx_lsndtime: last time (in jiffies) a data packet was sent
* @tx_rpseq: last consecutive seqno
* @tx_rpdupack: dupacks since rpseq
* @tx_av_chunks: list of Ack Vectors received on current skb
@@ -76,6 +86,12 @@ struct ccid2_hc_tx_sock {
u64 tx_rtt_seq:48;
struct timer_list tx_rtotimer;
+ /* Congestion Window validation (optional, RFC 2861) */
+ u32 tx_cwnd_used,
+ tx_expected_wnd,
+ tx_cwnd_stamp,
+ tx_lsndtime;
+
u64 tx_rpseq;
int tx_rpdupack;
u32 tx_last_cong;
@@ -88,8 +104,21 @@ static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
return hc->tx_pipe >= hc->tx_cwnd;
}
+/*
+ * Convert RFC 3390 larger initial window into an equivalent number of packets.
+ * This is based on the numbers specified in RFC 5681, 3.1.
+ */
+static inline u32 rfc3390_bytes_to_packets(const u32 smss)
+{
+ return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
+}
+
+/**
+ * struct ccid2_hc_rx_sock - Receiving end of CCID-2 half-connection
+ * @rx_num_data_pkts: number of data packets received since last feedback
+ */
struct ccid2_hc_rx_sock {
- int rx_data;
+ u32 rx_num_data_pkts;
};
static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk)
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 3d604e1349c..119c04317d4 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -38,7 +38,7 @@
#include <asm/unaligned.h>
#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
-static int ccid3_debug;
+static bool ccid3_debug;
#define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a)
#else
#define ccid3_pr_debug(format, a...)
@@ -98,8 +98,9 @@ static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc)
{
hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x);
+ DCCP_BUG_ON(hc->tx_t_ipi == 0);
ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi,
- hc->tx_s, (unsigned)(hc->tx_x >> 6));
+ hc->tx_s, (unsigned int)(hc->tx_x >> 6));
}
static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now)
@@ -112,6 +113,7 @@ static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now)
/**
* ccid3_hc_tx_update_x - Update allowed sending rate X
* @stamp: most recent time if available - can be left NULL.
+ *
* This function tracks draft rfc3448bis, check there for latest details.
*
* Note: X and X_recv are both stored in units of 64 * bytes/second, to support
@@ -152,17 +154,19 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
if (hc->tx_x != old_x) {
ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, "
- "X_recv=%u\n", (unsigned)(old_x >> 6),
- (unsigned)(hc->tx_x >> 6), hc->tx_x_calc,
- (unsigned)(hc->tx_x_recv >> 6));
+ "X_recv=%u\n", (unsigned int)(old_x >> 6),
+ (unsigned int)(hc->tx_x >> 6), hc->tx_x_calc,
+ (unsigned int)(hc->tx_x_recv >> 6));
ccid3_update_send_interval(hc);
}
}
-/*
- * Track the mean packet size `s' (cf. RFC 4342, 5.3 and RFC 3448, 4.1)
+/**
+ * ccid3_hc_tx_update_s - Track the mean packet size `s'
* @len: DCCP packet payload size in bytes
+ *
+ * cf. RFC 4342, 5.3 and RFC 3448, 4.1
*/
static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hc, int len)
{
@@ -236,8 +240,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
*
* Note that X_recv is scaled by 2^6 while X_calc is not
*/
- BUG_ON(hc->tx_p && !hc->tx_x_calc);
-
if (hc->tx_x_calc > (hc->tx_x_recv >> 5))
hc->tx_x_recv =
max(hc->tx_x_recv / 2,
@@ -271,6 +273,7 @@ out:
/**
* ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets
* @skb: next packet candidate to send on @sk
+ *
* This function uses the convention of ccid_packet_dequeue_eval() and
* returns a millisecond-delay value between 0 and t_mbi = 64000 msec.
*/
@@ -426,8 +429,8 @@ done_computing_x:
"p=%u, X_calc=%u, X_recv=%u, X=%u\n",
dccp_role(sk), sk, hc->tx_rtt, r_sample,
hc->tx_s, hc->tx_p, hc->tx_x_calc,
- (unsigned)(hc->tx_x_recv >> 6),
- (unsigned)(hc->tx_x >> 6));
+ (unsigned int)(hc->tx_x_recv >> 6),
+ (unsigned int)(hc->tx_x >> 6));
/* unschedule no feedback timer */
sk_stop_timer(sk, &hc->tx_no_feedback_timer);
@@ -532,6 +535,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
case DCCP_SOCKOPT_CCID_TX_INFO:
if (len < sizeof(tfrc))
return -EINVAL;
+ memset(&tfrc, 0, sizeof(tfrc));
tfrc.tfrctx_x = hc->tx_x;
tfrc.tfrctx_x_recv = hc->tx_x_recv;
tfrc.tfrctx_x_calc = hc->tx_x_calc;
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 497723c4d4b..57f9fd78c4d 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -133,6 +133,7 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
* @rh: Receive history containing a fresh loss event
* @calc_first_li: Caller-dependent routine to compute length of first interval
* @sk: Used by @calc_first_li in caller-specific way (subtyping)
+ *
* Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
*/
int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
index d1d2f5383b7..57f631a86cc 100644
--- a/net/dccp/ccids/lib/loss_interval.h
+++ b/net/dccp/ccids/lib/loss_interval.h
@@ -65,9 +65,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
struct tfrc_rx_hist;
-extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
- u32 (*first_li)(struct sock *), struct sock *);
-extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
-extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
+int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
+ u32 (*first_li)(struct sock *), struct sock *);
+u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
+void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
#endif /* _DCCP_LI_HIST_ */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index de8fe294bf0..08df7a3acb3 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -315,6 +315,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
* @ndp: The NDP count belonging to @skb
* @calc_first_li: Caller-dependent computation of first loss interval in @lh
* @sk: Used by @calc_first_li (see tfrc_lh_interval_add)
+ *
* Chooses action according to pending loss, updates LI database when a new
* loss was detected, and does required post-processing. Returns 1 when caller
* should send feedback, 0 otherwise.
@@ -387,7 +388,7 @@ static inline struct tfrc_rx_hist_entry *
}
/**
- * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
+ * tfrc_rx_hist_rtt_prev_s - previously suitable (wrt rtt_last_s) RTT-sampling entry
*/
static inline struct tfrc_rx_hist_entry *
tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 7ee4a9d9d33..ee362b0b630 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -60,8 +60,8 @@ static inline struct tfrc_tx_hist_entry *
return head;
}
-extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
-extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
+int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
+void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
/* Subtraction a-b modulo-16, respects circular wrap-around */
#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
@@ -139,20 +139,17 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
return h->loss_count > 0;
}
-extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
- const struct sk_buff *skb, const u64 ndp);
+void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb,
+ const u64 ndp);
-extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
+int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
struct tfrc_loss_hist;
-extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
- struct tfrc_loss_hist *lh,
- struct sk_buff *skb, const u64 ndp,
- u32 (*first_li)(struct sock *sk),
- struct sock *sk);
-extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
- const struct sk_buff *skb);
-extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
-extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
+int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh,
+ struct sk_buff *skb, const u64 ndp,
+ u32 (*first_li)(struct sock *sk), struct sock *sk);
+u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb);
+int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
+void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
#endif /* _DCCP_PKT_HIST_ */
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index 4902029854d..62b5828acde 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -4,10 +4,11 @@
* Copyright (c) 2007 The University of Aberdeen, Scotland, UK
* Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
*/
+#include <linux/moduleparam.h>
#include "tfrc.h"
#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
-int tfrc_debug;
+bool tfrc_debug;
module_param(tfrc_debug, bool, 0644);
MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages");
#endif
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index f8ee3f54977..40ee7d62b65 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -21,7 +21,7 @@
#include "packet_history.h"
#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
-extern int tfrc_debug;
+extern bool tfrc_debug;
#define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a)
#else
#define tfrc_pr_debug(format, a...)
@@ -55,21 +55,21 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
}
-extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
-extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
-extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
+u32 tfrc_calc_x(u16 s, u32 R, u32 p);
+u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
+u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
-extern int tfrc_tx_packet_history_init(void);
-extern void tfrc_tx_packet_history_exit(void);
-extern int tfrc_rx_packet_history_init(void);
-extern void tfrc_rx_packet_history_exit(void);
+int tfrc_tx_packet_history_init(void);
+void tfrc_tx_packet_history_exit(void);
+int tfrc_rx_packet_history_init(void);
+void tfrc_rx_packet_history_exit(void);
-extern int tfrc_li_init(void);
-extern void tfrc_li_exit(void);
+int tfrc_li_init(void);
+void tfrc_li_exit(void);
#ifdef CONFIG_IP_DCCP_TFRC_LIB
-extern int tfrc_lib_init(void);
-extern void tfrc_lib_exit(void);
+int tfrc_lib_init(void);
+void tfrc_lib_exit(void);
#else
#define tfrc_lib_init() (0)
#define tfrc_lib_exit()
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index a052a4377e2..88ef98285be 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -611,6 +611,7 @@ static inline u32 tfrc_binsearch(u32 fval, u8 small)
* @s: packet size in bytes
* @R: RTT scaled by 1000000 (i.e., microseconds)
* @p: loss ratio estimate scaled by 1000000
+ *
* Returns X_calc in bytes per second (not scaled).
*/
u32 tfrc_calc_x(u16 s, u32 R, u32 p)
@@ -659,6 +660,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
/**
* tfrc_calc_x_reverse_lookup - try to find p given f(p)
* @fvalue: function value to match, scaled by 1000000
+ *
* Returns closest match for p, also scaled by 1000000
*/
u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 5fdb0722901..c67816647cc 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -39,7 +39,7 @@
"%s: " fmt, __func__, ##a)
#ifdef CONFIG_IP_DCCP_DEBUG
-extern int dccp_debug;
+extern bool dccp_debug;
#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a)
#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
@@ -53,7 +53,7 @@ extern struct inet_hashinfo dccp_hashinfo;
extern struct percpu_counter dccp_orphan_count;
-extern void dccp_time_wait(struct sock *sk, int state, int timeo);
+void dccp_time_wait(struct sock *sk, int state, int timeo);
/*
* Set safe upper bounds for header and option length. Since Data Offset is 8
@@ -75,7 +75,7 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
* state, about 60 seconds */
/* RFC 1122, 4.2.3.1 initial RTO value */
-#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
+#define DCCP_TIMEOUT_INIT ((unsigned int)(3 * HZ))
/*
* The maximum back-off value for retransmissions. This is needed for
@@ -84,7 +84,7 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
* - feature-negotiation retransmission (sec. 6.6.3),
* - Acks in client-PARTOPEN state (sec. 8.1.5).
*/
-#define DCCP_RTO_MAX ((unsigned)(64 * HZ))
+#define DCCP_RTO_MAX ((unsigned int)(64 * HZ))
/*
* RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4
@@ -224,114 +224,108 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb)
skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0);
}
-extern void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb);
+void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb);
-extern int dccp_retransmit_skb(struct sock *sk);
+int dccp_retransmit_skb(struct sock *sk);
-extern void dccp_send_ack(struct sock *sk);
-extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
- struct request_sock *rsk);
+void dccp_send_ack(struct sock *sk);
+void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *rsk);
-extern void dccp_send_sync(struct sock *sk, const u64 seq,
- const enum dccp_pkt_type pkt_type);
+void dccp_send_sync(struct sock *sk, const u64 seq,
+ const enum dccp_pkt_type pkt_type);
/*
* TX Packet Dequeueing Interface
*/
-extern void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
-extern bool dccp_qpolicy_full(struct sock *sk);
-extern void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
-extern struct sk_buff *dccp_qpolicy_top(struct sock *sk);
-extern struct sk_buff *dccp_qpolicy_pop(struct sock *sk);
-extern bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
+void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
+bool dccp_qpolicy_full(struct sock *sk);
+void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *dccp_qpolicy_top(struct sock *sk);
+struct sk_buff *dccp_qpolicy_pop(struct sock *sk);
+bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
/*
* TX Packet Output and TX Timers
*/
-extern void dccp_write_xmit(struct sock *sk);
-extern void dccp_write_space(struct sock *sk);
-extern void dccp_flush_write_queue(struct sock *sk, long *time_budget);
+void dccp_write_xmit(struct sock *sk);
+void dccp_write_space(struct sock *sk);
+void dccp_flush_write_queue(struct sock *sk, long *time_budget);
-extern void dccp_init_xmit_timers(struct sock *sk);
+void dccp_init_xmit_timers(struct sock *sk);
static inline void dccp_clear_xmit_timers(struct sock *sk)
{
inet_csk_clear_xmit_timers(sk);
}
-extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
+unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
-extern const char *dccp_packet_name(const int type);
+const char *dccp_packet_name(const int type);
-extern void dccp_set_state(struct sock *sk, const int state);
-extern void dccp_done(struct sock *sk);
+void dccp_set_state(struct sock *sk, const int state);
+void dccp_done(struct sock *sk);
-extern int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
- struct sk_buff const *skb);
+int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
+ struct sk_buff const *skb);
-extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
+int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
-extern struct sock *dccp_create_openreq_child(struct sock *sk,
- const struct request_sock *req,
- const struct sk_buff *skb);
+struct sock *dccp_create_openreq_child(struct sock *sk,
+ const struct request_sock *req,
+ const struct sk_buff *skb);
-extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
+int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
-extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
- struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst);
-extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct request_sock **prev);
+struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst);
+struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct request_sock **prev);
-extern int dccp_child_process(struct sock *parent, struct sock *child,
- struct sk_buff *skb);
-extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
- struct dccp_hdr *dh, unsigned len);
-extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct dccp_hdr *dh, const unsigned len);
+int dccp_child_process(struct sock *parent, struct sock *child,
+ struct sk_buff *skb);
+int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+ struct dccp_hdr *dh, unsigned int len);
+int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
+ const struct dccp_hdr *dh, const unsigned int len);
-extern int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
-extern void dccp_destroy_sock(struct sock *sk);
+int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
+void dccp_destroy_sock(struct sock *sk);
-extern void dccp_close(struct sock *sk, long timeout);
-extern struct sk_buff *dccp_make_response(struct sock *sk,
- struct dst_entry *dst,
- struct request_sock *req);
+void dccp_close(struct sock *sk, long timeout);
+struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
+ struct request_sock *req);
-extern int dccp_connect(struct sock *sk);
-extern int dccp_disconnect(struct sock *sk, int flags);
-extern int dccp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-extern int dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+int dccp_connect(struct sock *sk);
+int dccp_disconnect(struct sock *sk, int flags);
+int dccp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+int dccp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
#ifdef CONFIG_COMPAT
-extern int compat_dccp_getsockopt(struct sock *sk,
- int level, int optname,
- char __user *optval, int __user *optlen);
-extern int compat_dccp_setsockopt(struct sock *sk,
- int level, int optname,
- char __user *optval, unsigned int optlen);
+int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen);
+int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, unsigned int optlen);
#endif
-extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
-extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t size);
-extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
- struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len);
-extern void dccp_shutdown(struct sock *sk, int how);
-extern int inet_dccp_listen(struct socket *sock, int backlog);
-extern unsigned int dccp_poll(struct file *file, struct socket *sock,
- poll_table *wait);
-extern int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
- int addr_len);
-
-extern struct sk_buff *dccp_ctl_make_reset(struct sock *sk,
- struct sk_buff *skb);
-extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
-extern void dccp_send_close(struct sock *sk, const int active);
-extern int dccp_invalid_packet(struct sk_buff *skb);
-extern u32 dccp_sample_rtt(struct sock *sk, long delta);
+int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+ size_t size);
+int dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
+ struct msghdr *msg, size_t len, int nonblock, int flags,
+ int *addr_len);
+void dccp_shutdown(struct sock *sk, int how);
+int inet_dccp_listen(struct socket *sock, int backlog);
+unsigned int dccp_poll(struct file *file, struct socket *sock,
+ poll_table *wait);
+int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+
+struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *skb);
+int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
+void dccp_send_close(struct sock *sk, const int active);
+int dccp_invalid_packet(struct sk_buff *skb);
+u32 dccp_sample_rtt(struct sock *sk, long delta);
static inline int dccp_bad_service_code(const struct sock *sk,
const __be32 service)
@@ -352,12 +346,13 @@ static inline int dccp_bad_service_code(const struct sock *sk,
* @dccpd_opt_len: total length of all options (5.8) in the packet
* @dccpd_seq: sequence number
* @dccpd_ack_seq: acknowledgment number subheader field value
+ *
* This is used for transmission as well as for reception.
*/
struct dccp_skb_cb {
union {
struct inet_skb_parm h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
struct inet6_skb_parm h6;
#endif
} header;
@@ -474,24 +469,24 @@ static inline int dccp_ack_pending(const struct sock *sk)
return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
}
-extern int dccp_feat_finalise_settings(struct dccp_sock *dp);
-extern int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
-extern int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
- struct sk_buff *skb);
-extern int dccp_feat_activate_values(struct sock *sk, struct list_head *fn);
-extern void dccp_feat_list_purge(struct list_head *fn_list);
-
-extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
-extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
-extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed);
-extern u32 dccp_timestamp(void);
-extern void dccp_timestamping_init(void);
-extern int dccp_insert_option(struct sk_buff *skb, unsigned char option,
- const void *value, unsigned char len);
+int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val);
+int dccp_feat_finalise_settings(struct dccp_sock *dp);
+int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
+int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
+ struct sk_buff *skb);
+int dccp_feat_activate_values(struct sock *sk, struct list_head *fn);
+void dccp_feat_list_purge(struct list_head *fn_list);
+
+int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
+int dccp_insert_options_rsk(struct dccp_request_sock *, struct sk_buff *);
+u32 dccp_timestamp(void);
+void dccp_timestamping_init(void);
+int dccp_insert_option(struct sk_buff *skb, unsigned char option,
+ const void *value, unsigned char len);
#ifdef CONFIG_SYSCTL
-extern int dccp_sysctl_init(void);
-extern void dccp_sysctl_exit(void);
+int dccp_sysctl_init(void);
+void dccp_sysctl_exit(void);
#else
static inline int dccp_sysctl_init(void)
{
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index b21f261da75..028fc43aacb 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -48,11 +48,23 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
dccp_get_info(sk, _info);
}
+static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req_v2 *r, struct nlattr *bc)
+{
+ inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc);
+}
+
+static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+ struct inet_diag_req_v2 *req)
+{
+ return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req);
+}
+
static const struct inet_diag_handler dccp_diag_handler = {
- .idiag_hashinfo = &dccp_hashinfo,
+ .dump = dccp_diag_dump,
+ .dump_one = dccp_diag_dump_one,
.idiag_get_info = dccp_diag_get_info,
- .idiag_type = DCCPDIAG_GETSOCK,
- .idiag_info_size = sizeof(struct tcp_info),
+ .idiag_type = IPPROTO_DCCP,
};
static int __init dccp_diag_init(void)
@@ -71,4 +83,4 @@ module_exit(dccp_diag_fini);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
MODULE_DESCRIPTION("DCCP inet_diag handler");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, DCCPDIAG_GETSOCK);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-33 /* AF_INET - IPPROTO_DCCP */);
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 568def95272..9733ddbc96c 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -12,6 +12,7 @@
* -----------
* o Feature negotiation is coordinated with connection setup (as in TCP), wild
* changes of parameters of an established connection are not supported.
+ * o Changing non-negotiable (NN) values is supported in state OPEN/PARTOPEN.
* o All currently known SP features have 1-byte quantities. If in the future
* extensions of RFCs 4340..42 define features with item lengths larger than
* one byte, a feature-specific extension of the code will be required.
@@ -343,6 +344,21 @@ static int __dccp_feat_activate(struct sock *sk, const int idx,
return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
}
+/**
+ * dccp_feat_activate - Activate feature value on socket
+ * @sk: fully connected DCCP socket (after handshake is complete)
+ * @feat_num: feature to activate, one of %dccp_feature_numbers
+ * @local: whether local (1) or remote (0) @feat_num is meant
+ * @fval: the value (SP or NN) to activate, or NULL to use the default value
+ *
+ * For general use this function is preferable over __dccp_feat_activate().
+ */
+static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local,
+ dccp_feat_val const *fval)
+{
+ return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval);
+}
+
/* Test for "Req'd" feature (RFC 4340, 6.4) */
static inline int dccp_feat_must_be_understood(u8 feat_num)
{
@@ -431,6 +447,7 @@ static struct dccp_feat_entry *dccp_feat_list_lookup(struct list_head *fn_list,
* @head: list to add to
* @feat: feature number
* @local: whether the local (1) or remote feature with number @feat is meant
+ *
* This is the only constructor and serves to ensure the above invariants.
*/
static struct dccp_feat_entry *
@@ -475,8 +492,8 @@ static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local,
new->feat_num = feat;
new->is_local = local;
new->state = FEAT_INITIALISING;
- new->needs_confirm = 0;
- new->empty_confirm = 0;
+ new->needs_confirm = false;
+ new->empty_confirm = false;
new->val = *fval;
new->needs_mandatory = mandatory;
@@ -489,6 +506,7 @@ static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local,
* @feat: one of %dccp_feature_numbers
* @local: whether local (1) or remote (0) @feat_num is being confirmed
* @fval: pointer to NN/SP value to be inserted or NULL
+ *
* Returns 0 on success, a Reset code for further processing otherwise.
*/
static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local,
@@ -502,12 +520,12 @@ static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local,
new->feat_num = feat;
new->is_local = local;
new->state = FEAT_STABLE; /* transition in 6.6.2 */
- new->needs_confirm = 1;
+ new->needs_confirm = true;
new->empty_confirm = (fval == NULL);
new->val.nn = 0; /* zeroes the whole structure */
if (!new->empty_confirm)
new->val = *fval;
- new->needs_mandatory = 0;
+ new->needs_mandatory = false;
return 0;
}
@@ -650,11 +668,22 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
return -1;
if (pos->needs_mandatory && dccp_insert_option_mandatory(skb))
return -1;
- /*
- * Enter CHANGING after transmitting the Change option (6.6.2).
- */
- if (pos->state == FEAT_INITIALISING)
- pos->state = FEAT_CHANGING;
+
+ if (skb->sk->sk_state == DCCP_OPEN &&
+ (opt == DCCPO_CONFIRM_R || opt == DCCPO_CONFIRM_L)) {
+ /*
+ * Confirms don't get retransmitted (6.6.3) once the
+ * connection is in state OPEN
+ */
+ dccp_feat_list_pop(pos);
+ } else {
+ /*
+ * Enter CHANGING after transmitting the Change
+ * option (6.6.2).
+ */
+ if (pos->state == FEAT_INITIALISING)
+ pos->state = FEAT_CHANGING;
+ }
}
return 0;
}
@@ -665,6 +694,7 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
* @feat: an NN feature from %dccp_feature_numbers
* @mandatory: use Mandatory option if 1
* @nn_val: value to register (restricted to 4 bytes)
+ *
* Note that NN features are local by definition (RFC 4340, 6.3.2).
*/
static int __feat_register_nn(struct list_head *fn, u8 feat,
@@ -730,6 +760,72 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
0, list, len);
}
+/**
+ * dccp_feat_nn_get - Query current/pending value of NN feature
+ * @sk: DCCP socket of an established connection
+ * @feat: NN feature number from %dccp_feature_numbers
+ *
+ * For a known NN feature, returns value currently being negotiated, or
+ * current (confirmed) value if no negotiation is going on.
+ */
+u64 dccp_feat_nn_get(struct sock *sk, u8 feat)
+{
+ if (dccp_feat_type(feat) == FEAT_NN) {
+ struct dccp_sock *dp = dccp_sk(sk);
+ struct dccp_feat_entry *entry;
+
+ entry = dccp_feat_list_lookup(&dp->dccps_featneg, feat, 1);
+ if (entry != NULL)
+ return entry->val.nn;
+
+ switch (feat) {
+ case DCCPF_ACK_RATIO:
+ return dp->dccps_l_ack_ratio;
+ case DCCPF_SEQUENCE_WINDOW:
+ return dp->dccps_l_seq_win;
+ }
+ }
+ DCCP_BUG("attempt to look up unsupported feature %u", feat);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dccp_feat_nn_get);
+
+/**
+ * dccp_feat_signal_nn_change - Update NN values for an established connection
+ * @sk: DCCP socket of an established connection
+ * @feat: NN feature number from %dccp_feature_numbers
+ * @nn_val: the new value to use
+ *
+ * This function is used to communicate NN updates out-of-band.
+ */
+int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val)
+{
+ struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
+ dccp_feat_val fval = { .nn = nn_val };
+ struct dccp_feat_entry *entry;
+
+ if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
+ return 0;
+
+ if (dccp_feat_type(feat) != FEAT_NN ||
+ !dccp_feat_is_valid_nn_val(feat, nn_val))
+ return -EINVAL;
+
+ if (nn_val == dccp_feat_nn_get(sk, feat))
+ return 0; /* already set or negotiation under way */
+
+ entry = dccp_feat_list_lookup(fn, feat, 1);
+ if (entry != NULL) {
+ dccp_pr_debug("Clobbering existing NN entry %llu -> %llu\n",
+ (unsigned long long)entry->val.nn,
+ (unsigned long long)nn_val);
+ dccp_feat_list_pop(entry);
+ }
+
+ inet_csk_schedule_ack(sk);
+ return dccp_feat_push_change(fn, feat, 1, 0, &fval);
+}
+EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change);
/*
* Tracking features whose value depend on the choice of CCID
@@ -840,6 +936,7 @@ static const struct ccid_dependency *dccp_feat_ccid_deps(u8 ccid, bool is_local)
* @fn: feature-negotiation list to update
* @id: CCID number to track
* @is_local: whether TX CCID (1) or RX CCID (0) is meant
+ *
* This function needs to be called after registering all other features.
*/
static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local)
@@ -863,6 +960,7 @@ static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local)
/**
* dccp_feat_finalise_settings - Finalise settings before starting negotiation
* @dp: client or listening socket (settings will be inherited)
+ *
* This is called after all registrations (socket initialisation, sysctls, and
* sockopt calls), and before sending the first packet containing Change options
* (ie. client-Request or server-Response), to ensure internal consistency.
@@ -1065,7 +1163,7 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
}
if (dccp_feat_reconcile(&entry->val, val, len, server, true)) {
- entry->empty_confirm = 0;
+ entry->empty_confirm = false;
} else if (is_mandatory) {
return DCCP_RESET_CODE_MANDATORY_ERROR;
} else if (entry->state == FEAT_INITIALISING) {
@@ -1081,10 +1179,10 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
defval = dccp_feat_default_value(feat);
if (!dccp_feat_reconcile(&entry->val, &defval, 1, server, true))
return DCCP_RESET_CODE_OPTION_ERROR;
- entry->empty_confirm = 1;
+ entry->empty_confirm = true;
}
- entry->needs_confirm = 1;
- entry->needs_mandatory = 0;
+ entry->needs_confirm = true;
+ entry->needs_mandatory = false;
entry->state = FEAT_STABLE;
return 0;
@@ -1187,6 +1285,101 @@ confirmation_failed:
}
/**
+ * dccp_feat_handle_nn_established - Fast-path reception of NN options
+ * @sk: socket of an established DCCP connection
+ * @mandatory: whether @opt was preceded by a Mandatory option
+ * @opt: %DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only)
+ * @feat: NN number, one of %dccp_feature_numbers
+ * @val: NN value
+ * @len: length of @val in bytes
+ *
+ * This function combines the functionality of change_recv/confirm_recv, with
+ * the following differences (reset codes are the same):
+ * - cleanup after receiving the Confirm;
+ * - values are directly activated after successful parsing;
+ * - deliberately restricted to NN features.
+ * The restriction to NN features is essential since SP features can have non-
+ * predictable outcomes (depending on the remote configuration), and are inter-
+ * dependent (CCIDs for instance cause further dependencies).
+ */
+static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt,
+ u8 feat, u8 *val, u8 len)
+{
+ struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
+ const bool local = (opt == DCCPO_CONFIRM_R);
+ struct dccp_feat_entry *entry;
+ u8 type = dccp_feat_type(feat);
+ dccp_feat_val fval;
+
+ dccp_feat_print_opt(opt, feat, val, len, mandatory);
+
+ /* Ignore non-mandatory unknown and non-NN features */
+ if (type == FEAT_UNKNOWN) {
+ if (local && !mandatory)
+ return 0;
+ goto fast_path_unknown;
+ } else if (type != FEAT_NN) {
+ return 0;
+ }
+
+ /*
+ * We don't accept empty Confirms, since in fast-path feature
+ * negotiation the values are enabled immediately after sending
+ * the Change option.
+ * Empty Changes on the other hand are invalid (RFC 4340, 6.1).
+ */
+ if (len == 0 || len > sizeof(fval.nn))
+ goto fast_path_unknown;
+
+ if (opt == DCCPO_CHANGE_L) {
+ fval.nn = dccp_decode_value_var(val, len);
+ if (!dccp_feat_is_valid_nn_val(feat, fval.nn))
+ goto fast_path_unknown;
+
+ if (dccp_feat_push_confirm(fn, feat, local, &fval) ||
+ dccp_feat_activate(sk, feat, local, &fval))
+ return DCCP_RESET_CODE_TOO_BUSY;
+
+ /* set the `Ack Pending' flag to piggyback a Confirm */
+ inet_csk_schedule_ack(sk);
+
+ } else if (opt == DCCPO_CONFIRM_R) {
+ entry = dccp_feat_list_lookup(fn, feat, local);
+ if (entry == NULL || entry->state != FEAT_CHANGING)
+ return 0;
+
+ fval.nn = dccp_decode_value_var(val, len);
+ /*
+ * Just ignore a value that doesn't match our current value.
+ * If the option changes twice within two RTTs, then at least
+ * one CONFIRM will be received for the old value after a
+ * new CHANGE was sent.
+ */
+ if (fval.nn != entry->val.nn)
+ return 0;
+
+ /* Only activate after receiving the Confirm option (6.6.1). */
+ dccp_feat_activate(sk, feat, local, &fval);
+
+ /* It has been confirmed - so remove the entry */
+ dccp_feat_list_pop(entry);
+
+ } else {
+ DCCP_WARN("Received illegal option %u\n", opt);
+ goto fast_path_failed;
+ }
+ return 0;
+
+fast_path_unknown:
+ if (!mandatory)
+ return dccp_push_empty_confirm(fn, feat, local);
+
+fast_path_failed:
+ return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
+ : DCCP_RESET_CODE_OPTION_ERROR;
+}
+
+/**
* dccp_feat_parse_options - Process Feature-Negotiation Options
* @sk: for general use and used by the client during connection setup
* @dreq: used by the server during connection setup
@@ -1195,6 +1388,7 @@ confirmation_failed:
* @feat: one of %dccp_feature_numbers
* @val: value contents of @opt
* @len: length of @val in bytes
+ *
* Returns 0 on success, a Reset code for ending the connection otherwise.
*/
int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
@@ -1221,6 +1415,14 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
return dccp_feat_confirm_recv(fn, mandatory, opt, feat,
val, len, server);
}
+ break;
+ /*
+ * Support for exchanging NN options on an established connection.
+ */
+ case DCCP_OPEN:
+ case DCCP_PARTOPEN:
+ return dccp_feat_handle_nn_established(sk, mandatory, opt, feat,
+ val, len);
}
return 0; /* ignore FN options in all other states */
}
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index e56a4e5e634..0e75cebb218 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -107,13 +107,13 @@ extern unsigned long sysctl_dccp_sequence_window;
extern int sysctl_dccp_rx_ccid;
extern int sysctl_dccp_tx_ccid;
-extern int dccp_feat_init(struct sock *sk);
-extern void dccp_feat_initialise_sysctls(void);
-extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
- u8 const *list, u8 len);
-extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
- u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
-extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
+int dccp_feat_init(struct sock *sk);
+void dccp_feat_initialise_sysctls(void);
+int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
+ u8 const *list, u8 len);
+int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
+ u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
+int dccp_feat_clone_list(struct list_head const *, struct list_head *);
/*
* Encoding variable-length options and their maximum length.
@@ -127,10 +127,11 @@ extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
*/
#define DCCP_OPTVAL_MAXLEN 6
-extern void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
-extern u64 dccp_decode_value_var(const u8 *bf, const u8 len);
+void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
+u64 dccp_decode_value_var(const u8 *bf, const u8 len);
+u64 dccp_feat_nn_get(struct sock *sk, u8 feat);
-extern int dccp_insert_option_mandatory(struct sk_buff *skb);
-extern int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
- u8 *val, u8 len, bool repeat_first);
+int dccp_insert_option_mandatory(struct sk_buff *skb);
+int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, u8 *val, u8 len,
+ bool repeat_first);
#endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 4222e7a654b..3c8ec7d4a34 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -28,7 +28,7 @@ static void dccp_enqueue_skb(struct sock *sk, struct sk_buff *skb)
__skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk);
}
static void dccp_fin(struct sock *sk, struct sk_buff *skb)
@@ -285,7 +285,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
}
static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct dccp_hdr *dh, const unsigned len)
+ const struct dccp_hdr *dh, const unsigned int len)
{
struct dccp_sock *dp = dccp_sk(sk);
@@ -366,7 +366,7 @@ discard:
}
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct dccp_hdr *dh, const unsigned len)
+ const struct dccp_hdr *dh, const unsigned int len)
{
if (dccp_check_seqno(sk, skb))
goto discard;
@@ -388,7 +388,7 @@ EXPORT_SYMBOL_GPL(dccp_rcv_established);
static int dccp_rcv_request_sent_state_process(struct sock *sk,
struct sk_buff *skb,
const struct dccp_hdr *dh,
- const unsigned len)
+ const unsigned int len)
{
/*
* Step 4: Prepare sequence numbers in REQUEST
@@ -521,7 +521,7 @@ unable_to_proceed:
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
struct sk_buff *skb,
const struct dccp_hdr *dh,
- const unsigned len)
+ const unsigned int len)
{
struct dccp_sock *dp = dccp_sk(sk);
u32 sample = dp->dccps_options_received.dccpor_timestamp_echo;
@@ -572,7 +572,7 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
}
int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
- struct dccp_hdr *dh, unsigned len)
+ struct dccp_hdr *dh, unsigned int len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
@@ -619,20 +619,31 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
return 1;
}
- if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) {
- if (dccp_check_seqno(sk, skb))
- goto discard;
-
- /*
- * Step 8: Process options and mark acknowledgeable
- */
- if (dccp_parse_options(sk, NULL, skb))
- return 1;
+ /* Step 6: Check sequence numbers (omitted in LISTEN/REQUEST state) */
+ if (sk->sk_state != DCCP_REQUESTING && dccp_check_seqno(sk, skb))
+ goto discard;
- dccp_handle_ackvec_processing(sk, skb);
- dccp_deliver_input_to_ccids(sk, skb);
+ /*
+ * Step 7: Check for unexpected packet types
+ * If (S.is_server and P.type == Response)
+ * or (S.is_client and P.type == Request)
+ * or (S.state == RESPOND and P.type == Data),
+ * Send Sync packet acknowledging P.seqno
+ * Drop packet and return
+ */
+ if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
+ dh->dccph_type == DCCP_PKT_RESPONSE) ||
+ (dp->dccps_role == DCCP_ROLE_CLIENT &&
+ dh->dccph_type == DCCP_PKT_REQUEST) ||
+ (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) {
+ dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
+ goto discard;
}
+ /* Step 8: Process options */
+ if (dccp_parse_options(sk, NULL, skb))
+ return 1;
+
/*
* Step 9: Process Reset
* If P.type == Reset,
@@ -640,31 +651,15 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
* S.state := TIMEWAIT
* Set TIMEWAIT timer
* Drop packet and return
- */
+ */
if (dh->dccph_type == DCCP_PKT_RESET) {
dccp_rcv_reset(sk, skb);
return 0;
- /*
- * Step 7: Check for unexpected packet types
- * If (S.is_server and P.type == Response)
- * or (S.is_client and P.type == Request)
- * or (S.state == RESPOND and P.type == Data),
- * Send Sync packet acknowledging P.seqno
- * Drop packet and return
- */
- } else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
- dh->dccph_type == DCCP_PKT_RESPONSE) ||
- (dp->dccps_role == DCCP_ROLE_CLIENT &&
- dh->dccph_type == DCCP_PKT_REQUEST) ||
- (sk->sk_state == DCCP_RESPOND &&
- dh->dccph_type == DCCP_PKT_DATA)) {
- dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
- goto discard;
- } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
+ } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { /* Step 13 */
if (dccp_rcv_closereq(sk, skb))
return 0;
goto discard;
- } else if (dh->dccph_type == DCCP_PKT_CLOSE) {
+ } else if (dh->dccph_type == DCCP_PKT_CLOSE) { /* Step 14 */
if (dccp_rcv_close(sk, skb))
return 0;
goto discard;
@@ -679,8 +674,12 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
__kfree_skb(skb);
return 0;
- case DCCP_RESPOND:
case DCCP_PARTOPEN:
+ /* Step 8: if using Ack Vectors, mark packet acknowledgeable */
+ dccp_handle_ackvec_processing(sk, skb);
+ dccp_deliver_input_to_ccids(sk, skb);
+ /* fall through */
+ case DCCP_RESPOND:
queued = dccp_rcv_respond_partopen_state_process(sk, skb,
dh, len);
break;
@@ -711,6 +710,7 @@ EXPORT_SYMBOL_GPL(dccp_rcv_state_process);
/**
* dccp_sample_rtt - Validate and finalise computation of RTT sample
* @delta: number of microseconds between packet and acknowledgment
+ *
* The routine is kept generic to work in different contexts. It should be
* called immediately when the ACK used for the RTT sample arrives.
*/
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 45a434f9416..6ca645c4b48 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -26,6 +26,7 @@
#include <net/timewait_sock.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>
+#include <net/secure_seq.h>
#include "ackvec.h"
#include "ccid.h"
@@ -40,13 +41,15 @@
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
+ const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct inet_sock *inet = inet_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
- const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
- struct rtable *rt;
+ __be16 orig_sport, orig_dport;
__be32 daddr, nexthop;
- int tmp;
+ struct flowi4 *fl4;
+ struct rtable *rt;
int err;
+ struct ip_options_rcu *inet_opt;
dp->dccps_role = DCCP_ROLE_CLIENT;
@@ -57,37 +60,43 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
return -EAFNOSUPPORT;
nexthop = daddr = usin->sin_addr.s_addr;
- if (inet->opt != NULL && inet->opt->srr) {
+
+ inet_opt = rcu_dereference_protected(inet->inet_opt,
+ sock_owned_by_user(sk));
+ if (inet_opt != NULL && inet_opt->opt.srr) {
if (daddr == 0)
return -EINVAL;
- nexthop = inet->opt->faddr;
+ nexthop = inet_opt->opt.faddr;
}
- tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
- IPPROTO_DCCP,
- inet->inet_sport, usin->sin_port, sk, 1);
- if (tmp < 0)
- return tmp;
+ orig_sport = inet->inet_sport;
+ orig_dport = usin->sin_port;
+ fl4 = &inet->cork.fl.u.ip4;
+ rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
+ RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
+ IPPROTO_DCCP,
+ orig_sport, orig_dport, sk);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
ip_rt_put(rt);
return -ENETUNREACH;
}
- if (inet->opt == NULL || !inet->opt->srr)
- daddr = rt->rt_dst;
+ if (inet_opt == NULL || !inet_opt->opt.srr)
+ daddr = fl4->daddr;
if (inet->inet_saddr == 0)
- inet->inet_saddr = rt->rt_src;
+ inet->inet_saddr = fl4->saddr;
inet->inet_rcv_saddr = inet->inet_saddr;
inet->inet_dport = usin->sin_port;
inet->inet_daddr = daddr;
inet_csk(sk)->icsk_ext_hdr_len = 0;
- if (inet->opt != NULL)
- inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
+ if (inet_opt)
+ inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
/*
* Socket identity is still unknown (sport may be zero).
* However we set state to DCCP_REQUESTING and not releasing socket
@@ -99,11 +108,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (err != 0)
goto failure;
- err = ip_route_newports(&rt, IPPROTO_DCCP, inet->inet_sport,
- inet->inet_dport, sk);
- if (err != 0)
+ rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
+ inet->inet_sport, inet->inet_dport, sk);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ rt = NULL;
goto failure;
-
+ }
/* OK, now commit destination to socket. */
sk_setup_caps(sk, &rt->dst);
@@ -150,17 +161,10 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
if (sk->sk_state == DCCP_LISTEN)
return;
- /* We don't check in the destentry if pmtu discovery is forbidden
- * on this route. We just assume that no packet_to_big packets
- * are send back when pmtu discovery is not active.
- * There is a small race when the user changes this flag in the
- * route, but I think that's acceptable.
- */
- if ((dst = __sk_dst_check(sk, 0)) == NULL)
+ dst = inet_csk_update_pmtu(sk, mtu);
+ if (!dst)
return;
- dst->ops->update_pmtu(dst, mtu);
-
/* Something is about to be wrong... Remember soft error
* for the case, if this connection will not able to recover.
*/
@@ -170,6 +174,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
mtu = dst_mtu(dst);
if (inet->pmtudisc != IP_PMTUDISC_DONT &&
+ ip_sk_accept_pmtu(sk) &&
inet_csk(sk)->icsk_pmtu_cookie > mtu) {
dccp_sync_mss(sk, mtu);
@@ -184,6 +189,14 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
} /* else let the usual retransmit timer handle it */
}
+static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk)
+{
+ struct dst_entry *dst = __sk_dst_check(sk, 0);
+
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+}
+
/*
* This routine is called by the ICMP module when it gets some sort of error
* condition. If err < 0 then the socket should be closed and the error
@@ -248,6 +261,9 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
}
switch (type) {
+ case ICMP_REDIRECT:
+ dccp_do_redirect(skb, sk);
+ goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
goto out;
@@ -289,7 +305,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
*/
WARN_ON(req->sk);
- if (seq != dccp_rsk(req)->dreq_iss) {
+ if (!between48(seq, dccp_rsk(req)->dreq_iss,
+ dccp_rsk(req)->dreq_gss)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -387,32 +404,30 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
if (sk_acceptq_is_full(sk))
goto exit_overflow;
- if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
- goto exit;
-
newsk = dccp_create_openreq_child(sk, req, skb);
if (newsk == NULL)
goto exit_nonewsk;
- sk_setup_caps(newsk, dst);
-
newinet = inet_sk(newsk);
ireq = inet_rsk(req);
- newinet->inet_daddr = ireq->rmt_addr;
- newinet->inet_rcv_saddr = ireq->loc_addr;
- newinet->inet_saddr = ireq->loc_addr;
- newinet->opt = ireq->opt;
+ newinet->inet_daddr = ireq->ir_rmt_addr;
+ newinet->inet_rcv_saddr = ireq->ir_loc_addr;
+ newinet->inet_saddr = ireq->ir_loc_addr;
+ newinet->inet_opt = ireq->opt;
ireq->opt = NULL;
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->inet_id = jiffies;
+ if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
+ goto put_and_exit;
+
+ sk_setup_caps(newsk, dst);
+
dccp_sync_mss(newsk, dst_mtu(dst));
- if (__inet_inherit_port(sk, newsk) < 0) {
- sock_put(newsk);
- goto exit;
- }
+ if (__inet_inherit_port(sk, newsk) < 0)
+ goto put_and_exit;
__inet_hash_nolisten(newsk, NULL);
return newsk;
@@ -424,6 +439,10 @@ exit_nonewsk:
exit:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL;
+put_and_exit:
+ inet_csk_prepare_forced_close(newsk);
+ dccp_done(newsk);
+ goto exit;
}
EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
@@ -461,17 +480,20 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct rtable *rt;
- struct flowi fl = { .oif = skb_rtable(skb)->rt_iif,
- .fl4_dst = ip_hdr(skb)->saddr,
- .fl4_src = ip_hdr(skb)->daddr,
- .fl4_tos = RT_CONN_FLAGS(sk),
- .proto = sk->sk_protocol,
- .fl_ip_sport = dccp_hdr(skb)->dccph_dport,
- .fl_ip_dport = dccp_hdr(skb)->dccph_sport
- };
-
- security_skb_classify_flow(skb, &fl);
- if (ip_route_output_flow(net, &rt, &fl, sk, 0)) {
+ const struct iphdr *iph = ip_hdr(skb);
+ struct flowi4 fl4 = {
+ .flowi4_oif = inet_iif(skb),
+ .daddr = iph->saddr,
+ .saddr = iph->daddr,
+ .flowi4_tos = RT_CONN_FLAGS(sk),
+ .flowi4_proto = sk->sk_protocol,
+ .fl4_sport = dccp_hdr(skb)->dccph_dport,
+ .fl4_dport = dccp_hdr(skb)->dccph_sport,
+ };
+
+ security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
+ rt = ip_route_output_flow(net, &fl4, sk);
+ if (IS_ERR(rt)) {
IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
@@ -479,14 +501,14 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
return &rt->dst;
}
-static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
- struct request_values *rv_unused)
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
{
int err = -1;
struct sk_buff *skb;
struct dst_entry *dst;
+ struct flowi4 fl4;
- dst = inet_csk_route_req(sk, req);
+ dst = inet_csk_route_req(sk, &fl4, req);
if (dst == NULL)
goto out;
@@ -495,10 +517,10 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
const struct inet_request_sock *ireq = inet_rsk(req);
struct dccp_hdr *dh = dccp_hdr(skb);
- dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr,
- ireq->rmt_addr);
- err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
- ireq->rmt_addr,
+ dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr);
+ err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr,
ireq->opt);
err = net_xmit_eval(err);
}
@@ -556,6 +578,11 @@ static void dccp_v4_reqsk_destructor(struct request_sock *req)
kfree(inet_rsk(req)->opt);
}
+void dccp_syn_ack_timeout(struct sock *sk, struct request_sock *req)
+{
+}
+EXPORT_SYMBOL(dccp_syn_ack_timeout);
+
static struct request_sock_ops dccp_request_sock_ops __read_mostly = {
.family = PF_INET,
.obj_size = sizeof(struct dccp_request_sock),
@@ -563,6 +590,7 @@ static struct request_sock_ops dccp_request_sock_ops __read_mostly = {
.send_ack = dccp_reqsk_send_ack,
.destructor = dccp_v4_reqsk_destructor,
.send_reset = dccp_v4_ctl_send_reset,
+ .syn_ack_timeout = dccp_syn_ack_timeout,
};
int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
@@ -614,22 +642,23 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
ireq = inet_rsk(req);
- ireq->loc_addr = ip_hdr(skb)->daddr;
- ireq->rmt_addr = ip_hdr(skb)->saddr;
+ ireq->ir_loc_addr = ip_hdr(skb)->daddr;
+ ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
/*
* Step 3: Process LISTEN state
*
* Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
*
- * In fact we defer setting S.GSR, S.SWL, S.SWH to
- * dccp_create_openreq_child.
+ * Setting S.SWL/S.SWH to is deferred to dccp_create_openreq_child().
*/
dreq->dreq_isr = dcb->dccpd_seq;
+ dreq->dreq_gsr = dreq->dreq_isr;
dreq->dreq_iss = dccp_v4_init_sequence(skb);
+ dreq->dreq_gss = dreq->dreq_iss;
dreq->dreq_service = service;
- if (dccp_v4_send_response(sk, req, NULL))
+ if (dccp_v4_send_response(sk, req))
goto drop_and_free;
inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
@@ -960,6 +989,7 @@ static const struct net_protocol dccp_v4_protocol = {
.err_handler = dccp_v4_err,
.no_policy = 1,
.netns_ok = 1,
+ .icmp_strict_tag_validation = 1,
};
static const struct proto_ops inet_dccp_ops = {
@@ -994,7 +1024,6 @@ static struct inet_protosw dccp_v4_protosw = {
.protocol = IPPROTO_DCCP,
.prot = &dccp_v4_prot,
.ops = &inet_dccp_ops,
- .no_check = 0,
.flags = INET_PROTOSW_ICSK,
};
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index dca711df9b6..4db3c2a1679 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -29,6 +29,7 @@
#include <net/transp_v6.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
+#include <net/secure_seq.h>
#include "dccp.h"
#include "ipv6.h"
@@ -54,8 +55,8 @@ static void dccp_v6_hash(struct sock *sk)
/* add pseudo-header to DCCP checksum stored in skb->csum */
static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
- struct in6_addr *saddr,
- struct in6_addr *daddr)
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr)
{
return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum);
}
@@ -66,16 +67,10 @@ static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
struct dccp_hdr *dh = dccp_hdr(skb);
dccp_csum_outgoing(skb);
- dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &np->daddr);
+ dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr);
}
-static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
- __be16 sport, __be16 dport )
-{
- return secure_tcpv6_sequence_number(saddr, daddr, sport, dport);
-}
-
-static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
+static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
{
return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32,
@@ -87,7 +82,7 @@ static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
+ const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
struct dccp_sock *dp;
struct ipv6_pinfo *np;
@@ -135,51 +130,31 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
np = inet6_sk(sk);
+ if (type == NDISC_REDIRECT) {
+ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+ goto out;
+ }
+
if (type == ICMPV6_PKT_TOOBIG) {
struct dst_entry *dst = NULL;
+ if (!ip6_sk_accept_pmtu(sk))
+ goto out;
+
if (sock_owned_by_user(sk))
goto out;
if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED))
goto out;
- /* icmp should have updated the destination cache entry */
- dst = __sk_dst_check(sk, np->dst_cookie);
- if (dst == NULL) {
- struct inet_sock *inet = inet_sk(sk);
- struct flowi fl;
-
- /* BUGGG_FUTURE: Again, it is not clear how
- to handle rthdr case. Ignore this complexity
- for now.
- */
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_DCCP;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src, &np->saddr);
- fl.oif = sk->sk_bound_dev_if;
- fl.fl_ip_dport = inet->inet_dport;
- fl.fl_ip_sport = inet->inet_sport;
- security_sk_classify_flow(sk, &fl);
-
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err) {
- sk->sk_err_soft = -err;
- goto out;
- }
-
- err = xfrm_lookup(net, &dst, &fl, sk, 0);
- if (err < 0) {
- sk->sk_err_soft = -err;
- goto out;
- }
- } else
- dst_hold(dst);
+ dst = inet6_csk_update_pmtu(sk, ntohl(info));
+ if (!dst)
+ goto out;
- if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
+ if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst))
dccp_sync_mss(sk, dst_mtu(dst));
- } /* else let the usual retransmit timer handle it */
- dst_release(dst);
goto out;
}
@@ -204,7 +179,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
*/
WARN_ON(req->sk != NULL);
- if (seq != dccp_rsk(req)->dreq_iss) {
+ if (!between48(seq, dccp_rsk(req)->dreq_iss,
+ dccp_rsk(req)->dreq_gss)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -241,58 +217,49 @@ out:
}
-static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
- struct request_values *rv_unused)
+static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
{
- struct inet6_request_sock *ireq6 = inet6_rsk(req);
+ struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff *skb;
- struct ipv6_txoptions *opt = NULL;
struct in6_addr *final_p, final;
- struct flowi fl;
+ struct flowi6 fl6;
int err = -1;
struct dst_entry *dst;
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_DCCP;
- ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
- ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
- fl.fl6_flowlabel = 0;
- fl.oif = ireq6->iif;
- fl.fl_ip_dport = inet_rsk(req)->rmt_port;
- fl.fl_ip_sport = inet_rsk(req)->loc_port;
- security_req_classify_flow(req, &fl);
-
- opt = np->opt;
-
- final_p = fl6_update_dst(&fl, opt, &final);
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_DCCP;
+ fl6.daddr = ireq->ir_v6_rmt_addr;
+ fl6.saddr = ireq->ir_v6_loc_addr;
+ fl6.flowlabel = 0;
+ fl6.flowi6_oif = ireq->ir_iif;
+ fl6.fl6_dport = ireq->ir_rmt_port;
+ fl6.fl6_sport = htons(ireq->ir_num);
+ security_req_classify_flow(req, flowi6_to_flowi(&fl6));
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err)
- goto done;
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
- err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0);
- if (err < 0)
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ dst = NULL;
goto done;
+ }
skb = dccp_make_response(sk, dst, req);
if (skb != NULL) {
struct dccp_hdr *dh = dccp_hdr(skb);
dh->dccph_checksum = dccp_v6_csum_finish(skb,
- &ireq6->loc_addr,
- &ireq6->rmt_addr);
- ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
- err = ip6_xmit(sk, skb, &fl, opt);
+ &ireq->ir_v6_loc_addr,
+ &ireq->ir_v6_rmt_addr);
+ fl6.daddr = ireq->ir_v6_rmt_addr;
+ err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
err = net_xmit_eval(err);
}
done:
- if (opt != NULL && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
return err;
}
@@ -300,15 +267,14 @@ done:
static void dccp_v6_reqsk_destructor(struct request_sock *req)
{
dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
- if (inet6_rsk(req)->pktopts != NULL)
- kfree_skb(inet6_rsk(req)->pktopts);
+ kfree_skb(inet_rsk(req)->pktopts);
}
static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
{
- struct ipv6hdr *rxip6h;
+ const struct ipv6hdr *rxip6h;
struct sk_buff *skb;
- struct flowi fl;
+ struct flowi6 fl6;
struct net *net = dev_net(skb_dst(rxskb)->dev);
struct sock *ctl_sk = net->dccp.v6_ctl_sk;
struct dst_entry *dst;
@@ -327,25 +293,24 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
&rxip6h->daddr);
- memset(&fl, 0, sizeof(fl));
- ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr);
- ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr);
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = rxip6h->saddr;
+ fl6.saddr = rxip6h->daddr;
- fl.proto = IPPROTO_DCCP;
- fl.oif = inet6_iif(rxskb);
- fl.fl_ip_dport = dccp_hdr(skb)->dccph_dport;
- fl.fl_ip_sport = dccp_hdr(skb)->dccph_sport;
- security_skb_classify_flow(rxskb, &fl);
+ fl6.flowi6_proto = IPPROTO_DCCP;
+ fl6.flowi6_oif = inet6_iif(rxskb);
+ fl6.fl6_dport = dccp_hdr(skb)->dccph_dport;
+ fl6.fl6_sport = dccp_hdr(skb)->dccph_sport;
+ security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6));
/* sk = NULL, but it is safe for now. RST socket required. */
- if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
- if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
- skb_dst_set(skb, dst);
- ip6_xmit(ctl_sk, skb, &fl, NULL);
- DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
- DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
- return;
- }
+ dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
+ if (!IS_ERR(dst)) {
+ skb_dst_set(skb, dst);
+ ip6_xmit(ctl_sk, skb, &fl6, NULL, 0);
+ DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
+ DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+ return;
}
kfree_skb(skb);
@@ -358,6 +323,7 @@ static struct request_sock_ops dccp6_request_sock_ops = {
.send_ack = dccp_reqsk_send_ack,
.destructor = dccp_v6_reqsk_destructor,
.send_reset = dccp_v6_ctl_send_reset,
+ .syn_ack_timeout = dccp_syn_ack_timeout,
};
static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
@@ -395,7 +361,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct request_sock *req;
struct dccp_request_sock *dreq;
- struct inet6_request_sock *ireq6;
+ struct inet_request_sock *ireq;
struct ipv6_pinfo *np = inet6_sk(sk);
const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
@@ -434,36 +400,37 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
- ireq6 = inet6_rsk(req);
- ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
- ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
+ ireq = inet_rsk(req);
+ ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+ ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
if (ipv6_opt_accepted(sk, skb) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
atomic_inc(&skb->users);
- ireq6->pktopts = skb;
+ ireq->pktopts = skb;
}
- ireq6->iif = sk->sk_bound_dev_if;
+ ireq->ir_iif = sk->sk_bound_dev_if;
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
- ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq6->iif = inet6_iif(skb);
+ ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
+ ireq->ir_iif = inet6_iif(skb);
/*
* Step 3: Process LISTEN state
*
* Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
*
- * In fact we defer setting S.GSR, S.SWL, S.SWH to
- * dccp_create_openreq_child.
+ * Setting S.SWL/S.SWH to is deferred to dccp_create_openreq_child().
*/
dreq->dreq_isr = dcb->dccpd_seq;
+ dreq->dreq_gsr = dreq->dreq_isr;
dreq->dreq_iss = dccp_v6_init_sequence(skb);
+ dreq->dreq_gss = dreq->dreq_iss;
dreq->dreq_service = service;
- if (dccp_v6_send_response(sk, req, NULL))
+ if (dccp_v6_send_response(sk, req))
goto drop_and_free;
inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
@@ -481,13 +448,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
struct request_sock *req,
struct dst_entry *dst)
{
- struct inet6_request_sock *ireq6 = inet6_rsk(req);
+ struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct inet_sock *newinet;
- struct dccp_sock *newdp;
struct dccp6_sock *newdp6;
struct sock *newsk;
- struct ipv6_txoptions *opt;
if (skb->protocol == htons(ETH_P_IP)) {
/*
@@ -498,18 +463,17 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
return NULL;
newdp6 = (struct dccp6_sock *)newsk;
- newdp = dccp_sk(newsk);
newinet = inet_sk(newsk);
newinet->pinet6 = &newdp6->inet6;
newnp = inet6_sk(newsk);
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);
+ ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);
ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
- ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
+ newsk->sk_v6_rcv_saddr = newnp->saddr;
inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
newsk->sk_backlog_rcv = dccp_v4_do_rcv;
@@ -533,32 +497,26 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
return newsk;
}
- opt = np->opt;
if (sk_acceptq_is_full(sk))
goto out_overflow;
if (dst == NULL) {
struct in6_addr *final_p, final;
- struct flowi fl;
-
- memset(&fl, 0, sizeof(fl));
- fl.proto = IPPROTO_DCCP;
- ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
- final_p = fl6_update_dst(&fl, opt, &final);
- ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
- fl.oif = sk->sk_bound_dev_if;
- fl.fl_ip_dport = inet_rsk(req)->rmt_port;
- fl.fl_ip_sport = inet_rsk(req)->loc_port;
- security_sk_classify_flow(sk, &fl);
-
- if (ip6_dst_lookup(sk, &dst, &fl))
- goto out;
-
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0)
+ struct flowi6 fl6;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_DCCP;
+ fl6.daddr = ireq->ir_v6_rmt_addr;
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
+ fl6.saddr = ireq->ir_v6_loc_addr;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.fl6_dport = ireq->ir_rmt_port;
+ fl6.fl6_sport = htons(ireq->ir_num);
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ if (IS_ERR(dst))
goto out;
}
@@ -578,31 +536,30 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
newdp6 = (struct dccp6_sock *)newsk;
newinet = inet_sk(newsk);
newinet->pinet6 = &newdp6->inet6;
- newdp = dccp_sk(newsk);
newnp = inet6_sk(newsk);
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
- ipv6_addr_copy(&newnp->daddr, &ireq6->rmt_addr);
- ipv6_addr_copy(&newnp->saddr, &ireq6->loc_addr);
- ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr);
- newsk->sk_bound_dev_if = ireq6->iif;
+ newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
+ newnp->saddr = ireq->ir_v6_loc_addr;
+ newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
+ newsk->sk_bound_dev_if = ireq->ir_iif;
/* Now IPv6 options...
First: no IPv4 options.
*/
- newinet->opt = NULL;
+ newinet->inet_opt = NULL;
/* Clone RX bits */
newnp->rxopt.all = np->rxopt.all;
/* Clone pktoptions received with SYN */
newnp->pktoptions = NULL;
- if (ireq6->pktopts != NULL) {
- newnp->pktoptions = skb_clone(ireq6->pktopts, GFP_ATOMIC);
- kfree_skb(ireq6->pktopts);
- ireq6->pktopts = NULL;
+ if (ireq->pktopts != NULL) {
+ newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
+ consume_skb(ireq->pktopts);
+ ireq->pktopts = NULL;
if (newnp->pktoptions)
skb_set_owner_r(newnp->pktoptions, newsk);
}
@@ -616,11 +573,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
* Yes, keeping reference count would be much more clever, but we make
* one more one thing there: reattach optmem to newsk.
*/
- if (opt != NULL) {
- newnp->opt = ipv6_dup_options(newsk, opt);
- if (opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- }
+ if (np->opt != NULL)
+ newnp->opt = ipv6_dup_options(newsk, np->opt);
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newnp->opt != NULL)
@@ -633,7 +587,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
if (__inet_inherit_port(sk, newsk) < 0) {
- sock_put(newsk);
+ inet_csk_prepare_forced_close(newsk);
+ dccp_done(newsk);
goto out;
}
__inet6_hash(newsk, NULL);
@@ -646,8 +601,6 @@ out_nonewsk:
dst_release(dst);
out:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
- if (opt != NULL && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
return NULL;
}
@@ -878,7 +831,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
- struct flowi fl;
+ struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
int err;
@@ -891,17 +844,16 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (usin->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
- memset(&fl, 0, sizeof(fl));
+ memset(&fl6, 0, sizeof(fl6));
if (np->sndflow) {
- fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
- IP6_ECN_flow_init(fl.fl6_flowlabel);
- if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
+ fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
+ IP6_ECN_flow_init(fl6.flowlabel);
+ if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
struct ip6_flowlabel *flowlabel;
- flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+ flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
if (flowlabel == NULL)
return -EINVAL;
- ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
fl6_sock_release(flowlabel);
}
}
@@ -934,8 +886,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
return -EINVAL;
}
- ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
- np->flow_label = fl.fl6_flowlabel;
+ sk->sk_v6_daddr = usin->sin6_addr;
+ np->flow_label = fl6.flowlabel;
/*
* DCCP over IPv4
@@ -964,46 +916,37 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
goto failure;
}
ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
- ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr);
+ ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &sk->sk_v6_rcv_saddr);
return err;
}
- if (!ipv6_addr_any(&np->rcv_saddr))
- saddr = &np->rcv_saddr;
+ if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ saddr = &sk->sk_v6_rcv_saddr;
- fl.proto = IPPROTO_DCCP;
- ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
- ipv6_addr_copy(&fl.fl6_src, saddr ? saddr : &np->saddr);
- fl.oif = sk->sk_bound_dev_if;
- fl.fl_ip_dport = usin->sin6_port;
- fl.fl_ip_sport = inet->inet_sport;
- security_sk_classify_flow(sk, &fl);
+ fl6.flowi6_proto = IPPROTO_DCCP;
+ fl6.daddr = sk->sk_v6_daddr;
+ fl6.saddr = saddr ? *saddr : np->saddr;
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ fl6.fl6_dport = usin->sin6_port;
+ fl6.fl6_sport = inet->inet_sport;
+ security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl, np->opt, &final);
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
- err = ip6_dst_lookup(sk, &dst, &fl);
- if (err)
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
goto failure;
-
- if (final_p)
- ipv6_addr_copy(&fl.fl6_dst, final_p);
-
- err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT);
- if (err < 0) {
- if (err == -EREMOTE)
- err = ip6_dst_blackhole(sk, &dst, &fl);
- if (err < 0)
- goto failure;
}
if (saddr == NULL) {
- saddr = &fl.fl6_src;
- ipv6_addr_copy(&np->rcv_saddr, saddr);
+ saddr = &fl6.saddr;
+ sk->sk_v6_rcv_saddr = *saddr;
}
/* set the source address */
- ipv6_addr_copy(&np->saddr, saddr);
+ np->saddr = *saddr;
inet->inet_rcv_saddr = LOOPBACK4_IPV6;
__ip6_dst_store(sk, dst, NULL, NULL);
@@ -1021,7 +964,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
goto late_failure;
dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
- np->daddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
inet->inet_dport);
err = dccp_connect(sk);
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
index 6eef81fdbe5..af259e15e7f 100644
--- a/net/dccp/ipv6.h
+++ b/net/dccp/ipv6.h
@@ -25,12 +25,10 @@ struct dccp6_sock {
struct dccp6_request_sock {
struct dccp_request_sock dccp;
- struct inet6_request_sock inet6;
};
struct dccp6_timewait_sock {
struct inet_timewait_sock inet;
- struct inet6_timewait_sock tw6;
};
#endif /* _DCCP_IPV6_H */
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index d7041a0963a..c69eb9c4fbb 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -53,15 +53,12 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
if (tw != NULL) {
const struct inet_connection_sock *icsk = inet_csk(sk);
const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);
- struct inet6_timewait_sock *tw6;
- tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
- tw6 = inet6_twsk((struct sock *)tw);
- ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
- ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
+ tw->tw_v6_daddr = sk->sk_v6_daddr;
+ tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
tw->tw_ipv6only = np->ipv6only;
}
#endif
@@ -100,7 +97,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
* (* Generate a new socket and switch to that socket *)
* Set S := new socket for this port pair
*/
- struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
+ struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
if (newsk != NULL) {
struct dccp_request_sock *dreq = dccp_rsk(req);
@@ -127,9 +124,11 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
* activation below, as these windows all depend on the local
* and remote Sequence Window feature values (7.5.2).
*/
- newdp->dccps_gss = newdp->dccps_iss = dreq->dreq_iss;
+ newdp->dccps_iss = dreq->dreq_iss;
+ newdp->dccps_gss = dreq->dreq_gss;
newdp->dccps_gar = newdp->dccps_iss;
- newdp->dccps_gsr = newdp->dccps_isr = dreq->dreq_isr;
+ newdp->dccps_isr = dreq->dreq_isr;
+ newdp->dccps_gsr = dreq->dreq_gsr;
/*
* Activate features: initialise CCIDs, sequence windows etc.
@@ -164,16 +163,15 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
/* Check for retransmitted REQUEST */
if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
- if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) {
+ if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_gsr)) {
dccp_pr_debug("Retransmitted REQUEST\n");
- dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
+ dreq->dreq_gsr = DCCP_SKB_CB(skb)->dccpd_seq;
/*
* Send another RESPONSE packet
* To protect against Request floods, increment retrans
* counter (backoff, monitored by dccp_response_timer).
*/
- req->retrans++;
- req->rsk_ops->rtx_syn_ack(sk, req, NULL);
+ inet_rtx_syn_ack(sk, req);
}
/* Network Duplicate, discard packet */
return NULL;
@@ -186,12 +184,14 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
goto drop;
/* Invalid ACK */
- if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dreq->dreq_iss) {
+ if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
+ dreq->dreq_iss, dreq->dreq_gss)) {
dccp_pr_debug("Invalid ACK number: ack_seq=%llu, "
- "dreq_iss=%llu\n",
+ "dreq_iss=%llu, dreq_gss=%llu\n",
(unsigned long long)
DCCP_SKB_CB(skb)->dccpd_ack_seq,
- (unsigned long long) dreq->dreq_iss);
+ (unsigned long long) dreq->dreq_iss,
+ (unsigned long long) dreq->dreq_gss);
goto drop;
}
@@ -237,7 +237,7 @@ int dccp_child_process(struct sock *parent, struct sock *child,
/* Wakeup parent, send SIGIO */
if (state == DCCP_RESPOND && child->sk_state != state)
- parent->sk_data_ready(parent, 0);
+ parent->sk_data_ready(parent);
} else {
/* Alas, it is possible again, because we do lookup
* in main socket hash table and lock on listening
@@ -266,10 +266,10 @@ int dccp_reqsk_init(struct request_sock *req,
{
struct dccp_request_sock *dreq = dccp_rsk(req);
- inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
- inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport;
- inet_rsk(req)->acked = 0;
- dreq->dreq_timestamp_echo = 0;
+ inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport;
+ inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport);
+ inet_rsk(req)->acked = 0;
+ dreq->dreq_timestamp_echo = 0;
/* inherit feature negotiation options from listening socket */
return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index f06ffcfc8d7..9bce31886bd 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -123,6 +123,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R:
if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */
break;
+ if (len == 0)
+ goto out_invalid_option;
rc = dccp_feat_parse_options(sk, dreq, mandatory, opt,
*value, value + 1, len - 1);
if (rc)
@@ -341,38 +343,6 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time)
return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
}
-/* FIXME: This function is currently not used anywhere */
-int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time)
-{
- const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
- const int len = 2 + elapsed_time_len;
- unsigned char *to;
-
- if (elapsed_time_len == 0)
- return 0;
-
- if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
- return -1;
-
- DCCP_SKB_CB(skb)->dccpd_opt_len += len;
-
- to = skb_push(skb, len);
- *to++ = DCCPO_ELAPSED_TIME;
- *to++ = len;
-
- if (elapsed_time_len == 2) {
- const __be16 var16 = htons((u16)elapsed_time);
- memcpy(to, &var16, 2);
- } else {
- const __be32 var32 = htonl(elapsed_time);
- memcpy(to, &var32, 4);
- }
-
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
-
static int dccp_insert_option_timestamp(struct sk_buff *skb)
{
__be32 now = htonl(dccp_timestamp());
@@ -525,6 +495,7 @@ int dccp_insert_option_mandatory(struct sk_buff *skb)
* @val: NN value or SP array (preferred element first) to copy
* @len: true length of @val in bytes (excluding first element repetition)
* @repeat_first: whether to copy the first element of @val twice
+ *
* The last argument is used to construct Confirm options, where the preferred
* value and the preference list appear separately (RFC 4340, 6.3.1). Preference
* lists are kept such that the preferred entry is always first, so we only need
@@ -542,7 +513,7 @@ int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
}
if (unlikely(val == NULL || len == 0))
- len = repeat_first = 0;
+ len = repeat_first = false;
tot_len = 3 + repeat_first + len;
if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) {
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 784d3021054..0248e8a3460 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -27,11 +27,13 @@ static inline void dccp_event_ack_sent(struct sock *sk)
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
-static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
+/* enqueue @skb on sk_send_head for retransmission, return clone to send now */
+static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
{
skb_set_owner_w(skb, sk);
WARN_ON(sk->sk_send_head);
sk->sk_send_head = skb;
+ return skb_clone(sk->sk_send_head, gfp_any());
}
/*
@@ -43,7 +45,7 @@ static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
if (likely(skb != NULL)) {
- const struct inet_sock *inet = inet_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
@@ -136,14 +138,14 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
- err = icsk->icsk_af_ops->queue_xmit(skb);
+ err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
return net_xmit_eval(err);
}
return -ENOBUFS;
}
/**
- * dccp_determine_ccmps - Find out about CCID-specfic packet-size limits
+ * dccp_determine_ccmps - Find out about CCID-specific packet-size limits
* We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.),
* since the RX CCID is restricted to feedback packets (Acks), which are small
* in comparison with the data traffic. A value of 0 means "no current CCMPS".
@@ -212,6 +214,7 @@ void dccp_write_space(struct sock *sk)
* dccp_wait_for_ccid - Await CCID send permission
* @sk: socket to wait for
* @delay: timeout in jiffies
+ *
* This is used by CCIDs which need to delay the send time in process context.
*/
static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
@@ -406,10 +409,10 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
skb_dst_set(skb, dst_clone(dst));
dreq = dccp_rsk(req);
- if (inet_rsk(req)->acked) /* increase ISS upon retransmission */
- dccp_inc_seqno(&dreq->dreq_iss);
+ if (inet_rsk(req)->acked) /* increase GSS upon retransmission */
+ dccp_inc_seqno(&dreq->dreq_gss);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
- DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
+ DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_gss;
/* Resolve feature dependencies resulting from choice of CCID */
if (dccp_feat_server_ccid_dependencies(dreq))
@@ -421,14 +424,14 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
/* Build and checksum header */
dh = dccp_zeroed_hdr(skb, dccp_header_size);
- dh->dccph_sport = inet_rsk(req)->loc_port;
- dh->dccph_dport = inet_rsk(req)->rmt_port;
+ dh->dccph_sport = htons(inet_rsk(req)->ir_num);
+ dh->dccph_dport = inet_rsk(req)->ir_rmt_port;
dh->dccph_doff = (dccp_header_size +
DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
dh->dccph_type = DCCP_PKT_RESPONSE;
dh->dccph_x = 1;
- dccp_hdr_set_seq(dh, dreq->dreq_iss);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
+ dccp_hdr_set_seq(dh, dreq->dreq_gss);
+ dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_gsr);
dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
dccp_csum_outgoing(skb);
@@ -552,8 +555,7 @@ int dccp_connect(struct sock *sk)
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
- dccp_skb_entail(sk, skb);
- dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
+ dccp_transmit_skb(sk, dccp_skb_entail(sk, skb));
DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
/* Timer for repeating the REQUEST until an answer. */
@@ -678,8 +680,7 @@ void dccp_send_close(struct sock *sk, const int active)
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
if (active) {
- dccp_skb_entail(sk, skb);
- dccp_transmit_skb(sk, skb_clone(skb, prio));
+ skb = dccp_skb_entail(sk, skb);
/*
* Retransmission timer for active-close: RFC 4340, 8.3 requires
* to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
@@ -692,6 +693,6 @@ void dccp_send_close(struct sock *sk, const int active)
*/
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
- } else
- dccp_transmit_skb(sk, skb);
+ }
+ dccp_transmit_skb(sk, skb);
}
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 33d0e6297c2..595ddf0459d 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -160,18 +160,23 @@ static __init int dccpprobe_init(void)
spin_lock_init(&dccpw.lock);
if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL))
return ret;
- if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
+ if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops))
goto err0;
- try_then_request_module((ret = register_jprobe(&dccp_send_probe)) == 0,
- "dccp");
+ ret = register_jprobe(&dccp_send_probe);
+ if (ret) {
+ ret = request_module("dccp");
+ if (!ret)
+ ret = register_jprobe(&dccp_send_probe);
+ }
+
if (ret)
goto err1;
pr_info("DCCP watch registered (port=%d)\n", port);
return 0;
err1:
- proc_net_remove(&init_net, procname);
+ remove_proc_entry(procname, init_net.proc_net);
err0:
kfifo_free(&dccpw.fifo);
return ret;
@@ -181,7 +186,7 @@ module_init(dccpprobe_init);
static __exit void dccpprobe_exit(void)
{
kfifo_free(&dccpw.fifo);
- proc_net_remove(&init_net, procname);
+ remove_proc_entry(procname, init_net.proc_net);
unregister_jprobe(&dccp_send_probe);
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 152975d942d..de2c1e71930 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -184,7 +184,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
dp->dccps_rate_last = jiffies;
dp->dccps_role = DCCP_ROLE_UNDEFINED;
dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
- dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
dccp_init_xmit_timers(sk);
@@ -337,7 +336,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
mask |= POLLIN | POLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+ if (sk_stream_is_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
set_bit(SOCK_ASYNC_NOSPACE,
@@ -348,7 +347,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
* wspace test but before the flags are set,
* IO signal will be lost.
*/
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+ if (sk_stream_is_writeable(sk))
mask |= POLLOUT | POLLWRNORM;
}
}
@@ -849,7 +848,7 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
default:
dccp_pr_debug("packet_type=%s\n",
dccp_packet_name(dh->dccph_type));
- sk_eat_skb(sk, skb, 0);
+ sk_eat_skb(sk, skb, false);
}
verify_sock_status:
if (sock_flag(sk, SOCK_DONE)) {
@@ -906,7 +905,7 @@ verify_sock_status:
len = skb->len;
found_fin_ok:
if (!(flags & MSG_PEEK))
- sk_eat_skb(sk, skb, 0);
+ sk_eat_skb(sk, skb, false);
break;
} while (1);
out:
@@ -1085,14 +1084,15 @@ EXPORT_SYMBOL_GPL(dccp_shutdown);
static inline int dccp_mib_init(void)
{
- return snmp_mib_init((void __percpu **)dccp_statistics,
- sizeof(struct dccp_mib),
- __alignof__(struct dccp_mib));
+ dccp_statistics = alloc_percpu(struct dccp_mib);
+ if (!dccp_statistics)
+ return -ENOMEM;
+ return 0;
}
static inline void dccp_mib_exit(void)
{
- snmp_mib_free((void __percpu **)dccp_statistics);
+ free_percpu(dccp_statistics);
}
static int thash_entries;
@@ -1100,7 +1100,7 @@ module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
#ifdef CONFIG_IP_DCCP_DEBUG
-int dccp_debug;
+bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
@@ -1159,10 +1159,8 @@ static int __init dccp_init(void)
goto out_free_bind_bucket_cachep;
}
- for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) {
+ for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
- INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
- }
if (inet_ehash_locks_alloc(&dccp_hashinfo))
goto out_free_dccp_ehash;
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 42348824ee3..53731e45403 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -20,6 +20,7 @@
/* Boundary values */
static int zero = 0,
+ one = 1,
u8_max = 0xFF;
static unsigned long seqw_min = DCCPF_SEQ_WMIN,
seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */
@@ -58,7 +59,7 @@ static struct ctl_table dccp_default_table[] = {
.maxlen = sizeof(sysctl_dccp_request_retries),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = &one,
.extra2 = &u8_max,
},
{
@@ -98,18 +99,11 @@ static struct ctl_table dccp_default_table[] = {
{ }
};
-static struct ctl_path dccp_path[] = {
- { .procname = "net", },
- { .procname = "dccp", },
- { .procname = "default", },
- { }
-};
-
static struct ctl_table_header *dccp_table_header;
int __init dccp_sysctl_init(void)
{
- dccp_table_header = register_sysctl_paths(dccp_path,
+ dccp_table_header = register_net_sysctl(&init_net, "net/dccp/default",
dccp_default_table);
return dccp_table_header != NULL ? 0 : -ENOMEM;
@@ -118,7 +112,7 @@ int __init dccp_sysctl_init(void)
void dccp_sysctl_exit(void)
{
if (dccp_table_header != NULL) {
- unregister_sysctl_table(dccp_table_header);
+ unregister_net_sysctl_table(dccp_table_header);
dccp_table_header = NULL;
}
}
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 7587870b704..1cd46a345cb 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -12,6 +12,7 @@
#include <linux/dccp.h>
#include <linux/skbuff.h>
+#include <linux/export.h>
#include "dccp.h"
@@ -279,7 +280,7 @@ static ktime_t dccp_timestamp_seed;
*/
u32 dccp_timestamp(void)
{
- s64 delta = ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
+ u64 delta = (u64)ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
do_div(delta, 10);
return delta;