From 931731123a103cfb3f70ac4b7abfc71d94ba1f03 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 9 Nov 2006 19:58:25 -0800 Subject: [TCP]: Don't set SKB owner in tcp_transmit_skb(). The data itself is already charged to the SKB, doing the skb_set_owner_w() just generates a lot of noise and extra atomics we don't really need. Lmbench improvements on lat_tcp are minimal: before: TCP latency using localhost: 23.2701 microseconds TCP latency using localhost: 23.1994 microseconds TCP latency using localhost: 23.2257 microseconds after: TCP latency using localhost: 22.8380 microseconds TCP latency using localhost: 22.9465 microseconds TCP latency using localhost: 22.8462 microseconds Signed-off-by: David S. Miller --- net/dccp/output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/dccp/output.c') diff --git a/net/dccp/output.c b/net/dccp/output.c index 7102e3aed4c..2cc4f4b2a9d 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -125,7 +125,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = icsk->icsk_af_ops->queue_xmit(skb, 0); + err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0); if (err <= 0) return err; @@ -426,7 +426,7 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) code); if (skb != NULL) { memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0); + err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, sk, 0); if (err == NET_XMIT_CN) err = 0; } -- cgit v1.2.3-18-g5258 From 9b42078ed6edfe04e9dc9a59b946ad912aeef717 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 10 Nov 2006 11:22:32 -0200 Subject: [DCCP]: Combine allocating & zeroing header space on skb This is a code simplification: it combines three often recurring operations into one inline function, * allocate `len' bytes header space in skb * fill these `len' bytes with zeroes * cast 
the start of this header space as dccp_hdr Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/output.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'net/dccp/output.c') diff --git a/net/dccp/output.c b/net/dccp/output.c index 2cc4f4b2a9d..1ae2248557c 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -88,11 +88,9 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) return -EPROTO; } - skb->h.raw = skb_push(skb, dccp_header_size); - dh = dccp_hdr(skb); /* Build DCCP header and checksum it. */ - memset(dh, 0, dccp_header_size); + dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_type = dcb->dccpd_type; dh->dccph_sport = inet->sport; dh->dccph_dport = inet->dport; @@ -340,10 +338,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, return NULL; } - skb->h.raw = skb_push(skb, dccp_header_size); - - dh = dccp_hdr(skb); - memset(dh, 0, dccp_header_size); + dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_sport = inet_sk(sk)->sport; dh->dccph_dport = inet_rsk(req)->rmt_port; @@ -392,10 +387,7 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, return NULL; } - skb->h.raw = skb_push(skb, dccp_header_size); - - dh = dccp_hdr(skb); - memset(dh, 0, dccp_header_size); + dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_sport = inet_sk(sk)->sport; dh->dccph_dport = inet_sk(sk)->dport; -- cgit v1.2.3-18-g5258 From f45b3ec481581f24719d8ab0bc812c02fcedc2bc Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Fri, 10 Nov 2006 13:09:10 -0200 Subject: [DCCP]: Fix logfile overflow This patch fixes data being spewed into the logs continually. As the code stood if there was a large queue and long delays timeo would go down to zero and never get reset. This fixes it by resetting timeo. Put constant into header as well. 
Signed-off-by: Ian McDonald Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/output.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/dccp/output.c') diff --git a/net/dccp/output.c b/net/dccp/output.c index 1ae2248557c..51654975e8e 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -249,8 +249,8 @@ void dccp_write_xmit(struct sock *sk, int block) { struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; - long timeo = 30000; /* If a packet is taking longer than 2 secs - we have other issues */ + long timeo = DCCP_XMIT_TIMEO; /* If a packet is taking longer than + this we have other issues */ while ((skb = skb_peek(&sk->sk_write_queue))) { int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, @@ -261,8 +261,10 @@ void dccp_write_xmit(struct sock *sk, int block) sk_reset_timer(sk, &dp->dccps_xmit_timer, msecs_to_jiffies(err)+jiffies); break; - } else + } else { err = dccp_wait_for_ccid(sk, skb, &timeo); + timeo = DCCP_XMIT_TIMEO; + } if (err) { printk(KERN_CRIT "%s:err at dccp_wait_for_ccid" " %d\n", __FUNCTION__, err); -- cgit v1.2.3-18-g5258 From 6f4e5fff1e4d46714ea554fd83e44eab534e8b11 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 10 Nov 2006 17:43:06 -0200 Subject: [DCCP]: Support for partial checksums (RFC 4340, sec. 9.2) This patch does the following: a) introduces variable-length checksums as specified in [RFC 4340, sec. 9.2] b) provides necessary socket options and documentation as to how to use them c) basic support and infrastructure for the Minimum Checksum Coverage feature [RFC 4340, sec. 
9.2.1]: acceptability tests, user notification and user interface In addition, it (1) fixes two bugs in the DCCPv4 checksum computation: * pseudo-header used checksum_len instead of skb->len * incorrect checksum coverage calculation based on dccph_x (2) removes dccp_v4_verify_checksum() since it duplicates code of the checksum computation; code calling this function is updated accordingly. (3) now uses skb_checksum(), which is safer than checksum_partial() if the sk_buff is a non-linear buffer (has pages attached to it). (4) fixes an outstanding TODO item: * If P.CsCov is too large for the packet size, drop packet and return. The code has been tested with applications; the latest version of tcpdump now comes with support for partial DCCP checksums. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/output.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'net/dccp/output.c') diff --git a/net/dccp/output.c b/net/dccp/output.c index 51654975e8e..992caedd772 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -96,6 +96,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) dh->dccph_dport = inet->dport; dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; dh->dccph_ccval = dcb->dccpd_ccval; + dh->dccph_cscov = dp->dccps_pcslen; /* XXX For now we're using only 48 bits sequence numbers */ dh->dccph_x = 1; @@ -115,7 +116,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) break; } - icsk->icsk_af_ops->send_check(sk, skb->len, skb); + icsk->icsk_af_ops->send_check(sk, 0, skb); if (set_ack) dccp_event_ack_sent(sk); @@ -329,7 +330,6 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, sk->sk_prot->max_header); skb->dst = dst_clone(dst); - skb->csum = 0; dreq = dccp_rsk(req); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; @@ -352,6 +352,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry
*dst, dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; + dccp_csum_outgoing(skb); + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; } @@ -376,7 +378,6 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, skb_reserve(skb, sk->sk_prot->max_header); skb->dst = dst_clone(dst); - skb->csum = 0; dccp_inc_seqno(&dp->dccps_gss); @@ -401,7 +402,7 @@ static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr); dccp_hdr_reset(skb)->dccph_reset_code = code; - inet_csk(sk)->icsk_af_ops->send_check(sk, skb->len, skb); + inet_csk(sk)->icsk_af_ops->send_check(sk, 0, skb); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; @@ -475,7 +476,6 @@ int dccp_connect(struct sock *sk) skb_reserve(skb, sk->sk_prot->max_header); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; - skb->csum = 0; dccp_skb_entail(sk, skb); dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); @@ -507,7 +507,6 @@ void dccp_send_ack(struct sock *sk) /* Reserve space for headers */ skb_reserve(skb, sk->sk_prot->max_header); - skb->csum = 0; DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK; dccp_transmit_skb(sk, skb); } @@ -561,7 +560,6 @@ void dccp_send_sync(struct sock *sk, const u64 seq, /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, sk->sk_prot->max_header); - skb->csum = 0; DCCP_SKB_CB(skb)->dccpd_type = pkt_type; DCCP_SKB_CB(skb)->dccpd_seq = seq; @@ -587,7 +585,6 @@ void dccp_send_close(struct sock *sk, const int active) /* Reserve space for headers and prepare control bits. */ skb_reserve(skb, sk->sk_prot->max_header); - skb->csum = 0; DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? 
DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; -- cgit v1.2.3-18-g5258 From e11d9d30802278af22e78d8c10f348b683670cd9 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 13 Nov 2006 13:12:07 -0200 Subject: [DCCP]: Increment sequence numbers on retransmitted Response packets Problem: --- net/dccp/output.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/dccp/output.c') diff --git a/net/dccp/output.c b/net/dccp/output.c index 992caedd772..08ee5547a2f 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -332,6 +332,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb->dst = dst_clone(dst); dreq = dccp_rsk(req); + if (inet_rsk(req)->acked) /* increase ISS upon retransmission */ + dccp_inc_seqno(&dreq->dreq_iss); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; @@ -354,6 +356,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, dccp_csum_outgoing(skb); + /* We use `acked' to remember that a Response was already sent. 
*/ + inet_rsk(req)->acked = 1; DCCP_INC_STATS(DCCP_MIB_OUTSEGS); return skb; } -- cgit v1.2.3-18-g5258 From d7f7365f5776723da6df73540d855069c2daaa5c Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 13 Nov 2006 13:34:38 -0200 Subject: [DCCPv6]: Choose a genuine initial sequence number This * resolves a FIXME - DCCPv6 connections started all with an initial sequence number of 1; * provides a redirection `secure_dccpv6_sequence_number' in case the init_sequence_v6 code should be updated later; * concentrates the update of S.GAR into dccp_connect_init(); * removes a duplicate dccp_update_gss() in ipv4.c; * uses inet->dport instead of usin->sin_port, due to the following assignment in dccp_v4_connect(): inet->dport = usin->sin_port; Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/output.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/dccp/output.c') diff --git a/net/dccp/output.c b/net/dccp/output.c index 08ee5547a2f..0994b13f0f1 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -448,7 +448,6 @@ static inline void dccp_connect_init(struct sock *sk) dccp_sync_mss(sk, dst_mtu(dst)); - dccp_update_gss(sk, dp->dccps_iss); /* * SWL and AWL are initially adjusted so that they are not less than * the initial Sequence Numbers received and sent, respectively: @@ -457,8 +456,13 @@ static inline void dccp_connect_init(struct sock *sk) * These adjustments MUST be applied only at the beginning of the * connection. */ + dccp_update_gss(sk, dp->dccps_iss); dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); + /* S.GAR - greatest valid acknowledgement number received on a non-Sync; + * initialized to S.ISS (sec. 
8.5) */ + dp->dccps_gar = dp->dccps_iss; + icsk->icsk_retransmits = 0; init_timer(&dp->dccps_xmit_timer); dp->dccps_xmit_timer.data = (unsigned long)sk; -- cgit v1.2.3-18-g5258 From b9df3cb8cf9a96e63dfdcd3056a9cbc71f2459e7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 14 Nov 2006 11:21:36 -0200 Subject: [TCP/DCCP]: Introduce net_xmit_eval Throughout the TCP/DCCP (and tunnelling) code, it often happens that the return code of a transmit function needs to be tested against NET_XMIT_CN which is a value that does not indicate a strict error condition. This patch uses a macro for these recurring situations which is consistent with the already existing macro net_xmit_errno, saving on duplicated code. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/output.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'net/dccp/output.c') diff --git a/net/dccp/output.c b/net/dccp/output.c index 0994b13f0f1..ef22f3cc791 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -125,16 +125,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0); - if (err <= 0) - return err; - - /* NET_XMIT_CN is special. It does not guarantee, - * that this packet is lost. It tells that device - * is about to start to drop packets or already - * drops some packets of the same priority and - * invokes us to send less aggressively. - */ - return err == NET_XMIT_CN ? 
0 : err; + return net_xmit_eval(err); } return -ENOBUFS; } @@ -426,8 +417,7 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) if (skb != NULL) { memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, sk, 0); - if (err == NET_XMIT_CN) - err = 0; + return net_xmit_eval(err); } } -- cgit v1.2.3-18-g5258 From 09dbc3895e3242346bd434dae743c456fd28fc6a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 14 Nov 2006 12:57:34 -0200 Subject: [DCCP]: Miscellaneous code tidy-ups This patch does not change code; it performs some trivial clean/tidy-ups: * removal of a `debug_prefix' string in favour of the already existing dccp_role(sk) * add documentation of structures and constants * separated out the cases for invalid packets (step 1 of the packet validation) * removing duplicate statements * combining declaration & initialisation Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/dccp/output.c') diff --git a/net/dccp/output.c b/net/dccp/output.c index ef22f3cc791..c34eada7f02 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -333,6 +333,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, return NULL; } + /* Build and checksum header */ dh = dccp_zeroed_hdr(skb, dccp_header_size); dh->dccph_sport = inet_sk(sk)->sport; -- cgit v1.2.3-18-g5258 From 59348b19efebfd6a8d0791ff81d207b16594c94b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 20 Nov 2006 18:39:23 -0200 Subject: [DCCP]: Simplified conditions due to use of enum:8 states This reaps the benefit of the earlier patch, which changed the type of CCID 3 states to use enums, in that many conditions are now simplified and the number of possible (unexpected) values is greatly reduced. In a few instances, this also allowed pre-conditions to be simplified, with care taken to retain logical equivalence.
[DCCP]: Introduce a consistent BUG/WARN message scheme This refines the existing set of DCCP messages so that * BUG(), BUG_ON(), WARN_ON() have meaningful DCCP-specific counterparts * DCCP_CRIT (for severe warnings) is not rate-limited * DCCP_WARN() is introduced as a rate-limited wrapper Using these allows a faster and cleaner transition to their original counterparts once the code has matured into a full DCCP implementation. Signed-off-by: Gerrit Renker Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/output.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'net/dccp/output.c') diff --git a/net/dccp/output.c b/net/dccp/output.c index c34eada7f02..bfd9c575789 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -257,11 +257,8 @@ void dccp_write_xmit(struct sock *sk, int block) err = dccp_wait_for_ccid(sk, skb, &timeo); timeo = DCCP_XMIT_TIMEO; } - if (err) { - printk(KERN_CRIT "%s:err at dccp_wait_for_ccid" - " %d\n", __FUNCTION__, err); - dump_stack(); - } + if (err) + DCCP_BUG("err=%d after dccp_wait_for_ccid", err); } skb_dequeue(&sk->sk_write_queue); @@ -283,12 +280,9 @@ void dccp_write_xmit(struct sock *sk, int block) err = dccp_transmit_skb(sk, skb); ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); - if (err) { - printk(KERN_CRIT "%s:err from " - "ccid_hc_tx_packet_sent %d\n", - __FUNCTION__, err); - dump_stack(); - } + if (err) + DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", + err); } else kfree(skb); } -- cgit v1.2.3-18-g5258 From 6b57c93dc3aa0115b589cb89ef862d46ab9bd95e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 28 Nov 2006 19:55:06 -0200 Subject: [DCCP]: Use `unsigned' for packet lengths This patch implements a suggestion by Ian McDonald and 1) Avoids tests against negative packet lengths by using unsigned int for packet payload lengths in the CCID send_packet()/packet_sent() routines 2) As a consequence, it removes a now-unnecessary test with regard to `len > 0' in
ccid3_hc_tx_packet_sent: that condition is always true, since * negative packet lengths are avoided * ccid3_hc_tx_send_packet flags an error whenever the payload length is 0. As a consequence, ccid3_hc_tx_packet_sent is never called as all errors returned by ccid_hc_tx_send_packet are caught in dccp_write_xmit 3) Removes the third argument of ccid_hc_tx_send_packet (the `len' parameter), since it is currently always set to skb->len. The code is updated with regard to this parameter change. Signed-off-by: Gerrit Renker Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo --- net/dccp/output.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/dccp/output.c') diff --git a/net/dccp/output.c b/net/dccp/output.c index bfd9c575789..400c30b6fca 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -195,8 +195,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, if (signal_pending(current)) goto do_interrupted; - rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, - skb->len); + rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); if (rc <= 0) break; delay = msecs_to_jiffies(rc); @@ -245,8 +244,7 @@ void dccp_write_xmit(struct sock *sk, int block) this we have other issues */ while ((skb = skb_peek(&sk->sk_write_queue))) { - int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, - skb->len); + int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); if (err > 0) { if (!block) { -- cgit v1.2.3-18-g5258